#include <policygradienttrainer.h>
Inherits cpid::ReplayBufferFrame.
| Type | Name | Description |
|---|---|---|
| ag::Variant | state | |
| torch::Tensor | action | |
| float | pAction | Probability of action according to the policy that was used to obtain this frame. |
| double | reward | Reward observed since taking previous action. |
|
cpid::BatchedPGReplayBufferFrame::BatchedPGReplayBufferFrame(ag::Variant state, torch::Tensor action, float pAction, double reward) [inline]
torch::Tensor cpid::BatchedPGReplayBufferFrame::action
float cpid::BatchedPGReplayBufferFrame::pAction
Probability of action according to the policy that was used to obtain this frame.
double cpid::BatchedPGReplayBufferFrame::reward
Reward observed since taking previous action.
ag::Variant cpid::BatchedPGReplayBufferFrame::state
The documentation for this struct was generated from the following file: