#include <policygradienttrainer.h>
Inherits cpid::ReplayBufferFrame.
Public attributes:

| Type | Name | Description |
| --- | --- | --- |
| `ag::Variant` | `state` | |
| `torch::Tensor` | `action` | |
| `float` | `pAction` | Probability of `action` according to the policy that was used to obtain this frame. |
| `double` | `reward` | Reward observed since taking the previous action. |
Constructor:

`inline cpid::BatchedPGReplayBufferFrame::BatchedPGReplayBufferFrame(ag::Variant state, torch::Tensor action, float pAction, double reward)`
Member documentation:

`torch::Tensor cpid::BatchedPGReplayBufferFrame::action`

`float cpid::BatchedPGReplayBufferFrame::pAction`
Probability of `action` according to the policy that was used to obtain this frame.

`double cpid::BatchedPGReplayBufferFrame::reward`
Reward observed since taking the previous action.

`ag::Variant cpid::BatchedPGReplayBufferFrame::state`
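As a usage illustration, here is a minimal sketch of filling in one frame after a policy step. Only the constructor signature above comes from this header; the `episode` container and the `recordStep` helper are hypothetical and shown purely for illustration.

```cpp
#include <memory>
#include <vector>

#include <policygradienttrainer.h>

// Hypothetical per-episode storage of frames via the ReplayBufferFrame base class.
std::vector<std::unique_ptr<cpid::ReplayBufferFrame>> episode;

// Hypothetical helper: record one step of an episode as a replay buffer frame.
void recordStep(
    ag::Variant state,    // model input observed at this step
    torch::Tensor action, // action sampled from the policy
    float pAction,        // probability the policy assigned to that action
    double reward) {      // reward observed since taking the previous action
  episode.push_back(std::make_unique<cpid::BatchedPGReplayBufferFrame>(
      std::move(state), std::move(action), pAction, reward));
}
```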
The documentation for this struct was generated from the following file: `policygradienttrainer.h`