#include <autogradpp/autograd.h>
#include <torch/torch.h>

// Everything below lives in namespace cpid, the TorchCraftAI training library.

// See trainer for the output format of these models.
class OnlineZORBModel /* base class not shown in this extract */ {
 public:
  virtual std::vector<torch::Tensor> generateNoise() = 0;
};

// State, action taken, reward. Taking in an additional action taken allows
// you to not just take the max ...
// ReplayBufferFrame is the stub base class for replay buffer frames (trainer.h).
struct OnlineZORBReplayBufferFrame : ReplayBufferFrame {
  OnlineZORBReplayBufferFrame(
      std::vector<torch::Tensor> state,
      std::vector<long> actions,
      double reward)
      : state(state), actions(actions), reward(reward) {}

  std::vector<torch::Tensor> state;
  std::vector<long> actions;
  double reward;
};
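For context, subclasses of OnlineZORBModel supply the perturbation noise the zero-order trainer applies. A minimal sketch of generateNoise(), assuming one Gaussian tensor per perturbed parameter; the class name GaussianZOModel and the params_ member are illustrative, not part of this header:

// Hypothetical sketch, not part of zeroordertrainer.h: draw i.i.d. Gaussian
// noise shaped like each parameter tensor. The trainer is expected to scale
// this noise (e.g. by its delta setting) when perturbing the model.
class GaussianZOModel : public OnlineZORBModel {
 public:
  explicit GaussianZOModel(std::vector<torch::Tensor> params)
      : params_(std::move(params)) {}

  std::vector<torch::Tensor> generateNoise() override {
    std::vector<torch::Tensor> noise;
    noise.reserve(params_.size());
    for (auto const& p : params_) {
      noise.push_back(torch::randn_like(p));  // same shape and dtype as p
    }
    return noise;
  }

 private:
  std::vector<torch::Tensor> params_;  // tensors this model perturbs
};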
// This is an OnlineZORBTrainer that works with multiple actions per frame.
// The base Trainer (trainer.h) should be shared amongst multiple different
// nodes, and attached to a single Module... GameUID and EpisodeKey are
// std::string typedefs from trainer.h.
class OnlineZORBTrainer : public Trainer {
  int64_t episodes_ = 0;
  std::mutex updateLock_;
  std::mutex noiseLock_;
  std::unordered_map<
      GameUID,
      std::unordered_map<EpisodeKey, std::vector<torch::Tensor>>>
      noise_;  // member name not recoverable from this extract
  std::vector<torch::Tensor> lastNoise_;
  std::atomic<int> nEpisodes_;

 public:
  bool update() override;
  ag::Variant forward(ag::Variant inp, EpisodeHandle const&) override;
  virtual std::shared_ptr<ReplayBufferFrame> makeFrame(
      ag::Variant trainerOutput,
      ag::Variant state,  // middle parameter inferred from the Trainer base
      float reward) override;

  TORCH_ARG(float, valueLambda) = 0;
  TORCH_ARG(float, delta) = 1e-3;
  TORCH_ARG(int, batchSize) = 10;
  TORCH_ARG(bool, antithetic) = false;
};
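TORCH_ARG generates a chainable setter and a getter for each of these hyperparameters: delta is the perturbation scale, batchSize the number of episodes accumulated per update, and antithetic presumably toggles mirrored noise pairs. A usage sketch; the constructor arguments are an assumption, since the constructor is not shown in this extract:

// Hypothetical configuration; `model` and `optim` stand in for whatever the
// real constructor takes, which this extract does not show.
auto trainer = std::make_shared<OnlineZORBTrainer>(model, optim);
trainer->batchSize(32)  // episodes accumulated before an update
    .delta(1e-2)        // perturbation scale
    .antithetic(true);  // assumed: mirror noise sign across episodes

Each TORCH_ARG setter returns *this, which is what makes the chained calls above legal.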
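Putting the frame type to work: trainer.h defines Episode as std::vector<std::shared_ptr<ReplayBufferFrame>>, so frames like OnlineZORBReplayBufferFrame can be collected into one. A toy sketch; the tensor shape, action index, and reward value are placeholders:

// Hypothetical helper, not part of the library: build a one-frame episode.
Episode makeToyEpisode() {
  Episode episode;
  episode.push_back(std::make_shared<OnlineZORBReplayBufferFrame>(
      std::vector<torch::Tensor>{torch::rand({1, 16})},  // state features
      std::vector<long>{3},                              // action taken
      0.5));                                             // observed reward
  return episode;
}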