TorchCraftAI/reference/estrainer_8h_source.html

 /*
  * Copyright (c) 2017-present, Facebook, Inc.
  *
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */

 #pragma once

 #include "metrics.h"
 #include "policygradienttrainer.h"
 #include "sampler.h"
 #include <shared_mutex>

 #include "distributed.h"
 #include <stack>

 namespace cpid {

 // Trainer subclass that tracks, for every active game, the (generation, seed)
 // pair of the model it plays with, caches the corresponding models, and keeps
 // a bounded history of past models for the off-policy mode.
 // NOTE(review): "ES" presumably stands for evolution strategies (the papers
 // cited in RewardTransform are ES papers) -- confirm against estrainer.cpp.
 class ESTrainer : public Trainer {
  public:
   // Selects how raw rewards are post-processed by rewardTransform().
   enum RewardTransform {
     // NOTE(review): presumably leaves the rewards unchanged -- confirm in
     // estrainer.cpp.
     kNone = 0,
     // Transforms a vector of elements into a vector of floats
     // uniformly distributed within [-0.5,+0.5] according to their ranks.
     // Used in https://arxiv.org/pdf/1703.03864.pdf
     kRankTransform,
     // Divides by the std of the rewards.
     // Defined in https://arxiv.org/pdf/1803.07055.pdf
     kStdNormalize
   };

  protected:
   // NOTE(review): presumably the standard deviation of the parameter noise,
   // taken from the `std` constructor argument -- confirm in estrainer.cpp.
   float std_;
   // NOTE(review): presumably taken from the `batchSize` constructor argument;
   // its exact unit (games per update?) is not visible in this header.
   size_t batchSize_;

   // (generation, seed) => model mapping used to speed up forward in the active
   // local models
   std::unordered_map<std::pair<int, int64_t>, ag::Container, pairhash>
       modelCache_;
   // (GameUID, Key) => (generation, seed) for the active local models
   std::unordered_map<
       std::pair<GameUID, EpisodeKey>,
       std::pair<int, int64_t>,
       pairhash>
       gameToGenerationSeed_;
   // NOTE(review): presumably guards modelCache_ and gameToGenerationSeed_ --
   // the locking discipline lives in estrainer.cpp; confirm there.
   std::shared_timed_mutex modelStorageMutex_;

   // the max number of historical models to store for the off-policy mode
   size_t historyLength_;
   // historyLength_ pairs of (generationId, model) stored in the order of
   // sequentially increasing generations (front() is the oldest, back() is
   // the newest)
   std::deque<std::pair<int, ag::Container>> modelsHistory_;
   // NOTE(review): presumably guards modelsHistory_ -- confirm in
   // estrainer.cpp.
   std::shared_timed_mutex currentModelMutex_;

   // NOTE(review): presumably enables antithetic (mirrored) perturbation
   // sampling -- confirm in estrainer.cpp.
   bool antithetic_;
   // Reward transform selected at construction time (see RewardTransform).
   RewardTransform transform_;

   // NOTE(review): presumably toggles between on-policy and off-policy
   // operation (historyLength_ is documented as off-policy only) -- confirm.
   bool onPolicy_;

   // NOTE(review): presumably guards newGames_ -- confirm in estrainer.cpp.
   std::shared_timed_mutex insertionMutex_;
   // Queue of (GameUID, EpisodeKey) pairs.
   // NOTE(review): presumably episodes that finished and still have to be
   // consumed by update() -- confirm.
   std::deque<std::pair<GameUID, EpisodeKey>> newGames_;

   // NOTE(review): presumably guards seedQueue_ -- confirm in estrainer.cpp.
   std::mutex seedQueueMutex_;
   // Pending seeds; see populateSeedQueue().
   std::vector<int64_t> seedQueue_;

   // NOTE(review): updateMutex_, gamesStarted_ and batchBarrier_ presumably
   // implement the blocking of worker threads described for waitUpdate below
   // -- confirm in estrainer.cpp.
   std::mutex updateMutex_;
   size_t gamesStarted_ = 0;
   std::condition_variable batchBarrier_;

   // NOTE(review): the all*_ vectors are presumably the buffers gathered
   // across distributed workers ("distributed.h" is included) with gatherSize_
   // as their size, while rewards_/generations_/seeds_ hold the local values
   // -- confirm in estrainer.cpp.
   size_t gatherSize_;
   std::vector<float> allRewards_;
   std::vector<int> allGenerations_;
   std::vector<int64_t> allSeeds_;

   std::vector<float> rewards_;
   std::vector<int> generations_;
   std::vector<int64_t> seeds_;

   // Override of the Trainer per-episode hook; the implementation is in
   // estrainer.cpp.
   virtual void stepEpisode(
       GameUID const&,
       EpisodeKey const&,
       ReplayBuffer::Episode&) override;

   // Re-creates a model based on its seed and the generation it was produced
   // from.
   ag::Container generateModel(int generation, int64_t seed);
   // NOTE(review): presumably refills seedQueue_ -- confirm in estrainer.cpp.
   void populateSeedQueue();

  public:
   // Constructs the trainer. `model`, `optim` and `sampler` are the usual
   // Trainer ingredients; the remaining arguments share their names with the
   // protected members above (std_, batchSize_, historyLength_, antithetic_,
   // transform_, onPolicy_).
   // NOTE(review): the argument-to-member mapping is inferred from the names
   // only -- confirm in estrainer.cpp.
   ESTrainer(
       ag::Container model,
       ag::Optimizer optim,
       std::unique_ptr<BaseSampler> sampler,
       float std,
       size_t batchSize,
       size_t historyLength,
       bool antithetic,
       RewardTransform transform,
       bool onPolicy);

   // Returns the model associated with the given (game UID, episode key).
   // NOTE(review): presumably resolved through gameToGenerationSeed_ and
   // modelCache_ -- confirm in estrainer.cpp.
   ag::Container getGameModel(GameUID const& gameIUID, EpisodeKey const& key);
   void forceStopEpisode(EpisodeHandle const&) override;
   // NOTE(review): the generated reference describes this as returning true
   // when an episode was registered and false otherwise, although the declared
   // return type is EpisodeHandle -- presumably the handle is testable for
   // validity; confirm in trainer.h.
   EpisodeHandle startEpisode() override;
   bool update() override;
   virtual ag::Variant forward(ag::Variant inp, EpisodeHandle const&) override;
   // Creates an evaluator; when no sampler is passed, a DiscreteMaxSampler is
   // used.
   std::shared_ptr<Evaluator> makeEvaluator(
       size_t n,
       std::unique_ptr<BaseSampler> sampler =
           std::make_unique<DiscreteMaxSampler>()) override;
   // Returns a tensor derived from `rewards` according to `transform` (see
   // RewardTransform for the meaning of each mode).
   torch::Tensor rewardTransform(
       torch::Tensor const& rewards,
       RewardTransform transform);
   // Releases all the worker threads so that they can be joined.
   void reset() override;
   virtual std::shared_ptr<ReplayBufferFrame> makeFrame(
       ag::Variant trainerOutput,
       ag::Variant state,
       float reward) override;

   // If set to true, after successful update() worker threads would remain
   // blocked until the next update() call.
   TORCH_ARG(bool, waitUpdate) = false;
 };

 } // namespace cpid
cpid::ESTrainer::rewardTransform
torch::Tensor rewardTransform(torch::Tensor const &rewards, RewardTransform transform)
Definition: estrainer.cpp:458

cpid::GameUID
std::string GameUID
Definition: trainer.h:31

cpid::Trainer::model
ag::Container model() const
Definition: trainer.cpp:231

cpid::ESTrainer::gamesStarted_
size_t gamesStarted_
Definition: estrainer.h:69

cpid::ESTrainer::currentModelMutex_
std::shared_timed_mutex currentModelMutex_
Definition: estrainer.h:55

cpid::ESTrainer::seedQueue_
std::vector< int64_t > seedQueue_
Definition: estrainer.h:66

cpid::Trainer::EpisodeHandle
Definition: trainer.h:158

cpid::ESTrainer::makeFrame
virtual std::shared_ptr< ReplayBufferFrame > makeFrame(ag::Variant trainerOutput, ag::Variant state, float reward) override
Definition: estrainer.cpp:505

cpid::ESTrainer::kStdNormalize
Definition: estrainer.h:30

cpid::ESTrainer::modelsHistory_
std::deque< std::pair< int, ag::Container > > modelsHistory_
Definition: estrainer.h:54

cpid::ESTrainer::reset
void reset() override
Releases all the worker threads so that they can be joined.
Definition: estrainer.cpp:493

cpid::ESTrainer::seedQueueMutex_
std::mutex seedQueueMutex_
Definition: estrainer.h:65

std
STL namespace.

cpid::ESTrainer::onPolicy_
bool onPolicy_
Definition: estrainer.h:60

cpid::ESTrainer::insertionMutex_
std::shared_timed_mutex insertionMutex_
Definition: estrainer.h:62

cpid::ESTrainer::batchBarrier_
std::condition_variable batchBarrier_
Definition: estrainer.h:70

cpid::ESTrainer::rewards_
std::vector< float > rewards_
Definition: estrainer.h:77

cpid::ESTrainer::allGenerations_
std::vector< int > allGenerations_
Definition: estrainer.h:74

cpid::ESTrainer::stepEpisode
virtual void stepEpisode(GameUID const &, EpisodeKey const &, ReplayBuffer::Episode &) override
Definition: estrainer.cpp:81

cpid::ESTrainer::RewardTransform
RewardTransform
Definition: estrainer.h:22

cpid::ESTrainer::kNone
Definition: estrainer.h:23

cpid::Trainer
The Trainer should be shared amongst multiple different nodes, and attached to a single Module...
Definition: trainer.h:156

cpid::ESTrainer::populateSeedQueue
void populateSeedQueue()
Definition: estrainer.cpp:485

cpid::ESTrainer::antithetic_
bool antithetic_
Definition: estrainer.h:57

cpid::ESTrainer::newGames_
std::deque< std::pair< GameUID, EpisodeKey > > newGames_
Definition: estrainer.h:63

cpid::ESTrainer::allRewards_
std::vector< float > allRewards_
Definition: estrainer.h:73

cpid::ESTrainer::startEpisode
EpisodeHandle startEpisode() override
Returns true if the episode was successfully registered, and false otherwise.
Definition: estrainer.cpp:318

cpid::ESTrainer::forward
virtual ag::Variant forward(ag::Variant inp, EpisodeHandle const &) override
Definition: estrainer.cpp:437

cpid::ESTrainer::kRankTransform
Definition: estrainer.h:27

cpid::ESTrainer::update
bool update() override
Definition: estrainer.cpp:100

cpid::ESTrainer::forceStopEpisode
void forceStopEpisode(EpisodeHandle const &) override
Definition: estrainer.cpp:306

cpid::ESTrainer::seeds_
std::vector< int64_t > seeds_
Definition: estrainer.h:79

cpid::ESTrainer::modelStorageMutex_
std::shared_timed_mutex modelStorageMutex_
Definition: estrainer.h:47

cpid::ESTrainer::gatherSize_
size_t gatherSize_
Definition: estrainer.h:72

cpid::EpisodeKey
std::string EpisodeKey
Definition: trainer.h:32

cpid::ESTrainer::getGameModel
ag::Container getGameModel(GameUID const &gameIUID, EpisodeKey const &key)
Definition: estrainer.cpp:371

cpid::ESTrainer::allSeeds_
std::vector< int64_t > allSeeds_
Definition: estrainer.h:75

cpid::ESTrainer::std_
float std_
Definition: estrainer.h:34

cpid::ESTrainer::historyLength_
size_t historyLength_
Definition: estrainer.h:50

cpid::ESTrainer::makeEvaluator
std::shared_ptr< Evaluator > makeEvaluator(size_t n, std::unique_ptr< BaseSampler > sampler=std::make_unique< DiscreteMaxSampler >()) override
Definition: estrainer.cpp:445

cpid::ESTrainer::generateModel
ag::Container generateModel(int generation, int64_t seed)
Re-creates the model based on its seed and the generation it was produced from.
Definition: estrainer.cpp:394

cpid::ESTrainer::transform_
RewardTransform transform_
Definition: estrainer.h:58

cpid::Trainer::optim
ag::Optimizer optim() const
Definition: trainer.cpp:235

cpid
The TorchCraftAI training library.
Definition: batcher.cpp:15

cpid::ESTrainer::TORCH_ARG
TORCH_ARG(bool, waitUpdate)

cpid::ESTrainer::batchSize_
size_t batchSize_
Definition: estrainer.h:35

cpid::ESTrainer::gameToGenerationSeed_
std::unordered_map< std::pair< GameUID, EpisodeKey >, std::pair< int, int64_t >, pairhash > gameToGenerationSeed_
Definition: estrainer.h:46

cpid::ESTrainer::ESTrainer
ESTrainer(ag::Container model, ag::Optimizer optim, std::unique_ptr< BaseSampler > sampler, float std, size_t batchSize, size_t historyLength, bool antithetic, RewardTransform transform, bool onPolicy)
Definition: estrainer.cpp:53

cpid::pairhash
Definition: trainer.h:44

cpid::ReplayBuffer::Episode
std::vector< std::shared_ptr< ReplayBufferFrame >> Episode
Definition: trainer.h:89

cpid::ESTrainer::generations_
std::vector< int > generations_
Definition: estrainer.h:78

cpid::ESTrainer::modelCache_
std::unordered_map< std::pair< int, int64_t >, ag::Container, pairhash > modelCache_
Definition: estrainer.h:40

cpid::ESTrainer::updateMutex_
std::mutex updateMutex_
Definition: estrainer.h:68

cpid::ESTrainer
Definition: estrainer.h:20