TorchCraftAI/reference/sampler_8h_source.html

 /*
  * Copyright (c) 2017-present, Facebook, Inc.
  *
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */

 #pragma once
 #include "trainer.h"
 #include <autogradpp/autograd.h>

 namespace cpid {

 /**
  * Base class of all samplers. A sampler consumes the output of the model and
  * turns it into an action; the exact shape of that action depends on the rest
  * of the training loop. For convenience, the base implementation of sample()
  * is the identity.
  */
 class BaseSampler {
  public:
   BaseSampler() = default;
   virtual ~BaseSampler() = default;

   /**
    * Produces an action from the model output.
    * @param in the model output
    * @return `in`, unchanged, in this base implementation
    */
   virtual ag::Variant sample(ag::Variant in) {
     return in;
   }

   /**
    * Hook for subclasses that can compute the probability of `action` under
    * the model output `in`. The base class provides no implementation.
    * @param in the model output
    * @param action the action whose probability is requested
    * @throws std::runtime_error always, in this base implementation
    */
   virtual ag::Variant computeProba(
       const ag::Variant& in,
       const ag::Variant& action) {
     // No dummy return is needed after the throw: control never gets past it.
     throw std::runtime_error("Proba computation not implemented...");
   }
 };

 /**
  * Samples discrete actions from a categorical policy.
  *
  * Input: an unordered_map<string, Variant> holding, under policyKey, a tensor
  * pi of size [b, n].
  * Output: the same map with two additional entries:
  *   - actionKey: a tensor of size [b] whose entries lie in [0, n-1], obtained
  *     by multinomial sampling over pi;
  *   - pActionKey: the probability of the sampled action.
  */
 class MultinomialSampler : public BaseSampler {
  public:
   MultinomialSampler(
       const std::string& policyKey = kPiKey,
       const std::string& actionKey = kActionKey,
       const std::string& pActionKey = kPActionKey);

   ag::Variant sample(ag::Variant in) override;
   ag::Variant computeProba(
       const ag::Variant& in,
       const ag::Variant& action) override;

  protected:
   // Map keys used by this sampler; presumably initialized from the
   // constructor arguments of the same name (definition not in this header).
   std::string policyKey_;
   std::string actionKey_;
   std::string pActionKey_;
 };

 /**
  * Greedy sampler over discrete actions.
  *
  * Input: an unordered_map<string, Variant> holding, under policyKey, a tensor
  * of per-action scores of size [b, n].
  * Output: the same map with an additional entry actionKey, a tensor of size
  * [b] whose entries lie in [0, n-1] and correspond to the action with the
  * highest score.
  */
 class DiscreteMaxSampler : public BaseSampler {
  public:
   DiscreteMaxSampler(
       const std::string& policyKey = kPiKey,
       const std::string& actionKey = kActionKey);

   ag::Variant sample(ag::Variant in) override;

  protected:
   // Map keys used by this sampler; presumably initialized from the
   // constructor arguments of the same name (definition not in this header).
   std::string policyKey_;
   std::string actionKey_;
 };

 /**
  * Samples continuous actions from a normal distribution.
  *
  * Input: an unordered_map<string, Variant> holding
  *   - under policyKey, a tensor of size [b, n] giving the mean of the
  *     distribution;
  *   - under stdKey, the standard deviation of the normal. It can be either a
  *     float/double, in which case the same deviation is used for the whole
  *     batch, or a tensor of the same shape as the policy, for finer control.
  * Output: the same map with two additional entries:
  *   - actionKey: a tensor of size [b] where each entry action[i] is sampled
  *     from a normal distribution centered in policy[i];
  *   - pActionKey: the probability of the sampled action.
  */
 class ContinuousGaussianSampler : public BaseSampler {
  public:
   ContinuousGaussianSampler(
       const std::string& policyKey = kPiKey,
       const std::string& stdKey = kSigmaKey,
       const std::string& actionKey = kActionKey,
       const std::string& pActionKey = kPActionKey);

   ag::Variant sample(ag::Variant in) override;
   ag::Variant computeProba(
       const ag::Variant& in,
       const ag::Variant& action) override;

  protected:
   // Map keys used by this sampler; presumably initialized from the
   // constructor arguments of the same name (definition not in this header).
   std::string policyKey_;
   std::string stdKey_;
   std::string actionKey_;
   std::string pActionKey_;
 };

 /**
  * Deterministic sampler for continuous actions.
  *
  * Input: an unordered_map<string, Variant> holding, under policyKey, a tensor
  * of size [b, n].
  * Output: the same map with an additional entry actionKey, which is a clone
  * of the policy.
  */
 class ContinuousDeterministicSampler : public BaseSampler {
  public:
   ContinuousDeterministicSampler(
       const std::string& policyKey = kPiKey,
       const std::string& actionKey = kActionKey);

   ag::Variant sample(ag::Variant in) override;

  protected:
   // Map keys used by this sampler; presumably initialized from the
   // constructor arguments of the same name (definition not in this header).
   std::string policyKey_;
   std::string actionKey_;
 };

 /**
  * Epsilon-greedy sampler over discrete actions.
  *
  * Input: an unordered_map<string, Variant> holding, under QKey, a tensor of
  * size [b, n].
  * Output: the same map with an additional entry actionKey, which contains the
  * best action with probability 1-eps, and a random action with probability
  * eps.
  */
 class EpsGreedySampler : public BaseSampler {
  public:
   EpsGreedySampler(
       double eps = 0.07,
       const std::string& QKey = kQKey,
       const std::string& actionKey = kActionKey);

   ag::Variant sample(ag::Variant in) override;

   // The data members below are part of the public interface of this class.
   // Presumably they are initialized from the constructor arguments of the
   // same name (definition not in this header).
   double eps_;
   std::string QKey_;
   std::string actionKey_;
 };
 } // namespace cpid
cpid::kQKey
const std::string kQKey
Definition: trainer.h:37

cpid::DiscreteMaxSampler
This sampler expects as input an unordered_map<string, Variant>, containing an entry QKey...
Definition: sampler.h:63

cpid::ContinuousDeterministicSampler::policyKey_
std::string policyKey_
Definition: sampler.h:114

cpid::EpsGreedySampler
This sampler expects as input an unordered_map<string, Variant> containing an entry QKey...
Definition: sampler.h:124

cpid::BaseSampler::computeProba
virtual ag::Variant computeProba(const ag::Variant &in, const ag::Variant &action)
Definition: sampler.h:28

cpid::kActionKey
const std::string kActionKey
Definition: trainer.h:41

cpid::kPiKey
const std::string kPiKey
Definition: trainer.h:38

cpid::ContinuousDeterministicSampler::actionKey_
std::string actionKey_
Definition: sampler.h:115

cpid::EpsGreedySampler::QKey_
std::string QKey_
Definition: sampler.h:134

cpid::ContinuousGaussianSampler::pActionKey_
std::string pActionKey_
Definition: sampler.h:98

cpid::kPActionKey
const std::string kPActionKey
Definition: trainer.h:42

cpid::DiscreteMaxSampler::policyKey_
std::string policyKey_
Definition: sampler.h:71

cpid::BaseSampler::BaseSampler
BaseSampler()
Definition: sampler.h:21

cpid::ContinuousGaussianSampler::stdKey_
std::string stdKey_
Definition: sampler.h:97

cpid::BaseSampler
A sampler takes the output of the model, and outputs an action accordingly.
Definition: sampler.h:19

cpid::BaseSampler::~BaseSampler
virtual ~BaseSampler()=default

cpid::MultinomialSampler
This sampler expects as input an unordered_map<string, Variant>, which contains an entry policyKey...
Definition: sampler.h:43

cpid
The TorchCraftAI training library.
Definition: batcher.cpp:15

cpid::BaseSampler::sample
virtual ag::Variant sample(ag::Variant in)
Definition: sampler.h:24

cpid::EpsGreedySampler::eps_
double eps_
Definition: sampler.h:133

cpid::ContinuousGaussianSampler
This sampler expects as input an unordered_map<string, Variant>, containing an entry policyKey...
Definition: sampler.h:85

cpid::ContinuousDeterministicSampler
This sampler expects as input an unordered_map<string, Variant> containing an entry policyKey...
Definition: sampler.h:106

cpid::kSigmaKey
const std::string kSigmaKey
Definition: trainer.h:39

cpid::MultinomialSampler::policyKey_
std::string policyKey_
Definition: sampler.h:54