TorchCraftAI
A bot for machine learning research on StarCraft: Brood War
bandit.h
1 /*
2  * Copyright (c) 2017-present, Facebook, Inc.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #pragma once
9 
10 #include "cherrypi.h"
11 #include "fmt/format.h"
12 #include "utils.h"
13 
14 #include <cereal/archives/json.hpp>
15 #include <cereal/types/memory.hpp>
16 #include <cereal/types/polymorphic.hpp>
17 #include <cereal/types/vector.hpp>
18 
19 #include <numeric>
20 #include <random>
21 
22 static const std::string kBanditNone("none");
23 static const std::string kBanditRandom("random");
24 static const std::string kBanditUCB1("ucb1");
25 static const std::string kBanditUCB1Rolling("ucb1rolling");
26 static const std::string kBanditUCB1Exploit("ucb1exploit");
27 static const std::string kBanditThompson("thompson");
28 static const std::string kBanditThompsonRolling("thompsonrolling");
29 static const std::string kBanditExpMooRolling("expmoorolling");
30 
31 namespace cherrypi {
32 
33 namespace model {
34 
35 /**
36  * Defines a build order, from the standpoint of strategy selection.
37  */
39  /// Whether this build order can be used from the beginning of the game
40  CPI_ARG(bool, validOpening) = false;
41 
42  /// Whether Build Order Switch is allowed to swap into this
43  CPI_ARG(bool, validSwitch) = false;
44 
45  /// Whether Build Order Switch is enabled with this opening
46  CPI_ARG(bool, switchEnabled) = true;
47 
48  /// priority for UCB1 when testing unplayed builds
49  CPI_ARG(int, priority) = 1;
50 
51  /// Which of our races are allowed to use this build order
52  CPI_ARG(std::vector<tc::BW::Race>, ourRaces) = {tc::BW::Race::Zerg};
53 
54  /// Against which enemy races this build order is valid
55  CPI_ARG(std::vector<tc::BW::Race>, enemyRaces) = {tc::BW::Race::Terran,
56  tc::BW::Race::Protoss,
57  tc::BW::Race::Zerg,
58  tc::BW::Race::Unknown};
59 };
60 
61 typedef std::unordered_map<std::string, BuildOrderConfig>
63 
64 /// Implements the default configuration of each build order
65 /// This function needs to be modified in order to update
66 /// the configuration.
68 
69 /// Returns tournament-specific build order configurations given an opponent
71  const std::string& rawOpponentName);
72 
73 /// Returns a vector of acceptable build orders for fighting against
74 /// a given race.
75 std::vector<std::string> acceptableBuildOrders(
76  const std::unordered_map<std::string, BuildOrderConfig>& configs,
77  tc::BW::Race ourRace,
78  tc::BW::Race enemyRace);
79 
80 /// Handle on a vector of victory status for each game, giving
81 /// easy access to relevant figures
83  public:
85 
86  /// Adds a value to the win history vector
87  void addGame(bool won) {
88  wins_.push_back(won);
89  }
90  /// Updates the last value of the win history vector
91  void updateLastGame(bool won);
92  int numWins() const {
93  return std::accumulate(wins_.begin(), wins_.end(), 0);
94  }
95  int numGames() const {
96  return wins_.size();
97  }
98  int numLosses() const {
99  return numGames() - numWins();
100  }
101  float winRate() const {
102  return numGames() == 0 ? 0.0f : static_cast<float>(numWins()) / numGames();
103  }
104  const std::vector<bool>& wins() const {
105  return wins_;
106  }
107 
108  /// Returns a string of type "{numWins}/{numGames}" which
109  /// is only useful for fast debugging and testing
110  std::string statusString() const {
111  return fmt::format("{0}/{1}", numWins(), numGames());
112  }
113  template <class Archive>
114  void serialize(Archive& ar) {
115  ar(CEREAL_NVP(wins_));
116  }
117 
118  /// Configuration for the build,
119  /// providing acceptable races and priors
120  /// This is not serialized, because the configuration
121  /// needs to be implemented in one and only one
122  /// location (see buildOrderConfigurations).
123  /// It must therefore be populated when required.
125 
126  private:
127  std::vector<bool> wins_; // vector of "won" status per game
128 };
129 
130 /// Class holding a playedGames vector for a given enemy.
131 /// History is loaded at instanciation from either read folder
132 /// (in priority) or write folder, or a new empty history is
133 /// created.
134 /// An updated version is saved when calling either "addPlayedGame",
135 /// "write" or "updateLastGameToVictory" methods.
136 /// Beware: it does not check that the file
137 /// was updated after it was loaded (i.e. at instance creation).
139  public:
140  EnemyHistory(
141  std::string enemyName,
142  std::string readFolder = "bwapi-data/read/",
143  std::string writeFolder = "bwapi-data/write/");
144 
145  /// Records a failed game for the given build order, which will be updated on
146  /// game end with the actual win status.
147  /// This is done so that in case of crash, the game is accounted for as a
148  /// crash.
149  /// Updates the opponent file.
150  void addStartingGame(std::string buildOrder);
151 
152  /// In case of won games, this modifies the last
153  /// history into a won game (while it was set to
154  /// loss as default)
155  /// Updates the opponent file.
156  void updateLastGameToVictory(std::string buildOrder);
157 
158  /// Writes the current win history for all builds
159  /// into the opponnent file
160  void write() const;
161 
162  /// Prints all strategies and their counts (for debugging)
163  void printStatus() const;
164 
165  /// map from build order to its counts (number of played games,
166  /// won games etc...)
167  std::unordered_map<std::string, BuildOrderCount> buildOrderCounts;
168 
169  /// Path to the file where the history
170  /// is recorded
171  /// Defaults to returning writeFilepath if
172  /// readFilepath does not exist.
173  std::string readFilepath() const {
174  return readFolder_ + "/" + enemyName_ + ".json";
175  }
176 
177  /// Path to the file where the history
178  /// is recorded (used as default if
179  /// readFilepath() does not exists)
180  std::string writeFilepath() const {
181  return writeFolder_ + "/" + enemyName_ + ".json";
182  }
183 
184  private:
185  friend class cereal::access;
186  std::string enemyName_;
187  std::string readFolder_;
188  std::string writeFolder_;
189 };
190 
191 namespace score {
192 
193 /// Computes a score for build order j based on Thompson sampling (stochastic)
194 ///
195 /// For each opening you keep S and F
196 /// (Successes and Failures, or S and N and F = N-S).
197 ///
198 /// a and b are hyperparameters. A good initial guess is a=1, b=1.
199 /// In a game, for each build order i that we can start with:
200 /// sample p_i in Beta(S_i + a, F_i + b) // here you get your stochasticity
201 /// j = argmax over all p_i
202 /// play with build order j
203 /// update S_j and F_j
204 float thompsonSamplingScore(
206  float thompson_a,
207  float thompson_b);
208 
209 /// Computes a Thompson sampling score on a rolling average
210 /// (w/ exponential decay)
211 float thompsonRollingSamplingScore(
213  float thompson_a,
214  float thompson_b,
215  float thompson_gamma);
216 
217 /// Computes UCB1 score providing build order j:
218 /// (win_j / total_j) + sqrt(2 * log(sum(total)) / total_j)
219 /// Untested build orders get a priority
220 float ucb1Score(
222  int allStrategyGamesCount,
223  float ucb1_c);
224 
225 /// Computes UCB1 score on a rolling average (w/ exponential decay)
226 float ucb1RollingScore(
228  int allStrategyGamesCount,
229  float ucb1_c,
230  float ucb1_gamma);
231 
232 /// Computes Exp Moo score on a rolling average (w/ exponential decay)
233 float expMooRollingSamplingScore(
235  float moo_mult,
236  float moo_gamma);
237 
238 /// Computes UCB1-style score providing build order j:
239 /// (win_j / total_j) + sqrt(2 * log(sum(total)) / total_j)
240 /// but builds with high win rate get first priority
241 /// Untested build orders get second priority
242 float maxExploitScore(
244  int allStrategyGamesCount,
245  float ucb1_c);
246 
247 /// Chooses the build order with maximum score according
248 /// to the provided scoring algorithm
249 /// The assumption is that this is called once per game,
250 /// or at least acted upon based on the last call!
251 std::string chooseBuildOrder(
252  std::map<std::string, cherrypi::model::BuildOrderCount> const&
253  buildOrderCounts,
254  std::string scoreAlgorithm,
255  float ucb1_c,
256  float bandit_gamma,
257  float thompson_a,
258  float thompson_b,
259  float moo_mult);
260 
261 } // namespace score
262 
263 } // namespace model
264 
265 } // namespace cherrypi
BuildOrderConfigurations buildOrdersForTournament(const std::string &rawOpponentName)
Returns tournament-specific build order configurations given an opponent.
Definition: banditconfigurations.cpp:24
Defines a build order, from the standpoint of strategy selection.
Definition: bandit.h:38
float winRate() const
Definition: bandit.h:101
std::unordered_map< std::string, BuildOrderCount > buildOrderCounts
map from build order to its counts (number of played games, won games etc...)
Definition: bandit.h:167
int numLosses() const
Definition: bandit.h:98
BuildOrderConfig config
Configuration for the build, providing acceptable races and priors This is not serialized, because the configuration needs to be implemented in one and only one location (see buildOrderConfigurations).
Definition: bandit.h:124
std::string writeFilepath() const
Path to the file where the history is recorded (used as default if readFilepath() does not exists) ...
Definition: bandit.h:180
int numGames() const
Definition: bandit.h:95
void serialize(Archive &ar)
Definition: bandit.h:114
Handle on a vector of victory status for each game, giving easy access to relevant figures...
Definition: bandit.h:82
std::unordered_map< std::string, BuildOrderConfig > BuildOrderConfigurations
Definition: bandit.h:62
BuildOrderCount()
Definition: bandit.h:84
BuildOrderConfigurations buildOrdersForTraining()
Implements the default configuration of each build order This function needs to be modified in order ...
Definition: banditconfigurations.cpp:210
int numWins() const
Definition: bandit.h:92
std::string statusString() const
Returns a string of type "{numWins}/{numGames}" which is only useful for fast debugging and testing...
Definition: bandit.h:110
Class holding a playedGames vector for a given enemy.
Definition: bandit.h:138
void addGame(bool won)
Adds a value to the win history vector.
Definition: bandit.h:87
CPI_ARG(bool, validOpening)
Whether this build order can be used from the beginning of the game.
const std::vector< bool > & wins() const
Definition: bandit.h:104
Main namespace for bot-related code.
Definition: areainfo.cpp:17
std::vector< std::string > acceptableBuildOrders(const std::unordered_map< std::string, BuildOrderConfig > &configs, tc::BW::Race ourRace, tc::BW::Race enemyRace)
Returns a vector of acceptable build orders for fighting against a given race.
Definition: bandit.cpp:32
std::string readFilepath() const
Path to the file where the history is recorded Defaults to returning writeFilepath if readFilepath do...
Definition: bandit.h:173