TorchCraftAI
A bot for machine learning research on StarCraft: Brood War
operations.h
/*
 * Copyright (c) 2017-present, Facebook, Inc.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <autogradpp/autograd.h>
#ifndef WITHOUT_POSIX
#include <cnpy/cnpy.h>
#endif // WITHOUT_POSIX

/*
 * Useful helpers for neural networks expressed with Torch.
 */
namespace common {
/**
 * Repeat a 1D tensor so that you end up with a (#channels, sizes[0],
 * sizes[1]) tensor
 */
torch::Tensor repeat2d(torch::Tensor data, at::IntList sizes);

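// Usage sketch (illustrative; values chosen for demonstration): tile a
// per-channel vector over a 2D plane, e.g. to broadcast scalar features
// onto a spatial map.
//   auto chans = torch::rand({3});                 // 3 channel values
//   auto plane = repeat2d(chans, {10, 12});        // -> (3, 10, 12)
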
/**
 * Scatter data into dest at given positions.
 * Depending on the device that the data lives on, different algorithms are
 * used:
 * - For CPU tensors, multiple scatter_add_ calls are used in order to sum up
 * channels for duplicate positions
 * - For GPU tensors, data will be scattered onto all planes in a single
 * scatter_ call and summed up afterwards. This means the function will
 * allocate an intermediate buffer of size (b, c, np, H, W), where np is the
 * maximum number of duplicate positions.
 *
 * There's a benchmark for this function in the corresponding unit tests.
 *
 * positions is a (b, n, 2) integer tensor with elements greater than or equal
 * to zero. positions[p][i][0] refers to the Y position, positions[p][i][1] to
 * the X position of data entry i in batch p.
 * data is a (b, n, c) tensor. Each of the n entries will be placed in dest
 * according to the respective position.
 * For each batch, only the entries up to the first negative position entry
 * are considered.
 * sizes is the {H, W} tuple of the size of the plane to scatter onto.
 *
 * For a single element, it's sufficient to unsqueeze(0) for it to look
 * batched.
 * Positions don't have to be unique -- this function performs sum-pooling
 * by default.
 * The output is of size (b, c, H, W), similar to the input of a convnet.
 */
torch::Tensor
scatterSum2d(torch::Tensor positions, torch::Tensor data, at::IntList sizes);

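// Usage sketch (illustrative; shapes follow the doc above): scatter two
// 4-channel entries onto an 8x8 plane; duplicate positions are sum-pooled.
//   auto pos = torch::zeros({1, 2, 2}, torch::kLong);
//   pos[0][0][1] = 1; pos[0][1][1] = 1;            // both entries at (y=0, x=1)
//   auto dat = torch::ones({1, 2, 4});             // (b=1, n=2, c=4)
//   auto out = scatterSum2d(pos, dat, {8, 8});     // (1, 4, 8, 8); out[0][k][0][1] == 2
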
/**
 * Equivalent to a stack along dim 0 of the input, but with the values
 * padded correctly so that the result is rectangular.
 *
 * For example, if the list contains tensors of sizes [(6, 2), (5, 2), (7, 3)],
 * the result is a tensor of size (3, 7, 3).
 **/
torch::Tensor makeBatch(ag::tensor_list const&, double pad = 0);

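// Usage sketch (illustrative): batch variable-size tensors with -1 padding,
// matching the example in the doc above.
//   ag::tensor_list seqs = {torch::ones({6, 2}), torch::ones({5, 2}),
//                           torch::ones({7, 3})};
//   auto batch = makeBatch(seqs, -1);              // -> (3, 7, 3), padded with -1
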
/**
 * This function works similarly to makeBatch but handles more input types.
 * All query variants must be of the same type (mixing tensors and
 * tensor_lists of size 1 is tolerated).
 * It behaves as follows, depending on the variant type:
 * - If the queries are tensors, it calls makeBatch and returns a tensor
 * - If the queries are dicts, it batches each key individually, and returns a
 * dict. Note that all queries must contain the same keys
 * - If the queries are tensor_lists, it batches each item of the list
 * individually, and returns a tensor_list. Note that all the queries must
 * have the same number of tensors, semantically in the same order.
 */
ag::Variant makeBatchVariant(
    const std::vector<ag::Variant>& queries,
    double pad = 0);

/**
 * This function is the opposite of makeBatchVariant. It assumes that the
 * tensors to be found in the batch have a first dimension of size b,
 * interpreted as the batch dimension. It will take slices of size
 * \param{stride} along that dimension, and put them in the result vector. The
 * size of the result vector will be b/stride. Note that if the stride is 1,
 * then the batch dimension will be squeezed out. Additionally, if
 * \param{maskOut} is true, then we mask out any item of value
 * \param{maskValue}.
 */
std::vector<ag::Variant> unBatchVariant(
    ag::Variant const& batch,
    int stride = 1,
    bool maskOut = false,
    double maskValue = -1);

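// Usage sketch (illustrative; maskOut behavior per the doc above):
// makeBatchVariant pads and stacks, unBatchVariant splits the batch back
// into per-query variants.
//   std::vector<ag::Variant> queries = {ag::Variant(torch::rand({5, 2})),
//                                       ag::Variant(torch::rand({3, 2}))};
//   auto batched = makeBatchVariant(queries, -1);        // tensor of size (2, 5, 2)
//   auto restored = unBatchVariant(batched, 1, true, -1); // 2 variants, -1 padding masked out
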
/// Zero-padding (only supports 3d input)
torch::Tensor pad2d(torch::Tensor input, at::IntList pad);

/// Zero-padding (for any number of dimensions)
/// For every dimension of the input, pad contains 2 elements: the padding
/// before and after along that dimension.
torch::Tensor padNd(torch::Tensor input, at::IntList pad);

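// Usage sketch (illustrative; pad ordering assumed to follow the doc above,
// first dimension first): pad a (2, 3) tensor by one row above/below and two
// columns left/right.
//   auto x = torch::ones({2, 3});
//   auto y = padNd(x, {1, 1, 2, 2});               // -> (4, 7), zeros at the border
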
/**
 * Flips a tensor along a given dimension.
 * y[-a-,i,-b-] = x[-a-,n-i-1,-b-]
 */
torch::Tensor flip(torch::Tensor x, int dim);

/// Mimics pytorch's upsample function
enum class UpsampleMode { Nearest, Linear, Bilinear, Trilinear };
torch::Tensor
upsample(torch::Tensor input, UpsampleMode mode, at::IntList size);
torch::Tensor upsample(torch::Tensor input, UpsampleMode mode, int scaleFactor);

/// Replace (in-place) all zeroes of x by ones.
void zerosToOnes_(torch::Tensor x);

#ifndef WITHOUT_POSIX
torch::Tensor tensorFromNpyArray(cnpy::NpyArray, torch::TensorOptions);
#endif // WITHOUT_POSIX
/**
 * Squash contiguous dimensions of a tensor into a single dimension.
 *
 * The dimensions [i..j] (both included) will be squashed into a single one.
 * So if x is of size s_1 x ... x s_d, the returned tensor will be a view of x
 * of size s_1 x ... x s_{i-1} x (s_i * s_{i+1} * ... * s_j) x s_{j+1} x ... x s_d.
 */
torch::Tensor squash(torch::Tensor x, int i, int j);

/**
 * Unsquash a dimension of a tensor into several dimensions.
 *
 * Replace the i-th dimension of x by sizes (this augments the number of
 * dimensions of x).
 * The product of the elements of sizes should be x.size(i) (sizes can also
 * contain a -1).
 * If x is of size s_1 x ... x s_d, the returned tensor will be a view of x of
 * size s_1 x ... x s_{i-1} x sizes x s_{i+1} x ... x s_d.
 */
torch::Tensor unsquash(torch::Tensor x, int i, at::IntList sizes);

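// Usage sketch (illustrative): squash and unsquash are inverse views.
//   auto x = torch::rand({4, 5, 6, 7});
//   auto s = squash(x, 1, 2);                      // -> view of size (4, 30, 7)
//   auto u = unsquash(s, 1, {5, 6});               // -> view of size (4, 5, 6, 7)
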
/**
 * Sum x across non-masked indices.
 * This does not work if x contains NaNs or infinities at masked indices.
 *
 * - mask should be expandable to x's shape
 * - returns a scalar which is a masked sum of x.
 */
torch::Tensor maskedSum(torch::Tensor x, torch::Tensor mask);

/**
 * Average x over non-masked indices, returning 0 if all indices are masked.
 * This does not work if x contains NaNs or infinities at masked indices.
 *
 * - mask should be expandable to x's shape
 * - returns a scalar which is a masked average of x.
 */
torch::Tensor maskedMean(torch::Tensor x, torch::Tensor mask);

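// Usage sketch (illustrative; values chosen for demonstration): ignore the
// second entry via the mask.
//   auto x = torch::tensor({1.0f, 2.0f, 3.0f});
//   auto m = torch::tensor({1.0f, 0.0f, 1.0f});
//   maskedSum(x, m);                               // -> 4
//   maskedMean(x, m);                              // -> 2
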
/**
 * Computes the MSE loss between x and y.
 *
 * - x and y must have the same shape, and mask must be expandable to x's shape
 * - if reduce is true, losses will be summed or averaged depending on
 * sizeAverage (averaged over non-masked indices)
 * - returns a scalar if reduce is true, otherwise an x-like tensor (with zeros
 * at masked indices)
 */
torch::Tensor mseLoss(
    torch::Tensor x,
    torch::Tensor y,
    torch::Tensor mask,
    bool sizeAverage = true,
    bool reduce = true);

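// Usage sketch (illustrative): mean squared error over unmasked entries only.
//   auto pred = torch::rand({8, 4});
//   auto tgt = torch::rand({8, 4});
//   auto mask = torch::ones({8, 4});
//   mask[0] = 0;                                   // drop the first row from the loss
//   auto loss = mseLoss(pred, tgt, mask);          // scalar, averaged over unmasked entries
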
/*
 * Compute the Cross Entropy Loss between the distribution obtained by applying
 * the softmax layer to the input and the distribution defined by target.
 * This can be described as the expectation of the negative log-likelihood of
 * the predicted distribution when sampled according to the target
 * distribution.
 *
 * - input is a *xCx* tensor of unnormalized scores, where C is the size of
 * dimension dim
 * - target is a *xCx* tensor, non-negative and summing to 1 on the C axis
 * - weight is an optional C tensor rescaling each class (useful for unbalanced
 * datasets)
 * - mask is a *x1x* (or any other broadcastable shape) boolean (only 0s and
 * 1s) tensor where a 0 means that the loss won't be computed for that
 * distribution
 * - the reduction argument specifies how losses are reduced over non-masked
 * indices; possible values are Sum, Mean, or None
 * - returns a scalar if the losses are reduced, otherwise a *x1x* tensor
 * (with zeros at masked indices)
 */
torch::Tensor crossEntropyLoss(
    torch::Tensor input,
    int dim,
    torch::Tensor target,
    torch::Tensor weight = {},
    torch::Tensor mask = {},
    Reduction::Reduction = Reduction::Reduction::Mean);

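// Usage sketch (illustrative): soft-target cross entropy over C=3 classes,
// with scores along dimension 1 and the default Mean reduction.
//   auto logits = torch::rand({8, 3});
//   auto target = torch::softmax(torch::rand({8, 3}), 1); // rows sum to 1
//   auto loss = crossEntropyLoss(logits, 1, target);      // scalar
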
/*
 * Compute the Negative Log Likelihood loss between the input and target.
 *
 * - input is a *xCx* tensor, non-negative and summing to 1 on the C axis
 * - target is a *xCx* tensor, non-negative and summing to 1 on the C axis
 * - weight is an optional C tensor rescaling each class (useful for unbalanced
 * datasets)
 * - mask is a *x1x* (or any other broadcastable shape) boolean (only 0s and
 * 1s) tensor where a 0 means that the loss won't be computed for that
 * distribution
 * - the reduction argument specifies how losses are reduced over non-masked
 * indices; possible values are Sum, Mean, or None
 * - returns a scalar if the losses are reduced, otherwise a *x1x* tensor
 * (with zeros at masked indices)
 */
torch::Tensor nllLoss(
    torch::Tensor input,
    int dim,
    torch::Tensor target,
    torch::Tensor weight = {},
    torch::Tensor mask = {},
    Reduction::Reduction = Reduction::Reduction::Mean);

/**
 * Rescale gradients so that the norm of all gradients (concatenated) is
 * smaller than maxNorm.
 */
void clipGradientNorms(std::vector<torch::Tensor> parameters, float maxNorm);

/**
 * Compute a masked softmax of a tensor in a numerically stable way by
 * removing the max value before exponentiating. The passed in mask must be
 * a variable of 0.0's and 1.0's (floats) of the same shape as the input.
 *
 * - input is a float tensor of the same shape as the mask
 * - mask is a binary float tensor of the same shape as the input
 * - dim is the dimension along which to apply the softmax
 * - clampEpsilon is the minimum value to clamp the output to
 *
 * Returns the output after masking and softmaxing.
 */
torch::Tensor maskedSoftmax(
    torch::Tensor input,
    torch::Tensor mask,
    int dim,
    float clampEpsilon = 0);

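// Usage sketch (illustrative): softmax over valid positions only, e.g. to
// restrict an action distribution to legal actions.
//   auto scores = torch::rand({2, 5});
//   auto legal = torch::ones({2, 5});
//   legal[0][4] = 0;                               // forbid action 4 in batch 0
//   auto probs = maskedSoftmax(scores, legal, 1);  // rows sum to 1; probs[0][4] == 0
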
/**
 * Compute a masked max/argmax of a tensor.
 * The passed in mask must be a variable of 0.0's and 1.0's (floats) of the
 * same shape as the input.
 *
 * - input is a float tensor of the same shape as the mask
 * - mask is a binary float tensor of the same shape as the input
 * - dim is the dimension along which to apply the max
 * - keepDim is whether to squeeze the resulting tensors or not
 *
 * Returns the (max, argmax) tuple after masking.
 * NOTE: behavior is undefined if mask is zero for some batch.
 */
std::tuple<torch::Tensor, torch::Tensor> maskedMax(
    torch::Tensor input,
    torch::Tensor mask,
    int dim,
    bool keepDim = false);

/**
 * Compute a weighted masked softmax of a tensor in a numerically stable way by
 * removing the max value before exponentiating. The passed in mask must be
 * a variable of floats of the same shape as the input. It should include
 * weighting and masking as desired (it need not be binary).
 *
 * - input is a float tensor of the same shape as the mask
 * - mask is a float tensor of the same shape as the input
 * - dim is the dimension along which to apply the softmax
 * - clampEpsilon is the minimum value to clamp the output to
 *
 * Returns the output after weighting, masking, and softmaxing.
 */
torch::Tensor weightedMaskedSoftmax(
    torch::Tensor input,
    torch::Tensor mask,
    int dim,
    float clampEpsilon = 0);

/**
 * Returns a byte tensor x such that selectIndex(x, y, axis) contains only 1s.
 * y is of shape ... x 1 x ...
 * x is of shape ... x d x ...
 */
torch::Tensor extendIndex(torch::Tensor y, int axis, int d);

/**
 * For 1D tensors, this is equivalent to:
 * x[i] <- source[i] if mask[i] == 1
 */
void maskedCopy_(torch::Tensor x, torch::Tensor mask, torch::Tensor source);

/**
 * Immutable masked copy (equivalent to applying maskedCopy_ to a clone of x).
 * NOTE: this does not work if x contains NaNs or infinities!
 * mask should have the same type as x.
 */
torch::Tensor
maskedCopy(torch::Tensor x, torch::Tensor mask, torch::Tensor source);

/**
 * Copies elements from source into x at positions determined by index.
 * If accumulate is true, adds instead of copying (otherwise, each position
 * should appear at most once in index).
 * x has shape X1 x .. x XD
 * index has shape N x D
 * source has shape N
 * For 2D tensors, this is equivalent to:
 * x[index[i][0], index[i][1]] <- source[i]
 */
void putNd_(
    torch::Tensor x,
    torch::Tensor index,
    torch::Tensor source,
    bool accumulate = false);

/**
 * Inverse operation of putNd_.
 * x has shape X1 x .. x Xd
 * index has shape N x d
 * y (return value) has shape N
 * For 2D tensors, this is equivalent to:
 * y[i] = x[index[i][0], index[i][1]];
 */
torch::Tensor takeNd(torch::Tensor x, torch::Tensor index);

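// Usage sketch (illustrative): write two values into a 2D grid, then read
// them back at the same coordinates.
//   auto grid = torch::zeros({4, 4});
//   auto idx = torch::zeros({2, 2}, torch::kLong);
//   idx[0][0] = 1; idx[0][1] = 2;                  // position (1, 2)
//   idx[1][0] = 3; idx[1][1] = 0;                  // position (3, 0)
//   auto vals = torch::tensor({5.0f, 7.0f});
//   putNd_(grid, idx, vals);                       // grid[1][2] == 5, grid[3][0] == 7
//   auto back = takeNd(grid, idx);                 // -> {5, 7}
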
/**
 * Like zeros.index_add_ but with the mean.
 * source has shape X1 x ... Xdim-1 x N x Xdim+1 x ... Xd
 * index has shape N, with values ranging from 0 to size - 1
 * x (return value) has shape X1 x ... Xdim-1 x size x Xdim+1 x ... x Xd
 * x[-a-,i,-b-] is the mean of {source[-a-,j,-b-] where index[j]=i} and
 * zero if this set is empty.
 */
torch::Tensor
indexMean(int size, int dim, torch::Tensor index, torch::Tensor source);

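// Usage sketch (illustrative): mean-pool 4 rows into 2 groups along dim 0.
//   auto src = torch::rand({4, 3});
//   auto idx = torch::zeros({4}, torch::kLong);
//   idx[2] = 1; idx[3] = 1;                        // rows 0,1 -> group 0; rows 2,3 -> group 1
//   auto pooled = indexMean(2, 0, idx, src);       // (2, 3): per-group row means
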
/**
 * Do multiple unsqueezes on first and last dimensions.
 */
torch::Tensor unsqueezes(int before, torch::Tensor x, int after);

/**
 * Takes N 1D tensors xi of size Xi and returns a tensor y of size
 * X1 x ... x XN x N such that y[a1]...[aN][i] = xi[ai].
 */
torch::Tensor meshGrid(ag::tensor_list tensors);

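// Usage sketch (illustrative): build a (H, W, 2) grid of (y, x) coordinates,
// e.g. for positional features on a map.
//   auto ys = torch::arange(0, 3);                 // size 3
//   auto xs = torch::arange(0, 4);                 // size 4
//   auto grid = meshGrid({ys, xs});                // (3, 4, 2); grid[y][x] == {y, x}
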
/**
 * This is a convenience function to apply a tensor transformation to a complex
 * type. For example, you might want to write something like t = t.view(-1),
 * but t is a tensor_list (and you'd like the operation to be applied to each
 * element of the list). You can instead write
 * t = applyTransform(t, [](torch::Tensor t) { return t.view(-1); });
 */
using TensorTransform = std::function<torch::Tensor(torch::Tensor)>;
ag::Variant applyTransform(ag::Variant input, const TensorTransform& fun);

/**
 * Utility to get the device of a variant. If the variant contains several
 * tensors, they are assumed to be on the same device.
 */
at::Device getVariantDevice(ag::Variant const& x);

} // namespace common