Beatmup
conv2d.cpp
1 /*
2  Beatmup image and signal processing library
3  Copyright (C) 2020, lnstadrum
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include "../exception.h"
20 #include "conv2d.h"
21 #include "deserialized_model.h"
22 #include <algorithm>
23 
24 using namespace Beatmup;
25 using namespace NNets;
26 
27 
28 // hardcoded coefficient and coordinate formats
29 #define COEF_FMT "%0.6f"
30 #define COORD_FMT "%0.10f"
31 
32 const char *Conv2D::FILTERS_CHUNK_SUFFIX = "/w";
33 const char *Conv2D::BIAS_CHUNK_SUFFIX = "/b";
34 
35 static const char
36  *UNIFORM_INPUT = "features",
37  *UNIFORM_RESIDUAL_INPUT = "residual",
39 
40
41 Conv2D::Conv2D(
42  const std::string& name,
43  const int kernelSize,
44  const int numInputChannels,
45  const int numOutputChannels,
46  const int stride,
47  const Size::Padding padding,
48  const bool useBias,
49  const int numGroups,
50  const ActivationFunction activation
51 ):
52  AbstractOperation(name), SpatialFilteringMixin(kernelSize, kernelSize), ActivationFunctionMixin(activation),
53  kernelSize(kernelSize, kernelSize, numInputChannels / numGroups), numOutputChannels(numOutputChannels), numGroups(numGroups),
54  stride(stride), padding(padding),
55  useInputImage(numInputChannels == 3),
56  isDepthwise(numInputChannels == numGroups && numOutputChannels == numGroups),
57  useBias(useBias),
58  ready(false),
59  inputImage(nullptr)
60 {
61  if (useInputImage) {
62  InvalidArgument::check(numGroups == 1, "Cannot apply a group convolution to the input image");
63  InvalidArgument::check(padding == Size::Padding::VALID, "Only valid zero padding setting is supported when an image is used as input");
64  }
65  else
66  Storage::checkChannelNumber(numInputChannels);
68  OutOfRange::checkMin(stride, 1, "Positive convolution stride expected, %d got");
69  OutOfRange::checkMin(kernelSize, 1, "Positive convolution kernel size expected, %d got");
70  if (!useInputImage && !isDepthwise)
71  InvalidArgument::check(this->kernelSize.getDepth() % 4 == 0, "A multiple of 4 is expected as number of input channels in the convolution kernel.");
72  if (!isDepthwise && numGroups > 1)
73  OutOfRange::checkMin(this->kernelSize.getDepth(), 4, "Kernels having less than 4 input channels are not supported in grouped convolutions. Got %d channels.");
74 
75  // check groups alignment: each group must contain 4k input and output channels
76  if (!isDepthwise) {
77  if (!useInputImage)
78  InvalidArgument::check(numInputChannels % (4 * numGroups) == 0,
79  "Cannot split " +std::to_string(numInputChannels)+ " input channels on " +std::to_string(numGroups)+ " groups of 4*k channels each.");
80  InvalidArgument::check(numOutputChannels % (4 * numGroups) == 0,
81  "Cannot split " +std::to_string(numOutputChannels)+ " output channels on " +std::to_string(numGroups)+ " groups of 4*k channels each.");
82  }
83  programs.reserve(numOutputChannels / 4);
84  groupViews.reserve(numGroups);
85 }
86 
87
88 void Conv2D::prepare(GraphicPipeline& gpu, ChunkCollection& data, GL::ProgramBank& bank) {
89  RuntimeError::check((useInputImage && inputImage) || (!useInputImage && input), "Input is not provided to Conv2D operation " + getName());
90  RuntimeError::check(output, "Output is not provided to Conv2D operation " + getName());
91 
92  // get coefficients
93  const Chunk kernel(data, getName() + FILTERS_CHUNK_SUFFIX);
94  if (kernel.size() != kernelSize.volume() * numOutputChannels * sizeof(float))
95  throw InconsistentModelData(this, "Weights size mismatch");
96 
97  const Chunk* biases = nullptr;
98  if (useBias) {
99  biases = new Chunk(data, getName() + BIAS_CHUNK_SUFFIX);
100  if (biases->size() != numOutputChannels * sizeof(float))
101  throw InconsistentModelData(this, "Biases size mismatch");
102  }
103 
104  // free old stuff
105  for (auto program : programs)
106  bank.release(gpu, program);
107  programs.clear();
108  coeffs.clear();
109 
110  // decide whether to use uniforms or not
111  static const int MAX_ALLOWED_NUMBER_OF_PROGRAMS = 0; // discovered empirically that uniforms are faster on Pi, Nano and desktop
112  static const int NUM_RESERVED_UNFORM_VECTORS = 8 + std::max(kernelSize[0], kernelSize[1]) / 2; // number of uniform vectors to keep unused
113  const int numberOfPrograms = numOutputChannels / 4;
114  const int uniformsLength = kernelSize.volume() + 1; // number of uniform vectors per program
115  const bool useUniforms = !useInputImage && // if an image is given on input, the use of uniforms is not supported
116  numberOfPrograms > MAX_ALLOWED_NUMBER_OF_PROGRAMS && // if not too many programs, rather go with hardcoded model data
117  uniformsLength + NUM_RESERVED_UNFORM_VECTORS < gpu.getLimit(GraphicPipeline::Limit::FRAGMENT_UNIFORM_VECTORS);
118  if (useUniforms)
119  coeffs.reserve(numberOfPrograms * uniformsLength);
120 
121  const bool useUniformShift = useUniforms && kernelSize.getDepth() <= 4;
122  // use uniform shift if only one input texture is sampled, i.e., depthwise or grouped with groups of 4
123 
124  // init new programs
125  for (int outputChannel = 0; outputChannel < numOutputChannels; outputChannel += 4) {
126  const size_t coefStart = coeffs.size(); // index of the first coefficient in coeffs for the current program
127 
128  // compute indices delimiting the current group
129  const int groupIdx = outputChannel * numGroups / numOutputChannels;
130  const int firstInputChannel = groupIdx * kernelSize.getDepth();
131  const int lastInputChannel = firstInputChannel + (isDepthwise ? 4 : kernelSize.getDepth());
132 
133  // set up GLSL code
135 
136 #ifdef BEATMUP_DEBUG
137  if (!groupViews.empty())
138  DebugAssertion::check(groupViews.back().getNumberOfTextures() <= gpu.getLimit(GraphicPipeline::Limit::TEXTURE_IMAGE_UNITS),
139  "Cannot compute Conv2D operation " + getName() + " on the current GPU: too many textures per group");
140 #endif
141 
142  code.printf("uniform sampler2D %s[%d];", UNIFORM_INPUT, useInputImage || isDepthwise ? 1 : groupViews[groupIdx].getNumberOfTextures());
143  if (residualInput)
144  code.printf("uniform sampler2D %s[1];", UNIFORM_RESIDUAL_INPUT);
145  if (useUniforms)
146  code.printf("uniform highp vec4 %s[%d];", UNIFORM_COEFFICIENT, uniformsLength);
147 
149  code.line("void main() {");
150  code.line("highp vec4 sum;");
151 
152  // declare neighborhood: vec4 for storage, vec3 for image
153  SpatialFilteringMixin::declare(code, useInputImage ? "highp vec3" : "highp vec4", !useInputImage);
154 
155  // loop through input channels
156  for (int inputChannel = firstInputChannel; inputChannel < lastInputChannel; inputChannel += 4) {
157  const int channelInGroup = inputChannel - firstInputChannel;
158 
159  const Point shift = (useUniformShift || !input) ? Point::ZERO :
160  (Point(input.getChannelOrigin(inputChannel) - input.getChannelOrigin(firstInputChannel)) / input.getTextureSize());
161  // texture coordinates sample the first channel in the current group, so shift is relative to its origin
162 
163  // compute depthwise convolution: inline sampling used
164  if (isDepthwise) {
165  code("sum = ");
166  for (int y = 0; y < kernelSize[1]; ++y)
167  for (int x = 0; x < kernelSize[0]; ++x) {
168  if (x > 0 || y > 0) code(" + ");
169  const float* w = kernel.ptr<float>(getIdx(outputChannel, 0, x, y));
170  if (useUniforms) {
171  code.printf("%s[%d] * ", UNIFORM_COEFFICIENT, (int)(coeffs.size() - coefStart));
172  coeffs.emplace_back(std::array<float, 4>{ w[0], w[1], w[2], w[3] });
173  }
174  else
175  code.printf("vec4(" COEF_FMT "," COEF_FMT "," COEF_FMT "," COEF_FMT ") * ", w[0], w[1], w[2], w[3]);
176  SpatialFilteringMixin::sampleInline(code, UNIFORM_INPUT, 0, IntPoint(x, y), shift);
177  }
178  code.line(";");
179  }
180 
181  // compute convolution with 3-channel input image using dot product; no inline sampling
182  else if (useInputImage) {
183  SpatialFilteringMixin::sample(code, UNIFORM_INPUT, 0, shift);
184  const int offset[4] = { 0, 1 * numOutputChannels, 2 * numOutputChannels, 3 * numOutputChannels };
185  for (int y = 0; y < kernelSize[1]; ++y)
186  for (int x = 0; x < kernelSize[0]; ++x) {
187  code((channelInGroup == 0 && x == 0 && y == 0) ? "sum = vec4(" : "sum += vec4(");
188  for (int c = 0; c < 4; ++c) {
189  if (c > 0) code(",");
190  const float* w = kernel.ptr<float>(getIdx(c + outputChannel, channelInGroup, x, y));
191  code.printf("dot(vec3(" COEF_FMT "," COEF_FMT "," COEF_FMT "), %s%d%d)",
192  w[0], w[offset[1]], w[offset[2]], SpatialFilteringMixin::SAMPLE_ID_PREFIX, x, y);
193  }
194  code.line(");");
195  }
196  }
197 
198  // compute 4m to 4n channels using vector by 4x4 matrix multiply: inline sampling used
199  else {
200  code.printf("sum %s", channelInGroup == 0 ? "=" : "+=");
201  const int offset[4] = { 0, 1 * numOutputChannels, 2 * numOutputChannels, 3 * numOutputChannels };
202  for (int y = 0; y < kernelSize[1]; ++y)
203  for (int x = 0; x < kernelSize[0]; ++x) {
204  if (x > 0 || y > 0) code(" + ");
205  SpatialFilteringMixin::sampleInline(code, UNIFORM_INPUT, groupViews[groupIdx].getChannelTextureNumber(channelInGroup), IntPoint(x, y), shift);
206  code.printf(" * mat4(");
207  for (int c = 0; c < 4; ++c) {
208  if (c > 0) code(",");
209  const float* w = kernel.ptr<float>(getIdx(c + outputChannel, channelInGroup, x, y));
210  if (useUniforms) {
211  code.printf("%s[%d]", UNIFORM_COEFFICIENT, (int)(coeffs.size() - coefStart));
212  coeffs.emplace_back(std::array<float, 4>{ w[0], w[offset[1]], w[offset[2]], w[offset[3]] });
213  }
214  else
215  code.printf(COEF_FMT "," COEF_FMT "," COEF_FMT "," COEF_FMT, w[0], w[offset[1]], w[offset[2]], w[offset[3]]);
216  }
217  code.printf(")");
218  }
219  code.line(";");
220  }
221  }
222 
223  // add residual input
224  if (residualInput) {
225  // get linear mapping of channel pixel positions to sample the residual input properly
226  const IntPoint mainOrigin = input.getChannelOrigin(useUniformShift ? outputChannel : firstInputChannel);
227  const IntPoint residualOrigin = residualInput.getChannelOrigin(outputChannel);
228  const Rectangle mainArea(mainOrigin, mainOrigin + input.getSpatialSize());
229  const Rectangle resArea(residualOrigin, residualOrigin + residualInput.getSpatialSize());
230  const Point mainTexSize(input.getTextureWidth(), input.getTextureHeight());
231  const Point resTexSize(residualInput.getTextureWidth(), residualInput.getTextureHeight());
232  Point scale, offset;
233  (mainArea / mainTexSize).getMapping(resArea / resTexSize, scale, offset);
234  // sample, add to sum
235  code.printf("sum += texture2D(%s[0], %s * vec2(" COORD_FMT "," COORD_FMT ") + vec2(" COORD_FMT "," COORD_FMT "));\n",
236  UNIFORM_RESIDUAL_INPUT, getInputSamplingPos().c_str(), scale.x, scale.y, offset.x, offset.y);
237  }
238 
239  // add bias if enabled
240  if (useBias) {
241  const float* b = biases->ptr<float>(outputChannel);
242  if (useUniforms) {
243  code.printf("sum += %s[%d];", UNIFORM_COEFFICIENT, (int)(coeffs.size() - coefStart));
244  coeffs.emplace_back(std::array<float, 4>{ b[0], b[1], b[2], b[3] });
245  }
246  else
247  code.printf("sum += vec4(" COEF_FMT "," COEF_FMT "," COEF_FMT "," COEF_FMT ");\n", b[0], b[1], b[2], b[3]);
248  }
249 
250  // apply activation
251  ActivationFunctionMixin::apply(code, "sum");
252  code("}");
253 
254  // init program
255  programs.push_back(bank(gpu, code));
256  }
257 
258  // setup execution order: same programs writing to the same texture are next to each other
259  execOrder.resize(programs.size());
260  for (size_t i = 0; i < execOrder.size(); ++i)
261  execOrder[i] = (int)i;
262  std::sort(execOrder.begin(), execOrder.end(), [&](int i, int j) {
263  return programs[i] < programs[j] || (programs[i] == programs[j] &&
264  output.getChannelTextureNumber(4 * i) < output.getChannelTextureNumber(4 * j));
265  });
266 
267  delete biases;
268  ready = true;
269 }
270 
271 
272 void Conv2D::execute(TaskThread& thread, GraphicPipeline& gpu) {
273  if (!ready)
274  throw NotReady(this);
275 
276  RuntimeError::check((useInputImage && inputImage) || (!useInputImage && input), "Input is not provided to a Conv2D operation.");
277  RuntimeError::check(output, "Output is not provided to Conv2D operation " + getName());
278  if (residualInput && residualInput.getSpatialSize() != output.getSpatialSize())
279  throw RuntimeError("Residual input size does not match the output size");
280 
281 #ifdef BEATMUP_DEBUG
282  RuntimeError::check(output.getSize() == getOutputSize(), "Operation output storage size mismatch");
283 #endif
284 
285  // static program setup
289  );
290 
291  // compute tex coords
292  const IntPoint strides(stride, stride);
293  const IntPoint inputTextureSize = useInputImage ?
297  const IntRectangle samplingArea = useInputImage ?
299  getSamplingArea(input, 0, strides, padding);
300 
301  gpu.setTextureCoordinates(samplingArea, inputTextureSize, output.getSpatialSize());
302  }
303 
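 // note: coeffs is populated in prepare() only when uniform-stored coefficients are enabled, so a nonzero per-program count indicates that uniforms are in use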
304  const int coeffsPerProgram = (int)(coeffs.size() / programs.size());
305  const bool uniformsAreUsed = coeffsPerProgram > 0;
306 
307  // for each output channel
308  Storage::Binder bind(gpu);
309  for (size_t i = 0; i < execOrder.size(); ++i) {
310  const int programNum = execOrder[i];
311  const int outputChannel = 4 * programNum;
312 
313  GL::RenderingProgram& program = *programs[programNum];
314 
315  if (isDepthwise) {
316  const int channel = outputChannel;
317 
318  // bind output to a program
319  const bool fast = bind.begin(program, output, outputChannel);
320 
321  if (!fast) {
322  // bind inputs
323  bind(input, UNIFORM_INPUT, outputChannel);
324  if (residualInput)
325  bind(residualInput, UNIFORM_RESIDUAL_INPUT, outputChannel);
327  }
328 
329  // setup the remaining stuff
330  if (isUniformShiftUsed())
332  else
333  gpu.setTextureCoordinates(getSamplingArea(input, channel, strides, padding), inputTextureSize, output.getSpatialSize());
334  }
335 
336  else {
337  // bind output to a program
338  const int groupIdx = outputChannel * numGroups / numOutputChannels;
339  const bool isSameGroup = i > 0 && 4 * execOrder[i - 1] * numGroups / numOutputChannels == groupIdx;
340  const bool fast = bind.begin(program, output, outputChannel) && isSameGroup;
341 
342  const int firstInputChannel = groupIdx * kernelSize.getDepth();
343  const int lastInputChannel = firstInputChannel + (isDepthwise ? 4 : kernelSize.getDepth());
344 
345  if (!fast) {
346  // bind inputs
347  if (useInputImage)
349  else {
350  const int firstInputChannel = groupIdx * kernelSize.getDepth();
351  bind(groupViews[groupIdx], UNIFORM_INPUT);
352 
353  if (residualInput)
354  bind(residualInput, UNIFORM_RESIDUAL_INPUT, outputChannel);
355 
356  if (isUniformShiftUsed())
358  else
359  gpu.setTextureCoordinates(getSamplingArea(input, firstInputChannel, strides, padding), inputTextureSize, output.getSpatialSize());
360  }
361 
362  // setup the remaining stuff
364  }
365  }
366 
367  // update uniforms if needed
368  if (uniformsAreUsed)
369  program.setVec4Array(UNIFORM_COEFFICIENT, coeffs[coeffsPerProgram * programNum].data(), coeffsPerProgram);
370 
371  // g-g-go
372  program.blend();
373  }
374 }
375 
376 
377 int Conv2D::getInputPadding(int index) const {
378  return (index == 0 && padding == Size::Padding::SAME) ? std::max(kernelSize[0], kernelSize[1]) / 2 : 0;
379 }
380 
381 
382 void Conv2D::getSampledChannels(int index, int& min, int& max) const {
383  if (index == 0) {
384  // main input: sampling an entire group at once
385  min = useInputImage ? 3 : 4;
387  }
388  else if (index == 1) {
389  // residual input: sampling 1 texture at once
390  min = max = 4;
391  }
392  else
393  min = max = 0;
394 }
395 
396 
397 Size Conv2D::getOutputSize(int outputIndex) const {
398  if (outputIndex == 0) {
399  RuntimeError::check((useInputImage && inputImage) || (!useInputImage && input),
400  "Input is not provided to Conv2D operation " + getName());
401  const Size inputSize = useInputImage ? Size(inputImage->getWidth(), inputImage->getHeight(), 3) : input.getSize();
402  const Size result = inputSize.transform(
403  kernelSize,
404  Size(stride, stride, 0),
405  padding,
406  numOutputChannels
407  );
408  RuntimeError::check(result.volume() > 0, "Invalid (zero or negative) output size got in " + getName());
409  return result;
410  }
411  return Size::EMPTY;
412 }
413 
414 
415 std::map<std::string, std::string> Conv2D::serialize() const {
416  return {
417  { "_name", getName() },
418  { "_type", "conv2d" },
419  { "kernel_size", std::to_string(kernelSize[0]) },
420  { "input_channels", std::to_string(kernelSize.getDepth() * numGroups) },
421  { "output_channels", std::to_string(numOutputChannels) },
422  { "stride", std::to_string(stride) },
423  { "padding", std::to_string(padding) },
424  { "use_bias", useBias ? "true" : "false" },
425  { "groups", std::to_string(numGroups) },
426  { "activation", std::to_string(activationFunc) }
427  };
428 }
429 
430 
431 bool Conv2D::initDeserializer() {
432  static class Conv2DDeserializer : public AbstractOperation::Deserializer {
433  public:
434  Conv2DDeserializer() : Deserializer("conv2d") {}
435  AbstractOperation* deserialize(Context& context, const Listing::Block& block) {
436  /** \page NNetsOpsSerialization Operations serialization
437  This page describes the operation options.
438 
439  Every operation necessarily has `_name` and `_type` parameters. The remaining parameters depend on the operation type.
440 
441  \section Conv2D
442  \code{yaml}
443  - _name: arbitrary operation name
444  _type: conv2d # fixed string
445  kernel_size: 3 # size of convolution kernel
446  input_channels: 3 # number of input feature channels
447  output_channels: 16 # number of output feature channels
448  stride: 2 # stride (defaults to 1)
449  padding: valid # padding, string, "valid" or "same" (defaults to "valid")
450  use_bias: true # bias addition, "true" or "false" (defaults to "true")
451  groups: 1 # number of groups for grouped convolution (defaults to 1)
452  activation: default # activation function
453  \endcode
454 
455  For activation functions see \ref NNetsActivationFunctionsSerialization.
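
 For reference, the YAML entry above maps onto the Conv2D constructor defined in this file roughly as follows (a sketch; in practice the deserializer below constructs the operation from the parsed block):
 \code{cpp}
 Conv2D conv(
     "arbitrary operation name",   // _name
     3,                            // kernel_size
     3,                            // input_channels
     16,                           // output_channels
     2,                            // stride
     Size::Padding::VALID,         // padding
     true,                         // use_bias
     1,                            // groups
     ActivationFunction::DEFAULT   // activation
 );
 \endcode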
456  */
457  return new Conv2D(
458  block["_name"],
459  block.get<int>("kernel_size"),
460  block.get<int>("input_channels"),
461  block.get<int>("output_channels"),
462  block.get<int>("stride", 1),
463  paddingFromString(block.get<std::string>("padding", std::to_string(Size::Padding::VALID))),
464  block.get<bool>("use_bias", true),
465  block.get<int>("groups", 1),
466  activationFunctionFromString(block.get<std::string>("activation", std::to_string(ActivationFunction::DEFAULT)))
467  );
468  }
469  } john;
470 
471  return true;
472 }
473 
474 void Conv2D::disconnect() {
475  inputImage = nullptr;
476  input = Storage::View();
477  residualInput = Storage::View();
478  output = Storage::View();
479  groupViews.clear();
480 }
481 
482 
483 void Conv2D::setInput(Storage::View&& view, int inputIndex) {
484  OutOfRange::check(inputIndex, 0, 1, "Input index out of range: %d");
485  RuntimeError::check(view.getStorage().getPadding() >= getInputPadding(inputIndex), "The storage has insufficient padding");
486  if (inputIndex == 0) {
487  if (view) {
488  RuntimeError::check(!useInputImage, "An image is expected on input, but a tensor is passed");
489  RuntimeError::check(view.getDepth() == kernelSize.getDepth() * numGroups, "Tensor depth does not match kernel depth");
490  // create group views
491  groupViews.clear();
492  if (!isDepthwise)
493  for (int groupIdx = 0; groupIdx < numGroups; ++groupIdx) {
494  const int firstInputChannel = groupIdx * kernelSize.getDepth();
495  const int lastInputChannel = firstInputChannel + (isDepthwise ? 4 : kernelSize.getDepth());
496  groupViews.emplace_back(std::move(view), firstInputChannel, lastInputChannel - firstInputChannel);
497  }
498  }
499  this->input = std::move(view);
500  this->inputImage = nullptr;
501  }
502  else {
503  if (view) {
504  RuntimeError::check(!useInputImage, "Cannot use the residual input when an image is used as the main input");
505  RuntimeError::check(view.getDepth() == numOutputChannels, "Residual input tensor depth does not match output depth");
506  }
507  this->residualInput = std::move(view);
508  }
509 }
510 
511 
512 void Conv2D::setOutput(Storage::View&& storage, int outputIndex) {
513  OutOfRange::check(outputIndex, 0, 0, "Output index out of range: %d");
514  this->output = std::move(storage);
515 }
516 
517 
518 void Conv2D::setInput(GL::TextureHandler& image, int inputIndex) {
519  if (inputIndex == 0) {
520  RuntimeError::check(useInputImage, "Cannot use image as Conv2D input");
521  this->inputImage = &image;
522  }
523  else
524  AbstractOperation::setInput(image, inputIndex);
525 }
526 
527 
528 unsigned long Conv2D::countMultiplyAdds() const {
529  return getOutputSize(0).volume() * kernelSize.volume();
530 }
531 
532 
533 unsigned long Conv2D::countTexelFetches() const {
534  unsigned long count = getOutputSize(0).volume() / 4 * kernelSize.volume() / (useInputImage ? 3 : 4);
535  if (residualInput)
536  count += getOutputSize(0).volume() / 4;
537  return count;
538 }