Beatmup
conv2d.h
Go to the documentation of this file.
1 /*
2  Beatmup image and signal processing library
3  Copyright (C) 2020, lnstadrum
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #pragma once
20 
21 #include "operation.h"
22 #include "../gpu/texture_handler.h"
23 #include <vector>
24 #include <array>
25 #include <map>
26 
27 
28 namespace Beatmup {
29  namespace NNets {
30 
31  /**
32  2D convolution operation computed on GPU.
33  Has 2 inputs: main and residual (detailed below), and a single output.
34  Constraints:
35  - Input and output contain values in [0, 1] range sampled over 8 bits.
36  - Number of input channels is 3 (i.e., the input is an RGB image) or a multiple of 4.
37  - Number of output feature maps is a multiple of 4.
38  - For group convolutions, each group contains a multiple of 4 input channels and a multiple of 4 output
39  channels, or exactly 1 input and 1 output channel (i.e., depthwise).
40  - Kernels are of square shape.
41  - Strides are equal along X and Y.
42  - Dilations are equal to 1.
43  - If an image is given on input (3 input feature maps), only valid padding is supported.
44  - An activation function is always applied on output.
45 
46  Raspberry Pi-related constraints:
47  - Pi cannot sample more than 256 channels to compute a single output value. The actual practical limit is
48  even lower: roughly 128 channels for pointwise convolutions and fewer than 100 channels for
49  bigger kernels. When the limit is reached, the Pi OpenGL driver reports an out-of-memory error (0x505).
50 
51  Features:
52  - Bias addition integrated.
53  - An optional residual input is available: a tensor of output shape added to the convolution result
54  before applying the activation function.
55 
56  Convolution filters and bias are searched in chunks. The chunk names consist of the operation name followed
57  by Conv2D::FILTERS_CHUNK_SUFFIX and Conv2D::BIAS_CHUNK_SUFFIX respectively.
58  The chunk contents are single precision floating point arrays.
59  The filter coefficients are taken in "OIHW" layout, i.e., there are 'O*I' contiguous packets of 'H*W'
60  values each. "O" and "I" are output and input channel numbers, "H" and "W" are filter height and width.
61  */
62  class Conv2D :
64  {
65  private:
67  const int numOutputChannels; //!< number of output feature maps
68  const int numGroups; //!< number of convolution groups
69  const int stride;
71  const bool useInputImage; //!< if `true`, input is the texture handler, not the view
72  const bool isDepthwise; //!< if `true`, the convolution is depthwise, otherwise regular
73  const bool useBias; //!< if `true`, the bias addition is enabled
74  bool ready;
75 
77  Storage::View residualInput; //!< optional tensor to be added to the output before activation
78  GL::TextureHandler *inputImage; //!< input texture handler to be used instead of the input view
79  std::vector<GL::RenderingProgram*> programs; //!< pointers to GLSL programs, one per quad of output channels
80  std::vector<std::array<float, 4>> coeffs; //!< model data to pass to uniform variables, if used
81  std::vector<int> execOrder; //!< execution order of GLSL programs
82  std::vector<Storage::View> groupViews; //!< views per convolution group
83 
84  /**
85  Maps an (outputChannel, inputChannel, x, y) position to a linear coefficient index in the chunkfile.
86  */
87  inline int getIdx(int output, int input, int x, int y) const {
88  return output + numOutputChannels * (input + kernelSize[2] * (x + kernelSize[0] * y));
89  }
90 
91  void prepare(GraphicPipeline& gpu, ChunkCollection& data, GL::ProgramBank& bank);
92  void execute(TaskThread& thread, GraphicPipeline& gpu);
93  int getInputPadding(int index = 0) const;
94  void getSampledChannels(int index, int& min, int& max) const;
95 
96  public:
97  static const char* FILTERS_CHUNK_SUFFIX; //!< suffix added to the op name to get the filters chunk id in the model data
98  static const char* BIAS_CHUNK_SUFFIX; //!< suffix added to the op name to get the bias chunk id in the model data
99 
100  /**
101  Instantiates a 2D convolution operation.
102  \param[in] name Operation name
103  \param[in] kernelSize Convolution kernel size
104  \param[in] numInputChannels Number of input feature map channels (input depth)
105  \param[in] numOutputChannels Number of output feature map channels (output depth)
106  \param[in] stride Convolution stride
107  \param[in] padding Padding policy
108  \param[in] useBias If `true`, the bias addition is enabled. The bias vector is searched in the model data.
109  \param[in] numGroups Number of convolution groups to get a group/depthwise convolution
110  \param[in] activation Activation function applied to the operation output
111  */
112  Conv2D(
113  const std::string& name,
114  const int kernelSize,
115  const int numInputChannels,
116  const int numOutputChannels,
117  const int stride = 1,
119  const bool useBias = true,
120  const int numGroups = 1,
122  );
123 
124  inline bool isBiasUsed() const { return useBias; }
125 
126  inline int getInputCount() const { return 2; }
127  inline int getOutputCount() const { return 1; }
128 
129  inline bool acceptsStorageInput(int index = 0) const { return (index == 0 && !useInputImage) || index == 1; }
130  inline bool acceptsStorageOutput(int index = 0) const { return index == 0; }
131  inline bool acceptsTextureInput(int index = 0) const { return index == 0 && useInputImage; }
132 
133  Size getOutputSize(int outputIndex = 0) const;
134 
135  inline Storage::View getOutput(int index = 0) { return output; }
136 
137  void setInput(Storage::View&& storage, int inputIndex = 0);
138  void setInput(GL::TextureHandler& image, int inputIndex = 0);
139  void setOutput(Storage::View&& storage, int outputIndex = 0);
140 
141  std::map<std::string, std::string> serialize() const;
142 
143  void disconnect();
144 
145  /**
146  \brief Connects a tensor to a residual input.
147  This input is optional. The tensor is added to the convolution result before the non-linear activation
148  is applied. Its size must match the output size.
149  \param[in] storage A storage view containing the residual input tensor.
150  */
151  inline void setResidualInput(Storage::View&& storage) { setInput(std::move(storage), 1); }
152 
153  unsigned long countMultiplyAdds() const;
154  unsigned long countTexelFetches() const;
155 
156  /**
157  Sets up deserialization of the operation.
158  */
159  static bool initDeserializer();
160  };
161 
162  /**
163  \internal
164  Being declared here, this variable ensures Conv2D::initDeserializer() is called with inclusion of this header file.
165  */
167  }
168 }
A key-value pair set storing pieces of arbitrary data (chunks) under string keys.
Definition: chunkfile.h:36
Stores linked GLSL programs and their associated fragment shader codes.
Definition: program_bank.h:31
Internal low-level GPU control API.
Definition: pipeline.h:33
Abstract neural net operation (layer).
Definition: operation.h:46
A mixin implementing activation functions in GLSL.
Definition: operation.h:414
2D convolution operation computed on GPU.
Definition: conv2d.h:64
const bool isDepthwise
if true, the convolution is depthwise, otherwise regular
Definition: conv2d.h:72
std::vector< std::array< float, 4 > > coeffs
model data to pass to uniform variables, if used
Definition: conv2d.h:80
Storage::View getOutput(int index=0)
Returns a storage view bound to a specific operation output.
Definition: conv2d.h:135
static const char * BIAS_CHUNK_SUFFIX
suffix added to the op name to get the bias chunk id in the model data
Definition: conv2d.h:98
void setOutput(Storage::View &&storage, int outputIndex=0)
Definition: conv2d.cpp:512
const int numGroups
number of convolution groups
Definition: conv2d.h:68
std::vector< GL::RenderingProgram * > programs
pointers to GLSL programs, one per quad of output channels
Definition: conv2d.h:79
const bool useBias
if true, the bias addition is enabled
Definition: conv2d.h:73
void execute(TaskThread &thread, GraphicPipeline &gpu)
Executes the operation.
Definition: conv2d.cpp:272
const Size::Padding padding
Definition: conv2d.h:70
int getInputCount() const
Returns number of operation inputs.
Definition: conv2d.h:126
const int stride
Definition: conv2d.h:69
Size getOutputSize(int outputIndex=0) const
Returns full size of a specific operation output.
Definition: conv2d.cpp:397
GL::TextureHandler * inputImage
input texture handler to be used instead of the input view
Definition: conv2d.h:78
static bool initDeserializer()
Sets up deserialization of the operation.
void prepare(GraphicPipeline &gpu, ChunkCollection &data, GL::ProgramBank &bank)
Compiles GLSL shaders.
Definition: conv2d.cpp:88
std::map< std::string, std::string > serialize() const
Returns a serialized representation of the operation.
Definition: conv2d.cpp:415
static const char * FILTERS_CHUNK_SUFFIX
suffix added to the op name to get the filters chunk id in the model data
Definition: conv2d.h:97
const bool useInputImage
if true, input is the texture handler, not the view
Definition: conv2d.h:71
Storage::View output
Definition: conv2d.h:76
bool acceptsTextureInput(int index=0) const
Returns true if the operation can take a GL::TextureHandler at a specific input.
Definition: conv2d.h:131
std::vector< int > execOrder
execution order of GLSL programs
Definition: conv2d.h:81
void getSampledChannels(int index, int &min, int &max) const
Retrieves the range of input feature channels sampled at the same time for a specific input.
Definition: conv2d.cpp:382
int getInputPadding(int index=0) const
Retrieves minimum required size of zero padding for a given input.
Definition: conv2d.cpp:377
void setResidualInput(Storage::View &&storage)
Connects a tensor to a residual input.
Definition: conv2d.h:151
bool acceptsStorageInput(int index=0) const
Returns true if the operation can take a Storage::View at a specific input.
Definition: conv2d.h:129
bool isBiasUsed() const
Definition: conv2d.h:124
bool acceptsStorageOutput(int index=0) const
Returns true if the operation can take a Storage::View at a specific output.
Definition: conv2d.h:130
const int numOutputChannels
number of output feature maps
Definition: conv2d.h:67
std::vector< Storage::View > groupViews
views per convolution group
Definition: conv2d.h:82
Storage::View input
Definition: conv2d.h:76
void setInput(Storage::View &&storage, int inputIndex=0)
Definition: conv2d.cpp:483
unsigned long countMultiplyAdds() const
Counts (approximate) number of multiply-adds used by this operation.
Definition: conv2d.cpp:528
const Size kernelSize
Definition: conv2d.h:66
int getOutputCount() const
Returns number of operation outputs.
Definition: conv2d.h:127
Conv2D(const std::string &name, const int kernelSize, const int numInputChannels, const int numOutputChannels, const int stride=1, const Size::Padding padding=Size::Padding::VALID, const bool useBias=true, const int numGroups=1, const ActivationFunction activation=ActivationFunction::DEFAULT)
Instantiates a 2D convolution operation.
Definition: conv2d.cpp:41
void disconnect()
Assigns empty inputs and outputs.
Definition: conv2d.cpp:474
Storage::View residualInput
optional tensor to be added to the output before activation
Definition: conv2d.h:77
unsigned long countTexelFetches() const
Counts (approximate) number of texel fetches.
Definition: conv2d.cpp:533
int getIdx(int output, int input, int x, int y) const
Maps an (outputChannel, inputChannel, x, y) position to a linear coefficient index in the chunkfile.
Definition: conv2d.h:87
Operation 3D input/output size.
Definition: storage.h:37
Padding
Zero padding specification.
Definition: storage.h:45
Generates GLSL fragment shader code sampling a local neighborhood around the current texture coordinates.
Definition: operation.h:272
Maps a 3D tensor onto a storage.
Definition: storage.h:308
Thread executing tasks.
Definition: parallelism.h:154
static const bool CONV2D_OP_DESERIALIZABLE
Definition: conv2d.h:166
ActivationFunction
Activation function specification.
Definition: operation.h:401
@ DEFAULT
default activation: 0..1 bounded ReLU (identity clipped to 0..1 range)
CustomPoint< numeric > min(const CustomPoint< numeric > &a, const CustomPoint< numeric > &b)
Definition: geometry.h:724
CustomPoint< numeric > max(const CustomPoint< numeric > &a, const CustomPoint< numeric > &b)
Definition: geometry.h:728
jlong jint index
jobject jlong jint jint y
jobject jlong jint x