Beatmup
cnn.cpp
Go to the documentation of this file.
1 /*
2  Beatmup image and signal processing library
3  Copyright (C) 2020, lnstadrum
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include "cnn.h"
20 #include <algorithm>
21 
22 #ifdef ENABLE_PROFILING
23 #include "../../../utils/profiler.h"
24 #include <iostream>
25 #endif
26 
27 using namespace Beatmup;
28 
29 
30 #ifndef BEATMUP_OPENGLVERSION_GLES20
31 
33  if (!prepared || (input && inputFormat != input->getTextureFormat())) {
34  std::string code = "#version 310 es\n";
35 
36  if (input) {
37  switch (inputFormat = input->getTextureFormat()) {
38  case GL::TextureHandler::TextureFormat::Rx8:
39  case GL::TextureHandler::TextureFormat::RGBx8:
40  case GL::TextureHandler::TextureFormat::RGBAx8:
41  case GL::TextureHandler::TextureFormat::Rx32f:
42  case GL::TextureHandler::TextureFormat::RGBx32f:
43  case GL::TextureHandler::TextureFormat::RGBAx32f:
44  code += "#define beatmupSampler sampler2D\n";
45  break;
46  case GL::TextureHandler::TextureFormat::OES_Ext:
47  code +=
48  "#extension GL_OES_EGL_image_external_essl3 : require\n"
49  "#define beatmupSampler samplerExternalOES\n"
50  "#define texelFetch texture\n";
51  break;
52  default:
53  throw UnsupportedTextureFormat(inputFormat);
54  }
55  }
56 
57  code += "layout(local_size_x = " + std::to_string(wgSize[0]) + ", local_size_y = " + std::to_string(wgSize[1]) + ", local_size_z = " + std::to_string(wgSize[2]) + ") in;\n";
58 
59  program->make(gpu, code + sourceCodeTemplate);
60  prepared = true;
61  }
62 
63 }
64 
66  GraphicPipeline& gpu, GL::RecycleBin& recycleBin, BitmapContentLock& lock, std::string sourceCodeTemplate,
67  int inputZDim, int outputZDim, bool pointwise
68 ):
69  recycleBin(recycleBin), lock(lock), sourceCodeTemplate(sourceCodeTemplate), numInputs(inputZDim / 4), numOutputs(outputZDim / 4), prepared(false)
70 {
71  program = new GL::ComputeProgram(gpu);
72 
73  wgSize[2] = 1;
74 
75  const unsigned int totalLimit = (unsigned int)gpu.getLimit(GraphicPipeline::Limit::LOCAL_GROUPS_TOTAL);
76  const unsigned int yLimit = std::min<unsigned int>(
77  std::sqrt(totalLimit / wgSize[2]),
79  );
80 
81  for (wgSize[1] = 1; wgSize[1] <= yLimit; wgSize[1] <<= 1);
82 
83  wgSize[0] = std::min<unsigned int>(
84  totalLimit / (wgSize[1] * wgSize[2]),
86  );
87 }
88 
89 
91  recycleBin.put(program);
92 }
93 
94 
96  prepare(gpu, &input);
97 
98  // enable
99  program->enable(gpu);
100 
101  // bind outputs
102  for (int i = 0; i < numOutputs; ++i) {
103  if (outputs[i]->getWidth() != input.getWidth() || outputs[i]->getHeight() != input.getHeight())
104  outputs[i]->reshape(input.getWidth(), input.getHeight());
105  lock.writeLock(&gpu, outputs[i], ProcessingTarget::GPU);
106  gpu.bind(*outputs[i], i, false, true);
107  }
108 
109  // bind input
110  program->setInteger("image", 0);
111  gpu.bind(input, 0, TextureParam::INTERP_NEAREST);
112 
113  // g-g-go
114  const int
115  xWorkgroups = ceili(input.getWidth(), wgSize[0]),
116  yWorkgroups = ceili(input.getHeight(), wgSize[1]);
117  program->dispatch(gpu, xWorkgroups, yWorkgroups, 1);
118 
119  // unlock outputs
120  for (int i = 0; i < numOutputs; ++i)
121  lock.unlock(outputs[i]);
122 }
123 
124 
125 unsigned int GLES31X2UpsamplingNetwork::Layer::process(GraphicPipeline& gpu, InternalBitmap** inputs, GL::StorageBuffer& output, int numOutputParts) {
126  prepare(gpu);
127 
128  // enable program
129  program->enable(gpu);
130 
131  // bind inputs
132  int bindingCtr = 0;
133  for (int i = 0; i < numInputs; ++i) {
134  InternalBitmap* input = inputs[i];
135  lock.readLock(&gpu, input, ProcessingTarget::GPU);
136  gpu.bind(*input, i, TextureParam::INTERP_NEAREST);
137  }
138  program->setIntegerArray("inFeatures", 0, numInputs);
139 
140  // bind output
141  const unsigned int
142  xWorkgroups = ceili(inputs[0]->getWidth(), wgSize[0]),
143  yWorkgroups = ceili(inputs[0]->getHeight(), wgSize[1]);
144  const unsigned int outputSize = xWorkgroups * wgSize[0] * yWorkgroups * wgSize[1] * (numOutputs * numOutputParts * 4);
145  if (output.getCurrentCapacity() < outputSize)
146  output.allocate(gpu, outputSize);
147  output.bind(gpu, 0);
148 
149  // g-g-go
150  program->dispatch(gpu, xWorkgroups, yWorkgroups, 1);
151 
152  // unlock
153  for (int i = 0; i < numInputs; ++i)
154  lock.unlock(inputs[i]);
155 
156  return xWorkgroups * wgSize[0];
157 }
158 
159 
160 void GLES31X2UpsamplingNetwork::Layer::processPointwise(GraphicPipeline& gpu, GL::StorageBuffer& input, unsigned int inputStridePix, InternalBitmap** outputs, int width, int height) {
161  prepare(gpu);
162 
163  // enable program
164  program->enable(gpu);
165 
166  // bind inputs
167  input.bind(gpu, 0);
168  program->setUnsignedInteger("inputStride", inputStridePix);
169 
170  // bind outputs
171  for (int i = 0; i < numOutputs; ++i) {
172  if (outputs[i]->getWidth() != width || outputs[i]->getHeight() != height)
173  outputs[i]->reshape(width, height);
174  lock.writeLock(&gpu, outputs[i], ProcessingTarget::GPU);
175  gpu.bind(*outputs[i], i, false, true);
176  }
177 
178  // g-g-go
179  const unsigned int
180  xWorkgroups = ceili(width, wgSize[0]),
181  yWorkgroups = ceili(height, wgSize[1]);
182  program->dispatch(gpu, xWorkgroups, yWorkgroups, 1);
183 
184  // unlock
185  for (int i = 0; i < numOutputs; ++i)
186  lock.unlock(outputs[i]);
187 }
188 
189 
190 void GLES31X2UpsamplingNetwork::Layer::processPointwise(GraphicPipeline& gpu, GL::StorageBuffer& inputFeatures, unsigned int inputStridePix, GL::TextureHandler& inputImage, AbstractBitmap& output) {
191  prepare(gpu, &inputImage);
192 
193  // enable program
194  program->enable(gpu);
195 
196  // bind outputs
197  gpu.bind(output, 1, false, true);
198 
199  // bind inputs
200  inputFeatures.bind(gpu, 0);
201  gpu.bind(inputImage, 0, TextureParam::INTERP_LINEAR);
202  program->setUnsignedInteger("inputStride", inputStridePix);
203  program->setVector2("d1", 1.0f / inputImage.getWidth(), 1.0f / inputImage.getHeight());
204 
205  // g-g-go
206  const unsigned int
207  xWorkgroups = ceili(inputImage.getWidth(), wgSize[0]),
208  yWorkgroups = ceili(inputImage.getHeight(), wgSize[1]);
209  program->dispatch(gpu, xWorkgroups, yWorkgroups, 1);
210 }
211 
212 
214  // disable alpha blend
216 
217 #ifdef ENABLE_PROFILING
218  Profiler profiler;
219  profiler("layer 1");
220 #endif
221 
222  layer1_0.process(gpu, input, storage + 0);
223  layer1_1.process(gpu, input, storage + 6);
224 
225 #ifdef ENABLE_PROFILING
226  gpu.flush();
227  profiler.lap();
228  profiler("layer 2");
229 #endif
230 
231  layer2_0.process(gpu, storage, buffer, 4);
232  layer2_1.process(gpu, storage + 3, buffer, 4);
233  layer2_2.process(gpu, storage + 6, buffer, 4);
234  unsigned int stride = layer2_3.process(gpu, storage + 9, buffer, 4);
235 
236 #ifdef ENABLE_PROFILING
237  gpu.flush();
238  profiler.lap();
239  profiler("layer 3");
240 #endif
241 
242  layer3.processPointwise(gpu, buffer, stride, storage, input.getWidth(), input.getHeight());
243 
244 #ifdef ENABLE_PROFILING
245  gpu.flush();
246  profiler.lap();
247  profiler("layer 4");
248 #endif
249 
250  layer4_0.process(gpu, storage, buffer, 2);
251  stride = layer4_1.process(gpu, storage + 3, buffer, 2);
252 
253 #ifdef ENABLE_PROFILING
254  gpu.flush();
255  profiler.lap();
256  profiler("layer 5");
257 #endif
258 
259  layer5.processPointwise(gpu, buffer, stride, input, output);
260 
261  gpu.flush();
262 #ifdef ENABLE_PROFILING
263  profiler.lap();
264  profiler.report(std::cout);
265 #endif
266 }
267 
268 
270 #define STRINGIFY(...) #__VA_ARGS__
271 
272  layer1_0(gpu, *ctx.getGpuRecycleBin(), *this,
273 #include "l1-0.glsl"
274  , 1, 24
275  ),
276  layer1_1(gpu, *ctx.getGpuRecycleBin(), *this,
277 #include "l1-1.glsl"
278  , 1, 24
279  ),
280 
281  layer2_0(gpu, *ctx.getGpuRecycleBin(), *this,
282 #include "l2-0.glsl"
283  , 12, 8
284  ),
285  layer2_1(gpu, *ctx.getGpuRecycleBin(), *this,
286 #include "l2-1.glsl"
287  , 12, 8
288  ),
289  layer2_2(gpu, *ctx.getGpuRecycleBin(), *this,
290 #include "l2-2.glsl"
291  , 12, 8
292  ),
293  layer2_3(gpu, *ctx.getGpuRecycleBin(), *this,
294 #include "l2-3.glsl"
295  , 12, 8
296  ),
297 
298  layer3(gpu, *ctx.getGpuRecycleBin(), *this,
299 #include "l3.glsl"
300  , 32, 24, true
301  ),
302 
303  layer4_0(gpu, *ctx.getGpuRecycleBin(), *this,
304 #include "l4-0.glsl"
305  , 12, 8
306  ),
307  layer4_1(gpu, *ctx.getGpuRecycleBin(), *this,
308 #include "l4-1.glsl"
309  , 12, 8
310  ),
311 
312  layer5(gpu, *ctx.getGpuRecycleBin(), *this,
313 #include "l5.glsl"
314  , 16, 1, true
315  ),
316 
318 {
319  for (int i = 0; i < STORAGE_SIZE; ++i)
320  storage[i] = new InternalBitmap(ctx, PixelFormat::QuadByte, 64, 64, false);
321 }
322 
323 
325  for (int i = 0; i < STORAGE_SIZE; ++i)
326  delete storage[i];
327 }
328 
329 #endif
A very basic class for any image.
Makes sure the bitmap content is accessible within an image processing task.
Definition: content_lock.h:34
void lock(GraphicPipeline *gpu, AbstractBitmap *input, AbstractBitmap *output)
Definition: content_lock.h:84
Basic class: task and memory management, any kind of static data.
Definition: context.h:59
Layer(GraphicPipeline &gpu, GL::RecycleBin &recycleBin, BitmapContentLock &lock, std::string sourceCodeTemplate, int inputZDim, int outputZDim, bool pointwise=false)
Definition: cnn.cpp:65
void prepare(GraphicPipeline &gpu, GL::TextureHandler *inputImage=nullptr)
Definition: cnn.cpp:32
void process(GraphicPipeline &gpu, GL::TextureHandler &input, InternalBitmap **outputs)
Definition: cnn.cpp:95
void processPointwise(GraphicPipeline &gpu, GL::StorageBuffer &input, unsigned int inputStridePix, InternalBitmap **outputs, int width, int height)
Definition: cnn.cpp:160
GL::ComputeProgram * program
Definition: cnn.h:38
GLES31X2UpsamplingNetwork(Context &ctx, GraphicPipeline &gpu)
Definition: cnn.cpp:269
void process(GraphicPipeline &gpu, GL::TextureHandler &input, AbstractBitmap &output)
Definition: cnn.cpp:213
InternalBitmap * storage[STORAGE_SIZE]
Definition: cnn.h:72
GL::StorageBuffer buffer
Definition: cnn.h:73
static const int STORAGE_SIZE
Definition: cnn.h:33
GLSL compute program.
Stores references to GPU resources that will not be used anymore and needed to be recycled in a threa...
Definition: recycle_bin.h:34
void bind(GraphicPipeline &gpu, int unit) const
void allocate(GraphicPipeline &gpu, const size_t sizeBytes, const void *data=nullptr)
size_t getCurrentCapacity() const
virtual const int getHeight() const =0
Height of the texture in pixels.
virtual const TextureFormat getTextureFormat() const =0
Returns the texture format specifying how the shader must interpret the data.
virtual const int getWidth() const =0
Width of the texture in pixels.
Internal low-level GPU control API.
Definition: pipeline.h:33
void switchMode(Mode mode)
Switches GPU mode.
Definition: pipeline.cpp:941
int getLimit(Limit limit) const
Definition: pipeline.cpp:936
@ INFERENCE
Textures are feature maps computed in fragment shaders.
void flush()
Waits until all operations submitted to GPU are finished.
Definition: pipeline.cpp:931
void bind(GL::TextureHandler &texture, size_t texUnit, const TextureParam param)
Definition: pipeline.cpp:881
Bitmap whose memory is managed by the Beatmup engine.
const int getHeight() const
Height of the texture in pixels.
void reshape(int width, int height)
Changes bitmap size.
Collects running time statistics of multiple tracks.
Definition: profiler.h:31
void report(std::ostream &, ReportType type=ReportType::FULL) const
Definition: profiler.cpp:56
Exception thrown when texture format does not match any supported format.
#define STRINGIFY(...)
@ QuadByte
4 channels of 8 bits per pixel (like RGBA), unsigned integer values
@ INTERP_LINEAR
bilinear pixel interpolation
@ INTERP_NEAREST
nearest neighbor pixel interpolation
std::string to_string(Beatmup::NNets::ActivationFunction function)
#define ceili(x, y)
integer division x/y with ceiling
Definition: utils.hpp:21
ctx getGpuRecycleBin() -> emptyBin()
Beatmup::Context * ctx
jlong jint width
jlong jint jint height
return bitmap getWidth()
return bitmap getHeight()