cpp/html/conv2d_8cpp_source.html

 /*

     Beatmup image and signal processing library

     Copyright (C) 2020, lnstadrum


     This program is free software: you can redistribute it and/or modify

     it under the terms of the GNU General Public License as published by

     the Free Software Foundation, either version 3 of the License, or

     (at your option) any later version.


     This program is distributed in the hope that it will be useful,

     but WITHOUT ANY WARRANTY; without even the implied warranty of

     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

     GNU General Public License for more details.


     You should have received a copy of the GNU General Public License

     along with this program.  If not, see <http://www.gnu.org/licenses/>.

 */


 #include "../exception.h"
 #include "conv2d.h"
 #include "deserialized_model.h"
 #include <algorithm>
 #include <memory>


 using namespace Beatmup;

 using namespace NNets;


 // Hardcoded printf-style formats used when numeric values are written as literals into the generated GLSL code.

 // COEF_FMT formats filter weights and biases that are inlined into the shader code.
 #define COEF_FMT "%0.6f"

 // COORD_FMT formats the scale and offset of the texture coordinates mapping used to sample the residual input.
 #define COORD_FMT "%0.10f"


 // Suffix appended to the operation name to get the name of the chunk storing the filter weights.
 const char *Conv2D::FILTERS_CHUNK_SUFFIX = "/w";

 // Suffix appended to the operation name to get the name of the chunk storing the biases.
 const char *Conv2D::BIAS_CHUNK_SUFFIX = "/b";


 // Names of the uniform variables declared in the generated fragment shaders.
 static const char

     // array of samplers the main input is read from
     *UNIFORM_INPUT = "features",

     // sampler array (of one entry) the residual input is read from
     *UNIFORM_RESIDUAL_INPUT = "residual",

     // array of vec4 storing weights and biases when they are passed as uniforms
     *UNIFORM_COEFFICIENT = "cc";


 /**
     Instantiates a 2D convolution operation and validates its hyperparameters.
     An operation taking exactly 3 input channels reads its input from an image (texture handler) instead of a
     storage view. An operation having as many groups as input and output channels is treated as depthwise.
     \param[in] name                 Operation name
     \param[in] kernelSize           Convolution kernel size (the kernel is square)
     \param[in] numInputChannels     Number of input feature channels
     \param[in] numOutputChannels    Number of output feature channels
     \param[in] stride               Convolution stride, 1 or greater
     \param[in] padding              Zero padding setting
     \param[in] useBias              If `true`, a bias is added to the convolution result
     \param[in] numGroups            Number of convolution groups, 1 or greater
     \param[in] activation           Activation function applied to the result
     The checks below report OutOfRange / InvalidArgument errors when the parameters are inconsistent.
 */
 Conv2D::Conv2D(
     const std::string& name,
     const int kernelSize,
     const int numInputChannels,
     const int numOutputChannels,
     const int stride,
     const Size::Padding padding,
     const bool useBias,
     const int numGroups,
     const ActivationFunction activation
 ):
     AbstractOperation(name), SpatialFilteringMixin(kernelSize, kernelSize), ActivationFunctionMixin(activation),
     // the kernel depth is the number of input channels per group; the division is guarded here because the number
     // of groups can only be validated in the constructor body
     kernelSize(kernelSize, kernelSize, numGroups > 0 ? numInputChannels / numGroups : 0),
     numOutputChannels(numOutputChannels), numGroups(numGroups),
     stride(stride), padding(padding),
     useInputImage(numInputChannels == 3),
     isDepthwise(numInputChannels == numGroups && numOutputChannels == numGroups),
     useBias(useBias),
     ready(false),
     inputImage(nullptr)
 {
     // reject a non-positive number of groups first: it is used as a divisor in the checks below
     OutOfRange::checkMin(numGroups, 1, "Positive number of groups expected, %d got");

     if (useInputImage) {
         InvalidArgument::check(numGroups == 1, "Cannot apply a group convolution to the input image");
         InvalidArgument::check(padding == Size::Padding::VALID, "Only valid zero padding setting is supported when an image is used as input");
     }
     else
         Storage::checkChannelNumber(numInputChannels);
     Storage::checkChannelNumber(numOutputChannels);
     OutOfRange::checkMin(stride, 1, "Positive convolution stride expected, %d got");
     OutOfRange::checkMin(kernelSize, 1, "Positive convolution kernel size expected, %d got");
     if (!useInputImage && !isDepthwise)
         InvalidArgument::check(this->kernelSize.getDepth() % 4 == 0, "A multiple of 4 is expected as number of input channels in the convolution kernel.");
     if (!isDepthwise && numGroups > 1)
         OutOfRange::checkMin(this->kernelSize.getDepth(), 4, "Kernels having less than 4 input channels are not supported in grouped convolutions. Got %d channels.");

     // check groups alignment: each group must contain 4*k input channels and 4*k output channels
     if (!isDepthwise) {
         if (!useInputImage)
             InvalidArgument::check(numInputChannels % (4 * numGroups) == 0,
                 "Cannot split " +std::to_string(numInputChannels)+ " input channels on " +std::to_string(numGroups)+ " groups of 4*k channels each.");
         InvalidArgument::check(numOutputChannels % (4 * numGroups) == 0,
             "Cannot split " +std::to_string(numOutputChannels)+ " output channels on " +std::to_string(numGroups)+ " groups of 4*k channels each.");
     }

     // one program per quad of output channels, one view per group
     programs.reserve(numOutputChannels / 4);
     groupViews.reserve(numGroups);
 }


 /**
     Builds the GLSL programs computing the convolution, one program per quad of output channels.
     Reads the filters (and the biases, if enabled) from the model data, releases the programs built formerly,
     generates the fragment shader code, gets the corresponding programs from the bank and sets up their execution
     order. The weights and biases are either inlined into the shader code as literals, or collected in `coeffs` to be
     passed through a uniform array at execution time.
     \param[in,out] gpu      Graphic pipeline instance
     \param[in] data         Model data; chunks named after the operation with "/w" and "/b" suffixes are read
     \param[in,out] bank     Program bank the programs are taken from and released to
     Throws InconsistentModelData if the size of the weights or biases chunk does not match the operation parameters.
     Input/output connection checks are reported through RuntimeError::check.
 */
 void Conv2D::prepare(GraphicPipeline& gpu, ChunkCollection& data, GL::ProgramBank& bank) {
     RuntimeError::check((useInputImage && inputImage) || (!useInputImage && input), "Input is not provided to Conv2D operation " + getName());
     RuntimeError::check(output, "Output is not provided to Conv2D operation " + getName());

     // get coefficients
     const Chunk kernel(data, getName() + FILTERS_CHUNK_SUFFIX);
     if (kernel.size() != kernelSize.volume() * numOutputChannels * sizeof(float))
         throw InconsistentModelData(this, "Weights size mismatch");

     // the biases chunk is owned by a smart pointer, so that it is released if an exception is thrown below
     std::unique_ptr<const Chunk> biases;
     if (useBias) {
         biases.reset(new Chunk(data, getName() + BIAS_CHUNK_SUFFIX));
         if (biases->size() != numOutputChannels * sizeof(float))
             throw InconsistentModelData(this, "Biases size mismatch");
     }

     // from now on the former programs get invalidated: the operation is not ready until the new ones are built
     ready = false;

     // free old stuff
     for (auto program : programs)
         bank.release(gpu, program);
     programs.clear();
     coeffs.clear();

     // decide whether use uniforms or not
     static const int MAX_ALLOWED_NUMBER_OF_PROGRAMS = 0;    // discovered empirically that uniforms are faster on Pi, Nano and desktop
     // number of uniform vectors to keep unused; depends on the kernel size of this very operation, so it must not
     // be a static variable shared among all the instances
     const int numReservedUniformVectors = 8 + std::max(kernelSize[0], kernelSize[1]) / 2;
     const int numberOfPrograms = numOutputChannels / 4;
     const int uniformsLength = kernelSize.volume() + 1;     // number of uniform vectors per program
     const bool useUniforms = !useInputImage &&                  // if an image is given on input, uniforms are not supported
         numberOfPrograms > MAX_ALLOWED_NUMBER_OF_PROGRAMS &&    // if not too many programs, rather go with hardcoded model data
         uniformsLength + numReservedUniformVectors < gpu.getLimit(GraphicPipeline::Limit::FRAGMENT_UNIFORM_VECTORS);
     if (useUniforms)
         coeffs.reserve(numberOfPrograms * uniformsLength);

     // use uniform shift if only one input texture is sampled, i.e., depthwise or grouped with groups of 4
     const bool useUniformShift = useUniforms && kernelSize.getDepth() <= 4;

     // init new programs
     for (int outputChannel = 0; outputChannel < numOutputChannels; outputChannel += 4) {
         const size_t coefStart = coeffs.size();     // index of the first coefficient in coeffs for the current program

         // compute indices delimiting the current group
         const int groupIdx = outputChannel * numGroups / numOutputChannels;
         const int firstInputChannel = groupIdx * kernelSize.getDepth();
         const int lastInputChannel  = firstInputChannel + (isDepthwise ? 4 : kernelSize.getDepth());

         // set up GLSL code
         String code(GL::RenderingPrograms::DECLARE_TEXTURE_COORDINATES_IN_FRAG);

 #ifdef BEATMUP_DEBUG
         if (!groupViews.empty())
             DebugAssertion::check(groupViews.back().getNumberOfTextures() <= gpu.getLimit(GraphicPipeline::Limit::TEXTURE_IMAGE_UNITS),
                 "Cannot compute Conv2D operation " + getName() + " on the current GPU: too many textures per group");
 #endif

         code.printf("uniform sampler2D %s[%d];", UNIFORM_INPUT, useInputImage || isDepthwise ? 1 : groupViews[groupIdx].getNumberOfTextures());
         if (residualInput)
             code.printf("uniform sampler2D %s[1];", UNIFORM_RESIDUAL_INPUT);
         if (useUniforms)
             code.printf("uniform highp vec4 %s[%d];", UNIFORM_COEFFICIENT, uniformsLength);

         SpatialFilteringMixin::writeHeader(code, useUniformShift);
         code.line("void main() {");
         code.line("highp vec4 sum;");

         // declare neighborhood: vec4 for storage, vec3 for image
         SpatialFilteringMixin::declare(code, useInputImage ? "highp vec3" : "highp vec4", !useInputImage);

         // loop through input channels
         for (int inputChannel = firstInputChannel; inputChannel < lastInputChannel; inputChannel += 4) {
             const int channelInGroup = inputChannel - firstInputChannel;

             // texture coordinates sample the first channel in the current group, so shift is relative to its origin
             const Point shift = (useUniformShift || !input) ? Point::ZERO :
                 (Point(input.getChannelOrigin(inputChannel) - input.getChannelOrigin(firstInputChannel)) / input.getTextureSize());

             // compute depthwise convolution: inline sampling used
             if (isDepthwise) {
                 code("sum = ");
                 for (int y = 0; y < kernelSize[1]; ++y)
                 for (int x = 0; x < kernelSize[0]; ++x) {
                     if (x > 0 || y > 0) code(" + ");
                     const float* w = kernel.ptr<float>(getIdx(outputChannel, 0, x, y));
                     if (useUniforms) {
                         code.printf("%s[%d] * ", UNIFORM_COEFFICIENT, (int)(coeffs.size() - coefStart));
                         coeffs.emplace_back(std::array<float, 4>{ w[0], w[1], w[2], w[3] });
                     }
                     else
                         code.printf("vec4(" COEF_FMT "," COEF_FMT "," COEF_FMT "," COEF_FMT ") * ", w[0], w[1], w[2], w[3]);
                     SpatialFilteringMixin::sampleInline(code, UNIFORM_INPUT, 0, IntPoint(x, y), shift);
                 }
                 code.line(";");
             }

             // compute convolution with 3-channel input image using dot product; no inline sampling
             else if (useInputImage) {
                 SpatialFilteringMixin::sample(code, UNIFORM_INPUT, 0, Point::ZERO, true, useInputImage ? ".rgb" : "");
                 const int offset[4] = { 0, 1 * numOutputChannels, 2 * numOutputChannels, 3 * numOutputChannels };
                 for (int y = 0; y < kernelSize[1]; ++y)
                 for (int x = 0; x < kernelSize[0]; ++x) {
                     code((channelInGroup == 0 && x == 0 && y == 0) ? "sum = vec4(" : "sum += vec4(");
                     for (int c = 0; c < 4; ++c) {
                         if (c > 0) code(",");
                         const float* w = kernel.ptr<float>(getIdx(c + outputChannel, channelInGroup, x, y));
                         code.printf("dot(vec3(" COEF_FMT "," COEF_FMT "," COEF_FMT "), %s%d%d)",
                                 w[0], w[offset[1]], w[offset[2]], SpatialFilteringMixin::SAMPLE_ID_PREFIX, x, y);
                     }
                     code.line(");");
                 }
             }

             // compute 4m to 4n channels using vector by 4x4 matrix multiply: inline sampling used
             else {
                 code.printf("sum %s", channelInGroup == 0 ? "=" : "+=");
                 const int offset[4] = { 0, 1 * numOutputChannels, 2 * numOutputChannels, 3 * numOutputChannels };
                 for (int y = 0; y < kernelSize[1]; ++y)
                 for (int x = 0; x < kernelSize[0]; ++x) {
                     if (x > 0 || y > 0) code(" + ");
                     SpatialFilteringMixin::sampleInline(code, UNIFORM_INPUT, groupViews[groupIdx].getChannelTextureNumber(channelInGroup), IntPoint(x, y), shift);
                     code.printf(" * mat4(");
                     for (int c = 0; c < 4; ++c) {
                         if (c > 0) code(",");
                         const float* w = kernel.ptr<float>(getIdx(c + outputChannel, channelInGroup, x, y));
                         if (useUniforms) {
                             code.printf("%s[%d]", UNIFORM_COEFFICIENT, (int)(coeffs.size() - coefStart));
                             coeffs.emplace_back(std::array<float, 4>{ w[0], w[offset[1]], w[offset[2]], w[offset[3]] });
                         }
                         else
                             code.printf(COEF_FMT "," COEF_FMT "," COEF_FMT "," COEF_FMT, w[0], w[offset[1]], w[offset[2]], w[offset[3]]);
                     }
                     code.printf(")");
                 }
                 code.line(";");
             }
         }

         // add residual input
         if (residualInput) {
             // get linear mapping of channel pixel positions to sample the residual input properly
             const IntPoint mainOrigin = input.getChannelOrigin(useUniformShift ? outputChannel : firstInputChannel);
             const IntPoint residualOrigin = residualInput.getChannelOrigin(outputChannel);
             const Rectangle mainArea(mainOrigin, mainOrigin + input.getSpatialSize());
             const Rectangle resArea(residualOrigin, residualOrigin + residualInput.getSpatialSize());
             const Point mainTexSize(input.getTextureWidth(), input.getTextureHeight());
             const Point resTexSize(residualInput.getTextureWidth(), residualInput.getTextureHeight());
             Point scale, offset;
             (mainArea / mainTexSize).getMapping(resArea / resTexSize, scale, offset);
             // sample, add to sum
             code.printf("sum += texture2D(%s[0], %s * vec2(" COORD_FMT "," COORD_FMT ") + vec2(" COORD_FMT "," COORD_FMT "));\n",
                 UNIFORM_RESIDUAL_INPUT, getInputSamplingPos().c_str(), scale.x, scale.y, offset.x, offset.y);
         }

         // add bias if enabled
         if (useBias) {
             const float* b = biases->ptr<float>(outputChannel);
             if (useUniforms) {
                 // the bias is the last uniform vector of the program
                 code.printf("sum += %s[%d];", UNIFORM_COEFFICIENT, (int)(coeffs.size() - coefStart));
                 coeffs.emplace_back(std::array<float, 4>{ b[0], b[1], b[2], b[3] });
             }
             else
                 code.printf("sum += vec4(" COEF_FMT "," COEF_FMT "," COEF_FMT "," COEF_FMT ");\n", b[0], b[1], b[2], b[3]);
         }

         // apply activation
         ActivationFunctionMixin::apply(code, "sum");
         code("}");

         // init program
         programs.push_back(bank(gpu, code));
     }

     // setup execution order: same programs writing to the same texture are next to each other
     execOrder.resize(programs.size());
     for (size_t i = 0; i < execOrder.size(); ++i)
         execOrder[i] = (int)i;
     std::sort(execOrder.begin(), execOrder.end(), [&](int i, int j) {
         return programs[i] < programs[j] || (programs[i] == programs[j] &&
             output.getChannelTextureNumber(4 * i) < output.getChannelTextureNumber(4 * j));
     });

     ready = true;
 }


 /**
     Runs the convolution: executes the prepared programs in the order set up by prepare().
     For every program the output is bound first; the inputs, texture coordinates and sampling setup are only
     (re)bound when the binder does not report a fast switch (and, for non-depthwise convolutions, when the
     convolution group changes). Coefficients are uploaded to the uniform array if uniforms are in use.
     \param[in,out] thread   Calling thread (not used by this implementation)
     \param[in,out] gpu      Graphic pipeline instance
     Throws NotReady if prepare() has not completed, and RuntimeError if the residual input size does not match
     the output size. Connection checks are reported through RuntimeError::check.
 */
 void Conv2D::execute(TaskThread& thread, GraphicPipeline& gpu) {
     if (!ready)
         throw NotReady(this);

     RuntimeError::check((useInputImage && inputImage) || (!useInputImage && input), "Input is not provided to a Conv2D operation.");
     RuntimeError::check(output, "Output is not provided to Conv2D operation " + getName());
     if (residualInput && residualInput.getSize() != output.getSize())
         throw RuntimeError("Residual input size does not match the output size");

 #ifdef BEATMUP_DEBUG
     RuntimeError::check(output.getSize() == getOutputSize(), "Operation output storage size mismatch");
 #endif

     // static program setup
     SpatialFilteringMixin::setup(
         useInputImage ? inputImage->getWidth()  : input.getTextureWidth(),
         useInputImage ? inputImage->getHeight() : input.getTextureHeight()
     );

     // compute tex coords
     const IntPoint strides(stride, stride);
     const IntPoint inputTextureSize = useInputImage ?
         IntPoint(inputImage->getWidth(), inputImage->getHeight()) :
         IntPoint(input.getTextureWidth(), input.getTextureHeight());
     if (useInputImage || isUniformShiftUsed()) {
         // the texture coordinates are common to all the programs in this case
         const IntRectangle samplingArea = useInputImage ?
             getSamplingArea(IntPoint(inputImage->getWidth(), inputImage->getHeight()), strides, padding) :
             getSamplingArea(input, 0, strides, padding);

         gpu.setTextureCoordinates(samplingArea, inputTextureSize, output.getSpatialSize());
     }

     // coeffs is left empty by prepare() when the model data is hardcoded in the shaders
     const int coeffsPerProgram = (int)(coeffs.size() / programs.size());
     const bool uniformsAreUsed = coeffsPerProgram > 0;

     // for each output channel
     Storage::Binder bind(gpu);
     for (size_t i = 0; i < execOrder.size(); ++i) {
         const int programNum = execOrder[i];
         const int outputChannel = 4 * programNum;

         GL::RenderingProgram& program = *programs[programNum];

         if (isDepthwise) {
             // in a depthwise convolution the input channel number matches the output one
             const int channel = outputChannel;

             // bind output to a program
             const bool fast = bind.begin(program, output, outputChannel);

             if (!fast) {
                 // bind inputs
                 bind(input, UNIFORM_INPUT, outputChannel);
                 if (residualInput)
                     bind(residualInput, UNIFORM_RESIDUAL_INPUT, outputChannel);
                 SpatialFilteringMixin::setupProgram(program);
             }

             // setup the remaining stuff
             if (isUniformShiftUsed())
                 SpatialFilteringMixin::setUniformShift(program, input.getChannelOrigin(channel) - input.getChannelOrigin(0), input.getTextureSize());
             else
                 gpu.setTextureCoordinates(getSamplingArea(input, channel, strides, padding), inputTextureSize, output.getSpatialSize());
         }

         else {
             // bind output to a program; a fast switch is only possible within the same convolution group
             const int groupIdx = outputChannel * numGroups / numOutputChannels;
             const bool isSameGroup =  i > 0 && 4 * execOrder[i - 1] * numGroups / numOutputChannels == groupIdx;
             const bool fast = bind.begin(program, output, outputChannel) && isSameGroup;

             if (!fast) {
                 // bind inputs
                 if (useInputImage)
                     bind(*inputImage, UNIFORM_INPUT);
                 else {
                     // number of the first input channel in the current group
                     const int firstInputChannel = groupIdx * kernelSize.getDepth();
                     bind(groupViews[groupIdx], UNIFORM_INPUT);

                     if (residualInput)
                         bind(residualInput, UNIFORM_RESIDUAL_INPUT, outputChannel);

                     if (isUniformShiftUsed())
                         SpatialFilteringMixin::setUniformShift(program, input.getChannelOrigin(firstInputChannel) - input.getChannelOrigin(0), input.getTextureSize());
                     else
                         gpu.setTextureCoordinates(getSamplingArea(input, firstInputChannel, strides, padding), inputTextureSize, output.getSpatialSize());
                 }

                 // setup the remaining stuff
                 SpatialFilteringMixin::setupProgram(program);
             }
         }

         // update uniforms if needed
         if (uniformsAreUsed)
             program.setVec4Array(UNIFORM_COEFFICIENT, coeffs[coeffsPerProgram * programNum].data(), coeffsPerProgram);

         // g-g-go
         program.blend();
     }
 }


 /**
     Returns the zero padding size required for a given input.
     Only the main input (index 0) of a convolution with "same" padding needs padding: half of the biggest kernel
     dimension. Zero is returned in any other case.
 */
 int Conv2D::getInputPadding(int index) const {
     if (index != 0 || padding != Size::Padding::SAME)
         return 0;
     const int biggestKernelSide = std::max(kernelSize[0], kernelSize[1]);
     return biggestKernelSide / 2;
 }


 /**
     Reports the range of the number of channels sampled at once from a given input.
     \param[in] index    Input index: 0 for the main input, 1 for the residual input
     \param[out] min     Lower bound of the range
     \param[out] max     Upper bound of the range
     Both bounds are set to zero for any other input index.
 */
 void Conv2D::getSampledChannels(int index, int& min, int& max) const {
     switch (index) {
         case 0:
             // main input: an entire group is sampled at once
             if (useInputImage)
                 min = max = 3;
             else {
                 min = 4;
                 max = isDepthwise ? 4 : kernelSize.getDepth();
             }
             break;

         case 1:
             // residual input: a single texture is sampled at once
             min = max = 4;
             break;

         default:
             min = max = 0;
             break;
     }
 }


 /**
     Computes the size of the operation output from the size of the connected input.
     \param[in] outputIndex      Output index; the operation has the only output of index 0
     \return the output size, or Size::EMPTY for a non-zero output index.
     A missing input or a non-positive output volume is reported through RuntimeError::check.
 */
 Size Conv2D::getOutputSize(int outputIndex) const {
     // no other output than the one of index 0
     if (outputIndex != 0)
         return Size::EMPTY;

     const bool inputIsConnected = useInputImage ? (inputImage != nullptr) : static_cast<bool>(input);
     RuntimeError::check(inputIsConnected, "Input is not provided to Conv2D operation " + getName());

     // an image on input is treated as a 3-channel tensor
     const Size inputSize = useInputImage ? Size(inputImage->getWidth(), inputImage->getHeight(), 3) : input.getSize();
     const Size strides(stride, stride, 0);
     const Size result = inputSize.transform(kernelSize, strides, padding, numOutputChannels);
     RuntimeError::check(result.volume() > 0, "Invalid (zero or negative) output size got in " + getName());
     return result;
 }


 /**
     Builds a serialized representation of the operation: a set of string key-value pairs carrying its name, type
     and hyperparameters. The number of input channels is restored as the kernel depth times the number of groups.
 */
 std::map<std::string, std::string> Conv2D::serialize() const {
     std::map<std::string, std::string> fields;
     fields["_name"]             = getName();
     fields["_type"]             = "conv2d";
     fields["kernel_size"]       = std::to_string(kernelSize[0]);
     fields["input_channels"]    = std::to_string(kernelSize.getDepth() * numGroups);
     fields["output_channels"]   = std::to_string(numOutputChannels);
     fields["stride"]            = std::to_string(stride);
     fields["padding"]           = std::to_string(padding);
     fields["use_bias"]          = useBias ? "true" : "false";
     fields["groups"]            = std::to_string(numGroups);
     fields["activation"]        = std::to_string(activationFunc);
     return fields;
 }


 bool Conv2D::initDeserializer() {

     static class Conv2DDeserializer : public AbstractOperation::Deserializer {

     public:

         Conv2DDeserializer() : Deserializer("conv2d") {}

         AbstractOperation* deserialize(Context& context, const Listing::Block& block) {

             /** \page NNetsOpsSerialization Operations serialization

                 This page describes the operation options.


                 Every operation necessary has `_name` and `_type` parameters. The rest of operation parameters depends on its type.


                 \section Conv2D

                 \code{yaml}

                 - _name: arbitrary operation name

                   _type: conv2d        # fixed string

                   kernel_size: 3       # size of convolution kernel

                   input_channels: 3    # number of input feature channels

                   output_channels: 16  # number of output feature channels

                   stride: 2            # stride (defaults to 1)

                   padding: valid       # paddling, string, "valid" or "same" (defaults to "valid")

                   use_bias: true       # bias addition, "true" or "false" (defaults to "true")

                   groups: 1            # number of groups for grouped convolution (defaults to 1)

                   activation: default  # activation function

                 \endcode


                 For activation functions see \ref NNetsActivationFunctionsSerialization.

             */

             return new Conv2D(

                 block["_name"],

                 block.get<int>("kernel_size"),

                 block.get<int>("input_channels"),

                 block.get<int>("output_channels"),

                 block.get<int>("stride", 1),

                 paddingFromString(block.get<std::string>("padding", std::to_string(Size::Padding::VALID))),

                 block.get<bool>("use_bias", true),

                 block.get<int>("groups", 1),

                 activationFunctionFromString(block.get<std::string>("activation", std::to_string(ActivationFunction::DEFAULT)))

             );

         }

     } john;


     return true;

 }


 void Conv2D::disconnect() {

     inputImage = nullptr;

     input = Storage::View();

     residualInput = Storage::View();

     output = Storage::View();

     groupViews.clear();

 }


 /**
     Connects a storage view to the main (index 0) or the residual (index 1) input.
     For the main input of a non-depthwise convolution a view per convolution group is created as well.
     An empty view may be passed; the padding and depth checks are only applied to a non-empty view.
     \param[in] view         The view to connect
     \param[in] inputIndex   Input index, 0 or 1
     Errors are reported through OutOfRange::check (bad index) and RuntimeError::check (insufficient storage
     padding, depth mismatch, or a tensor passed where an image is expected).
 */
 void Conv2D::setInput(Storage::View&& view, int inputIndex) {
     OutOfRange::check(inputIndex, 0, 1, "Input index out of range: %d");

     // query the storage of a non-empty view only
     // NOTE(review): presumably an empty view has no storage behind it -- confirm against Storage::View
     if (view)
         RuntimeError::check(view.getStorage().getPadding() >= getInputPadding(inputIndex), "The storage has insufficient padding");

     if (inputIndex == 0) {
         // the group views refer to the former input: drop them even if the new view is empty
         groupViews.clear();

         if (view) {
             RuntimeError::check(!useInputImage, "An image is expected on input, but a tensor is passed");
             RuntimeError::check(view.getDepth() == kernelSize.getDepth() * numGroups, "Tensor depth does not match kernel depth");

             // create group views: every group covers a number of consecutive channels equal to the kernel depth
             if (!isDepthwise) {
                 const int groupDepth = kernelSize.getDepth();
                 for (int groupIdx = 0; groupIdx < numGroups; ++groupIdx)
                     // NOTE(review): `view` is passed as an rvalue on every iteration and is moved once more below;
                     // presumably this View constructor only reads its source -- confirm
                     groupViews.emplace_back(std::move(view), groupIdx * groupDepth, groupDepth);
             }
         }
         this->input = std::move(view);
         this->inputImage = nullptr;
     }
     else {
         if (view) {
             RuntimeError::check(!useInputImage, "Cannot use the residual input when an image is used as the main input");
             RuntimeError::check(view.getDepth() == numOutputChannels, "Residual input tensor depth does not match output depth");
         }
         this->residualInput = std::move(view);
     }
 }


 /**
     Connects a storage view to the operation output.
     \param[in] storage          The view to write the result to
     \param[in] outputIndex      Output index; only 0 is accepted
 */
 void Conv2D::setOutput(Storage::View&& storage, int outputIndex) {
     // the operation has a single output
     OutOfRange::check(outputIndex, 0, 0, "Output index out of range: %d");
     output = std::move(storage);
 }


 /**
     Connects an image to the main input of the operation.
     \param[in] image        The image (texture handler) to read the input from
     \param[in] inputIndex   Input index; any index other than 0 is forwarded to AbstractOperation::setInput()
     A RuntimeError is reported through RuntimeError::check if the operation does not take an image on input.
 */
 void Conv2D::setInput(GL::TextureHandler& image, int inputIndex) {
     // only the main input may be an image: let the base class deal with the other inputs
     if (inputIndex != 0) {
         AbstractOperation::setInput(image, inputIndex);
         return;
     }

     RuntimeError::check(useInputImage, "Cannot use image as Conv2D input");
     inputImage = &image;
 }


 /**
     Counts the (approximate) number of multiply-adds used by the operation: the output volume times the kernel
     volume. getOutputSize() reports an error if the input is not connected.
 */
 unsigned long Conv2D::countMultiplyAdds() const {
     // widen the operands before multiplying, so that the product is not computed in a narrower integer type
     const unsigned long outputVolume = static_cast<unsigned long>(getOutputSize(0).volume());
     const unsigned long kernelVolume = static_cast<unsigned long>(kernelSize.volume());
     return outputVolume * kernelVolume;
 }


 /**
     Counts the (approximate) number of texels fetched by the operation.
     Every quad of output channels takes a kernel volume of input values, read 3 (image input) or 4 (tensor input)
     values per texel, plus one texel per output quad if the residual input is connected.
     getOutputSize() reports an error if the input is not connected.
 */
 unsigned long Conv2D::countTexelFetches() const {
     // widen the operands before multiplying, so that the product is not computed in a narrower integer type;
     // the order of the integer divisions is kept as is
     const unsigned long outputQuads = static_cast<unsigned long>(getOutputSize(0).volume()) / 4;
     const unsigned long valuesPerTexel = useInputImage ? 3 : 4;
     unsigned long count = outputQuads * static_cast<unsigned long>(kernelSize.volume()) / valuesPerTexel;
     if (residualInput)
         count += outputQuads;
     return count;
 }

Beatmup::ChunkCollection
A key-value pair set storing pieces of arbitrary data (chunks) under string keys.
Definition: chunkfile.h:36

Beatmup::Chunk
Simply a piece of binary data of a specific size.
Definition: chunkfile.h:210

Beatmup::Chunk::ptr
datatype * ptr(size_t offset=0)
Definition: chunkfile.h:264

Beatmup::Chunk::size
size_t size() const
Definition: chunkfile.h:257

Beatmup::Context
Basic class: task and memory management, any kind of static data.
Definition: context.h:59

Beatmup::CustomPoint< float >

Beatmup::CustomPoint< float >::ZERO
static const CustomPoint ZERO
Definition: geometry.h:122

Beatmup::CustomPoint::x
numeric x
Definition: geometry.h:40

Beatmup::CustomPoint::y
numeric y
Definition: geometry.h:40

Beatmup::CustomRectangle< float >

Beatmup::GL::AbstractProgram::setVec4Array
void setVec4Array(const std::string &name, const float *xyzw, const int length)
Definition: program.cpp:466

Beatmup::GL::ProgramBank
Stores linked GLSL programs and their associated fragment shader codes.
Definition: program_bank.h:31

Beatmup::GL::ProgramBank::release
void release(GraphicPipeline &gpu, GL::RenderingProgram *program)
Marks a program as no longer used.
Definition: program_bank.cpp:68

Beatmup::GL::RenderingProgram
GLSL program to render images. Makes use of default vertex attributes to pass the texture coordinates ...
Definition: program.h:240

Beatmup::GL::RenderingProgram::blend
void blend(bool onScreen)
Definition: program.cpp:548

Beatmup::GL::RenderingPrograms::DECLARE_TEXTURE_COORDINATES_IN_FRAG
static const char * DECLARE_TEXTURE_COORDINATES_IN_FRAG
Declaring texture coordinates in fragment shader.
Definition: rendering_programs.h:57

Beatmup::GL::TextureHandler
Definition: texture_handler.h:37

Beatmup::GL::TextureHandler::getHeight
virtual const int getHeight() const =0
Height of the texture in pixels.

Beatmup::GL::TextureHandler::getWidth
virtual const int getWidth() const =0
Width of the texture in pixels.

Beatmup::GraphicPipeline
Internal low-level GPU control API.
Definition: pipeline.h:33

Beatmup::GraphicPipeline::getLimit
int getLimit(Limit limit) const
Definition: pipeline.cpp:936

Beatmup::GraphicPipeline::setTextureCoordinates
void setTextureCoordinates(const Rectangle &coords)
Specifies texture coordinates for the next rendering pass.
Definition: pipeline.cpp:966

Beatmup::GraphicPipeline::Limit::TEXTURE_IMAGE_UNITS
@ TEXTURE_IMAGE_UNITS
maximum number of texture units per fragment shader

Beatmup::GraphicPipeline::Limit::FRAGMENT_UNIFORM_VECTORS
@ FRAGMENT_UNIFORM_VECTORS
maximum number of 4-dimensional uniform vectors per fragment shader

Beatmup::InvalidArgument::check
static void check(const bool condition, const std::string &message)
Definition: exception.h:75

Beatmup::Listing::Block
Set of key-value pairs.
Definition: listing.h:46

Beatmup::Listing::Block::get
T get(const std::string &key) const
Returns a value by key cast to a given type.

Beatmup::NNets::AbstractOperation::Deserializer
Enables construction of an operation from its serialized representation.
Definition: operation.h:248

Beatmup::NNets::AbstractOperation::InconsistentModelData
Definition: operation.h:100

Beatmup::NNets::AbstractOperation::NotReady
Definition: operation.h:112

Beatmup::NNets::AbstractOperation
Abstract neural net operation (layer).
Definition: operation.h:46

Beatmup::NNets::AbstractOperation::setInput
virtual void setInput(Storage::View &&storage, int index=0)
Definition: operation.cpp:52

Beatmup::NNets::AbstractOperation::getName
std::string getName() const
Definition: operation.h:242

Beatmup::NNets::ActivationFunctionMixin
A mixin implementing activation functions in GLSL.
Definition: operation.h:414

Beatmup::NNets::ActivationFunctionMixin::activationFunc
const ActivationFunction activationFunc
Definition: operation.h:417

Beatmup::NNets::ActivationFunctionMixin::apply
void apply(StringBuilder &code, const char *inputVariable)
Renders a GLSL code applying activation function to a specific variable and writing the result to gl_...
Definition: operation.cpp:282

Beatmup::NNets::Conv2D::isDepthwise
const bool isDepthwise
if true, the convolution is depthwise, otherwise regular
Definition: conv2d.h:72

Beatmup::NNets::Conv2D::coeffs
std::vector< std::array< float, 4 > > coeffs
model data to pass to uniform variables, if used
Definition: conv2d.h:80

Beatmup::NNets::Conv2D::BIAS_CHUNK_SUFFIX
static const char * BIAS_CHUNK_SUFFIX
suffix added to the op name to get the bias chunk id in the model data
Definition: conv2d.h:98

Beatmup::NNets::Conv2D::setOutput
void setOutput(Storage::View &&storage, int outputIndex=0)
Definition: conv2d.cpp:512

Beatmup::NNets::Conv2D::numGroups
const int numGroups
number of convolution groups
Definition: conv2d.h:68

Beatmup::NNets::Conv2D::programs
std::vector< GL::RenderingProgram * > programs
pointers to GLSL program, one per quad of output channels
Definition: conv2d.h:79

Beatmup::NNets::Conv2D::useBias
const bool useBias
if true, the bias addition is enabled
Definition: conv2d.h:73

Beatmup::NNets::Conv2D::execute
void execute(TaskThread &thread, GraphicPipeline &gpu)
Executes the operation.
Definition: conv2d.cpp:272

Beatmup::NNets::Conv2D::padding
const Size::Padding padding
Definition: conv2d.h:70

Beatmup::NNets::Conv2D::ready
bool ready
Definition: conv2d.h:74

Beatmup::NNets::Conv2D::stride
const int stride
Definition: conv2d.h:69

Beatmup::NNets::Conv2D::getOutputSize
Size getOutputSize(int outputIndex=0) const
Returns full size of a specific operation output.
Definition: conv2d.cpp:397

Beatmup::NNets::Conv2D::inputImage
GL::TextureHandler * inputImage
input texture handler to be used instead input view
Definition: conv2d.h:78

Beatmup::NNets::Conv2D::initDeserializer
static bool initDeserializer()
Sets up deserialization of the operation.

Beatmup::NNets::Conv2D::prepare
void prepare(GraphicPipeline &gpu, ChunkCollection &data, GL::ProgramBank &bank)
Compiles GLSL shaders.
Definition: conv2d.cpp:88

Beatmup::NNets::Conv2D::serialize
std::map< std::string, std::string > serialize() const
Returns a serialized representation of the operation.
Definition: conv2d.cpp:415

Beatmup::NNets::Conv2D::FILTERS_CHUNK_SUFFIX
static const char * FILTERS_CHUNK_SUFFIX
suffix added to the op name to get the filters chunk id in the model data
Definition: conv2d.h:97

Beatmup::NNets::Conv2D::useInputImage
const bool useInputImage
if true, input is the texture handler, not the view
Definition: conv2d.h:71

Beatmup::NNets::Conv2D::output
Storage::View output
Definition: conv2d.h:76

Beatmup::NNets::Conv2D::execOrder
std::vector< int > execOrder
execution order of GLSL programs
Definition: conv2d.h:81

Beatmup::NNets::Conv2D::getSampledChannels
void getSampledChannels(int index, int &min, int &max) const
Retrieves the range of input feature channels sampled at the same time for a specific input.
Definition: conv2d.cpp:382

Beatmup::NNets::Conv2D::getInputPadding
int getInputPadding(int index=0) const
Retrieves minimum required size of zero padding for a given input.
Definition: conv2d.cpp:377

Beatmup::NNets::Conv2D::numOutputChannels
const int numOutputChannels
number of output feature maps
Definition: conv2d.h:67

Beatmup::NNets::Conv2D::groupViews
std::vector< Storage::View > groupViews
views per convolution group
Definition: conv2d.h:82

Beatmup::NNets::Conv2D::input
Storage::View input
Definition: conv2d.h:76

Beatmup::NNets::Conv2D::setInput
void setInput(Storage::View &&storage, int inputIndex=0)
Definition: conv2d.cpp:483

Beatmup::NNets::Conv2D::countMultiplyAdds
unsigned long countMultiplyAdds() const
Counts (approximate) number of multiply-adds used by this operation.
Definition: conv2d.cpp:528

Beatmup::NNets::Conv2D::kernelSize
const Size kernelSize
Definition: conv2d.h:66

Beatmup::NNets::Conv2D::Conv2D
Conv2D(const std::string &name, const int kernelSize, const int numInputChannels, const int numOutputChannels, const int stride=1, const Size::Padding padding=Size::Padding::VALID, const bool useBias=true, const int numGroups=1, const ActivationFunction activation=ActivationFunction::DEFAULT)
Instantiates a 2D convolution operation.
Definition: conv2d.cpp:41

Beatmup::NNets::Conv2D::disconnect
void disconnect()
Assigns empty inputs and outputs.
Definition: conv2d.cpp:474

Beatmup::NNets::Conv2D::residualInput
Storage::View residualInput
optional tensor to be added to the output before activation
Definition: conv2d.h:77

Beatmup::NNets::Conv2D::countTexelFetches
unsigned long countTexelFetches() const
Counts (approximate) number of texel fetches.
Definition: conv2d.cpp:533

Beatmup::NNets::Conv2D::getIdx
int getIdx(int output, int input, int x, int y) const
Maps an (outputChannel, inputChannel, x, y) position to a linear coefficient index in the chunkfile.
Definition: conv2d.h:87

Beatmup::NNets::Size
Operation 3D input/output size.
Definition: storage.h:37

Beatmup::NNets::Size::Padding
Padding
Zero padding specification.
Definition: storage.h:45

Beatmup::NNets::Size::Padding::SAME
@ SAME
operation output size matches its input size for unit strides

Beatmup::NNets::Size::Padding::VALID
@ VALID
no zero padding

Beatmup::NNets::Size::volume
int volume() const
Definition: storage.h:79

Beatmup::NNets::Size::EMPTY
static const Size EMPTY
Definition: storage.h:50

Beatmup::NNets::Size::transform
Size transform(Size kernel, Size stride, Padding padding, int depth=0) const
Computes operation output size in function of operation kernel, padding, stride and depth,...
Definition: storage.cpp:58

Beatmup::NNets::Size::getDepth
int getDepth() const
Definition: storage.h:77

Beatmup::NNets::SpatialFilteringMixin
Generates GLSL fragment shader code sampling a local neighborhood around the current texture coordina...
Definition: operation.h:272

Beatmup::NNets::SpatialFilteringMixin::sample
void sample(StringBuilder &code, const char *inputName, const int inputIndex, const Point &shift, const bool isFirstSample=true, const char *suffix="")
Samples a neighborhood of a given texture.
Definition: operation.cpp:150

Beatmup::NNets::SpatialFilteringMixin::sampleInline
void sampleInline(StringBuilder &code, const char *inputName, const int inputIndex, const IntPoint &position, const Point &shift, const char *suffix="")
Definition: operation.cpp:174

Beatmup::NNets::SpatialFilteringMixin::isUniformShiftUsed
bool isUniformShiftUsed() const
Definition: operation.h:394

Beatmup::NNets::SpatialFilteringMixin::setup
void setup(const int width, const int height)
Prepares the spatial filtering operation execution.
Definition: operation.cpp:197

Beatmup::NNets::SpatialFilteringMixin::SAMPLE_ID_PREFIX
static const char * SAMPLE_ID_PREFIX
prefix of variables declaring a neighbor sample
Definition: operation.h:285

Beatmup::NNets::SpatialFilteringMixin::shift
Point shift
current static shift of the sampling position
Definition: operation.h:275

Beatmup::NNets::SpatialFilteringMixin::writeHeader
void writeHeader(StringBuilder &code, bool useUniformShift)
Writes out the GLSL fragment shader header required for spatial neighborhood sampling.
Definition: operation.cpp:110

Beatmup::NNets::SpatialFilteringMixin::getInputSamplingPos
std::string getInputSamplingPos() const
Retrieves input sampling point position for the current fragment.
Definition: operation.cpp:276

Beatmup::NNets::SpatialFilteringMixin::declare
void declare(StringBuilder &code, const char *datatype, bool inlineSampling=false)
Declares GLSL fragment shader main(..) code part required for spatial neighborhood sampling.
Definition: operation.cpp:119

Beatmup::NNets::SpatialFilteringMixin::setupProgram
void setupProgram(GL::Program &program)
Prepares a given program for spatial filtering.
Definition: operation.cpp:217

Beatmup::NNets::SpatialFilteringMixin::getSamplingArea
IntRectangle getSamplingArea(const IntPoint &size, const IntPoint &stride, const Size::Padding padding) const
Implements common padding policies by computing a rectangular area of positions the sampling kernel t...
Definition: operation.cpp:223

Beatmup::NNets::SpatialFilteringMixin::useUniformShift
bool useUniformShift
if true, the sampling position can be shifted dynamically at every run
Definition: operation.h:277

Beatmup::NNets::SpatialFilteringMixin::setUniformShift
void setUniformShift(GL::Program &program, const IntPoint &shift, const IntPoint &inputSize)
Applies an offset to the sampling position at runtime.
Definition: operation.cpp:209

Beatmup::NNets::Storage::Binder
Binding of different input/output storages/texture handlers to a GLSL program.
Definition: storage.h:419

Beatmup::NNets::Storage::View
Maps a 3D tensor onto a storage.
Definition: storage.h:308

Beatmup::NNets::Storage::View::getTextureHeight
int getTextureHeight() const
Returns height in pixels of all the textures.
Definition: storage.h:375

Beatmup::NNets::Storage::View::getChannelOrigin
IntPoint getChannelOrigin(int channel) const
Returns origin in pixels of a given channel within the texture containing it.
Definition: storage.cpp:509

Beatmup::NNets::Storage::View::getTextureSize
IntPoint getTextureSize() const
Definition: storage.h:377

Beatmup::NNets::Storage::View::getSpatialSize
IntPoint getSpatialSize() const
Returns the spatial size (width and height) of the storage in pixels.
Definition: storage.h:389

Beatmup::NNets::Storage::View::getSize
Size getSize() const
Definition: storage.h:384

Beatmup::NNets::Storage::View::getTextureWidth
int getTextureWidth() const
Returns width in pixels of all the textures.
Definition: storage.h:370

Beatmup::NNets::Storage::View
friend class View
Definition: storage.h:135

Beatmup::NNets::Storage::checkChannelNumber
static void checkChannelNumber(int channel)
Checks whether a channel number points to the first channel in a texture.
Definition: storage.h:290

Beatmup::OutOfRange::checkMin
static void checkMin(const datatype value, const datatype min, const char *message)
Definition: exception.h:92

Beatmup::OutOfRange::check
static void check(const datatype value, const datatype min, const datatype max, const char *message)
Definition: exception.h:86

Beatmup::RuntimeError
Definition: exception.h:61

Beatmup::RuntimeError::check
static void check(const bool condition, const std::string &message)
Definition: exception.h:64

Beatmup::String
StringBuilder including a string container.
Definition: string_builder.h:83

Beatmup::TaskThread
Thread executing tasks.
Definition: parallelism.h:154

COEF_FMT
#define COEF_FMT
Definition: conv2d.cpp:29

UNIFORM_COEFFICIENT
static const char * UNIFORM_COEFFICIENT
Definition: conv2d.cpp:38

COORD_FMT
#define COORD_FMT
Definition: conv2d.cpp:30

UNIFORM_RESIDUAL_INPUT
static const char * UNIFORM_RESIDUAL_INPUT
Definition: conv2d.cpp:37

UNIFORM_INPUT
static const char * UNIFORM_INPUT
Definition: conv2d.cpp:36

conv2d.h

deserialized_model.h

Beatmup::NNets::activationFunctionFromString
ActivationFunction activationFunctionFromString(const std::string &str)
Returns an activation function value from a string.
Definition: operation.cpp:329

Beatmup::NNets::paddingFromString
Size::Padding paddingFromString(const std::string &str)
Returns a zero padding value from a string.
Definition: storage.cpp:703

Beatmup::NNets::ActivationFunction
ActivationFunction
Activation function specification.
Definition: operation.h:401

Beatmup::NNets::ActivationFunction::DEFAULT
@ DEFAULT
default activation: 0..1 bounded ReLU (identity clipped to 0..1 range)

Beatmup
Definition: basic_types.h:22

Beatmup::Point
CustomPoint< float > Point
Definition: geometry.h:626

Beatmup::IntPoint
CustomPoint< int > IntPoint
Definition: geometry.h:629

std::to_string
std::string to_string(Beatmup::NNets::ActivationFunction function)

std::min
CustomPoint< numeric > min(const CustomPoint< numeric > &a, const CustomPoint< numeric > &b)
Definition: geometry.h:724

std::max
CustomPoint< numeric > max(const CustomPoint< numeric > &a, const CustomPoint< numeric > &b)
Definition: geometry.h:728

scale
JNIEnv jlong jint jint jint jint jfloat scale
Definition: wrapper_audio.cpp:142

name
return(jlong) new Beatmup jlong jstring name
Definition: wrapper_core.cpp:331

b
jobject jlong jint jint jint jint jint b
Definition: wrapper_core.cpp:253

index
jlong jint index
Definition: wrapper_core.cpp:434

y
jobject jlong jint jint y
Definition: wrapper_core.cpp:253

w
jlong jstring jint jint jint jint w
Definition: wrapper_core.cpp:355

count
JNIEnv jlong jint jint count
Definition: wrapper_core.cpp:288

result
Beatmup::IntPoint result
Definition: wrapper_core.cpp:256

x
jobject jlong jint x
Definition: wrapper_core.cpp:253

bind
bitmap bind(jenv, jobj)

getMapping
layer getMapping().position.x
Definition: wrapper_imag.cpp:300