19 #include "../exception.h"
25 using namespace NNets;
29 #define COEF_FMT "%0.6f"
30 #define COORD_FMT "%0.10f"
42 const std::string&
name,
44 const int numInputChannels,
45 const int numOutputChannels,
53 kernelSize(kernelSize, kernelSize, numInputChannels / numGroups), numOutputChannels(numOutputChannels), numGroups(numGroups),
54 stride(stride), padding(padding),
55 useInputImage(numInputChannels == 3),
56 isDepthwise(numInputChannels == numGroups && numOutputChannels == numGroups),
71 InvalidArgument::check(this->kernelSize.getDepth() % 4 == 0,
"A multiple of 4 is expected as number of input channels in the convolution kernel.");
73 OutOfRange::checkMin(this->kernelSize.getDepth(), 4,
"Kernels having less than 4 input channels are not supported in grouped convolutions. Got %d channels.");
97 const Chunk* biases =
nullptr;
111 static const int MAX_ALLOWED_NUMBER_OF_PROGRAMS = 0;
116 numberOfPrograms > MAX_ALLOWED_NUMBER_OF_PROGRAMS &&
119 coeffs.reserve(numberOfPrograms * uniformsLength);
125 for (
int outputChannel = 0; outputChannel <
numOutputChannels; outputChannel += 4) {
126 const size_t coefStart =
coeffs.size();
139 "Cannot compute Conv2D operation " +
getName() +
" on the current GPU: too many textures per group");
149 code.line(
"void main() {");
150 code.line(
"highp vec4 sum;");
156 for (
int inputChannel = firstInputChannel; inputChannel < lastInputChannel; inputChannel += 4) {
157 const int channelInGroup = inputChannel - firstInputChannel;
168 if (
x > 0 ||
y > 0) code(
" + ");
169 const float*
w = kernel.
ptr<
float>(
getIdx(outputChannel, 0,
x,
y));
172 coeffs.emplace_back(std::array<float, 4>{
w[0],
w[1],
w[2],
w[3] });
187 code((channelInGroup == 0 &&
x == 0 &&
y == 0) ?
"sum = vec4(" :
"sum += vec4(");
188 for (
int c = 0; c < 4; ++c) {
189 if (c > 0) code(
",");
190 const float*
w = kernel.
ptr<
float>(
getIdx(c + outputChannel, channelInGroup,
x,
y));
200 code.printf(
"sum %s", channelInGroup == 0 ?
"=" :
"+=");
204 if (
x > 0 ||
y > 0) code(
" + ");
206 code.printf(
" * mat4(");
207 for (
int c = 0; c < 4; ++c) {
208 if (c > 0) code(
",");
209 const float*
w = kernel.
ptr<
float>(
getIdx(c + outputChannel, channelInGroup,
x,
y));
212 coeffs.emplace_back(std::array<float, 4>{
w[0],
w[offset[1]],
w[offset[2]],
w[offset[3]] });
233 (mainArea / mainTexSize).
getMapping(resArea / resTexSize,
scale, offset);
241 const float*
b = biases->
ptr<
float>(outputChannel);
244 coeffs.emplace_back(std::array<float, 4>{
b[0],
b[1],
b[2],
b[3] });
255 programs.push_back(bank(gpu, code));
260 for (
size_t i = 0; i <
execOrder.size(); ++i)
263 return programs[i] < programs[j] || (programs[i] == programs[j] &&
264 output.getChannelTextureNumber(4 * i) < output.getChannelTextureNumber(4 * j));
279 throw RuntimeError(
"Residual input size does not match the output size");
304 const int coeffsPerProgram = (int)(
coeffs.size() /
programs.size());
305 const bool uniformsAreUsed = coeffsPerProgram > 0;
309 for (
size_t i = 0; i <
execOrder.size(); ++i) {
311 const int outputChannel = 4 * programNum;
316 const int channel = outputChannel;
319 const bool fast =
bind.begin(program,
output, outputChannel);
340 const bool fast =
bind.begin(program,
output, outputChannel) && isSameGroup;
388 else if (
index == 1) {
398 if (outputIndex == 0) {
400 "Input is not provided to Conv2D operation " +
getName());
418 {
"_type",
"conv2d" },
424 {
"use_bias",
useBias ?
"true" :
"false" },
434 Conv2DDeserializer() : Deserializer(
"conv2d") {}
459 block.
get<
int>(
"kernel_size"),
460 block.
get<
int>(
"input_channels"),
461 block.
get<
int>(
"output_channels"),
462 block.
get<
int>(
"stride", 1),
464 block.
get<
bool>(
"use_bias",
true),
465 block.
get<
int>(
"groups", 1),
486 if (inputIndex == 0) {
493 for (
int groupIdx = 0; groupIdx <
numGroups; ++groupIdx) {
496 groupViews.emplace_back(std::move(view), firstInputChannel, lastInputChannel - firstInputChannel);
499 this->
input = std::move(view);
514 this->
output = std::move(storage);
519 if (inputIndex == 0) {
A key-value pair set storing pieces of arbitrary data (chunks) under string keys.
Simply a piece of binary data of a specific size.
datatype * ptr(size_t offset=0)
Basic class: task and memory management, any kind of static data.
static const CustomPoint ZERO
void setVec4Array(const std::string &name, const float *xyzw, const int length)
Stores linked GLSL programs and their associated fragment shader codes.
void release(GraphicPipeline &gpu, GL::RenderingProgram *program)
Marks a program as no longer used.
GLSL program to render images. Makes use of default vertex attributes to pass the texture coordinates ...
void blend(bool onScreen)
static const char * DECLARE_TEXTURE_COORDINATES_IN_FRAG
Declaring texture coordinates in fragment shader.
virtual const int getHeight() const =0
Height of the texture in pixels.
virtual const int getWidth() const =0
Width of the texture in pixels.
Internal low-level GPU control API.
int getLimit(Limit limit) const
void setTextureCoordinates(const Rectangle &coords)
Specifies texture coordinates for the next rendering pass.
@ TEXTURE_IMAGE_UNITS
maximum number of texture units per fragment shader
@ FRAGMENT_UNIFORM_VECTORS
maximum number of 4-dimensional uniform vectors per fragment shader
static void check(const bool condition, const std::string &message)
T get(const std::string &key) const
Returns a value by key casted to a given type.
Enables construction of an operation from its serialized representation.
Abstract neural net operation (layer).
virtual void setInput(Storage::View &&storage, int index=0)
std::string getName() const
A mixin implementing activation functions in GLSL.
const ActivationFunction activationFunc
void apply(StringBuilder &code, const char *inputVariable)
Renders a GLSL code applying activation function to a specific variable and writing the result to gl_...
const bool isDepthwise
if true, the convolution is depthwise, otherwise regular
std::vector< std::array< float, 4 > > coeffs
model data to pass to uniform variables, if used
static const char * BIAS_CHUNK_SUFFIX
suffix added to the op name to get the bias chunk id in the model data
void setOutput(Storage::View &&storage, int outputIndex=0)
const int numGroups
number of convolution groups
std::vector< GL::RenderingProgram * > programs
pointers to GLSL program, one per quad of output channels
const bool useBias
if true, the bias addition is enabled
void execute(TaskThread &thread, GraphicPipeline &gpu)
Executes the operation.
const Size::Padding padding
Size getOutputSize(int outputIndex=0) const
Returns full size of a specific operation output.
GL::TextureHandler * inputImage
input texture handler to be used instead of the input view
static bool initDeserializer()
Sets up deserialization of the operation.
void prepare(GraphicPipeline &gpu, ChunkCollection &data, GL::ProgramBank &bank)
Compiles GLSL shaders.
std::map< std::string, std::string > serialize() const
Returns a serialized representation of the operation.
static const char * FILTERS_CHUNK_SUFFIX
suffix added to the op name to get the filters chunk id in the model data
const bool useInputImage
if true, input is the texture handler, not the view
std::vector< int > execOrder
execution order of GLSL programs
void getSampledChannels(int index, int &min, int &max) const
Retrieves range of input features channels sampled at the same time for a specific input.
int getInputPadding(int index=0) const
Retrieves minimum required size of zero padding for a given input.
const int numOutputChannels
number of output feature maps
std::vector< Storage::View > groupViews
views per convolution group
void setInput(Storage::View &&storage, int inputIndex=0)
unsigned long countMultiplyAdds() const
Counts (approximate) number of multiply-adds used by this operation.
Conv2D(const std::string &name, const int kernelSize, const int numInputChannels, const int numOutputChannels, const int stride=1, const Size::Padding padding=Size::Padding::VALID, const bool useBias=true, const int numGroups=1, const ActivationFunction activation=ActivationFunction::DEFAULT)
Instantiates a 2D convolution operation.
void disconnect()
Assigns empty inputs and outputs.
Storage::View residualInput
optional tensor to be added to the output before activation
unsigned long countTexelFetches() const
Counts (approximate) number of texel fetches.
int getIdx(int output, int input, int x, int y) const
Maps an (outputChannel, inputChannel, x, y) position to a linear coefficient index in the chunkfile.
Operation 3D input/output size.
Padding
Zero padding specification.
@ SAME
operation output size matches its input size for unit strides
Size transform(Size kernel, Size stride, Padding padding, int depth=0) const
Computes operation output size as a function of operation kernel, padding, stride and depth,...
Generates GLSL fragment shader code sampling a local neighborhood around the current texture coordina...
void sample(StringBuilder &code, const char *inputName, const int inputIndex, const Point &shift, const bool isFirstSample=true, const char *suffix="")
Samples a neighborhood of a given texture.
void sampleInline(StringBuilder &code, const char *inputName, const int inputIndex, const IntPoint &position, const Point &shift, const char *suffix="")
bool isUniformShiftUsed() const
void setup(const int width, const int height)
Prepares the spatial filtering operation execution.
static const char * SAMPLE_ID_PREFIX
prefix of variables declaring a neighbor sample
Point shift
current static shift of the sampling position
void writeHeader(StringBuilder &code, bool useUniformShift)
Writes out the very GLSL fragment shader header required for spatial neighborhood sampling.
std::string getInputSamplingPos() const
Retrieves input sampling point position for the current fragment.
void declare(StringBuilder &code, const char *datatype, bool inlineSampling=false)
Declares GLSL fragment shader main(..) code part required for spatial neighborhood sampling.
void setupProgram(GL::Program &program)
Prepares a given program for spatial filtering.
IntRectangle getSamplingArea(const IntPoint &size, const IntPoint &stride, const Size::Padding padding) const
Implements common padding policies by computing a rectangular area of positions the sampling kernel t...
bool useUniformShift
if true, the sampling position can be shifted dynamically at every run
void setUniformShift(GL::Program &program, const IntPoint &shift, const IntPoint &inputSize)
Applies an offset to the sampling position at runtime.
Binding of different input/output storages/texture handlers to a GLSL program.
Maps a 3D tensor onto a storage.
int getTextureHeight() const
Returns height in pixels of all the textures.
IntPoint getChannelOrigin(int channel) const
Returns origin in pixels of a given channel within the texture containing it.
IntPoint getTextureSize() const
IntPoint getSpatialSize() const
Returns the spatial size (width and height) of the storage in pixels.
int getTextureWidth() const
Returns width in pixels of all the textures.
static void checkChannelNumber(int channel)
Checks whether a channel number points to the first channel in a texture.
static void checkMin(const datatype value, const datatype min, const char *message)
static void check(const datatype value, const datatype min, const datatype max, const char *message)
static void check(const bool condition, const std::string &message)
StringBuilder including a string container.
static const char * UNIFORM_COEFFICIENT
static const char * UNIFORM_RESIDUAL_INPUT
static const char * UNIFORM_INPUT
ActivationFunction activationFunctionFromString(const std::string &str)
Returns an activation function value from a string.
Size::Padding paddingFromString(const std::string &str)
Returns a zero padding value from a string.
ActivationFunction
Activation function specification.
@ DEFAULT
default activation: 0..1 bounded ReLU (identity clipped to 0..1 range)
CustomPoint< float > Point
CustomPoint< int > IntPoint
std::string to_string(Beatmup::NNets::ActivationFunction function)
CustomPoint< numeric > min(const CustomPoint< numeric > &a, const CustomPoint< numeric > &b)
CustomPoint< numeric > max(const CustomPoint< numeric > &a, const CustomPoint< numeric > &b)
JNIEnv jlong jint jint jint jint jfloat scale
return(jlong) new Beatmup jlong jstring name
jobject jlong jint jint jint jint jint b
jobject jlong jint jint y
jlong jstring jint jint jint jint w
JNIEnv jlong jint jint count
layer getMapping().position.x