20 #include "../utils/bitset.h"
24 using namespace NNets;
29 profiler(nullptr), ready(false),
30 ops(ops.begin(), ops.
end())
33 for (
size_t i = 1; i < this->ops.size(); ++i)
50 throw RuntimeError(
"Cannot add operation " + newOp->
getName() +
" to the model: already added");
53 throw RuntimeError(
"Cannot add operation " + newOp->
getName() +
" to the model: an operation with the same exists in the model");
62 void Model::append(std::initializer_list<AbstractOperation*> newOps,
bool connect) {
63 for (
auto op : newOps)
72 ops.insert(it, newOp);
77 auto it = std::find(
ops.begin(),
ops.end(), &
op);
80 ops.insert(it, newOp);
84 void Model::addConnection(
const std::string& sourceOpName,
const std::string& destOpName,
int output,
int input,
int shuffle) {
104 for (
auto i = outputs.first; i != outputs.second; ++i)
105 if (i->second.index == output)
115 auto outputs =
userOutputs.equal_range(&operation);
116 for (
auto i = outputs.first; i != outputs.second; ++i)
117 if (i->second.index == output)
126 return getOutputData(numSamples, *(*
this)[operation], output);
131 auto outputs =
userOutputs.equal_range(&operation);
132 for (
auto i = outputs.first; i != outputs.second; ++i)
133 if (i->second.index == output) {
134 numSamples = i->second.data.size();
135 return i->second.data.data();
148 std::map<Storage*, std::vector<AbstractOperation*>> refs;
155 std::map<AbstractOperation*, int> sampledChannels;
157 auto*
op = conn.second.dest;
160 op->getSampledChannels(conn.second.input,
min,
max);
164 sampledChannels[
op] +=
max;
171 std::vector<Beatmup::Object*> outputs(
src->getOutputCount(),
nullptr);
172 std::vector<int> paddings(
src->getOutputCount(), 0);
173 Bitset connectedOutputs(
src->getOutputCount(),
false);
178 const auto& conn = i->second;
179 paddings[conn.output] =
std::max(paddings[conn.output], conn.dest->getInputPadding(conn.input));
184 const auto& conn = i->second;
185 auto* dst = conn.dest;
186 connectedOutputs.
set(conn.output);
188 if (outputs[conn.output])
189 RuntimeError::check(
src->acceptsStorageOutput(conn.output) ^
src->acceptsVectorOutput(conn.output) ^
src->acceptsTextureOutput(conn.output),
190 "Operation output accepting different types can only have a single connection");
194 if (
src->acceptsStorageOutput(conn.output) && dst->acceptsStorageInput(conn.input)) {
199 if (outputs[conn.output]) {
200 storage =
static_cast<Storage*
>(outputs[conn.output]);
201 refs[storage].push_back(dst);
206 int depthCapping = 0;
207 if (sampledChannels[dst] > sampledChannelsLimit) {
210 dst->getSampledChannels(conn.input,
min,
max);
211 const int cappingMargin =
std::min(sampledChannelsLimit,
size[2]) -
min;
212 if (cappingMargin > 0) {
213 depthCapping =
std::min(cappingMargin, sampledChannels[dst] - sampledChannelsLimit);
215 sampledChannels[dst] -= depthCapping;
220 for (
auto& i : refs) {
221 auto candidate = i.first;
222 auto& users = i.second;
223 const int reservedDepth = sampledChannelsLimit - 4 * candidate->getNumberOfTextures();
225 if (candidate->getSize() ==
size && candidate->getPadding() >= dst->getInputPadding(conn.input) && (reservedDepth == depthCapping || depthCapping == 0)
230 users.push_back(dst);
239 storage = (
size[0] == 1 &&
size[1] == 1) ?
244 src->usesGpu(), !
src->usesGpu(),
245 paddings[conn.output],
248 refs.emplace(storage, std::vector<AbstractOperation*>{ dst });
252 outputs[conn.output] = storage;
256 src->setOutput(*storage, conn.output);
257 if (conn.shuffle > 0)
258 dst->setInput(
Storage::View(*storage, conn.shuffle), conn.input);
260 dst->setInput(*storage, conn.input);
264 else if (
src->acceptsVectorOutput(conn.output) && dst->acceptsVectorInput(conn.input)) {
269 if (outputs[conn.output])
270 vector =
static_cast<GL::Vector*
>(outputs[conn.output]);
273 outputs[conn.output] = vector;
277 src->setOutput(*vector, conn.output);
278 dst->setInput(*vector, conn.input);
282 else if (
src->acceptsTextureOutput(conn.output) && dst->acceptsTextureInput(conn.input)) {
287 if (outputs[conn.output])
290 outputs[conn.output] = texture = &
allocateTexture(gpu,
src->getOutputSize(conn.output));
293 src->setOutput(*texture, conn.output);
294 dst->setInput(*texture, conn.input);
299 "to " + dst->getName() +
" (input #" +
std::to_string(conn.input) +
"): storage type mismatch");
305 int idx = i->second.index;
306 if (idx >=
src->getOutputCount())
308 if (!connectedOutputs[idx])
309 if (
src->acceptsStorageOutput(idx)) {
312 else if (
src->acceptsVectorOutput(idx)) {
318 src->prepare(gpu, data, *
this);
321 for (
auto& i : refs) {
322 auto& users = i.second;
323 for (
auto op = users.begin();
op != users.end(); )
347 for (
auto op :
ops) {
353 (*profiler)(
op->getName());
358 op->execute(thread, *gpu);
361 }
catch (
const std::exception& ex) {
368 int idx = it->second.index;
369 auto& data = it->second.data;
371 if (
op->acceptsStorageOutput(idx)) {
373 auto view =
op->getOutput(idx);
375 view.getStorage().pull(*gpu);
380 data.resize(view.getSize().volume());
381 for (
auto it = data.begin(); it != data.end(); it += view.getDepth()) {
382 scan.
fill(it, data.end());
386 else if (
op->acceptsVectorOutput(idx)) {
388 op->getOutput(vector, idx);
389 vector->
fetch(*gpu, data);
409 if (
op == &operation)
449 #ifdef BEATMUP_OPENGLVERSION_GLES20
462 switch (
size.getDepth()) {
481 for (
size_t firstIdx = 0; firstIdx <
ops.size(); ++firstIdx)
482 if (
ops[firstIdx] == &first) {
483 for (
size_t secondIdx = firstIdx + 1; secondIdx <
ops.size(); ++secondIdx)
484 if (
ops[secondIdx] == &second)
494 if (
op->getName() == operationName)
502 if (
op->getName() == operationName)
527 size += entry->getMemorySize();
529 size += entry->getMemorySize();
531 size += entry->getMemorySize();
553 for (
const auto&
op :
ops)
558 const auto& info = conn.second;
560 block.
set(
"from", conn.first->getName());
561 block.
set(
"to", info.dest->getName());
563 block.
set(
"from_output", info.output);
565 block.
set(
"to_input", info.input);
566 if (info.shuffle > 0)
567 block.
set(
"shuffle", info.shuffle);
568 listing.
emplace(
"connections", std::move(block));
573 listing.
emplace(
"connections", {});
581 std::stringstream strstr;
588 Exception(
"Error in %s: %s",
op.getName().c_str(), ex.what())
void set(size_t i, bool value=true)
A key-value pair set storing pieces of arbitrary data (chunks) under string keys.
virtual void close()=0
Closes the collection after a reading session.
virtual void open()=0
Opens the collection to read chunks from it.
Basic class: task and memory management, any kind of static data.
Base class for all exceptions.
Real-valued vector usable by GPU.
void fetch(GraphicPipeline &gpu, std::vector< float > &output) const
Grabs vector values back from GPU to user memory.
Format
Vector data format.
@ FIXED16
16 bit per element
@ FLOAT
32 bit per element, floating point
Internal low-level GPU control API.
void switchMode(Mode mode)
Switches GPU mode.
int getLimit(Limit limit) const
@ INFERENCE
Textures are feature maps computed in fragment shaders.
@ TEXTURE_IMAGE_UNITS
maximum number of texture units per fragment shader
void flush()
Waits until all operations submitted to GPU are finished.
Bitmap whose memory is managed by the Beatmup engine.
void set(const std::string &key, T value)
Sets a value for a specific key.
Parser of simple YAML-like listings.
void printOut(std::ostream &stream)
Prints out the listing to an output stream.
void emplace(const std::string &key, Block &&block)
Adds a block to a chapter.
Abstract neural net operation (layer).
virtual int getOutputCount() const
Returns number of operation outputs.
virtual int getInputCount() const
Returns number of operation inputs.
std::string getName() const
Wrapper for exceptions occurring during the model inference.
InferenceTimeError(const AbstractOperation &op, const std::exception &ex)
size_t getMemorySize() const
Returns the amount of texture memory in bytes currently allocated by the model to run the inference.
Storage & allocateFlatStorage(GraphicPipeline &gpu, const int size)
Allocates a new flat storage.
bool ready
if true, ops are connected to each other and storages are allocated
std::vector< AbstractOperation * > ops
model operations
void freeMemory()
Frees all allocated storages.
std::multimap< const AbstractOperation *, UserOutput > userOutputs
operation => user output mapping
OperationClass & getOperation(const std::string &operationName)
Retrieves an operation by its name.
unsigned long countTexelFetches() const
Provides an estimation of the total number of texels fetched by all the operations in the model per i...
Storage & allocateStorage(GraphicPipeline &gpu, const Size size, bool forGpu=true, bool forCpu=false, const int pad=0, const int reservedChannels=0)
Allocates a new storage.
ProgressTracking inferenceProgress
inference progress
Profiler * profiler
pointer to a Profiler attached to the model
void addConnection(AbstractOperation &source, AbstractOperation &dest, int output=0, int input=0, int shuffle=0)
Listing serialize() const
Returns serialized representation of the model as a Listing.
ProgressTracking preparingProgress
model preparation progress
void addOutput(const std::string &operation, int output=0)
Enables reading output data from the model memory through getOutputData().
std::vector< Storage * > storages
allocated storages used during the inference
bool isOperationInModel(const AbstractOperation &operation) const
Checks if a specific operation is part of the model.
unsigned long countMultiplyAdds() const
Provides an estimation of the number of multiply-adds characterizing the model complexity.
GL::Vector & allocateVector(GraphicPipeline &gpu, const int size)
Allocates a vector that can be used as operation input or output.
Model(Context &context, std::initializer_list< AbstractOperation * > ops)
Instantiates a model from a list of operations interconnecting them in a feedforward fashion.
std::multimap< const AbstractOperation *, Connection > connections
source operation => connection descriptor mapping
virtual void prepare(GraphicPipeline &gpu, ChunkCollection &data)
Prepares all operations: reads the model data from chunks and builds GPU programs.
std::string serializeToString() const
Returns serialized representation of the model as a string.
AbstractOperation * operator[](const std::string &operationName)
void execute(TaskThread &thread, GraphicPipeline *gpu)
Runs the inference.
std::vector< InternalBitmap * > textures
allocated images used during the inference
void addOperation(const std::string &opName, AbstractOperation *newOp)
Adds a new operation to the model before another operation in the execution order.
bool isPreceding(const AbstractOperation &first, const AbstractOperation &second) const
Checks whether an operation goes before another operation in the model according to the ops execution order.
std::vector< GL::Vector * > vectors
allocated vectors used during the inference
InternalBitmap & allocateTexture(GraphicPipeline &gpu, const Size size)
Allocates a texture that can be used as operation input or output.
void append(AbstractOperation *newOp, bool connect=false)
Adds a new operation to the model.
const float * getOutputData(size_t &numSamples, const std::string &operation, int output=0) const
Reads data from the model memory.
Operation 3D input/output size.
Scans a storage view in RAM for further computations on CPU.
void move(int x, int y)
Sets the pointer to a specific spatial position.
void fill(T begin, T limit)
Extracts the content of feature maps at the current position.
Maps a 3D tensor onto a storage.
3D tensor stored in a set of textures.
void allocate(GraphicPipeline &gpu)
Allocates the storage in GPU memory.
void reset(unsigned int max)
Resets the progress to zero.
static void check(const bool condition, const std::string &message)
virtual bool isTaskAborted() const =0
Returns true if the task is asked to stop from outside.
@ SingleByte
single channel of 8 bits per pixel (like grayscale), unsigned integer values
@ QuadByte
4 channels of 8 bits per pixel (like RGBA), unsigned integer values
@ TripleByte
3 channels of 8 bits per pixel (like RGB), unsigned integer values
std::string to_string(Beatmup::NNets::ActivationFunction function)
CustomPoint< numeric > min(const CustomPoint< numeric > &a, const CustomPoint< numeric > &b)
CustomPoint< numeric > max(const CustomPoint< numeric > &a, const CustomPoint< numeric > &b)
A user-defined output descriptor.
JNIEnv jobject jint format
jlong jint jint jint jint pixelFormat
return(jlong) new Beatmup jlong jstring src
JNIEnv jlong jobject jstring opName