#include "../utils/bitset.h"

using namespace NNets;

    profiler(nullptr), ready(false),
    ops(ops.begin(), ops.end())

    for (size_t i = 1; i < this->ops.size(); ++i)
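
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of model.cpp: what the constructor loop above
// amounts to. The model is built from a list of operations in execution order
// and, as documented, interconnects them in a feedforward fashion, presumably
// through addConnection() with its default output/input indices. Names below
// (Op, connect, chainFeedforward) are hypothetical stand-ins, not library API.
#include <cstddef>
#include <functional>
#include <vector>

namespace chain_sketch {
    struct Op {};

    // connect(source, dest) stands in for Model::addConnection(source, dest, 0, 0, 0)
    void chainFeedforward(std::vector<Op*>& ops,
                          const std::function<void(Op&, Op&)>& connect) {
        for (size_t i = 1; i < ops.size(); ++i)
            connect(*ops[i - 1], *ops[i]);
    }
}
// ---------------------------------------------------------------------------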
 
            throw RuntimeError("Cannot add operation " + newOp->getName() + " to the model: already added");
 
                throw RuntimeError("Cannot add operation " + newOp->getName() + " to the model: an operation with the same name exists in the model");
 
void Model::append(std::initializer_list<AbstractOperation*> newOps, bool connect) {
    for (auto op : newOps)

    ops.insert(it, newOp);

    auto it = std::find(ops.begin(), ops.end(), &op);

    ops.insert(it, newOp);
 
void Model::addConnection(const std::string& sourceOpName, const std::string& destOpName, int output, int input, int shuffle) {
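
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of model.cpp: the bookkeeping behind
// addConnection(). According to the class declaration, connections are stored
// in a std::multimap keyed by the source operation, so one source output can
// feed several destinations and all connections of a source can be visited
// with equal_range(), as the loops in this file do. Types and names below are
// simplified stand-ins, not the library's actual declarations.
#include <iostream>
#include <map>
#include <string>

namespace connection_sketch {
    struct Op { std::string name; };

    struct Connection {
        Op* dest;      // destination operation
        int output;    // source output index
        int input;     // destination input index
        int shuffle;   // optional channel shuffling step
    };

    void demo() {
        Op conv{"conv1"}, pool{"pool1"}, concat{"concat"};
        std::multimap<const Op*, Connection> connections;

        // one source output may feed several destinations
        connections.emplace(&conv, Connection{&pool,   0, 0, 0});
        connections.emplace(&conv, Connection{&concat, 0, 1, 0});

        // enumerate everything connected to output #0 of a given source
        auto range = connections.equal_range(&conv);
        for (auto i = range.first; i != range.second; ++i)
            if (i->second.output == 0)
                std::cout << conv.name << " -> " << i->second.dest->name
                          << " (input #" << i->second.input << ")\n";
    }
}
// ---------------------------------------------------------------------------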
 
    for (auto i = outputs.first; i != outputs.second; ++i)
        if (i->second.index == output)

    auto outputs = userOutputs.equal_range(&operation);
    for (auto i = outputs.first; i != outputs.second; ++i)
        if (i->second.index == output)

    return getOutputData(numSamples, *(*this)[operation], output);

    auto outputs = userOutputs.equal_range(&operation);
    for (auto i = outputs.first; i != outputs.second; ++i)
        if (i->second.index == output) {
            numSamples = i->second.data.size();
            return i->second.data.data();
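
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of model.cpp: the user-output lookup pattern
// used above. Outputs requested through addOutput() are kept per operation in
// a multimap; getOutputData() scans the matching range for the requested
// output index and returns a pointer to the samples cached after inference.
// Types below are simplified stand-ins for the library's actual classes.
#include <cstddef>
#include <map>
#include <vector>

namespace output_sketch {
    struct Op {};

    struct UserOutput {
        int index;                 // operation output index
        std::vector<float> data;   // samples pulled from GPU after inference
    };

    std::multimap<const Op*, UserOutput> userOutputs;

    const float* getOutputData(size_t& numSamples, const Op& operation, int output) {
        auto outputs = userOutputs.equal_range(&operation);
        for (auto i = outputs.first; i != outputs.second; ++i)
            if (i->second.index == output) {
                numSamples = i->second.data.size();
                return i->second.data.data();
            }
        numSamples = 0;
        return nullptr;   // no such output was requested
    }
}
// ---------------------------------------------------------------------------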
 
    std::map<Storage*, std::vector<AbstractOperation*>> refs;

    std::map<AbstractOperation*, int> sampledChannels;

        auto* op = conn.second.dest;

        op->getSampledChannels(conn.second.input, min, max);

        sampledChannels[op] += max;

        std::vector<Beatmup::Object*> outputs(src->getOutputCount(), nullptr);
        std::vector<int> paddings(src->getOutputCount(), 0);
        Bitset connectedOutputs(src->getOutputCount(), false);

            const auto& conn = i->second;
            paddings[conn.output] = std::max(paddings[conn.output], conn.dest->getInputPadding(conn.input));

            const auto& conn = i->second;
            auto* dst = conn.dest;
            connectedOutputs.set(conn.output);

            if (outputs[conn.output])
                RuntimeError::check(src->acceptsStorageOutput(conn.output) ^ src->acceptsVectorOutput(conn.output) ^ src->acceptsTextureOutput(conn.output),
                    "Operation output accepting different types can only have a single connection");

            if (src->acceptsStorageOutput(conn.output) && dst->acceptsStorageInput(conn.input)) {

                if (outputs[conn.output]) {
                    storage = static_cast<Storage*>(outputs[conn.output]);
                    refs[storage].push_back(dst);

                    int depthCapping = 0;
                    if (sampledChannels[dst] > sampledChannelsLimit) {

                        dst->getSampledChannels(conn.input, min, max);
                        const int cappingMargin = std::min(sampledChannelsLimit, size[2]) - min;
                        if (cappingMargin > 0) {
                            depthCapping = std::min(cappingMargin, sampledChannels[dst] - sampledChannelsLimit);

                            sampledChannels[dst] -= depthCapping;

                    for (auto& i : refs) {
                        auto candidate = i.first;
                        auto& users = i.second;
                        const int reservedDepth = sampledChannelsLimit - 4 * candidate->getNumberOfTextures();

                        if (candidate->getSize() == size && candidate->getPadding() >= dst->getInputPadding(conn.input) && (reservedDepth == depthCapping || depthCapping == 0)

                            users.push_back(dst);

                        storage = (size[0] == 1 && size[1] == 1) ?

                                src->usesGpu(), !src->usesGpu(),
                                paddings[conn.output],

                        refs.emplace(storage, std::vector<AbstractOperation*>{ dst });

                    outputs[conn.output] = storage;

                src->setOutput(*storage, conn.output);
                if (conn.shuffle > 0)
                    dst->setInput(Storage::View(*storage, conn.shuffle), conn.input);

                    dst->setInput(*storage, conn.input);

            else if (src->acceptsVectorOutput(conn.output) && dst->acceptsVectorInput(conn.input)) {

                if (outputs[conn.output])
                    vector = static_cast<GL::Vector*>(outputs[conn.output]);

                    outputs[conn.output] = vector;

                src->setOutput(*vector, conn.output);
                dst->setInput(*vector, conn.input);

            else if (src->acceptsTextureOutput(conn.output) && dst->acceptsTextureInput(conn.input)) {

                if (outputs[conn.output])

                    outputs[conn.output] = texture = &allocateTexture(gpu, src->getOutputSize(conn.output));

                src->setOutput(*texture, conn.output);
                dst->setInput(*texture, conn.input);

                    "to " + dst->getName() + " (input #" + std::to_string(conn.input) + "): storage type mismatch");

            int idx = i->second.index;
            if (idx >= src->getOutputCount())

            if (!connectedOutputs[idx])
                if (src->acceptsStorageOutput(idx)) {

                else if (src->acceptsVectorOutput(idx)) {

        src->prepare(gpu, data, *this);

        for (auto& i : refs) {
            auto& users = i.second;
            for (auto op = users.begin(); op != users.end(); )
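
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of model.cpp: a simplified version of the
// storage reuse decision made in the preparation code above. Among the
// storages allocated so far (refs), one is reused when its geometry and
// padding suit the new consumer; otherwise the caller allocates a fresh one.
// The real code additionally handles the sampled-channels limit (depth
// capping) and checks which operations still use the candidate; both are
// omitted here. All names are hypothetical stand-ins.
#include <array>
#include <map>
#include <vector>

namespace reuse_sketch {
    struct Op { int requiredPadding = 0; };

    struct Storage {
        std::array<int, 3> size{};   // width, height, depth
        int padding = 0;
    };

    Storage* findReusableStorage(std::map<Storage*, std::vector<Op*>>& refs,
                                 const std::array<int, 3>& size, Op& dst) {
        for (auto& entry : refs) {
            Storage* candidate = entry.first;
            std::vector<Op*>& users = entry.second;
            if (candidate->size == size && candidate->padding >= dst.requiredPadding) {
                users.push_back(&dst);   // the new consumer shares the storage
                return candidate;
            }
        }
        return nullptr;   // caller allocates a new storage and registers it in refs
    }
}
// ---------------------------------------------------------------------------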
 
    for (auto op : ops) {

            (*profiler)(op->getName());

                op->execute(thread, *gpu);

        } catch (const std::exception& ex) {

            int idx = it->second.index;
            auto& data = it->second.data;

                if (op->acceptsStorageOutput(idx)) {

                    auto view = op->getOutput(idx);

                        view.getStorage().pull(*gpu);

                    data.resize(view.getSize().volume());
                    for (auto it = data.begin(); it != data.end(); it += view.getDepth()) {
                        scan.fill(it, data.end());

                else if (op->acceptsVectorOutput(idx)) {

                    op->getOutput(vector, idx);
                    vector->fetch(*gpu, data);

        if (op == &operation)
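
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of model.cpp: the pattern used above to pull
// a storage-backed output into a flat user buffer after execution. The buffer
// is sized to the full tensor volume and filled one spatial position at a
// time, each position contributing getDepth() consecutive channel values.
// The Scanner below is a trivial stand-in for the library's storage scanner.
#include <cstddef>
#include <vector>

namespace scan_sketch {
    struct Scanner {
        float next = 0.f;
        int depth;
        explicit Scanner(int depth): depth(depth) {}

        // writes one depth column starting at 'it', never passing 'limit'
        template<typename It>
        void fill(It it, It limit) {
            for (int c = 0; c < depth && it != limit; ++c, ++it)
                *it = next++;
        }
    };

    std::vector<float> pullOutput(int width, int height, int depth) {
        std::vector<float> data(static_cast<size_t>(width) * height * depth);
        Scanner scan(depth);
        for (auto it = data.begin(); it != data.end(); it += depth)
            scan.fill(it, data.end());
        return data;
    }
}
// ---------------------------------------------------------------------------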
 
#ifdef BEATMUP_OPENGLVERSION_GLES20

    switch (size.getDepth()) {
 
    for (size_t firstIdx = 0; firstIdx < ops.size(); ++firstIdx)
        if (ops[firstIdx] == &first) {
            for (size_t secondIdx = firstIdx + 1; secondIdx < ops.size(); ++secondIdx)
                if (ops[secondIdx] == &second)

        if (op->getName() == operationName)

        if (op->getName() == operationName)
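
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of model.cpp: the execution-order check
// performed above. Operations are held in a plain vector in execution order,
// so "first precedes second" reduces to locating 'first' and scanning the
// remaining entries for 'second'. Simplified stand-in types.
#include <cstddef>
#include <vector>

namespace order_sketch {
    struct Op {};

    bool isPreceding(const std::vector<Op*>& ops, const Op& first, const Op& second) {
        for (size_t firstIdx = 0; firstIdx < ops.size(); ++firstIdx)
            if (ops[firstIdx] == &first) {
                for (size_t secondIdx = firstIdx + 1; secondIdx < ops.size(); ++secondIdx)
                    if (ops[secondIdx] == &second)
                        return true;
                return false;
            }
        return false;
    }
}
// ---------------------------------------------------------------------------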
 
        size += entry->getMemorySize();

        size += entry->getMemorySize();

        size += entry->getMemorySize();
 
    for (const auto& op : ops)

        const auto& info = conn.second;

        block.set("from", conn.first->getName());
        block.set("to", info.dest->getName());

            block.set("from_output", info.output);

            block.set("to_input", info.input);
        if (info.shuffle > 0)
            block.set("shuffle", info.shuffle);
        listing.emplace("connections", std::move(block));

        listing.emplace("connections", {});
 
    std::stringstream strstr;
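
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of model.cpp: the shape of one "connections"
// entry produced by the serialization code above, using a plain ordered list
// of key-value pairs instead of the library's Listing::Block. The operation
// names are made up; as the code shows, "shuffle" is written only when it is
// positive, and the indentation suggests "from_output"/"to_input" are written
// conditionally as well.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

namespace listing_sketch {
    void printConnectionBlock() {
        // insertion order mirrors the keys written above
        std::vector<std::pair<std::string, std::string>> block{
            {"from", "conv1"}, {"to", "pool1"},
            {"from_output", "0"}, {"to_input", "0"}, {"shuffle", "2"}
        };
        for (const auto& kv : block)
            std::cout << kv.first << ": " << kv.second << "\n";
    }
}
// ---------------------------------------------------------------------------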
 
    Exception("Error in %s: %s", op.getName().c_str(), ex.what())
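
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of model.cpp: the error-wrapping pattern the
// constructor above implements. An exception thrown while an operation runs
// is caught in execute() and rethrown with the operation name prepended, so
// the failing layer can be identified from the message alone. Simplified
// stand-ins for the library's Exception and operation types.
#include <stdexcept>
#include <string>

namespace error_sketch {
    struct Op {
        std::string name;
        std::string getName() const { return name; }
    };

    struct InferenceTimeError : std::runtime_error {
        InferenceTimeError(const Op& op, const std::exception& ex):
            std::runtime_error("Error in " + op.getName() + ": " + ex.what()) {}
    };

    void runOp(Op& op) {
        try {
            throw std::runtime_error("out of memory");   // stand-in for a failure during op execution
        }
        catch (const std::exception& ex) {
            throw InferenceTimeError(op, ex);
        }
    }
}
// ---------------------------------------------------------------------------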
 