doxygen/6.0/dnn__backend__native_8h_source.html

/*

 * Copyright (c) 2018 Sergey Lavrushkin

 *

 * This file is part of FFmpeg.

 *

 * FFmpeg is free software; you can redistribute it and/or

 * modify it under the terms of the GNU Lesser General Public

 * License as published by the Free Software Foundation; either

 * version 2.1 of the License, or (at your option) any later version.

 *

 * FFmpeg is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 * Lesser General Public License for more details.

 *

 * You should have received a copy of the GNU Lesser General Public

 * License along with FFmpeg; if not, write to the Free Software

 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

 */


/**

 * @file

 * DNN inference functions interface for native backend.

 */


#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_H

#define AVFILTER_DNN_DNN_BACKEND_NATIVE_H


#include "../dnn_interface.h"

#include "libavformat/avio.h"

#include "libavutil/opt.h"

#include "queue.h"


/**
 * Kinds of layers known to the native backend.
 *
 * The numeric value of every enumerator is fixed and must never change:
 * convert_from_tensorflow.py relies on exactly the same values, and the
 * values are also used to index the layer execution/load function pointers.
 * DLT_COUNT follows the last real layer type, so it equals the number of
 * layer types listed here.
 */
typedef enum {
    DLT_INPUT          = 0,
    DLT_CONV2D         = 1,
    DLT_DEPTH_TO_SPACE = 2,
    DLT_MIRROR_PAD     = 3,
    DLT_MAXIMUM        = 4,
    DLT_MATH_BINARY    = 5,
    DLT_MATH_UNARY     = 6,
    DLT_AVG_POOL       = 7,
    DLT_DENSE          = 8,
    DLT_COUNT
} DNNLayerType;


/**
 * Role of an operand: input, output or intermediate.
 *
 * The values are bit flags; DOT_INTERMEDIATE is defined as the combination
 * of the input and the output bit.
 */
typedef enum {
    DOT_INPUT        = 1,
    DOT_OUTPUT       = 2,
    DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT
} DNNOperandType;

/**
 * Padding modes, numbered consecutively starting at 0.
 *
 * NOTE(review): what each mode does is decided by the layer implementations,
 * which are not visible in this header. The numeric values are presumably
 * shared with serialized models as well - confirm before reordering.
 */
typedef enum {
    VALID              = 0,
    SAME               = 1,
    SAME_CLAMP_TO_EDGE = 2
} DNNPaddingParam;

/**
 * Activation function selectors, numbered consecutively starting at 0.
 *
 * NOTE(review): the numeric values are presumably shared with serialized
 * models - confirm before reordering or inserting entries.
 */
typedef enum {
    RELU       = 0,
    TANH       = 1,
    SIGMOID    = 2,
    NONE       = 3,
    LEAKY_RELU = 4
} DNNActivationFunc;


/**
 * Description of a single layer: its kind, the operands it reads and writes,
 * and an opaque pointer to its parameters.
 */
typedef struct Layer {
    /** Kind of this layer. */
    DNNLayerType type;

    /**
     * Indexes of the input operands. A layer may take several inputs but has
     * exactly one output.
     * The array has a fixed size on purpose: 4 is simply large enough for the
     * input operands (raise it if more are ever needed), and not using a
     * dynamically allocated 'int32_t *input_operand_indexes' means there is
     * no memory leak to worry about.
     */
    int32_t input_operand_indexes[4];

    /** Index of the single output operand. */
    int32_t output_operand_index;

    /**
     * Layer parameters.
     * NOTE(review): presumably points to a parameter struct whose concrete
     * type depends on 'type' - confirm against the layer loaders.
     */
    void *params;
} Layer;


/**
 * An operand of the network: its dimensions, role, data type, layout flag,
 * name and data buffer.
 */
typedef struct DnnOperand {
    /**
     * Dimensions of the operand. Two memory layouts exist (NHWC and NCHW),
     * which is why a generic dims array is used; dims[0] is always Number.
     */
    int32_t dims[4];

    /**
     * Whether this is an input, output or intermediate operand of the
     * network.
     */
    DNNOperandType type;

    /**
     * Element data type. Several kinds (float, half float, int8, ...) are
     * meant to be supported; float is the first one supported.
     */
    DNNDataType data_type;

    /**
     * 1 means NHWC, any other value means NCHW.
     * Only NHWC is supported at first; the flag exists for future extension.
     */
    int8_t isNHWC;

    /**
     * Operand name. Stored inline rather than as 'char *name' to avoid a
     * possible memory leak.
     */
    char name[128];

    /**
     * Data pointer, with its length given in bytes by 'length'.
     */
    void *data;
    int32_t length;

    /**
     * Only valid for intermediate operands: the number of layers that still
     * depend on this operand.
     * TODO: the memory can be reused once usedNumbersLeft reaches zero.
     */
    int32_t usedNumbersLeft;
} DnnOperand;


/**
 * Height, width and channel count bundled into one struct.
 * NOTE(review): judging by the name these describe a network input -
 * confirm against the users of this type.
 */
typedef struct InputParams {
    int height;
    int width;
    int channels;
} InputParams;


/**
 * User-settable options of the native backend.
 */
typedef struct NativeOptions {
    /**
     * NOTE(review): presumably non-zero selects asynchronous execution -
     * confirm against the option table in the implementation.
     */
    uint8_t async;

    /**
     * NOTE(review): presumably the number of threads used by conv2d layers -
     * confirm against the option table in the implementation.
     */
    uint32_t conv2d_threads;
} NativeOptions;


/**
 * Context of the native backend: an AVClass pointer followed by the backend
 * options.
 */
typedef struct NativeContext {
    /**
     * NOTE(review): FFmpeg's AVClass-based logging/option APIs expect this
     * pointer to remain the first member - confirm before reordering.
     */
    const AVClass *class;

    /** Options of the native backend. */
    NativeOptions options;
} NativeContext;


/**
 * Represents a simple feed-forward convolutional network.
 */
typedef struct NativeModel {
    /** Backend context (AVClass pointer and options). */
    NativeContext ctx;

    /**
     * NOTE(review): presumably the generic DNNModel this native model
     * belongs to - confirm against the loader.
     */
    DNNModel *model;

    /**
     * Layers of the network.
     * NOTE(review): layers_num is presumably the number of entries in
     * 'layers' - confirm.
     */
    Layer *layers;
    int32_t layers_num;

    /**
     * Operands of the network.
     * NOTE(review): operands_num is presumably the number of entries in
     * 'operands' - confirm.
     */
    DnnOperand *operands;
    int32_t operands_num;

    /**
     * NOTE(review): two queues of pending work; what each one holds is not
     * visible in this header - see the implementation.
     */
    Queue *task_queue;
    Queue *lltask_queue;
} NativeModel;


/*
 * Entry points of the native backend.
 *
 * NOTE(review): only the prototypes are visible in this header. The one-line
 * descriptions below are inferred from names and signatures; confirm the
 * exact contracts (ownership, error codes) against the implementation.
 */

/** Presumably loads the model stored in model_filename. */
DNNModel *ff_dnn_load_model_native(const char *model_filename,
                                   DNNFunctionType func_type,
                                   const char *options,
                                   AVFilterContext *filter_ctx);

/** Presumably runs inference on the model as described by exec_params. */
int ff_dnn_execute_model_native(const DNNModel *model,
                                DNNExecBaseParams *exec_params);

/** Presumably retrieves a finished input/output frame pair, if any. */
DNNAsyncStatusType ff_dnn_get_result_native(const DNNModel *model,
                                            AVFrame **in, AVFrame **out);

/** Presumably flushes work that is still pending for the model. */
int ff_dnn_flush_native(const DNNModel *model);

/** Presumably frees the model; takes the address of the model pointer. */
void ff_dnn_free_model_native(DNNModel **model);


/*
 * NOTE: callers must treat a return value <= 0 as an error; this is how
 * failures such as integer overflow are reported.
 */
int32_t ff_calculate_operand_data_length(const DnnOperand *oprd);
int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd);

#endif