/* doxygen/6.1/vf__signature_8c_source.html */

/*

 * Copyright (c) 2017 Gerion Entrup

 *

 * This file is part of FFmpeg.

 *

 * FFmpeg is free software; you can redistribute it and/or modify

 * it under the terms of the GNU General Public License as published by

 * the Free Software Foundation; either version 2 of the License, or

 * (at your option) any later version.

 *

 * FFmpeg is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 * GNU General Public License for more details.

 *

 * You should have received a copy of the GNU General Public License along

 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,

 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

 */


/**

 * @file

 * MPEG-7 video signature calculation and lookup filter

 * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf

 */


#include "libavcodec/put_bits.h"

#include "libavformat/avformat.h"

#include "libavutil/opt.h"

#include "libavutil/avstring.h"

#include "libavutil/file_open.h"

#include "avfilter.h"

#include "internal.h"

#include "signature.h"

#include "signature_lookup.c"


/* Byte offset of option field x inside the filter's private SignatureContext. */
#define OFFSET(x) offsetof(SignatureContext, x)

/* Flags shared by every option of this filter. Parenthesized so the macro
 * stays a single operand wherever it is expanded. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)

/* 64-bit scale factor: used as the precision factor in filter_frame() when
 * sums are kept exact, and in the overflow estimate in config_input().
 * Parenthesized so the cast binds to the literal in every context. */
#define BLOCK_LCM ((int64_t) 476985600)


/*
 * User-settable options of the signature filter.
 *
 * Entries of type AV_OPT_TYPE_CONST are the named values of the preceding
 * option that shares their unit string ("mode" or "format").
 * The numeric range of "format" is derived from the format enum, and the
 * string option "filename" carries no numeric range.
 */
static const AVOption signature_options[] = {
    { "detectmode", "set the detectmode",
        OFFSET(mode),         AV_OPT_TYPE_INT,    {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, "mode" },
        { "off",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF},  0, 0, FLAGS, "mode" },
        { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, FLAGS, "mode" },
        { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, FLAGS, "mode" },
    { "nb_inputs",  "number of inputs",
        OFFSET(nb_inputs),    AV_OPT_TYPE_INT,    {.i64 = 1},        1, INT_MAX,          FLAGS },
    { "filename",   "filename for output files",
        OFFSET(filename),     AV_OPT_TYPE_STRING, {.str = ""},       0, 0,                FLAGS },
    { "format",     "set output format",
        OFFSET(format),       AV_OPT_TYPE_INT,    {.i64 = FORMAT_BINARY}, 0, NB_FORMATS-1, FLAGS, "format" },
        { "binary", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FORMAT_BINARY}, 0, 0, FLAGS, "format" },
        { "xml",    NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FORMAT_XML},    0, 0, FLAGS, "format" },
    { "th_d",       "threshold to detect one word as similar",
        OFFSET(thworddist),   AV_OPT_TYPE_INT,    {.i64 = 9000},     1, INT_MAX,          FLAGS },
    { "th_dc",      "threshold to detect all words as similar",
        OFFSET(thcomposdist), AV_OPT_TYPE_INT,    {.i64 = 60000},    1, INT_MAX,          FLAGS },
    { "th_xh",      "threshold to detect frames as similar",
        OFFSET(thl1),         AV_OPT_TYPE_INT,    {.i64 = 116},      1, INT_MAX,          FLAGS },
    { "th_di",      "minimum length of matching sequence in frames",
        OFFSET(thdi),         AV_OPT_TYPE_INT,    {.i64 = 0},        0, INT_MAX,          FLAGS },
    { "th_it",      "threshold for relation of good to all frames",
        OFFSET(thit),         AV_OPT_TYPE_DOUBLE, {.dbl = 0.5},    0.0, 1.0,              FLAGS },
    { NULL }
};

/* Defines signature_class, which ff_vf_signature references as its priv_class. */
AVFILTER_DEFINE_CLASS(signature);


/*
 * Pixel formats accepted on the inputs: all formats with a separate gray
 * value. filter_frame() reads only plane 0 (picref->data[0]).
 * The list is terminated by AV_PIX_FMT_NONE.
 */
static const enum AVPixelFormat pix_fmts[] = {
    AV_PIX_FMT_GRAY8,
    AV_PIX_FMT_YUV410P,
    AV_PIX_FMT_YUV411P,
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_YUV422P,
    AV_PIX_FMT_YUV440P,
    AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_YUVJ411P,
    AV_PIX_FMT_YUVJ420P,
    AV_PIX_FMT_YUVJ422P,
    AV_PIX_FMT_YUVJ444P,
    AV_PIX_FMT_YUVJ440P,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_NV21,
    AV_PIX_FMT_NONE
};


/**
 * Input pad configuration callback.
 *
 * Stores the link's time base and dimensions in the per-input StreamContext
 * and decides whether filter_frame() has to use the rounded ("divide")
 * arithmetic because the exact sums might overflow 64 bits.
 *
 * @param inlink input link being configured
 * @return always 0
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    SignatureContext *sic = ctx->priv;
    StreamContext *sc = &sic->streamcontexts[FF_INLINK_IDX(inlink)];
    /* largest estimate that still fits once scaled by BLOCK_LCM and 255 */
    const uint64_t limit = INT64_MAX / (BLOCK_LCM * 255);
    const int wterm = inlink->w/32 + 1;
    /* NOTE(review): this evaluates as ((h/32) * h) / 32 + 1; the width term
     * suggests (h/32) * (h/32 + 1) may have been intended - confirm before
     * changing, since it decides which input sizes use the rounded mode. */
    const int hterm = inlink->h/32 * inlink->h/32 + 1;
    const uint64_t estimate = ((uint64_t) inlink->w/32) * wterm * hterm;

    sc->time_base = inlink->time_base;

    /* test for overflow */
    sc->divide = (estimate > limit);
    if (sc->divide)
        av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n");

    sc->w = inlink->w;
    sc->h = inlink->h;

    return 0;
}


/**
 * Number of grid cells covered by a block; both corners are inclusive.
 *
 * @param b block with upper-left corner b->up and lower-right corner b->to
 * @return rows * columns of the block
 */
static int get_block_size(const Block *b)
{
    const int rows = b->to.y - b->up.y + 1;
    const int cols = b->to.x - b->up.x + 1;

    return rows * cols;
}


/**
 * Sum of the cells inside a block, read from a summed area table.
 *
 * @param sc     stream context (not used by this function)
 * @param intpic 32x32 summed area table, indexed [row][column]
 * @param b      block with inclusive corners b->up and b->to
 * @return the table value at the lower-right corner minus the parts lying
 *         left of and above the block; the doubly removed corner is added back
 */
static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
{
    const int left   = b->up.x;
    const int top    = b->up.y;
    const int right  = b->to.x;
    const int bottom = b->to.y;
    /* a neighbouring column/row only exists when the block does not touch
     * the left/top edge of the table */
    const int has_left = left - 1 >= 0;
    const int has_top  = top - 1 >= 0;
    uint64_t total = intpic[bottom][right];

    if (has_left)
        total -= intpic[bottom][left - 1];
    if (has_top)
        total -= intpic[top - 1][right];
    if (has_left && has_top)
        total += intpic[top - 1][left - 1];

    return total;
}


/**
 * qsort() comparator ordering uint64_t values ascending.
 *
 * @return -1, 0 or 1 when *x is smaller than, equal to or greater than *y
 */
static int cmp(const void *x, const void *y)
{
    const uint64_t lhs = *(const uint64_t *) x;
    const uint64_t rhs = *(const uint64_t *) y;

    if (lhs < rhs)
        return -1;
    if (lhs > rhs)
        return 1;
    return 0;
}


/**
 * Set one bit of a byte array to 1.
 *
 * Bits are numbered from the most significant bit of data[0]: position 0 is
 * bit 7 of the first byte, position 8 is bit 7 of the second byte.
 *
 * @param data byte array to modify
 * @param pos  bit position to set
 */
static void set_bit(uint8_t* data, size_t pos)
{
    const size_t byte_index = pos / 8;
    const size_t bit_in_byte = pos % 8;

    data[byte_index] |= (uint8_t) (1 << (7 - bit_in_byte));
}


/**
 * Input pad frame callback: compute the fine signature of one frame, append
 * it to the stream's signature list and update the coarse signatures.
 *
 * Steps, in order:
 *  1. pick or allocate the FineSignature node for this frame,
 *  2. accumulate plane 0 of the frame into a 32x32 grid,
 *  3. turn the grid into a summed area table on a common denominator,
 *  4. evaluate every element category, ternarize the results and pack them
 *     into fs->framesig and fs->words,
 *  5. derive the confidence value,
 *  6. mark the frame's words in the current coarse signature(s),
 *  7. optionally log the result at debug level.
 *
 * @param inlink input link the frame arrived on
 * @param picref the frame; only data[0] and linesize[0] are read
 * @return the result of ff_filter_frame() for input 0, 1 for every other
 *         input, or AVERROR(ENOMEM) if an allocation fails
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *picref)

{

    AVFilterContext *ctx = inlink->dst;

    SignatureContext *sic = ctx->priv;

    StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);

    FineSignature* fs;


    /* powers of three (3^4 .. 3^0): weights used to pack five ternary digits into one byte */
    static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };

    /* indexes of words : 210,217,219,274,334  44,175,233,270,273  57,70,103,237,269  100,285,295,337,354  101,102,111,275,296

    s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...

    */

    static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};

    static const uint8_t      s2usw[25]   = { 5,10,11, 15, 20, 21, 12, 22,  6,  0,  1,  2,  7, 13, 14,  8,  9,  3, 23, 16, 17, 24,  4, 18, 19};


    uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */

    /* 32x32 grid of accumulated pixel values, later converted to a summed area table */
    uint64_t intpic[32][32];

    uint64_t rowcount;

    uint8_t *p = picref->data[0];

    int inti, intj;

    int *intjlut;


    uint64_t conflist[DIFFELEM_SIZE];

    /* f: running element index over all categories, g: fill level of conflist, w: position in wordvec */
    int f = 0, g = 0, w = 0;

    int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b;

    int64_t denom;

    int i, j, k, ternary;

    uint64_t blocksum;

    int blocksize;

    int64_t th; /* threshold */

    int64_t sum;


    /* scale factor for the grid values; the smaller one is used when config_input() flagged a possible overflow */
    int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM;


    /* initialize fs */

    if (sc->curfinesig) {

        /* not the first frame: allocate a new node and append it to the list */
        fs = av_mallocz(sizeof(FineSignature));

        if (!fs)

            return AVERROR(ENOMEM);

        sc->curfinesig->next = fs;

        fs->prev = sc->curfinesig;

        sc->curfinesig = fs;

    } else {

        /* first frame: use the list head that init() allocated */
        fs = sc->curfinesig = sc->finesiglist;

        sc->curcoarsesig1->first = fs;

    }


    fs->pts = picref->pts;

    fs->index = sc->lastindex++;


    memset(intpic, 0, sizeof(uint64_t)*32*32);

    /* lookup table: pixel column -> grid column */
    intjlut = av_malloc_array(inlink->w, sizeof(int));

    if (!intjlut)

        return AVERROR(ENOMEM);

    for (i = 0; i < inlink->w; i++) {

        intjlut[i] = (i*32)/inlink->w;

    }


    /* add every pixel of plane 0 to the grid cell it falls into */
    for (i = 0; i < inlink->h; i++) {

        inti = (i*32)/inlink->h;

        for (j = 0; j < inlink->w; j++) {

            intj = intjlut[j];

            intpic[inti][intj] += p[j];

        }

        p += picref->linesize[0];

    }

    av_freep(&intjlut);


    /* The following calculates a summed area table (intpic) and brings the numbers

     * in intpic to the same denominator.

     * So you only have to handle the numerator in the following sections.

     */

    dh1 = inlink->h / 32;

    if (inlink->h % 32)

        dh2 = dh1 + 1;

    dw1 = inlink->w / 32;

    if (inlink->w % 32)

        dw2 = dw1 + 1;

    /* in "divide" mode the common denominator is divided out right away, otherwise it is kept for later */
    denom = (sc->divide) ? dh1 * (int64_t)dh2 * dw1 * dw2 : 1;


    for (i = 0; i < 32; i++) {

        rowcount = 0;

        a = 1;

        if (dh2 > 1) {

            /* number of pixel rows in grid row i, then replaced by the other of the two possible row counts */
            a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32;

            a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32;

            a = (a == dh1)? dh2 : dh1;

        }

        for (j = 0; j < 32; j++) {

            b = 1;

            if (dw2 > 1) {

                /* same as above for the pixel columns of grid column j */
                b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32;

                b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32;

                b = (b == dw1)? dw2 : dw1;

            }

            /* rowcount is the running sum along the row; adding the cell above yields the summed area table */
            rowcount += intpic[i][j] * a * b * precfactor / denom;

            if (i > 0) {

                intpic[i][j] = intpic[i-1][j] + rowcount;

            } else {

                intpic[i][j] = rowcount;

            }

        }

    }


    /* NOTE(review): for a width or height below 32, dw1 or dh1 is 0, so this
     * denom is 0 when sc->divide is unset, and the confidence computation
     * below divides by precfactor * denom - confirm such inputs cannot reach here */
    denom = (sc->divide) ? 1 : dh1 * (int64_t)dh2 * dw1 * dw2;


    for (i = 0; i < ELEMENT_COUNT; i++) {

        const ElemCat* elemcat = elements[i];

        int64_t* elemsignature;

        uint64_t* sortsignature;


        /* signed element values and their absolute values (sorted later to find the threshold) */
        elemsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));

        if (!elemsignature)

            return AVERROR(ENOMEM);

        sortsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));

        if (!sortsignature) {

            av_freep(&elemsignature);

            return AVERROR(ENOMEM);

        }


        for (j = 0; j < elemcat->elem_count; j++) {

            blocksum = 0;

            blocksize = 0;

            /* mean over the first left_count blocks of this element */
            for (k = 0; k < elemcat->left_count; k++) {

                blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);

                blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);

            }

            sum = blocksum / blocksize;

            if (elemcat->av_elem) {

                /* average element: offset the mean by 128 on the common scale */
                sum -= 128 * precfactor * denom;

            } else {

                /* difference element: subtract the mean over the remaining blocks */
                blocksum = 0;

                blocksize = 0;

                for (; k < elemcat->block_count; k++) {

                    blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);

                    blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);

                }

                sum -= blocksum / blocksize;

                /* only difference elements contribute to the confidence list */
                conflist[g++] = FFABS(sum * 8 / (precfactor * denom));

            }


            elemsignature[j] = sum;

            sortsignature[j] = FFABS(sum);

        }


        /* get threshold */

        qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), cmp);

        th = sortsignature[(int) (elemcat->elem_count*0.333)];


        /* ternarize */

        for (j = 0; j < elemcat->elem_count; j++) {

            if (elemsignature[j] < -th) {

                ternary = 0;

            } else if (elemsignature[j] <= th) {

                ternary = 1;

            } else {

                ternary = 2;

            }

            /* five ternary digits are packed into each framesig byte */
            fs->framesig[f/5] += ternary * pot3[f%5];


            /* elements listed in wordvec additionally contribute a digit to one of the five words */
            if (f == wordvec[w]) {

                fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++];

                /* stop advancing at the last entry so wordvec[w] stays in bounds */
                if (w < 24)

                    w++;

            }

            f++;

        }

        av_freep(&elemsignature);

        av_freep(&sortsignature);

    }


    /* confidence */

    /* middle element of the sorted list, capped at 255 */
    qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), cmp);

    fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255);


    /* coarsesignature */

    /* coarsecount cycles through 0..89: a new first signature starts at 0
     * (once a second one exists) and a new second signature starts at 45 */
    if (sc->coarsecount == 0) {

        if (sc->curcoarsesig2) {

            sc->curcoarsesig1 = av_mallocz(sizeof(CoarseSignature));

            if (!sc->curcoarsesig1)

                return AVERROR(ENOMEM);

            sc->curcoarsesig1->first = fs;

            sc->curcoarsesig2->next = sc->curcoarsesig1;

            sc->coarseend = sc->curcoarsesig1;

        }

    }

    if (sc->coarsecount == 45) {

        sc->midcoarse = 1;

        sc->curcoarsesig2 = av_mallocz(sizeof(CoarseSignature));

        if (!sc->curcoarsesig2)

            return AVERROR(ENOMEM);

        sc->curcoarsesig2->first = fs;

        sc->curcoarsesig1->next = sc->curcoarsesig2;

        sc->coarseend = sc->curcoarsesig2;

    }

    /* mark each of the frame's five word values as present in the current coarse signature */
    for (i = 0; i < 5; i++) {

        set_bit(sc->curcoarsesig1->data[i], fs->words[i]);

    }

    /* assuming the actual frame is the last */

    sc->curcoarsesig1->last = fs;

    if (sc->midcoarse) {

        for (i = 0; i < 5; i++) {

            set_bit(sc->curcoarsesig2->data[i], fs->words[i]);

        }

        sc->curcoarsesig2->last = fs;

    }


    sc->coarsecount = (sc->coarsecount+1)%90;


    /* debug printing finesignature */

    /* only when the log level is exactly AV_LOG_DEBUG */
    if (av_log_get_level() == AV_LOG_DEBUG) {

        av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence);


        av_log(ctx, AV_LOG_DEBUG, "words:");

        for (i = 0; i < 5; i++) {

            av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] );

            av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] );

            for (j = 1; j < 5; j++)

                av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] );

            av_log(ctx, AV_LOG_DEBUG, ";");

        }

        av_log(ctx, AV_LOG_DEBUG, "\n");


        av_log(ctx, AV_LOG_DEBUG, "framesignature:");

        for (i = 0; i < SIGELEM_SIZE/5; i++) {

            av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] );

            for (j = 1; j < 5; j++)

                av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] );

        }

        av_log(ctx, AV_LOG_DEBUG, "\n");

    }


    /* only frames of the first input are passed on to the output */
    if (FF_INLINK_IDX(inlink) == 0)

        return ff_filter_frame(inlink->dst->outputs[0], picref);

    /* NOTE(review): picref is neither forwarded nor freed on this path (nor on
     * the ENOMEM returns above) - looks like a frame leak; confirm ownership */
    return 1;

}


/**
 * Write the signature of one stream as an MPEG-7 XML document.
 *
 * @param ctx      filter context, used for logging
 * @param sc       stream whose coarse and fine signatures are written
 * @param filename path of the file to create
 * @return 0 on success; AVERROR(EINVAL) if the stream has no frames; a
 *         negative error code if the file cannot be opened or written
 */
static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
{
    FineSignature* fs;
    CoarseSignature* cs;
    int i, j;
    int ret = 0;
    FILE* f;
    /* powers of three used to unpack the five ternary digits of a byte */
    static const unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };

    if (!sc->coarseend->last)
        return AVERROR(EINVAL); // No frames ?

    f = avpriv_fopen_utf8(filename, "w");
    if (!f) {
        /* report the real cause; fall back to EINVAL if errno was not set */
        int err = AVERROR(errno);
        char buf[128];
        if (err >= 0)
            err = AVERROR(EINVAL);
        av_strerror(err, buf, sizeof(buf));
        av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, buf);
        return err;
    }

    /* header */
    fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n");
    fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
    fprintf(f, "  <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
    fprintf(f, "    <Descriptor xsi:type=\"VideoSignatureType\">\n");
    fprintf(f, "      <VideoSignatureRegion>\n");
    fprintf(f, "        <VideoSignatureSpatialRegion>\n");
    fprintf(f, "          <Pixel>0 0 </Pixel>\n");
    fprintf(f, "          <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1);
    fprintf(f, "        </VideoSignatureSpatialRegion>\n");
    fprintf(f, "        <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
    /* hoping num is 1, other values are vague */
    fprintf(f, "        <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num);
    fprintf(f, "        <MediaTimeOfSpatialRegion>\n");
    fprintf(f, "          <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
    fprintf(f, "          <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts);
    fprintf(f, "        </MediaTimeOfSpatialRegion>\n");

    /* coarsesignatures */
    for (cs = sc->coarsesiglist; cs; cs = cs->next) {
        fprintf(f, "        <VSVideoSegment>\n");
        fprintf(f, "          <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index);
        fprintf(f, "          <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index);
        fprintf(f, "          <MediaTimeOfSegment>\n");
        fprintf(f, "            <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts);
        fprintf(f, "            <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts);
        fprintf(f, "          </MediaTimeOfSegment>\n");
        for (i = 0; i < 5; i++) {
            fprintf(f, "          <BagOfWords>");
            /* 243 bits per bag: 30 full bytes plus the top 3 bits of byte 30 */
            for (j = 0; j < 31; j++) {
                uint8_t n = cs->data[i][j];
                if (j < 30) {
                    fprintf(f, "%d  %d  %d  %d  %d  %d  %d  %d  ", (n & 0x80) >> 7,
                                                                   (n & 0x40) >> 6,
                                                                   (n & 0x20) >> 5,
                                                                   (n & 0x10) >> 4,
                                                                   (n & 0x08) >> 3,
                                                                   (n & 0x04) >> 2,
                                                                   (n & 0x02) >> 1,
                                                                   (n & 0x01));
                } else {
                    /* print only 3 bit in last byte */
                    fprintf(f, "%d  %d  %d ", (n & 0x80) >> 7,
                                              (n & 0x40) >> 6,
                                              (n & 0x20) >> 5);
                }
            }
            fprintf(f, "</BagOfWords>\n");
        }
        fprintf(f, "        </VSVideoSegment>\n");
    }

    /* finesignatures */
    for (fs = sc->finesiglist; fs; fs = fs->next) {
        fprintf(f, "        <VideoFrame>\n");
        fprintf(f, "          <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts);
        /* confidence */
        fprintf(f, "          <FrameConfidence>%d</FrameConfidence>\n", fs->confidence);
        /* words */
        fprintf(f, "          <Word>");
        for (i = 0; i < 5; i++) {
            fprintf(f, "%d ", fs->words[i]);
            if (i < 4) {
                fprintf(f, " ");
            }
        }
        fprintf(f, "</Word>\n");
        /* framesignature: every byte is written as its five ternary digits */
        fprintf(f, "          <FrameSignature>");
        for (i = 0; i< SIGELEM_SIZE/5; i++) {
            if (i > 0) {
                fprintf(f, " ");
            }
            fprintf(f, "%d ", fs->framesig[i] / pot3[0]);
            for (j = 1; j < 5; j++)
                fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] );
        }
        fprintf(f, "</FrameSignature>\n");
        fprintf(f, "        </VideoFrame>\n");
    }
    fprintf(f, "      </VideoSignatureRegion>\n");
    fprintf(f, "    </Descriptor>\n");
    fprintf(f, "  </DescriptionUnit>\n");
    fprintf(f, "</Mpeg7>\n");

    /* a failed write (e.g. disk full) only shows up in the stream's error
     * flag or when the buffered data is flushed by fclose() */
    if (ferror(f))
        ret = AVERROR(EIO);
    if (fclose(f) != 0 && !ret)
        ret = AVERROR(EIO);
    if (ret < 0)
        av_log(ctx, AV_LOG_ERROR, "error while writing xml file %s\n", filename);

    return ret;
}


/**
 * Write the signature of one stream in the binary format.
 *
 * The complete bitstream is assembled in memory and then written in one go.
 *
 * @param ctx      filter context, used for logging
 * @param sc       stream whose coarse and fine signatures are written
 * @param filename path of the file to create
 * @return 0 on success; AVERROR(EINVAL) if the stream has no frames;
 *         AVERROR(ENOMEM) if the buffer cannot be allocated; a negative
 *         error code if the file cannot be opened or written
 */
static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
{
    FILE* f;
    FineSignature* fs;
    CoarseSignature* cs;
    uint32_t numofsegments = (sc->lastindex + 44)/45;
    int i, j;
    int ret = 0;
    int written;
    PutBitContext buf;
    /* buffer + header + coarsesignatures + finesignature */
    int len = (512 + 6 * 32 + 3*16 + 2 +
        numofsegments * (4*32 + 1 + 5*243) +
        sc->lastindex * (2 + 32 + 6*8 + 608)) / 8;
    uint8_t* buffer;

    /* same guard as xml_export(): without any frame the first/last pointers
     * dereferenced below are still NULL */
    if (!sc->coarseend->last)
        return AVERROR(EINVAL); // No frames ?

    buffer = av_malloc_array(len, sizeof(uint8_t));
    if (!buffer)
        return AVERROR(ENOMEM);

    f = avpriv_fopen_utf8(filename, "wb");
    if (!f) {
        /* report the real cause; fall back to EINVAL if errno was not set */
        int err = AVERROR(errno);
        char errbuf[128];
        if (err >= 0)
            err = AVERROR(EINVAL);
        av_strerror(err, errbuf, sizeof(errbuf));
        av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, errbuf);
        av_freep(&buffer);
        return err;
    }
    init_put_bits(&buf, buffer, len);

    put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */
    put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */
    put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */
    put_bits(&buf, 16, (sc->w - 1) & 0xFFFF); /* PixelX,2 */
    put_bits(&buf, 16, (sc->h - 1) & 0xFFFF); /* PixelY,2 */
    put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */
    put_bits32(&buf, sc->lastindex); /* NumOfFrames */
    /* hoping num is 1, other values are vague */
    /* den/num might be greater than 16 bit, so cutting it */
    put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */
    put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */
    put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */
    put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */
    put_bits32(&buf, numofsegments); /* NumOfSegments */
    /* coarsesignatures */
    for (cs = sc->coarsesiglist; cs; cs = cs->next) {
        put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */
        put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */
        put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */
        put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */
        put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */
        for (i = 0; i < 5; i++) {
            /* put 243 bits ( = 30 * 8 + 3 ) into buffer */
            for (j = 0; j < 30; j++) {
                put_bits(&buf, 8, cs->data[i][j]);
            }
            put_bits(&buf, 3, cs->data[i][30] >> 5);
        }
    }
    /* finesignatures */
    put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */
    for (fs = sc->finesiglist; fs; fs = fs->next) {
        put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */
        put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */
        put_bits(&buf, 8, fs->confidence); /* FrameConfidence */
        for (i = 0; i < 5; i++) {
            put_bits(&buf, 8, fs->words[i]); /* Words */
        }
        /* framesignature */
        for (i = 0; i < SIGELEM_SIZE/5; i++) {
            put_bits(&buf, 8, fs->framesig[i]);
        }
    }

    flush_put_bits(&buf);
    written = put_bytes_output(&buf);
    /* a short write or a failing close means the file is incomplete */
    if (fwrite(buffer, 1, written, f) != (size_t) written)
        ret = AVERROR(EIO);
    if (fclose(f) != 0 && !ret)
        ret = AVERROR(EIO);
    if (ret < 0)
        av_log(ctx, AV_LOG_ERROR, "error while writing file %s\n", filename);
    av_freep(&buffer);
    return ret;
}


/**
 * Build the output file name for one input and write its signature in the
 * configured format.
 *
 * With more than one input, sic->filename is expanded as a frame-number
 * pattern with the input index; otherwise it is used verbatim.
 *
 * @param ctx   filter context
 * @param sc    stream to export
 * @param input index of the input, used for the file name pattern
 * @return result of xml_export() or binary_export(), or AVERROR(EINVAL) if
 *         the file name does not fit into the local buffer
 */
static int export(AVFilterContext *ctx, StreamContext *sc, int input)
{
    SignatureContext* sic = ctx->priv;
    char filename[1024];

    if (sic->nb_inputs > 1) {
        /* error already handled */
        av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0);
    } else if (av_strlcpy(filename, sic->filename, sizeof(filename)) >= sizeof(filename)) {
        /* the name was truncated */
        return AVERROR(EINVAL);
    }

    if (sic->format == FORMAT_XML)
        return xml_export(ctx, sc, filename);

    return binary_export(ctx, sc, filename);
}


/**
 * Output pad request callback.
 *
 * Requests a frame from every input. When an input reports EOF for the first
 * time, its signature is exported (if a file name was given). Once all inputs
 * have been marked as exported and a detect mode is active, every pair of
 * inputs is compared and the result is logged.
 *
 * @param outlink output link the request arrived on
 * @return the status of the last ff_request_frame() call, or the first
 *         status that is an error other than AVERROR_EOF
 */
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    SignatureContext *sic = ctx->priv;
    StreamContext *sc, *sc2;
    MatchingInfo match;
    int i, j, ret;
    int lookup = 1; /* indicates whether EOF of all files is reached */

    /* process all inputs */
    for (i = 0; i < sic->nb_inputs; i++) {
        sc = &sic->streamcontexts[i];

        ret = ff_request_frame(ctx->inputs[i]);

        if (ret == AVERROR_EOF) {
            /* export signature at EOF, once per input */
            if (!sc->exported) {
                /* export if wanted */
                /* NOTE(review): on failure the code of export() is dropped and
                 * ret (AVERROR_EOF here) is returned instead - confirm intended */
                if (sic->filename[0] != '\0' && export(ctx, sc, i) < 0)
                    return ret;
                sc->exported = 1;
            }
        } else if (ret < 0) {
            /* return if unexpected error occurs in input stream */
            return ret;
        }

        lookup &= sc->exported;
    }

    /* signature lookup */
    if (!lookup || sic->mode == MODE_OFF)
        return ret;

    /* iterate over every pair */
    for (i = 0; i < sic->nb_inputs; i++) {
        sc = &sic->streamcontexts[i];
        for (j = i + 1; j < sic->nb_inputs; j++) {
            sc2 = &sic->streamcontexts[j];
            match = lookup_signatures(ctx, sic, sc, sc2, sic->mode);

            if (match.score == 0) {
                av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j);
                continue;
            }

            av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n",
                    i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den,
                    j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den,
                    match.matchframes);
            if (match.whole)
                av_log(ctx, AV_LOG_INFO, "whole video matching\n");
        }
    }

    return ret;
}


/**
 * Filter init callback.
 *
 * Allocates one StreamContext per input, creates the matching input pad and
 * pre-allocates the head of each stream's fine and coarse signature list.
 * Finally checks that the file name contains a number pattern when more than
 * one input has to be exported.
 *
 * @param ctx filter context
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure, the error of
 *         ff_append_inpad_free_name(), or AVERROR(EINVAL) for an unusable
 *         file name
 */
static av_cold int init(AVFilterContext *ctx)
{

    SignatureContext *sic = ctx->priv;
    StreamContext *sc;
    int i, ret;
    char tmp[1024];

    /* av_calloc() zero-fills like av_mallocz() and additionally fails cleanly
     * if nb_inputs * sizeof(StreamContext) would overflow */
    sic->streamcontexts = av_calloc(sic->nb_inputs, sizeof(StreamContext));
    if (!sic->streamcontexts)
        return AVERROR(ENOMEM);

    for (i = 0; i < sic->nb_inputs; i++) {
        AVFilterPad pad = {
            .type = AVMEDIA_TYPE_VIDEO,
            .name = av_asprintf("in%d", i),
            .config_props = config_input,
            .filter_frame = filter_frame,
        };

        if (!pad.name)
            return AVERROR(ENOMEM);
        if ((ret = ff_append_inpad_free_name(ctx, &pad)) < 0)
            return ret;

        sc = &(sic->streamcontexts[i]);

        sc->lastindex = 0;
        /* list head; filter_frame() fills it with the first frame */
        sc->finesiglist = av_mallocz(sizeof(FineSignature));
        if (!sc->finesiglist)
            return AVERROR(ENOMEM);
        sc->curfinesig = NULL;

        sc->coarsesiglist = av_mallocz(sizeof(CoarseSignature));
        if (!sc->coarsesiglist)
            return AVERROR(ENOMEM);
        sc->curcoarsesig1 = sc->coarsesiglist;
        sc->coarseend = sc->coarsesiglist;
        sc->coarsecount = 0;
        sc->midcoarse = 0;
    }

    /* check filename */
    if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) {
        av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
        return AVERROR(EINVAL);
    }

    return 0;
}


/**
 * Filter uninit callback: free every stream's fine and coarse signature
 * list and then the array of stream contexts itself.
 *
 * Does nothing if the stream contexts were never allocated.
 *
 * @param ctx filter context
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    SignatureContext *sic = ctx->priv;
    int idx;

    if (sic->streamcontexts == NULL)
        return;

    /* free the lists */
    for (idx = 0; idx < sic->nb_inputs; idx++) {
        StreamContext *stream = &sic->streamcontexts[idx];
        FineSignature *fine = stream->finesiglist;
        CoarseSignature *coarse = stream->coarsesiglist;

        while (fine) {
            void *doomed = fine;
            /* read the successor before the node is released */
            fine = fine->next;
            av_freep(&doomed);
        }
        stream->finesiglist = NULL;

        while (coarse) {
            void *doomed = coarse;
            coarse = coarse->next;
            av_freep(&doomed);
        }
        stream->coarsesiglist = NULL;
    }

    av_freep(&sic->streamcontexts);
}


/**
 * Output pad configuration callback: the output takes over time base, frame
 * rate, sample aspect ratio and dimensions of the first input.
 *
 * @param outlink output link being configured
 * @return always 0
 */
static int config_output(AVFilterLink *outlink)
{
    /* input 0 is the one whose frames filter_frame() forwards */
    const AVFilterLink *first_in = outlink->src->inputs[0];

    outlink->w                   = first_in->w;
    outlink->h                   = first_in->h;
    outlink->sample_aspect_ratio = first_in->sample_aspect_ratio;
    outlink->frame_rate          = first_in->frame_rate;
    outlink->time_base           = first_in->time_base;

    return 0;
}


/* The single video output pad of the filter. */
static const AVFilterPad signature_outputs[] = {
    {
        .type          = AVMEDIA_TYPE_VIDEO,
        .name          = "default",
        .config_props  = config_output,
        .request_frame = request_frame,
    },
};


/*
 * Filter definition. No static input pads are listed: init() appends one
 * input pad per requested input, hence AVFILTER_FLAG_DYNAMIC_INPUTS.
 */
const AVFilter ff_vf_signature = {
    .name          = "signature",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
    .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS,
    .priv_class    = &signature_class,
    .priv_size     = sizeof(SignatureContext),
    .init          = init,
    .uninit        = uninit,
    .inputs        = NULL,
    FILTER_OUTPUTS(signature_outputs),
    FILTER_PIXFMTS_ARRAY(pix_fmts),
};