/* Byte offset of member x inside SignatureContext. */
#define OFFSET(x) offsetof(SignatureContext, x)
/* Combined flag set: filtering parameter OR'ed with video parameter.
 * Parenthesized so the expansion stays a single operand wherever it is used. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* 64-bit constant used as a scaling bound in the overflow check
 * (INT64_MAX / (BLOCK_LCM * 255)).
 * NOTE(review): the name suggests a least common multiple of block sizes - confirm.
 * Parenthesized so the cast binds to the literal in every context
 * (e.g. sizeof(BLOCK_LCM) or postfix operators). */
#define BLOCK_LCM ((int64_t) 476985600)
44 {
"detectmode",
"set the detectmode",
49 {
"nb_inputs",
"number of inputs",
51 {
"filename",
"filename for output files",
53 {
"format",
"set output format",
57 {
"th_d",
"threshold to detect one word as similar",
59 {
"th_dc",
"threshold to detect all words as similar",
61 {
"th_xh",
"threshold to detect frames as similar",
63 {
"th_di",
"minimum length of matching sequence in frames",
65 {
"th_it",
"threshold for relation of good to all frames",
98 sc->
divide = (((uint64_t) inlink->
w/32) * (inlink->
w/32 + 1) * (inlink->
h/32 * inlink->
h/32 + 1) > INT64_MAX / (
BLOCK_LCM * 255));
100 av_log(ctx,
AV_LOG_WARNING,
"Input dimension too high for precise calculation, numbers will be rounded.\n");
109 return (b->
to.
y - b->
up.
y + 1) * (b->
to.
x - b->
up.
x + 1);
123 if (x0-1 >= 0 && y0-1 >= 0) {
124 sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
125 }
else if (x0-1 >= 0) {
126 sum = intpic[y1][x1] - intpic[y1][x0-1];
127 }
else if (y0-1 >= 0) {
128 sum = intpic[y1][x1] - intpic[y0-1][x1];
130 sum = intpic[y1][x1];
/* Three-way comparison of two uint64_t values for ascending order:
 * yields -1 if *a < *b, 1 if *a > *b, and 0 when they are equal.
 * Handed to qsort() (through a void* cast) to sort sortsignature. */
135 static int cmp(
const uint64_t *
a,
const uint64_t *
b)
137 return *a < *b ? -1 : ( *a > *b ? 1 : 0 );
156 static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
160 static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
161 static const uint8_t s2usw[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19};
163 uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 };
164 uint64_t intpic[32][32];
171 int f = 0,
g = 0,
w = 0;
172 int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1,
a,
b;
174 int i, j, k, ternary;
198 memset(intpic, 0,
sizeof(uint64_t)*32*32);
202 for (i = 0; i < inlink->
w; i++) {
203 intjlut[i] = (i*32)/inlink->
w;
206 for (i = 0; i < inlink->
h; i++) {
207 inti = (i*32)/inlink->
h;
208 for (j = 0; j < inlink->
w; j++) {
210 intpic[inti][intj] += p[j];
220 dh1 = inlink->
h / 32;
223 dw1 = inlink->
w / 32;
226 denom = (sc->
divide) ? dh1 * dh2 * dw1 * dw2 : 1;
228 for (i = 0; i < 32; i++) {
232 a = ((inlink->
h*(i+1))%32 == 0) ? (inlink->
h*(i+1))/32 - 1 : (inlink->
h*(i+1))/32;
233 a -= ((inlink->
h*i)%32 == 0) ? (inlink->
h*i)/32 - 1 : (inlink->
h*i)/32;
234 a = (
a == dh1)? dh2 : dh1;
236 for (j = 0; j < 32; j++) {
239 b = ((inlink->
w*(j+1))%32 == 0) ? (inlink->
w*(j+1))/32 - 1 : (inlink->
w*(j+1))/32;
240 b -= ((inlink->
w*j)%32 == 0) ? (inlink->
w*j)/32 - 1 : (inlink->
w*j)/32;
241 b = (b == dw1)? dw2 : dw1;
243 rowcount += intpic[i][j] *
a * b * precfactor / denom;
245 intpic[i][j] = intpic[i-1][j] + rowcount;
247 intpic[i][j] = rowcount;
252 denom = (sc->
divide) ? 1 : dh1 * dh2 * dw1 * dw2;
256 int64_t* elemsignature;
257 uint64_t* sortsignature;
263 if (!sortsignature) {
275 sum = blocksum / blocksize;
277 sum -= 128 * precfactor * denom;
285 sum -= blocksum / blocksize;
286 conflist[
g++] =
FFABS(sum * 8 / (precfactor * denom));
289 elemsignature[j] = sum;
290 sortsignature[j] =
FFABS(sum);
294 qsort(sortsignature, elemcat->
elem_count,
sizeof(uint64_t), (
void*)
cmp);
299 if (elemsignature[j] < -th) {
301 }
else if (elemsignature[j] <= th) {
306 fs->
framesig[f/5] += ternary * pot3[f%5];
308 if (f == wordvec[
w]) {
309 fs->
words[s2usw[
w]/5] += ternary * pot3[wordt2b[s2usw[
w]/5]++];
343 for (i = 0; i < 5; i++) {
349 for (i = 0; i < 5; i++) {
362 for (i = 0; i < 5; i++) {
365 for (j = 1; j < 5; j++)
374 for (j = 1; j < 5; j++)
391 unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
393 f = fopen(filename,
"w");
403 fprintf(f,
"<?xml version='1.0' encoding='ASCII' ?>\n");
404 fprintf(f,
"<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
405 fprintf(f,
" <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
406 fprintf(f,
" <Descriptor xsi:type=\"VideoSignatureType\">\n");
407 fprintf(f,
" <VideoSignatureRegion>\n");
408 fprintf(f,
" <VideoSignatureSpatialRegion>\n");
409 fprintf(f,
" <Pixel>0 0 </Pixel>\n");
410 fprintf(f,
" <Pixel>%d %d </Pixel>\n", sc->
w - 1, sc->
h - 1);
411 fprintf(f,
" </VideoSignatureSpatialRegion>\n");
412 fprintf(f,
" <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
415 fprintf(f,
" <MediaTimeOfSpatialRegion>\n");
416 fprintf(f,
" <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
417 fprintf(f,
" <EndMediaTimeOfSpatialRegion>%" PRIu64
"</EndMediaTimeOfSpatialRegion>\n", sc->
coarseend->
last->
pts);
418 fprintf(f,
" </MediaTimeOfSpatialRegion>\n");
422 fprintf(f,
" <VSVideoSegment>\n");
423 fprintf(f,
" <StartFrameOfSegment>%" PRIu32
"</StartFrameOfSegment>\n", cs->
first->
index);
424 fprintf(f,
" <EndFrameOfSegment>%" PRIu32
"</EndFrameOfSegment>\n", cs->
last->
index);
425 fprintf(f,
" <MediaTimeOfSegment>\n");
426 fprintf(f,
" <StartMediaTimeOfSegment>%" PRIu64
"</StartMediaTimeOfSegment>\n", cs->
first->
pts);
427 fprintf(f,
" <EndMediaTimeOfSegment>%" PRIu64
"</EndMediaTimeOfSegment>\n", cs->
last->
pts);
428 fprintf(f,
" </MediaTimeOfSegment>\n");
429 for (i = 0; i < 5; i++) {
430 fprintf(f,
" <BagOfWords>");
431 for (j = 0; j < 31; j++) {
434 fprintf(f,
"%d %d %d %d %d %d %d %d ", (n & 0x80) >> 7,
444 fprintf(f,
"%d %d %d ", (n & 0x80) >> 7,
449 fprintf(f,
"</BagOfWords>\n");
451 fprintf(f,
" </VSVideoSegment>\n");
456 fprintf(f,
" <VideoFrame>\n");
457 fprintf(f,
" <MediaTimeOfFrame>%" PRIu64
"</MediaTimeOfFrame>\n", fs->
pts);
459 fprintf(f,
" <FrameConfidence>%d</FrameConfidence>\n", fs->
confidence);
461 fprintf(f,
" <Word>");
462 for (i = 0; i < 5; i++) {
463 fprintf(f,
"%d ", fs->
words[i]);
468 fprintf(f,
"</Word>\n");
470 fprintf(f,
" <FrameSignature>");
475 fprintf(f,
"%d ", fs->
framesig[i] / pot3[0]);
476 for (j = 1; j < 5; j++)
477 fprintf(f,
" %d ", fs->
framesig[i] % pot3[j-1] / pot3[j] );
479 fprintf(f,
"</FrameSignature>\n");
480 fprintf(f,
" </VideoFrame>\n");
482 fprintf(f,
" </VideoSignatureRegion>\n");
483 fprintf(f,
" </Descriptor>\n");
484 fprintf(f,
" </DescriptionUnit>\n");
485 fprintf(f,
"</Mpeg7>\n");
496 uint32_t numofsegments = (sc->
lastindex + 44)/45;
500 int len = (512 + 6 * 32 + 3*16 + 2 +
501 numofsegments * (4*32 + 1 + 5*243) +
502 sc->
lastindex * (2 + 32 + 6*8 + 608)) / 8;
507 f = fopen(filename,
"wb");
539 for (i = 0; i < 5; i++) {
541 for (j = 0; j < 30; j++) {
553 for (i = 0; i < 5; i++) {
612 if (
export(ctx, sc, i) < 0)
628 if (match.
score != 0) {
629 av_log(ctx,
AV_LOG_INFO,
"matching of video %d at %f and %d at %f, %d frames matching\n",
692 av_log(ctx,
AV_LOG_ERROR,
"The filename must contain %%d or %%0nd, if you have more than one input.\n");
720 finsig = finsig->
next;
727 cousig = cousig->
next;
744 outlink->
w = inlink->
w;
745 outlink->
h = inlink->
h;
764 .priv_class = &signature_class,
static void av_unused put_bits32(PutBitContext *s, uint32_t value)
Write exactly 32 bits into a bitstream.
static const char * format[]
This structure describes decoded (raw) audio or video data.
static const AVFilterPad signature_outputs[]
ptrdiff_t const GLvoid * data
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
#define AV_LOG_WARNING
Something somehow does not look correct.
Main libavfilter public API header.
int h
agreed upon image height
static int cmp(const uint64_t *a, const uint64_t *b)
#define AVFILTER_FLAG_DYNAMIC_INPUTS
The number of the filter inputs is not determined just by AVFilter.inputs.
static void set_bit(uint8_t *data, size_t pos)
sets the bit at position pos to 1 in data
struct FineSignature * next
void avpriv_align_put_bits(PutBitContext *s)
Pad the bitstream with zeros up to the next byte boundary.
enum AVMediaType type
AVFilterPad type.
static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
StreamContext * streamcontexts
static const AVOption signature_options[]
const char * name
Pad name.
AVFilterLink ** inputs
array of pointers to input links
#define av_assert0(cond)
assert() equivalent, that is always enabled.
static const char signature[]
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
static int export(AVFilterContext *ctx, StreamContext *sc, int input)
timestamp utils, mostly useful for debugging/logging purposes
CoarseSignature * curcoarsesig1
static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
struct FineSignature * first
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range...
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
#define AVERROR_EOF
End of file.
struct FineSignature * last
static int get_block_size(const Block *b)
A filter pad used for either input or output.
A link between two filters.
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
static int config_output(AVFilterLink *outlink)
AVRational frame_rate
Frame rate of the stream on the link, or 1/0 if unknown or variable; if left to 0/0, will be automatically copied from the first input of the source filter if it exists.
AVFILTER_DEFINE_CLASS(signature)
static const uint16_t mask[17]
FineSignature * curfinesig
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
void * priv
private data for use by the filter
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
AVRational time_base
Define the time base used by the PTS of the frames/samples which will pass through this link...
int av_log_get_level(void)
Get the current log level.
size_t av_strlcpy(char *dst, const char *src, size_t size)
Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.
static MatchingInfo lookup_signatures(AVFilterContext *ctx, SignatureContext *sc, StreamContext *first, StreamContext *second, int mode)
static int put_bits_count(PutBitContext *s)
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
int w
agreed upon image width
char * av_asprintf(const char *fmt,...)
as above, but U and V bytes are swapped
struct FineSignature * second
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
static const ElemCat * elements[ELEMENT_COUNT]
MPEG-7 video signature calculation and lookup filter.
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
AVFilterContext * src
source filter
static const AVFilterPad inputs[]
static const AVFilterPad outputs[]
int av_get_frame_filename(char *buf, int buf_size, const char *path, int number)
#define AV_LOG_INFO
Standard information.
static int request_frame(AVFilterLink *outlink)
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
static av_cold void uninit(AVFilterContext *ctx)
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
static int config_input(AVFilterLink *inlink)
const char * name
Filter name.
AVRational sample_aspect_ratio
agreed upon sample aspect ratio
AVFilterLink ** outputs
array of pointers to output links
struct CoarseSignature * next
MPEG-7 video signature calculation and lookup filter.
static enum AVPixelFormat pix_fmts[]
CoarseSignature * coarsesiglist
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
int av_strerror(int errnum, char *errbuf, size_t errbuf_size)
Put a description of the AVERROR code errnum in errbuf.
#define FF_INLINK_IDX(link)
Find the index of a link.
struct FineSignature * first
static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
CoarseSignature * curcoarsesig2
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
static int query_formats(AVFilterContext *ctx)
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
struct FineSignature * prev
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
CoarseSignature * coarseend
AVFilterContext * dst
dest filter
static av_cold int init(AVFilterContext *ctx)
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor ...
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
#define av_malloc_array(a, b)
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
AVPixelFormat
Pixel format.
mode
Use these values in ebur128_init (or'ed).
uint8_t framesig[SIGELEM_SIZE/5]
static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
FineSignature * finesiglist
static int ff_insert_inpad(AVFilterContext *f, unsigned index, AVFilterPad *p)
Insert a new input pad for the filter.