FFmpeg
diracdsp_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2010 David Conrad
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/x86/cpu.h"
22 #include "libavcodec/diracdsp.h"
23 #include "fpel.h"
24 
/*
 * Prototypes of the external SSE2/SSE4 routines used by this file.
 * Only declarations live here; the definitions are provided elsewhere
 * (presumably by the x86 assembly sources -- confirm).
 */

/*
 * NOTE(review): the parameter names below were added for consistency with the
 * neighbouring prototypes; confirm them against the add_rect_clamped member of
 * DiracDSPContext in diracdsp.h.
 */
void ff_add_rect_clamped_sse2(uint8_t *dst, const uint16_t *src, int stride,
                              const int16_t *idwt, int idwt_stride,
                              int width, int height);

/* Installed into c->add_dirac_obmc[0], [1] and [2] respectively. */
void ff_add_dirac_obmc8_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);

/* ff_put_rect_clamped_sse2 is declared but not referenced in the visible part of this file. */
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
/*
 * The SSE2 variant takes an int16_t source while the SSE4 "_10" variant takes
 * a uint8_t source; the init function installs the SSE2 one through a
 * (void *) cast.
 */
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);

/* Installed into c->dequant_subband[1]. */
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);

36 
37 #if HAVE_X86ASM
38 
/*
 * HPEL_FILTER(MMSIZE, EXT)
 *
 * Declares the external row primitives ff_dirac_hpel_filter_v_<EXT> and
 * ff_dirac_hpel_filter_h_<EXT>, then defines the static driver
 * dirac_hpel_filter_<EXT>(). The driver iterates `height` times and on each
 * iteration:
 *   1. calls the "v" primitive with dstv - MMSIZE, src - MMSIZE, stride and a
 *      length of width + MMSIZE + 5;
 *   2. calls the "h" primitive from src into dsth with length width;
 *   3. calls the "h" primitive from the current dstv row into dstc.
 * All four pointers are then advanced by `stride`.
 *
 * NOTE(review): the MMSIZE lead-in and the "+ 5" presumably make sure the
 * second "h" pass has valid dstv data around the row edges -- confirm against
 * the assembly.
 */
#define HPEL_FILTER(MMSIZE, EXT)                                               \
void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int);     \
void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int);          \
                                                                               \
static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv,            \
                                      uint8_t *dstc, const uint8_t *src,       \
                                      int stride, int width, int height)       \
{                                                                              \
    const int lead = (MMSIZE);                                                 \
    const int vlen = width + lead + 5;                                         \
                                                                               \
    for (; height != 0; height--) {                                            \
        ff_dirac_hpel_filter_v_ ## EXT(dstv - lead, src - lead, stride, vlen); \
        ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width);                      \
        ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width);                     \
                                                                               \
        src  += stride;                                                        \
        dsth += stride;                                                        \
        dstv += stride;                                                        \
        dstc += stride;                                                        \
    }                                                                          \
}
58 
/*
 * DIRAC_PIXOP(OPNAME, EXT)
 *
 * Defines two static wrappers, <OPNAME>_dirac_pixels16_<EXT> and
 * <OPNAME>_dirac_pixels32_<EXT>.
 *
 * If h is not a multiple of 4, each wrapper forwards all its arguments to the
 * matching ff_<OPNAME>_dirac_pixels{16,32}_c routine. Otherwise it calls
 * ff_<OPNAME>_pixels16_<EXT> on src[0]; the 32-wide wrapper does so twice,
 * at byte offsets 0 and 16 of both dst and src[0].
 * Only src[0] is read on the <EXT> path.
 */
#define DIRAC_PIXOP(OPNAME, EXT)                                               \
static void OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst,                    \
                                              const uint8_t *src[5],           \
                                              int stride, int h)               \
{                                                                              \
    if ((h & 3) != 0) {                                                        \
        ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);               \
        return;                                                                \
    }                                                                          \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);                \
}                                                                              \
static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst,                    \
                                              const uint8_t *src[5],           \
                                              int stride, int h)               \
{                                                                              \
    const uint8_t *first;                                                      \
                                                                               \
    if ((h & 3) != 0) {                                                        \
        ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);               \
        return;                                                                \
    }                                                                          \
    first = src[0];                                                            \
    /* left 16 columns, then right 16 columns */                               \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst,      first,      stride, h);       \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst + 16, first + 16, stride, h);       \
}
78 
79 DIRAC_PIXOP(put, sse2)
80 DIRAC_PIXOP(avg, sse2)
81 
82 HPEL_FILTER(16, sse2)
83 
84 #endif // HAVE_X86ASM
85 
87 {
88 #if HAVE_X86ASM
89  int mm_flags = av_get_cpu_flags();
90 
91  if (EXTERNAL_SSE2(mm_flags)) {
92  c->dirac_hpel_filter = dirac_hpel_filter_sse2;
93  c->add_rect_clamped = ff_add_rect_clamped_sse2;
94  c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;
95 
96  c->add_dirac_obmc[0] = ff_add_dirac_obmc8_sse2;
97  c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
98  c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
99 
100  c->put_dirac_pixels_tab[1][0] = put_dirac_pixels16_sse2;
101  c->avg_dirac_pixels_tab[1][0] = avg_dirac_pixels16_sse2;
102  c->put_dirac_pixels_tab[2][0] = put_dirac_pixels32_sse2;
103  c->avg_dirac_pixels_tab[2][0] = avg_dirac_pixels32_sse2;
104  }
105 
106  if (EXTERNAL_SSE4(mm_flags)) {
107  c->dequant_subband[1] = ff_dequant_subband_32_sse4;
108  c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
109  }
110 #endif // HAVE_X86ASM
111 }
cpu.h
ff_put_rect_clamped_sse2
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
ff_diracdsp_init_x86
void ff_diracdsp_init_x86(DiracDSPContext *c)
Definition: diracdsp_init.c:86
diracdsp.h
ff_put_signed_rect_clamped_10_sse4
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem like nitpicking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not [... excerpt truncated] c
Definition: undefined.txt:32
height
#define height
Definition: dsp.h:85
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
DiracDSPContext
Definition: diracdsp.h:30
ff_add_rect_clamped_sse2
void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int)
ff_add_dirac_obmc16_sse2
void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
stride
#define stride
Definition: h264pred_template.c:537
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
fpel.h
ff_put_signed_rect_clamped_sse2
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
ff_add_dirac_obmc32_sse2
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
ff_add_dirac_obmc8_sse2
void ff_add_dirac_obmc8_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
width
#define width
Definition: dsp.h:85
ff_dequant_subband_32_sse4
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
src
#define src
Definition: vp8dsp.c:248