FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
yuv2rgb_template.c
Go to the documentation of this file.
1 /*
2  * software YUV to RGB converter
3  *
4  * Copyright (C) 2001-2007 Michael Niedermayer
5  * (c) 2010 Konstantin Shishkov
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include <stdint.h>
25 
26 #include "libavutil/x86/asm.h"
28 
/*
 * Select the store and fence instruction strings used by the packing macros
 * in this template.
 *
 * When COMPILE_TEMPLATE_MMXEXT is non-zero, MOVNTQ expands to "movntq"
 * (the non-temporal 64-bit MMX store) and SFENCE to "sfence". Otherwise
 * MOVNTQ falls back to a plain "movq" and SFENCE to " # nop", which is only
 * an assembler comment and emits no instruction.
 *
 * Earlier definitions are dropped first, presumably so the template can be
 * included more than once with different settings.
 * NOTE(review): EMMS is undefined here but never redefined in the visible
 * part of this file; YUV2RGB_ENDFUNC spells out "emms" literally.
 */
29 #undef MOVNTQ
30 #undef EMMS
31 #undef SFENCE
32 
33 #if COMPILE_TEMPLATE_MMXEXT
34 #define MOVNTQ "movntq"
35 #define SFENCE "sfence"
36 #else
37 #define MOVNTQ "movq"
38 #define SFENCE " # nop"
39 #endif
40 
/*
 * MMX register numbers, as string literals, for each colour channel.
 * They are pasted after "%%mm" by SET_EMPTY_ALPHA and LOAD_ALPHA and passed
 * as arguments to RGB_PACK32. The blue/red/green assignment matches the
 * registers the YUV2RGB fragment leaves its results in
 * (mm0 = B, mm1 = R, mm2 = G); mm3 is used for alpha.
 */
41 #define REG_BLUE "0"
42 #define REG_RED "1"
43 #define REG_GREEN "2"
44 #define REG_ALPHA "3"
45 
/*
 * YUV2RGB_LOOP(depth): function prologue and start of the per-row loop.
 *
 * depth is multiplied by the pixel count and compared against the
 * destination stride, and YUV2RGB_ENDLOOP advances the output pointer by
 * depth * 8 per 8 pixels, so it acts as the number of output bytes per pixel
 * (visible callers pass 2, 3 or 4).
 *
 * Expects these names in the enclosing function: locals y, h_size, vshift
 * and parameters c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride.
 *
 * What it does:
 *  - h_size is c->dstW rounded up to a multiple of 8; if that many pixels
 *    would need more bytes than |dstStride[0]|, it is reduced by 8.
 *  - vshift is 0 for AV_PIX_FMT_YUV422P and 1 otherwise, so the chroma row
 *    is y >> vshift.
 *  - mm4 is zeroed once before the loop. This is a basic asm statement
 *    without operands, hence the single '%' in the register names. The
 *    YUV2RGB fragment relies on mm4 as its zero register.
 *  - For every row it sets up the output pointer (image), the three plane
 *    pointers (py, pu, pv) and index = -h_size / 2, a negative counter that
 *    YUV2RGB_ENDLOOP steps up towards zero.
 *
 * The for-loop body is left open; it is closed by YUV2RGB_OPERANDS or
 * YUV2RGB_OPERANDS_ALPHA.
 */
46 #define YUV2RGB_LOOP(depth) \
47  h_size = (c->dstW + 7) & ~7; \
48  if (h_size * depth > FFABS(dstStride[0])) \
49  h_size -= 8; \
50  \
51  vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \
52  \
53  __asm__ volatile ("pxor %mm4, %mm4\n\t"); \
54  for (y = 0; y < srcSliceH; y++) { \
55  uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \
56  const uint8_t *py = src[0] + y * srcStride[0]; \
57  const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
58  const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
59  x86_reg index = -h_size / 2; \
60 
/*
 * YUV2RGB_INITIAL_LOAD: opens the per-row asm statement and preloads the
 * first group of samples.
 *
 * With the operand numbering of YUV2RGB_OPERANDS (%0 = index,
 * %2 = pu - index, %3 = pv - index, %5 = py - 2*index) it loads
 * 8 luma bytes into mm6, 4 U bytes into mm0 and 4 V bytes into mm1, then
 * defines local label "1", the loop target of YUV2RGB_ENDLOOP.
 *
 * The "__asm__ volatile (" opened here is closed by YUV2RGB_OPERANDS or
 * YUV2RGB_OPERANDS_ALPHA.
 */
61 #define YUV2RGB_INITIAL_LOAD \
62  __asm__ volatile ( \
63  "movq (%5, %0, 2), %%mm6\n\t" \
64  "movd (%2, %0), %%mm0\n\t" \
65  "movd (%3, %0), %%mm1\n\t" \
66  "1: \n\t" \
67 
68 /* YUV2RGB core
69  * Conversion is performed in the usual way:
70  * R = Y' * Ycoef + Vred * V'
71  * G = Y' * Ycoef + Vgreen * V' + Ugreen * U'
72  * B = Y' * Ycoef + Ublue * U'
73  *
74  * where X' = X * 8 - Xoffset (multiplication is performed to increase
75  * precision a bit).
76  * Since it operates in YUV420 colorspace, the Y component is additionally
77  * split into Y1 and Y2 for even and odd pixels.
78  *
79  * Input:
80  * mm0 - U (4 elems), mm1 - V (4 elems), mm6 - Y (8 elems), mm4 - zero register
81  * Output:
82  * mm1 - R, mm2 - G, mm0 - B
83  */
/*
 * Implementation notes for the YUV2RGB asm fragment:
 *  - %4 is the base register for the U_OFFSET/V_OFFSET/Y_OFFSET and *_COEFF
 *    displacements (YUV2RGB_OPERANDS binds it to &c->redDither; the
 *    displacement strings themselves are defined elsewhere).
 *  - The 8 luma bytes are split into two sets of four 16-bit words:
 *    mm6 (Y1) is masked with mmx_00ffw (presumably 0x00ff in every word,
 *    which keeps the even-indexed bytes) and mm7 (Y2) is shifted right by 8
 *    per word, which keeps the odd-indexed bytes.
 *  - All four inputs are scaled by 8 (psllw $3) before the offsets are
 *    subtracted. Chroma uses signed saturation (psubsw); luma uses a
 *    wrapping psubw.
 *  - Each chroma term is added to both Y1 and Y2, so one U/V sample is
 *    shared by a pair of horizontally adjacent pixels.
 *  - On exit, as signed 16-bit words:
 *      even pixels: mm0 = B, mm1 = R, mm2 = G
 *      odd pixels:  mm3 = B, mm5 = R, mm7 = G
 *    mm4 is only read and mm6 still holds Y1.
 */
84 #define YUV2RGB \
85  /* convert Y, U, V into Y1', Y2', U', V' */ \
86  "movq %%mm6, %%mm7\n\t" \
87  "punpcklbw %%mm4, %%mm0\n\t" \
88  "punpcklbw %%mm4, %%mm1\n\t" \
89  "pand "MANGLE(mmx_00ffw)", %%mm6\n\t" \
90  "psrlw $8, %%mm7\n\t" \
91  "psllw $3, %%mm0\n\t" \
92  "psllw $3, %%mm1\n\t" \
93  "psllw $3, %%mm6\n\t" \
94  "psllw $3, %%mm7\n\t" \
95  "psubsw "U_OFFSET"(%4), %%mm0\n\t" \
96  "psubsw "V_OFFSET"(%4), %%mm1\n\t" \
97  "psubw "Y_OFFSET"(%4), %%mm6\n\t" \
98  "psubw "Y_OFFSET"(%4), %%mm7\n\t" \
99 \
100  /* multiply by coefficients */ \
101  "movq %%mm0, %%mm2\n\t" \
102  "movq %%mm1, %%mm3\n\t" \
103  "pmulhw "UG_COEFF"(%4), %%mm2\n\t" \
104  "pmulhw "VG_COEFF"(%4), %%mm3\n\t" \
105  "pmulhw "Y_COEFF" (%4), %%mm6\n\t" \
106  "pmulhw "Y_COEFF" (%4), %%mm7\n\t" \
107  "pmulhw "UB_COEFF"(%4), %%mm0\n\t" \
108  "pmulhw "VR_COEFF"(%4), %%mm1\n\t" \
109  "paddsw %%mm3, %%mm2\n\t" \
110  /* now: mm0 = UB, mm1 = VR, mm2 = CG */ \
111  /* mm6 = Y1, mm7 = Y2 */ \
112 \
113  /* produce RGB */ \
114  "movq %%mm7, %%mm3\n\t" \
115  "movq %%mm7, %%mm5\n\t" \
116  "paddsw %%mm0, %%mm3\n\t" \
117  "paddsw %%mm1, %%mm5\n\t" \
118  "paddsw %%mm2, %%mm7\n\t" \
119  "paddsw %%mm6, %%mm0\n\t" \
120  "paddsw %%mm6, %%mm1\n\t" \
121  "paddsw %%mm6, %%mm2\n\t" \
122 
/*
 * RGB_PACK_INTERLEAVE: turn the six word registers produced by YUV2RGB
 * (even pixels in mm0/mm1/mm2, odd pixels in mm3/mm5/mm7) into three byte
 * registers holding 8 consecutive pixels each.
 *
 * packuswb narrows the words to bytes with unsigned saturation:
 *   mm0 = B(even) | R(even),  mm3 = B(odd) | R(odd),
 *   mm2 = G(even) twice,      mm7 = G(odd) twice.
 * The punpck{l,h}bw steps then interleave even and odd bytes, giving
 *   mm0 = B0..B7, mm1 = R0..R7, mm2 = G0..G7,
 * i.e. the REG_BLUE / REG_RED / REG_GREEN assignment.
 *
 * NOTE(review): the "movq %%mm0, %%mm1" line ends in "\n\n" rather than
 * "\n\t" like its neighbours. It only adds an empty line to the generated
 * assembly.
 */
123 #define RGB_PACK_INTERLEAVE \
124  /* pack and interleave even/odd pixels */ \
125  "packuswb %%mm1, %%mm0\n\t" \
126  "packuswb %%mm5, %%mm3\n\t" \
127  "packuswb %%mm2, %%mm2\n\t" \
128  "movq %%mm0, %%mm1\n\n" \
129  "packuswb %%mm7, %%mm7\n\t" \
130  "punpcklbw %%mm3, %%mm0\n\t" \
131  "punpckhbw %%mm3, %%mm1\n\t" \
132  "punpcklbw %%mm7, %%mm2\n\t" \
133 
/*
 * YUV2RGB_ENDLOOP(depth): tail of the inner asm loop.
 *
 * Loads the next 8 luma bytes into mm6 and the next 4 V / 4 U bytes into
 * mm1 / mm0. The +8 / +4 displacements are needed because the index has not
 * been advanced yet. It then moves the output pointer (%1) on by depth * 8
 * bytes, adds 4 to the index (%0) and jumps back to label "1" while the
 * index is still negative.
 *
 * AV_STRINGIFY(depth * 8) pastes the unevaluated expression (e.g. "2 * 8")
 * into the immediate; the assembler evaluates it.
 *
 * NOTE(review): the loads are unconditional, so the last iteration of a row
 * also reads the sample group following the last converted one.
 */
134 #define YUV2RGB_ENDLOOP(depth) \
135  "movq 8 (%5, %0, 2), %%mm6\n\t" \
136  "movd 4 (%3, %0), %%mm1\n\t" \
137  "movd 4 (%2, %0), %%mm0\n\t" \
138  "add $"AV_STRINGIFY(depth * 8)", %1\n\t" \
139  "add $4, %0\n\t" \
140  "js 1b\n\t" \
141 
/*
 * Extra asm operands appended by YUV2RGB_OPERANDS.
 * For the MMXEXT build this names the five word-mask constants that the
 * MMXEXT variant of RGB_PACK24_B references through MANGLE(); for the plain
 * MMX build it expands to nothing.
 * NAMED_CONSTRAINTS_ARRAY_ADD is not defined in this file (presumably it
 * comes from libavutil/x86/asm.h).
 */
142 #if COMPILE_TEMPLATE_MMXEXT
143 #undef RGB_PACK24_B_OPERANDS
144 #define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ARRAY_ADD(mask1101,mask0110,mask0100,mask0010,mask1001)
145 #else
146 #undef RGB_PACK24_B_OPERANDS
147 #define RGB_PACK24_B_OPERANDS
148 #endif
149 
/*
 * YUV2RGB_OPERANDS: operand list that terminates the per-row asm statement
 * opened by YUV2RGB_INITIAL_LOAD, followed by the '}' that closes the
 * for-loop opened by YUV2RGB_LOOP.
 *
 * Operand numbering used by all asm fragments:
 *   %0  index             (read/write) negative counter, stepped by 4
 *   %1  image             (read/write) output pointer
 *   %2  pu - index        U row, biased so that (%2, %0) starts at pu
 *   %3  pv - index        V row, biased the same way
 *   %4  &c->redDither     base for the *_OFFSET, *_COEFF and *_DITHER
 *                         displacements
 *   %5  py - 2*index      luma row, addressed as (%5, %0, 2)
 *
 * The named constraints list the constants referenced via MANGLE() in the
 * fragments. "memory" is clobbered because the fragments store through %1.
 */
150 #define YUV2RGB_OPERANDS \
151  : "+r" (index), "+r" (image) \
152  : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
153  "r" (py - 2*index) \
154  NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \
155  RGB_PACK24_B_OPERANDS \
156  : "memory" \
157  ); \
158  } \
159 
/*
 * YUV2RGB_OPERANDS_ALPHA: same role as YUV2RGB_OPERANDS (closes the asm
 * statement and the per-row for-loop) for the variants that read an alpha
 * plane.
 *
 * Operands %0..%5 are identical. %6 = pa - 2*index is added so that
 * LOAD_ALPHA can address the alpha row as (%6, %0, 2), the same way the
 * luma row is addressed. Requires a local "pa" in scope.
 * Only mmx_00ffw is listed as a named constraint here.
 */
160 #define YUV2RGB_OPERANDS_ALPHA \
161  : "+r" (index), "+r" (image) \
162  : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
163  "r" (py - 2*index), "r" (pa - 2*index) \
164  NAMED_CONSTRAINTS_ADD(mmx_00ffw) \
165  : "memory" \
166  ); \
167  } \
168 
/*
 * YUV2RGB_ENDFUNC: function epilogue.
 * Issues the SFENCE string (a real "sfence" only in the MMXEXT build, where
 * MOVNTQ is a non-temporal store) followed by "emms" to leave MMX state,
 * then returns srcSliceH, the number of rows the loop iterated over.
 */
169 #define YUV2RGB_ENDFUNC \
170  __asm__ volatile (SFENCE"\n\t" \
171  "emms \n\t"); \
172  return srcSliceH; \
173 
/*
 * Compile-time conditional helpers: IF0(x) drops its argument, IF1(x) keeps
 * it. RGB_PACK16 selects one of them by token-pasting IF##is15.
 */
174 #define IF0(x)
175 #define IF1(x) x
176 
/*
 * RGB_PACK16(gmask, is15): pack 8 pixels into 16 bits each and store them.
 *
 * Input:  mm0 = 8 blue bytes, mm1 = 8 red bytes, mm2 = 8 green bytes.
 * Output: 16 bytes written through MOVNTQ at (%1) and 8(%1).
 *         mm0..mm3 are clobbered.
 *
 * gmask  name of the byte-mask constant applied to the green bits that end
 *        up in the high byte (visible callers pass pb_03 with is15 = 1 and
 *        pb_07 with is15 = 0).
 * is15   literal 0 or 1. It is pasted into IF##is15 and into the shift
 *        counts "3-is15" / "5+is15", which the assembler evaluates.
 *
 * Steps:
 *  - blue and red are masked with mmx_redmask (presumably the top five bits
 *    of each byte - TODO confirm) and blue is shifted right by 3;
 *  - when is15 is 1, red is additionally shifted right by 1;
 *  - green is split: a left-shifted copy masked with pb_e0 is OR-ed into the
 *    blue register (low byte of each pixel) and a right-shifted copy masked
 *    with gmask is OR-ed into the red register (high byte of each pixel);
 *  - punpcklbw / punpckhbw interleave low and high bytes into pixels 0-3
 *    (mm0) and 4-7 (mm2).
 */
177 #define RGB_PACK16(gmask, is15) \
178  "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \
179  "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \
180  "movq %%mm2, %%mm3\n\t" \
181  "psllw $"AV_STRINGIFY(3-is15)", %%mm2\n\t" \
182  "psrlw $"AV_STRINGIFY(5+is15)", %%mm3\n\t" \
183  "psrlw $3, %%mm0\n\t" \
184  IF##is15("psrlw $1, %%mm1\n\t") \
185  "pand "MANGLE(pb_e0)", %%mm2\n\t" \
186  "pand "MANGLE(gmask)", %%mm3\n\t" \
187  "por %%mm2, %%mm0\n\t" \
188  "por %%mm3, %%mm1\n\t" \
189  "movq %%mm0, %%mm2\n\t" \
190  "punpcklbw %%mm1, %%mm0\n\t" \
191  "punpckhbw %%mm1, %%mm2\n\t" \
192  MOVNTQ " %%mm0, (%1)\n\t" \
193  MOVNTQ " %%mm2, 8(%1)\n\t" \
194 
/*
 * DITHER_RGB: add the per-row dither bytes to the blue (mm0), green (mm2)
 * and red (mm1) registers with unsigned byte saturation.
 * BLUE_DITHER / GREEN_DITHER / RED_DITHER are displacement strings relative
 * to %4 and are defined outside this file. Presumably they address the
 * blueDither / greenDither / redDither context fields that the 15/16-bit
 * functions update per row - TODO confirm.
 */
195 #define DITHER_RGB \
196  "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \
197  "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \
198  "paddusb "RED_DITHER"(%4), %%mm1\n\t" \
199 
200 #if !COMPILE_TEMPLATE_MMXEXT
/*
 * Convert a slice of planar YUV to a 2-bytes-per-pixel RGB format using the
 * 15-bit packing (RGB_PACK16 with pb_03 and is15 = 1).
 * Compiled only when COMPILE_TEMPLATE_MMXEXT is zero.
 *
 * c          conversion context; dstW and srcFormat are read, and with
 *            DITHER1XBPP the blue/green/red dither fields are rewritten for
 *            every row from ff_dither8 (red uses the opposite row parity)
 * src        source plane pointers (0 = Y, 1 = U, 2 = V)
 * srcStride  per-plane source strides
 * srcSliceY  first destination row of this slice
 * srcSliceH  number of rows to convert
 * dst        destination plane pointers (only dst[0] is written)
 * dstStride  destination strides (only dstStride[0] is used)
 *
 * NOTE(review): the line numbers embedded in this listing skip 216, 218 and
 * 225-226, and nothing visible here invokes YUV2RGB_INITIAL_LOAD,
 * RGB_PACK_INTERLEAVE, YUV2RGB_OPERANDS or YUV2RGB_ENDFUNC (which supplies
 * the return statement). The listing appears to have lost lines - confirm
 * against the upstream file before relying on it.
 */
201 static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
202  int srcStride[],
203  int srcSliceY, int srcSliceH,
204  uint8_t *dst[], int dstStride[])
205 {
206  int y, h_size, vshift;
207 
208  YUV2RGB_LOOP(2)
209 
210 #ifdef DITHER1XBPP
211  c->blueDither = ff_dither8[y & 1];
212  c->greenDither = ff_dither8[y & 1];
213  c->redDither = ff_dither8[(y + 1) & 1];
214 #endif
215 
217  YUV2RGB
219 #ifdef DITHER1XBPP
220  DITHER_RGB
221 #endif
222  RGB_PACK16(pb_03, 1)
223 
224  YUV2RGB_ENDLOOP(2)
227 }
228 
/*
 * Convert a slice of planar YUV to a 2-bytes-per-pixel RGB format using the
 * 16-bit packing (RGB_PACK16 with pb_07 and is15 = 0).
 * Compiled only when COMPILE_TEMPLATE_MMXEXT is zero.
 *
 * Parameters are the same as for the other converters in this file:
 * context c, source planes/strides, slice position and height, destination
 * planes/strides. With DITHER1XBPP the per-row dither fields of c are
 * rewritten; unlike the 15-bit variant, green takes its value from
 * ff_dither4 instead of ff_dither8.
 *
 * NOTE(review): the line numbers embedded in this listing skip 244, 246 and
 * 253-254, and nothing visible here invokes YUV2RGB_INITIAL_LOAD,
 * RGB_PACK_INTERLEAVE, YUV2RGB_OPERANDS or YUV2RGB_ENDFUNC (which supplies
 * the return statement). The listing appears to have lost lines - confirm
 * against the upstream file before relying on it.
 */
229 static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
230  int srcStride[],
231  int srcSliceY, int srcSliceH,
232  uint8_t *dst[], int dstStride[])
233 {
234  int y, h_size, vshift;
235 
236  YUV2RGB_LOOP(2)
237 
238 #ifdef DITHER1XBPP
239  c->blueDither = ff_dither8[y & 1];
240  c->greenDither = ff_dither4[y & 1];
241  c->redDither = ff_dither8[(y + 1) & 1];
242 #endif
243 
245  YUV2RGB
247 #ifdef DITHER1XBPP
248  DITHER_RGB
249 #endif
250  RGB_PACK16(pb_07, 0)
251 
252  YUV2RGB_ENDLOOP(2)
255 }
256 #endif /* !COMPILE_TEMPLATE_MMXEXT */
257 
/*
 * RGB_PACK24(blue, red): first half of the 3-bytes-per-pixel packing.
 *
 * Works directly on the word output of YUV2RGB: even pixels in
 * mm0/mm1/mm2, odd pixels in mm3/mm5/mm7. RGB_PACK_INTERLEAVE is not used.
 * The three packuswb steps narrow each channel to bytes, laid out as
 * "four even pixels, then four odd pixels".
 *
 * blue, red  MMX register numbers as strings. Because the packuswb targets
 *            are hard-coded, they must be "0" and "1" in one order or the
 *            other. The channel passed as "red" ends up first in memory.
 *
 * The R/G/B labels in the inline comments describe byte positions for the
 * case red = mm0, blue = mm1. Swapping the arguments swaps the first and
 * third byte of every pixel.
 *
 * Result handed to RGB_PACK24_B (with the labelling of the inline comments):
 *   mm3 = R0 G0 B0 R1 R2 G2 B2 R3
 *   mm5 = R4 G4 B4 R5 R6 G6 B6 R7
 *   mm2 = G1 B1 G3 B3 G5 B5 G7 B7
 * RGB_PACK24_B then merges these into 24 output bytes at (%1).
 */
258 #define RGB_PACK24(blue, red)\
259  "packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\
260  "packuswb %%mm5, %%mm1 \n" /* B0 B2 B4 B6 B1 B3 B5 B7 */\
261  "packuswb %%mm7, %%mm2 \n" /* G0 G2 G4 G6 G1 G3 G5 G7 */\
262  "movq %%mm"red", %%mm3 \n"\
263  "movq %%mm"blue", %%mm6 \n"\
264  "psrlq $32, %%mm"red" \n" /* R1 R3 R5 R7 */\
265  "punpcklbw %%mm2, %%mm3 \n" /* R0 G0 R2 G2 R4 G4 R6 G6 */\
266  "punpcklbw %%mm"red", %%mm6 \n" /* B0 R1 B2 R3 B4 R5 B6 R7 */\
267  "movq %%mm3, %%mm5 \n"\
268  "punpckhbw %%mm"blue", %%mm2 \n" /* G1 B1 G3 B3 G5 B5 G7 B7 */\
269  "punpcklwd %%mm6, %%mm3 \n" /* R0 G0 B0 R1 R2 G2 B2 R3 */\
270  "punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\
271  RGB_PACK24_B
272 
/*
 * RGB_PACK24_B: second half of the 24-bit packing. It merges
 *   mm3 = R0 G0 B0 R1 R2 G2 B2 R3
 *   mm5 = R4 G4 B4 R5 R6 G6 B6 R7
 *   mm2 = G1 B1 G3 B3 G5 B5 G7 B7
 * (as left by RGB_PACK24) into 24 consecutive output bytes at (%1).
 *
 * MMXEXT variant: pshufw moves the 16-bit words into their final positions.
 * The maskXXXX constants (four 16-bit lanes; the digits in the name give the
 * lanes kept, lane 0 first) select the valid words, and three por results
 * are written with MOVNTQ at offsets 0, 8 and 16.
 *
 * Plain MMX variant: six 4-byte and overlapping movd stores. The store to
 * 20(%1) writes two stale bytes at offsets 20-21 that the following store to
 * 18(%1) overwrites, so the order of the last two stores must not change.
 */
273 #if COMPILE_TEMPLATE_MMXEXT
274 DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
275 DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
276 DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
277 DECLARE_ASM_CONST(8, int16_t, mask1001[4]) = {-1, 0, 0,-1};
278 DECLARE_ASM_CONST(8, int16_t, mask0100[4]) = { 0,-1, 0, 0};
279 #undef RGB_PACK24_B
280 #define RGB_PACK24_B\
281  "pshufw $0xc6, %%mm2, %%mm1 \n"\
282  "pshufw $0x84, %%mm3, %%mm6 \n"\
283  "pshufw $0x38, %%mm5, %%mm7 \n"\
284  "pand "MANGLE(mask1101)", %%mm6 \n" /* R0 G0 B0 R1 -- -- R2 G2 */\
285  "movq %%mm1, %%mm0 \n"\
286  "pand "MANGLE(mask0110)", %%mm7 \n" /* -- -- R6 G6 B6 R7 -- -- */\
287  "movq %%mm1, %%mm2 \n"\
288  "pand "MANGLE(mask0100)", %%mm1 \n" /* -- -- G3 B3 -- -- -- -- */\
289  "psrlq $48, %%mm3 \n" /* B2 R3 -- -- -- -- -- -- */\
290  "pand "MANGLE(mask0010)", %%mm0 \n" /* -- -- -- -- G1 B1 -- -- */\
291  "psllq $32, %%mm5 \n" /* -- -- -- -- R4 G4 B4 R5 */\
292  "pand "MANGLE(mask1001)", %%mm2 \n" /* G5 B5 -- -- -- -- G7 B7 */\
293  "por %%mm3, %%mm1 \n"\
294  "por %%mm6, %%mm0 \n"\
295  "por %%mm5, %%mm1 \n"\
296  "por %%mm7, %%mm2 \n"\
297  MOVNTQ" %%mm0, (%1) \n"\
298  MOVNTQ" %%mm1, 8(%1) \n"\
299  MOVNTQ" %%mm2, 16(%1) \n"\
300 
301 #else
302 #undef RGB_PACK24_B
303 #define RGB_PACK24_B\
304  "movd %%mm3, (%1) \n" /* R0 G0 B0 R1 */\
305  "movd %%mm2, 4(%1) \n" /* G1 B1 */\
306  "psrlq $32, %%mm3 \n"\
307  "psrlq $16, %%mm2 \n"\
308  "movd %%mm3, 6(%1) \n" /* R2 G2 B2 R3 */\
309  "movd %%mm2, 10(%1) \n" /* G3 B3 */\
310  "psrlq $16, %%mm2 \n"\
311  "movd %%mm5, 12(%1) \n" /* R4 G4 B4 R5 */\
312  "movd %%mm2, 16(%1) \n" /* G5 B5 */\
313  "psrlq $32, %%mm5 \n"\
314  "movd %%mm2, 20(%1) \n" /* -- -- G7 B7 */\
315  "movd %%mm5, 18(%1) \n" /* R6 G6 B6 R7 */\
316 
317 #endif
318 
/*
 * Convert a slice of planar YUV to a 3-bytes-per-pixel format (the "rgb24"
 * variant of the template).
 *
 * Parameters are the same as for the other converters in this file:
 * context c, source planes/strides, slice position and height, destination
 * planes/strides (only plane 0 is written).
 *
 * NOTE(review): the line numbers embedded in this listing skip 328, 330 and
 * 333-334, and the visible body contains neither a packing/store macro nor
 * YUV2RGB_INITIAL_LOAD, YUV2RGB_OPERANDS or YUV2RGB_ENDFUNC. As shown it is
 * indistinguishable from yuv420_bgr24. The listing appears to have lost
 * lines (presumably including an RGB_PACK24 invocation) - confirm against
 * the upstream file.
 */
319 static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
320  int srcStride[],
321  int srcSliceY, int srcSliceH,
322  uint8_t *dst[], int dstStride[])
323 {
324  int y, h_size, vshift;
325 
326  YUV2RGB_LOOP(3)
327 
329  YUV2RGB
331 
332  YUV2RGB_ENDLOOP(3)
335 }
336 
/*
 * Convert a slice of planar YUV to a 3-bytes-per-pixel format (the "bgr24"
 * variant of the template).
 *
 * Parameters are the same as for the other converters in this file:
 * context c, source planes/strides, slice position and height, destination
 * planes/strides (only plane 0 is written).
 *
 * NOTE(review): the line numbers embedded in this listing skip 346, 348 and
 * 351-352, and the visible body contains neither a packing/store macro nor
 * YUV2RGB_INITIAL_LOAD, YUV2RGB_OPERANDS or YUV2RGB_ENDFUNC. As shown it is
 * indistinguishable from yuv420_rgb24. The listing appears to have lost
 * lines (presumably including an RGB_PACK24 invocation with the channel
 * arguments swapped) - confirm against the upstream file.
 */
337 static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
338  int srcStride[],
339  int srcSliceY, int srcSliceH,
340  uint8_t *dst[], int dstStride[])
341 {
342  int y, h_size, vshift;
343 
344  YUV2RGB_LOOP(3)
345 
347  YUV2RGB
349 
350  YUV2RGB_ENDLOOP(3)
353 }
354 
355 
/*
 * SET_EMPTY_ALPHA: fill the alpha register (mm3, via REG_ALPHA) with all-one
 * bits by comparing it with itself, i.e. 0xFF alpha for all 8 pixels.
 * Used when the source has no alpha plane.
 */
356 #define SET_EMPTY_ALPHA \
357  "pcmpeqd %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t" /* set alpha to 0xFF */ \
358 
/*
 * LOAD_ALPHA: load 8 alpha bytes for the current pixel group into the alpha
 * register (mm3, via REG_ALPHA).
 * %6 is only provided by YUV2RGB_OPERANDS_ALPHA (pa - 2*index), and the
 * (%6, %0, 2) addressing mirrors the luma load, so one alpha byte is read
 * per luma sample.
 */
359 #define LOAD_ALPHA \
360  "movq (%6, %0, 2), %%mm"REG_ALPHA"\n\t" \
361 
/*
 * RGB_PACK32(red, green, blue, alpha): interleave four byte registers
 * (8 pixels each, in pixel order) into 32 output bytes and store them.
 *
 * All arguments are MMX register numbers as strings; mm5 and mm6 are used as
 * scratch and must not be passed. Every argument register is clobbered.
 *
 * Byte order of each stored pixel is: blue-arg, green-arg, red-arg,
 * alpha-arg. Callers choose the memory layout purely through the argument
 * order (the visible yuv420_bgr32 passes REG_BLUE as "red" and REG_RED as
 * "blue").
 *
 * Steps: byte-interleave blue with green and red with alpha (low and high
 * halves separately), then word-interleave the two results. Pixels 0-1 end
 * up in the "blue" register, 2-3 in "green", 4-5 in mm5 and 6-7 in "alpha",
 * written with MOVNTQ at offsets 0, 8, 16 and 24 from %1.
 */
362 #define RGB_PACK32(red, green, blue, alpha) \
363  "movq %%mm"blue", %%mm5\n\t" \
364  "movq %%mm"red", %%mm6\n\t" \
365  "punpckhbw %%mm"green", %%mm5\n\t" \
366  "punpcklbw %%mm"green", %%mm"blue"\n\t" \
367  "punpckhbw %%mm"alpha", %%mm6\n\t" \
368  "punpcklbw %%mm"alpha", %%mm"red"\n\t" \
369  "movq %%mm"blue", %%mm"green"\n\t" \
370  "movq %%mm5, %%mm"alpha"\n\t" \
371  "punpcklwd %%mm"red", %%mm"blue"\n\t" \
372  "punpckhwd %%mm"red", %%mm"green"\n\t" \
373  "punpcklwd %%mm6, %%mm5\n\t" \
374  "punpckhwd %%mm6, %%mm"alpha"\n\t" \
375  MOVNTQ " %%mm"blue", 0(%1)\n\t" \
376  MOVNTQ " %%mm"green", 8(%1)\n\t" \
377  MOVNTQ " %%mm5, 16(%1)\n\t" \
378  MOVNTQ " %%mm"alpha", 24(%1)\n\t" \
379 
380 #if !COMPILE_TEMPLATE_MMXEXT
/*
 * Convert a slice of planar YUV to a 4-bytes-per-pixel format (the "rgb32"
 * variant of the template, without a source alpha plane).
 * Compiled only when COMPILE_TEMPLATE_MMXEXT is zero.
 *
 * Parameters are the same as for the other converters in this file:
 * context c, source planes/strides, slice position and height, destination
 * planes/strides (only plane 0 is written).
 *
 * NOTE(review): the line numbers embedded in this listing skip 390, 392-394
 * and 397-398, and the visible body contains no packing/store macro and no
 * YUV2RGB_INITIAL_LOAD, YUV2RGB_OPERANDS or YUV2RGB_ENDFUNC. The listing
 * appears to have lost lines (presumably RGB_PACK_INTERLEAVE,
 * SET_EMPTY_ALPHA and an RGB_PACK32 invocation among them) - confirm against
 * the upstream file.
 */
381 static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
382  int srcStride[],
383  int srcSliceY, int srcSliceH,
384  uint8_t *dst[], int dstStride[])
385 {
386  int y, h_size, vshift;
387 
388  YUV2RGB_LOOP(4)
389 
391  YUV2RGB
395 
396  YUV2RGB_ENDLOOP(4)
399 }
400 
401 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
/*
 * Convert a slice of planar YUV with an alpha plane (src[3]) to a
 * 4-bytes-per-pixel format (the "rgb32" variant of the template).
 * Compiled only when COMPILE_TEMPLATE_MMXEXT is zero and
 * HAVE_7REGS && CONFIG_SWSCALE_ALPHA holds; the asm needs the extra %6
 * operand for the alpha row.
 *
 * Parameters are the same as for the other converters in this file. The
 * alpha row pointer pa is derived per row from src[3] / srcStride[3] and
 * consumed by LOAD_ALPHA.
 *
 * NOTE(review): the line numbers embedded in this listing skip 412, 414, 416
 * and 419-420, and the visible body contains no packing/store macro and no
 * YUV2RGB_INITIAL_LOAD, YUV2RGB_OPERANDS_ALPHA or YUV2RGB_ENDFUNC. The
 * listing appears to have lost lines - confirm against the upstream file.
 */
402 static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
403  int srcStride[],
404  int srcSliceY, int srcSliceH,
405  uint8_t *dst[], int dstStride[])
406 {
407  int y, h_size, vshift;
408 
409  YUV2RGB_LOOP(4)
410 
411  const uint8_t *pa = src[3] + y * srcStride[3];
413  YUV2RGB
415  LOAD_ALPHA
417 
418  YUV2RGB_ENDLOOP(4)
421 }
422 #endif
423 
/*
 * Convert a slice of planar YUV to a 4-bytes-per-pixel format (the "bgr32"
 * variant of the template, without a source alpha plane).
 * Compiled only when COMPILE_TEMPLATE_MMXEXT is zero.
 *
 * RGB_PACK32 is invoked with REG_BLUE in the "red" slot and REG_RED in the
 * "blue" slot, so each stored pixel has the bytes red, green, blue, alpha in
 * memory order.
 *
 * Parameters are the same as for the other converters in this file:
 * context c, source planes/strides, slice position and height, destination
 * planes/strides (only plane 0 is written).
 *
 * NOTE(review): the line numbers embedded in this listing skip 433, 435-436
 * and 440-441, and nothing visible here invokes YUV2RGB_INITIAL_LOAD,
 * RGB_PACK_INTERLEAVE, SET_EMPTY_ALPHA, YUV2RGB_OPERANDS or YUV2RGB_ENDFUNC.
 * The listing appears to have lost lines - confirm against the upstream
 * file.
 */
424 static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
425  int srcStride[],
426  int srcSliceY, int srcSliceH,
427  uint8_t *dst[], int dstStride[])
428 {
429  int y, h_size, vshift;
430 
431  YUV2RGB_LOOP(4)
432 
434  YUV2RGB
437  RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA)
438 
439  YUV2RGB_ENDLOOP(4)
442 }
443 
444 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
/*
 * Convert a slice of planar YUV with an alpha plane (src[3]) to a
 * 4-bytes-per-pixel format (the "bgr32" variant of the template).
 * Compiled only when COMPILE_TEMPLATE_MMXEXT is zero and
 * HAVE_7REGS && CONFIG_SWSCALE_ALPHA holds; the asm needs the extra %6
 * operand for the alpha row.
 *
 * The alpha row pointer pa is derived per row from src[3] / srcStride[3];
 * LOAD_ALPHA reads 8 alpha bytes into mm3 before RGB_PACK32 interleaves and
 * stores the pixels. RGB_PACK32 gets REG_BLUE in its "red" slot and REG_RED
 * in its "blue" slot, so each stored pixel has the bytes red, green, blue,
 * alpha in memory order.
 *
 * NOTE(review): the line numbers embedded in this listing skip 455, 457 and
 * 462-463, and nothing visible here invokes YUV2RGB_INITIAL_LOAD,
 * RGB_PACK_INTERLEAVE, YUV2RGB_OPERANDS_ALPHA or YUV2RGB_ENDFUNC. The
 * listing appears to have lost lines - confirm against the upstream file.
 */
445 static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
446  int srcStride[],
447  int srcSliceY, int srcSliceH,
448  uint8_t *dst[], int dstStride[])
449 {
450  int y, h_size, vshift;
451 
452  YUV2RGB_LOOP(4)
453 
454  const uint8_t *pa = src[3] + y * srcStride[3];
456  YUV2RGB
458  LOAD_ALPHA
459  RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA)
460 
461  YUV2RGB_ENDLOOP(4)
464 }
465 #endif
466 
467 #endif /* !COMPILE_TEMPLATE_MMXEXT */