FFmpeg
hpeldsp_rnd_template.c
Go to the documentation of this file.
1 /*
2  * SIMD-optimized halfpel functions are compiled twice for rnd/no_rnd
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2003-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
7  * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
8  * and improved by Zdenek Kabelac <kabi@users.sf.net>
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 #include <stddef.h>
28 #include <stdint.h>
29 
30 // put_pixels
/**
 * Write an 8-byte-wide block in which every output row is built from a source
 * row and the same source row shifted one byte to the right.
 *
 * The per-byte combination is done by PAVGBP, which is supplied by the file
 * that includes this template (the header comment says the template is
 * compiled twice, for rnd and no_rnd).
 * NOTE(review): PAVGBP is assumed to average args 1,2 into arg 3 and
 * args 4,5 into arg 6 -- confirm against the including file.
 *
 * @param block     destination; 8 bytes are stored per row
 * @param pixels    source; bytes 0..8 of each row are loaded
 * @param line_size byte distance between consecutive rows, used for both
 *                  block and pixels
 * @param h         row count; the loop handles 4 rows per pass and only
 *                  stops when h reaches exactly zero, so h has to be a
 *                  positive multiple of 4
 */
31 av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
32 {
33  MOVQ_BFE(mm6); /* presumably loads the constant the averaging macros expect in mm6 -- confirm in inline_asm.h */
34  __asm__ volatile(
35  "lea (%3, %3), %%"FF_REG_a" \n\t" /* scratch register = 2 * line_size (two-row step) */
36  ".p2align 3 \n\t"
37  "1: \n\t"
38  "movq (%1), %%mm0 \n\t" /* row 0, bytes 0..7 */
39  "movq 1(%1), %%mm1 \n\t" /* row 0, bytes 1..8 */
40  "movq (%1, %3), %%mm2 \n\t" /* row 1, bytes 0..7 */
41  "movq 1(%1, %3), %%mm3 \n\t" /* row 1, bytes 1..8 */
42  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
43  "movq %%mm4, (%2) \n\t" /* store output row 0 */
44  "movq %%mm5, (%2, %3) \n\t" /* store output row 1 */
45  "add %%"FF_REG_a", %1 \n\t" /* advance source by two rows */
46  "add %%"FF_REG_a", %2 \n\t" /* advance destination by two rows */
/* Second half of the pass: identical sequence for rows 2 and 3. */
47  "movq (%1), %%mm0 \n\t"
48  "movq 1(%1), %%mm1 \n\t"
49  "movq (%1, %3), %%mm2 \n\t"
50  "movq 1(%1, %3), %%mm3 \n\t"
51  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
52  "movq %%mm4, (%2) \n\t"
53  "movq %%mm5, (%2, %3) \n\t"
54  "add %%"FF_REG_a", %1 \n\t"
55  "add %%"FF_REG_a", %2 \n\t"
56  "subl $4, %0 \n\t" /* four rows done */
57  "jnz 1b \n\t" /* uses the flags set by the subl above */
58  :"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h, %1 = pixels, %2 = block; all three are modified */
59  :"r"((x86_reg)line_size) /* %3 = line_size, read-only */
60  :FF_REG_a, "memory"); /* scratch register is overwritten; block memory is written */
61 }
62 
/**
 * 16-byte-wide variant of the "row combined with itself shifted right by one
 * byte" operation: each row is handled as two 8-byte halves, the second at
 * byte offset 8.
 *
 * NOTE(review): PAVGBP comes from the including file; it is assumed to
 * average args 1,2 into arg 3 and args 4,5 into arg 6 -- confirm there.
 *
 * @param block     destination; 16 bytes are stored per row
 * @param pixels    source; bytes 0..16 of each row are loaded
 * @param line_size byte distance between consecutive rows, used for both
 *                  block and pixels
 * @param h         row count; 4 rows are handled per pass and the loop only
 *                  stops when h reaches exactly zero, so h has to be a
 *                  positive multiple of 4
 */
63 av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
64 {
65  MOVQ_BFE(mm6); /* presumably loads the constant the averaging macros expect in mm6 -- confirm in inline_asm.h */
66  __asm__ volatile(
67  "lea (%3, %3), %%"FF_REG_a" \n\t" /* scratch register = 2 * line_size */
68  ".p2align 3 \n\t"
69  "1: \n\t"
/* Rows 0 and 1, left 8 bytes. */
70  "movq (%1), %%mm0 \n\t"
71  "movq 1(%1), %%mm1 \n\t"
72  "movq (%1, %3), %%mm2 \n\t"
73  "movq 1(%1, %3), %%mm3 \n\t"
74  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
75  "movq %%mm4, (%2) \n\t"
76  "movq %%mm5, (%2, %3) \n\t"
/* Rows 0 and 1, right 8 bytes (source offsets 8 and 9). */
77  "movq 8(%1), %%mm0 \n\t"
78  "movq 9(%1), %%mm1 \n\t"
79  "movq 8(%1, %3), %%mm2 \n\t"
80  "movq 9(%1, %3), %%mm3 \n\t"
81  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
82  "movq %%mm4, 8(%2) \n\t"
83  "movq %%mm5, 8(%2, %3) \n\t"
84  "add %%"FF_REG_a", %1 \n\t" /* advance source by two rows */
85  "add %%"FF_REG_a", %2 \n\t" /* advance destination by two rows */
/* Rows 2 and 3: same sequence as above, left half then right half. */
86  "movq (%1), %%mm0 \n\t"
87  "movq 1(%1), %%mm1 \n\t"
88  "movq (%1, %3), %%mm2 \n\t"
89  "movq 1(%1, %3), %%mm3 \n\t"
90  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
91  "movq %%mm4, (%2) \n\t"
92  "movq %%mm5, (%2, %3) \n\t"
93  "movq 8(%1), %%mm0 \n\t"
94  "movq 9(%1), %%mm1 \n\t"
95  "movq 8(%1, %3), %%mm2 \n\t"
96  "movq 9(%1, %3), %%mm3 \n\t"
97  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
98  "movq %%mm4, 8(%2) \n\t"
99  "movq %%mm5, 8(%2, %3) \n\t"
100  "add %%"FF_REG_a", %1 \n\t"
101  "add %%"FF_REG_a", %2 \n\t"
102  "subl $4, %0 \n\t" /* four rows done */
103  "jnz 1b \n\t" /* uses the flags set by the subl above */
104  :"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h, %1 = pixels, %2 = block; all three are modified */
105  :"r"((x86_reg)line_size) /* %3 = line_size, read-only */
106  :FF_REG_a, "memory"); /* scratch register is overwritten; block memory is written */
107 }
108 
/**
 * Write an 8-byte-wide block in which every output row is built from a source
 * row and the source row directly below it.
 *
 * Each source row is loaded only once: the last row loaded in one step is
 * kept in a register (alternating between mm0 and mm2) and reused as the
 * upper row of the next step. Source rows 0..h are read, i.e. one row more
 * than is written.
 *
 * NOTE(review): PAVGBP comes from the including file; it is assumed to
 * average args 1,2 into arg 3 and args 4,5 into arg 6 -- confirm there.
 *
 * @param block     destination; 8 bytes are stored per row
 * @param pixels    source; 8 bytes are loaded from each of h + 1 rows
 * @param line_size byte distance between consecutive rows, used for both
 *                  block and pixels
 * @param h         row count; 4 rows are handled per pass and the loop only
 *                  stops when h reaches exactly zero, so h has to be a
 *                  positive multiple of 4
 */
109 av_unused static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
110 {
111  MOVQ_BFE(mm6); /* presumably loads the constant the averaging macros expect in mm6 -- confirm in inline_asm.h */
112  __asm__ volatile(
113  "lea (%3, %3), %%"FF_REG_a" \n\t" /* scratch register = 2 * line_size */
114  "movq (%1), %%mm0 \n\t" /* prime the carry with source row 0, before entering the loop */
115  ".p2align 3 \n\t"
116  "1: \n\t"
117  "movq (%1, %3), %%mm1 \n\t" /* row +1 */
118  "movq (%1, %%"FF_REG_a"),%%mm2\n\t" /* row +2; stays in mm2 as the carry for the second half */
119  PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
120  "movq %%mm4, (%2) \n\t" /* output row 0 (from source rows 0 and +1) */
121  "movq %%mm5, (%2, %3) \n\t" /* output row 1 (from source rows +1 and +2) */
122  "add %%"FF_REG_a", %1 \n\t" /* advance source by two rows */
123  "add %%"FF_REG_a", %2 \n\t" /* advance destination by two rows */
124  "movq (%1, %3), %%mm1 \n\t" /* row +1 relative to the advanced pointer */
125  "movq (%1, %%"FF_REG_a"),%%mm0\n\t" /* row +2; stays in mm0 as the carry for the next pass */
126  PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
127  "movq %%mm4, (%2) \n\t"
128  "movq %%mm5, (%2, %3) \n\t"
129  "add %%"FF_REG_a", %1 \n\t"
130  "add %%"FF_REG_a", %2 \n\t"
131  "subl $4, %0 \n\t" /* four rows done */
132  "jnz 1b \n\t" /* uses the flags set by the subl above */
133  :"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h, %1 = pixels, %2 = block; all three are modified */
134  :"r"((x86_reg)line_size) /* %3 = line_size, read-only */
135  :FF_REG_a, "memory"); /* scratch register is overwritten; block memory is written */
136 }
137 
/**
 * 16-byte-wide read-modify-write variant: for every row, the source row is
 * combined with itself shifted one byte to the right, and that intermediate
 * is then combined with the bytes already present in block before being
 * stored back.
 *
 * Unlike the put variants, this loop handles a single row per pass and
 * therefore needs no 2 * line_size scratch register.
 *
 * NOTE(review): the 4-argument PAVGB comes from the including file and is
 * assumed to combine args 1,2 into arg 3 using the constant in arg 4;
 * PAVGB_MMX is declared as (rega, regb, regr, regfe) and is assumed to
 * leave its result in regr -- confirm both.
 *
 * @param block     destination; 16 bytes per row are loaded and then stored
 * @param pixels    source; bytes 0..16 of each row are loaded
 * @param line_size byte distance between consecutive rows, used for both
 *                  block and pixels
 * @param h         row count; decremented by 1 per pass and the loop only
 *                  stops when it reaches exactly zero, so h must be positive
 */
138 av_unused static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
139 {
140  MOVQ_BFE(mm6); /* presumably loads the constant passed below as the last macro argument -- confirm in inline_asm.h */
141  __asm__ volatile(
142  ".p2align 3 \n\t"
143  "1: \n\t"
/* Left 8 bytes of the row. */
144  "movq (%1), %%mm0 \n\t" /* source bytes 0..7 */
145  "movq 1(%1), %%mm1 \n\t" /* source bytes 1..8 */
146  "movq (%2), %%mm3 \n\t" /* current destination bytes 0..7 */
147  PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
148  PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
149  "movq %%mm0, (%2) \n\t"
/* Right 8 bytes of the row. */
150  "movq 8(%1), %%mm0 \n\t" /* source bytes 8..15 */
151  "movq 9(%1), %%mm1 \n\t" /* source bytes 9..16 */
152  "movq 8(%2), %%mm3 \n\t" /* current destination bytes 8..15 */
153  PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
154  PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
155  "movq %%mm0, 8(%2) \n\t"
156  "add %3, %1 \n\t" /* advance source by one row */
157  "add %3, %2 \n\t" /* advance destination by one row */
158  "subl $1, %0 \n\t" /* one row done */
159  "jnz 1b \n\t" /* uses the flags set by the subl above */
160  :"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h, %1 = pixels, %2 = block; all three are modified */
161  :"r"((x86_reg)line_size) /* %3 = line_size, read-only */
162  :"memory"); /* block memory is read and written */
163 }
164 
/**
 * 8-byte-wide read-modify-write variant of the vertical operation: each
 * output row is built from a source row and the source row directly below
 * it, and that intermediate is then combined with the bytes already present
 * in block before being stored back.
 *
 * As in the put version, the last source row loaded in one step is kept in
 * a register (alternating between mm0 and mm2) and reused as the upper row
 * of the next step, so source rows 0..h are read.
 *
 * NOTE(review): PAVGBP comes from the including file and is assumed to
 * average args 1,2 into arg 3 and args 4,5 into arg 6; PAVGB_MMX is declared
 * as (rega, regb, regr, regfe) and is assumed to leave its result in regr --
 * confirm both.
 *
 * @param block     destination; 8 bytes per row are loaded and then stored
 * @param pixels    source; 8 bytes are loaded from each of h + 1 rows
 * @param line_size byte distance between consecutive rows, used for both
 *                  block and pixels
 * @param h         row count; 4 rows are handled per pass and the loop only
 *                  stops when h reaches exactly zero, so h has to be a
 *                  positive multiple of 4
 */
165 av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
166 {
167  MOVQ_BFE(mm6); /* presumably loads the constant passed below as the last PAVGB_MMX argument -- confirm in inline_asm.h */
168  __asm__ volatile(
169  "lea (%3, %3), %%"FF_REG_a" \n\t" /* scratch register = 2 * line_size */
170  "movq (%1), %%mm0 \n\t" /* prime the carry with source row 0, before entering the loop */
171  ".p2align 3 \n\t"
172  "1: \n\t"
173  "movq (%1, %3), %%mm1 \n\t" /* row +1 */
174  "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" /* row +2; must survive until the second half */
175  PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
176  "movq (%2), %%mm3 \n\t" /* current destination row 0 */
177  PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6)
178  "movq (%2, %3), %%mm3 \n\t" /* current destination row 1 */
179  PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
/* Results were placed in mm0/mm1, whose source rows are no longer needed; mm2 is left untouched as the carry. */
180  "movq %%mm0, (%2) \n\t"
181  "movq %%mm1, (%2, %3) \n\t"
182  "add %%"FF_REG_a", %1 \n\t" /* advance source by two rows */
183  "add %%"FF_REG_a", %2 \n\t" /* advance destination by two rows */
184 
185  "movq (%1, %3), %%mm1 \n\t" /* row +1 relative to the advanced pointer */
186  "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" /* row +2; stays in mm0 as the carry for the next pass */
187  PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
188  "movq (%2), %%mm3 \n\t"
189  PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6)
190  "movq (%2, %3), %%mm3 \n\t"
191  PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
/* Roles are swapped here: results go to mm2/mm1 so that mm0 is left untouched as the carry. */
192  "movq %%mm2, (%2) \n\t"
193  "movq %%mm1, (%2, %3) \n\t"
194  "add %%"FF_REG_a", %1 \n\t"
195  "add %%"FF_REG_a", %2 \n\t"
196 
197  "subl $4, %0 \n\t" /* four rows done */
198  "jnz 1b \n\t" /* uses the flags set by the subl above */
199  :"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h, %1 = pixels, %2 = block; all three are modified */
200  :"r"((x86_reg)line_size) /* %3 = line_size, read-only */
201  :FF_REG_a, "memory"); /* scratch register is overwritten; block memory is read and written */
202 }
PAVGB_MMX
#define PAVGB_MMX(rega, regb, regr, regfe)
Definition: inline_asm.h:63
av_unused
#define av_unused
Definition: attributes.h:131
MOVQ_BFE
#define MOVQ_BFE(regd)
Definition: inline_asm.h:34
PAVGB
#define PAVGB(a, b)
Definition: postprocess_template.c:88
DEF
#define DEF(type, name, bytes, read, write)
Definition: bytestream.h:42
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
uint8_t
uint8_t
Definition: audio_convert.c:194
pixels8_y2
static av_unused void put_TMPL pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_rnd_template.c:109
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
pixels16_x2
static av_unused void put_TMPL pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_rnd_template.c:63
pixels8_x2
static av_unused void put_TMPL pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_rnd_template.c:31
x86_reg
int x86_reg
Definition: asm.h:72
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038