FFmpeg
idctdsp_alpha.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/attributes.h"
22 #include "libavcodec/idctdsp.h"
23 #include "idctdsp_alpha.h"
24 #include "asm.h"
25 
/*
 * Prototypes of the optimized assembler routines.  Only the declarations
 * live in this file; the definitions are provided elsewhere.
 * ff_idctdsp_init_alpha() installs them as put_pixels_clamped /
 * add_pixels_clamped when amask(AMASK_MVI) returns 0.
 *
 * block     - source coefficients (read only)
 * pixels    - destination pixel rows
 * line_size - distance in bytes between two consecutive pixel rows
 *             (NOTE(review): inferred from the C reference versions in this
 *             file, which advance `pixels` by line_size per row)
 */
void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
                                ptrdiff_t line_size);
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
                                ptrdiff_t line_size);
30 
/*
 * File-scope function pointers recording which pixel-clamping routines are
 * currently selected.  Having no initializer they start out as NULL;
 * ff_idctdsp_init_alpha() overwrites them with the context's
 * put_pixels_clamped / add_pixels_clamped entries on every call.
 * NOTE(review): presumably read by the Alpha simple IDCT code - confirm
 * against the users of idctdsp_alpha.h.
 */
void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 ptrdiff_t line_size);
void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 ptrdiff_t line_size);
35 
36 #if 0
37 /* These functions were the base for the optimized assembler routines,
38  and remain here for documentation purposes. */
/*
 * C reference for put_pixels_clamped_mvi_asm: processes 8 rows, and for each
 * row loads two 64-bit words from `block` (block and block + 4), clamps them
 * and stores two packed 32-bit results at pixels and pixels + 4.
 *
 * ldq, maxsw4, minsw4, pkwb, stl and zap are macros from asm.h.
 * NOTE(review): presumably maxsw4/minsw4 are lane-wise signed 16-bit max/min
 * and pkwb packs four 16-bit lanes into four bytes, which would clamp every
 * coefficient to 0..255 - confirm against asm.h.
 *
 * block     - 64 coefficients, consumed 8 per row
 * pixels    - destination; 8 bytes are written per row
 * line_size - byte offset added to `pixels` after each row
 */
static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                                   ptrdiff_t line_size)
{
    int i = 8;
    uint64_t clampmask = zap(-1, 0xaa);      /* 0x00ff00ff00ff00ff */

    do {
        uint64_t shorts0, shorts1;

        /* First four coefficients of the row. */
        shorts0 = ldq(block);
        shorts0 = maxsw4(shorts0, 0);
        shorts0 = minsw4(shorts0, clampmask);
        stl(pkwb(shorts0), pixels);

        /* Remaining four coefficients of the row. */
        shorts1 = ldq(block + 4);
        shorts1 = maxsw4(shorts1, 0);
        shorts1 = minsw4(shorts1, clampmask);
        stl(pkwb(shorts1), pixels + 4);

        pixels += line_size;
        block  += 8;
    } while (--i);
}
62 
/*
 * C reference for add_pixels_clamped_mvi_asm: for each of 8 rows, adds the
 * 8 coefficients in `block` to the 8 bytes already at `pixels`, clamps the
 * sums and writes them back in place.
 *
 * Declared static like put_pixels_clamped_mvi: it is reference code only and
 * must not export a symbol should this #if 0 region ever be enabled.
 *
 * ldq, ldl, stl, unpkbw, pkwb, maxsw4, minsw4 and zap are macros from asm.h.
 * NOTE(review): presumably unpkbw widens four bytes to four zero-extended
 * 16-bit lanes and pkwb does the reverse - confirm against asm.h.
 *
 * block     - 64 coefficients, consumed 8 per row
 * pixels    - pixel rows that are read, updated and written back
 * line_size - byte offset added to `pixels` after each row
 */
static void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                                   ptrdiff_t line_size)
{
    int h = 8;
    /* Keep this function a leaf function by generating the constants
       manually (mainly for the hack value ;-). */
    uint64_t clampmask = zap(-1, 0xaa);      /* 0x00ff00ff00ff00ff */
    uint64_t signmask  = zap(-1, 0x33);
    signmask ^= signmask >> 1;               /* 0x8000800080008000 */

    do {
        uint64_t shorts0, pix0, signs0;
        uint64_t shorts1, pix1, signs1;

        shorts0 = ldq(block);
        shorts1 = ldq(block + 4);

        pix0 = unpkbw(ldl(pixels));
        /* Signed subword add (MMX paddw): with bit 15 of every 16-bit lane
           cleared, the single 64-bit add cannot carry from one lane into
           the next; the saved sign bits are then folded back in with xor. */
        signs0   = shorts0 & signmask;
        shorts0 &= ~signmask;
        shorts0 += pix0;
        shorts0 ^= signs0;
        /* Clamp. */
        shorts0 = maxsw4(shorts0, 0);
        shorts0 = minsw4(shorts0, clampmask);

        /* Next 4. */
        pix1     = unpkbw(ldl(pixels + 4));
        signs1   = shorts1 & signmask;
        shorts1 &= ~signmask;
        shorts1 += pix1;
        shorts1 ^= signs1;
        shorts1  = maxsw4(shorts1, 0);
        shorts1  = minsw4(shorts1, clampmask);

        /* Both halves are stored only after both have been computed. */
        stl(pkwb(shorts0), pixels);
        stl(pkwb(shorts1), pixels + 4);

        pixels += line_size;
        block  += 8;
    } while (--h);
}
106 #endif
107 
109  unsigned high_bit_depth)
110 {
111  /* amask clears all bits that correspond to present features. */
112  if (amask(AMASK_MVI) == 0) {
113  c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
114  c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
115  }
116 
117  put_pixels_clamped_axp_p = c->put_pixels_clamped;
118  add_pixels_clamped_axp_p = c->add_pixels_clamped;
119 
120  if (!high_bit_depth && !avctx->lowres &&
121  (avctx->idct_algo == FF_IDCT_AUTO)) {
122  c->idct_put = ff_simple_idct_put_axp;
123  c->idct_add = ff_simple_idct_add_axp;
124  c->idct = ff_simple_idct_axp;
125  }
126 }
put_pixels_clamped_mvi_asm
void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
ff_simple_idct_axp
void ff_simple_idct_axp(int16_t *block)
Definition: simple_idct_alpha.c:253
pixels
int pixels
Definition: avisynth_c.h:390
ldq
#define ldq(p)
Definition: asm.h:59
FF_IDCT_AUTO
#define FF_IDCT_AUTO
Definition: avcodec.h:2769
AMASK_MVI
#define AMASK_MVI
Definition: asm.h:40
asm.h
add_pixels_clamped_mvi_asm
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
av_cold
#define av_cold
Definition: attributes.h:84
pkwb
#define pkwb(a)
Definition: asm.h:144
zap
#define zap(a, b)
Definition: asm.h:111
ff_simple_idct_put_axp
void ff_simple_idct_put_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: simple_idct_alpha.c:293
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
Definition: undefined.txt:32
ldl
#define ldl(p)
Definition: asm.h:64
AVCodecContext::lowres
int lowres
low resolution decoding, 1-> 1/2 size, 2->1/4 size
Definition: avcodec.h:2804
stl
#define stl(l, p)
Definition: asm.h:76
attributes.h
amask
#define amask(a)
Definition: asm.h:113
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
add_pixels_clamped_axp_p
void(* add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Definition: idctdsp_alpha.c:33
maxsw4
#define maxsw4(a, b)
Definition: asm.h:141
uint8_t
uint8_t
Definition: audio_convert.c:194
AVCodecContext::idct_algo
int idct_algo
IDCT algorithm, see FF_IDCT_* below.
Definition: avcodec.h:2768
idctdsp.h
IDCTDSPContext
Definition: idctdsp.h:53
unpkbw
#define unpkbw(a)
Definition: asm.h:146
ff_simple_idct_add_axp
void ff_simple_idct_add_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: simple_idct_alpha.c:299
AVCodecContext
main external API structure.
Definition: avcodec.h:1565
minsw4
#define minsw4(a, b)
Definition: asm.h:137
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
put_pixels_clamped_axp_p
void(* put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Definition: idctdsp_alpha.c:31
h
h
Definition: vp9dsp_template.c:2038
ff_idctdsp_init_alpha
av_cold void ff_idctdsp_init_alpha(IDCTDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth)
Definition: idctdsp_alpha.c:108
idctdsp_alpha.h