static void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *left, const uint8_t *_top)
{
    /* ... */
}

static void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *left, const uint8_t *_top)
{
    /* ... */
    for (y = 0; y < 8; y++) { /* ... */ }
}

static void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    /* ... */
    for (y = 0; y < 16; y++) { /* ... */ }
}

static void vert_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    /* ... */
    for (y = 0; y < 32; y++) { /* ... */ }
}
static void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                      const uint8_t *_left, const uint8_t *top) { /* ... */ }

static void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                      const uint8_t *_left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 8; y++) { /* ... */ }
}

static void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                        const uint8_t *_left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 16; y++) { /* ... */ }
}

static void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                        const uint8_t *_left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 32; y++) { /* ... */ }
}
static void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    for (y = 0; y < 4; y++) {
        int l_m_tl = left[3 - y] - tl;
        /* ... */
    }
}

static void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    for (y = 0; y < 8; y++) {
        int l_m_tl = left[7 - y] - tl;
        /* ... */
    }
}

static void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    for (y = 0; y < 16; y++) {
        int l_m_tl = left[15 - y] - tl;
        /* ... */
    }
}

static void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    for (y = 0; y < 32; y++) {
        int l_m_tl = left[31 - y] - tl;
        /* ... */
    }
}
static void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] +
                                top[0] + top[1] + top[2] + top[3] + 4) >> 3);
    /* ... */
}

static void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] +
                                left[4] + left[5] + left[6] + left[7] +
                                top[0] + top[1] + top[2] + top[3] +
                                top[4] + top[5] + top[6] + top[7] + 8) >> 4);
    /* ... */
    for (y = 0; y < 8; y++) { /* ... */ }
}

static void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    pixel4 dc = PIXEL_SPLAT_X4((left[0] + /* ... */ + left[13] + left[14] +
                                left[15] + top[0] + top[1] + top[2] + top[3] +
                                top[4] + top[5] + top[6] + top[7] + top[8] +
                                top[9] + top[10] + top[11] + top[12] + top[13] +
                                top[14] + top[15] + 16) >> 5);
    /* ... */
    for (y = 0; y < 16; y++) { /* ... */ }
}

static void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    pixel4 dc = PIXEL_SPLAT_X4((left[0] + /* ... */ + left[31] +
                                top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
                                top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
                                top[12] + top[13] + top[14] + top[15] + top[16] + top[17] +
                                top[18] + top[19] + top[20] + top[21] + top[22] + top[23] +
                                top[24] + top[25] + top[26] + top[27] + top[28] + top[29] +
                                top[30] + top[31] + 32) >> 6);
    /* ... */
    for (y = 0; y < 32; y++) { /* ... */ }
}
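All of the dc_*_c variants apply the same rule at their block size: the predictor is the rounded mean of the visible left and top neighbours. A minimal hedged sketch for the 4x4 case on plain 8-bit samples, outside the pixel/bit-depth template (helper name invented):

#include <stdint.h>

/* Hypothetical standalone version of the 4x4 DC rule above:
 * average the 4 left and 4 top neighbours (+4 rounds, >>3 divides by 8). */
static uint8_t dc_value_4x4(const uint8_t left[4], const uint8_t top[4])
{
    int sum = left[0] + left[1] + left[2] + left[3] +
              top[0]  + top[1]  + top[2]  + top[3];
    return (uint8_t)((sum + 4) >> 3);
}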
static void dc_left_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                          const uint8_t *_left, const uint8_t *top) { /* ... */ }

static void dc_left_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                          const uint8_t *_left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 8; y++) { /* ... */ }
}

static void dc_left_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                            const uint8_t *_left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 16; y++) { /* ... */ }
}

static void dc_left_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                            const uint8_t *_left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 32; y++) { /* ... */ }
}
static void dc_top_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top) { /* ... */ }

static void dc_top_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    /* ... */
    pixel4 dc = PIXEL_SPLAT_X4((top[0] + top[1] + top[2] + top[3] +
                                top[4] + top[5] + top[6] + top[7] + 4) >> 3);
    /* ... */
    for (y = 0; y < 8; y++) { /* ... */ }
}

static void dc_top_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *_top)
{
    /* ... */
    pixel4 dc = PIXEL_SPLAT_X4((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
                                top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
                                top[12] + top[13] + top[14] + top[15] + 8) >> 4);
    /* ... */
    for (y = 0; y < 16; y++) { /* ... */ }
}

static void dc_top_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *_top)
{
    /* ... */
    pixel4 dc = PIXEL_SPLAT_X4((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
                                top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
                                top[12] + top[13] + top[14] + top[15] + top[16] + top[17] +
                                top[18] + top[19] + top[20] + top[21] + top[22] + top[23] +
                                top[24] + top[25] + top[26] + top[27] + top[28] + top[29] +
                                top[30] + top[31] + 16) >> 5);
    /* ... */
    for (y = 0; y < 32; y++) { /* ... */ }
}
static void dc_128_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top) { /* ... */ }

static void dc_128_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 8; y++) { /* ... */ }
}

static void dc_128_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 16; y++) { /* ... */ }
}

static void dc_128_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 32; y++) { /* ... */ }
}
static void dc_127_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top) { /* ... */ }

static void dc_127_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 8; y++) { /* ... */ }
}

static void dc_127_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 16; y++) { /* ... */ }
}

static void dc_127_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 32; y++) { /* ... */ }
}
static void dc_129_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top) { /* ... */ }

static void dc_129_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 8; y++) { /* ... */ }
}

static void dc_129_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 16; y++) { /* ... */ }
}

static void dc_129_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    /* ... */
    for (y = 0; y < 32; y++) { /* ... */ }
}
#if BIT_DEPTH == 8
#define memset_bpc memset
#else
static inline void memset_bpc(uint16_t *dst, int val, int len)
{
    int n;
    for (n = 0; n < len; n++)
        dst[n] = val;
}
#endif
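The indirection exists because this template is compiled once per bit depth: at 8 bpp a pixel is one byte and plain memset() works, while at 10/12 bpp a pixel is a uint16_t and each element has to be stored individually. A tiny hedged illustration (helper name invented, not part of the file):

#include <stdint.h>

/* Hypothetical demo: memset() replicates a single byte, so memset with 0x80
 * would write 0x8080 into every uint16_t; an element-wise loop stores 0x80. */
static void splat_u16(uint16_t *dst, int val, int len)
{
    int n;
    for (n = 0; n < len; n++)
        dst[n] = val;
}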
#define DST(x, y) dst[(x) + (y) * stride]
static void diag_downleft_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                                const uint8_t *left, const uint8_t *_top)
{
    /* ... */
    int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        a4 = top[4], a5 = top[5], a6 = top[6], a7 = top[7];

    /* ... */
    DST(3,1) = DST(2,2) = DST(1,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
    DST(3,2) = DST(2,3) =            (a5 + a6 * 2 + a7 + 2) >> 2;
    /* ... */
}
#define def_diag_downleft(size) \
static void diag_downleft_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                              const uint8_t *left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    int i, j; \
    pixel v[size - 1]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) \
        v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
    v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
\
    for (j = 0; j < size; j++) { \
        memcpy(dst + j*stride, v + j, (size - 1 - j) * sizeof(pixel)); \
        memset_bpc(dst + j*stride + size - 1 - j, top[size - 1], j + 1); \
    } \
}
static void diag_downright_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                                 const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0];

    /* ... */
    DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
    DST(0,2) = DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,1) = DST(1,2) = DST(2,3) = (tl + l0 * 2 + l1 + 2) >> 2;
    DST(0,0) = DST(1,1) = DST(2,2) = DST(3,3) = (l0 + tl * 2 + a0 + 2) >> 2;
    DST(1,0) = DST(2,1) = DST(3,2) = (tl + a0 * 2 + a1 + 2) >> 2;
    /* ... */
}
#define def_diag_downright(size) \
static void diag_downright_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                               const uint8_t *_left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    const pixel *left = (const pixel *) _left; \
    int i, j; \
    pixel v[size + size - 1]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) { \
        v[i           ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
        v[size + 1 + i] = (top[i]  + top[i + 1]  * 2 + top[i + 2]  + 2) >> 2; \
    } \
    v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \
    v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \
    v[size    ] = (top[-1] + top[0] * 2 + top[ 1] + 2) >> 2; \
\
    for (j = 0; j < size; j++) \
        memcpy(dst + j*stride, v + size - 1 - j, size * sizeof(pixel)); \
}
static void vert_right_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                             const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        l0 = left[3], l1 = left[2], l2 = left[1];

    /* ... */
    DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2;
    DST(0,0) = DST(1,2) = (tl + a0 + 1) >> 1;
    DST(0,1) = DST(1,3) = (l0 + tl * 2 + a0 + 2) >> 2;
    /* ... */
    DST(1,1) = DST(2,3) = (tl + a0 * 2 + a1 + 2) >> 2;
    /* ... */
}
#define def_vert_right(size) \
static void vert_right_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                           const uint8_t *_left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    const pixel *left = (const pixel *) _left; \
    int i, j; \
    pixel ve[size + size/2 - 1], vo[size + size/2 - 1]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size/2 - 2; i++) { \
        vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \
        ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \
    } \
    vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \
    ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
\
    ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \
    vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \
    for (i = 0; i < size - 1; i++) { \
        ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \
        vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
    } \
\
    for (j = 0; j < size / 2; j++) { \
        memcpy(dst +  j*2      *stride, ve + size/2 - 1 - j, size * sizeof(pixel)); \
        memcpy(dst + (j*2 + 1) *stride, vo + size/2 - 1 - j, size * sizeof(pixel)); \
    } \
}
static void hor_down_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *_left, const uint8_t *_top)
{
    /* ... */
    int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0],
        tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2];

    /* ... */
    DST(2,0) = (tl + a0 * 2 + a1 + 2) >> 2;
    /* ... */
    DST(0,0) = DST(2,1) = (tl + l0 + 1) >> 1;
    DST(1,0) = DST(3,1) = (a0 + tl * 2 + l0 + 2) >> 2;
    DST(0,1) = DST(2,2) = (l0 + l1 + 1) >> 1;
    DST(1,1) = DST(3,2) = (tl + l0 * 2 + l1 + 2) >> 2;
    DST(0,2) = DST(2,3) = (l1 + l2 + 1) >> 1;
    DST(1,2) = DST(3,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,3) = (l2 + l3 + 1) >> 1;
    DST(1,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
}
#define def_hor_down(size) \
static void hor_down_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                         const uint8_t *_left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    const pixel *left = (const pixel *) _left; \
    int i, j; \
    pixel v[size * 3 - 2]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) { \
        v[i*2       ] = (left[i + 1] + left[i + 0] + 1) >> 1; \
        v[i*2 + 1   ] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \
        v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
    } \
    v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \
    v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \
    v[size*2 - 1] = (top[0] + top[-1] * 2 + left[size - 1] + 2) >> 2; \
    v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
\
    for (j = 0; j < size; j++) \
        memcpy(dst + j*stride, v + size*2 - 2 - j*2, size * sizeof(pixel)); \
}
static void vert_left_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                            const uint8_t *left, const uint8_t *_top)
{
    /* ... */
    int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        a4 = top[4], a5 = top[5], a6 = top[6];

    /* ... */
    DST(3,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
}
#define def_vert_left(size) \
static void vert_left_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                          const uint8_t *left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    int i, j; \
    pixel ve[size - 1], vo[size - 1]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) { \
        ve[i] = (top[i] + top[i + 1] + 1) >> 1; \
        vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
    } \
    ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \
    vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
\
    for (j = 0; j < size / 2; j++) { \
        memcpy     (dst +  j*2      * stride, ve + j, (size - j - 1) * sizeof(pixel)); \
        memset_bpc (dst +  j*2      * stride + size - j - 1, top[size - 1], j + 1); \
        memcpy     (dst + (j*2 + 1) * stride, vo + j, (size - j - 1) * sizeof(pixel)); \
        memset_bpc (dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \
    } \
}
static void hor_up_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *_left, const uint8_t *top)
{
    /* ... */
    DST(0,0) = (l0 + l1 + 1) >> 1;
    DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,1) = DST(2,0) = (l1 + l2 + 1) >> 1;
    DST(1,1) = DST(3,0) = (l1 + l2 * 2 + l3 + 2) >> 2;
    DST(0,2) = DST(2,1) = (l2 + l3 + 1) >> 1;
    DST(1,2) = DST(3,1) = (l2 + l3 * 3 + 2) >> 2;
    /* ... */
}
#define def_hor_up(size) \
static void hor_up_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                       const uint8_t *_left, const uint8_t *top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *left = (const pixel *) _left; \
    int i, j; \
    pixel v[size*2 - 2]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) { \
        v[i*2    ] = (left[i] + left[i + 1] + 1) >> 1; \
        v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
    } \
    v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \
    v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \
\
    for (j = 0; j < size / 2; j++) \
        memcpy(dst + j*stride, v + j*2, size * sizeof(pixel)); \
    for (j = size / 2; j < size; j++) { \
        memcpy(dst + j*stride, v + j*2, (size*2 - 2 - j*2) * sizeof(pixel)); \
        memset_bpc(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \
                   j*2 + 2 - size); \
    } \
}
#define init_intra_pred_bd_aware(tx, sz) \
    dsp->intra_pred[tx][TM_VP8_PRED]          = tm_##sz##_c; \
    dsp->intra_pred[tx][DC_128_PRED]          = dc_128_##sz##_c; \
    dsp->intra_pred[tx][DC_127_PRED]          = dc_127_##sz##_c; \
    dsp->intra_pred[tx][DC_129_PRED]          = dc_129_##sz##_c

/* ... inside the per-bit-depth intra prediction init function ... */
#if BIT_DEPTH == 12
    ff_vp9dsp_intrapred_init_10(dsp);
#define init_intra_pred(tx, sz) \
    init_intra_pred_bd_aware(tx, sz)
#else
#define init_intra_pred(tx, sz) \
    dsp->intra_pred[tx][VERT_PRED]            = vert_##sz##_c; \
    dsp->intra_pred[tx][HOR_PRED]             = hor_##sz##_c; \
    dsp->intra_pred[tx][DC_PRED]              = dc_##sz##_c; \
    dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED]  = diag_downleft_##sz##_c; \
    dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \
    dsp->intra_pred[tx][VERT_RIGHT_PRED]      = vert_right_##sz##_c; \
    dsp->intra_pred[tx][HOR_DOWN_PRED]        = hor_down_##sz##_c; \
    dsp->intra_pred[tx][VERT_LEFT_PRED]       = vert_left_##sz##_c; \
    dsp->intra_pred[tx][HOR_UP_PRED]          = hor_up_##sz##_c; \
    dsp->intra_pred[tx][LEFT_DC_PRED]         = dc_left_##sz##_c; \
    dsp->intra_pred[tx][TOP_DC_PRED]          = dc_top_##sz##_c; \
    init_intra_pred_bd_aware(tx, sz)
#endif

    /* ... */

#undef init_intra_pred
#undef init_intra_pred_bd_aware
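The macros above populate dsp->intra_pred, indexed first by transform/block size and then by prediction mode. A hedged sketch of a call site (the wrapper and buffer names are invented; it assumes the usual VP9 decoder declarations such as VP9DSPContext, TX_8X8 and DC_PRED are in scope):

/* Hypothetical call site: DC-predict one 8x8 block from its reconstructed
 * left and top edge pixels. */
static void predict_dc_8x8(VP9DSPContext *dsp, uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *left_edge, const uint8_t *top_edge)
{
    dsp->intra_pred[TX_8X8][DC_PRED](dst, stride, left_edge, top_edge);
}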
#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \
static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *_dst, \
                                                    ptrdiff_t stride, \
                                                    int16_t *_block, int eob) \
{ \
    int i, j; \
    pixel *dst = (pixel *) _dst; \
    dctcoef *block = (dctcoef *) _block, tmp[sz * sz], out[sz]; \
\
    stride /= sizeof(pixel); \
    if (has_dconly && eob == 1) { \
        const int t = ((((dctint) block[0] * 11585 + (1 << 13)) >> 14) \
                                           * 11585 + (1 << 13)) >> 14; \
        block[0] = 0; \
        for (i = 0; i < sz; i++) { \
            for (j = 0; j < sz; j++) \
                dst[j * stride] = av_clip_pixel(dst[j * stride] + \
                                                (bits ? \
                                                 (int)(t + (1U << (bits - 1))) >> bits : \
                                                 t)); \
            dst++; \
        } \
        return; \
    } \
\
    for (i = 0; i < sz; i++) \
        type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \
    memset(block, 0, sz * sz * sizeof(*block)); \
    for (i = 0; i < sz; i++) { \
        type_b##sz##_1d(tmp + i, sz, out, 1); \
        for (j = 0; j < sz; j++) \
            dst[j * stride] = av_clip_pixel(dst[j * stride] + \
                                            (bits ? \
                                             (int)(out[j] + (1U << (bits - 1))) >> bits : \
                                             out[j])); \
        dst++; \
    } \
}

#define itxfm_wrap(sz, bits) \
itxfm_wrapper(idct,  idct,  sz, bits, 1) \
itxfm_wrapper(iadst, idct,  sz, bits, 0) \
itxfm_wrapper(idct,  iadst, sz, bits, 0) \
itxfm_wrapper(iadst, iadst, sz, bits, 0)
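itxfm_wrap() stamps out the four DCT/ADST combinations for each size. Each wrapper runs the first 1-D transform down the columns into tmp, the second 1-D transform across the result, and adds the rounded, clipped output onto the prediction already in dst (with a shortcut when only the DC coefficient is present). A hedged sketch of the same separable column-then-row pattern for a generic transform (types and names invented):

#include <stddef.h>

typedef void (*transform1d_fn)(const int *in, ptrdiff_t in_stride, int *out);

/* Hypothetical separable 2-D inverse: pass 1 reads columns and writes rows of
 * tmp, pass 2 reads columns of tmp and writes rows of coeffs back. */
static void inverse_2d(int n, transform1d_fn tx, int *coeffs, int *tmp)
{
    int i;
    for (i = 0; i < n; i++)
        tx(coeffs + i, n, tmp + i * n);   /* column pass */
    for (i = 0; i < n; i++)
        tx(tmp + i, n, coeffs + i * n);   /* row pass    */
}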
#define IN(x) ((dctint) in[(x) * stride])

static av_always_inline void idct4_1d(const dctcoef *in, ptrdiff_t stride,
                                      dctcoef *out, int pass)
{
    dctint t0, t1, t2, t3;

    t0 = ((IN(0) + IN(2)) * 11585        + (1 << 13)) >> 14;
    t1 = ((IN(0) - IN(2)) * 11585        + (1 << 13)) >> 14;
    t2 = (IN(1) *  6270 - IN(3) * 15137  + (1 << 13)) >> 14;
    t3 = (IN(1) * 15137 + IN(3) *  6270  + (1 << 13)) >> 14;

    /* ... */
}
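The fixed-point factors 11585, 15137 and 6270 are cos(pi/4), cos(pi/8) and sin(pi/8) scaled by 2^14, with (1 << 13) as the rounding term before the >> 14. A quick standalone check (not part of the file) that reproduces them:

#include <math.h>
#include <stdio.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

/* Prints 11585, 15137 and 6270: the rotation constants used by idct4_1d. */
int main(void)
{
    printf("%ld\n", lrint(16384 * cos(M_PI / 4)));
    printf("%ld\n", lrint(16384 * cos(M_PI / 8)));
    printf("%ld\n", lrint(16384 * sin(M_PI / 8)));
    return 0;
}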
static av_always_inline void iadst4_1d(const dctcoef *in, ptrdiff_t stride,
                                       dctcoef *out, int pass)
{
    dctint t0, t1, t2, t3;

    t0 =  5283 * IN(0) + 15212 * IN(2) +  9929 * IN(3);
    t1 =  9929 * IN(0) -  5283 * IN(2) - 15212 * IN(3);
    t2 = 13377 * (IN(0) - IN(2) + IN(3));
    t3 = 13377 * IN(1);

    out[0] = (t0 + t3      + (1 << 13)) >> 14;
    out[1] = (t1 + t3      + (1 << 13)) >> 14;
    out[2] = (t2           + (1 << 13)) >> 14;
    out[3] = (t0 + t1 - t3 + (1 << 13)) >> 14;
}
static av_always_inline void idct8_1d(const dctcoef *in, ptrdiff_t stride,
                                      dctcoef *out, int pass)
{
    dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;

    t0a = ((IN(0) + IN(4)) * 11585         + (1 << 13)) >> 14;
    t1a = ((IN(0) - IN(4)) * 11585         + (1 << 13)) >> 14;
    t2a =  (IN(2) *  6270 - IN(6) * 15137  + (1 << 13)) >> 14;
    t3a =  (IN(2) * 15137 + IN(6) *  6270  + (1 << 13)) >> 14;
    t4a =  (IN(1) *  3196 - IN(7) * 16069  + (1 << 13)) >> 14;
    t5a =  (IN(5) * 13623 - IN(3) *  9102  + (1 << 13)) >> 14;
    t6a =  (IN(5) *  9102 + IN(3) * 13623  + (1 << 13)) >> 14;
    t7a =  (IN(1) * 16069 + IN(7) *  3196  + (1 << 13)) >> 14;

    /* ... */

    t5 = ((t6a - t5a) * 11585 + (1 << 13)) >> 14;
    t6 = ((t6a + t5a) * 11585 + (1 << 13)) >> 14;

    /* ... */
}
static av_always_inline void iadst8_1d(const dctcoef *in, ptrdiff_t stride,
                                       dctcoef *out, int pass)
{
    dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;

    t0a = 16305 * IN(7) +  1606 * IN(0);
    t1a =  1606 * IN(7) - 16305 * IN(0);
    t2a = 14449 * IN(5) +  7723 * IN(2);
    t3a =  7723 * IN(5) - 14449 * IN(2);
    t4a = 10394 * IN(3) + 12665 * IN(4);
    t5a = 12665 * IN(3) - 10394 * IN(4);
    t6a =  4756 * IN(1) + 15679 * IN(6);
    t7a = 15679 * IN(1) -  4756 * IN(6);

    t0 = (t0a + t4a + (1 << 13)) >> 14;
    t1 = (t1a + t5a + (1 << 13)) >> 14;
    t2 = (t2a + t6a + (1 << 13)) >> 14;
    t3 = (t3a + t7a + (1 << 13)) >> 14;
    t4 = (t0a - t4a + (1 << 13)) >> 14;
    t5 = (t1a - t5a + (1 << 13)) >> 14;
    t6 = (t2a - t6a + (1 << 13)) >> 14;
    t7 = (t3a - t7a + (1 << 13)) >> 14;

    t4a = 15137U * t4 +  6270U * t5;
    t5a =  6270U * t4 - 15137U * t5;
    t6a = 15137U * t7 -  6270U * t6;
    t7a =  6270U * t7 + 15137U * t6;

    /* ... */

    out[1] = -((dctint)((1U << 13) + t4a + t6a) >> 14);
    out[6] =   (dctint)((1U << 13) + t5a + t7a) >> 14;
    t6     =   (dctint)((1U << 13) + t4a - t6a) >> 14;
    t7     =   (dctint)((1U << 13) + t5a - t7a) >> 14;

    /* ... */
}
static av_always_inline void idct16_1d(const dctcoef *in, ptrdiff_t stride,
                                       dctcoef *out, int pass)
{
    dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
    dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
    dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;

    t0a  = (dctint)((IN(0) + IN(8)) * 11585U             + (1 << 13)) >> 14;
    t1a  = (dctint)((IN(0) - IN(8)) * 11585U             + (1 << 13)) >> 14;
    t2a  = (dctint)(IN(4)  *  6270U - IN(12) * 15137U    + (1 << 13)) >> 14;
    t3a  = (dctint)(IN(4)  * 15137U + IN(12) *  6270U    + (1 << 13)) >> 14;
    t4a  = (dctint)(IN(2)  *  3196U - IN(14) * 16069U    + (1 << 13)) >> 14;
    t7a  = (dctint)(IN(2)  * 16069U + IN(14) *  3196U    + (1 << 13)) >> 14;
    t5a  = (dctint)(IN(10) * 13623U - IN(6)  *  9102U    + (1 << 13)) >> 14;
    t6a  = (dctint)(IN(10) *  9102U + IN(6)  * 13623U    + (1 << 13)) >> 14;
    t8a  = (dctint)(IN(1)  *  1606U - IN(15) * 16305U    + (1 << 13)) >> 14;
    t15a = (dctint)(IN(1)  * 16305U + IN(15) *  1606U    + (1 << 13)) >> 14;
    t9a  = (dctint)(IN(9)  * 12665U - IN(7)  * 10394U    + (1 << 13)) >> 14;
    t14a = (dctint)(IN(9)  * 10394U + IN(7)  * 12665U    + (1 << 13)) >> 14;
    t10a = (dctint)(IN(5)  *  7723U - IN(11) * 14449U    + (1 << 13)) >> 14;
    t13a = (dctint)(IN(5)  * 14449U + IN(11) *  7723U    + (1 << 13)) >> 14;
    t11a = (dctint)(IN(13) * 15679U - IN(3)  *  4756U    + (1 << 13)) >> 14;
    t12a = (dctint)(IN(13) *  4756U + IN(3)  * 15679U    + (1 << 13)) >> 14;

    /* ... */

    t5a  = (dctint)((t6 - t5) * 11585U               + (1 << 13)) >> 14;
    t6a  = (dctint)((t6 + t5) * 11585U               + (1 << 13)) >> 14;
    t9a  = (dctint)(  t14 *  6270U - t9  * 15137U    + (1 << 13)) >> 14;
    t14a = (dctint)(  t14 * 15137U + t9  *  6270U    + (1 << 13)) >> 14;
    t10a = (dctint)(-(t13 * 15137U + t10 *  6270U)   + (1 << 13)) >> 14;
    t13a = (dctint)(  t13 *  6270U - t10 * 15137U    + (1 << 13)) >> 14;

    /* ... */

    t10a = (dctint)((t13  - t10 ) * 11585U + (1 << 13)) >> 14;
    t13a = (dctint)((t13  + t10 ) * 11585U + (1 << 13)) >> 14;
    t11  = (dctint)((t12a - t11a) * 11585U + (1 << 13)) >> 14;
    t12  = (dctint)((t12a + t11a) * 11585U + (1 << 13)) >> 14;

    out[ 0] = t0a + t15a;
    out[ 1] = t1a + t14;
    out[ 2] = t2a + t13a;
    /* ... */
    out[ 5] = t5  + t10a;
    /* ... */
    out[10] = t5  - t10a;
    /* ... */
    out[13] = t2a - t13a;
    out[14] = t1a - t14;
    out[15] = t0a - t15a;
}
static av_always_inline void iadst16_1d(const dctcoef *in, ptrdiff_t stride,
                                        dctcoef *out, int pass)
{
    dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
    dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
    dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;

    t0  = IN(15) * 16364U + IN(0)  *   804U;
    t1  = IN(15) *   804U - IN(0)  * 16364U;
    t2  = IN(13) * 15893U + IN(2)  *  3981U;
    t3  = IN(13) *  3981U - IN(2)  * 15893U;
    t4  = IN(11) * 14811U + IN(4)  *  7005U;
    t5  = IN(11) *  7005U - IN(4)  * 14811U;
    t6  = IN(9)  * 13160U + IN(6)  *  9760U;
    t7  = IN(9)  *  9760U - IN(6)  * 13160U;
    t8  = IN(7)  * 11003U + IN(8)  * 12140U;
    t9  = IN(7)  * 12140U - IN(8)  * 11003U;
    /* ... */
    t13 = IN(3)  * 15426U - IN(12) *  5520U;
    t14 = IN(1)  *  2404U + IN(14) * 16207U;
    /* ... */
    t5a  = (dctint)((1U << 13) + t5 + t13) >> 14;
    t6a  = (dctint)((1U << 13) + t6 + t14) >> 14;
    /* ... */
    t13a = (dctint)((1U << 13) + t5 - t13) >> 14;
    t14a = (dctint)((1U << 13) + t6 - t14) >> 14;

    t8  = t8a  * 16069U + t9a  *  3196U;
    t9  = t8a  *  3196U - t9a  * 16069U;
    t10 = t10a *  9102U + t11a * 13623U;
    t11 = t10a * 13623U - t11a *  9102U;
    t12 = t13a * 16069U - t12a *  3196U;
    t13 = t13a *  3196U + t12a * 16069U;
    t14 = t15a *  9102U - t14a * 13623U;
    t15 = t15a * 13623U + t14a *  9102U;
    /* ... */
    t9a  = (dctint)((1U << 13) + t9  + t13) >> 14;
    t10a = (dctint)((1U << 13) + t10 + t14) >> 14;
    /* ... */
    t13a = (dctint)((1U << 13) + t9  - t13) >> 14;
    t14a = (dctint)((1U << 13) + t10 - t14) >> 14;

    t4a = t4   * 15137U + t5   *  6270U;
    t5a = t4   *  6270U - t5   * 15137U;
    t6a = t7   * 15137U - t6   *  6270U;
    t7a = t7   *  6270U + t6   * 15137U;
    t12 = t12a * 15137U + t13a *  6270U;
    t13 = t12a *  6270U - t13a * 15137U;
    t14 = t15a * 15137U - t14a *  6270U;
    t15 = t15a *  6270U + t14a * 15137U;
    /* ... */
    out[ 3] = -((dctint)((1U << 13) + t4a + t6a) >> 14);
    out[12] =   (dctint)((1U << 13) + t5a + t7a) >> 14;
    t6      =   (dctint)((1U << 13) + t4a - t6a) >> 14;
    t7      =   (dctint)((1U << 13) + t5a - t7a) >> 14;
    out[ 1] = -(t8a + t10a);
    out[14] =   t9a + t11a;
    /* ... */
    t14a = (dctint)((1U << 13) + t12 - t14) >> 14;
    t15a = (dctint)((1U << 13) + t13 - t15) >> 14;

    out[ 7] = (dctint)(-(t2a  + t3a)  * 11585U + (1 << 13)) >> 14;
    out[ 8] = (dctint)( (t2a  - t3a)  * 11585U + (1 << 13)) >> 14;
    /* ... */
    out[ 5] = (dctint)(-(t14a + t15a) * 11585U + (1 << 13)) >> 14;
    out[10] = (dctint)( (t14a - t15a) * 11585U + (1 << 13)) >> 14;
    /* ... */
}
static av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride,
                                       dctcoef *out, int pass)
{
    /* ... */
    dctint t13 = t12a - t13a;
    dctint t14 = t15a - t14a;
    /* ... */
    dctint t16 = t16a + t17a;
    dctint t17 = t16a - t17a;
    dctint t18 = t19a - t18a;
    dctint t19 = t19a + t18a;
    dctint t20 = t20a + t21a;
    dctint t21 = t20a - t21a;
    dctint t22 = t23a - t22a;
    dctint t23 = t23a + t22a;
    dctint t24 = t24a + t25a;
    dctint t25 = t24a - t25a;
    dctint t26 = t27a - t26a;
    /* ... */
    dctint t28 = t28a + t29a;
    dctint t29 = t28a - t29a;
    dctint t30 = t31a - t30a;
    dctint t31 = t31a + t30a;

    t5a  = (dctint)((t6 - t5) * 11585U             + (1 << 13)) >> 14;
    t6a  = (dctint)((t6 + t5) * 11585U             + (1 << 13)) >> 14;
    t9a  = (dctint)(  t14 *  6270U - t9  * 15137U  + (1 << 13)) >> 14;
    t14a = (dctint)(  t14 * 15137U + t9  *  6270U  + (1 << 13)) >> 14;
    t10a = (dctint)(-(t13 * 15137U + t10 *  6270U) + (1 << 13)) >> 14;
    t13a = (dctint)(  t13 *  6270U - t10 * 15137U  + (1 << 13)) >> 14;
    t17a = (dctint)(  t30 *  3196U - t17 * 16069U  + (1 << 13)) >> 14;
    t30a = (dctint)(  t30 * 16069U + t17 *  3196U  + (1 << 13)) >> 14;
    t18a = (dctint)(-(t29 * 16069U + t18 *  3196U) + (1 << 13)) >> 14;
    t29a = (dctint)(  t29 *  3196U - t18 * 16069U  + (1 << 13)) >> 14;
    t21a = (dctint)(  t26 * 13623U - t21 *  9102U  + (1 << 13)) >> 14;
    t26a = (dctint)(  t26 *  9102U + t21 * 13623U  + (1 << 13)) >> 14;
    t22a = (dctint)(-(t25 *  9102U + t22 * 13623U) + (1 << 13)) >> 14;
    t25a = (dctint)(  t25 * 13623U - t22 *  9102U  + (1 << 13)) >> 14;

    /* ... */

    t10a = (dctint)((t13  - t10 ) * 11585U + (1 << 13)) >> 14;
    t13a = (dctint)((t13  + t10 ) * 11585U + (1 << 13)) >> 14;
    t11  = (dctint)((t12a - t11a) * 11585U + (1 << 13)) >> 14;
    t12  = (dctint)((t12a + t11a) * 11585U + (1 << 13)) >> 14;
    t18a = (dctint)(  t29  *  6270U - t18  * 15137U  + (1 << 13)) >> 14;
    t29a = (dctint)(  t29  * 15137U + t18  *  6270U  + (1 << 13)) >> 14;
    t19  = (dctint)(  t28a *  6270U - t19a * 15137U  + (1 << 13)) >> 14;
    t28  = (dctint)(  t28a * 15137U + t19a *  6270U  + (1 << 13)) >> 14;
    t20  = (dctint)(-(t27a * 15137U + t20a *  6270U) + (1 << 13)) >> 14;
    t27  = (dctint)(  t27a *  6270U - t20a * 15137U  + (1 << 13)) >> 14;
    t21a = (dctint)(-(t26  * 15137U + t21  *  6270U) + (1 << 13)) >> 14;
    t26a = (dctint)(  t26  *  6270U - t21  * 15137U  + (1 << 13)) >> 14;

    /* ... */

    t20  = (dctint)((t27a - t20a) * 11585U + (1 << 13)) >> 14;
    t27  = (dctint)((t27a + t20a) * 11585U + (1 << 13)) >> 14;
    t21a = (dctint)((t26  - t21 ) * 11585U + (1 << 13)) >> 14;
    t26a = (dctint)((t26  + t21 ) * 11585U + (1 << 13)) >> 14;
    t22  = (dctint)((t25a - t22a) * 11585U + (1 << 13)) >> 14;
    t25  = (dctint)((t25a + t22a) * 11585U + (1 << 13)) >> 14;
    t23a = (dctint)((t24  - t23 ) * 11585U + (1 << 13)) >> 14;
    t24a = (dctint)((t24  + t23 ) * 11585U + (1 << 13)) >> 14;

    /* ... */
    out[ 1] = t1   + t30a;
    /* ... */
    out[ 3] = t3   + t28a;
    /* ... */
    out[ 5] = t5a  + t26a;
    out[ 6] = t6a  + t25;
    out[ 7] = t7   + t24a;
    out[ 8] = t8   + t23a;
    out[ 9] = t9a  + t22;
    /* ... */
    out[11] = t11a + t20;
    out[12] = t12a + t19a;
    out[13] = t13  + t18;
    out[14] = t14a + t17a;
    /* ... */
    out[17] = t14a - t17a;
    out[18] = t13  - t18;
    out[19] = t12a - t19a;
    out[20] = t11a - t20;
    /* ... */
    out[22] = t9a  - t22;
    out[23] = t8   - t23a;
    out[24] = t7   - t24a;
    out[25] = t6a  - t25;
    out[26] = t5a  - t26a;
    /* ... */
    out[28] = t3   - t28a;
    /* ... */
    out[30] = t1   - t30a;
    /* ... */
}
#undef itxfm_wrapper

/* ... */

#define init_itxfm(tx, sz) \
    dsp->itxfm_add[tx][DCT_DCT]   = idct_idct_##sz##_add_c; \
    dsp->itxfm_add[tx][DCT_ADST]  = iadst_idct_##sz##_add_c; \
    dsp->itxfm_add[tx][ADST_DCT]  = idct_iadst_##sz##_add_c; \
    dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_##sz##_add_c

#define init_idct(tx, nm) \
    dsp->itxfm_add[tx][DCT_DCT]   = \
    dsp->itxfm_add[tx][ADST_DCT]  = \
    dsp->itxfm_add[tx][DCT_ADST]  = \
    dsp->itxfm_add[tx][ADST_ADST] = nm##_add_c

/* ... */
static av_always_inline void loop_filter(pixel *dst, int E, int I, int H,
                                         ptrdiff_t stridea, ptrdiff_t strideb,
                                         int wd)
{
    /* ... local variables (i, F, ...) ... */
    for (i = 0; i < 8; i++, dst += stridea) {
        int p3 = dst[strideb * -4], p2 = dst[strideb * -3];
        int p1 = dst[strideb * -2], p0 = dst[strideb * -1];
        int q0 = dst[strideb * +0], q1 = dst[strideb * +1];
        int q2 = dst[strideb * +2], q3 = dst[strideb * +3];
        int fm = FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I &&
                 FFABS(p1 - p0) <= I && FFABS(q1 - q0) <= I &&
                 FFABS(q2 - q1) <= I && FFABS(q3 - q2) <= I &&
                 FFABS(p0 - q0) * 2 + (FFABS(p1 - q1) >> 1) <= E;
        int flat8out, flat8in;

        /* ... */

        /* wider context, only loaded for the 16-pixel filter (wd >= 16) */
        p7 = dst[strideb * -8];
        p6 = dst[strideb * -7];
        p5 = dst[strideb * -6];
        p4 = dst[strideb * -5];
        q4 = dst[strideb * +4];
        q5 = dst[strideb * +5];
        q6 = dst[strideb * +6];
        q7 = dst[strideb * +7];

        flat8out = FFABS(p7 - p0) <= F && FFABS(p6 - p0) <= F &&
                   FFABS(p5 - p0) <= F && FFABS(p4 - p0) <= F &&
                   FFABS(q4 - q0) <= F && FFABS(q5 - q0) <= F &&
                   FFABS(q6 - q0) <= F && FFABS(q7 - q0) <= F;

        flat8in = FFABS(p3 - p0) <= F && FFABS(p2 - p0) <= F &&
                  FFABS(p1 - p0) <= F && FFABS(q1 - q0) <= F &&
                  FFABS(q2 - q0) <= F && FFABS(q3 - q0) <= F;

        if (wd >= 16 && flat8out && flat8in) {
            dst[strideb * -7] = (p7 + p7 + p7 + p7 + p7 + p7 + p7 + p6 * 2 +
                                 p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4;
            dst[strideb * -6] = (p7 + p7 + p7 + p7 + p7 + p7 + p6 + p5 * 2 +
                                 p4 + p3 + p2 + p1 + p0 + q0 + q1 + 8) >> 4;
            dst[strideb * -5] = (p7 + p7 + p7 + p7 + p7 + p6 + p5 + p4 * 2 +
                                 p3 + p2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4;
            dst[strideb * -4] = (p7 + p7 + p7 + p7 + p6 + p5 + p4 + p3 * 2 +
                                 p2 + p1 + p0 + q0 + q1 + q2 + q3 + 8) >> 4;
            dst[strideb * -3] = (p7 + p7 + p7 + p6 + p5 + p4 + p3 + p2 * 2 +
                                 p1 + p0 + q0 + q1 + q2 + q3 + q4 + 8) >> 4;
            dst[strideb * -2] = (p7 + p7 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
                                 p0 + q0 + q1 + q2 + q3 + q4 + q5 + 8) >> 4;
            dst[strideb * -1] = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
                                 q0 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
            dst[strideb * +0] = (p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
                                 q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4;
            dst[strideb * +1] = (p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
                                 q2 + q3 + q4 + q5 + q6 + q7 + q7 + 8) >> 4;
            dst[strideb * +2] = (p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
                                 q3 + q4 + q5 + q6 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +3] = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 +
                                 q4 + q5 + q6 + q7 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +4] = (p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 +
                                 q5 + q6 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +5] = (p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 +
                                 q6 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +6] = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 +
                                 q7 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
        } else if (wd >= 8 && flat8in) {
            dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3;
            dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3;
            dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3;
            dst[strideb * +0] = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3;
            dst[strideb * +1] = (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3;
            dst[strideb * +2] = (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3;
        } else {
            /* ... hev test and the narrow filters ... */
        }
    }
}
#define lf_8_fn(dir, wd, stridea, strideb) \
static void loop_filter_##dir##_##wd##_8_c(uint8_t *_dst, \
                                           ptrdiff_t stride, \
                                           int E, int I, int H) \
{ \
    pixel *dst = (pixel *) _dst; \
    stride /= sizeof(pixel); \
    loop_filter(dst, E, I, H, stridea, strideb, wd); \
}

#define lf_8_fns(wd) \
lf_8_fn(h, wd, stride, 1) \
lf_8_fn(v, wd, 1, stride)

/* ... */

#define lf_16_fn(dir, stridea) \
static void loop_filter_##dir##_16_16_c(uint8_t *dst, \
                                        ptrdiff_t stride, \
                                        int E, int I, int H) \
{ \
    loop_filter_##dir##_16_8_c(dst, stride, E, I, H); \
    loop_filter_##dir##_16_8_c(dst + 8 * stridea, stride, E, I, H); \
}

/* ... */

#define lf_mix_fn(dir, wd1, wd2, stridea) \
static void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \
                                                  ptrdiff_t stride, \
                                                  int E, int I, int H) \
{ \
    loop_filter_##dir##_##wd1##_8_c(dst, stride, E & 0xff, I & 0xff, H & 0xff); \
    loop_filter_##dir##_##wd2##_8_c(dst + 8 * stridea, stride, E >> 8, I >> 8, H >> 8); \
}

#define lf_mix_fns(wd1, wd2) \
lf_mix_fn(h, wd1, wd2, stride) \
lf_mix_fn(v, wd1, wd2, sizeof(pixel))
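Each "mix" wrapper filters two adjacent 8-pixel edges with independent strengths, so E, I and H each carry two 8-bit thresholds packed into one int (low byte for the first edge, as the & 0xff / >> 8 above shows). A hedged helper illustrating that packing (name invented):

/* Hypothetical: pack the per-edge thresholds for a mixed-width call. */
static int pack_lf_thresholds(int first_edge, int second_edge)
{
    return (first_edge & 0xff) | (second_edge << 8);
}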
static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
                                    const uint8_t *src, ptrdiff_t src_stride,
                                    int w, int h)
{
    /* ... */
}

static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                   const uint8_t *_src, ptrdiff_t src_stride,
                                   int w, int h)
{
    /* ... */
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    /* ... */
    for (x = 0; x < w; x += 4) { /* ... */ }
    /* ... */
}

#define fpel_fn(type, sz) \
static void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                         const uint8_t *src, ptrdiff_t src_stride, \
                         int h, int mx, int my) \
{ \
    type##_c(dst, dst_stride, src, src_stride, sz, h); \
}

#define copy_avg_fn(sz) \
fpel_fn(copy, sz) \
fpel_fn(avg,  sz)

/* ... */
#define FILTER_8TAP(src, x, F, stride) \
    av_clip_pixel((F[0] * src[x + -3 * stride] + \
                   F[1] * src[x + -2 * stride] + \
                   F[2] * src[x + -1 * stride] + \
                   F[3] * src[x + +0 * stride] + \
                   F[4] * src[x + +1 * stride] + \
                   F[5] * src[x + +2 * stride] + \
                   F[6] * src[x + +3 * stride] + \
                   F[7] * src[x + +4 * stride] + 64) >> 7)
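FILTER_8TAP is a plain 8-tap FIR in 7-bit fixed point: the VP9 sub-pel filter taps sum to 128, hence the +64 rounding term and the >> 7. A minimal hedged sketch of the same computation on plain 8-bit samples, outside the pixel/bit-depth template (helper name invented):

#include <stdint.h>

/* Hypothetical standalone version: apply an 8-tap filter (taps summing
 * to 128) horizontally at position x, with rounding and clamping. */
static uint8_t filter_8tap_px(const uint8_t *src, int x, const int16_t F[8])
{
    int sum = 0, i;
    for (i = 0; i < 8; i++)
        sum += F[i] * src[x + i - 3];      /* taps cover src[x-3] .. src[x+4] */
    sum = (sum + 64) >> 7;                 /* round, drop the 7 fraction bits */
    return sum < 0 ? 0 : sum > 255 ? 255 : sum;   /* av_clip_pixel equivalent */
}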
static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                          const uint8_t *_src, ptrdiff_t src_stride,
                                          int w, int h, ptrdiff_t ds,
                                          const int16_t *filter, int avg)
{
    /* ... */
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    /* ... per row: */
    for (x = 0; x < w; x++)
        if (avg) {
            dst[x] = (dst[x] + FILTER_8TAP(src, x, filter, ds) + 1) >> 1;
        } else {
            dst[x] = FILTER_8TAP(src, x, filter, ds);
        }
    /* ... */
}

#define filter_8tap_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                                const uint8_t *src, ptrdiff_t src_stride, \
                                                int w, int h, const int16_t *filter) \
{ \
    do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
}

/* ... */

#undef filter_8tap_1d_fn

static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                          const uint8_t *_src, ptrdiff_t src_stride,
                                          int w, int h, const int16_t *filterx,
                                          const int16_t *filtery, int avg)
{
    /* ... temporary buffer for the horizontal pass ... */
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    src -= src_stride * 3;
    /* ... horizontal pass over h + 7 rows: */
    for (x = 0; x < w; x++) { /* ... filter with filterx into tmp ... */ }
    /* ... */
    tmp_ptr = tmp + 64 * 3;
    /* ... vertical pass: */
    for (x = 0; x < w; x++)
        if (avg) {
            dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filtery, 64) + 1) >> 1;
        } else {
            dst[x] = FILTER_8TAP(tmp_ptr, x, filtery, 64);
        }
    /* ... */
}

#define filter_8tap_2d_fn(opn, opa) \
static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                           const uint8_t *src, ptrdiff_t src_stride, \
                                           int w, int h, const int16_t *filterx, \
                                           const int16_t *filtery) \
{ \
    do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \
}

/* ... */

#undef filter_8tap_2d_fn

#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                              const uint8_t *src, ptrdiff_t src_stride, \
                                              int h, int mx, int my) \
{ \
    avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
                            ff_vp9_subpel_filters[type_idx][dir_m]); \
}

#define filter_fn_2d(sz, type, type_idx, avg) \
static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                           const uint8_t *src, ptrdiff_t src_stride, \
                                           int h, int mx, int my) \
{ \
    avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
                       ff_vp9_subpel_filters[type_idx][mx], \
                       ff_vp9_subpel_filters[type_idx][my]); \
}
#define FILTER_BILIN(src, x, mxy, stride) \
    (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
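FILTER_BILIN is linear interpolation with a 4-bit fractional position mxy (0..15); mxy = 8 yields the rounded midpoint, and no clamping is needed because the result stays between the two input samples. A hedged standalone check (names invented):

#include <assert.h>

/* Hypothetical demo of the rounding behaviour: interpolate between two
 * samples a and b at fractional position mxy/16. */
static int bilin(int a, int b, int mxy)
{
    return a + ((mxy * (b - a) + 8) >> 4);
}

static void bilin_check(void)
{
    assert(bilin(10, 20, 0)  == 10);   /* mxy = 0  -> left sample  */
    assert(bilin(10, 20, 8)  == 15);   /* mxy = 8  -> rounded mean */
    assert(bilin(10, 20, 15) == 19);   /* mxy = 15 -> close to b   */
}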
static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                           const uint8_t *_src, ptrdiff_t src_stride,
                                           int w, int h, ptrdiff_t ds,
                                           int mxy, int avg)
{
    /* ... */
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    /* ... per row: */
    for (x = 0; x < w; x++) { /* ... */ }
    /* ... */
}

#define bilin_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                                 const uint8_t *src, ptrdiff_t src_stride, \
                                                 int w, int h, int mxy) \
{ \
    do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
}

/* ... */

static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                           const uint8_t *_src, ptrdiff_t src_stride,
                                           int w, int h, int mx, int my, int avg)
{
    /* ... */
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    /* ... horizontal pass into a temporary buffer: */
    for (x = 0; x < w; x++) { /* ... */ }
    /* ... vertical pass: */
    for (x = 0; x < w; x++)
        if (avg) {
            dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
        } else {
            /* ... */
        }
    /* ... */
}

#define bilin_2d_fn(opn, opa) \
static av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                            const uint8_t *src, ptrdiff_t src_stride, \
                                            int w, int h, int mx, int my) \
{ \
    do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
}

/* ... */

#define bilinf_fn_1d(sz, dir, dir_m, avg) \
static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                      const uint8_t *src, ptrdiff_t src_stride, \
                                      int h, int mx, int my) \
{ \
    avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \
}

#define bilinf_fn_2d(sz, avg) \
static void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                   const uint8_t *src, ptrdiff_t src_stride, \
                                   int h, int mx, int my) \
{ \
    avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \
}

/* ... in the alternative preprocessor branch the same macros are empty: */
#define bilinf_fn_1d(a, b, c, d)
#define bilinf_fn_2d(a, b)
/* ... */

#define filter_fn(sz, avg) \
filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \
filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \
filter_fn_2d(sz,        regular, FILTER_8TAP_REGULAR, avg) \
filter_fn_1d(sz, h, mx, smooth,  FILTER_8TAP_SMOOTH,  avg) \
filter_fn_1d(sz, v, my, smooth,  FILTER_8TAP_SMOOTH,  avg) \
filter_fn_2d(sz,        smooth,  FILTER_8TAP_SMOOTH,  avg) \
filter_fn_1d(sz, h, mx, sharp,   FILTER_8TAP_SHARP,   avg) \
filter_fn_1d(sz, v, my, sharp,   FILTER_8TAP_SHARP,   avg) \
filter_fn_2d(sz,        sharp,   FILTER_8TAP_SHARP,   avg) \
bilinf_fn_1d(sz, h, mx, avg) \
bilinf_fn_1d(sz, v, my, avg) \
bilinf_fn_2d(sz, avg)
#define filter_fn_set(avg) \
filter_fn(64, avg) \
filter_fn(32, avg) \
filter_fn(16, avg) \
filter_fn(8,  avg) \
filter_fn(4,  avg)

/* ... */

#undef filter_fn_set

/* ... inside the per-bit-depth MC init function ... */
    ff_vp9dsp_mc_init_10(dsp);
/* ... */

#define init_fpel(idx1, idx2, sz, type) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = type##sz##_c; \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = type##sz##_c

#define init_copy_avg(idx, sz) \
    init_fpel(idx, 0, sz, copy); \
    init_fpel(idx, 1, sz, avg)

/* ... */

#undef init_copy_avg
#define init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c

#if BIT_DEPTH == 12
#define init_subpel1 init_subpel1_bd_aware
#else
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \
    init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type); \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c
#endif

#define init_subpel2(idx, idxh, idxv, dir, type) \
    init_subpel1(0, idx, idxh, idxv, 64, dir, type); \
    init_subpel1(1, idx, idxh, idxv, 32, dir, type); \
    init_subpel1(2, idx, idxh, idxv, 16, dir, type); \
    init_subpel1(3, idx, idxh, idxv,  8, dir, type); \
    init_subpel1(4, idx, idxh, idxv,  4, dir, type)

#define init_subpel3(idx, type) \
    init_subpel2(idx, 1, 1, hv, type); \
    init_subpel2(idx, 0, 1, v,  type); \
    init_subpel2(idx, 1, 0, h,  type)

    /* ... */

#undef init_subpel1_bd_aware
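dsp->mc is indexed by block-size index (0 = 64x64 down to 4 = 4x4, as init_subpel2 shows), filter type, put (0) or avg (1), and whether the horizontal and vertical motion components have a fractional part. A hedged usage sketch (the wrapper name and variables are invented; it assumes the VP9DSPContext and filter enums are in scope):

/* Hypothetical call site: 16x16 "put" with the regular 8-tap filter and
 * fractional motion in both directions (mx, my are the sub-pel phases). */
static void mc_block16(VP9DSPContext *dsp, uint8_t *dst, ptrdiff_t dst_stride,
                       const uint8_t *ref, ptrdiff_t ref_stride,
                       int h, int mx, int my)
{
    dsp->mc[2][FILTER_8TAP_REGULAR][0][!!mx][!!my](dst, dst_stride,
                                                   ref, ref_stride, h, mx, my);
}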
static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                              const uint8_t *_src, ptrdiff_t src_stride,
                                              int w, int h, int mx, int my,
                                              int dx, int dy, int avg,
                                              const int16_t (*filters)[8])
{
    int tmp_h = (((h - 1) * dy + my) >> 4) + 8;
    /* ... */
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    src -= src_stride * 3;
    /* ... horizontal pass over tmp_h rows, stepping the phase by dx: */
        int imx = mx, ioff = 0;

        for (x = 0; x < w; x++) {
            /* ... */
        }
    /* ... vertical pass, stepping the phase by dy: */
    tmp_ptr = tmp + 64 * 3;
        for (x = 0; x < w; x++) {
            /* ... */
        }
        /* ... */
        tmp_ptr += (my >> 4) * 64;
    /* ... */
}

#define scaled_filter_8tap_fn(opn, opa) \
static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                            const uint8_t *src, ptrdiff_t src_stride, \
                                            int w, int h, int mx, int my, int dx, int dy, \
                                            const int16_t (*filters)[8]) \
{ \
    do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
                     opa, filters); \
}

/* ... */

#undef scaled_filter_8tap_fn

#define scaled_filter_fn(sz, type, type_idx, avg) \
static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                           const uint8_t *src, ptrdiff_t src_stride, \
                                           int h, int mx, int my, int dx, int dy) \
{ \
    avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \
                        ff_vp9_subpel_filters[type_idx]); \
}

static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                               const uint8_t *_src, ptrdiff_t src_stride,
                                               int w, int h, int mx, int my,
                                               int dx, int dy, int avg)
{
    int tmp_h = (((h - 1) * dy + my) >> 4) + 2;
    /* ... */
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    /* ... horizontal pass over tmp_h rows: */
        int imx = mx, ioff = 0;

        for (x = 0; x < w; x++) {
            /* ... */
        }
    /* ... vertical pass: */
        for (x = 0; x < w; x++)
            if (avg) {
                dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
            } else {
                /* ... */
            }
        /* ... */
        tmp_ptr += (my >> 4) * 64;
    /* ... */
}

#define scaled_bilin_fn(opn, opa) \
static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                             const uint8_t *src, ptrdiff_t src_stride, \
                                             int w, int h, int mx, int my, int dx, int dy) \
{ \
    do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \
}

/* ... */

#undef scaled_bilin_fn

#define scaled_bilinf_fn(sz, avg) \
static void avg##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                        const uint8_t *src, ptrdiff_t src_stride, \
                                        int h, int mx, int my, int dx, int dy) \
{ \
    avg##_scaled_bilin_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy); \
}

/* ... in the alternative preprocessor branch the macro is empty: */
#define scaled_bilinf_fn(a, b)
/* ... */

#define scaled_filter_fns(sz, avg) \
scaled_filter_fn(sz, regular, FILTER_8TAP_REGULAR, avg) \
scaled_filter_fn(sz, smooth,  FILTER_8TAP_SMOOTH,  avg) \
scaled_filter_fn(sz, sharp,   FILTER_8TAP_SHARP,   avg) \
scaled_bilinf_fn(sz, avg)

#define scaled_filter_fn_set(avg) \
scaled_filter_fns(64, avg) \
scaled_filter_fns(32, avg) \
scaled_filter_fns(16, avg) \
scaled_filter_fns(8,  avg) \
scaled_filter_fns(4,  avg)

/* ... */

#undef scaled_filter_fns
#undef scaled_filter_fn_set
#undef scaled_filter_fn
#undef scaled_bilinf_fn
#define init_scaled_bd_aware(idx1, idx2, sz, type) \
    dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \
    dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \
    dsp->smc[idx1][FILTER_8TAP_SHARP  ][idx2] = type##_scaled_sharp_##sz##_c

/* ... inside the per-bit-depth scaled MC init function ... */
#if BIT_DEPTH == 12
    ff_vp9dsp_scaled_mc_init_10(dsp);
#define init_scaled(a,b,c,d) init_scaled_bd_aware(a,b,c,d)
#else
#define init_scaled(idx1, idx2, sz, type) \
    init_scaled_bd_aware(idx1, idx2, sz, type); \
    dsp->smc[idx1][FILTER_BILINEAR    ][idx2] = type##_scaled_bilin_##sz##_c
#endif

#define init_scaled_put_avg(idx, sz) \
    init_scaled(idx, 0, sz, put); \
    init_scaled(idx, 1, sz, avg)

/* ... */

#undef init_scaled_put_avg
#undef init_scaled
#undef init_scaled_bd_aware
av_cold void FUNC(ff_vp9dsp_init)(VP9DSPContext *dsp)
{
    FUNC(ff_vp9dsp_intrapred_init)(dsp);
    vp9dsp_itxfm_init(dsp);
    vp9dsp_loopfilter_init(dsp);
    FUNC(ff_vp9dsp_mc_init)(dsp);
    FUNC(ff_vp9dsp_scaled_mc_init)(dsp);
}
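This template is compiled once per supported bit depth, with FUNC() decorating each entry point with the depth; judging from the _10 helpers referenced above, that yields ff_vp9dsp_init_8, ff_vp9dsp_init_10 and ff_vp9dsp_init_12. A hedged sketch of a dispatcher built on that assumption (not the actual FFmpeg selection code, which lives outside this file):

/* Hypothetical dispatcher: pick the per-bit-depth init generated from
 * this template. */
static void vp9dsp_init_for_depth(VP9DSPContext *dsp, int bpp)
{
    if (bpp == 8)
        ff_vp9dsp_init_8(dsp);
    else if (bpp == 10)
        ff_vp9dsp_init_10(dsp);
    else
        ff_vp9dsp_init_12(dsp);
}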