00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include "libavutil/imgutils.h"
00029 #include "internal.h"
00030 #include "dsputil.h"
00031 #include "avcodec.h"
00032 #include "mpegvideo.h"
00033 #include "h264.h"
00034 #include "h264data.h"
00035 #include "h264_mvpred.h"
00036 #include "golomb.h"
00037 #include "mathops.h"
00038 #include "rectangle.h"
00039 #include "thread.h"
00040 #include "vdpau_internal.h"
00041 #include "libavutil/avassert.h"
00042
00043 #include "cabac.h"
00044
00045
00046 #include <assert.h>
00047
/* rem6[q] == q % 6 for every legal QP value (0..QP_MAX_NUM); together with
 * div6[] below it avoids a runtime divide/modulo when selecting the dequant
 * scale for a given quantizer. */
static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
00051
/* div6[q] == q / 6 for every legal QP value (0..QP_MAX_NUM); used as the
 * left-shift amount when building the dequant tables. */
static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
00055
/* Candidate output pixel formats for full-range (JPEG) 4:2:0 H.264, hwaccel
 * formats first; the list is terminated by PIX_FMT_NONE. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_YUVJ420P,
    PIX_FMT_NONE
};
00062
00063 void ff_h264_write_back_intra_pred_mode(H264Context *h){
00064 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
00065
00066 AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
00067 mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
00068 mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
00069 mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
00070 }
00071
/**
 * Check whether the cached intra4x4 prediction modes are usable given which
 * neighbour samples are actually available, replacing modes that reference a
 * missing neighbour with a usable DC fallback where the standard allows it.
 * @return 0 on success, -1 if a mode strictly requires a missing neighbour
 */
int ff_h264_check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* Remap tables indexed by the cached mode:
     *   <0 : the mode needs that neighbour -> error
     *    0 : mode is fine without it -> keep
     *   >0 : replacement mode to store instead */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;

    if(!(h->top_samples_available&0x8000)){ /* top neighbour row missing */
        for(i=0; i<4; i++){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            if(status<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }

    if((h->left_samples_available&0x8888)!=0x8888){ /* some left 4x4 neighbours missing */
        static const int mask[4]={0x8000,0x2000,0x80,0x20}; /* availability bit per 4x4 row */
        for(i=0; i<4; i++){
            if(!(h->left_samples_available&mask[i])){
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                if(status<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                }
            }
        }
    }

    return 0;
}
00110
/**
 * Check whether a 16x16 luma / chroma intra prediction mode is usable given
 * the available neighbour samples, remapping it to an allowed DC variant
 * when a neighbour is missing.
 * @return the (possibly remapped) mode, or -1 if the mode cannot be used
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma){
    MpegEncContext * const s = &h->s;
    /* remap tables: <0 means the mode strictly needs that neighbour,
     * otherwise the value is the replacement mode */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    if(mode > 6U) { /* unsigned compare also rejects negative modes */
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        /* NOTE(review): partial left availability (only one of the two halves)
         * presumably occurs with MBAFF; it selects one of the half-left DC
         * modes — confirm against the pred8x8 implementations. */
        if(is_chroma && (h->left_samples_available&0x8080)){
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}
00145
00146 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
00147 int i, si, di;
00148 uint8_t *dst;
00149 int bufidx;
00150
00151
00152 h->nal_ref_idc= src[0]>>5;
00153 h->nal_unit_type= src[0]&0x1F;
00154
00155 src++; length--;
00156
00157 #if HAVE_FAST_UNALIGNED
00158 # if HAVE_FAST_64BIT
00159 # define RS 7
00160 for(i=0; i+1<length; i+=9){
00161 if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
00162 # else
00163 # define RS 3
00164 for(i=0; i+1<length; i+=5){
00165 if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
00166 # endif
00167 continue;
00168 if(i>0 && !src[i]) i--;
00169 while(src[i]) i++;
00170 #else
00171 # define RS 0
00172 for(i=0; i+1<length; i+=2){
00173 if(src[i]) continue;
00174 if(i>0 && src[i-1]==0) i--;
00175 #endif
00176 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
00177 if(src[i+2]!=3){
00178
00179 length=i;
00180 }
00181 break;
00182 }
00183 i-= RS;
00184 }
00185
00186 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
00187 si=h->rbsp_buffer_size[bufidx];
00188 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
00189 dst= h->rbsp_buffer[bufidx];
00190 if(si != h->rbsp_buffer_size[bufidx])
00191 memset(dst + length, 0, FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
00192
00193 if (dst == NULL){
00194 return NULL;
00195 }
00196
00197 if(i>=length-1){
00198 *dst_length= length;
00199 *consumed= length+1;
00200 if(h->s.avctx->flags2 & CODEC_FLAG2_FAST){
00201 return src;
00202 }else{
00203 memcpy(dst, src, length);
00204 return dst;
00205 }
00206 }
00207
00208
00209 memcpy(dst, src, i);
00210 si=di=i;
00211 while(si+2<length){
00212
00213 if(src[si+2]>3){
00214 dst[di++]= src[si++];
00215 dst[di++]= src[si++];
00216 }else if(src[si]==0 && src[si+1]==0){
00217 if(src[si+2]==3){
00218 dst[di++]= 0;
00219 dst[di++]= 0;
00220 si+=3;
00221 continue;
00222 }else
00223 goto nsc;
00224 }
00225
00226 dst[di++]= src[si++];
00227 }
00228 while(si<length)
00229 dst[di++]= src[si++];
00230 nsc:
00231
00232 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
00233
00234 *dst_length= di;
00235 *consumed= si + 1;
00236
00237 return dst;
00238 }
00239
00244 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00245 int v= *src;
00246 int r;
00247
00248 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00249
00250 for(r=1; r<9; r++){
00251 if(v&1) return r;
00252 v>>=1;
00253 }
00254 return 0;
00255 }
00256
00257 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
00258 int y_offset, int list){
00259 int raw_my= h->mv_cache[list][ scan8[n] ][1];
00260 int filter_height= (raw_my&3) ? 2 : 0;
00261 int full_my= (raw_my>>2) + y_offset;
00262 int top = full_my - filter_height, bottom = full_my + height + filter_height;
00263
00264 return FFMAX(abs(top), bottom);
00265 }
00266
/**
 * For each list actually used by partition n, record in refs[list][ref] the
 * lowest row of the reference picture that the partition needs, and count the
 * number of distinct references touched in nrefs[list]. Only references that
 * are being decoded by another thread (different thread_opaque or different
 * field structure) are recorded.
 */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
                                     int y_offset, int list0, int list1, int *nrefs){
    MpegEncContext * const s = &h->s;
    int my;

    y_offset += 16*(s->mb_y >> MB_FIELD);

    if(list0){
        int ref_n = h->ref_cache[0][ scan8[n] ];
        Picture *ref= &h->ref_list[0][ref_n];

        /* Frame-threading: only wait on pictures owned by a different thread
         * context, or whose field/frame structure differs from ours. */
        if(ref->thread_opaque != s->current_picture.thread_opaque ||
           (ref->reference&3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
            if (refs[0][ref_n] < 0) nrefs[0] += 1; /* first time this ref is seen */
            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
        }
    }

    if(list1){
        int ref_n = h->ref_cache[1][ scan8[n] ];
        Picture *ref= &h->ref_list[1][ref_n];

        if(ref->thread_opaque != s->current_picture.thread_opaque ||
           (ref->reference&3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
            if (refs[1][ref_n] < 0) nrefs[1] += 1;
            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
        }
    }
}
00301
/**
 * Wait until all reference frames are available for MC operations on the
 * current macroblock: walk every partition of the MB, collect the lowest
 * reference row each one needs, then block on ff_thread_await_progress until
 * the decoding threads have produced those rows.
 */
static void await_references(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    int refs[2][48]; /* lowest needed row per list/ref, -1 = untouched */
    int nrefs[2] = {0};
    int ref, list;

    memset(refs, -1, sizeof(refs));

    if(IS_16X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    }else if(IS_16X8(mb_type)){
        get_lowest_part_y(h, refs, 0, 8, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else if(IS_8X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int y_offset= (i&2)<<2;

            if(IS_SUB_8X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_8X4(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 4, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_4X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_y_offset= y_offset + 2*(j&2);
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                }
            }
        }
    }

    for(list=h->list_count-1; list>=0; list--){
        for(ref=0; ref<48 && nrefs[list]; ref++){
            int row = refs[list][ref];
            if(row >= 0){
                Picture *ref_pic = &h->ref_list[list][ref];
                int ref_field = ref_pic->reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height = 16*s->mb_height >> ref_field_picture;

                row <<= MB_MBAFF; /* MBAFF rows cover two picture rows */
                nrefs[list]--;

                /* Convert the row between frame and field coordinate systems
                 * depending on how we and the reference were coded. */
                if(!FIELD_PICTURE && ref_field_picture){ /* frame referencing two fields */
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                }else if(FIELD_PICTURE && !ref_field_picture){ /* field referencing one field of a frame */
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                }else if(FIELD_PICTURE){
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                }else{
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                }
            }
        }
    }
}
00391
#if 0
/* Disabled reference implementation of the 4x4 luma DC forward transform.
 * It references an undeclared 'stride' (which is why it cannot compile) and
 * is kept only for documentation purposes. */
static void h264_luma_dc_dct_c(DCTELEM *block){
//    const int qmul= dequant_coeff[qscale][0];
    int i;
    int temp[16];
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
00431
00432 #undef xStride
00433 #undef stride
00434
#if 0
/* Disabled reference implementation of the 2x2 chroma DC forward transform
 * (Hadamard); kept only for documentation purposes. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
00457
/**
 * Motion-compensate one partition from one reference list: quarter-pel luma
 * via qpix_op plus chroma via chroma_op (or full qpel chroma for 4:4:4),
 * going through emulated_edge_mc whenever the reference block reaches
 * outside the picture.
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
                           int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; /* MVs in 1/4-pel units */
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2); /* sub-pel phase selects the qpel function */
    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
    uint8_t * src_y = pic->data[0] + offset;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* sub-pel interpolation needs extra source pixels on each side */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16 > pic_width + extra_width
       || full_my + 16 > pic_height + extra_height){
        /* reference block overlaps the picture edge: build a padded copy */
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); /* FIXME try variable height perhaps? */
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(chroma444){
        /* 4:4:4 chroma uses the same size and qpel filters as luma */
        src_cb = pic->data[1] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5, full_mx-2, full_my-2, pic_width, pic_height);
            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize);
        if(!square){
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
        }

        src_cr = pic->data[2] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5, full_mx-2, full_my-2, pic_width, pic_height);
            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize);
        if(!square){
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        }
        return;
    }

    if(MB_FIELD){
        /* adjust the chroma MV for the field parity difference between the
         * current MB pair and the reference field */
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
00542
/**
 * Standard (unweighted) prediction for one partition: predict from list 0
 * with the "put" functions, then, for bi-prediction, average in list 1 with
 * the "avg" functions.
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1, int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance the destination pointers to this partition's position */
    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if(chroma444){
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    }else{
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma444);

        /* second reference averages onto the first prediction */
        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma444);
    }
}
00581
/**
 * Weighted prediction for one partition. For bi-prediction both references
 * are predicted into separate buffers and then combined with either implicit
 * (use_weight==2) or explicit biweights; for uni-prediction the single
 * prediction is scaled in place with the explicit weights.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1, int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;

    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if(chroma444){
        /* 4:4:4 chroma is the same size as luma, so reuse the luma weights */
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op = luma_weight_op;
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    }else{
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);

        if(h->use_weight == 2){
            /* implicit: weights sum to 64, selected per reference pair */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y, tmp_y, h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit: per-reference weight and offset from the slice header */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma444);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}
00655
00656 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
00657 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00658 int x_offset, int y_offset,
00659 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00660 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00661 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00662 int list0, int list1, int pixel_shift, int chroma444){
00663 if((h->use_weight==2 && list0 && list1
00664 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
00665 || h->use_weight==1)
00666 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00667 x_offset, y_offset, qpix_put, chroma_put,
00668 weight_op[0], weight_op[3], weight_avg[0],
00669 weight_avg[3], list0, list1, pixel_shift, chroma444);
00670 else
00671 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00672 x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
00673 chroma_avg, list0, list1, pixel_shift, chroma444);
00674 }
00675
/**
 * Prefetch the motion-compensation source of the first partition into the
 * CPU cache to hide memory latency. Pure performance hint via dsp.prefetch;
 * no functional effect on the decoded output.
 */
static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if(refn >= 0){
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        /* NOTE(review): the luma offset shifts by h->pixel_shift while the
         * chroma path below uses the pixel_shift parameter — presumably the
         * two are always equal here; confirm at the call sites. */
        int off= ((mx+64)<<h->pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        if(chroma444){
            s->dsp.prefetch(src[1]+off, s->linesize, 4);
            s->dsp.prefetch(src[2]+off, s->linesize, 4);
        }else{
            off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
            s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
        }
    }
}
00696
/**
 * Perform all motion compensation for one inter macroblock: wait for the
 * referenced rows (frame threading), then dispatch every partition/sub-
 * partition shape to mc_part with the matching block-size function tables.
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    prefetch_motion(h, 0, pixel_shift, chroma444);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
    }else if(IS_16X8(mb_type)){
        mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else if(IS_8X16(mb_type)){
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma444);
                }
            }
        }
    }

    prefetch_motion(h, 1, pixel_shift, chroma444);
}
00797
/* Generate bit-depth-specific wrappers (hl_motion_8 / hl_motion_16) around
 * hl_motion with the pixel_shift argument baked in, so the always-inline
 * body is specialized at compile time for each depth. */
#define hl_motion_fn(sh, bits) \
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                       uint8_t *dest_y, \
                                       uint8_t *dest_cb, uint8_t *dest_cr, \
                                       qpel_mc_func (*qpix_put)[16], \
                                       h264_chroma_mc_func (*chroma_put), \
                                       qpel_mc_func (*qpix_avg)[16], \
                                       h264_chroma_mc_func (*chroma_avg), \
                                       h264_weight_func *weight_op, \
                                       h264_biweight_func *weight_avg, \
                                       int chroma444) \
{ \
    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
}
hl_motion_fn(0, 8);
hl_motion_fn(1, 16);
00815
00816 static void free_tables(H264Context *h, int free_rbsp){
00817 int i;
00818 H264Context *hx;
00819
00820 av_freep(&h->intra4x4_pred_mode);
00821 av_freep(&h->chroma_pred_mode_table);
00822 av_freep(&h->cbp_table);
00823 av_freep(&h->mvd_table[0]);
00824 av_freep(&h->mvd_table[1]);
00825 av_freep(&h->direct_table);
00826 av_freep(&h->non_zero_count);
00827 av_freep(&h->slice_table_base);
00828 h->slice_table= NULL;
00829 av_freep(&h->list_counts);
00830
00831 av_freep(&h->mb2b_xy);
00832 av_freep(&h->mb2br_xy);
00833
00834 for(i = 0; i < MAX_THREADS; i++) {
00835 hx = h->thread_context[i];
00836 if(!hx) continue;
00837 av_freep(&hx->top_borders[1]);
00838 av_freep(&hx->top_borders[0]);
00839 av_freep(&hx->s.obmc_scratchpad);
00840 if (free_rbsp){
00841 av_freep(&hx->rbsp_buffer[1]);
00842 av_freep(&hx->rbsp_buffer[0]);
00843 hx->rbsp_buffer_size[0] = 0;
00844 hx->rbsp_buffer_size[1] = 0;
00845 }
00846 if (i) av_freep(&h->thread_context[i]);
00847 }
00848 }
00849
/**
 * Build the per-matrix, per-QP 8x8 dequant tables. Matrices with identical
 * scaling lists share one buffer instead of being recomputed.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8); /* QP range grows with bit depth */

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        /* alias to an earlier identical scaling matrix if possible */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q];
            int idx = rem6[q];
            for(x=0; x<64; x++)
                /* note the transposed store: (x>>3)|((x&7)<<3) */
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00875
/**
 * Build the per-matrix, per-QP 4x4 dequant tables; identical scaling lists
 * share one buffer, mirroring init_dequant8_coeff_table.
 */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        /* alias to an earlier identical scaling matrix if possible */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q] + 2;
            int idx = rem6[q];
            for(x=0; x<16; x++)
                /* transposed store: (x>>2)|((x<<2)&0xF) */
                h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}
00900
00901 static void init_dequant_tables(H264Context *h){
00902 int i,x;
00903 init_dequant4_coeff_table(h);
00904 if(h->pps.transform_8x8_mode)
00905 init_dequant8_coeff_table(h);
00906 if(h->sps.transform_bypass){
00907 for(i=0; i<6; i++)
00908 for(x=0; x<16; x++)
00909 h->dequant4_coeff[i][0][x] = 1<<6;
00910 if(h->pps.transform_8x8_mode)
00911 for(i=0; i<6; i++)
00912 for(x=0; x<64; x++)
00913 h->dequant8_coeff[i][0][x] = 1<<6;
00914 }
00915 }
00916
00917
/**
 * Allocate the per-picture macroblock metadata tables (non-zero counts,
 * slice map, cbp, chroma modes, mvd, direct-mode flags and the
 * mb-index-to-block-index maps) and build the dequant tables on first use.
 * @return 0 on success, -1 on allocation failure (partially allocated
 *         tables are released via free_tables()).
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* one extra macroblock row of padding */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* the row-local tables hold two mb rows per slice-decoding thread */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count, big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "belongs to no slice"; slice_table is offset so the mbs
     * just above/left of the picture are also addressable */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy, big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy, big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            /* without FMO the row-local tables wrap every two mb rows */
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    s->obmc_scratchpad = NULL;

    /* dequant tables are built lazily, only once per context */
    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}
00961
00965 static void clone_tables(H264Context *dst, H264Context *src, int i){
00966 MpegEncContext * const s = &src->s;
00967 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
00968 dst->non_zero_count = src->non_zero_count;
00969 dst->slice_table = src->slice_table;
00970 dst->cbp_table = src->cbp_table;
00971 dst->mb2b_xy = src->mb2b_xy;
00972 dst->mb2br_xy = src->mb2br_xy;
00973 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
00974 dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;
00975 dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;
00976 dst->direct_table = src->direct_table;
00977 dst->list_counts = src->list_counts;
00978
00979 dst->s.obmc_scratchpad = NULL;
00980 ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
00981 }
00982
00987 static int context_init(H264Context *h){
00988 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
00989 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
00990
00991 h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
00992 h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
00993
00994 return 0;
00995 fail:
00996 return -1;
00997 }
00998
00999 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
01000
/**
 * One-time initialization shared by the H.264-family decoders: copy the
 * geometry from the AVCodecContext, install the DSP/prediction function
 * pointers for 8-bit decoding and reset the scaling lists to the flat
 * default (16 = no scaling).
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    /* default to 8-bit */
    s->avctx->bits_per_raw_sample = 8;

    ff_h264dsp_init(&h->h264dsp,
                    s->avctx->bits_per_raw_sample);
    ff_h264_pred_init(&h->hpc, s->codec_id,
                      s->avctx->bits_per_raw_sample);

    h->dequant_coeff_pps= -1; /* no PPS has had its dequant tables built yet */
    s->unrestricted_mv=1;
    s->decode=1;

    dsputil_init(&s->dsp, s->avctx);

    /* flat default scaling lists */
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    /* NOTE(review): only 2*64 bytes are reset here while
     * init_dequant8_coeff_table() iterates over 6 scaling_matrix8 entries —
     * confirm the declared dimensions of pps.scaling_matrix8. */
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
01024
01025 int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size)
01026 {
01027 AVCodecContext *avctx = h->s.avctx;
01028
01029 if(!buf || size <= 0)
01030 return -1;
01031
01032 if(buf[0] == 1){
01033 int i, cnt, nalsize;
01034 const unsigned char *p = buf;
01035
01036 h->is_avc = 1;
01037
01038 if(size < 7) {
01039 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
01040 return -1;
01041 }
01042
01043
01044 h->nal_length_size = 2;
01045
01046 cnt = *(p+5) & 0x1f;
01047 p += 6;
01048 for (i = 0; i < cnt; i++) {
01049 nalsize = AV_RB16(p) + 2;
01050 if(nalsize > size - (p-buf))
01051 return -1;
01052 if(decode_nal_units(h, p, nalsize) < 0) {
01053 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
01054 return -1;
01055 }
01056 p += nalsize;
01057 }
01058
01059 cnt = *(p++);
01060 for (i = 0; i < cnt; i++) {
01061 nalsize = AV_RB16(p) + 2;
01062 if(nalsize > size - (p-buf))
01063 return -1;
01064 if (decode_nal_units(h, p, nalsize) < 0) {
01065 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
01066 return -1;
01067 }
01068 p += nalsize;
01069 }
01070
01071 h->nal_length_size = (buf[4] & 0x03) + 1;
01072 } else {
01073 h->is_avc = 0;
01074 if(decode_nal_units(h, buf, size) < 0)
01075 return -1;
01076 }
01077 return 0;
01078 }
01079
/**
 * AVCodec init callback for the native H.264 decoder: set codec defaults,
 * init VLC tables, adjust the time base to field units and parse any
 * out-of-band extradata (avcC or Annex B).
 * @return 0 on success, -1 if the extradata cannot be decoded.
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    /* H.264 motion vectors are in quarter-pel units */
    s->quarter_sample = 1;
    if(!avctx->has_b_frames)
        s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1;
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        /* H.264 timestamps tick in field units: two ticks per frame */
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size))
        return -1;

    /* if the SPS carried reorder info, reserve the reorder delay now */
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}
01128
01129 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
01130 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
01131 {
01132 int i;
01133
01134 for (i=0; i<count; i++){
01135 assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
01136 IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
01137 !from[i]));
01138 to[i] = REBASE_PICTURE(from[i], new_base, old_base);
01139 }
01140 }
01141
/**
 * Synchronize an array of heap-allocated parameter sets (SPS/PPS) with a
 * source array: free entries that disappeared, allocate entries that
 * appeared, and copy the payloads.
 *
 * Fix: the av_malloc() result was previously used unchecked, so an
 * allocation failure led to memcpy(NULL, ...) (undefined behavior). The
 * signature must stay void for the existing callers, so on failure the
 * entry is simply left unset.
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (to[i] && !from[i]) {
            av_freep(&to[i]);
        } else if (from[i] && !to[i]) {
            to[i] = av_malloc(size);
            if (!to[i])
                continue; /* OOM: leave this entry unset rather than crash */
        }

        if (from[i] && to[i]) memcpy(to[i], from[i], size);
    }
}
01153
01154 static int decode_init_thread_copy(AVCodecContext *avctx){
01155 H264Context *h= avctx->priv_data;
01156
01157 if (!avctx->is_copy) return 0;
01158 memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
01159 memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
01160
01161 return 0;
01162 }
01163
01164 #define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
/**
 * Frame-threading: update the per-thread decoder context *dst from the
 * context *src that just finished frame setup. On the first call the
 * destination is fully initialized; subsequent calls copy only the state
 * needed to continue decoding (parameter sets, dequant tables, POC and
 * reference-list state) and rebase picture pointers into dst.
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    /* first call: copy everything after the embedded MpegEncContext
     * wholesale, then fix up the pointer members it clobbered */
    if(!inited){
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        /* copy all H264Context fields that follow the MpegEncContext */
        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext));
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        /* the memcpy above cloned the source's rbsp buffer pointers;
         * detach them so each thread owns its own buffers */
        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    h->is_avc = h1->is_avc;

    /* SPS/PPS sets (deep-copied) plus the currently active ones */
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps = h1->pps;

    /* dequant buffers are copied by value; the cached coeff pointers must
     * then be rebased onto this context's own buffers */
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    /* POC state */
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    /* reference list state */
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list, intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    /* picture pointers still refer into the source context; rebase them */
    copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1);
    copy_picture_range(h->long_ref, h1->long_ref, 32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;

    if(!s->current_picture_ptr) return 0;

    /* for non-droppable frames, apply reference marking and advance the
     * previous-POC bookkeeping */
    if(!s->dropable) {
        ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb = h->poc_msb;
        h->prev_poc_lsb = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num = h->frame_num;
    h->outputed_poc = h->next_outputed_poc;

    return 0;
}
01256
/**
 * Per-frame initialization: start the frame in the underlying
 * MpegEncContext, precompute the per-block pixel offsets for frame and
 * field macroblocks, and reset the per-frame slice map and POC fields.
 * @return 0 on success, -1 if MPV_frame_start() fails.
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;
    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);

    /* cleared here; the real values are established later during slice
     * decoding (MPV_frame_start's pict_type-based guess does not apply
     * to H.264) */
    s->current_picture_ptr->key_frame= 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    /* block_offset[0..15]: luma 4x4 offsets for frame mbs;
     * block_offset[48..63]: the same with doubled row stride (field mbs) */
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    /* chroma plane offsets (entries 16..47), frame and field variants */
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* make sure every slice thread has a scratch buffer */
    for(i = 0; i < thread_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* -1 = no slice has claimed this macroblock yet */
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    /* INT_MAX marks "POC not yet known"; decode_postinit() waits for both
     * field POCs to be filled in */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
01317
/**
 * Run once the current picture's setup is complete: derive the
 * interlacing/repeat flags from SEI picture timing, insert the picture
 * into the delayed-output buffer and select the next picture to output
 * in POC order.
 * @param setup_finished notify the frame-threading code that setup is done
 */
static void decode_postinit(H264Context *h, int setup_finished){
    MpegEncContext * const s = &h->s;
    Picture *out = s->current_picture_ptr;
    Picture *cur = s->current_picture_ptr;
    int i, pics, out_of_order, out_idx;

    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->pict_type= s->pict_type;

    if (h->next_output_pic) return;

    if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
        /* field_poc is still at its INT_MAX sentinel: not both fields of
         * this picture have been decoded yet, so output decisions must
         * wait for the second field */
        return;
    }

    cur->interlaced_frame = 0;
    cur->repeat_pict = 0;

    /* derive interlaced_frame / repeat_pict from SEI pic_struct when the
     * SPS says picture timing SEI is present */
    if(h->sps.pic_struct_present_flag){
        switch (h->sei_pic_struct)
        {
        case SEI_PIC_STRUCT_FRAME:
            break;
        case SEI_PIC_STRUCT_TOP_FIELD:
        case SEI_PIC_STRUCT_BOTTOM_FIELD:
            cur->interlaced_frame = 1;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM:
        case SEI_PIC_STRUCT_BOTTOM_TOP:
            if (FIELD_OR_MBAFF_PICTURE)
                cur->interlaced_frame = 1;
            else
                /* progressive coding but field display order: keep the
                 * previous picture's interlaced flag (soft telecine) */
                cur->interlaced_frame = h->prev_interlaced_frame;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
            /* one field repeated */
            cur->repeat_pict = 1;
            break;
        case SEI_PIC_STRUCT_FRAME_DOUBLING:
            /* whole frame shown twice */
            cur->repeat_pict = 2;
            break;
        case SEI_PIC_STRUCT_FRAME_TRIPLING:
            cur->repeat_pict = 4;
            break;
        }

        /* ct_type, when signalled, overrides the guess above
         * (bit 1 set = interlaced source) */
        if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
            cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
    }else{
        /* no SEI timing: fall back on the coding mode */
        cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
    }
    h->prev_interlaced_frame = cur->interlaced_frame;

    if (cur->field_poc[0] != cur->field_poc[1]){
        /* the field with the smaller POC is displayed first */
        cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
    }else{
        if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
            /* equal POCs: use the SEI pic_struct, if any */
            if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
              || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                cur->top_field_first = 1;
            else
                cur->top_field_first = 0;
        }else{
            cur->top_field_first = 0;
        }
    }

    /* honor the SPS reorder-delay information, if present */
    if(h->sps.bitstream_restriction_flag
       && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    /* strict compliance without reorder info: assume the worst case */
    if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
       && !h->sps.bitstream_restriction_flag){
        s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
        s->low_delay= 0;
    }

    pics = 0;
    while(h->delayed_pic[pics]) pics++;

    av_assert0(pics <= MAX_DELAYED_PIC_COUNT);

    h->delayed_pic[pics++] = cur;
    /* keep the picture alive while it sits in the delay buffer */
    if(cur->reference == 0)
        cur->reference = DELAYED_PIC_REF;

    /* find the delayed picture with the smallest POC, stopping at a
     * key frame / mmco reset (those act as reorder barriers) */
    out = h->delayed_pic[0];
    out_idx = 0;
    for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
        if(h->delayed_pic[i]->poc < out->poc){
            out = h->delayed_pic[i];
            out_idx = i;
        }
    if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
        h->next_outputed_poc= INT_MIN;
    out_of_order = out->poc < h->next_outputed_poc;

    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        { }
    /* no (sufficient) reorder info from the bitstream: grow the output
     * delay when pictures demonstrably arrive out of order */
    else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
       || (s->low_delay &&
        ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)
         || cur->pict_type == AV_PICTURE_TYPE_B)))
    {
        s->low_delay = 0;
        s->avctx->has_b_frames++;
    }

    /* remove the chosen picture from the delay buffer */
    if(out_of_order || pics > s->avctx->has_b_frames){
        out->reference &= ~DELAYED_PIC_REF;
        out->owner2 = s;
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];
    }
    if(!out_of_order && pics > s->avctx->has_b_frames){
        h->next_output_pic = out;
        if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
            h->next_outputed_poc = INT_MIN;
        } else
            h->next_outputed_poc = out->poc;
    }else{
        av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
    }

    if (setup_finished)
        ff_thread_finish_setup(s->avctx);
}
01476
/**
 * Save the bottom edge of the current macroblock (one luma row plus the
 * corresponding chroma rows) into h->top_borders so that the macroblock
 * row below can still use these pre-deblocking samples as its top
 * neighbours.
 * @param chroma444 nonzero for full-size chroma planes (4:4:4)
 * @param simple nonzero for the progressive 8-bit fast path
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;

    /* step back one row so "+ 16*linesize" below is the mb's last row */
    src_y -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* odd mb row of a pair: for non-MBAFF mbs, additionally save
             * row 15 into the first border buffer */
            if(!MB_MBAFF){
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else {
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];

    /* save the bottom luma row (16 or 32 bytes, depending on bit depth) */
    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* 4:4:4: chroma rows are full width; note uvlinesize==linesize
             * is assumed here — the chroma copies use linesize */
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*linesize);
                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                AV_COPY128(top_border+64, src_cr + 16*linesize);
                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*linesize);
                AV_COPY128(top_border+32, src_cr + 16*linesize);
            }
        } else {
            /* 4:2:0: 8-pixel-wide chroma rows */
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}
01551
/**
 * Exchange (xchg != 0) or copy back the pixels above the current
 * macroblock with the saved, pre-deblocking top border. Intra prediction
 * must see unfiltered neighbours, so the callers swap the border in
 * before prediction (xchg=1) and restore it afterwards (xchg=0).
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                  uint8_t *src_cb, uint8_t *src_cr,
                                  int linesize, int uvlinesize,
                                  int xchg, int chroma444,
                                  int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    if(h->deblocking_filter == 2) {
        /* filter mode 2: only deblock within the current slice */
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top = (s->mb_y > !!MB_FIELD);
    }

    /* move the pointers one row up and one pixel left of the mb */
    src_y -= linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border = h->top_borders[top_idx][s->mb_x];

/* swap or copy 8 pixels (16 bytes at high bit depth) between border
 * buffer `a` and picture `b` */
#define XCHG(a,b,xchg)\
if (pixel_shift) {\
    if (xchg) {\
        AV_SWAP64(b+0,a+0);\
        AV_SWAP64(b+8,a+8);\
    } else {\
        AV_COPY128(b,a); \
    }\
} else \
if (xchg) AV_SWAP64(b,a);\
else AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            /* top-right neighbour of the mb */
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}
01636
01637 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
01638 if (high_bit_depth) {
01639 return AV_RN32A(((int32_t*)mb) + index);
01640 } else
01641 return AV_RN16A(mb + index);
01642 }
01643
01644 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
01645 if (high_bit_depth) {
01646 AV_WN32A(((int32_t*)mb) + index, value);
01647 } else
01648 AV_WN16A(mb + index, value);
01649 }
01650
/**
 * Intra prediction plus residual reconstruction for one luma-sized plane
 * of the current macroblock.
 * @param p plane index: 0 = luma; 1/2 select the chroma planes when they
 *          are coded with the luma tools (4:4:4), choosing the matching
 *          quantizer and coefficient area
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                            int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
    block_offset += 16*p;
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                /* four 8x8 blocks */
                if(transform_bypass){
                    idct_dc_add =
                    idct_add = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add = h->h264dsp.h264_idct8_add;
                }
                for(i=0; i<16; i+=4){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    /* profile 244 (High 4:4:4 Predictive) lossless uses the
                     * combined vertical/horizontal predict-and-add */
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                               (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            /* single coefficient at position 0 -> DC-only add */
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                /* sixteen 4x4 blocks */
                if(transform_bypass){
                    idct_dc_add =
                    idct_add = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add = h->h264dsp.h264_idct_add;
                }
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                /* top-right not available: replicate the last
                                 * available top pixel into a 4-sample buffer */
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    tr= ptr[3 - linesize]*0x01010101;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                /* single coefficient at position 0 -> DC-only add */
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            }else
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        /* intra 16x16: one full-mb prediction, then handle the DC plane */
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    /* bypass: skip the DC transform and scatter the 16 DC
                     * values to each 4x4 block's first coefficient */
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        }else
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}
01753
/**
 * Residual reconstruction (inverse transform + add) for one luma-sized
 * plane of a non-intra4x4 macroblock; intra4x4 residuals are handled in
 * hl_decode_mb_predict_luma() together with the prediction.
 * @param p plane index, as in hl_decode_mb_predict_luma()
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                            int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    block_offset += 16*p;
    if(!IS_INTRA4x4(mb_type)){
        if(is_h264){
            if(IS_INTRA16x16(mb_type)){
                if(transform_bypass){
                    /* profile 244 lossless: vertical/horizontal prediction
                     * modes have combined predict-and-add implementations */
                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                    }else{
                        for(i=0; i<16; i++){
                            /* also add blocks whose only residual is the DC */
                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                }
            }else if(h->cbp&15){
                /* inter mb with coded luma residual */
                if(transform_bypass){
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                    for(i=0; i<16; i+=di){
                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    if(IS_8x8DCT(mb_type)){
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }else{
                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }
                }
            }
        }else{
            /* SVQ3 path */
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }
}
01803
01804 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
01805 MpegEncContext * const s = &h->s;
01806 const int mb_x= s->mb_x;
01807 const int mb_y= s->mb_y;
01808 const int mb_xy= h->mb_xy;
01809 const int mb_type= s->current_picture.mb_type[mb_xy];
01810 uint8_t *dest_y, *dest_cb, *dest_cr;
01811 int linesize, uvlinesize ;
01812 int i, j;
01813 int *block_offset = &h->block_offset[0];
01814 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
01815
01816 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
01817 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
01818
01819 dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
01820 dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
01821 dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
01822
01823 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
01824 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
01825
01826 h->list_counts[mb_xy]= h->list_count;
01827
01828 if (!simple && MB_FIELD) {
01829 linesize = h->mb_linesize = s->linesize * 2;
01830 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
01831 block_offset = &h->block_offset[48];
01832 if(mb_y&1){
01833 dest_y -= s->linesize*15;
01834 dest_cb-= s->uvlinesize*7;
01835 dest_cr-= s->uvlinesize*7;
01836 }
01837 if(FRAME_MBAFF) {
01838 int list;
01839 for(list=0; list<h->list_count; list++){
01840 if(!USES_LIST(mb_type, list))
01841 continue;
01842 if(IS_16X16(mb_type)){
01843 int8_t *ref = &h->ref_cache[list][scan8[0]];
01844 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
01845 }else{
01846 for(i=0; i<16; i+=4){
01847 int ref = h->ref_cache[list][scan8[i]];
01848 if(ref >= 0)
01849 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
01850 }
01851 }
01852 }
01853 }
01854 } else {
01855 linesize = h->mb_linesize = s->linesize;
01856 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
01857
01858 }
01859
01860 if (!simple && IS_INTRA_PCM(mb_type)) {
01861 if (pixel_shift) {
01862 const int bit_depth = h->sps.bit_depth_luma;
01863 int j;
01864 GetBitContext gb;
01865 init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);
01866
01867 for (i = 0; i < 16; i++) {
01868 uint16_t *tmp_y = (uint16_t*)(dest_y + i*linesize);
01869 for (j = 0; j < 16; j++)
01870 tmp_y[j] = get_bits(&gb, bit_depth);
01871 }
01872 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01873 if (!h->sps.chroma_format_idc) {
01874 for (i = 0; i < 8; i++) {
01875 uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
01876 for (j = 0; j < 8; j++) {
01877 tmp_cb[j] = 1 << (bit_depth - 1);
01878 }
01879 }
01880 for (i = 0; i < 8; i++) {
01881 uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
01882 for (j = 0; j < 8; j++) {
01883 tmp_cr[j] = 1 << (bit_depth - 1);
01884 }
01885 }
01886 } else {
01887 for (i = 0; i < 8; i++) {
01888 uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
01889 for (j = 0; j < 8; j++)
01890 tmp_cb[j] = get_bits(&gb, bit_depth);
01891 }
01892 for (i = 0; i < 8; i++) {
01893 uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
01894 for (j = 0; j < 8; j++)
01895 tmp_cr[j] = get_bits(&gb, bit_depth);
01896 }
01897 }
01898 }
01899 } else {
01900 for (i=0; i<16; i++) {
01901 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
01902 }
01903 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01904 if (!h->sps.chroma_format_idc) {
01905 for (i = 0; i < 8; i++) {
01906 memset(dest_cb + i*uvlinesize, 128, 8);
01907 memset(dest_cr + i*uvlinesize, 128, 8);
01908 }
01909 } else {
01910 for (i = 0; i < 8; i++) {
01911 memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8);
01912 memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8);
01913 }
01914 }
01915 }
01916 }
01917 } else {
01918 if(IS_INTRA(mb_type)){
01919 if(h->deblocking_filter)
01920 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);
01921
01922 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01923 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
01924 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
01925 }
01926
01927 hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
01928
01929 if(h->deblocking_filter)
01930 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
01931 }else if(is_h264){
01932 if (pixel_shift) {
01933 hl_motion_16(h, dest_y, dest_cb, dest_cr,
01934 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
01935 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
01936 h->h264dsp.weight_h264_pixels_tab,
01937 h->h264dsp.biweight_h264_pixels_tab, 0);
01938 } else
01939 hl_motion_8(h, dest_y, dest_cb, dest_cr,
01940 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
01941 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
01942 h->h264dsp.weight_h264_pixels_tab,
01943 h->h264dsp.biweight_h264_pixels_tab, 0);
01944 }
01945
01946 hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
01947
01948 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
01949 uint8_t *dest[2] = {dest_cb, dest_cr};
01950 if(transform_bypass){
01951 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
01952 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
01953 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
01954 }else{
01955 idct_add = s->dsp.add_pixels4;
01956 for(j=1; j<3; j++){
01957 for(i=j*16; i<j*16+4; i++){
01958 if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
01959 idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
01960 }
01961 }
01962 }
01963 }else{
01964 if(is_h264){
01965 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
01966 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
01967 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
01968 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
01969 h->h264dsp.h264_idct_add8(dest, block_offset,
01970 h->mb, uvlinesize,
01971 h->non_zero_count_cache);
01972 }
01973 #if CONFIG_SVQ3_DECODER
01974 else{
01975 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
01976 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
01977 for(j=1; j<3; j++){
01978 for(i=j*16; i<j*16+4; i++){
01979 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
01980 uint8_t * const ptr= dest[j-1] + block_offset[i];
01981 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
01982 }
01983 }
01984 }
01985 }
01986 #endif
01987 }
01988 }
01989 }
01990 if(h->cbp || IS_INTRA(mb_type))
01991 {
01992 s->dsp.clear_blocks(h->mb);
01993 s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
01994 }
01995 }
01996
/**
 * Decode one macroblock of 4:4:4 content, where all three planes are
 * luma-sized and share the luma linesize, so each plane is run through
 * the luma prediction/IDCT code paths.
 *
 * @param h           decoder context
 * @param simple      nonzero selects the fast path (skips MBAFF/field and
 *                    PCM handling and the CODEC_FLAG_GRAY check)
 * @param pixel_shift 1 for high bit depth (16-bit samples), 0 for 8-bit
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* with CODEC_FLAG_GRAY only plane 0 (luma) is decoded */
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: double the stride so each line of the MB maps
         * to one field line, and use the field variant of block_offset */
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1)  /* bottom-field MB of an MB pair: move up 15 lines */
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    /* fold the field parity into the cached reference index */
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        if (pixel_shift) {
            /* high bit depth PCM: raw samples of bit_depth bits each are
             * stored in h->mb; 768 samples = 3 planes * 16*16 */
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            /* 8-bit PCM: each plane occupies 128 DCTELEMs (16 rows of 8) */
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* temporarily swap in the neighbouring edge pixels deblocking
             * would otherwise have modified, so intra prediction sees the
             * unfiltered samples */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            /* inter MB: motion compensation, specialised on sample width */
            if (pixel_shift) {
                hl_motion_16(h, dest[0], dest[1], dest[2],
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 1);
            } else
                hl_motion_8(h, dest[0], dest[1], dest[2],
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 1);
        }

        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    /* clear the coefficient buffer for the next MB if anything was written */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02099
/**
 * Generate hl_decode_mb_simple_8() and hl_decode_mb_simple_16(): fast-path
 * (simple=1) wrappers around hl_decode_mb_internal(), specialised at
 * compile time on the pixel shift (0 => 8-bit samples, 1 => 16-bit).
 */
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8);
hl_decode_mb_simple(1, 16);
02109
/** Slow path for 4:2:0: simple=0 so MBAFF/PCM/gray cases are handled. */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}
02116
/** Slow path for 4:4:4: simple=0 so MBAFF/PCM/gray cases are handled. */
static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}
02120
/** Fast path for 8-bit 4:4:4 (simple=1, pixel_shift fixed to 0). */
static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}
02124
02125 void ff_h264_hl_decode_mb(H264Context *h){
02126 MpegEncContext * const s = &h->s;
02127 const int mb_xy= h->mb_xy;
02128 const int mb_type= s->current_picture.mb_type[mb_xy];
02129 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02130
02131 if (CHROMA444) {
02132 if(is_complex || h->pixel_shift)
02133 hl_decode_mb_444_complex(h);
02134 else
02135 hl_decode_mb_444_simple(h);
02136 } else if (is_complex) {
02137 hl_decode_mb_complex(h);
02138 } else if (h->pixel_shift) {
02139 hl_decode_mb_simple_16(h);
02140 } else
02141 hl_decode_mb_simple_8(h);
02142 }
02143
/**
 * Parse the explicit weighted-prediction table (pred_weight_table,
 * H.264 slice header) from the bitstream.
 *
 * Fills h->luma_weight / h->chroma_weight (weight and offset pairs per
 * reference and list) and the use_weight / *_weight_flag bookkeeping.
 * List 1 is only parsed for B slices.
 *
 * @return 0 (no parse errors are reported by this function)
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, i;
    int luma_def, chroma_def;

    h->use_weight= 0;
    h->use_weight_chroma= 0;
    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
    /* chroma denom is only coded when the stream has chroma */
    if(h->sps.chroma_format_idc)
        h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
    /* a weight equal to 1<<denom with offset 0 is the identity */
    luma_def = 1<<h->luma_log2_weight_denom;
    /* NOTE(review): when chroma_format_idc==0 this reads whatever value
     * chroma_log2_weight_denom last held; chroma_def is then unused below */
    chroma_def = 1<<h->chroma_log2_weight_denom;

    for(list=0; list<2; list++){
        h->luma_weight_flag[list] = 0;
        h->chroma_weight_flag[list] = 0;
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;

            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                /* [0] = weight, [1] = offset */
                h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
                h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
                /* only flag real weighting, not identity values */
                if(   h->luma_weight[i][list][0] != luma_def
                   || h->luma_weight[i][list][1] != 0) {
                    h->use_weight= 1;
                    h->luma_weight_flag[list]= 1;
                }
            }else{
                h->luma_weight[i][list][0]= luma_def;
                h->luma_weight[i][list][1]= 0;
            }

            if(h->sps.chroma_format_idc){
                chroma_weight_flag= get_bits1(&s->gb);
                if(chroma_weight_flag){
                    int j;
                    for(j=0; j<2; j++){  /* j: 0=Cb, 1=Cr */
                        h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
                        h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
                        if(   h->chroma_weight[i][list][j][0] != chroma_def
                           || h->chroma_weight[i][list][j][1] != 0) {
                            h->use_weight_chroma= 1;
                            h->chroma_weight_flag[list]= 1;
                        }
                    }
                }else{
                    int j;
                    for(j=0; j<2; j++){
                        h->chroma_weight[i][list][j][0]= chroma_def;
                        h->chroma_weight[i][list][j][1]= 0;
                    }
                }
            }
        }
        /* only B slices carry a second (list 1) table */
        if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
    }
    h->use_weight= h->use_weight || h->use_weight_chroma;
    return 0;
}
02204
/**
 * Initialize the implicit weighted-prediction table for B slices,
 * deriving per-reference-pair weights from POC distances.
 *
 * @param field -1 for the frame/whole-field case; 0 or 1 to fill the
 *              weights for that field parity (MBAFF pair handling, using
 *              the field-reference half of ref_list starting at index 16)
 */
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i] = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        if (s->picture_structure == PICT_FRAME) {
            cur_poc = s->current_picture_ptr->poc;
        } else {
            cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
        }
        /* single reference each way, symmetric POC distances and no MBAFF:
         * implicit weighting degenerates to (32,32), i.e. no weighting */
        if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
           && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
            h->use_weight= 0;
            h->use_weight_chroma= 0;
            return;
        }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        /* field case: the field references live at indices 16..16+2*count-1 */
        cur_poc = s->current_picture_ptr->field_poc[field];
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    /* use_weight==2 selects the implicit-weight code path downstream */
    h->use_weight= 2;
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32;  /* default: equal weighting */
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                /* temporal distance based weight (H.264 8.4.2.3.2):
                 * td = clipped POC distance between the two references,
                 * tb = distance from ref0 to the current picture */
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                /* frame case: both field entries get the same weight */
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}
02270
02274 static void idr(H264Context *h){
02275 ff_h264_remove_all_refs(h);
02276 h->prev_frame_num= 0;
02277 h->prev_frame_num_offset= 0;
02278 h->prev_poc_msb=
02279 h->prev_poc_lsb= 0;
02280 }
02281
02282
02283 static void flush_dpb(AVCodecContext *avctx){
02284 H264Context *h= avctx->priv_data;
02285 int i;
02286 for(i=0; i<=MAX_DELAYED_PIC_COUNT; i++) {
02287 if(h->delayed_pic[i])
02288 h->delayed_pic[i]->reference= 0;
02289 h->delayed_pic[i]= NULL;
02290 }
02291 h->outputed_poc=h->next_outputed_poc= INT_MIN;
02292 h->prev_interlaced_frame = 1;
02293 idr(h);
02294 if(h->s.current_picture_ptr)
02295 h->s.current_picture_ptr->reference= 0;
02296 h->s.first_field= 0;
02297 ff_h264_reset_sei(h);
02298 ff_mpeg_flush(avctx);
02299 }
02300
/**
 * Compute the picture order count (POC) of the current picture from the
 * parsed slice-header syntax, implementing the three poc_type modes of
 * H.264 section 8.2.1, and store the result in the current Picture's
 * field_poc[]/poc.
 *
 * @return 0 (always succeeds)
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* frame_num wrapped -> advance the offset by one wrap period */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        /* reconstruct the MSB part from the lsb delta (spec 8.2.1.1) */
        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;

        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* POC derived from frame_num and the SPS offset cycle (8.2.1.2) */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        /* non-reference pictures sit between the cycle anchor points */
        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ];

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* poc_type 2: POC follows decoding order (8.2.1.3) */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* for field pictures only update the POC of the field being decoded */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
02377
02378
02382 static void init_scan_tables(H264Context *h){
02383 int i;
02384 for(i=0; i<16; i++){
02385 #define T(x) (x>>2) | ((x<<2) & 0xF)
02386 h->zigzag_scan[i] = T(zigzag_scan[i]);
02387 h-> field_scan[i] = T( field_scan[i]);
02388 #undef T
02389 }
02390 for(i=0; i<64; i++){
02391 #define T(x) (x>>3) | ((x&7)<<3)
02392 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
02393 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
02394 h->field_scan8x8[i] = T(field_scan8x8[i]);
02395 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
02396 #undef T
02397 }
02398 if(h->sps.transform_bypass){
02399 h->zigzag_scan_q0 = zigzag_scan;
02400 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
02401 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
02402 h->field_scan_q0 = field_scan;
02403 h->field_scan8x8_q0 = field_scan8x8;
02404 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
02405 }else{
02406 h->zigzag_scan_q0 = h->zigzag_scan;
02407 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
02408 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
02409 h->field_scan_q0 = h->field_scan;
02410 h->field_scan8x8_q0 = h->field_scan8x8;
02411 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
02412 }
02413 }
02414
/**
 * Finish decoding the current field (or frame): report decode progress to
 * other threads, run reference picture marking, notify hwaccel/VDPAU, run
 * error resilience (frames only) and close the MPV frame.
 *
 * @param h        decoder context
 * @param in_setup nonzero when called from slice-header setup of the next
 *                 picture (skips the progress report and, with frame
 *                 threading, defers the ref-marking/state updates)
 */
static void field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    s->mb_y= 0;

    if (!in_setup && !s->dropable)
        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1,
                                 s->picture_structure==PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    /* with frame threading these updates happen elsewhere unless we are
     * in the setup phase of the next picture */
    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        if(!s->dropable) {
            ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /* NOTE: error resilience is only run for full frames; field pictures
     * skip ff_er_frame_end() entirely. */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;
}
02465
/**
 * Replicate per-picture decoding state from the "master" context into a
 * slice-thread context so the threads share the same current picture,
 * reference lists and dequant tables. Per-slice fields are not copied;
 * they are parsed independently by each thread.
 *
 * @param dst slice-thread context to fill
 * @param src master context to copy from
 */
static void clone_slice(H264Context *dst, H264Context *src)
{
    memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
    dst->s.current_picture_ptr  = src->s.current_picture_ptr;
    dst->s.current_picture      = src->s.current_picture;
    dst->s.linesize             = src->s.linesize;
    dst->s.uvlinesize           = src->s.uvlinesize;
    dst->s.first_field          = src->s.first_field;

    /* POC / frame_num prediction state */
    dst->prev_poc_msb           = src->prev_poc_msb;
    dst->prev_poc_lsb           = src->prev_poc_lsb;
    dst->prev_frame_num_offset  = src->prev_frame_num_offset;
    dst->prev_frame_num         = src->prev_frame_num;
    dst->short_ref_count        = src->short_ref_count;

    /* reference picture bookkeeping (shallow copies of Picture pointers) */
    memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
    memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
    memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
    memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));

    memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
    memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
}
02492
02500 int ff_h264_get_profile(SPS *sps)
02501 {
02502 int profile = sps->profile_idc;
02503
02504 switch(sps->profile_idc) {
02505 case FF_PROFILE_H264_BASELINE:
02506
02507 profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
02508 break;
02509 case FF_PROFILE_H264_HIGH_10:
02510 case FF_PROFILE_H264_HIGH_422:
02511 case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
02512
02513 profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
02514 break;
02515 }
02516
02517 return profile;
02518 }
02519
02529 static int decode_slice_header(H264Context *h, H264Context *h0){
02530 MpegEncContext * const s = &h->s;
02531 MpegEncContext * const s0 = &h0->s;
02532 unsigned int first_mb_in_slice;
02533 unsigned int pps_id;
02534 int num_ref_idx_active_override_flag;
02535 unsigned int slice_type, tmp, i, j;
02536 int default_ref_list_done = 0;
02537 int last_pic_structure;
02538
02539 s->dropable= h->nal_ref_idc == 0;
02540
02541
02542 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
02543 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
02544 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
02545 }else{
02546 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
02547 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
02548 }
02549
02550 first_mb_in_slice= get_ue_golomb(&s->gb);
02551
02552 if(first_mb_in_slice == 0){
02553 if(h0->current_slice && FIELD_PICTURE){
02554 field_end(h, 1);
02555 }
02556
02557 h0->current_slice = 0;
02558 if (!s0->first_field)
02559 s->current_picture_ptr= NULL;
02560 }
02561
02562 slice_type= get_ue_golomb_31(&s->gb);
02563 if(slice_type > 9){
02564 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
02565 return -1;
02566 }
02567 if(slice_type > 4){
02568 slice_type -= 5;
02569 h->slice_type_fixed=1;
02570 }else
02571 h->slice_type_fixed=0;
02572
02573 slice_type= golomb_to_pict_type[ slice_type ];
02574 if (slice_type == AV_PICTURE_TYPE_I
02575 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
02576 default_ref_list_done = 1;
02577 }
02578 h->slice_type= slice_type;
02579 h->slice_type_nos= slice_type & 3;
02580
02581 s->pict_type= h->slice_type;
02582
02583 pps_id= get_ue_golomb(&s->gb);
02584 if(pps_id>=MAX_PPS_COUNT){
02585 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
02586 return -1;
02587 }
02588 if(!h0->pps_buffers[pps_id]) {
02589 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
02590 return -1;
02591 }
02592 h->pps= *h0->pps_buffers[pps_id];
02593
02594 if(!h0->sps_buffers[h->pps.sps_id]) {
02595 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
02596 return -1;
02597 }
02598 h->sps = *h0->sps_buffers[h->pps.sps_id];
02599
02600 s->avctx->profile = ff_h264_get_profile(&h->sps);
02601 s->avctx->level = h->sps.level_idc;
02602 s->avctx->refs = h->sps.ref_frame_count;
02603
02604 if(h == h0 && h->dequant_coeff_pps != pps_id){
02605 h->dequant_coeff_pps = pps_id;
02606 init_dequant_tables(h);
02607 }
02608
02609 s->mb_width= h->sps.mb_width;
02610 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
02611
02612 h->b_stride= s->mb_width*4;
02613
02614 s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
02615 if(h->sps.frame_mbs_only_flag)
02616 s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02617 else
02618 s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02619
02620 if (s->context_initialized
02621 && ( s->width != s->avctx->width || s->height != s->avctx->height
02622 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
02623 if(h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) {
02624 av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
02625 return AVERROR_PATCHWELCOME;
02626 }
02627 free_tables(h, 0);
02628 flush_dpb(s->avctx);
02629 MPV_common_end(s);
02630 h->list_count = 0;
02631 }
02632 if (!s->context_initialized) {
02633 if (h != h0) {
02634 av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
02635 return -1;
02636 }
02637
02638 avcodec_set_dimensions(s->avctx, s->width, s->height);
02639 s->avctx->sample_aspect_ratio= h->sps.sar;
02640 av_assert0(s->avctx->sample_aspect_ratio.den);
02641
02642 h->s.avctx->coded_width = 16*s->mb_width;
02643 h->s.avctx->coded_height = 16*s->mb_height;
02644
02645 if(h->sps.video_signal_type_present_flag){
02646 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
02647 if(h->sps.colour_description_present_flag){
02648 s->avctx->color_primaries = h->sps.color_primaries;
02649 s->avctx->color_trc = h->sps.color_trc;
02650 s->avctx->colorspace = h->sps.colorspace;
02651 }
02652 }
02653
02654 if(h->sps.timing_info_present_flag){
02655 int64_t den= h->sps.time_scale;
02656 if(h->x264_build < 44U)
02657 den *= 2;
02658 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
02659 h->sps.num_units_in_tick, den, 1<<30);
02660 }
02661
02662 switch (h->sps.bit_depth_luma) {
02663 case 9 :
02664 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
02665 break;
02666 case 10 :
02667 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
02668 break;
02669 default:
02670 if (CHROMA444){
02671 s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
02672 }else{
02673 s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
02674 s->avctx->codec->pix_fmts ?
02675 s->avctx->codec->pix_fmts :
02676 s->avctx->color_range == AVCOL_RANGE_JPEG ?
02677 hwaccel_pixfmt_list_h264_jpeg_420 :
02678 ff_hwaccel_pixfmt_list_420);
02679 }
02680 }
02681
02682 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
02683
02684 if (MPV_common_init(s) < 0) {
02685 av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
02686 return -1;
02687 }
02688 s->first_field = 0;
02689 h->prev_interlaced_frame = 1;
02690
02691 init_scan_tables(h);
02692 if (ff_h264_alloc_tables(h) < 0) {
02693 av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
02694 return AVERROR(ENOMEM);
02695 }
02696
02697 if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
02698 if (context_init(h) < 0) {
02699 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02700 return -1;
02701 }
02702 } else {
02703 for(i = 1; i < s->avctx->thread_count; i++) {
02704 H264Context *c;
02705 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
02706 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
02707 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
02708 c->h264dsp = h->h264dsp;
02709 c->sps = h->sps;
02710 c->pps = h->pps;
02711 c->pixel_shift = h->pixel_shift;
02712 init_scan_tables(c);
02713 clone_tables(c, h, i);
02714 }
02715
02716 for(i = 0; i < s->avctx->thread_count; i++)
02717 if (context_init(h->thread_context[i]) < 0) {
02718 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02719 return -1;
02720 }
02721 }
02722 }
02723
02724 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
02725
02726 h->mb_mbaff = 0;
02727 h->mb_aff_frame = 0;
02728 last_pic_structure = s0->picture_structure;
02729 if(h->sps.frame_mbs_only_flag){
02730 s->picture_structure= PICT_FRAME;
02731 }else{
02732 if(get_bits1(&s->gb)) {
02733 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb);
02734 } else {
02735 s->picture_structure= PICT_FRAME;
02736 h->mb_aff_frame = h->sps.mb_aff;
02737 }
02738 }
02739 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
02740
02741 if(h0->current_slice == 0){
02742
02743 if(h->frame_num != h->prev_frame_num) {
02744 int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num;
02745
02746 if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
02747
02748 if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
02749 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
02750 if (unwrap_prev_frame_num < 0)
02751 unwrap_prev_frame_num += max_frame_num;
02752
02753 h->prev_frame_num = unwrap_prev_frame_num;
02754 }
02755 }
02756
02757 while(h->frame_num != h->prev_frame_num &&
02758 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
02759 Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
02760 av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
02761 if (ff_h264_frame_start(h) < 0)
02762 return -1;
02763 h->prev_frame_num++;
02764 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
02765 s->current_picture_ptr->frame_num= h->prev_frame_num;
02766 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
02767 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
02768 ff_generate_sliding_window_mmcos(h);
02769 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
02770
02771
02772
02773
02774
02775
02776 if (h->short_ref_count) {
02777 if (prev) {
02778 av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
02779 (const uint8_t**)prev->data, prev->linesize,
02780 s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
02781 h->short_ref[0]->poc = prev->poc+2;
02782 }
02783 h->short_ref[0]->frame_num = h->prev_frame_num;
02784 }
02785 }
02786
02787
02788 if (s0->first_field) {
02789 assert(s0->current_picture_ptr);
02790 assert(s0->current_picture_ptr->data[0]);
02791 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
02792
02793
02794 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
02795
02796
02797
02798
02799 s0->current_picture_ptr = NULL;
02800 s0->first_field = FIELD_PICTURE;
02801
02802 } else {
02803 if (s0->current_picture_ptr->frame_num != h->frame_num) {
02804
02805
02806
02807
02808
02809
02810 s0->first_field = 1;
02811 s0->current_picture_ptr = NULL;
02812
02813 } else {
02814
02815 s0->first_field = 0;
02816 }
02817 }
02818
02819 } else {
02820
02821 assert(!s0->current_picture_ptr);
02822 s0->first_field = FIELD_PICTURE;
02823 }
02824
02825 if(!FIELD_PICTURE || s0->first_field) {
02826 if (ff_h264_frame_start(h) < 0) {
02827 s0->first_field = 0;
02828 return -1;
02829 }
02830 } else {
02831 ff_release_unused_pictures(s, 0);
02832 }
02833 }
02834 if(h != h0)
02835 clone_slice(h, h0);
02836
02837 s->current_picture_ptr->frame_num= h->frame_num;
02838
02839 assert(s->mb_num == s->mb_width * s->mb_height);
02840 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
02841 first_mb_in_slice >= s->mb_num){
02842 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
02843 return -1;
02844 }
02845 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
02846 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
02847 if (s->picture_structure == PICT_BOTTOM_FIELD)
02848 s->resync_mb_y = s->mb_y = s->mb_y + 1;
02849 assert(s->mb_y < s->mb_height);
02850
02851 if(s->picture_structure==PICT_FRAME){
02852 h->curr_pic_num= h->frame_num;
02853 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
02854 }else{
02855 h->curr_pic_num= 2*h->frame_num + 1;
02856 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
02857 }
02858
02859 if(h->nal_unit_type == NAL_IDR_SLICE){
02860 get_ue_golomb(&s->gb);
02861 }
02862
02863 if(h->sps.poc_type==0){
02864 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
02865
02866 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
02867 h->delta_poc_bottom= get_se_golomb(&s->gb);
02868 }
02869 }
02870
02871 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
02872 h->delta_poc[0]= get_se_golomb(&s->gb);
02873
02874 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
02875 h->delta_poc[1]= get_se_golomb(&s->gb);
02876 }
02877
02878 init_poc(h);
02879
02880 if(h->pps.redundant_pic_cnt_present){
02881 h->redundant_pic_count= get_ue_golomb(&s->gb);
02882 }
02883
02884
02885 h->ref_count[0]= h->pps.ref_count[0];
02886 h->ref_count[1]= h->pps.ref_count[1];
02887
02888 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
02889 unsigned max= s->picture_structure == PICT_FRAME ? 15 : 31;
02890
02891 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
02892 h->direct_spatial_mv_pred= get_bits1(&s->gb);
02893 }
02894 num_ref_idx_active_override_flag= get_bits1(&s->gb);
02895
02896 if(num_ref_idx_active_override_flag){
02897 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
02898 if(h->slice_type_nos==AV_PICTURE_TYPE_B)
02899 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
02900 }
02901
02902 if (h->ref_count[0]-1 > max || h->ref_count[1]-1 > max){
02903 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
02904 h->ref_count[0] = h->ref_count[1] = 1;
02905 return AVERROR_INVALIDDATA;
02906 }
02907
02908 if(h->slice_type_nos == AV_PICTURE_TYPE_B)
02909 h->list_count= 2;
02910 else
02911 h->list_count= 1;
02912 }else
02913 h->ref_count[1]= h->ref_count[0]= h->list_count= 0;
02914
02915 if(!default_ref_list_done){
02916 ff_h264_fill_default_ref_list(h);
02917 }
02918
02919 if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) {
02920 h->ref_count[1]= h->ref_count[0]= 0;
02921 return -1;
02922 }
02923
02924 if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
02925 s->last_picture_ptr= &h->ref_list[0][0];
02926 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
02927 }
02928 if(h->slice_type_nos==AV_PICTURE_TYPE_B){
02929 s->next_picture_ptr= &h->ref_list[1][0];
02930 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
02931 }
02932
02933 if( (h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P )
02934 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
02935 pred_weight_table(h);
02936 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02937 implicit_weight_table(h, -1);
02938 }else {
02939 h->use_weight = 0;
02940 for (i = 0; i < 2; i++) {
02941 h->luma_weight_flag[i] = 0;
02942 h->chroma_weight_flag[i] = 0;
02943 }
02944 }
02945
02946 if(h->nal_ref_idc)
02947 ff_h264_decode_ref_pic_marking(h0, &s->gb);
02948
02949 if(FRAME_MBAFF){
02950 ff_h264_fill_mbaff_ref_list(h);
02951
02952 if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02953 implicit_weight_table(h, 0);
02954 implicit_weight_table(h, 1);
02955 }
02956 }
02957
02958 if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
02959 ff_h264_direct_dist_scale_factor(h);
02960 ff_h264_direct_ref_list_init(h);
02961
02962 if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
02963 tmp = get_ue_golomb_31(&s->gb);
02964 if(tmp > 2){
02965 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
02966 return -1;
02967 }
02968 h->cabac_init_idc= tmp;
02969 }
02970
02971 h->last_qscale_diff = 0;
02972 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
02973 if(tmp>51+6*(h->sps.bit_depth_luma-8)){
02974 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
02975 return -1;
02976 }
02977 s->qscale= tmp;
02978 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
02979 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
02980
02981 if(h->slice_type == AV_PICTURE_TYPE_SP){
02982 get_bits1(&s->gb);
02983 }
02984 if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
02985 get_se_golomb(&s->gb);
02986 }
02987
02988 h->deblocking_filter = 1;
02989 h->slice_alpha_c0_offset = 52;
02990 h->slice_beta_offset = 52;
02991 if( h->pps.deblocking_filter_parameters_present ) {
02992 tmp= get_ue_golomb_31(&s->gb);
02993 if(tmp > 2){
02994 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
02995 return -1;
02996 }
02997 h->deblocking_filter= tmp;
02998 if(h->deblocking_filter < 2)
02999 h->deblocking_filter^= 1;
03000
03001 if( h->deblocking_filter ) {
03002 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
03003 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
03004 if( h->slice_alpha_c0_offset > 104U
03005 || h->slice_beta_offset > 104U){
03006 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
03007 return -1;
03008 }
03009 }
03010 }
03011
03012 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
03013 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
03014 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == AV_PICTURE_TYPE_B)
03015 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
03016 h->deblocking_filter= 0;
03017
03018 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
03019 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
03020
03021
03022 h->deblocking_filter = 2;
03023 } else {
03024 h0->max_contexts = 1;
03025 if(!h0->single_decode_warning) {
03026 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
03027 h0->single_decode_warning = 1;
03028 }
03029 if (h != h0) {
03030 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
03031 return 1;
03032 }
03033 }
03034 }
03035 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
03036
03037 #if 0 //FMO
03038 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
03039 slice_group_change_cycle= get_bits(&s->gb, ?);
03040 #endif
03041
03042 h0->last_slice_type = slice_type;
03043 h->slice_num = ++h0->current_slice;
03044 if(h->slice_num >= MAX_SLICES){
03045 av_log(s->avctx, AV_LOG_ERROR, "Too many slices (%d >= %d), increase MAX_SLICES and recompile\n", h->slice_num, MAX_SLICES);
03046 }
03047
03048 for(j=0; j<2; j++){
03049 int id_list[16];
03050 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
03051 for(i=0; i<16; i++){
03052 id_list[i]= 60;
03053 if(h->ref_list[j][i].data[0]){
03054 int k;
03055 uint8_t *base= h->ref_list[j][i].base[0];
03056 for(k=0; k<h->short_ref_count; k++)
03057 if(h->short_ref[k]->base[0] == base){
03058 id_list[i]= k;
03059 break;
03060 }
03061 for(k=0; k<h->long_ref_count; k++)
03062 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
03063 id_list[i]= h->short_ref_count + k;
03064 break;
03065 }
03066 }
03067 }
03068
03069 ref2frm[0]=
03070 ref2frm[1]= -1;
03071 for(i=0; i<16; i++)
03072 ref2frm[i+2]= 4*id_list[i]
03073 +(h->ref_list[j][i].reference&3);
03074 ref2frm[18+0]=
03075 ref2frm[18+1]= -1;
03076 for(i=16; i<48; i++)
03077 ref2frm[i+4]= 4*id_list[(i-16)>>1]
03078 +(h->ref_list[j][i].reference&3);
03079 }
03080
03081
03082 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
03083 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
03084
03085 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
03086 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
03087 h->slice_num,
03088 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
03089 first_mb_in_slice,
03090 av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
03091 pps_id, h->frame_num,
03092 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
03093 h->ref_count[0], h->ref_count[1],
03094 s->qscale,
03095 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
03096 h->use_weight,
03097 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
03098 h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
03099 );
03100 }
03101
03102 return 0;
03103 }
03104
03105 int ff_h264_get_slice_type(const H264Context *h)
03106 {
03107 switch (h->slice_type) {
03108 case AV_PICTURE_TYPE_P: return 0;
03109 case AV_PICTURE_TYPE_B: return 1;
03110 case AV_PICTURE_TYPE_I: return 2;
03111 case AV_PICTURE_TYPE_SP: return 3;
03112 case AV_PICTURE_TYPE_SI: return 4;
03113 default: return -1;
03114 }
03115 }
03116
/**
 * Fill the per-macroblock caches (neighbour addresses/types, non-zero
 * coefficient counts, motion vectors and reference indices) that the
 * deblocking filter needs for the macroblock at h->mb_xy.
 *
 * @param h       decoder context; h->mb_xy addresses the current MB
 * @param mb_type macroblock type of the current MB
 * @return non-zero if filtering can be skipped entirely for this MB
 *         (all relevant QPs are at or below the filter threshold),
 *         0 if the caches were filled and the MB must be filtered
 */
static int fill_filter_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int top_xy, left_xy[2];
    int top_type, left_type[2];

    /* index of the MB above; in field mode the vertical neighbour is
     * two MB rows up, hence the shift by MB_FIELD */
    top_xy = mb_xy - (s->mb_stride << MB_FIELD);

    /* left_xy[0]/left_xy[1] are the left neighbours of the top/bottom
     * halves of the current MB; they only differ in MBAFF mode when the
     * field/frame coding of the left pair differs from the current MB */
    left_xy[1] = left_xy[0] = mb_xy-1;
    if(FRAME_MBAFF){
        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        if(s->mb_y&1){ /* bottom MB of the pair */
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[0] -= s->mb_stride;
            }
        }else{ /* top MB of the pair */
            if(curr_mb_field_flag){
                /* bit 7 of mb_type is the interlaced flag: pick the lower
                 * MB of the pair above when it is frame coded */
                top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    {
        /* Fast path: when the QP of this MB and of its left/top
         * neighbours is at or below qp_thresh, no edge of this MB can
         * trigger the filter, so the expensive cache setup below can be
         * skipped entirely. */
        int qp_thresh = h->qp_thresh;
        int qp = s->current_picture.qscale_table[mb_xy];
        if(qp <= qp_thresh
           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
           && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
            if(!FRAME_MBAFF)
                return 1;
            /* MBAFF: also check the second left neighbour and the MB two
             * rows above before declaring the MB skippable */
            if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
              && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
                return 1;
        }
    }

    top_type = s->current_picture.mb_type[top_xy] ;
    left_type[0] = s->current_picture.mb_type[left_xy[0]];
    left_type[1] = s->current_picture.mb_type[left_xy[1]];
    if(h->deblocking_filter == 2){
        /* filter mode 2: do not filter across slice boundaries, so treat
         * neighbours from other slices as unavailable */
        if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
    }else{
        /* otherwise only picture borders (slice_table == 0xFFFF) count
         * as unavailable */
        if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
    }
    h->top_type = top_type ;
    h->left_type[0]= left_type[0];
    h->left_type[1]= left_type[1];

    /* intra MBs need no mv/ref caches for filtering */
    if(IS_INTRA(mb_type))
        return 0;

    /* non-zero coefficient counts of the current MB, one 4-entry luma
     * row per copy */
    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);

    h->cbp= h->cbp_table[mb_xy];

    {
        int list;
        for(list=0; list<h->list_count; list++){
            int8_t *ref;
            int y, b_stride;
            int16_t (*mv_dst)[2];
            int16_t (*mv_src)[2];

            if(!USES_LIST(mb_type, list)){
                /* list unused: zero MVs, LIST_NOT_USED reference indices */
                fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                continue;
            }

            ref = &s->current_picture.ref_index[list][4*mb_xy];
            {
                /* remap per-slice reference indices through the ref2frm
                 * table of the slice this MB belongs to */
                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                ref += 2;
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
            }

            /* copy the 4x4 motion vectors of the current MB, row by row */
            b_stride = h->b_stride;
            mv_dst = &h->mv_cache[list][scan8[0]];
            mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
            for(y=0; y<4; y++){
                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
            }

        }
    }

    /* bottom row of non-zero counts from the MB above */
    if(top_type){
        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
    }

    /* right column of non-zero counts from the left MB */
    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
    }

    /* With CAVLC and 8x8 transform the stored NNZ values serve residual
     * decoding, not the filter; rebuild the filter's view from the CBP
     * bits instead. */
    if(!CABAC && h->pps.transform_8x8_mode){
        if(IS_8x8DCT(top_type)){
            h->non_zero_count_cache[4+8*0]=
            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
        }
        if(IS_8x8DCT(left_type[0])){
            h->non_zero_count_cache[3+8*1]=
            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12;
        }
        if(IS_8x8DCT(left_type[1])){
            h->non_zero_count_cache[3+8*3]=
            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12;
        }

        if(IS_8x8DCT(mb_type)){
            /* one CBP bit per 8x8 block, replicated to all four 4x4
             * positions of that block */
            h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
            h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12;

            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;

            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;

            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
        }
    }

    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            /* MVs and reference indices of the row above the current MB */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= 4*top_xy + 2;
                /* the neighbour may belong to another slice: use ITS
                 * ref2frm table, selected via slice_table */
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
            }else{
                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
            }

            /* left column; only filled when the left MB has the same
             * field/frame coding as the current one */
            if(!IS_INTERLACED(mb_type^left_type[0])){
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    const int b8_xy= 4*left_xy[0] + 1;
                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
                    h->ref_cache[list][scan8[0] - 1 +16 ]=
                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
                }else{
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
                    h->ref_cache[list][scan8[0] - 1 + 8 ]=
                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
                }
            }
        }
    }

    return 0;
}
03331
/**
 * Apply the deblocking filter to the macroblock columns
 * [start_x, end_x) of the current MB row (both rows of the MB pair
 * when MBAFF is in use).
 *
 * @param h       decoder context
 * @param start_x first MB column to filter (inclusive)
 * @param end_x   first MB column not to filter (exclusive)
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    /* the loop below clobbers per-MB decoder state; saved and restored */
    const int old_slice_type= h->slice_type;
    const int pixel_shift = h->pixel_shift;

    if(h->deblocking_filter) {
        for(mb_x= start_x; mb_x<end_x; mb_x++){
            /* with MBAFF, filter both MBs of the vertical pair */
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                h->slice_num= h->slice_table[mb_xy];
                mb_type= s->current_picture.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                /* destination pointers into the luma and chroma planes;
                 * chroma MB height is 8 (4:2:0) or 16 (4:4:4) */
                dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);

                if (MB_FIELD) {
                    /* field MB: double stride, and rewind the bottom-field
                     * MB so it starts on the second picture line */
                    linesize = h->mb_linesize = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){
                        dest_y -= s->linesize*15;
                        dest_cb-= s->uvlinesize*((8 << CHROMA444)-1);
                        dest_cr-= s->uvlinesize*((8 << CHROMA444)-1);
                    }
                } else {
                    linesize = h->mb_linesize = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
                if(fill_filter_caches(h, mb_type))
                    continue; /* non-zero: MB needs no filtering */
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                } else {
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                }
            }
        }
    }
    /* restore the state modified above */
    h->slice_type= old_slice_type;
    s->mb_x= end_x;
    s->mb_y= end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}
03391
03392 static void predict_field_decoding_flag(H264Context *h){
03393 MpegEncContext * const s = &h->s;
03394 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03395 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
03396 ? s->current_picture.mb_type[mb_xy-1]
03397 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
03398 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
03399 : 0;
03400 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
03401 }
03402
03406 static void decode_finish_row(H264Context *h){
03407 MpegEncContext * const s = &h->s;
03408 int top = 16*(s->mb_y >> FIELD_PICTURE);
03409 int height = 16 << FRAME_MBAFF;
03410 int deblock_border = (16 + 4) << FRAME_MBAFF;
03411 int pic_height = 16*s->mb_height >> FIELD_PICTURE;
03412
03413 if (h->deblocking_filter) {
03414 if((top + height) >= pic_height)
03415 height += deblock_border;
03416
03417 top -= deblock_border;
03418 }
03419
03420 if (top >= pic_height || (top + height) < h->emu_edge_height)
03421 return;
03422
03423 height = FFMIN(height, pic_height - top);
03424 if (top < h->emu_edge_height) {
03425 height = top+height;
03426 top = 0;
03427 }
03428
03429 ff_draw_horiz_band(s, top, height);
03430
03431 if (s->dropable) return;
03432
03433 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
03434 s->picture_structure==PICT_BOTTOM_FIELD);
03435 }
03436
/**
 * Decode one slice worth of macroblocks (thread worker entry point,
 * callable via avctx->execute()).
 *
 * @param avctx codec context
 * @param arg   pointer to an H264Context* (slice context)
 * @return 0 on success, -1 on a decode error (after registering the
 *         damaged area with the error concealment)
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg){
    H264Context *h = *(void**)arg;
    MpegEncContext * const s = &h->s;
    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
    int lf_x_start = s->mb_x; /* first not-yet-deblocked column of the row */

    s->mb_skip_run= -1;

    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

    if( h->pps.cabac ) {
        /* CABAC slice data starts byte-aligned */
        align_get_bits( &s->gb );

        ff_init_cabac_states( &h->cabac);
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               (get_bits_left(&s->gb) + 7)/8);

        ff_h264_init_cabac_states(h);

        for(;;){
            int ret = ff_h264_decode_mb_cabac(h);
            int eos;

            if(ret>=0) ff_h264_hl_decode_mb(h);

            /* MBAFF: the bottom MB of the pair follows immediately */
            if( ret >= 0 && FRAME_MBAFF ) {
                s->mb_y++;

                ret = ff_h264_decode_mb_cabac(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }
            eos = get_cabac_terminate( &h->cabac );

            /* workaround for streams with truncated CABAC data: treat
             * running 2 bytes past the end as a normal slice end */
            if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
                return 0;
            }
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                return -1;
            }

            /* end of MB row: deblock it and advance to the next row */
            if( ++s->mb_x >= s->mb_width ) {
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y; /* rows advance by 2 in field/MBAFF mode */
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
            }

            if( eos || s->mb_y >= s->mb_height ) {
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
                return 0;
            }
        }

    } else {
        /* CAVLC entropy coding */
        for(;;){
            int ret = ff_h264_decode_mb_cavlc(h);

            if(ret>=0) ff_h264_hl_decode_mb(h);

            /* MBAFF: decode the bottom MB of the pair as well */
            if(ret>=0 && FRAME_MBAFF){
                s->mb_y++;
                ret = ff_h264_decode_mb_cavlc(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }

            if(ret<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
                if(s->mb_y >= s->mb_height){
                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                    /* slight bit excess is tolerated unless error
                     * recognition is set to aggressive */
                    if( get_bits_count(&s->gb) == s->gb.size_in_bits
                        || get_bits_count(&s->gb) < s->gb.size_in_bits && s->avctx->error_recognition < FF_ER_AGGRESSIVE) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;
                    }
                }
            }

            /* bitstream exhausted before the picture was completed */
            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                    if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;
                }
            }
        }
    }

#if 0
    for(;s->mb_y < s->mb_height; s->mb_y++){
        for(;s->mb_x < s->mb_width; s->mb_x++){
            int ret= decode_mb(h);

            ff_h264_hl_decode_mb(h);

            if(ret<0){
                av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                s->mb_x=0;
                if(++s->mb_y >= s->mb_height){
                    if(get_bits_count(s->gb) == s->gb.size_in_bits){
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;
                    }
                }
            }

            if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
                if(get_bits_count(s->gb) == s->gb.size_in_bits){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;
                }
            }
        }
        s->mb_x=0;
        ff_draw_horiz_band(s, 16*s->mb_y, 16);
    }
#endif
    return -1;
}
03618
/**
 * Run the queued slice decoders. A single context calls decode_slice()
 * directly; multiple contexts are dispatched through avctx->execute()
 * and the master context is then synchronized with the state of the
 * last slice context. Hardware accelerated paths (hwaccel / VDPAU)
 * skip software slice decoding entirely.
 *
 * @param h             master decoder context
 * @param context_count number of slice contexts to execute
 */
static void execute_decode_slices(H264Context *h, int context_count){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    H264Context *hx;
    int i;

    if (s->avctx->hwaccel)
        return;
    if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        return;
    if(context_count == 1) {
        decode_slice(avctx, &h);
    } else {
        /* propagate per-frame settings into the worker contexts
         * (context 0 is the master itself) */
        for(i = 1; i < context_count; i++) {
            hx = h->thread_context[i];
            hx->s.error_recognition = avctx->error_recognition;
            hx->s.error_count = 0;
            hx->x264_build= h->x264_build;
        }

        avctx->execute(avctx, (void *)decode_slice,
                       h->thread_context, NULL, context_count, sizeof(void*));

        /* pull the final position/state of the last context back into
         * the master, and accumulate error counts from all workers */
        hx = h->thread_context[context_count - 1];
        s->mb_x = hx->s.mb_x;
        s->mb_y = hx->s.mb_y;
        s->dropable = hx->s.dropable;
        s->picture_structure = hx->s.picture_structure;
        for(i = 1; i < context_count; i++)
            h->s.error_count += h->thread_context[i]->s.error_count;
    }
}
03658
03659
03660 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
03661 MpegEncContext * const s = &h->s;
03662 AVCodecContext * const avctx= s->avctx;
03663 H264Context *hx;
03664 int buf_index;
03665 int context_count;
03666 int next_avc;
03667 int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
03668 int nals_needed=0;
03669 int nal_index;
03670
03671 h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
03672 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
03673 h->current_slice = 0;
03674 if (!s->first_field)
03675 s->current_picture_ptr= NULL;
03676 ff_h264_reset_sei(h);
03677 }
03678
03679 for(;pass <= 1;pass++){
03680 buf_index = 0;
03681 context_count = 0;
03682 next_avc = h->is_avc ? 0 : buf_size;
03683 nal_index = 0;
03684 for(;;){
03685 int consumed;
03686 int dst_length;
03687 int bit_length;
03688 const uint8_t *ptr;
03689 int i, nalsize = 0;
03690 int err;
03691
03692 if(buf_index >= next_avc) {
03693 if(buf_index >= buf_size) break;
03694 nalsize = 0;
03695 for(i = 0; i < h->nal_length_size; i++)
03696 nalsize = (nalsize << 8) | buf[buf_index++];
03697 if(nalsize <= 0 || nalsize > buf_size - buf_index){
03698 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
03699 break;
03700 }
03701 next_avc= buf_index + nalsize;
03702 } else {
03703
03704 for(; buf_index + 3 < next_avc; buf_index++){
03705
03706 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
03707 break;
03708 }
03709
03710 if(buf_index+3 >= buf_size) break;
03711
03712 buf_index+=3;
03713 if(buf_index >= next_avc) continue;
03714 }
03715
03716 hx = h->thread_context[context_count];
03717
03718 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
03719 if (ptr==NULL || dst_length < 0){
03720 return -1;
03721 }
03722 i= buf_index + consumed;
03723 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
03724 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
03725 s->workaround_bugs |= FF_BUG_TRUNCATED;
03726
03727 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
03728 while(dst_length > 0 && ptr[dst_length - 1] == 0)
03729 dst_length--;
03730 }
03731 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
03732
03733 if(s->avctx->debug&FF_DEBUG_STARTCODE){
03734 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length);
03735 }
03736
03737 if (h->is_avc && (nalsize != consumed) && nalsize){
03738 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
03739 }
03740
03741 buf_index += consumed;
03742 nal_index++;
03743
03744 if(pass == 0) {
03745
03746
03747
03748 switch (hx->nal_unit_type) {
03749 case NAL_SPS:
03750 case NAL_PPS:
03751 nals_needed = nal_index;
03752 break;
03753 case NAL_IDR_SLICE:
03754 case NAL_SLICE:
03755 init_get_bits(&hx->s.gb, ptr, bit_length);
03756 if (!get_ue_golomb(&hx->s.gb))
03757 nals_needed = nal_index;
03758 }
03759 continue;
03760 }
03761
03762
03763 if(
03764 #if FF_API_HURRY_UP
03765 (s->hurry_up == 1 && h->nal_ref_idc == 0) ||
03766 #endif
03767 (avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
03768 continue;
03769
03770 again:
03771 err = 0;
03772 switch(hx->nal_unit_type){
03773 case NAL_IDR_SLICE:
03774 if (h->nal_unit_type != NAL_IDR_SLICE) {
03775 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
03776 return -1;
03777 }
03778 idr(h);
03779 case NAL_SLICE:
03780 init_get_bits(&hx->s.gb, ptr, bit_length);
03781 hx->intra_gb_ptr=
03782 hx->inter_gb_ptr= &hx->s.gb;
03783 hx->s.data_partitioning = 0;
03784
03785 if((err = decode_slice_header(hx, h)))
03786 break;
03787
03788 s->current_picture_ptr->key_frame |=
03789 (hx->nal_unit_type == NAL_IDR_SLICE) ||
03790 (h->sei_recovery_frame_cnt >= 0);
03791
03792 if (h->current_slice == 1) {
03793 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
03794 decode_postinit(h, nal_index >= nals_needed);
03795 }
03796
03797 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
03798 return -1;
03799 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03800 ff_vdpau_h264_picture_start(s);
03801 }
03802
03803 if(hx->redundant_pic_count==0
03804 #if FF_API_HURRY_UP
03805 && hx->s.hurry_up < 5
03806 #endif
03807 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03808 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03809 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03810 && avctx->skip_frame < AVDISCARD_ALL){
03811 if(avctx->hwaccel) {
03812 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
03813 return -1;
03814 }else
03815 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
03816 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
03817 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
03818 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
03819 }else
03820 context_count++;
03821 }
03822 break;
03823 case NAL_DPA:
03824 init_get_bits(&hx->s.gb, ptr, bit_length);
03825 hx->intra_gb_ptr=
03826 hx->inter_gb_ptr= NULL;
03827
03828 if ((err = decode_slice_header(hx, h)) < 0)
03829 break;
03830
03831 hx->s.data_partitioning = 1;
03832
03833 break;
03834 case NAL_DPB:
03835 init_get_bits(&hx->intra_gb, ptr, bit_length);
03836 hx->intra_gb_ptr= &hx->intra_gb;
03837 break;
03838 case NAL_DPC:
03839 init_get_bits(&hx->inter_gb, ptr, bit_length);
03840 hx->inter_gb_ptr= &hx->inter_gb;
03841
03842 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
03843 && s->context_initialized
03844 #if FF_API_HURRY_UP
03845 && s->hurry_up < 5
03846 #endif
03847 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03848 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03849 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03850 && avctx->skip_frame < AVDISCARD_ALL)
03851 context_count++;
03852 break;
03853 case NAL_SEI:
03854 init_get_bits(&s->gb, ptr, bit_length);
03855 ff_h264_decode_sei(h);
03856 break;
03857 case NAL_SPS:
03858 init_get_bits(&s->gb, ptr, bit_length);
03859 ff_h264_decode_seq_parameter_set(h);
03860
03861 if (s->flags& CODEC_FLAG_LOW_DELAY ||
03862 (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames))
03863 s->low_delay=1;
03864
03865 if(avctx->has_b_frames < 2)
03866 avctx->has_b_frames= !s->low_delay;
03867
03868 if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
03869 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
03870 avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
03871 h->pixel_shift = h->sps.bit_depth_luma > 8;
03872
03873 ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
03874 ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
03875 dsputil_init(&s->dsp, s->avctx);
03876 } else {
03877 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
03878 return -1;
03879 }
03880 }
03881 break;
03882 case NAL_PPS:
03883 init_get_bits(&s->gb, ptr, bit_length);
03884
03885 ff_h264_decode_picture_parameter_set(h, bit_length);
03886
03887 break;
03888 case NAL_AUD:
03889 case NAL_END_SEQUENCE:
03890 case NAL_END_STREAM:
03891 case NAL_FILLER_DATA:
03892 case NAL_SPS_EXT:
03893 case NAL_AUXILIARY_SLICE:
03894 break;
03895 default:
03896 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
03897 }
03898
03899 if(context_count == h->max_contexts) {
03900 execute_decode_slices(h, context_count);
03901 context_count = 0;
03902 }
03903
03904 if (err < 0)
03905 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
03906 else if(err == 1) {
03907
03908
03909
03910
03911 h->nal_unit_type = hx->nal_unit_type;
03912 h->nal_ref_idc = hx->nal_ref_idc;
03913 hx = h;
03914 goto again;
03915 }
03916 }
03917 }
03918 if(context_count)
03919 execute_decode_slices(h, context_count);
03920 return buf_index;
03921 }
03922
03926 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
03927 if(pos==0) pos=1;
03928 if(pos+10>buf_size) pos=buf_size;
03929
03930 return pos;
03931 }
03932
/**
 * Decode one AVPacket of H.264 data into a picture.
 *
 * @param avctx     codec context (priv_data is the H264Context)
 * @param data      output AVFrame, filled when a picture is ready
 * @param data_size set to sizeof(AVFrame) when *data holds a picture, 0 otherwise
 * @param avpkt     input packet; size 0 means "flush: drain delayed pictures"
 * @return number of consumed input bytes, or -1 on error
 */
static int decode_frame(AVCodecContext *avctx,
                        void *data, int *data_size,
                        AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

 /* `out:` is the flush path; it is also entered via the goto below when an
    end-of-sequence NAL arrives with no current picture. */
 out:
    if (buf_size == 0) {
        Picture *out;
        int i, out_idx;

        s->current_picture_ptr = NULL;

        /* Pick the delayed picture with the smallest POC that precedes any
           keyframe / MMCO reset boundary — that is the next one to output. */
        out = h->delayed_pic[0];
        out_idx = 0;
        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
                out_idx = i;
            }

        /* remove the chosen entry by shifting the rest of the list down */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

        if(out){
            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;
        }

        return 0;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    /* end of sequence with nothing in flight: re-enter the flush path above */
    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        buf_size = 0;
        goto out;
    }

    /* outside CHUNKS mode a packet must produce a picture; no picture here
       is an error unless frames are being deliberately skipped */
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        if (avctx->skip_frame >= AVDISCARD_NONREF
#if FF_API_HURRY_UP
            || s->hurry_up
#endif
           )
            return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return -1;
    }

    /* finish the field/frame either per-packet (normal mode) or once the
       last macroblock row was reached (CHUNKS mode) */
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){

        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);

        field_end(h, 0);

        if (!h->next_output_pic) {
            /* reordering delay: no picture ready for output yet */
            *data_size = 0;
        } else {
            *data_size = sizeof(AVFrame);
            *pict = *(AVFrame*)h->next_output_pic;
        }
    }

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);

    return get_consumed_bytes(s, buf_index, buf_size);
}
04017 #if 0
/**
 * Currently compiled out (#if 0): fill h->mb_avail[] with availability flags
 * for the macroblocks neighboring the current one. A neighbor counts as
 * available only when it lies inside the picture and belongs to the same
 * slice (same slice_num in the slice table).
 * Index layout appears to be: 0=top-left, 1=top, 2=top-right, 3=left —
 * TODO(review) confirm indices 4 and 5 against intended callers.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        /* first macroblock row: no neighbors above */
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; /* the current macroblock itself */
    h->mb_avail[5]= 0;
}
04035 #endif
04036
04037 #ifdef TEST
04038 #undef printf
04039 #undef random
04040 #define COUNT 8000
04041 #define SIZE (COUNT*40)
/**
 * Standalone self-test (built only with -DTEST): round-trips unsigned and
 * signed Exp-Golomb codes through the put/get bit contexts and reports any
 * mismatch. The #if 0 region below contains an older, bit-rotted DCT/NAL
 * test that no longer compiles.
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;

    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    /* write COUNT unsigned Exp-Golomb codes ... */
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    /* ... then read them back and verify each value */
    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
            /* mismatches are reported but do not abort the test */
        }
        STOP_TIMER("get_ue_golomb");
    }

    /* same round-trip for signed Exp-Golomb, centered around zero */
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
            /* mismatches are reported but do not abort the test */
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    /* Dead code: 4x4 (I)DCT round-trip test — references undeclared
       variables (h, qp, src1_block, ...) and would not compile. */
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;

        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        for(j=0; j<16; j++){

            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }

        h->h264dsp.h264_idct_add(ref, block, 4);

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");

    return 0;
}
04210 #endif
04211
04212
04213 av_cold void ff_h264_free_context(H264Context *h)
04214 {
04215 int i;
04216
04217 free_tables(h, 1);
04218
04219 for(i = 0; i < MAX_SPS_COUNT; i++)
04220 av_freep(h->sps_buffers + i);
04221
04222 for(i = 0; i < MAX_PPS_COUNT; i++)
04223 av_freep(h->pps_buffers + i);
04224 }
04225
04226 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
04227 {
04228 H264Context *h = avctx->priv_data;
04229 MpegEncContext *s = &h->s;
04230
04231 ff_h264_free_context(h);
04232
04233 MPV_common_end(s);
04234
04235
04236
04237 return 0;
04238 }
04239
/* Recognized H.264 profiles for AVCodec.profiles reporting;
   the list is terminated by the FF_PROFILE_UNKNOWN sentinel. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN },
};
04256
/* Software H.264 decoder registration.
   NOTE: positional initializer — field order must match struct AVCodec. */
AVCodec ff_h264_decoder = {
    "h264",                 /* name */
    AVMEDIA_TYPE_VIDEO,     /* type */
    CODEC_ID_H264,          /* id */
    sizeof(H264Context),    /* priv_data_size */
    ff_h264_decode_init,    /* init */
    NULL,                   /* encode: decoder only */
    ff_h264_decode_end,     /* close */
    decode_frame,           /* decode */
    /* capabilities: direct rendering, B-frame delay, slice + frame threading */
    CODEC_CAP_DR1 | CODEC_CAP_DELAY |
        CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
};
04274
#if CONFIG_H264_VDPAU_DECODER
/* VDPAU-accelerated H.264 decoder registration (positional initializer). */
AVCodec ff_h264_vdpau_decoder = {
    "h264_vdpau",           /* name */
    AVMEDIA_TYPE_VIDEO,     /* type */
    CODEC_ID_H264,          /* id */
    sizeof(H264Context),    /* priv_data_size */
    ff_h264_decode_init,    /* init */
    NULL,                   /* encode: decoder only */
    ff_h264_decode_end,     /* close */
    decode_frame,           /* decode */
    /* capabilities: direct rendering, delay, hardware-accelerated via VDPAU */
    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    /* only the VDPAU surface pixel format is supported */
    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
};
#endif