FFmpeg
vf_scale_cuda.c
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22 
23 #include <float.h>
24 #include <stdio.h>
25 
26 #include "libavutil/common.h"
27 #include "libavutil/hwcontext.h"
29 #include "libavutil/cuda_check.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 
34 #include "avfilter.h"
35 #include "filters.h"
36 #include "scale_eval.h"
37 #include "video.h"
38 
39 #include "cuda/load_helper.h"
40 #include "vf_scale_cuda.h"
41 
42 static const enum AVPixelFormat supported_formats[] = {
53 };
54 
55 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
56 #define BLOCKX 32
57 #define BLOCKY 16
58 
59 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
60 
61 enum {
63 
68 
70 };
71 
72 typedef struct CUDAScaleContext {
73  const AVClass *class;
74 
76 
77  enum AVPixelFormat in_fmt, out_fmt;
82 
85 
88 
89  /**
90  * Output sw format. AV_PIX_FMT_NONE for no conversion.
91  */
93 
94  char *w_expr; ///< width expression string
95  char *h_expr; ///< height expression string
96 
99 
100  CUcontext cu_ctx;
101  CUmodule cu_module;
102  CUfunction cu_func;
103  CUfunction cu_func_uv;
104  CUstream cu_stream;
105 
109 
110  float param;
112 
114 {
115  CUDAScaleContext *s = ctx->priv;
116 
117  s->frame = av_frame_alloc();
118  if (!s->frame)
119  return AVERROR(ENOMEM);
120 
121  s->tmp_frame = av_frame_alloc();
122  if (!s->tmp_frame)
123  return AVERROR(ENOMEM);
124 
125  return 0;
126 }
127 
129 {
130  CUDAScaleContext *s = ctx->priv;
131 
132  if (s->hwctx && s->cu_module) {
133  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
134  CUcontext dummy;
135 
136  CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
137  CHECK_CU(cu->cuModuleUnload(s->cu_module));
138  s->cu_module = NULL;
139  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
140  }
141 
142  av_frame_free(&s->frame);
143  av_buffer_unref(&s->frames_ctx);
144  av_frame_free(&s->tmp_frame);
145 }
146 
147 static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
148 {
149  AVBufferRef *out_ref = NULL;
150  AVHWFramesContext *out_ctx;
151  int ret;
152 
153  out_ref = av_hwframe_ctx_alloc(device_ctx);
154  if (!out_ref)
155  return AVERROR(ENOMEM);
156  out_ctx = (AVHWFramesContext*)out_ref->data;
157 
158  out_ctx->format = AV_PIX_FMT_CUDA;
159  out_ctx->sw_format = s->out_fmt;
160  out_ctx->width = FFALIGN(width, 32);
161  out_ctx->height = FFALIGN(height, 32);
162 
163  ret = av_hwframe_ctx_init(out_ref);
164  if (ret < 0)
165  goto fail;
166 
167  av_frame_unref(s->frame);
168  ret = av_hwframe_get_buffer(out_ref, s->frame, 0);
169  if (ret < 0)
170  goto fail;
171 
172  s->frame->width = width;
173  s->frame->height = height;
174 
175  av_buffer_unref(&s->frames_ctx);
176  s->frames_ctx = out_ref;
177 
178  return 0;
179 fail:
180  av_buffer_unref(&out_ref);
181  return ret;
182 }
183 
184 static int format_is_supported(enum AVPixelFormat fmt)
185 {
186  int i;
187 
188  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
189  if (supported_formats[i] == fmt)
190  return 1;
191  return 0;
192 }
193 
194 static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format)
195 {
196  CUDAScaleContext *s = ctx->priv;
197  int i, p, d;
198 
199  s->in_fmt = in_format;
200  s->out_fmt = out_format;
201 
202  s->in_desc = av_pix_fmt_desc_get(s->in_fmt);
203  s->out_desc = av_pix_fmt_desc_get(s->out_fmt);
204  s->in_planes = av_pix_fmt_count_planes(s->in_fmt);
205  s->out_planes = av_pix_fmt_count_planes(s->out_fmt);
206 
207  // find maximum step of each component of each plane
208  // For our subset of formats, this should accurately tell us how many channels CUDA needs
209  // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats
210 
211  for (i = 0; i < s->in_desc->nb_components; i++) {
212  d = (s->in_desc->comp[i].depth + 7) / 8;
213  p = s->in_desc->comp[i].plane;
214  s->in_plane_channels[p] = FFMAX(s->in_plane_channels[p], s->in_desc->comp[i].step / d);
215 
216  s->in_plane_depths[p] = s->in_desc->comp[i].depth;
217  }
218 }
219 
220 static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
221  int out_width, int out_height)
222 {
223  CUDAScaleContext *s = ctx->priv;
224  FilterLink *inl = ff_filter_link(ctx->inputs[0]);
225  FilterLink *outl = ff_filter_link(ctx->outputs[0]);
226 
227  AVHWFramesContext *in_frames_ctx;
228 
229  enum AVPixelFormat in_format;
230  enum AVPixelFormat out_format;
231  int ret;
232 
233  /* check that we have a hw context */
234  if (!inl->hw_frames_ctx) {
235  av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
236  return AVERROR(EINVAL);
237  }
238  in_frames_ctx = (AVHWFramesContext*)inl->hw_frames_ctx->data;
239  in_format = in_frames_ctx->sw_format;
240  out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;
241 
242  if (!format_is_supported(in_format)) {
243  av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
244  av_get_pix_fmt_name(in_format));
245  return AVERROR(ENOSYS);
246  }
247  if (!format_is_supported(out_format)) {
248  av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
249  av_get_pix_fmt_name(out_format));
250  return AVERROR(ENOSYS);
251  }
252 
253  set_format_info(ctx, in_format, out_format);
254 
255  if (s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
256  s->frames_ctx = av_buffer_ref(inl->hw_frames_ctx);
257  if (!s->frames_ctx)
258  return AVERROR(ENOMEM);
259  } else {
260  s->passthrough = 0;
261 
262  ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, out_width, out_height);
263  if (ret < 0)
264  return ret;
265 
266  if (in_width == out_width && in_height == out_height &&
267  in_format == out_format && s->interp_algo == INTERP_ALGO_DEFAULT)
268  s->interp_algo = INTERP_ALGO_NEAREST;
269  }
270 
271  outl->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
272  if (!outl->hw_frames_ctx)
273  return AVERROR(ENOMEM);
274 
275  return 0;
276 }
277 
279 {
280  CUDAScaleContext *s = ctx->priv;
281  CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
282  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
283  char buf[128];
284  int ret;
285 
286  const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt);
287  const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);
288 
289  const char *function_infix = "";
290 
291  extern const unsigned char ff_vf_scale_cuda_ptx_data[];
292  extern const unsigned int ff_vf_scale_cuda_ptx_len;
293 
294  switch(s->interp_algo) {
295  case INTERP_ALGO_NEAREST:
296  function_infix = "Nearest";
297  s->interp_use_linear = 0;
298  s->interp_as_integer = 1;
299  break;
301  function_infix = "Bilinear";
302  s->interp_use_linear = 1;
303  s->interp_as_integer = 1;
304  break;
305  case INTERP_ALGO_DEFAULT:
306  case INTERP_ALGO_BICUBIC:
307  function_infix = "Bicubic";
308  s->interp_use_linear = 0;
309  s->interp_as_integer = 0;
310  break;
311  case INTERP_ALGO_LANCZOS:
312  function_infix = "Lanczos";
313  s->interp_use_linear = 0;
314  s->interp_as_integer = 0;
315  break;
316  default:
317  av_log(ctx, AV_LOG_ERROR, "Unknown interpolation algorithm\n");
318  return AVERROR_BUG;
319  }
320 
321  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
322  if (ret < 0)
323  return ret;
324 
325  ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module,
326  ff_vf_scale_cuda_ptx_data, ff_vf_scale_cuda_ptx_len);
327  if (ret < 0)
328  goto fail;
329 
330  snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s", function_infix, in_fmt_name, out_fmt_name);
331  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func, s->cu_module, buf));
332  if (ret < 0) {
333  av_log(ctx, AV_LOG_FATAL, "Unsupported conversion: %s -> %s\n", in_fmt_name, out_fmt_name);
334  ret = AVERROR(ENOSYS);
335  goto fail;
336  }
337 
338  snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
339  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));
340  if (ret < 0)
341  goto fail;
342 
343 fail:
344  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
345 
346  return ret;
347 }
348 
350 {
351  AVFilterContext *ctx = outlink->src;
352  AVFilterLink *inlink = outlink->src->inputs[0];
354  CUDAScaleContext *s = ctx->priv;
355  AVHWFramesContext *frames_ctx;
356  AVCUDADeviceContext *device_hwctx;
357  int w, h;
358  int ret;
359 
361  s->w_expr, s->h_expr,
362  inlink, outlink,
363  &w, &h)) < 0)
364  goto fail;
365 
367  s->force_original_aspect_ratio, s->force_divisible_by);
368 
369  if (((int64_t)h * inlink->w) > INT_MAX ||
370  ((int64_t)w * inlink->h) > INT_MAX)
371  av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
372 
373  outlink->w = w;
374  outlink->h = h;
375 
377  if (ret < 0)
378  return ret;
379 
380  frames_ctx = (AVHWFramesContext*)inl->hw_frames_ctx->data;
381  device_hwctx = frames_ctx->device_ctx->hwctx;
382 
383  s->hwctx = device_hwctx;
384  s->cu_stream = s->hwctx->stream;
385 
386  if (inlink->sample_aspect_ratio.num) {
387  outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
388  outlink->w*inlink->h},
389  inlink->sample_aspect_ratio);
390  } else {
391  outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
392  }
393 
394  av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d fmt:%s -> w:%d h:%d fmt:%s%s\n",
395  inlink->w, inlink->h, av_get_pix_fmt_name(s->in_fmt),
396  outlink->w, outlink->h, av_get_pix_fmt_name(s->out_fmt),
397  s->passthrough ? " (passthrough)" : "");
398 
400  if (ret < 0)
401  return ret;
402 
403  return 0;
404 
405 fail:
406  return ret;
407 }
408 
409 static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
410  CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height,
411  AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
412 {
413  CUDAScaleContext *s = ctx->priv;
414  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
415 
416  CUdeviceptr dst_devptr[4] = {
417  (CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1],
418  (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
419  };
420 
421  void *args_uchar[] = {
422  &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
423  &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
424  &dst_width, &dst_height, &dst_pitch,
425  &src_left, &src_top, &src_width, &src_height, &s->param
426  };
427 
428  return CHECK_CU(cu->cuLaunchKernel(func,
429  DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
430  BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
431 }
432 
434  AVFrame *out, AVFrame *in)
435 {
436  CUDAScaleContext *s = ctx->priv;
437  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
438  CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
439  int i, ret;
440 
441  CUtexObject tex[4] = { 0, 0, 0, 0 };
442 
443  int crop_width = (in->width - in->crop_right) - in->crop_left;
444  int crop_height = (in->height - in->crop_bottom) - in->crop_top;
445 
446  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
447  if (ret < 0)
448  return ret;
449 
450  for (i = 0; i < s->in_planes; i++) {
451  CUDA_TEXTURE_DESC tex_desc = {
452  .filterMode = s->interp_use_linear ?
453  CU_TR_FILTER_MODE_LINEAR :
454  CU_TR_FILTER_MODE_POINT,
455  .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
456  };
457 
458  CUDA_RESOURCE_DESC res_desc = {
459  .resType = CU_RESOURCE_TYPE_PITCH2D,
460  .res.pitch2D.format = s->in_plane_depths[i] <= 8 ?
461  CU_AD_FORMAT_UNSIGNED_INT8 :
462  CU_AD_FORMAT_UNSIGNED_INT16,
463  .res.pitch2D.numChannels = s->in_plane_channels[i],
464  .res.pitch2D.pitchInBytes = in->linesize[i],
465  .res.pitch2D.devPtr = (CUdeviceptr)in->data[i],
466  };
467 
468  if (i == 1 || i == 2) {
469  res_desc.res.pitch2D.width = AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w);
470  res_desc.res.pitch2D.height = AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h);
471  } else {
472  res_desc.res.pitch2D.width = in->width;
473  res_desc.res.pitch2D.height = in->height;
474  }
475 
476  ret = CHECK_CU(cu->cuTexObjectCreate(&tex[i], &res_desc, &tex_desc, NULL));
477  if (ret < 0)
478  goto exit;
479  }
480 
481  // scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
482  ret = call_resize_kernel(ctx, s->cu_func,
483  tex, in->crop_left, in->crop_top, crop_width, crop_height,
484  out, out->width, out->height, out->linesize[0]);
485  if (ret < 0)
486  goto exit;
487 
488  if (s->out_planes > 1) {
489  // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane.
490  ret = call_resize_kernel(ctx, s->cu_func_uv, tex,
491  AV_CEIL_RSHIFT(in->crop_left, s->in_desc->log2_chroma_w),
492  AV_CEIL_RSHIFT(in->crop_top, s->in_desc->log2_chroma_h),
493  AV_CEIL_RSHIFT(crop_width, s->in_desc->log2_chroma_w),
494  AV_CEIL_RSHIFT(crop_height, s->in_desc->log2_chroma_h),
495  out,
496  AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w),
497  AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h),
498  out->linesize[1]);
499  if (ret < 0)
500  goto exit;
501  }
502 
503 exit:
504  for (i = 0; i < s->in_planes; i++)
505  if (tex[i])
506  CHECK_CU(cu->cuTexObjectDestroy(tex[i]));
507 
508  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
509 
510  return ret;
511 }
512 
514 {
515  CUDAScaleContext *s = ctx->priv;
516  AVFilterLink *outlink = ctx->outputs[0];
517  AVFrame *src = in;
518  int ret;
519 
520  ret = scalecuda_resize(ctx, s->frame, src);
521  if (ret < 0)
522  return ret;
523 
524  src = s->frame;
525  ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
526  if (ret < 0)
527  return ret;
528 
529  av_frame_move_ref(out, s->frame);
530  av_frame_move_ref(s->frame, s->tmp_frame);
531 
532  s->frame->width = outlink->w;
533  s->frame->height = outlink->h;
534 
535  ret = av_frame_copy_props(out, in);
536  if (ret < 0)
537  return ret;
538 
539  return 0;
540 }
541 
543 {
544  AVFilterContext *ctx = link->dst;
545  CUDAScaleContext *s = ctx->priv;
546  AVFilterLink *outlink = ctx->outputs[0];
547  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
548 
549  AVFrame *out = NULL;
550  CUcontext dummy;
551  int ret = 0;
552 
553  if (s->passthrough)
554  return ff_filter_frame(outlink, in);
555 
556  out = av_frame_alloc();
557  if (!out) {
558  ret = AVERROR(ENOMEM);
559  goto fail;
560  }
561 
562  ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
563  if (ret < 0)
564  goto fail;
565 
566  ret = cudascale_scale(ctx, out, in);
567 
568  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
569  if (ret < 0)
570  goto fail;
571 
572  av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
573  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
574  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
575  INT_MAX);
576 
577  av_frame_free(&in);
578  return ff_filter_frame(outlink, out);
579 fail:
580  av_frame_free(&in);
581  av_frame_free(&out);
582  return ret;
583 }
584 
586 {
587  CUDAScaleContext *s = inlink->dst->priv;
588 
589  return s->passthrough ?
592 }
593 
594 #define OFFSET(x) offsetof(CUDAScaleContext, x)
595 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
/*
 * User-visible options of the filter. Each non-constant entry stores its
 * value in the CUDAScaleContext field named through OFFSET(); the
 * AV_OPT_TYPE_CONST entries are named values for the option sharing their
 * .unit.
 */
596 static const AVOption options[] = {
 /* output size expressions; default to the input dimensions */
597  { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
598  { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
 /* interpolation algorithm and its named values */
599  { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, .unit = "interp_algo" },
600  { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, .unit = "interp_algo" },
601  { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, .unit = "interp_algo" },
602  { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, .unit = "interp_algo" },
603  { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, .unit = "interp_algo" },
 /* AV_PIX_FMT_NONE (the default) keeps the input software format */
604  { "format", "Output video pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, INT_MIN, INT_MAX, .flags=FLAGS },
 /* enabled by default */
605  { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
606  { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS },
 /* aspect-ratio handling and its named values (0 = disable, 1 = decrease, 2 = increase) */
607  { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, .unit = "force_oar" },
608  { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, .unit = "force_oar" },
609  { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, .unit = "force_oar" },
610  { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, .unit = "force_oar" },
611  { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS },
 /* table terminator */
612  { NULL },
613 };
614 
/* AVClass of the filter's private context; it exposes the options[] table. */
615 static const AVClass cudascale_class = {
616  .class_name = "cudascale",
617  .item_name = av_default_item_name,
618  .option = options,
619  .version = LIBAVUTIL_VERSION_INT,
620 };
621 
/*
 * The single video input pad: incoming frames are handled by
 * cudascale_filter_frame() and buffer requests by
 * cudascale_get_video_buffer().
 */
622 static const AVFilterPad cudascale_inputs[] = {
623  {
624  .name = "default",
625  .type = AVMEDIA_TYPE_VIDEO,
626  .filter_frame = cudascale_filter_frame,
627  .get_buffer.video = cudascale_get_video_buffer,
628  },
629 };
630 
/* The single video output pad; its link is configured by cudascale_config_props(). */
631 static const AVFilterPad cudascale_outputs[] = {
632  {
633  .name = "default",
634  .type = AVMEDIA_TYPE_VIDEO,
635  .config_props = cudascale_config_props,
636  },
637 };
638 
640  .name = "scale_cuda",
641  .description = NULL_IF_CONFIG_SMALL("GPU accelerated video resizer"),
642 
643  .init = cudascale_init,
644  .uninit = cudascale_uninit,
645 
646  .priv_size = sizeof(CUDAScaleContext),
647  .priv_class = &cudascale_class,
648 
651 
653 
654  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
655 };
options
static const AVOption options[]
Definition: vf_scale_cuda.c:596
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
AVHWDeviceContext::hwctx
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:85
AV_PIX_FMT_CUDA
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
Definition: pixfmt.h:260
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
CUDAScaleContext::frames_ctx
AVBufferRef * frames_ctx
Definition: vf_scale_cuda.c:83
hwcontext_cuda_internal.h
cudascale_init
static av_cold int cudascale_init(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:113
out
FILE * out
Definition: movenc.c:55
AV_PIX_FMT_BGR32
#define AV_PIX_FMT_BGR32
Definition: pixfmt.h:477
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1062
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3170
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
AVHWFramesContext::format
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
Definition: hwcontext.h:197
ff_cuda_load_module
int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module, const unsigned char *data, const unsigned int length)
Loads a CUDA module and applies any decompression, if necessary.
Definition: load_helper.c:35
int64_t
long long int64_t
Definition: coverity.c:34
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
CUDAScaleContext::tmp_frame
AVFrame * tmp_frame
Definition: vf_scale_cuda.c:86
CUDAScaleContext::passthrough
int passthrough
Definition: vf_scale_cuda.c:87
cudascale_uninit
static av_cold void cudascale_uninit(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:128
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:162
av_hwframe_ctx_init
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
Definition: hwcontext.c:322
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
CUDAScaleContext::w_expr
char * w_expr
width expression string
Definition: vf_scale_cuda.c:94
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:389
pixdesc.h
AVFrame::width
int width
Definition: frame.h:461
w
uint8_t w
Definition: llviddspenc.c:38
av_hwframe_ctx_alloc
AVBufferRef * av_hwframe_ctx_alloc(AVBufferRef *device_ref_in)
Allocate an AVHWFramesContext tied to a given device context.
Definition: hwcontext.c:248
AVOption
AVOption.
Definition: opt.h:429
init_hwframe_ctx
static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
Definition: vf_scale_cuda.c:147
call_resize_kernel
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height, AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
Definition: vf_scale_cuda.c:409
CUDAScaleContext::interp_use_linear
int interp_use_linear
Definition: vf_scale_cuda.c:107
FLAGS
#define FLAGS
Definition: vf_scale_cuda.c:595
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:225
ff_scale_eval_dimensions
int ff_scale_eval_dimensions(void *log_ctx, const char *w_expr, const char *h_expr, AVFilterLink *inlink, AVFilterLink *outlink, int *ret_w, int *ret_h)
Parse and evaluate string expressions for width and height.
Definition: scale_eval.c:57
float.h
DIV_UP
#define DIV_UP(a, b)
Definition: vf_scale_cuda.c:55
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
av_buffer_ref
AVBufferRef * av_buffer_ref(const AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:103
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:205
AVHWFramesContext::width
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:217
video.h
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:410
CUDAScaleContext::frame
AVFrame * frame
Definition: vf_scale_cuda.c:84
ff_default_get_video_buffer
AVFrame * ff_default_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:111
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3210
CUDAScaleContext::cu_func_uv
CUfunction cu_func_uv
Definition: vf_scale_cuda.c:103
vf_scale_cuda.h
fail
#define fail()
Definition: checkasm.h:189
CHECK_CU
#define CHECK_CU(x)
Definition: vf_scale_cuda.c:59
CUDAScaleContext::in_planes
int in_planes
Definition: vf_scale_cuda.c:79
dummy
int dummy
Definition: motion.c:66
scalecuda_resize
static int scalecuda_resize(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_scale_cuda.c:433
INTERP_ALGO_LANCZOS
@ INTERP_ALGO_LANCZOS
Definition: vf_scale_cuda.c:67
CUDAScaleContext::in_plane_channels
int in_plane_channels[4]
Definition: vf_scale_cuda.c:81
av_reduce
int av_reduce(int *dst_num, int *dst_den, int64_t num, int64_t den, int64_t max)
Reduce a fraction.
Definition: rational.c:35
AVRational::num
int num
Numerator.
Definition: rational.h:59
cudascale_load_functions
static av_cold int cudascale_load_functions(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:278
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:38
av_frame_alloc
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:150
cudascale_class
static const AVClass cudascale_class
Definition: vf_scale_cuda.c:615
cudascale_config_props
static av_cold int cudascale_config_props(AVFilterLink *outlink)
Definition: vf_scale_cuda.c:349
CUDAScaleContext::hwctx
AVCUDADeviceContext * hwctx
Definition: vf_scale_cuda.c:75
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:209
set_format_info
static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format)
Definition: vf_scale_cuda.c:194
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
AVHWFramesContext::height
int height
Definition: hwcontext.h:217
CUDAScaleContext::interp_as_integer
int interp_as_integer
Definition: vf_scale_cuda.c:108
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_YUV444P16
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:515
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
format
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate. The lists are not just lists
AV_PIX_FMT_0BGR32
#define AV_PIX_FMT_0BGR32
Definition: pixfmt.h:480
CUDAScaleContext::cu_stream
CUstream cu_stream
Definition: vf_scale_cuda.c:104
filters.h
ctx
AVFormatContext * ctx
Definition: movenc.c:49
load_helper.h
AVFrame::crop_right
size_t crop_right
Definition: frame.h:769
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
format_is_supported
static int format_is_supported(enum AVPixelFormat fmt)
Definition: vf_scale_cuda.c:184
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
CUDAScaleContext::param
float param
Definition: vf_scale_cuda.c:110
if
if(ret)
Definition: filter_design.txt:179
CUDAScaleContext::force_divisible_by
int force_divisible_by
Definition: vf_scale_cuda.c:98
LIBAVUTIL_VERSION_INT
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
CUDAScaleContext::interp_algo
int interp_algo
Definition: vf_scale_cuda.c:106
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:75
OFFSET
#define OFFSET(x)
Definition: vf_scale_cuda.c:594
NULL
#define NULL
Definition: coverity.c:32
AVHWFramesContext::sw_format
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:210
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:725
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
CUDAScaleContext::out_desc
const AVPixFmtDescriptor * out_desc
Definition: vf_scale_cuda.c:78
CUDAScaleContext::h_expr
char * h_expr
height expression string
Definition: vf_scale_cuda.c:95
BLOCKY
#define BLOCKY
Definition: vf_scale_cuda.c:57
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
AVHWFramesContext::device_ref
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
Definition: hwcontext.h:126
AVFilterContext::inputs
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:465
av_default_item_name
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:237
ff_filter_link
static FilterLink * ff_filter_link(AVFilterLink *link)
Definition: filters.h:197
AVFrame::crop_bottom
size_t crop_bottom
Definition: frame.h:767
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: filters.h:206
cudascale_inputs
static const AVFilterPad cudascale_inputs[]
Definition: vf_scale_cuda.c:622
AVFrame::crop_left
size_t crop_left
Definition: frame.h:768
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
height
#define height
Definition: dsp.h:85
CUDAScaleContext::cu_func
CUfunction cu_func
Definition: vf_scale_cuda.c:102
scale_eval.h
AV_PIX_FMT_RGB32
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:475
CUDAScaleContext::cu_module
CUmodule cu_module
Definition: vf_scale_cuda.c:101
ff_null_get_video_buffer
AVFrame * ff_null_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:44
CUDAScaleContext
Definition: vf_scale_cuda.c:72
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Underlying C type is float.
Definition: opt.h:271
supported_formats
static enum AVPixelFormat supported_formats[]
Definition: vf_scale_cuda.c:42
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
INTERP_ALGO_BICUBIC
@ INTERP_ALGO_BICUBIC
Definition: vf_scale_cuda.c:66
internal.h
common.h
av_frame_move_ref
void av_frame_move_ref(AVFrame *dst, AVFrame *src)
Move everything contained in src to dst and reset src.
Definition: frame.c:649
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:622
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:44
AV_PIX_FMT_P016
#define AV_PIX_FMT_P016
Definition: pixfmt.h:554
CUDAScaleContext::in_plane_depths
int in_plane_depths[4]
Definition: vf_scale_cuda.c:80
AVFilter
Filter definition.
Definition: avfilter.h:201
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e. those with data not located in normal system memory).
Definition: hwcontext.h:115
AVCUDADeviceContext
This struct is allocated as AVHWDeviceContext.hwctx.
Definition: hwcontext_cuda.h:42
BLOCKX
#define BLOCKX
Definition: vf_scale_cuda.c:56
CUDAScaleContext::out_fmt
enum AVPixelFormat in_fmt, out_fmt
Definition: vf_scale_cuda.c:77
ret
ret
Definition: filter_design.txt:187
AV_LOG_FATAL
#define AV_LOG_FATAL
Something went wrong and recovery is not possible.
Definition: log.h:203
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:96
AVClass::class_name
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass is associated.
Definition: log.h:80
AV_PIX_FMT_0RGB32
#define AV_PIX_FMT_0RGB32
Definition: pixfmt.h:479
AVHWFramesContext::device_ctx
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:134
cuda_check.h
INTERP_ALGO_NEAREST
@ INTERP_ALGO_NEAREST
Definition: vf_scale_cuda.c:64
INTERP_ALGO_DEFAULT
@ INTERP_ALGO_DEFAULT
Definition: vf_scale_cuda.c:62
AVFrame::sample_aspect_ratio
AVRational sample_aspect_ratio
Sample aspect ratio for the video frame, 0/1 if unknown/unspecified.
Definition: frame.h:496
ff_scale_adjust_dimensions
int ff_scale_adjust_dimensions(AVFilterLink *inlink, int *ret_w, int *ret_h, int force_original_aspect_ratio, int force_divisible_by)
Transform evaluated width and height obtained from ff_scale_eval_dimensions into actual target width and height for scaling.
Definition: scale_eval.c:113
AVFrame::height
int height
Definition: frame.h:461
cudascale_get_video_buffer
static AVFrame * cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h)
Definition: vf_scale_cuda.c:585
AVRational::den
int den
Denominator.
Definition: rational.h:60
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
avfilter.h
INTERP_ALGO_COUNT
@ INTERP_ALGO_COUNT
Definition: vf_scale_cuda.c:69
AV_OPT_TYPE_PIXEL_FMT
@ AV_OPT_TYPE_PIXEL_FMT
Underlying C type is enum AVPixelFormat.
Definition: opt.h:307
av_mul_q
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AVFilterContext
An instance of a filter.
Definition: avfilter.h:457
AV_PIX_FMT_P010
#define AV_PIX_FMT_P010
Definition: pixfmt.h:552
cudascale_outputs
static const AVFilterPad cudascale_outputs[]
Definition: vf_scale_cuda.c:631
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
cudascale_scale
static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_scale_cuda.c:513
cudascale_filter_frame
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
Definition: vf_scale_cuda.c:542
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:69
CUDAScaleContext::force_original_aspect_ratio
int force_original_aspect_ratio
Definition: vf_scale_cuda.c:97
CUDAScaleContext::format
enum AVPixelFormat format
Output sw format.
Definition: vf_scale_cuda.c:92
INTERP_ALGO_BILINEAR
@ INTERP_ALGO_BILINEAR
Definition: vf_scale_cuda.c:65
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Underlying C type is int.
Definition: opt.h:327
AVFrame::crop_top
size_t crop_top
Definition: frame.h:766
SCALE_CUDA_PARAM_DEFAULT
#define SCALE_CUDA_PARAM_DEFAULT
Definition: vf_scale_cuda.h:26
CUDAScaleContext::in_desc
const AVPixFmtDescriptor * in_desc
Definition: vf_scale_cuda.c:78
init_processing_chain
static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height, int out_width, int out_height)
Definition: vf_scale_cuda.c:220
hwcontext.h
AVERROR_BUG
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:52
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
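For video, a positive or negative value, which is typically indicating the size in bytes of each picture line, but it can also be the negative byte size of lines for vertical flipping, or a positive or negative multiple of the byte size as for accessing even and odd fields of a frame.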
Definition: frame.h:434
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
h
h
Definition: vp9dsp_template.c:2070
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Underlying C type is a uint8_t* that is either NULL or points to a C string allocated with the av_malloc() family of functions.
Definition: opt.h:276
width
#define width
Definition: dsp.h:85
av_hwframe_get_buffer
int av_hwframe_get_buffer(AVBufferRef *hwframe_ref, AVFrame *frame, int flags)
Allocate a new frame attached to the given AVHWFramesContext.
Definition: hwcontext.c:491
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Special option type for declaring named constants.
Definition: opt.h:299
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: filters.h:252
snprintf
#define snprintf
Definition: snprintf.h:34
src
#define src
Definition: vp8dsp.c:248
ff_vf_scale_cuda
const AVFilter ff_vf_scale_cuda
Definition: vf_scale_cuda.c:639
av_get_pix_fmt_name
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:3090
CUDAScaleContext::out_planes
int out_planes
Definition: vf_scale_cuda.c:79
CUDAScaleContext::cu_ctx
CUcontext cu_ctx
Definition: vf_scale_cuda.c:100