FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
unsharp_opencl.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
3  * Copyright (C) 2013 Lenny Wang
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * unsharp input video
25  */
26 
27 #include "unsharp_opencl.h"
28 #include "libavutil/common.h"
30 
31 #define PLANE_NUM 3
32 #define ROUND_TO_16(a) (((((a) - 1)/16)+1)*16)
33 
34 static inline void add_mask_counter(uint32_t *dst, uint32_t *counter1, uint32_t *counter2, int len)
35 {
36  int i;
37  for (i = 0; i < len; i++) {
38  dst[i] = counter1[i] + counter2[i];
39  }
40 }
41 
42 static int compute_mask(int step, uint32_t *mask)
43 {
44  int i, z, ret = 0;
45  int counter_size = sizeof(uint32_t) * (2 * step + 1);
46  uint32_t *temp1_counter, *temp2_counter, **counter = NULL;
47  temp1_counter = av_mallocz(counter_size);
48  if (!temp1_counter) {
49  ret = AVERROR(ENOMEM);
50  goto end;
51  }
52  temp2_counter = av_mallocz(counter_size);
53  if (!temp2_counter) {
54  ret = AVERROR(ENOMEM);
55  goto end;
56  }
57  counter = av_mallocz_array(2 * step + 1, sizeof(uint32_t *));
58  if (!counter) {
59  ret = AVERROR(ENOMEM);
60  goto end;
61  }
62  for (i = 0; i < 2 * step + 1; i++) {
63  counter[i] = av_mallocz(counter_size);
64  if (!counter[i]) {
65  ret = AVERROR(ENOMEM);
66  goto end;
67  }
68  }
69  for (i = 0; i < 2 * step + 1; i++) {
70  memset(temp1_counter, 0, counter_size);
71  temp1_counter[i] = 1;
72  for (z = 0; z < step * 2; z += 2) {
73  add_mask_counter(temp2_counter, counter[z], temp1_counter, step * 2);
74  memcpy(counter[z], temp1_counter, counter_size);
75  add_mask_counter(temp1_counter, counter[z + 1], temp2_counter, step * 2);
76  memcpy(counter[z + 1], temp2_counter, counter_size);
77  }
78  }
79  memcpy(mask, temp1_counter, counter_size);
80 end:
81  av_freep(&temp1_counter);
82  av_freep(&temp2_counter);
83  for (i = 0; counter && i < 2 * step + 1; i++) {
84  av_freep(&counter[i]);
85  }
86  av_freep(&counter);
87  return ret;
88 }
89 
90 static int copy_separable_masks(cl_mem cl_mask_x, cl_mem cl_mask_y, int step_x, int step_y)
91 {
92  int ret = 0;
93  uint32_t *mask_x, *mask_y;
94  size_t size_mask_x = sizeof(uint32_t) * (2 * step_x + 1);
95  size_t size_mask_y = sizeof(uint32_t) * (2 * step_y + 1);
96  mask_x = av_mallocz_array(2 * step_x + 1, sizeof(uint32_t));
97  if (!mask_x) {
98  ret = AVERROR(ENOMEM);
99  goto end;
100  }
101  mask_y = av_mallocz_array(2 * step_y + 1, sizeof(uint32_t));
102  if (!mask_y) {
103  ret = AVERROR(ENOMEM);
104  goto end;
105  }
106 
107  ret = compute_mask(step_x, mask_x);
108  if (ret < 0)
109  goto end;
110  ret = compute_mask(step_y, mask_y);
111  if (ret < 0)
112  goto end;
113 
114  ret = av_opencl_buffer_write(cl_mask_x, (uint8_t *)mask_x, size_mask_x);
115  ret = av_opencl_buffer_write(cl_mask_y, (uint8_t *)mask_y, size_mask_y);
116 end:
117  av_freep(&mask_x);
118  av_freep(&mask_y);
119 
120  return ret;
121 }
122 
124 {
125  cl_mem masks[4];
126  cl_mem mask_matrix[2];
127  int i, ret = 0, step_x[2], step_y[2];
128 
129  UnsharpContext *unsharp = ctx->priv;
130  mask_matrix[0] = unsharp->opencl_ctx.cl_luma_mask;
131  mask_matrix[1] = unsharp->opencl_ctx.cl_chroma_mask;
132  masks[0] = unsharp->opencl_ctx.cl_luma_mask_x;
133  masks[1] = unsharp->opencl_ctx.cl_luma_mask_y;
134  masks[2] = unsharp->opencl_ctx.cl_chroma_mask_x;
135  masks[3] = unsharp->opencl_ctx.cl_chroma_mask_y;
136  step_x[0] = unsharp->luma.steps_x;
137  step_x[1] = unsharp->chroma.steps_x;
138  step_y[0] = unsharp->luma.steps_y;
139  step_y[1] = unsharp->chroma.steps_y;
140 
141  /* use default kernel if any matrix dim larger than 8 due to limited local mem size */
142  if (step_x[0]>8 || step_x[1]>8 || step_y[0]>8 || step_y[1]>8)
143  unsharp->opencl_ctx.use_fast_kernels = 0;
144  else
145  unsharp->opencl_ctx.use_fast_kernels = 1;
146 
147  if (!masks[0] || !masks[1] || !masks[2] || !masks[3]) {
148  av_log(ctx, AV_LOG_ERROR, "Luma mask and chroma mask should not be NULL\n");
149  return AVERROR(EINVAL);
150  }
151  if (!mask_matrix[0] || !mask_matrix[1]) {
152  av_log(ctx, AV_LOG_ERROR, "Luma mask and chroma mask should not be NULL\n");
153  return AVERROR(EINVAL);
154  }
155  for (i = 0; i < 2; i++) {
156  ret = copy_separable_masks(masks[2*i], masks[2*i+1], step_x[i], step_y[i]);
157  if (ret < 0)
158  return ret;
159  }
160  return ret;
161 }
162 
164 {
165  int ret;
166  AVFilterLink *link = ctx->inputs[0];
167  UnsharpContext *unsharp = ctx->priv;
168  cl_int status;
169  FFOpenclParam kernel1 = {0};
170  FFOpenclParam kernel2 = {0};
171  int width = link->w;
172  int height = link->h;
173  int cw = AV_CEIL_RSHIFT(link->w, unsharp->hsub);
174  int ch = AV_CEIL_RSHIFT(link->h, unsharp->vsub);
175  size_t globalWorkSize1d = width * height + 2 * ch * cw;
176  size_t globalWorkSize2dLuma[2];
177  size_t globalWorkSize2dChroma[2];
178  size_t localWorkSize2d[2] = {16, 16};
179 
180  if (unsharp->opencl_ctx.use_fast_kernels) {
181  globalWorkSize2dLuma[0] = (size_t)ROUND_TO_16(width);
182  globalWorkSize2dLuma[1] = (size_t)ROUND_TO_16(height);
183  globalWorkSize2dChroma[0] = (size_t)ROUND_TO_16(cw);
184  globalWorkSize2dChroma[1] = (size_t)(2*ROUND_TO_16(ch));
185 
186  kernel1.ctx = ctx;
187  kernel1.kernel = unsharp->opencl_ctx.kernel_luma;
188  ret = avpriv_opencl_set_parameter(&kernel1,
189  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
190  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
191  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask_x),
192  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask_y),
193  FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
198  FF_OPENCL_PARAM_INFO(width),
199  FF_OPENCL_PARAM_INFO(height),
200  NULL);
201  if (ret < 0)
202  return ret;
203 
204  kernel2.ctx = ctx;
205  kernel2.kernel = unsharp->opencl_ctx.kernel_chroma;
206  ret = avpriv_opencl_set_parameter(&kernel2,
207  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
208  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
209  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask_x),
210  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask_y),
218  FF_OPENCL_PARAM_INFO(link->w),
219  FF_OPENCL_PARAM_INFO(link->h),
222  NULL);
223  if (ret < 0)
224  return ret;
225  status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
226  unsharp->opencl_ctx.kernel_luma, 2, NULL,
227  globalWorkSize2dLuma, localWorkSize2d, 0, NULL, NULL);
228  status |=clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
229  unsharp->opencl_ctx.kernel_chroma, 2, NULL,
230  globalWorkSize2dChroma, localWorkSize2d, 0, NULL, NULL);
231  if (status != CL_SUCCESS) {
232  av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
233  return AVERROR_EXTERNAL;
234  }
235  } else { /* use default kernel */
236  kernel1.ctx = ctx;
237  kernel1.kernel = unsharp->opencl_ctx.kernel_default;
238 
239  ret = avpriv_opencl_set_parameter(&kernel1,
240  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
241  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
242  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask),
243  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask),
244  FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
258  FF_OPENCL_PARAM_INFO(link->h),
259  FF_OPENCL_PARAM_INFO(link->w),
262  NULL);
263  if (ret < 0)
264  return ret;
265  status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
266  unsharp->opencl_ctx.kernel_default, 1, NULL,
267  &globalWorkSize1d, NULL, 0, NULL, NULL);
268  if (status != CL_SUCCESS) {
269  av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
270  return AVERROR_EXTERNAL;
271  }
272  }
273  //blocking map is suffficient, no need for clFinish
274  //clFinish(unsharp->opencl_ctx.command_queue);
275 
276  return av_opencl_buffer_read_image(out->data, unsharp->opencl_ctx.out_plane_size,
277  unsharp->opencl_ctx.plane_num, unsharp->opencl_ctx.cl_outbuf,
278  unsharp->opencl_ctx.cl_outbuf_size);
279 }
280 
282 {
283  int ret = 0;
284  char build_opts[96];
285  UnsharpContext *unsharp = ctx->priv;
286  ret = av_opencl_init(NULL);
287  if (ret < 0)
288  return ret;
289  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask,
290  sizeof(uint32_t) * (2 * unsharp->luma.steps_x + 1) * (2 * unsharp->luma.steps_y + 1),
291  CL_MEM_READ_ONLY, NULL);
292  if (ret < 0)
293  return ret;
294  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask,
295  sizeof(uint32_t) * (2 * unsharp->chroma.steps_x + 1) * (2 * unsharp->chroma.steps_y + 1),
296  CL_MEM_READ_ONLY, NULL);
297  // separable filters
298  if (ret < 0)
299  return ret;
300  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask_x,
301  sizeof(uint32_t) * (2 * unsharp->luma.steps_x + 1),
302  CL_MEM_READ_ONLY, NULL);
303  if (ret < 0)
304  return ret;
305  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask_y,
306  sizeof(uint32_t) * (2 * unsharp->luma.steps_y + 1),
307  CL_MEM_READ_ONLY, NULL);
308  if (ret < 0)
309  return ret;
310  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask_x,
311  sizeof(uint32_t) * (2 * unsharp->chroma.steps_x + 1),
312  CL_MEM_READ_ONLY, NULL);
313  if (ret < 0)
314  return ret;
315  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask_y,
316  sizeof(uint32_t) * (2 * unsharp->chroma.steps_y + 1),
317  CL_MEM_READ_ONLY, NULL);
318  if (ret < 0)
319  return ret;
320  ret = generate_mask(ctx);
321  if (ret < 0)
322  return ret;
323  unsharp->opencl_ctx.plane_num = PLANE_NUM;
324  unsharp->opencl_ctx.command_queue = av_opencl_get_command_queue();
325  if (!unsharp->opencl_ctx.command_queue) {
326  av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'unsharp'\n");
327  return AVERROR(EINVAL);
328  }
329  snprintf(build_opts, 96, "-D LU_RADIUS_X=%d -D LU_RADIUS_Y=%d -D CH_RADIUS_X=%d -D CH_RADIUS_Y=%d",
330  2*unsharp->luma.steps_x+1, 2*unsharp->luma.steps_y+1, 2*unsharp->chroma.steps_x+1, 2*unsharp->chroma.steps_y+1);
331  unsharp->opencl_ctx.program = av_opencl_compile("unsharp", build_opts);
332  if (!unsharp->opencl_ctx.program) {
333  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'unsharp'\n");
334  return AVERROR(EINVAL);
335  }
336  if (unsharp->opencl_ctx.use_fast_kernels) {
337  if (!unsharp->opencl_ctx.kernel_luma) {
338  unsharp->opencl_ctx.kernel_luma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_luma", &ret);
339  if (ret != CL_SUCCESS) {
340  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_luma'\n");
341  return ret;
342  }
343  }
344  if (!unsharp->opencl_ctx.kernel_chroma) {
345  unsharp->opencl_ctx.kernel_chroma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_chroma", &ret);
346  if (ret < 0) {
347  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_chroma'\n");
348  return ret;
349  }
350  }
351  }
352  else {
353  if (!unsharp->opencl_ctx.kernel_default) {
354  unsharp->opencl_ctx.kernel_default = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_default", &ret);
355  if (ret < 0) {
356  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_default'\n");
357  return ret;
358  }
359  }
360  }
361  return ret;
362 }
363 
365 {
366  UnsharpContext *unsharp = ctx->priv;
367  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_inbuf);
368  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_outbuf);
369  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask);
370  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask);
371  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask_x);
372  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask_x);
373  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask_y);
374  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask_y);
375  clReleaseKernel(unsharp->opencl_ctx.kernel_default);
376  clReleaseKernel(unsharp->opencl_ctx.kernel_luma);
377  clReleaseKernel(unsharp->opencl_ctx.kernel_chroma);
378  clReleaseProgram(unsharp->opencl_ctx.program);
379  unsharp->opencl_ctx.command_queue = NULL;
381 }
382 
384 {
385  int ret = 0;
386  AVFilterLink *link = ctx->inputs[0];
387  UnsharpContext *unsharp = ctx->priv;
388  int ch = AV_CEIL_RSHIFT(link->h, unsharp->vsub);
389 
390  if ((!unsharp->opencl_ctx.cl_inbuf) || (!unsharp->opencl_ctx.cl_outbuf)) {
391  unsharp->opencl_ctx.in_plane_size[0] = (in->linesize[0] * in->height);
392  unsharp->opencl_ctx.in_plane_size[1] = (in->linesize[1] * ch);
393  unsharp->opencl_ctx.in_plane_size[2] = (in->linesize[2] * ch);
394  unsharp->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
395  unsharp->opencl_ctx.out_plane_size[1] = (out->linesize[1] * ch);
396  unsharp->opencl_ctx.out_plane_size[2] = (out->linesize[2] * ch);
397  unsharp->opencl_ctx.cl_inbuf_size = unsharp->opencl_ctx.in_plane_size[0] +
398  unsharp->opencl_ctx.in_plane_size[1] +
399  unsharp->opencl_ctx.in_plane_size[2];
400  unsharp->opencl_ctx.cl_outbuf_size = unsharp->opencl_ctx.out_plane_size[0] +
401  unsharp->opencl_ctx.out_plane_size[1] +
402  unsharp->opencl_ctx.out_plane_size[2];
403  if (!unsharp->opencl_ctx.cl_inbuf) {
404  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_inbuf,
405  unsharp->opencl_ctx.cl_inbuf_size,
406  CL_MEM_READ_ONLY, NULL);
407  if (ret < 0)
408  return ret;
409  }
410  if (!unsharp->opencl_ctx.cl_outbuf) {
411  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_outbuf,
412  unsharp->opencl_ctx.cl_outbuf_size,
413  CL_MEM_READ_WRITE, NULL);
414  if (ret < 0)
415  return ret;
416  }
417  }
418  return av_opencl_buffer_write_image(unsharp->opencl_ctx.cl_inbuf,
419  unsharp->opencl_ctx.cl_inbuf_size,
420  0, in->data, unsharp->opencl_ctx.in_plane_size,
421  unsharp->opencl_ctx.plane_num);
422 }
#define NULL
Definition: coverity.c:32
static const uint16_t mask_matrix[]
Definition: escape124.c:193
This structure describes decoded (raw) audio or video data.
Definition: frame.h:201
UnsharpFilterParam luma
luma parameters (width, height, amount)
Definition: unsharp.h:75
static int copy_separable_masks(cl_mem cl_mask_x, cl_mem cl_mask_y, int step_x, int step_y)
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:222
const char * av_opencl_errstr(cl_int status)
Get OpenCL error string.
Definition: opencl.c:159
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
int steps_x
horizontal step count
Definition: unsharp.h:64
void ff_opencl_unsharp_uninit(AVFilterContext *ctx)
uint8_t
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
uint8_t pi<< 24) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8,(uint64_t)((*(constuint8_t *) pi-0x80U))<< 56) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,(uint64_t)(*(constint16_t *) pi)<< 48) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,(uint64_t)(*(constint32_t *) pi)<< 32) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S64,(*(constint64_t *) pi >>56)+0x80) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S64,*(constint64_t *) pi *(1.0f/(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S64,*(constint64_t *) pi *(1.0/(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(constfloat *) pi *(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(constdouble *) pi *(INT64_C(1)<< 63)))#defineFMT_PAIR_FUNC(out, in) staticconv_func_type *constfmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB *AV_SAMPLE_FMT_NB]={FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64),};staticvoidcpy1(uint8_t **dst, constuint8_t **src, intlen){memcpy(*dst,*src, len);}staticvoidcpy2(uint8_t **dst, constuint8_t **src, intlen){memcpy(*dst,*src, 2 *len);}staticvoidcpy4(uint8_t **dst, constuint8_t **src, intlen){memcpy(*dst,*src, 4 *len);}staticvoidcpy8(uint8_t **dst, constuint8_t **src, intlen){memcpy(*dst,*src, 8 *len);}AudioConvert *swri_audio_convert_alloc(enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, constint *ch_map, intflags){AudioConvert *ctx;conv_func_type *f=fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt)+AV_SAMPLE_FMT_NB *av_get_packed_sample_fmt(in_fmt)];if(!f) returnNULL;ctx=av_mallocz(sizeof(*ctx));if(!ctx) returnNULL;if(channels==1){in_fmt=av_get_planar_sample_fmt(in_fmt);out_fmt=av_get_planar_sample_fmt(out_fmt);}ctx->channels=channels;ctx->conv_f=f;ctx->ch_map=ch_map;if(in_fmt==AV_SAMPLE_FMT_U8||in_fmt==AV_SAMPLE_FMT_U8P) memset(ctx->silence, 0x80, sizeof(ctx->silence));if(out_fmt==in_fmt &&!ch_map){switch(av_get_bytes_per_sample(in_fmt)){case1:ctx->simd_f=cpy1;break;case2:ctx->simd_f=cpy2;break;case4:ctx->simd_f=cpy4;break;case8:ctx->simd_f=cpy8;break;}}if(HAVE_X86ASM &&1) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);returnctx;}voidswri_audio_convert_free(AudioConvert **ctx){av_freep(ctx);}intswri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, intlen){intch;intoff=0;constintos=(out->planar?1:out->ch_count)*out->bps;unsignedmisaligned=0;av_assert0(ctx->channels==out->ch_count);if(ctx->in_simd_align_mask){intplanes=in->planar?in->ch_count:1;unsignedm=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) in->ch[ch];misaligned|=m &ctx->in_simd_align_mask;}if(ctx->out_simd_align_mask){intplanes=out->planar?out->ch_count:1;unsignedm=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) out->ch[ch];misaligned|=m &ctx->out_simd_align_mask;}if(ctx->simd_f &&!ctx->ch_map &&!misaligned){off=len &~15;av_assert1(off >=0);av_assert1(off<=len);av_assert2(ctx->channels==SWR_CH_MAX||!in->ch[ctx->channels]);if(off >0){if(out->planar==in->planar){intplanes=out->planar?out->ch_count:1;for(ch=0;ch< planes;ch++){ctx->simd_f(out-> ch ch
Definition: audioconvert.c:56
static void add_mask_counter(uint32_t *dst, uint32_t *counter1, uint32_t *counter2, int len)
#define height
#define av_log(a,...)
int ff_opencl_unsharp_init(AVFilterContext *ctx)
int scalebits
bits to shift pixel
Definition: unsharp.h:66
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int32_t halfscale
amount to add to pixel
Definition: unsharp.h:67
static const uint16_t mask[17]
Definition: lzw.c:38
#define AVERROR(e)
Definition: error.h:43
void * priv
private data for use by the filter
Definition: avfilter.h:353
#define ROUND_TO_16(a)
uint16_t width
Definition: gdv.c:47
static void * av_mallocz_array(size_t nmemb, size_t size)
Definition: mem.h:229
int avpriv_opencl_set_parameter(FFOpenclParam *opencl_param,...)
int amount
effect amount
Definition: unsharp.h:63
AVFormatContext * ctx
Definition: movenc.c:48
cl_program av_opencl_compile(const char *program_name, const char *build_opts)
compile specific OpenCL kernel source
Definition: opencl.c:443
int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr)
Create OpenCL buffer.
Definition: opencl.c:681
int ff_opencl_unsharp_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset, uint8_t **src_data, int *plane_size, int plane_num)
Write image data from memory to OpenCL buffer.
Definition: opencl.c:751
static int compute_mask(int step, uint32_t *mask)
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:232
int steps_y
vertical step count
Definition: unsharp.h:65
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
cl_kernel kernel
#define PLANE_NUM
UnsharpFilterParam chroma
chroma parameters (width, height, amount)
Definition: unsharp.h:76
void av_opencl_buffer_release(cl_mem *cl_buf)
Release OpenCL buffer.
Definition: opencl.c:692
#define snprintf
Definition: snprintf.h:34
cl_command_queue av_opencl_get_command_queue(void)
get OpenCL command queue
Definition: opencl.c:520
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:215
common internal and external API header
int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size)
Write OpenCL buffer with data from src_buf.
Definition: opencl.c:705
void av_opencl_uninit(void)
Release OpenCL environment.
Definition: opencl.c:645
#define FF_OPENCL_PARAM_INFO(a)
int len
int av_opencl_init(AVOpenCLExternalEnv *ext_opencl_env)
Initialize the run time OpenCL environment.
Definition: opencl.c:618
An instance of a filter.
Definition: avfilter.h:338
int height
Definition: frame.h:259
FILE * out
Definition: movenc.c:54
#define av_freep(p)
#define AVERROR_EXTERNAL
Generic error in an external library.
Definition: error.h:57
static int generate_mask(AVFilterContext *ctx)
int ff_opencl_apply_unsharp(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num, cl_mem src_cl_buf, size_t cl_buffer_size)
Read image data from OpenCL buffer.
Definition: opencl.c:792