FFmpeg
vf_nlmeans_vulkan.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/random_seed.h"
23 #include "libavutil/vulkan_spirv.h"
24 #include "libavutil/opt.h"
25 #include "vulkan_filter.h"
26 
27 #include "filters.h"
28 #include "video.h"
29 
30 #define TYPE_NAME "vec4"
31 #define TYPE_ELEMS 4
32 #define TYPE_SIZE (TYPE_ELEMS*4)
33 
34 typedef struct NLMeansVulkanContext {
36 
40  VkSampler sampler;
41 
44 
46 
50 
51  int *xoffsets;
52  int *yoffsets;
54  float strength[4];
55  int patch[4];
56 
57  struct nlmeans_opts {
58  int r;
59  double s;
60  double sc[4];
61  int p;
62  int pc[4];
63  int t;
64  } opts;
66 
67 static void insert_first(FFVulkanShader *shd, int r, const char *off, int horiz, int plane, int comp)
68 {
69  GLSLF(4, s1 = texture(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i];
70  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
71 
72  GLSLF(4, s2[0] = texture(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i + %s))[%i];
73  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
74  GLSLF(4, s2[1] = texture(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i + %s))[%i];
75  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
76  GLSLF(4, s2[2] = texture(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i + %s))[%i];
77  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
78  GLSLF(4, s2[3] = texture(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i + %s))[%i];
79  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
80 
81  GLSLC(4, s2 = (s1 - s2) * (s1 - s2); );
82 }
83 
84 static void insert_horizontal_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
85 {
86  GLSLF(1, pos.y = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
87  if (!first)
88  GLSLC(1, barrier(); );
89  GLSLC(0, );
90  GLSLF(1, if (pos.y < height[%i]) { ,plane);
91  GLSLC(2, #pragma unroll(1) );
92  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
93  GLSLC(3, prefix_sum = DTYPE(0); );
94  GLSLC(3, offset = int_stride * uint64_t(pos.y + r); );
95  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
96  GLSLC(0, );
97  GLSLF(3, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
98  if (first)
99  insert_first(shd, 0, "r", 0, plane, comp);
100  else
101  GLSLC(4, s2 = dst.v[pos.x]; );
102  GLSLC(4, dst.v[pos.x] = s2 + prefix_sum; );
103  GLSLC(4, prefix_sum += s2; );
104  GLSLC(3, } );
105  GLSLC(2, } );
106  GLSLC(1, } );
107  GLSLC(0, );
108 }
109 
110 static void insert_vertical_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
111 {
112  GLSLF(1, pos.x = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
113  GLSLC(1, #pragma unroll(1) );
114  GLSLF(1, for (r = 0; r < %i; r++) ,nb_rows);
115  GLSLC(2, psum[r] = DTYPE(0); );
116  GLSLC(0, );
117  if (!first)
118  GLSLC(1, barrier(); );
119  GLSLC(0, );
120  GLSLF(1, if (pos.x < width[%i]) { ,plane);
121  GLSLF(2, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
122  GLSLC(3, offset = int_stride * uint64_t(pos.y); );
123  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
124  GLSLC(0, );
125  GLSLC(3, #pragma unroll(1) );
126  GLSLF(3, for (r = 0; r < %i; r++) { ,nb_rows);
127  if (first)
128  insert_first(shd, 0, "r", 1, plane, comp);
129  else
130  GLSLC(4, s2 = dst.v[pos.x + r]; );
131  GLSLC(4, dst.v[pos.x + r] = s2 + psum[r]; );
132  GLSLC(4, psum[r] += s2; );
133  GLSLC(3, } );
134  GLSLC(2, } );
135  GLSLC(1, } );
136  GLSLC(0, );
137 }
138 
139 static void insert_weights_pass(FFVulkanShader *shd, int nb_rows, int vert,
140  int t, int dst_comp, int plane, int comp)
141 {
142  GLSLF(1, p = patch_size[%i]; ,dst_comp);
143  GLSLC(0, );
144  GLSLC(1, barrier(); );
145  GLSLC(0, );
146  if (!vert) {
147  GLSLF(1, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
148  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) ,nb_rows, plane);
149  GLSLC(3, break; );
150  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
151  GLSLF(3, pos.x = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
152  } else {
153  GLSLF(1, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
154  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i]) ,nb_rows, plane);
155  GLSLC(3, break; );
156  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
157  GLSLF(3, pos.y = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
158  }
159  GLSLC(0, );
160  GLSLC(3, a = DTYPE(0); );
161  GLSLC(3, b = DTYPE(0); );
162  GLSLC(3, c = DTYPE(0); );
163  GLSLC(3, d = DTYPE(0); );
164  GLSLC(0, );
165  GLSLC(3, lt = ((pos.x - p) < 0) || ((pos.y - p) < 0); );
166  GLSLC(0, );
167  GLSLF(3, src[0] = texture(input_img[%i], pos + offs[0])[%i]; ,plane, comp);
168  GLSLF(3, src[1] = texture(input_img[%i], pos + offs[1])[%i]; ,plane, comp);
169  GLSLF(3, src[2] = texture(input_img[%i], pos + offs[2])[%i]; ,plane, comp);
170  GLSLF(3, src[3] = texture(input_img[%i], pos + offs[3])[%i]; ,plane, comp);
171  GLSLC(0, );
172  GLSLC(3, if (lt == false) { );
173  GLSLC(3, offset = int_stride * uint64_t(pos.y - p); );
174  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
175  GLSLC(4, a = dst.v[pos.x - p]; );
176  GLSLC(4, c = dst.v[pos.x + p]; );
177  GLSLC(3, offset = int_stride * uint64_t(pos.y + p); );
178  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
179  GLSLC(4, b = dst.v[pos.x - p]; );
180  GLSLC(4, d = dst.v[pos.x + p]; );
181  GLSLC(3, } );
182  GLSLC(0, );
183  GLSLC(3, patch_diff = d + a - b - c; );
184  GLSLF(3, w = exp(patch_diff * strength[%i]); ,dst_comp);
185  GLSLC(3, w_sum = w[0] + w[1] + w[2] + w[3]; );
186  GLSLC(3, sum = dot(w, src*255); );
187  GLSLC(0, );
188  if (t > 1) {
189  GLSLF(3, atomicAdd(weights_%i[pos.y*ws_stride[%i] + pos.x], w_sum); ,dst_comp, dst_comp);
190  GLSLF(3, atomicAdd(sums_%i[pos.y*ws_stride[%i] + pos.x], sum); ,dst_comp, dst_comp);
191  } else {
192  GLSLF(3, weights_%i[pos.y*ws_stride[%i] + pos.x] += w_sum; ,dst_comp, dst_comp);
193  GLSLF(3, sums_%i[pos.y*ws_stride[%i] + pos.x] += sum; ,dst_comp, dst_comp);
194  }
195  GLSLC(2, } );
196  GLSLC(1, } );
197 }
198 
199 typedef struct HorizontalPushData {
200  uint32_t width[4];
201  uint32_t height[4];
202  uint32_t ws_stride[4];
203  int32_t patch_size[4];
204  float strength[4];
205  VkDeviceAddress integral_base;
206  uint64_t integral_size;
207  uint64_t int_stride;
208  uint32_t xyoffs_start;
209 } HorizontalPushData;
210 
211 static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
212  FFVulkanShader *shd,
213  VkSampler sampler, FFVkSPIRVCompiler *spv,
214  int width, int height, int t,
215  const AVPixFmtDescriptor *desc,
216  int planes, int *nb_rows)
217 {
218  int err;
219  uint8_t *spv_data;
220  size_t spv_len;
221  void *spv_opaque = NULL;
223  int max_dim = FFMAX(width, height);
224  uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0];
225  int wg_size, wg_rows;
226 
227  /* Round the max workgroup size to the previous power of two */
228  wg_size = max_wg;
229  wg_rows = 1;
230 
231  if (max_wg > max_dim) {
232  wg_size = max_dim;
233  } else if (max_wg < max_dim) {
234  /* Make it fit */
235  while (wg_size*wg_rows < max_dim)
236  wg_rows++;
237  }
238 
239  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_weights",
240  VK_SHADER_STAGE_COMPUTE_BIT,
241  (const char *[]) { "GL_EXT_buffer_reference",
242  "GL_EXT_buffer_reference2" }, 2,
243  wg_size, 1, 1,
244  0));
245 
246  *nb_rows = wg_rows;
247 
248  if (t > 1)
249  GLSLC(0, #extension GL_EXT_shader_atomic_float : require );
250  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
251  GLSLC(0, );
252  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
253  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
254  GLSLC(0, );
255  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
256  GLSLC(1, DTYPE v[]; );
257  GLSLC(0, }; );
258  GLSLC(0, );
259  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
260  GLSLC(1, uvec4 width; );
261  GLSLC(1, uvec4 height; );
262  GLSLC(1, uvec4 ws_stride; );
263  GLSLC(1, ivec4 patch_size; );
264  GLSLC(1, vec4 strength; );
265  GLSLC(1, DataBuffer integral_base; );
266  GLSLC(1, uint64_t integral_size; );
267  GLSLC(1, uint64_t int_stride; );
268  GLSLC(1, uint xyoffs_start; );
269  GLSLC(0, }; );
270  GLSLC(0, );
271 
272  ff_vk_shader_add_push_const(shd, 0, sizeof(HorizontalPushData),
273  VK_SHADER_STAGE_COMPUTE_BIT);
274 
275  desc_set = (FFVulkanDescriptorSetBinding []) {
276  {
277  .name = "input_img",
278  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
279  .dimensions = 2,
280  .elems = planes,
281  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
282  .samplers = DUP_SAMPLER(sampler),
283  },
284  {
285  .name = "weights_buffer_0",
286  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
287  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
288  .buf_content = "float weights_0[];",
289  },
290  {
291  .name = "sums_buffer_0",
292  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
293  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
294  .buf_content = "float sums_0[];",
295  },
296  {
297  .name = "weights_buffer_1",
298  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
299  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
300  .buf_content = "float weights_1[];",
301  },
302  {
303  .name = "sums_buffer_1",
304  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
305  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
306  .buf_content = "float sums_1[];",
307  },
308  {
309  .name = "weights_buffer_2",
310  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
311  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
312  .buf_content = "float weights_2[];",
313  },
314  {
315  .name = "sums_buffer_2",
316  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
317  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
318  .buf_content = "float sums_2[];",
319  },
320  {
321  .name = "weights_buffer_3",
322  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
323  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
324  .buf_content = "float weights_3[];",
325  },
326  {
327  .name = "sums_buffer_3",
328  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
329  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
330  .buf_content = "float sums_3[];",
331  },
332  };
333  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1 + 2*desc->nb_components, 0, 0));
334 
335  desc_set = (FFVulkanDescriptorSetBinding []) {
336  {
337  .name = "xyoffsets_buffer",
338  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
339  .mem_quali = "readonly",
340  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
341  .buf_content = "ivec2 xyoffsets[];",
342  },
343  };
344  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0));
345 
346  GLSLC(0, );
347  GLSLC(0, void main() );
348  GLSLC(0, { );
349  GLSLC(1, uint64_t offset; );
350  GLSLC(1, DataBuffer dst; );
351  GLSLC(1, float s1; );
352  GLSLC(1, DTYPE s2; );
353  GLSLC(1, DTYPE prefix_sum; );
354  GLSLF(1, DTYPE psum[%i]; ,*nb_rows);
355  GLSLC(1, int r; );
356  GLSLC(1, ivec2 pos; );
357  GLSLC(1, int p; );
358  GLSLC(0, );
359  GLSLC(1, DataBuffer integral_data; );
360  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
361  GLSLC(0, );
362  GLSLC(1, int invoc_idx = int(gl_WorkGroupID.z); );
363  GLSLC(0, );
364  GLSLC(1, offset = integral_size * invoc_idx; );
365  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
366  for (int i = 0; i < TYPE_ELEMS; i++)
367  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
368  GLSLC(0, );
369  GLSLC(1, DTYPE a; );
370  GLSLC(1, DTYPE b; );
371  GLSLC(1, DTYPE c; );
372  GLSLC(1, DTYPE d; );
373  GLSLC(0, );
374  GLSLC(1, DTYPE patch_diff; );
375  if (TYPE_ELEMS == 4) {
376  GLSLC(1, vec4 src; );
377  GLSLC(1, vec4 w; );
378  } else {
379  GLSLC(1, vec4 src[4]; );
380  GLSLC(1, vec4 w[4]; );
381  }
382  GLSLC(1, float w_sum; );
383  GLSLC(1, float sum; );
384  GLSLC(0, );
385  GLSLC(1, bool lt; );
386  GLSLC(1, bool gt; );
387  GLSLC(0, );
388 
389  for (int i = 0; i < desc->nb_components; i++) {
390  int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
391  if (width >= height) {
392  insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
393  insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
394  insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off);
395  } else {
396  insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
397  insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
398  insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off);
399  }
400  }
401 
402  GLSLC(0, } );
403 
404  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
405  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
406 
407  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
408 
409 fail:
410  if (spv_opaque)
411  spv->free_shader(spv, &spv_opaque);
412 
413  return err;
414 }
415 
/**
 * Push-constant payload of the "nlmeans_denoise" shader.
 *
 * Mirrors the GLSL pushConstants block emitted by init_denoise_pipeline().
 */
typedef struct DenoisePushData {
    uint32_t ws_stride[4]; /* GLSL: uvec4 ws_stride, row stride of weights_N/sums_N */
} DenoisePushData;
419 
420 static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
421  FFVulkanShader *shd,
422  VkSampler sampler, FFVkSPIRVCompiler *spv,
423  const AVPixFmtDescriptor *desc, int planes)
424 {
425  int err;
426  uint8_t *spv_data;
427  size_t spv_len;
428  void *spv_opaque = NULL;
430 
431  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_denoise",
432  VK_SHADER_STAGE_COMPUTE_BIT,
433  (const char *[]) { "GL_EXT_buffer_reference",
434  "GL_EXT_buffer_reference2" }, 2,
435  32, 32, 1,
436  0));
437 
438  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
439  GLSLC(1, uvec4 ws_stride; );
440  GLSLC(0, }; );
441 
442  ff_vk_shader_add_push_const(shd, 0, sizeof(DenoisePushData),
443  VK_SHADER_STAGE_COMPUTE_BIT);
444 
445  desc_set = (FFVulkanDescriptorSetBinding []) {
446  {
447  .name = "input_img",
448  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
449  .dimensions = 2,
450  .elems = planes,
451  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
452  .samplers = DUP_SAMPLER(sampler),
453  },
454  {
455  .name = "output_img",
456  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
457  .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format, FF_VK_REP_FLOAT),
458  .mem_quali = "writeonly",
459  .dimensions = 2,
460  .elems = planes,
461  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
462  },
463  };
464  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0));
465 
466  desc_set = (FFVulkanDescriptorSetBinding []) {
467  {
468  .name = "weights_buffer_0",
469  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
470  .mem_quali = "readonly",
471  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
472  .buf_content = "float weights_0[];",
473  },
474  {
475  .name = "sums_buffer_0",
476  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
477  .mem_quali = "readonly",
478  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
479  .buf_content = "float sums_0[];",
480  },
481  {
482  .name = "weights_buffer_1",
483  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
484  .mem_quali = "readonly",
485  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
486  .buf_content = "float weights_1[];",
487  },
488  {
489  .name = "sums_buffer_1",
490  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
491  .mem_quali = "readonly",
492  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
493  .buf_content = "float sums_1[];",
494  },
495  {
496  .name = "weights_buffer_2",
497  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
498  .mem_quali = "readonly",
499  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
500  .buf_content = "float weights_2[];",
501  },
502  {
503  .name = "sums_buffer_2",
504  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
505  .mem_quali = "readonly",
506  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
507  .buf_content = "float sums_2[];",
508  },
509  {
510  .name = "weights_buffer_3",
511  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
512  .mem_quali = "readonly",
513  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
514  .buf_content = "float weights_3[];",
515  },
516  {
517  .name = "sums_buffer_3",
518  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
519  .mem_quali = "readonly",
520  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
521  .buf_content = "float sums_3[];",
522  },
523  };
524 
525  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2*desc->nb_components, 0, 0));
526 
527  GLSLC(0, void main() );
528  GLSLC(0, { );
529  GLSLC(1, ivec2 size; );
530  GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
531  GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); );
532  GLSLC(0, );
533  GLSLC(1, float w_sum; );
534  GLSLC(1, float sum; );
535  GLSLC(1, vec4 src; );
536  GLSLC(1, vec4 r; );
537  GLSLC(0, );
538  GLSLC(1, size = imageSize(output_img[plane]); );
539  GLSLC(1, if (!IS_WITHIN(pos, size)) );
540  GLSLC(2, return; );
541  GLSLC(0, );
542  GLSLC(1, src = texture(input_img[plane], pos); );
543  GLSLC(0, );
544  for (int c = 0; c < desc->nb_components; c++) {
545  int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
546  GLSLF(1, if (plane == %i) { ,desc->comp[c].plane);
547  GLSLF(2, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
548  GLSLF(2, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
549  GLSLF(2, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off);
550  GLSLC(1, } );
551  GLSLC(0, );
552  }
553  GLSLC(1, imageStore(output_img[plane], pos, r); );
554  GLSLC(0, } );
555 
556  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
557  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
558 
559  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
560 
561 fail:
562  if (spv_opaque)
563  spv->free_shader(spv, &spv_opaque);
564 
565  return err;
566 }
567 
569 {
570  int rad, err;
571  int xcnt = 0, ycnt = 0;
572  NLMeansVulkanContext *s = ctx->priv;
573  FFVulkanContext *vkctx = &s->vkctx;
574  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
575  FFVkSPIRVCompiler *spv = NULL;
576  int *offsets_buf;
577  int offsets_dispatched = 0, nb_dispatches = 0;
578 
579  const AVPixFmtDescriptor *desc;
581  if (!desc)
582  return AVERROR(EINVAL);
583 
584  if (!(s->opts.r & 1)) {
585  s->opts.r |= 1;
586  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i",
587  s->opts.r);
588  }
589 
590  if (!(s->opts.p & 1)) {
591  s->opts.p |= 1;
592  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
593  s->opts.p);
594  }
595 
596  for (int i = 0; i < 4; i++) {
597  double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s;
598  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
599  str = 10.0f*str;
600  str *= -str;
601  str = 255.0*255.0 / str;
602  s->strength[i] = str;
603  if (!(ps & 1)) {
604  ps |= 1;
605  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
606  ps);
607  }
608  s->patch[i] = ps / 2;
609  }
610 
611  rad = s->opts.r/2;
612  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
613  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
614  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
615  s->nb_offsets = 0;
616 
617  for (int x = -rad; x <= rad; x++) {
618  for (int y = -rad; y <= rad; y++) {
619  if (!x && !y)
620  continue;
621 
622  s->xoffsets[xcnt++] = x;
623  s->yoffsets[ycnt++] = y;
624  s->nb_offsets++;
625  }
626  }
627 
628  RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
629  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
630  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
631  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
632  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
633  RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));
634 
635  for (int i = 0; i < 2*s->nb_offsets; i += 2) {
636  offsets_buf[i + 0] = s->xoffsets[i >> 1];
637  offsets_buf[i + 1] = s->yoffsets[i >> 1];
638  }
639 
640  RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));
641 
642  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
643  if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) {
644  av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, "
645  "disabling dispatch parallelism\n");
646  s->opts.t = 1;
647  }
648 
649  spv = ff_vk_spirv_init();
650  if (!spv) {
651  av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
652  return AVERROR_EXTERNAL;
653  }
654 
655  s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
656  if (!s->qf) {
657  av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
658  err = AVERROR(ENOTSUP);
659  goto fail;
660  }
661 
662  RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, 1, 0, 0, 0, NULL));
663  RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
664 
665  RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, s->sampler,
666  spv, s->vkctx.output_width, s->vkctx.output_height,
667  s->opts.t, desc, planes, &s->pl_weights_rows));
668 
669  RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, s->sampler,
670  spv, desc, planes));
671 
672  RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_weights,
673  1, 0, 0,
674  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
675  VK_FORMAT_UNDEFINED));
676 
677  do {
678  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
679  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
680  offsets_dispatched += wg_invoc * TYPE_ELEMS;
681  nb_dispatches++;
682  } while (offsets_dispatched < s->nb_offsets);
683 
684  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
685  s->nb_offsets, nb_dispatches);
686 
687  s->initialized = 1;
688 
689 fail:
690  if (spv)
691  spv->uninit(&spv);
692 
693  return err;
694 }
695 
696 static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
697  FFVkBuffer *ws_vk, uint32_t ws_stride[4])
698 {
699  FFVulkanContext *vkctx = &s->vkctx;
700  FFVulkanFunctions *vk = &vkctx->vkfn;
701  VkBufferMemoryBarrier2 buf_bar[8];
702  int nb_buf_bar = 0;
703 
704  DenoisePushData pd = {
705  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
706  };
707 
708  /* Denoise pass pipeline */
709  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_denoise);
710 
711  /* Push data */
712  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_denoise,
713  VK_SHADER_STAGE_COMPUTE_BIT,
714  0, sizeof(pd), &pd);
715 
716  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
717  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
718  .srcStageMask = ws_vk->stage,
719  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
720  .srcAccessMask = ws_vk->access,
721  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
722  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
723  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
724  .buffer = ws_vk->buf,
725  .size = ws_vk->size,
726  .offset = 0,
727  };
728 
729  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
730  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
731  .pBufferMemoryBarriers = buf_bar,
732  .bufferMemoryBarrierCount = nb_buf_bar,
733  });
734  ws_vk->stage = buf_bar[0].dstStageMask;
735  ws_vk->access = buf_bar[0].dstAccessMask;
736 
737  /* End of denoise pass */
738  vk->CmdDispatch(exec->buf,
739  FFALIGN(vkctx->output_width, s->shd_denoise.lg_size[0])/s->shd_denoise.lg_size[0],
740  FFALIGN(vkctx->output_height, s->shd_denoise.lg_size[1])/s->shd_denoise.lg_size[1],
741  av_pix_fmt_count_planes(s->vkctx.output_format));
742 
743  return 0;
744 }
745 
746 static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
747 {
748  int err;
749  AVFrame *out = NULL;
750  AVFilterContext *ctx = link->dst;
751  NLMeansVulkanContext *s = ctx->priv;
752  AVFilterLink *outlink = ctx->outputs[0];
753  FFVulkanContext *vkctx = &s->vkctx;
754  FFVulkanFunctions *vk = &vkctx->vkfn;
755 
756  const AVPixFmtDescriptor *desc;
757  int plane_widths[4];
758  int plane_heights[4];
759 
760  int offsets_dispatched = 0;
761 
762  /* Integral */
763  AVBufferRef *integral_buf = NULL;
764  FFVkBuffer *integral_vk;
765  size_t int_stride;
766  size_t int_size;
767 
768  /* Weights/sums */
769  AVBufferRef *ws_buf = NULL;
770  FFVkBuffer *ws_vk;
771  VkDeviceSize weights_offs[4];
772  VkDeviceSize sums_offs[4];
773  uint32_t ws_stride[4];
774  size_t ws_size[4];
775  size_t ws_total_size = 0;
776 
777  FFVkExecContext *exec;
778  VkImageView in_views[AV_NUM_DATA_POINTERS];
779  VkImageView out_views[AV_NUM_DATA_POINTERS];
780  VkImageMemoryBarrier2 img_bar[8];
781  int nb_img_bar = 0;
782  VkBufferMemoryBarrier2 buf_bar[8];
783  int nb_buf_bar = 0;
784 
785  if (!s->initialized)
786  RET(init_filter(ctx));
787 
789  if (!desc)
790  return AVERROR(EINVAL);
791 
792  /* Integral image */
793  int_stride = s->shd_weights.lg_size[0]*s->pl_weights_rows*TYPE_SIZE;
794  int_size = s->shd_weights.lg_size[0]*s->pl_weights_rows*int_stride;
795 
796  /* Plane dimensions */
797  for (int i = 0; i < desc->nb_components; i++) {
798  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
799  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_w);
800  plane_widths[i] = FFALIGN(plane_widths[i], s->shd_denoise.lg_size[0]);
801  plane_heights[i] = FFALIGN(plane_heights[i], s->shd_denoise.lg_size[1]);
802 
803  ws_stride[i] = plane_widths[i];
804  ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float);
805  ws_total_size += ws_size[i];
806  }
807 
808  /* Buffers */
809  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
810  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
811  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
812  NULL,
813  s->opts.t * int_size,
814  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
815  if (err < 0)
816  return err;
817  integral_vk = (FFVkBuffer *)integral_buf->data;
818 
819  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
820  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
821  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
822  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
823  NULL,
824  ws_total_size * 2,
825  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
826  if (err < 0)
827  return err;
828  ws_vk = (FFVkBuffer *)ws_buf->data;
829 
830  weights_offs[0] = 0;
831  sums_offs[0] = ws_total_size;
832  for (int i = 1; i < desc->nb_components; i++) {
833  weights_offs[i] = weights_offs[i - 1] + ws_size[i - 1];
834  sums_offs[i] = sums_offs[i - 1] + ws_size[i - 1];
835  }
836 
837  /* Output frame */
838  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
839  if (!out) {
840  err = AVERROR(ENOMEM);
841  goto fail;
842  }
843 
844  /* Execution context */
845  exec = ff_vk_exec_get(&s->vkctx, &s->e);
846  ff_vk_exec_start(vkctx, exec);
847 
848  /* Dependencies */
849  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
850  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
851  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
852  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
853  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
854  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
855 
856  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
857  integral_buf = NULL;
858 
859  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
860  ws_buf = NULL;
861 
862  /* Input frame prep */
863  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT));
864  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
865  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
866  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
867  VK_ACCESS_SHADER_READ_BIT,
868  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
869  VK_QUEUE_FAMILY_IGNORED);
870 
871  /* Output frame prep */
872  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT));
873  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
874  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
875  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
876  VK_ACCESS_SHADER_WRITE_BIT,
877  VK_IMAGE_LAYOUT_GENERAL,
878  VK_QUEUE_FAMILY_IGNORED);
879 
880  nb_buf_bar = 0;
881  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
882  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
883  .srcStageMask = ws_vk->stage,
884  .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
885  .srcAccessMask = ws_vk->access,
886  .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
887  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
888  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
889  .buffer = ws_vk->buf,
890  .size = ws_vk->size,
891  .offset = 0,
892  };
893  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
894  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
895  .srcStageMask = integral_vk->stage,
896  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
897  .srcAccessMask = integral_vk->access,
898  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
899  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
900  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
901  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
902  .buffer = integral_vk->buf,
903  .size = integral_vk->size,
904  .offset = 0,
905  };
906 
907  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
908  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
909  .pImageMemoryBarriers = img_bar,
910  .imageMemoryBarrierCount = nb_img_bar,
911  .pBufferMemoryBarriers = buf_bar,
912  .bufferMemoryBarrierCount = nb_buf_bar,
913  });
914  ws_vk->stage = buf_bar[0].dstStageMask;
915  ws_vk->access = buf_bar[0].dstAccessMask;
916  integral_vk->stage = buf_bar[1].dstStageMask;
917  integral_vk->access = buf_bar[1].dstAccessMask;
918 
919  /* Buffer zeroing */
920  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
921 
922  nb_buf_bar = 0;
923  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
924  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
925  .srcStageMask = ws_vk->stage,
926  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
927  .srcAccessMask = ws_vk->access,
928  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
929  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
930  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
931  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
932  .buffer = ws_vk->buf,
933  .size = ws_vk->size,
934  .offset = 0,
935  };
936 
937  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
938  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
939  .pBufferMemoryBarriers = buf_bar,
940  .bufferMemoryBarrierCount = nb_buf_bar,
941  });
942  ws_vk->stage = buf_bar[0].dstStageMask;
943  ws_vk->access = buf_bar[0].dstAccessMask;
944 
945  /* Update weights descriptors */
946  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_weights, in, in_views, 0, 0,
947  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
948  s->sampler);
949  for (int i = 0; i < desc->nb_components; i++) {
950  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 0, 0,
951  ws_vk, weights_offs[i], ws_size[i],
952  VK_FORMAT_UNDEFINED));
953  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 1, 0,
954  ws_vk, sums_offs[i], ws_size[i],
955  VK_FORMAT_UNDEFINED));
956  }
957 
958  /* Update denoise descriptors */
959  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, in, in_views, 0, 0,
960  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
961  s->sampler);
962  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, out, out_views, 0, 1,
963  VK_IMAGE_LAYOUT_GENERAL, s->sampler);
964  for (int i = 0; i < desc->nb_components; i++) {
965  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 0, 0,
966  ws_vk, weights_offs[i], ws_size[i],
967  VK_FORMAT_UNDEFINED));
968  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 1, 0,
969  ws_vk, sums_offs[i], ws_size[i],
970  VK_FORMAT_UNDEFINED));
971  }
972 
973  /* Weights pipeline */
974  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights);
975 
976  do {
977  int wg_invoc;
978  HorizontalPushData pd = {
979  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
980  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
981  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
982  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
983  { s->strength[0], s->strength[1], s->strength[2], s->strength[2], },
984  integral_vk->address,
985  (uint64_t)int_size,
986  (uint64_t)int_stride,
987  offsets_dispatched,
988  };
989 
990  /* Push data */
991  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights,
992  VK_SHADER_STAGE_COMPUTE_BIT,
993  0, sizeof(pd), &pd);
994 
995  if (offsets_dispatched) {
996  nb_buf_bar = 0;
997  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
998  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
999  .srcStageMask = integral_vk->stage,
1000  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1001  .srcAccessMask = integral_vk->access,
1002  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
1003  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
1004  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1005  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1006  .buffer = integral_vk->buf,
1007  .size = integral_vk->size,
1008  .offset = 0,
1009  };
1010 
1011  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1012  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1013  .pBufferMemoryBarriers = buf_bar,
1014  .bufferMemoryBarrierCount = nb_buf_bar,
1015  });
1016  integral_vk->stage = buf_bar[1].dstStageMask;
1017  integral_vk->access = buf_bar[1].dstAccessMask;
1018  }
1019 
1020  wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
1021  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
1022 
1023  /* End of horizontal pass */
1024  vk->CmdDispatch(exec->buf, 1, 1, wg_invoc);
1025 
1026  offsets_dispatched += wg_invoc * TYPE_ELEMS;
1027  } while (offsets_dispatched < s->nb_offsets);
1028 
1029  RET(denoise_pass(s, exec, ws_vk, ws_stride));
1030 
1031  err = ff_vk_exec_submit(vkctx, exec);
1032  if (err < 0)
1033  return err;
1034 
1035  err = av_frame_copy_props(out, in);
1036  if (err < 0)
1037  goto fail;
1038 
1039  av_frame_free(&in);
1040 
1041  return ff_filter_frame(outlink, out);
1042 
1043 fail:
1044  av_buffer_unref(&integral_buf);
1045  av_buffer_unref(&ws_buf);
1046  av_frame_free(&in);
1047  av_frame_free(&out);
1048  return err;
1049 }
1050 
1051 static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
1052 {
1053  NLMeansVulkanContext *s = avctx->priv;
1054  FFVulkanContext *vkctx = &s->vkctx;
1055  FFVulkanFunctions *vk = &vkctx->vkfn;
1056 
1057  ff_vk_exec_pool_free(vkctx, &s->e);
1058  ff_vk_shader_free(vkctx, &s->shd_weights);
1059  ff_vk_shader_free(vkctx, &s->shd_denoise);
1060 
1061  av_buffer_pool_uninit(&s->integral_buf_pool);
1062  av_buffer_pool_uninit(&s->ws_buf_pool);
1063 
1064  if (s->sampler)
1065  vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
1066  vkctx->hwctx->alloc);
1067 
1068  ff_vk_uninit(&s->vkctx);
1069 
1070  av_freep(&s->xoffsets);
1071  av_freep(&s->yoffsets);
1072 
1073  s->initialized = 0;
1074 }
1075 
1076 #define OFFSET(x) offsetof(NLMeansVulkanContext, x)
1077 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
1078 static const AVOption nlmeans_vulkan_options[] = {
1079  { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1080  { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
1081  { "r", "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
1082  { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS },
1083 
1084  { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1085  { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1086  { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1087  { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1088 
1089  { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1090  { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1091  { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1092  { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1093 
1094  { NULL }
1095 };
1096 
1097 AVFILTER_DEFINE_CLASS(nlmeans_vulkan);
1098 
1099 static const AVFilterPad nlmeans_vulkan_inputs[] = {
1100  {
1101  .name = "default",
1102  .type = AVMEDIA_TYPE_VIDEO,
1103  .filter_frame = &nlmeans_vulkan_filter_frame,
1104  .config_props = &ff_vk_filter_config_input,
1105  },
1106 };
1107 
1108 static const AVFilterPad nlmeans_vulkan_outputs[] = {
1109  {
1110  .name = "default",
1111  .type = AVMEDIA_TYPE_VIDEO,
1112  .config_props = &ff_vk_filter_config_output,
1113  },
1114 };
1115 
1117  .name = "nlmeans_vulkan",
1118  .description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
1119  .priv_size = sizeof(NLMeansVulkanContext),
1120  .init = &ff_vk_filter_init,
1121  .uninit = &nlmeans_vulkan_uninit,
1122  FILTER_INPUTS(nlmeans_vulkan_inputs),
1123  FILTER_OUTPUTS(nlmeans_vulkan_outputs),
1125  .priv_class = &nlmeans_vulkan_class,
1126  .flags = AVFILTER_FLAG_HWDEVICE,
1127  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
1128 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:116
NLMeansVulkanContext::nlmeans_opts::p
int p
Definition: vf_nlmeans_vulkan.c:61
ff_vk_create_buf
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
Definition: vulkan.c:928
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:215
FFVulkanContext::output_height
int output_height
Definition: vulkan.h:307
r
const char * r
Definition: vf_curves.c:127
AVERROR
Filter design: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: for each input and each output, the list of supported formats. For video that means pixel format; for audio that means channel layout and sample format. These lists are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining ones, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references, ownership and permissions.
opt.h
NLMeansVulkanContext::opts
struct NLMeansVulkanContext::nlmeans_opts opts
NLMeansVulkanContext::sampler
VkSampler sampler
Definition: vf_nlmeans_vulkan.c:40
ff_vk_shader_free
void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
Free a shader.
Definition: vulkan.c:2564
ff_vk_shader_init
int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, VkPipelineStageFlags stage, const char *extensions[], int nb_extensions, int lg_x, int lg_y, int lg_z, uint32_t required_subgroup_size)
Initialize a shader object, with a specific set of extensions, type+bind, local group size,...
Definition: vulkan.c:1715
out
FILE * out
Definition: movenc.c:55
NLMeansVulkanContext::shd_weights
FFVulkanShader shd_weights
Definition: vf_nlmeans_vulkan.c:48
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:81
AVBufferPool
The buffer pool.
Definition: buffer_internal.h:88
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1061
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3170
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
RET
#define RET(x)
Definition: vulkan.h:67
ff_vk_exec_pool_init
int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext)
Allocates/frees an execution pool.
Definition: vulkan.c:296
FFVkBuffer::access
VkAccessFlags2 access
Definition: vulkan.h:96
FFVkBuffer::stage
VkPipelineStageFlags2 stage
Definition: vulkan.h:95
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:163
NLMeansVulkanContext::integral_buf_pool
AVBufferPool * integral_buf_pool
Definition: vf_nlmeans_vulkan.c:42
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:403
ff_vk_filter_init
int ff_vk_filter_init(AVFilterContext *avctx)
General lavfi IO functions.
Definition: vulkan_filter.c:233
ff_vk_map_buffer
static int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, int invalidate)
Definition: vulkan.h:495
w
uint8_t w
Definition: llviddspenc.c:38
NLMeansVulkanContext::xoffsets
int * xoffsets
Definition: vf_nlmeans_vulkan.c:51
AVOption
AVOption.
Definition: opt.h:429
b
#define b
Definition: input.c:41
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:225
FFVkBuffer::address
VkDeviceAddress address
Definition: vulkan.h:92
ff_vk_exec_get
FFVkExecContext * ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool)
Retrieve an execution pool.
Definition: vulkan.c:477
NLMeansVulkanContext::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:54
ff_vk_uninit
void ff_vk_uninit(FFVulkanContext *s)
Frees main context.
Definition: vulkan.c:2603
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
FFVkSPIRVCompiler::uninit
void(* uninit)(struct FFVkSPIRVCompiler **ctx)
Definition: vulkan_spirv.h:32
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:205
NLMeansVulkanContext::initialized
int initialized
Definition: vf_nlmeans_vulkan.c:37
video.h
AV_PIX_FMT_VULKAN
@ AV_PIX_FMT_VULKAN
Vulkan hardware images.
Definition: pixfmt.h:379
ff_vk_exec_add_dep_frame
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage)
Definition: vulkan.c:696
FFVkBuffer::buf
VkBuffer buf
Definition: vulkan.h:88
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
NLMeansVulkanContext::yoffsets
int * yoffsets
Definition: vf_nlmeans_vulkan.c:52
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3210
AVVulkanDeviceContext::alloc
const VkAllocationCallbacks * alloc
Custom memory allocator, else NULL.
Definition: hwcontext_vulkan.h:63
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:472
fail
#define fail()
Definition: checkasm.h:193
vulkan_filter.h
insert_first
static void insert_first(FFVulkanShader *shd, int r, const char *off, int horiz, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:67
ff_vk_shader_update_img_array
void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler)
Update a descriptor in a buffer with an image array.
Definition: vulkan.c:2507
ff_vk_shader_register_exec
int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd)
Register a shader with an exec pool.
Definition: vulkan.c:2204
NLMeansVulkanContext::shd_denoise
FFVulkanShader shd_denoise
Definition: vf_nlmeans_vulkan.c:49
ff_vk_shader_add_descriptor_set
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, FFVulkanDescriptorSetBinding *desc, int nb, int singular, int print_to_shader_only)
Add descriptor to a shader.
Definition: vulkan.c:2079
FFVulkanContext::atomic_float_feats
VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feats
Definition: vulkan.h:289
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:38
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
GLSLC
#define GLSLC(N, S)
Definition: vulkan.h:44
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:209
av_cold
#define av_cold
Definition: attributes.h:90
main
int main
Definition: dovi_rpuenc.c:37
float
float
Definition: af_crystalizer.c:122
FFVulkanContext::output_width
int output_width
Definition: vulkan.h:306
NLMeansVulkanContext::ws_buf_pool
AVBufferPool * ws_buf_pool
Definition: vf_nlmeans_vulkan.c:43
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Underlying C type is double.
Definition: opt.h:267
FLAGS
#define FLAGS
insert_weights_pass
static void insert_weights_pass(FFVulkanShader *shd, int nb_rows, int vert, int t, int dst_comp, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:139
filters.h
FF_VK_REP_FLOAT
@ FF_VK_REP_FLOAT
Definition: vulkan.h:376
ctx
AVFormatContext * ctx
Definition: movenc.c:49
ff_vf_nlmeans_vulkan
const AVFilter ff_vf_nlmeans_vulkan
ff_vk_exec_add_dep_buf
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps, int ref)
Execution dependency management.
Definition: vulkan.c:551
ff_vk_exec_pool_free
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
Definition: vulkan.c:233
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
opts
AVDictionary * opts
Definition: movenc.c:51
ff_vk_shader_rep_fmt
const char * ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, enum FFVkShaderRepFormat rep_fmt)
Definition: vulkan.c:1322
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:726
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
DUP_SAMPLER
#define DUP_SAMPLER(x)
Definition: vulkan.h:73
av_buffer_pool_uninit
void av_buffer_pool_uninit(AVBufferPool **ppool)
Mark the pool as being available for freeing.
Definition: buffer.c:328
ff_vk_filter_config_output
int ff_vk_filter_config_output(AVFilterLink *outlink)
Definition: vulkan_filter.c:209
FFVkBuffer::size
size_t size
Definition: vulkan.h:91
NLMeansVulkanContext::nlmeans_opts
Definition: vf_nlmeans_vulkan.c:57
FFVulkanContext
Definition: vulkan.h:266
AVFILTER_DEFINE_CLASS
#define AVFILTER_DEFINE_CLASS(fname)
Definition: filters.h:273
c
Undefined Behavior: in the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and has in some cases, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: filters.h:206
NLMeansVulkanContext::patch
int patch[4]
Definition: vf_nlmeans_vulkan.c:55
NLMeansVulkanContext::qf
AVVulkanDeviceQueueFamily * qf
Definition: vf_nlmeans_vulkan.c:39
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
NLMeansVulkanContext
Definition: vf_nlmeans_vulkan.c:34
ff_vk_shader_update_push_const
void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, VkShaderStageFlagBits stage, int offset, size_t size, void *src)
Update push constant in a shader.
Definition: vulkan.c:2520
NLMeansVulkanContext::xyoffsets_buf
FFVkBuffer xyoffsets_buf
Definition: vf_nlmeans_vulkan.c:45
FFVulkanDescriptorSetBinding
Definition: vulkan.h:75
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
height
#define height
Definition: dsp.h:85
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
planes
static const struct @472 planes[]
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
AVFILTER_FLAG_HWDEVICE
#define AVFILTER_FLAG_HWDEVICE
The filter can create hardware frames using AVFilterContext.hw_device_ctx.
Definition: avfilter.h:173
NLMeansVulkanContext::nlmeans_opts::t
int t
Definition: vf_nlmeans_vulkan.c:63
size
int size
Definition: twinvq_data.h:10344
AV_NUM_DATA_POINTERS
#define AV_NUM_DATA_POINTERS
Definition: frame.h:404
FFVulkanShader
Definition: vulkan.h:182
FFVulkanContext::output_format
enum AVPixelFormat output_format
Definition: vulkan.h:308
FFVkSPIRVCompiler::compile_shader
int(* compile_shader)(FFVulkanContext *s, struct FFVkSPIRVCompiler *ctx, FFVulkanShader *shd, uint8_t **data, size_t *size, const char *entrypoint, void **opaque)
Definition: vulkan_spirv.h:28
a
The reader does not expect b to be semantically unsigned here, and if the code is changed by maybe adding a cast, a division or other, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type, but it holds a signed int. To use the same example: SUINT a
Definition: undefined.txt:41
insert_horizontal_pass
static void insert_horizontal_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:84
AVERROR_EXTERNAL
#define AVERROR_EXTERNAL
Generic error in an external library.
Definition: error.h:59
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FFVkExecContext
Definition: vulkan.h:102
ff_vk_shader_update_desc_buffer
int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, int bind, int elem, FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, VkFormat fmt)
Update a descriptor in a buffer with a buffer.
Definition: vulkan.c:2441
FFVulkanDescriptorSetBinding::name
const char * name
Definition: vulkan.h:76
TYPE_SIZE
#define TYPE_SIZE
Definition: vf_nlmeans_vulkan.c:32
FFVkSPIRVCompiler
Definition: vulkan_spirv.h:26
layout
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel layout
Definition: filter_design.txt:18
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
NLMeansVulkanContext::pl_weights_rows
int pl_weights_rows
Definition: vf_nlmeans_vulkan.c:47
ff_vk_exec_start
int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
Start/submit/wait an execution.
Definition: vulkan.c:489
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
init_filter
static int init_filter(FilteringContext *fctx, AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, const char *filter_spec)
Definition: transcode.c:260
ff_vk_frame_barrier
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlagBits new_access, VkImageLayout new_layout, uint32_t new_qf)
Definition: vulkan.c:1672
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ff_vk_shader_link
int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, uint8_t *spirv, size_t spirv_len, const char *entrypoint)
Link a shader into an executable.
Definition: vulkan.c:2004
ff_vk_unmap_buffer
static int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
Definition: vulkan.h:502
insert_vertical_pass
static void insert_vertical_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:110
vulkan_spirv.h
FFVulkanContext::props
VkPhysicalDeviceProperties2 props
Definition: vulkan.h:272
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:44
FFVkSPIRVCompiler::free_shader
void(* free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque)
Definition: vulkan_spirv.h:31
ff_vk_exec_bind_shader
void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd)
Bind a shader.
Definition: vulkan.c:2530
NLMeansVulkanContext::nb_offsets
int nb_offsets
Definition: vf_nlmeans_vulkan.c:53
AVFilter
Filter definition.
Definition: avfilter.h:201
TYPE_NAME
#define TYPE_NAME
Definition: vf_nlmeans_vulkan.c:30
ff_vk_create_imageviews
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
Create an imageview and add it as a dependency to an execution.
Definition: vulkan.c:1589
FFVulkanContext::vkfn
FFVulkanFunctions vkfn
Definition: vulkan.h:270
FFVkExecPool
Definition: vulkan.h:244
pos
unsigned int pos
Definition: spdifenc.c:414
ff_vk_shader_add_push_const
int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, VkShaderStageFlagBits stage)
Add/update push constants for execution.
Definition: vulkan.c:1231
OFFSET
#define OFFSET(x)
ff_vk_qf_find
AVVulkanDeviceQueueFamily * ff_vk_qf_find(FFVulkanContext *s, VkQueueFlagBits dev_family, VkVideoCodecOperationFlagBitsKHR vid_ops)
Chooses an appropriate QF.
Definition: vulkan.c:220
FFVkExecContext::buf
VkCommandBuffer buf
Definition: vulkan.h:114
NLMeansVulkanContext::nlmeans_opts::s
double s
Definition: vf_nlmeans_vulkan.c:59
NLMeansVulkanContext::nlmeans_opts::pc
int pc[4]
Definition: vf_nlmeans_vulkan.c:62
random_seed.h
buffer
The frame and frame reference mechanism is intended to avoid, as much as possible, expensive copies of that data while still allowing the filters to produce correct results. The data is stored in buffers represented by AVFrame structures. Several references can point to the same frame buffer
Definition: filter_design.txt:49
GLSLF
#define GLSLF(N, S,...)
Definition: vulkan.h:54
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
AVFilterContext
An instance of a filter.
Definition: avfilter.h:457
NLMeansVulkanContext::nlmeans_opts::sc
double sc[4]
Definition: vf_nlmeans_vulkan.c:60
desc
const char * desc
Definition: libsvtav1.c:79
ff_vk_filter_config_input
int ff_vk_filter_config_input(AVFilterLink *inlink)
Definition: vulkan_filter.c:176
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
FFVulkanContext::hwctx
AVVulkanDeviceContext * hwctx
Definition: vulkan.h:295
mem.h
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
NLMeansVulkanContext::e
FFVkExecPool e
Definition: vf_nlmeans_vulkan.c:38
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
AVVulkanDeviceContext::act_dev
VkDevice act_dev
Active device.
Definition: hwcontext_vulkan.h:84
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
ff_vk_init_sampler
int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, int unnorm_coords, VkFilter filt)
Create a sampler.
Definition: vulkan.c:1252
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
TYPE_ELEMS
#define TYPE_ELEMS
Definition: vf_nlmeans_vulkan.c:31
FFVkBuffer
Definition: vulkan.h:87
int32_t
int32_t
Definition: audioconvert.c:56
ff_vk_exec_submit
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:811
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AVVulkanDeviceQueueFamily
Definition: hwcontext_vulkan.h:33
width
#define width
Definition: dsp.h:85
NLMeansVulkanContext::nlmeans_opts::r
int r
Definition: vf_nlmeans_vulkan.c:58
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: filters.h:252
FFVulkanFunctions
Definition: vulkan_functions.h:264
ff_vk_get_pooled_buffer
int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, AVBufferRef **buf, VkBufferUsageFlags usage, void *create_pNext, size_t size, VkMemoryPropertyFlagBits mem_props)
Initialize a pool and create AVBufferRefs containing FFVkBuffer.
Definition: vulkan.c:1178
src
#define src
Definition: vp8dsp.c:248
NLMeansVulkanContext::vkctx
FFVulkanContext vkctx
Definition: vf_nlmeans_vulkan.c:35