    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
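
/* Stringify VkResult codes: the CASE() macro expands each enum value into a
 * "case X: return "X";" arm of a switch, so errors can be logged by name. */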
#define CASE(VAL) case VAL: return #VAL
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_UNKNOWN);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_FRAGMENTATION);
    CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
    CASE(VK_PIPELINE_COMPILE_REQUIRED);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_NOT_PERMITTED_KHR);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    CASE(VK_THREAD_IDLE_KHR);
    CASE(VK_THREAD_DONE_KHR);
    CASE(VK_OPERATION_DEFERRED_KHR);
    CASE(VK_OPERATION_NOT_DEFERRED_KHR);
    default:
        return "Unknown error";
    s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT,
    s->coop_matrix_props = (VkPhysicalDeviceCooperativeMatrixPropertiesKHR) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
    s->subgroup_props = (VkPhysicalDeviceSubgroupSizeControlProperties) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES,
        .pNext = &s->coop_matrix_props,
    s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT,
        .pNext = &s->subgroup_props,
    s->driver_props = (VkPhysicalDeviceDriverProperties) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
        .pNext = &s->desc_buf_props,
    s->props = (VkPhysicalDeviceProperties2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
        .pNext = &s->driver_props,

    s->atomic_float_feats = (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT,
    s->feats_12 = (VkPhysicalDeviceVulkan12Features) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
        .pNext = &s->atomic_float_feats,
    s->feats = (VkPhysicalDeviceFeatures2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &s->feats_12,

    vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
    vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats);
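
/* Enumerate queue families: query the count first, allocate the property
 * arrays, chain the video and query-status properties per family, then query
 * again to fill everything in. */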
    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL);

    s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props));

    s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props));

    s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props));
    if (!s->video_props) {

    for (uint32_t i = 0; i < s->tot_nb_qfs; i++) {
        s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR,
        s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
            .pNext = &s->query_props[i],
        s->qf_props[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = &s->video_props[i],

    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props);

    vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev,
                                                        &s->coop_mat_props_nb, NULL);
    if (s->coop_mat_props_nb) {
        s->coop_mat_props = av_malloc_array(s->coop_mat_props_nb,
                                            sizeof(VkCooperativeMatrixPropertiesKHR));
        for (int i = 0; i < s->coop_mat_props_nb; i++) {
            s->coop_mat_props[i] = (VkCooperativeMatrixPropertiesKHR) {
                .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,

        vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev,
                                                            &s->coop_mat_props_nb,
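
/* Translate a queue family request (graphics/compute/transfer/encode/decode)
 * into the queue family index and queue count exposed by the device hwctx. */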
    switch (dev_family) {
    case VK_QUEUE_GRAPHICS_BIT:
        ret = s->hwctx->queue_family_index;
        num = s->hwctx->nb_graphics_queues;
        break;
    case VK_QUEUE_COMPUTE_BIT:
        ret = s->hwctx->queue_family_comp_index;
        num = s->hwctx->nb_comp_queues;
        break;
    case VK_QUEUE_TRANSFER_BIT:
        ret = s->hwctx->queue_family_tx_index;
        num = s->hwctx->nb_tx_queues;
        break;
    case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
        ret = s->hwctx->queue_family_encode_index;
        num = s->hwctx->nb_encode_queues;
        break;
    case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
        ret = s->hwctx->queue_family_decode_index;
        num = s->hwctx->nb_decode_queues;
        break;
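
/* Build a deduplicated list of the queue families actually in use, so resources
 * can be shared across graphics, transfer, compute, decode and encode queues. */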
                  VkQueueFlagBits dev_family)

    if (s->hwctx->queue_family_index >= 0)
        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
    if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
    if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
                       s->qfs[1] != s->hwctx->queue_family_comp_index))
        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
    if (s->hwctx->queue_family_decode_index >= 0 &&
        (s->qfs[0] != s->hwctx->queue_family_decode_index &&
         s->qfs[1] != s->hwctx->queue_family_decode_index &&
         s->qfs[2] != s->hwctx->queue_family_decode_index))
        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
    if (s->hwctx->queue_family_encode_index >= 0 &&
        (s->qfs[0] != s->hwctx->queue_family_encode_index &&
         s->qfs[1] != s->hwctx->queue_family_encode_index &&
         s->qfs[2] != s->hwctx->queue_family_encode_index &&
         s->qfs[3] != s->hwctx->queue_family_encode_index))
        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
    vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);

    vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool,

    vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc);
    vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc);
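
/* Create the execution pool: a transient, resettable command pool, one primary
 * command buffer per context, an optional query pool, and a pre-signaled fence
 * for each context. */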
                        int nb_queries, VkQueryType query_type, int query_64bit,
                        const void *query_create_pnext)

    VkCommandBufferAllocateInfo cbuf_create;

        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
    if (ret != VK_SUCCESS) {

    cbuf_create = (VkCommandBufferAllocateInfo) {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = nb_contexts,
    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create,
    if (ret != VK_SUCCESS) {

        VkQueryPoolCreateInfo query_pool_info = {
            .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
            .pNext = query_create_pnext,
            .queryType = query_type,
            .queryCount = nb_queries*nb_contexts,
        ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
        if (ret != VK_SUCCESS) {

        if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
        if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {

        VkFenceCreateInfo fence_create = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
            .flags = VK_FENCE_CREATE_SIGNALED_BIT,
        ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc,
        if (ret != VK_SUCCESS) {
    VkQueryResultFlags qf = 0;
          VK_QUERY_RESULT_64_BIT : 0x0;
          VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;

    ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool,
    if (ret != VK_SUCCESS)

        res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
        res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?

    uint32_t idx = pool->idx++;
    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,

    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
    vk->ResetFences(s->hwctx->act_dev, 1, &e->fence);

    ret = vk->BeginCommandBuffer(e->buf, &cmd_start);
    if (ret != VK_SUCCESS) {

    vkfc->unlock_frame(hwfc, vkf);

    for (int i = 0; i < nb_deps; i++) {
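
/* Register an AVFrame as a dependency of the execution context: lock the frame,
 * record its timeline semaphores for waiting and signaling, and track the
 * layout/queue-family/access updates to apply once the submission completes. */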
                              VkPipelineStageFlagBits2 wait_stage,
                              VkPipelineStageFlagBits2 signal_stage)

    uint8_t *frame_locked;
    uint8_t *frame_update;
    VkImageLayout *layout_dst;
    uint32_t *queue_family_dst;
    VkAccessFlagBits *access_dst;

#define ARR_REALLOC(str, arr, alloc_s, cnt)                               \
        arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \
            ff_vk_exec_discard_deps(s, e);                                \
            return AVERROR(ENOMEM);                                       \
    vkfc->lock_frame(hwfc, vkf);

    for (int i = 0; i < nb_images; i++) {
        VkSemaphoreSubmitInfo *sem_sig;
        uint64_t **sem_sig_val_dst;

            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vkf->sem[i],
            .value = vkf->sem_value[i],
            .stageMask = wait_stage,

            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vkf->sem[i],
            .value = vkf->sem_value[i] + 1,
            .stageMask = signal_stage,
                          VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar)

                            VkSemaphore *dst, uint64_t *dst_val,

    uint64_t **sem_sig_val_dst;
    VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = e->buf,
    VkSubmitInfo2 submit_info = (VkSubmitInfo2) {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .pCommandBufferInfos = &cmd_buf_info,
        .commandBufferInfoCount = 1,
        .pSignalSemaphoreInfos = e->sem_sig,

    ret = vk->EndCommandBuffer(e->buf);
    if (ret != VK_SUCCESS) {

    s->hwctx->lock_queue(s->device, e->qf, e->qi);
    s->hwctx->unlock_queue(s->device, e->qf, e->qi);

    if (ret != VK_SUCCESS) {

    for (int i = 0; i < nb_images; i++) {

    vkfc->unlock_frame(hwfc, vkf);
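
/* Allocate device memory: pick a memory type that satisfies both the resource's
 * memoryTypeBits and the requested property flags, then allocate from it. */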
                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)

    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,

    if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
        if (!(req->memoryTypeBits & (1 << i)))

        if ((req_flags != UINT32_MAX) &&
            ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
                             s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {

    *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
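
/* Create a VkBuffer, query its memory requirements (honoring dedicated-allocation
 * hints), bind the memory, and fetch a device address when the usage asks for one. */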
                      void *pNext, void *alloc_pNext,
                      VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)

    VkBufferCreateInfo buf_spawn = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,

    VkMemoryAllocateFlagsInfo alloc_flags = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
        .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = alloc_pNext,
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,

    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
    if (ret != VK_SUCCESS) {

    req_desc.buffer = buf->buf;

    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);

    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
        ded_alloc.buffer = buf->buf;
        ded_alloc.pNext = alloc_pNext;
        alloc_pNext = &ded_alloc;

    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
        alloc_flags.pNext = alloc_pNext;
        alloc_pNext = &alloc_flags;

    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {

    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
        VkBufferDeviceAddressInfo address_info = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,

        buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
                         void *pNext, void *alloc_pNext,
                         VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)

                      int nb_buffers, int invalidate)
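
/* Map host-visible buffers; for non-coherent memory, collect VkMappedMemoryRange
 * entries so cached contents can be invalidated (or later flushed) in one call. */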
    VkMappedMemoryRange inval_list[64];

    for (int i = 0; i < nb_buffers; i++) {
        ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0,
                            VK_WHOLE_SIZE, 0, &dst);
        if (ret != VK_SUCCESS) {

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i]->mem,
            .size = VK_WHOLE_SIZE,
        if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        inval_list[inval_count++] = ival_buf;
    ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
    if (ret != VK_SUCCESS) {
    VkMappedMemoryRange flush_list[64];
    int flush_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange flush_buf = {
            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i]->mem,
            .size = VK_WHOLE_SIZE,
        if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        flush_list[flush_count++] = flush_buf;
        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
        if (ret != VK_SUCCESS) {

    for (int i = 0; i < nb_buffers; i++)
        vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem);
    if (!buf || !s->hwctx)

    if (buf->buf != VK_NULL_HANDLE)
        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
    if (buf->mem != VK_NULL_HANDLE)
        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
                            void *create_pNext, size_t size,
                            VkMemoryPropertyFlagBits mem_props)

    data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    data->access = VK_ACCESS_2_NONE;

    if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
                            VkShaderStageFlagBits stage)

    VkPushConstantRange *pc;

    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
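
/* Create a sampler: unnormalized coordinates force nearest mipmap mode, and the
 * min filter and V/W address modes mirror the mag filter and U mode. */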
                       int unnorm_coords, VkFilter filt)

    VkSamplerCreateInfo sampler_info = {
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .minFilter = sampler_info.magFilter,
        .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                      VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeV = sampler_info.addressModeU,
        .addressModeW = sampler_info.addressModeU,
        .anisotropyEnable = VK_FALSE,
        .compareOp = VK_COMPARE_OP_NEVER,
        .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = unnorm_coords,

    ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
                            s->hwctx->alloc, sampler);
    if (ret != VK_SUCCESS) {
    const int high = desc->comp[0].depth > 8;
    return high ? "rgba16f" : "rgba8";
        vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
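
/* Create one image view per plane; when multiple planes are backed by a single
 * image, the plane is selected through the aspect mask. */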
    for (int i = 0; i < nb_planes; i++) {
        VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
                                              VK_IMAGE_ASPECT_PLANE_0_BIT,
                                              VK_IMAGE_ASPECT_PLANE_1_BIT,
                                              VK_IMAGE_ASPECT_PLANE_2_BIT, };

        VkImageViewCreateInfo view_create_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = vkf->img[FFMIN(i, nb_images - 1)],
            .viewType = VK_IMAGE_VIEW_TYPE_2D,
            .format = rep_fmts[i],
            .subresourceRange = {
                .aspectMask = plane_aspect[(nb_planes != nb_images) +
                                           i*(nb_planes != nb_images)],

        ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
                                  s->hwctx->alloc, &iv->views[i]);
        if (ret != VK_SUCCESS) {

    memcpy(views, iv->views, nb_planes*sizeof(*views));

        for (int i = 0; i < iv->nb_views; i++)
            vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
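
/* Emit one VkImageMemoryBarrier2 per image of the frame, transitioning layout,
 * access mask and queue family ownership in a single pipeline barrier. */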
                         AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
                         VkPipelineStageFlags src_stage,
                         VkPipelineStageFlags dst_stage,
                         VkAccessFlagBits new_access,
                         VkImageLayout new_layout,

    for (int i = 0; i < nb_images; i++) {
        bar[*nb_bar] = (VkImageMemoryBarrier2) {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
            .srcStageMask = src_stage,
            .dstStageMask = dst_stage,
            .dstAccessMask = new_access,
            .newLayout = new_layout,
            .dstQueueFamilyIndex = new_qf,
            .image = vkf->img[i],
            .subresourceRange = (VkImageSubresourceRange) {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                      VkShaderStageFlags stage, uint32_t required_subgroup_size)

    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;

    if (required_subgroup_size) {
        shd->shader.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT;
        shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO;
        shd->subgroup_info.requiredSubgroupSize = required_subgroup_size;

    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
    GLSLC(0, #extension GL_EXT_buffer_reference : require );
    GLSLC(0, #extension GL_EXT_buffer_reference2 : require );

             "local_size_y = %i, local_size_z = %i) in;\n\n",
    const char *p = shd->src.str;
    const char *start = p;
    const size_t len = strlen(p);

    for (int i = 0; i < len; i++) {

    vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
                        uint8_t *spirv, size_t spirv_size, const char *entrypoint)

    VkShaderModuleCreateInfo shader_create;

    shd->shader.pName = entrypoint;

           shd->name, spirv_size);

    shader_create.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext = NULL;
    shader_create.codeSize = spirv_size;
    shader_create.flags = 0;
    shader_create.pCode = (void *)spirv;

    ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
    if (ret != VK_SUCCESS) {
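
/* Per-descriptor-type table: the size of the struct used to update it, the GLSL
 * keyword it maps to, and a few per-type flags used when emitting declarations. */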
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
                                  int read_only, int print_to_shader_only)

    int has_sampler = 0;
    VkDescriptorSetLayoutCreateInfo desc_create_layout;

    if (print_to_shader_only)

    memset(set, 0, sizeof(*set));

    if (!set->binding_offset) {
    desc_create_layout = (VkDescriptorSetLayoutCreateInfo) {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pBindings = set->binding,
        .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,

    for (int i = 0; i < nb; i++) {
        set->binding[i].binding            = i;
        set->binding[i].descriptorType     = desc[i].type;
        set->binding[i].stageFlags         = desc[i].stages;
        set->binding[i].pImmutableSamplers = desc[i].samplers;

        if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
            desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)

    set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
                 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
        set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;
    ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                        s->hwctx->alloc, &set->layout);
    if (ret != VK_SUCCESS) {

    vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size);

    set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment);

    for (int i = 0; i < nb; i++)
        vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout,
                                                   i, &set->binding_offset[i]);

    set->read_only = read_only;
    set->nb_bindings = nb;
    for (int i = 0; i < nb; i++) {
        if (desc[i].mem_layout)

        else if (desc[i].elems > 0)

                               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

        pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
            .usage = set->usage,
            .address = set->buf.address,
                               int bind_idx, int array_idx,
                               VkDescriptorGetInfoEXT *desc_get_info,

    const size_t exec_offset = set->read_only ? 0 : set->aligned_size*e->idx;

                 set->binding_offset[bind_idx] +
                 array_idx*desc_size;

    vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc);
    VkDescriptorGetInfoEXT desc_get_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
        .type = desc_set->binding[bind].descriptorType,

    switch (desc_get_info.type) {
    case VK_DESCRIPTOR_TYPE_SAMPLER:
        desc_get_info.data.pSampler = sampler;
        break;

               set, bind, desc_get_info.type);

                          s->desc_buf_props.samplerDescriptorSize);
                                  VkImageView view, VkImageLayout layout, VkSampler sampler)

    VkDescriptorGetInfoEXT desc_get_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
        .type = desc_set->binding[bind].descriptorType,
    VkDescriptorImageInfo desc_img_info = {

    switch (desc_get_info.type) {
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        desc_get_info.data.pSampledImage = &desc_img_info;
        desc_size = s->desc_buf_props.sampledImageDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        desc_get_info.data.pStorageImage = &desc_img_info;
        desc_size = s->desc_buf_props.storageImageDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
        desc_get_info.data.pInputAttachmentImage = &desc_img_info;
        desc_size = s->desc_buf_props.inputAttachmentDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        desc_get_info.data.pCombinedImageSampler = &desc_img_info;
        desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize;
        break;

               set, bind, desc_get_info.type);
                                VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt)

    VkDescriptorGetInfoEXT desc_get_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
        .type = desc_set->binding[bind].descriptorType,
    VkDescriptorAddressInfoEXT desc_buf_info = {

    switch (desc_get_info.type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        desc_get_info.data.pUniformBuffer = &desc_buf_info;
        desc_size = s->desc_buf_props.uniformBufferDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        desc_get_info.data.pStorageBuffer = &desc_buf_info;
        desc_size = s->desc_buf_props.storageBufferDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        desc_get_info.data.pUniformTexelBuffer = &desc_buf_info;
        desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize;
        break;
    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
        desc_get_info.data.pStorageTexelBuffer = &desc_buf_info;
        desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize;
        break;

               set, bind, desc_get_info.type);
                                   VkImageView *views, int set, int binding,
                                   VkImageLayout layout, VkSampler sampler)

    for (int i = 0; i < nb_planes; i++)
                                 VkShaderStageFlagBits stage,

    VkPipelineLayoutCreateInfo pipeline_layout_info;

                                                     sizeof(desc_layouts));

    pipeline_layout_info = (VkPipelineLayoutCreateInfo) {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pSetLayouts = desc_layouts,

    ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info,
    if (ret != VK_SUCCESS) {
    VkComputePipelineCreateInfo pipeline_create_info;

    pipeline_create_info = (VkComputePipelineCreateInfo) {
        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,

    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
                                     &pipeline_create_info,
    if (ret != VK_SUCCESS) {

    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
    vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);

        vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout,