19 #define VK_NO_PROTOTYPES
20 #define VK_ENABLE_BETA_EXTENSIONS
24 #include <versionhelpers.h>
46 #include <drm_fourcc.h>
49 #include <va/va_drmcommon.h>
/* Wrap a CUDA driver-API call: checks the returned CUresult through
 * FF_CUDA_CHECK_DL, using the local 'cuda_cu' log context and the
 * dynamically loaded 'cu' function table. */
57 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
85 VkPhysicalDeviceProperties2
props;
86 VkPhysicalDeviceMemoryProperties
mprops;
87 VkPhysicalDeviceExternalMemoryHostPropertiesEXT
hprops;
144 #define ADD_VAL_TO_LIST(list, count, val) \
146 list = av_realloc_array(list, sizeof(*list), ++count); \
148 err = AVERROR(ENOMEM); \
151 list[count - 1] = av_strdup(val); \
152 if (!list[count - 1]) { \
153 err = AVERROR(ENOMEM); \
158 #define RELEASE_PROPS(props, count) \
160 for (int i = 0; i < count; i++) \
161 av_free((void *)((props)[i])); \
162 av_free((void *)props); \
165 static const struct {
183 {
AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
188 {
AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
193 {
AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
198 {
AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
199 {
AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
201 {
AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
203 {
AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
204 {
AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
205 {
AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
206 {
AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
208 {
AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
209 {
AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
210 {
AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
211 {
AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
232 {
AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
233 {
AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
234 {
AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
235 {
AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
248 const VkBaseInStructure *in = chain;
250 if (in->sType == stype)
261 VkBaseOutStructure *
out = chain;
284 VkFormatFeatureFlags
flags;
285 VkFormatProperties2 prop = {
286 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
288 vk->GetPhysicalDeviceFormatProperties2(hwctx->
phys_dev, fmt[
i], &prop);
289 flags =
linear ? prop.formatProperties.linearTilingFeatures :
290 prop.formatProperties.optimalTilingFeatures;
303 static const char *lib_names[] = {
306 #elif defined(__APPLE__)
317 p->
libvulkan = dlopen(lib_names[
i], RTLD_NOW | RTLD_LOCAL);
354 { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
355 { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
/* Expand an enum value into a switch case that returns its stringified
 * identifier — used below to map VkResult codes to printable names. */
370 #define CASE(VAL) case VAL: return #VAL
376 CASE(VK_EVENT_RESET);
378 CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
379 CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
380 CASE(VK_ERROR_INITIALIZATION_FAILED);
381 CASE(VK_ERROR_DEVICE_LOST);
382 CASE(VK_ERROR_MEMORY_MAP_FAILED);
383 CASE(VK_ERROR_LAYER_NOT_PRESENT);
384 CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
385 CASE(VK_ERROR_FEATURE_NOT_PRESENT);
386 CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
387 CASE(VK_ERROR_TOO_MANY_OBJECTS);
388 CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
389 CASE(VK_ERROR_FRAGMENTED_POOL);
390 CASE(VK_ERROR_SURFACE_LOST_KHR);
391 CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
392 CASE(VK_SUBOPTIMAL_KHR);
393 CASE(VK_ERROR_OUT_OF_DATE_KHR);
394 CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
395 CASE(VK_ERROR_VALIDATION_FAILED_EXT);
396 CASE(VK_ERROR_INVALID_SHADER_NV);
397 CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
398 CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
399 CASE(VK_ERROR_NOT_PERMITTED_EXT);
400 CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
401 CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
402 CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
403 default:
return "Unknown error";
409 VkDebugUtilsMessageTypeFlagsEXT messageType,
410 const VkDebugUtilsMessengerCallbackDataEXT *
data,
417 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l =
AV_LOG_VERBOSE;
break;
418 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l =
AV_LOG_INFO;
break;
419 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l =
AV_LOG_WARNING;
break;
420 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l =
AV_LOG_ERROR;
break;
425 for (
int i = 0;
i <
data->cmdBufLabelCount;
i++)
432 const char *
const **dst, uint32_t *num,
int debug)
435 const char **extension_names =
NULL;
439 int err = 0, found, extensions_found = 0;
442 int optional_exts_num;
443 uint32_t sup_ext_count;
444 char *user_exts_str =
NULL;
446 VkExtensionProperties *sup_ext;
456 if (!user_exts_str) {
461 vk->EnumerateInstanceExtensionProperties(
NULL, &sup_ext_count,
NULL);
462 sup_ext =
av_malloc_array(sup_ext_count,
sizeof(VkExtensionProperties));
465 vk->EnumerateInstanceExtensionProperties(
NULL, &sup_ext_count, sup_ext);
473 if (!user_exts_str) {
478 vk->EnumerateDeviceExtensionProperties(hwctx->
phys_dev,
NULL,
479 &sup_ext_count,
NULL);
480 sup_ext =
av_malloc_array(sup_ext_count,
sizeof(VkExtensionProperties));
483 vk->EnumerateDeviceExtensionProperties(hwctx->
phys_dev,
NULL,
484 &sup_ext_count, sup_ext);
487 for (
int i = 0;
i < optional_exts_num;
i++) {
488 tstr = optional_exts[
i].
name;
490 for (
int j = 0; j < sup_ext_count; j++) {
491 if (!strcmp(tstr, sup_ext[j].extensionName)) {
505 tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
507 for (
int j = 0; j < sup_ext_count; j++) {
508 if (!strcmp(tstr, sup_ext[j].extensionName)) {
526 char *save, *token =
av_strtok(user_exts_str,
"+", &save);
529 for (
int j = 0; j < sup_ext_count; j++) {
530 if (!strcmp(token, sup_ext[j].extensionName)) {
546 *dst = extension_names;
547 *num = extensions_found;
561 const char *
const **dst, uint32_t *num,
564 static const char default_layer[] = {
"VK_LAYER_KHRONOS_validation" };
566 int found = 0, err = 0;
570 uint32_t sup_layer_count;
571 VkLayerProperties *sup_layers;
574 char *user_layers_str =
NULL;
577 const char **enabled_layers =
NULL;
578 uint32_t enabled_layers_count = 0;
581 int debug = debug_opt && strtol(debug_opt->
value,
NULL, 10);
584 if (debug_opt && !debug)
587 vk->EnumerateInstanceLayerProperties(&sup_layer_count,
NULL);
588 sup_layers =
av_malloc_array(sup_layer_count,
sizeof(VkLayerProperties));
591 vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers);
594 for (
int i = 0;
i < sup_layer_count;
i++)
600 for (
int i = 0;
i < sup_layer_count;
i++) {
601 if (!strcmp(default_layer, sup_layers[
i].layerName)) {
616 if (!user_layers_str) {
621 token =
av_strtok(user_layers_str,
"+", &save);
624 if (!strcmp(default_layer, token)) {
634 for (
int j = 0; j < sup_layer_count; j++) {
635 if (!strcmp(token, sup_layers[j].layerName)) {
645 "Validation Layer \"%s\" not support.\n", token);
657 *dst = enabled_layers;
658 *num = enabled_layers_count;
672 int err = 0, debug_mode = 0;
677 VkApplicationInfo application_info = {
678 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
679 .pEngineName =
"libavutil",
680 .apiVersion = VK_API_VERSION_1_2,
685 VkInstanceCreateInfo inst_props = {
686 .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
687 .pApplicationInfo = &application_info,
703 &inst_props.enabledLayerCount, &debug_mode);
709 &inst_props.enabledExtensionCount, debug_mode);
716 ret = vk->CreateInstance(&inst_props, hwctx->
alloc, &hwctx->
inst);
719 if (
ret != VK_SUCCESS) {
733 VkDebugUtilsMessengerCreateInfoEXT dbg = {
734 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
735 .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
736 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
737 VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
738 VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
739 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
740 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
741 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
746 vk->CreateDebugUtilsMessengerEXT(hwctx->
inst, &dbg,
753 RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount);
769 case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
return "integrated";
770 case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
return "discrete";
771 case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
return "virtual";
772 case VK_PHYSICAL_DEVICE_TYPE_CPU:
return "software";
773 default:
return "unknown";
780 int err = 0, choice = -1;
785 VkPhysicalDevice *devices =
NULL;
786 VkPhysicalDeviceIDProperties *idp =
NULL;
787 VkPhysicalDeviceProperties2 *prop =
NULL;
790 ret = vk->EnumeratePhysicalDevices(hwctx->
inst, &num,
NULL);
791 if (
ret != VK_SUCCESS || !num) {
800 ret = vk->EnumeratePhysicalDevices(hwctx->
inst, &num, devices);
801 if (
ret != VK_SUCCESS) {
821 for (
int i = 0;
i < num;
i++) {
822 idp[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
823 prop[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
824 prop[
i].pNext = &idp[
i];
826 vk->GetPhysicalDeviceProperties2(devices[
i], &prop[
i]);
828 prop[
i].properties.deviceName,
830 prop[
i].properties.deviceID);
834 for (
int i = 0;
i < num;
i++) {
835 if (!strncmp(idp[
i].deviceUUID, select->
uuid, VK_UUID_SIZE)) {
843 }
else if (select->
name) {
845 for (
int i = 0;
i < num;
i++) {
846 if (strstr(prop[
i].properties.deviceName, select->
name)) {
857 for (
int i = 0;
i < num;
i++) {
858 if (select->
pci_device == prop[
i].properties.deviceID) {
869 for (
int i = 0;
i < num;
i++) {
870 if (select->
vendor_id == prop[
i].properties.vendorID) {
880 if (select->
index < num) {
881 choice = select->
index;
893 choice, prop[choice].properties.deviceName,
895 prop[choice].properties.deviceID);
908 VkQueueFlagBits
flags)
911 uint32_t min_score = UINT32_MAX;
913 for (
int i = 0;
i < num_qf;
i++) {
914 const VkQueueFlagBits qflags = qf[
i].queueFlags;
915 if (qflags &
flags) {
916 uint32_t score =
av_popcount(qflags) + qf[
i].timestampValidBits;
917 if (score < min_score) {
925 qf[
index].timestampValidBits++;
934 VkQueueFamilyProperties *qf =
NULL;
938 int graph_index, comp_index, tx_index, enc_index, dec_index;
941 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &num,
NULL);
953 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &num, qf);
956 for (
int i = 0;
i < num;
i++) {
958 ((qf[
i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ?
" graphics" :
"",
959 ((qf[
i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ?
" compute" :
"",
960 ((qf[
i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ?
" transfer" :
"",
961 ((qf[
i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ?
" encode" :
"",
962 ((qf[
i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ?
" decode" :
"",
963 ((qf[
i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ?
" sparse" :
"",
964 ((qf[
i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ?
" protected" :
"",
969 qf[
i].timestampValidBits = 0;
992 #define SETUP_QUEUE(qf_idx) \
995 int qc = qf[fidx].queueCount; \
996 VkDeviceQueueCreateInfo *pc; \
998 if (fidx == graph_index) { \
999 hwctx->queue_family_index = fidx; \
1000 hwctx->nb_graphics_queues = qc; \
1003 if (fidx == comp_index) { \
1004 hwctx->queue_family_comp_index = fidx; \
1005 hwctx->nb_comp_queues = qc; \
1008 if (fidx == tx_index) { \
1009 hwctx->queue_family_tx_index = fidx; \
1010 hwctx->nb_tx_queues = qc; \
1013 if (fidx == enc_index) { \
1014 hwctx->queue_family_encode_index = fidx; \
1015 hwctx->nb_encode_queues = qc; \
1018 if (fidx == dec_index) { \
1019 hwctx->queue_family_decode_index = fidx; \
1020 hwctx->nb_decode_queues = qc; \
1024 pc = av_realloc((void *)cd->pQueueCreateInfos, \
1025 sizeof(*pc) * (cd->queueCreateInfoCount + 1)); \
1028 return AVERROR(ENOMEM); \
1030 cd->pQueueCreateInfos = pc; \
1031 pc = &pc[cd->queueCreateInfoCount]; \
1033 weights = av_malloc(qc * sizeof(float)); \
1036 return AVERROR(ENOMEM); \
1039 memset(pc, 0, sizeof(*pc)); \
1040 pc->sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; \
1041 pc->queueFamilyIndex = fidx; \
1042 pc->queueCount = qc; \
1043 pc->pQueuePriorities = weights; \
1045 for (int i = 0; i < qc; i++) \
1046 weights[i] = 1.0f / qc; \
1048 cd->queueCreateInfoCount++; \
1065 int queue_family_index,
int num_queues)
1073 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
1074 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1075 .queueFamilyIndex = queue_family_index,
1077 VkCommandBufferAllocateInfo cbuf_create = {
1078 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
1079 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
1080 .commandBufferCount = num_queues,
1088 if (
ret != VK_SUCCESS) {
1098 cbuf_create.commandPool = cmd->
pool;
1101 ret = vk->AllocateCommandBuffers(hwctx->
act_dev, &cbuf_create, cmd->
bufs);
1102 if (
ret != VK_SUCCESS) {
1113 for (
int i = 0;
i < num_queues;
i++) {
1115 vk->GetDeviceQueue(hwctx->
act_dev, queue_family_index,
i, &q->
queue);
1134 vk->WaitForFences(hwctx->
act_dev, 1, &q->
fence, VK_TRUE, UINT64_MAX);
1181 VkCommandBufferBeginInfo cmd_start = {
1182 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
1183 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
1188 VkFenceCreateInfo fence_spawn = {
1189 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
1193 if (
ret != VK_SUCCESS) {
1199 vk->WaitForFences(hwctx->
act_dev, 1, &q->
fence, VK_TRUE, UINT64_MAX);
1207 if (
ret != VK_SUCCESS) {
1222 if (!deps || !nb_deps)
1232 for (
int i = 0;
i < nb_deps;
i++) {
1247 VkSubmitInfo *s_info,
AVVkFrame *
f,
int synchronous)
1255 if (
ret != VK_SUCCESS) {
1263 s_info->commandBufferCount = 1;
1266 if (
ret != VK_SUCCESS) {
1274 for (
int i = 0;
i < s_info->signalSemaphoreCount;
i++)
1281 vk->WaitForFences(hwctx->
act_dev, 1, &q->
fence, VK_TRUE, UINT64_MAX);
1301 vk->DestroyDebugUtilsMessengerEXT(hwctx->
inst, p->
debug_ctx,
1305 vk->DestroyInstance(hwctx->
inst, hwctx->
alloc);
1330 VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
1331 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
1333 VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
1334 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
1335 .pNext = &timeline_features,
1337 VkPhysicalDeviceVulkan11Features dev_features_1_1 = {
1338 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
1339 .pNext = &dev_features_1_2,
1341 VkPhysicalDeviceFeatures2 dev_features = {
1342 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
1343 .pNext = &dev_features_1_1,
1346 VkDeviceCreateInfo dev_info = {
1347 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
1351 hwctx->
device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1366 vk->GetPhysicalDeviceFeatures2(hwctx->
phys_dev, &dev_features);
/* Copy one VkPhysicalDeviceFeatures member from the queried 'dev_features'
 * into the destination features struct, enabling only features the
 * physical device actually supports. */
1369 #define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.features.NAME;
1379 if (!timeline_features.timelineSemaphore) {
1391 &dev_info.enabledExtensionCount, 0))) {
1392 for (
int i = 0;
i < dev_info.queueCreateInfoCount;
i++)
1393 av_free((
void *)dev_info.pQueueCreateInfos[
i].pQueuePriorities);
1394 av_free((
void *)dev_info.pQueueCreateInfos);
1401 for (
int i = 0;
i < dev_info.queueCreateInfoCount;
i++)
1402 av_free((
void *)dev_info.pQueueCreateInfos[
i].pQueuePriorities);
1403 av_free((
void *)dev_info.pQueueCreateInfos);
1405 if (
ret != VK_SUCCESS) {
1408 for (
int i = 0;
i < dev_info.enabledExtensionCount;
i++)
1409 av_free((
void *)dev_info.ppEnabledExtensionNames[
i]);
1410 av_free((
void *)dev_info.ppEnabledExtensionNames);
1440 int graph_index, comp_index, tx_index, enc_index, dec_index;
1459 p->
props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1461 p->
hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
1463 vk->GetPhysicalDeviceProperties2(hwctx->
phys_dev, &p->
props);
1465 p->
props.properties.deviceName);
1468 p->
props.properties.limits.optimalBufferCopyRowPitchAlignment);
1470 p->
props.properties.limits.minMemoryMapAlignment);
1473 p->
hprops.minImportedHostPointerAlignment);
1478 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &queue_num,
NULL);
1490 #define CHECK_QUEUE(type, required, fidx, ctx_qf, qc) \
1492 if (ctx_qf < 0 && required) { \
1493 av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \
1494 " in the context!\n", type); \
1495 return AVERROR(EINVAL); \
1496 } else if (fidx < 0 || ctx_qf < 0) { \
1498 } else if (ctx_qf >= queue_num) { \
1499 av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
1500 type, ctx_qf, queue_num); \
1501 return AVERROR(EINVAL); \
1504 av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \
1505 " for%s%s%s%s%s\n", \
1507 ctx_qf == graph_index ? " graphics" : "", \
1508 ctx_qf == comp_index ? " compute" : "", \
1509 ctx_qf == tx_index ? " transfers" : "", \
1510 ctx_qf == enc_index ? " encode" : "", \
1511 ctx_qf == dec_index ? " decode" : ""); \
1512 graph_index = (ctx_qf == graph_index) ? -1 : graph_index; \
1513 comp_index = (ctx_qf == comp_index) ? -1 : comp_index; \
1514 tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
1515 enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
1516 dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
1517 p->qfs[p->num_qfs++] = ctx_qf; \
1529 vk->GetPhysicalDeviceMemoryProperties(hwctx->
phys_dev, &p->
mprops);
1538 if (device && device[0]) {
1540 dev_select.
index = strtol(device, &end, 10);
1541 if (end == device) {
1542 dev_select.
index = 0;
1543 dev_select.
name = device;
1559 switch(src_ctx->
type) {
1565 const char *vendor = vaQueryVendorString(src_hwctx->
display);
1571 if (strstr(vendor,
"Intel"))
1572 dev_select.vendor_id = 0x8086;
1573 if (strstr(vendor,
"AMD"))
1574 dev_select.vendor_id = 0x1002;
1582 drmDevice *drm_dev_info;
1583 int err = drmGetDevice(src_hwctx->
fd, &drm_dev_info);
1589 if (drm_dev_info->bustype == DRM_BUS_PCI)
1590 dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
1592 drmFreeDevice(&drm_dev_info);
1602 CudaFunctions *cu = cu_internal->
cuda_dl;
1604 int ret =
CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
1611 dev_select.has_uuid = 1;
1622 const void *hwconfig,
1654 constraints->
max_width = p->
props.properties.limits.maxImageDimension2D;
1655 constraints->
max_height = p->
props.properties.limits.maxImageDimension2D;
1668 VkMemoryPropertyFlagBits req_flags,
const void *alloc_extension,
1669 VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
1676 VkMemoryAllocateInfo alloc_info = {
1677 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
1678 .pNext = alloc_extension,
1679 .allocationSize = req->size,
1684 for (
int i = 0;
i < p->
mprops.memoryTypeCount;
i++) {
1685 const VkMemoryType *
type = &p->
mprops.memoryTypes[
i];
1688 if (!(req->memoryTypeBits & (1 <<
i)))
1692 if ((
type->propertyFlags & req_flags) != req_flags)
1696 if (req->size > p->
mprops.memoryHeaps[
type->heapIndex].size)
1710 alloc_info.memoryTypeIndex =
index;
1712 ret = vk->AllocateMemory(dev_hwctx->
act_dev, &alloc_info,
1713 dev_hwctx->
alloc, mem);
1714 if (
ret != VK_SUCCESS) {
1720 *mem_flags |= p->
mprops.memoryTypes[
index].propertyFlags;
1733 if (internal->cuda_fc_ref) {
1739 CudaFunctions *cu = cu_internal->
cuda_dl;
1742 if (internal->cu_sem[
i])
1743 CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[
i]));
1744 if (internal->cu_mma[
i])
1745 CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[
i]));
1746 if (internal->ext_mem[
i])
1747 CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[
i]));
1749 if (internal->ext_sem_handle[
i])
1750 CloseHandle(internal->ext_sem_handle[
i]);
1751 if (internal->ext_mem_handle[
i])
1752 CloseHandle(internal->ext_mem_handle[
i]);
1774 vk->DeviceWaitIdle(hwctx->
act_dev);
1788 void *alloc_pnext,
size_t alloc_pnext_stride)
1799 VkMemoryRequirements cont_memory_requirements = { 0 };
1801 int cont_mem_size = 0;
1807 VkImageMemoryRequirementsInfo2 req_desc = {
1808 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
1811 VkMemoryDedicatedAllocateInfo ded_alloc = {
1812 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
1813 .pNext = (
void *)(((uint8_t *)alloc_pnext) +
i*alloc_pnext_stride),
1815 VkMemoryDedicatedRequirements ded_req = {
1816 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
1818 VkMemoryRequirements2 req = {
1819 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
1823 vk->GetImageMemoryRequirements2(hwctx->
act_dev, &req_desc, &req);
1825 if (
f->tiling == VK_IMAGE_TILING_LINEAR)
1826 req.memoryRequirements.size =
FFALIGN(req.memoryRequirements.size,
1827 p->
props.properties.limits.minMemoryMapAlignment);
1830 if (ded_req.requiresDedicatedAllocation) {
1832 "device requires dedicated image allocation!\n");
1835 cont_memory_requirements = req.memoryRequirements;
1836 }
else if (cont_memory_requirements.memoryTypeBits !=
1837 req.memoryRequirements.memoryTypeBits) {
1839 "and %i, cannot allocate in a single region!\n",
1844 cont_mem_size_list[
i] =
FFALIGN(req.memoryRequirements.size,
1845 req.memoryRequirements.alignment);
1846 cont_mem_size += cont_mem_size_list[
i];
1851 use_ded_mem = ded_req.prefersDedicatedAllocation |
1852 ded_req.requiresDedicatedAllocation;
1854 ded_alloc.image =
f->img[
i];
1858 f->tiling == VK_IMAGE_TILING_LINEAR ?
1859 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
1860 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1861 use_ded_mem ? &ded_alloc : (
void *)ded_alloc.pNext,
1862 &
f->flags, &
f->mem[
i])))
1865 f->size[
i] = req.memoryRequirements.size;
1866 bind_info[
i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1867 bind_info[
i].image =
f->img[
i];
1868 bind_info[
i].memory =
f->mem[
i];
1872 cont_memory_requirements.size = cont_mem_size;
1876 f->tiling == VK_IMAGE_TILING_LINEAR ?
1877 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
1878 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1879 (
void *)(((uint8_t *)alloc_pnext)),
1880 &
f->flags, &
f->mem[0])))
1883 f->size[0] = cont_memory_requirements.size;
1886 bind_info[
i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1887 bind_info[
i].image =
f->img[
i];
1888 bind_info[
i].memory =
f->mem[0];
1889 bind_info[
i].memoryOffset =
offset;
1891 f->offset[
i] = bind_info[
i].memoryOffset;
1892 offset += cont_mem_size_list[
i];
1898 if (
ret != VK_SUCCESS) {
1917 uint32_t src_qf, dst_qf;
1918 VkImageLayout new_layout;
1919 VkAccessFlags new_access;
1927 VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
1928 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
1929 .pSignalSemaphoreValues = sem_sig_val,
1930 .signalSemaphoreValueCount =
planes,
1933 VkSubmitInfo s_info = {
1934 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
1935 .pNext = &s_timeline_sem_info,
1936 .pSignalSemaphores =
frame->sem,
1937 .signalSemaphoreCount =
planes,
1942 wait_st[
i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1943 sem_sig_val[
i] =
frame->sem_value[
i] + 1;
1948 new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1949 new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
1950 src_qf = VK_QUEUE_FAMILY_IGNORED;
1951 dst_qf = VK_QUEUE_FAMILY_IGNORED;
1954 new_layout = VK_IMAGE_LAYOUT_GENERAL;
1955 new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
1956 src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
1957 dst_qf = VK_QUEUE_FAMILY_IGNORED;
1958 s_timeline_sem_info.pWaitSemaphoreValues =
frame->sem_value;
1959 s_timeline_sem_info.waitSemaphoreValueCount =
planes;
1960 s_info.pWaitSemaphores =
frame->sem;
1961 s_info.pWaitDstStageMask = wait_st;
1962 s_info.waitSemaphoreCount =
planes;
1965 new_layout = VK_IMAGE_LAYOUT_GENERAL;
1966 new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
1967 src_qf = VK_QUEUE_FAMILY_IGNORED;
1968 dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
1969 s_timeline_sem_info.pWaitSemaphoreValues =
frame->sem_value;
1970 s_timeline_sem_info.waitSemaphoreValueCount =
planes;
1971 s_info.pWaitSemaphores =
frame->sem;
1972 s_info.pWaitDstStageMask = wait_st;
1973 s_info.waitSemaphoreCount =
planes;
1984 img_bar[
i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1985 img_bar[
i].srcAccessMask = 0x0;
1986 img_bar[
i].dstAccessMask = new_access;
1987 img_bar[
i].oldLayout =
frame->layout[
i];
1988 img_bar[
i].newLayout = new_layout;
1989 img_bar[
i].srcQueueFamilyIndex = src_qf;
1990 img_bar[
i].dstQueueFamilyIndex = dst_qf;
1991 img_bar[
i].image =
frame->img[
i];
1992 img_bar[
i].subresourceRange.levelCount = 1;
1993 img_bar[
i].subresourceRange.layerCount = 1;
1994 img_bar[
i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1996 frame->layout[
i] = img_bar[
i].newLayout;
1997 frame->access[
i] = img_bar[
i].dstAccessMask;
2001 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
2002 VK_PIPELINE_STAGE_TRANSFER_BIT,
2009 int frame_w,
int frame_h,
int plane)
2026 VkImageTiling tiling, VkImageUsageFlagBits
usage,
2039 VkExportSemaphoreCreateInfo ext_sem_info = {
2040 .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
2042 .handleTypes = IsWindows8OrGreater()
2043 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
2044 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
2046 .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2050 VkSemaphoreTypeCreateInfo sem_type_info = {
2051 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
2053 .pNext = p->
extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info :
NULL,
2057 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
2061 VkSemaphoreCreateInfo sem_spawn = {
2062 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2063 .pNext = &sem_type_info,
2074 VkImageCreateInfo create_info = {
2075 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2076 .pNext = create_pnext,
2077 .imageType = VK_IMAGE_TYPE_2D,
2078 .format = img_fmts[
i],
2082 .flags = VK_IMAGE_CREATE_ALIAS_BIT,
2084 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2086 .samples = VK_SAMPLE_COUNT_1_BIT,
2087 .pQueueFamilyIndices = p->
qfs,
2088 .queueFamilyIndexCount = p->
num_qfs,
2089 .sharingMode = p->
num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2090 VK_SHARING_MODE_EXCLUSIVE,
2093 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
2096 ret = vk->CreateImage(hwctx->
act_dev, &create_info,
2098 if (
ret != VK_SUCCESS) {
2106 ret = vk->CreateSemaphore(hwctx->
act_dev, &sem_spawn,
2108 if (
ret != VK_SUCCESS) {
2114 f->layout[
i] = create_info.initialLayout;
2116 f->sem_value[
i] = 0;
2132 VkExternalMemoryHandleTypeFlags *comp_handle_types,
2133 VkExternalMemoryHandleTypeFlagBits *iexp,
2134 VkExternalMemoryHandleTypeFlagBits
exp)
2142 const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
2144 VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
2145 int has_mods = hwctx->
tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
2148 VkExternalImageFormatProperties eprops = {
2149 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
2151 VkImageFormatProperties2 props = {
2152 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
2155 VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
2156 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
2158 .pQueueFamilyIndices = p->
qfs,
2159 .queueFamilyIndexCount = p->
num_qfs,
2160 .sharingMode = p->
num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2161 VK_SHARING_MODE_EXCLUSIVE,
2163 VkPhysicalDeviceExternalImageFormatInfo enext = {
2164 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
2166 .pNext = has_mods ? &phy_dev_mod_info :
NULL,
2168 VkPhysicalDeviceImageFormatInfo2 pinfo = {
2169 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
2170 .pNext = !
exp ?
NULL : &enext,
2172 .type = VK_IMAGE_TYPE_2D,
2174 .usage = hwctx->
usage,
2175 .flags = VK_IMAGE_CREATE_ALIAS_BIT,
2178 nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1;
2179 for (
int i = 0;
i < nb_mods;
i++) {
2181 phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[
i];
2183 ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->
phys_dev,
2186 if (
ret == VK_SUCCESS) {
2188 *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
2203 VkExternalMemoryHandleTypeFlags e = 0x0;
2205 VkExternalMemoryImageCreateInfo eiinfo = {
2206 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
2211 if (p->
extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
2213 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
2214 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
2218 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
2222 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2226 eminfo[
i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
2228 eminfo[
i].handleTypes = e;
2232 eiinfo.handleTypes ? &eiinfo :
NULL);
2260 if (
fp->modifier_info) {
2261 if (
fp->modifier_info->pDrmFormatModifiers)
2262 av_freep(&
fp->modifier_info->pDrmFormatModifiers);
2279 const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
2284 has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
2286 VK_IMAGE_TILING_OPTIMAL;
2298 VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
2301 if (has_modifiers && !modifier_info) {
2303 VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
2305 VkDrmFormatModifierPropertiesEXT *mod_props;
2306 uint64_t *modifiers;
2307 int modifier_count = 0;
2309 VkDrmFormatModifierPropertiesListEXT mod_props_list = {
2310 .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
2312 .drmFormatModifierCount = 0,
2313 .pDrmFormatModifierProperties =
NULL,
2315 VkFormatProperties2 prop = {
2316 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
2317 .pNext = &mod_props_list,
2321 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->
phys_dev, fmt[0], &prop);
2323 if (!mod_props_list.drmFormatModifierCount) {
2324 av_log(hwfc,
AV_LOG_ERROR,
"There are no supported modifiers for the given sw_format\n");
2329 modifier_info =
av_mallocz(
sizeof(*modifier_info));
2333 modifier_info->pNext =
NULL;
2334 modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
2343 fp->modifier_info = modifier_info;
2346 modifiers =
av_mallocz(mod_props_list.drmFormatModifierCount *
2347 sizeof(*modifiers));
2351 modifier_info->pDrmFormatModifiers = modifiers;
2354 mod_props =
av_mallocz(mod_props_list.drmFormatModifierCount *
2355 sizeof(*mod_props));
2359 mod_props_list.pDrmFormatModifierProperties = mod_props;
2362 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->
phys_dev, fmt[0], &prop);
2365 for (
int i = 0;
i < mod_props_list.drmFormatModifierCount;
i++) {
2366 if (!(mod_props[
i].drmFormatModifierTilingFeatures & hwctx->
usage))
2369 modifiers[modifier_count++] = mod_props[
i].drmFormatModifier;
2372 if (!modifier_count) {
2374 " the usage flags!\n");
2379 modifier_info->drmFormatModifierCount = modifier_count;
2465 !(
map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
2470 flush_ranges[
i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
2471 flush_ranges[
i].memory =
map->frame->mem[
i];
2472 flush_ranges[
i].size = VK_WHOLE_SIZE;
2477 if (
ret != VK_SUCCESS) {
2484 vk->UnmapMemory(hwctx->
act_dev,
map->frame->mem[
i]);
2493 int err, mapped_mem_count = 0, mem_planes = 0;
2512 if (!(
f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
2513 !(
f->tiling == VK_IMAGE_TILING_LINEAR)) {
2524 for (
int i = 0;
i < mem_planes;
i++) {
2525 ret = vk->MapMemory(hwctx->act_dev,
f->mem[
i], 0,
2526 VK_WHOLE_SIZE, 0, (
void **)&dst->
data[
i]);
2527 if (
ret != VK_SUCCESS) {
2543 !(
f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
2546 map_mem_ranges[
i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
2547 map_mem_ranges[
i].size = VK_WHOLE_SIZE;
2548 map_mem_ranges[
i].memory =
f->mem[
i];
2551 ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev,
planes,
2553 if (
ret != VK_SUCCESS) {
2562 VkImageSubresource
sub = {
2563 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2565 VkSubresourceLayout
layout;
2566 vk->GetImageSubresourceLayout(hwctx->act_dev,
f->img[
i], &
sub, &
layout);
2581 for (
int i = 0;
i < mapped_mem_count;
i++)
2582 vk->UnmapMemory(hwctx->act_dev,
f->mem[
i]);
2597 VkSemaphoreWaitInfo wait_info = {
2598 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
2600 .pSemaphores =
f->sem,
2601 .pValues =
f->sem_value,
2602 .semaphoreCount =
planes,
2605 vk->WaitSemaphores(hwctx->
act_dev, &wait_info, UINT64_MAX);
2618 static const struct {
2619 uint32_t drm_fourcc;
2621 } vulkan_drm_format_map[] = {
2622 { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
2623 { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
2624 { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
2625 { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
2626 { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
2627 { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
2628 { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
2629 { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
2630 { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
2631 { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
2634 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
2637 if (vulkan_drm_format_map[
i].drm_fourcc == drm_fourcc)
2638 return vulkan_drm_format_map[
i].vk_format;
2639 return VK_FORMAT_UNDEFINED;
2648 int bind_counts = 0;
2659 if (drm_to_vulkan_fmt(
desc->layers[
i].format) == VK_FORMAT_UNDEFINED) {
2661 desc->layers[
i].format);
2672 f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
2674 for (
int i = 0;
i <
desc->nb_layers;
i++) {
2678 VkSemaphoreTypeCreateInfo sem_type_info = {
2679 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
2680 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
2683 VkSemaphoreCreateInfo sem_spawn = {
2684 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2685 .pNext = &sem_type_info,
2690 VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = {
2691 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
2692 .drmFormatModifier =
desc->objects[0].format_modifier,
2693 .drmFormatModifierPlaneCount =
planes,
2694 .pPlaneLayouts = (
const VkSubresourceLayout *)&ext_img_layouts,
2696 VkExternalMemoryImageCreateInfo ext_img_spec = {
2697 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
2698 .pNext = &ext_img_mod_spec,
2699 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2701 VkImageCreateInfo create_info = {
2702 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2703 .pNext = &ext_img_spec,
2704 .imageType = VK_IMAGE_TYPE_2D,
2705 .format = drm_to_vulkan_fmt(
desc->layers[
i].format),
2710 .tiling =
f->tiling,
2711 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2712 .usage = VK_IMAGE_USAGE_SAMPLED_BIT |
2713 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2714 .samples = VK_SAMPLE_COUNT_1_BIT,
2715 .pQueueFamilyIndices = p->
qfs,
2716 .queueFamilyIndexCount = p->
num_qfs,
2717 .sharingMode = p->
num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2718 VK_SHARING_MODE_EXCLUSIVE,
2722 VkExternalImageFormatProperties ext_props = {
2723 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
2725 VkImageFormatProperties2 props_ret = {
2726 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
2727 .pNext = &ext_props,
2729 VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = {
2730 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
2731 .drmFormatModifier = ext_img_mod_spec.drmFormatModifier,
2732 .pQueueFamilyIndices = create_info.pQueueFamilyIndices,
2733 .queueFamilyIndexCount = create_info.queueFamilyIndexCount,
2734 .sharingMode = create_info.sharingMode,
2736 VkPhysicalDeviceExternalImageFormatInfo props_ext = {
2737 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
2738 .pNext = &props_drm_mod,
2739 .handleType = ext_img_spec.handleTypes,
2741 VkPhysicalDeviceImageFormatInfo2 fmt_props = {
2742 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
2743 .pNext = &props_ext,
2744 .format = create_info.format,
2745 .type = create_info.imageType,
2746 .tiling = create_info.tiling,
2747 .usage = create_info.usage,
2748 .flags = create_info.flags,
2752 ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->
phys_dev,
2753 &fmt_props, &props_ret);
2754 if (
ret != VK_SUCCESS) {
2762 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
2766 for (
int j = 0; j <
planes; j++) {
2767 ext_img_layouts[j].offset =
desc->layers[
i].planes[j].offset;
2768 ext_img_layouts[j].rowPitch =
desc->layers[
i].planes[j].pitch;
2769 ext_img_layouts[j].size = 0;
2770 ext_img_layouts[j].arrayPitch = 0;
2771 ext_img_layouts[j].depthPitch = 0;
2775 ret = vk->CreateImage(hwctx->
act_dev, &create_info,
2777 if (
ret != VK_SUCCESS) {
2784 ret = vk->CreateSemaphore(hwctx->
act_dev, &sem_spawn,
2786 if (
ret != VK_SUCCESS) {
2797 f->layout[
i] = create_info.initialLayout;
2799 f->sem_value[
i] = 0;
2802 for (
int i = 0;
i <
desc->nb_objects;
i++) {
2804 VkImageMemoryRequirementsInfo2 req_desc = {
2805 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
2808 VkMemoryDedicatedRequirements ded_req = {
2809 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
2811 VkMemoryRequirements2 req2 = {
2812 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2817 VkMemoryFdPropertiesKHR fdmp = {
2818 .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
2820 VkImportMemoryFdInfoKHR idesc = {
2821 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
2822 .fd = dup(
desc->objects[
i].fd),
2823 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2825 VkMemoryDedicatedAllocateInfo ded_alloc = {
2826 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
2828 .image = req_desc.image,
2832 ret = vk->GetMemoryFdPropertiesKHR(hwctx->
act_dev,
2833 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2835 if (
ret != VK_SUCCESS) {
2843 vk->GetImageMemoryRequirements2(hwctx->
act_dev, &req_desc, &req2);
2846 req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits;
2849 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
2850 (ded_req.prefersDedicatedAllocation ||
2851 ded_req.requiresDedicatedAllocation) ?
2852 &ded_alloc : ded_alloc.pNext,
2853 &
f->flags, &
f->mem[
i]);
2859 f->size[
i] = req2.memoryRequirements.size;
2862 for (
int i = 0;
i <
desc->nb_layers;
i++) {
2864 for (
int j = 0; j <
planes; j++) {
2865 VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2866 j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
2867 VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
2869 plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
2871 plane_info[bind_counts].planeAspect = aspect;
2873 bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
2875 bind_info[bind_counts].image =
f->img[
i];
2876 bind_info[bind_counts].memory =
f->mem[
desc->layers[
i].planes[j].object_index];
2879 bind_info[bind_counts].memoryOffset = 0;
2886 ret = vk->BindImageMemory2(hwctx->
act_dev, bind_counts, bind_info);
2887 if (
ret != VK_SUCCESS) {
2903 for (
int i = 0;
i <
desc->nb_layers;
i++) {
2907 for (
int i = 0;
i <
desc->nb_objects;
i++)
2921 if ((err = vulkan_map_from_drm_frame_desc(hwfc, &
f,
src)))
2925 dst->
data[0] = (uint8_t *)
f;
2930 &vulkan_unmap_from_drm,
f);
2953 VASurfaceID surface_id = (VASurfaceID)(uintptr_t)
src->data[3];
2959 vaSyncSurface(vaapi_ctx->display, surface_id);
2967 err = vulkan_map_from_drm(dst_fc, dst,
tmp,
flags);
3000 CudaFunctions *cu = cu_internal->
cuda_dl;
3001 CUarray_format cufmt =
desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
3002 CU_AD_FORMAT_UNSIGNED_INT8;
3007 if (!dst_int || !dst_int->cuda_fc_ref) {
3015 if (!dst_int->cuda_fc_ref) {
3021 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
3026 .NumChannels = 1 + ((
planes == 2) &&
i),
3034 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
3035 .type = IsWindows8OrGreater()
3036 ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
3037 : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
3038 .size = dst_f->
size[
i],
3040 VkMemoryGetWin32HandleInfoKHR export_info = {
3041 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
3042 .memory = dst_f->
mem[
i],
3043 .handleType = IsWindows8OrGreater()
3044 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
3045 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
3047 VkSemaphoreGetWin32HandleInfoKHR sem_export = {
3048 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
3049 .semaphore = dst_f->
sem[
i],
3050 .handleType = IsWindows8OrGreater()
3051 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
3052 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
3054 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
3058 ret = vk->GetMemoryWin32HandleKHR(hwctx->
act_dev, &export_info,
3059 &ext_desc.handle.win32.handle);
3060 if (
ret != VK_SUCCESS) {
3066 dst_int->ext_mem_handle[
i] = ext_desc.handle.win32.handle;
3068 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
3069 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
3070 .size = dst_f->
size[
i],
3072 VkMemoryGetFdInfoKHR export_info = {
3073 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
3074 .memory = dst_f->
mem[
i],
3075 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
3077 VkSemaphoreGetFdInfoKHR sem_export = {
3078 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
3079 .semaphore = dst_f->
sem[
i],
3080 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
3082 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
3086 ret = vk->GetMemoryFdKHR(hwctx->
act_dev, &export_info,
3087 &ext_desc.handle.fd);
3088 if (
ret != VK_SUCCESS) {
3096 ret =
CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[
i], &ext_desc));
3099 close(ext_desc.handle.fd);
3106 tex_desc.arrayDesc.Width = p_w;
3107 tex_desc.arrayDesc.Height = p_h;
3109 ret =
CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[
i],
3110 dst_int->ext_mem[
i],
3117 ret =
CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[
i],
3118 dst_int->cu_mma[
i], 0));
3125 ret = vk->GetSemaphoreWin32HandleKHR(hwctx->
act_dev, &sem_export,
3126 &ext_sem_desc.handle.win32.handle);
3128 ret = vk->GetSemaphoreFdKHR(hwctx->
act_dev, &sem_export,
3129 &ext_sem_desc.handle.fd);
3131 if (
ret != VK_SUCCESS) {
3138 dst_int->ext_sem_handle[
i] = ext_sem_desc.handle.win32.handle;
3141 ret =
CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[
i],
3145 close(ext_sem_desc.handle.fd);
3175 CudaFunctions *cu = cu_internal->
cuda_dl;
3185 err =
CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
3189 err = vulkan_export_to_cuda(hwfc,
src->hw_frames_ctx, dst);
3198 s_w_par[
i].params.fence.value = dst_f->
sem_value[
i] + 0;
3199 s_s_par[
i].params.fence.value = dst_f->
sem_value[
i] + 1;
3202 err =
CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
3203 planes, cuda_dev->stream));
3208 CUDA_MEMCPY2D cpy = {
3209 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
3210 .srcDevice = (CUdeviceptr)
src->data[
i],
3211 .srcPitch =
src->linesize[
i],
3214 .dstMemoryType = CU_MEMORYTYPE_ARRAY,
3215 .dstArray = dst_int->cu_array[
i],
3221 cpy.WidthInBytes = p_w *
desc->comp[
i].step;
3224 err =
CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
3229 err =
CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
3230 planes, cuda_dev->stream));
3257 switch (
src->format) {
3262 return vulkan_map_from_vaapi(hwfc, dst,
src,
flags);
3268 return vulkan_map_from_drm(hwfc, dst,
src,
flags);
3278 typedef struct VulkanDRMMapping {
3293 static inline uint32_t vulkan_fmt_to_drm(
VkFormat vkfmt)
3296 if (vulkan_drm_format_map[
i].vk_format == vkfmt)
3297 return vulkan_drm_format_map[
i].drm_fourcc;
3298 return DRM_FORMAT_INVALID;
3313 VkImageDrmFormatModifierPropertiesEXT drm_mod = {
3314 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
3316 VkSemaphoreWaitInfo wait_info = {
3317 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
3319 .semaphoreCount =
planes,
3331 wait_info.pSemaphores =
f->sem;
3332 wait_info.pValues =
f->sem_value;
3334 vk->WaitSemaphores(hwctx->
act_dev, &wait_info, UINT64_MAX);
3340 ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->
act_dev,
f->img[0],
3342 if (
ret != VK_SUCCESS) {
3348 for (
int i = 0; (
i <
planes) && (
f->mem[
i]);
i++) {
3349 VkMemoryGetFdInfoKHR export_info = {
3350 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
3351 .memory =
f->mem[
i],
3352 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
3355 ret = vk->GetMemoryFdKHR(hwctx->
act_dev, &export_info,
3357 if (
ret != VK_SUCCESS) {
3370 VkSubresourceLayout
layout;
3371 VkImageSubresource
sub = {
3372 .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
3376 drm_desc->
layers[
i].
format = vulkan_fmt_to_drm(plane_vkfmt);
3387 if (
f->tiling == VK_IMAGE_TILING_OPTIMAL)
3400 dst->
data[0] = (uint8_t *)drm_desc;
3448 return vulkan_map_to_drm(hwfc, dst,
src,
flags);
3454 return vulkan_map_to_vaapi(hwfc, dst,
src,
flags);
3497 VkBufferUsageFlags
usage, VkMemoryPropertyFlagBits
flags,
3498 size_t size, uint32_t req_memory_bits,
int host_mapped,
3499 void *create_pnext,
void *alloc_pnext)
3508 VkBufferCreateInfo buf_spawn = {
3509 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
3510 .pNext = create_pnext,
3513 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
3516 VkBufferMemoryRequirementsInfo2 req_desc = {
3517 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
3519 VkMemoryDedicatedAllocateInfo ded_alloc = {
3520 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
3521 .pNext = alloc_pnext,
3523 VkMemoryDedicatedRequirements ded_req = {
3524 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
3526 VkMemoryRequirements2 req = {
3527 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
3538 if (
ret != VK_SUCCESS) {
3545 req_desc.buffer = vkbuf->
buf;
3547 vk->GetBufferMemoryRequirements2(hwctx->
act_dev, &req_desc, &req);
3550 use_ded_mem = ded_req.prefersDedicatedAllocation |
3551 ded_req.requiresDedicatedAllocation;
3553 ded_alloc.buffer = vkbuf->
buf;
3556 if (req_memory_bits)
3557 req.memoryRequirements.memoryTypeBits &= req_memory_bits;
3560 use_ded_mem ? &ded_alloc : (
void *)ded_alloc.pNext,
3566 if (
ret != VK_SUCCESS) {
3588 int nb_buffers,
int invalidate)
3595 int invalidate_count = 0;
3597 for (
int i = 0;
i < nb_buffers;
i++) {
3603 VK_WHOLE_SIZE, 0, (
void **)&mem[
i]);
3604 if (
ret != VK_SUCCESS) {
3614 for (
int i = 0;
i < nb_buffers;
i++) {
3616 const VkMappedMemoryRange ival_buf = {
3617 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
3618 .memory = vkbuf->
mem,
3619 .size = VK_WHOLE_SIZE,
3628 if (vkbuf->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3631 invalidate_ctx[invalidate_count++] = ival_buf;
3634 if (invalidate_count) {
3635 ret = vk->InvalidateMappedMemoryRanges(hwctx->
act_dev, invalidate_count,
3637 if (
ret != VK_SUCCESS)
3646 int nb_buffers,
int flush)
3654 int flush_count = 0;
3657 for (
int i = 0;
i < nb_buffers;
i++) {
3659 const VkMappedMemoryRange flush_buf = {
3660 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
3661 .memory = vkbuf->
mem,
3662 .size = VK_WHOLE_SIZE,
3665 if (vkbuf->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3668 flush_ctx[flush_count++] = flush_buf;
3673 ret = vk->FlushMappedMemoryRanges(hwctx->
act_dev, flush_count, flush_ctx);
3674 if (
ret != VK_SUCCESS) {
3681 for (
int i = 0;
i < nb_buffers;
i++) {
3694 const int *buf_stride,
int w,
3715 VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
3716 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
3717 .pWaitSemaphoreValues =
frame->sem_value,
3718 .pSignalSemaphoreValues = sem_signal_values,
3719 .waitSemaphoreValueCount =
planes,
3720 .signalSemaphoreValueCount =
planes,
3723 VkSubmitInfo s_info = {
3724 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
3725 .pNext = &s_timeline_sem_info,
3726 .pSignalSemaphores =
frame->sem,
3727 .pWaitSemaphores =
frame->sem,
3728 .pWaitDstStageMask = sem_wait_dst,
3729 .signalSemaphoreCount =
planes,
3730 .waitSemaphoreCount =
planes,
3734 sem_signal_values[
i] =
frame->sem_value[
i] + 1;
3741 VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
3742 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
3743 VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
3744 VK_ACCESS_TRANSFER_WRITE_BIT;
3746 sem_wait_dst[
i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
3749 if ((
frame->layout[
i] == new_layout) && (
frame->access[
i] & new_access))
3752 img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
3753 img_bar[bar_num].srcAccessMask = 0x0;
3754 img_bar[bar_num].dstAccessMask = new_access;
3755 img_bar[bar_num].oldLayout =
frame->layout[
i];
3756 img_bar[bar_num].newLayout = new_layout;
3757 img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
3758 img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
3759 img_bar[bar_num].image =
frame->img[
i];
3760 img_bar[bar_num].subresourceRange.levelCount = 1;
3761 img_bar[bar_num].subresourceRange.layerCount = 1;
3762 img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
3764 frame->layout[
i] = img_bar[bar_num].newLayout;
3765 frame->access[
i] = img_bar[bar_num].dstAccessMask;
3771 vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
3772 VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
3773 0,
NULL, 0,
NULL, bar_num, img_bar);
3778 VkBufferImageCopy buf_reg = {
3779 .bufferOffset = buf_offsets[
i],
3780 .bufferRowLength = buf_stride[
i] /
desc->comp[
i].step,
3781 .imageSubresource.layerCount = 1,
3782 .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
3783 .imageOffset = { 0, 0, 0, },
3789 buf_reg.bufferImageHeight = p_h;
3790 buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
3793 vk->CmdCopyImageToBuffer(cmd_buf,
frame->img[
i],
frame->layout[
i],
3794 vkbuf->
buf, 1, &buf_reg);
3796 vk->CmdCopyBufferToImage(cmd_buf, vkbuf->
buf,
frame->img[
i],
3797 frame->layout[
i], 1, &buf_reg);
3848 if (swf->width > hwfc->
width || swf->height > hwfc->
height)
3852 if (
f->tiling == VK_IMAGE_TILING_LINEAR &&
3853 f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
3857 map->format = swf->format;
3872 VkExternalMemoryBufferCreateInfo create_desc = {
3873 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
3874 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3877 VkImportMemoryHostPointerInfoEXT import_desc = {
3878 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
3879 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3882 VkMemoryHostPointerPropertiesEXT p_props = {
3883 .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
3886 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height,
i);
3891 if (map_host && swf->linesize[
i] > 0) {
3893 offs = (uintptr_t)swf->data[
i] % p->
hprops.minImportedHostPointerAlignment;
3894 import_desc.pHostPointer = swf->data[
i] - offs;
3899 p->
hprops.minImportedHostPointerAlignment);
3901 ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->
act_dev,
3902 import_desc.handleType,
3903 import_desc.pHostPointer,
3906 if (
ret == VK_SUCCESS) {
3908 buf_offsets[
i] = offs;
3912 if (!host_mapped[
i])
3916 from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
3917 VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
3918 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
3919 req_size, p_props.memoryTypeBits, host_mapped[
i],
3920 host_mapped[
i] ? &create_desc :
NULL,
3921 host_mapped[
i] ? &import_desc :
NULL);
3935 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height,
i);
3938 (
const uint8_t *)swf->data[
i], swf->linesize[
i],
3949 swf->width, swf->height, swf->format,
from);
3960 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height,
i);
3963 (
const uint8_t *)
tmp.data[
i],
tmp.linesize[
i],
3984 switch (
src->format) {
3988 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
3989 (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
3994 return vulkan_transfer_data_from_cuda(hwfc, dst,
src);
3997 if (
src->hw_frames_ctx)
4020 CudaFunctions *cu = cu_internal->
cuda_dl;
4030 err =
CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
4043 s_w_par[
i].params.fence.value = dst_f->
sem_value[
i] + 0;
4044 s_s_par[
i].params.fence.value = dst_f->
sem_value[
i] + 1;
4047 err =
CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
4048 planes, cuda_dev->stream));
4053 CUDA_MEMCPY2D cpy = {
4054 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
4055 .dstDevice = (CUdeviceptr)dst->
data[
i],
4059 .srcMemoryType = CU_MEMORYTYPE_ARRAY,
4060 .srcArray = dst_int->cu_array[
i],
4066 cpy.WidthInBytes =
w *
desc->comp[
i].step;
4069 err =
CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
4074 err =
CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
4075 planes, cuda_dev->stream));
4106 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
4107 (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
4112 return vulkan_transfer_data_to_cuda(hwfc, dst,
src);