28 #include <nvEncodeAPI.h>
40 #define CUDAAPI __stdcall
46 #define LOAD_FUNC(l, s) GetProcAddress(l, s)
47 #define DL_CLOSE_FUNC(l) FreeLibrary(l)
49 #define LOAD_FUNC(l, s) dlsym(l, s)
50 #define DL_CLOSE_FUNC(l) dlclose(l)
53 typedef enum cudaError_enum {
70 #if NVENCAPI_MAJOR_VERSION < 5
71 static const GUID dummy_license = { 0x0, 0x0, 0x0, { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 } };
176 mask = queue->
size - 1;
177 read_pos = (queue->
pos - queue->
count) & mask;
180 return &queue->
data[read_pos];
219 mask = queue->
size - 1;
222 queue->
pos = (queue->
pos + 1) & mask;
264 #define CHECK_LOAD_FUNC(t, f, s) \
266 (f) = (t)LOAD_FUNC(dl_fn->cuda_lib, s); \
268 av_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \
284 dl_fn->
cuda_lib = dlopen(
"libcuda.so", RTLD_LAZY);
/* Passes the result of f, together with the stringified expression #f, to the
 * check_cuda_errors() function and jumps to the caller's `error` label when
 * that function returns zero. Requires `avctx` and an `error` label to be in
 * scope at the point of use. The expansion is a bare `if` statement, so it
 * must not be used as the unbraced body of an if/else. */
321 #define check_cuda_errors(f) if (!check_cuda_errors(avctx, f, #f)) goto error
325 int device_count = 0;
328 int smminor = 0, smmajor = 0;
353 for (i = 0; i < device_count; ++i) {
358 smver = (smmajor << 4) | smminor;
360 av_log(avctx,
AV_LOG_VERBOSE,
"[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, smmajor, smminor, (smver >= 0x30) ?
"Available" :
"Not Available");
383 NVENCSTATUS nvstatus;
395 if (
sizeof(
void*) == 8) {
401 dl_fn->
nvenc_lib = dlopen(
"libnvidia-encode.so.1", RTLD_LAZY);
411 if (!nvEncodeAPICreateInstance) {
416 dl_fn->
nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
418 nvstatus = nvEncodeAPICreateInstance(&dl_fn->
nvenc_funcs);
420 if (nvstatus != NV_ENC_SUCCESS) {
465 NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encode_session_params = { 0 };
466 NV_ENC_PRESET_CONFIG preset_config = { 0 };
469 GUID encoder_preset = NV_ENC_PRESET_HQ_GUID;
470 NVENCSTATUS nv_status = NV_ENC_SUCCESS;
471 int surfaceCount = 0;
477 #if NVENCAPI_MAJOR_VERSION < 5
483 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
498 preset_config.version = NV_ENC_PRESET_CONFIG_VER;
499 preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
500 encode_session_params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
501 encode_session_params.apiVersion = NVENCAPI_VERSION;
503 #if NVENCAPI_MAJOR_VERSION < 5
504 encode_session_params.clientKeyPtr = &license;
517 av_log(avctx,
AV_LOG_FATAL,
"Failed creating CUDA context for NVENC: 0x%x\n", (
int)cu_res);
530 encode_session_params.device = ctx->
cu_context;
531 encode_session_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
533 nv_status = p_nvenc->nvEncOpenEncodeSessionEx(&encode_session_params, &ctx->
nvencoder);
534 if (nv_status != NV_ENC_SUCCESS) {
536 av_log(avctx,
AV_LOG_FATAL,
"OpenEncodeSessionEx failed: 0x%x - invalid license key?\n", (
int)nv_status);
542 if (!strcmp(ctx->
preset,
"hp")) {
543 encoder_preset = NV_ENC_PRESET_HP_GUID;
544 }
else if (!strcmp(ctx->
preset,
"hq")) {
545 encoder_preset = NV_ENC_PRESET_HQ_GUID;
546 }
else if (!strcmp(ctx->
preset,
"bd")) {
547 encoder_preset = NV_ENC_PRESET_BD_GUID;
548 }
else if (!strcmp(ctx->
preset,
"ll")) {
549 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
551 }
else if (!strcmp(ctx->
preset,
"llhp")) {
552 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
554 }
else if (!strcmp(ctx->
preset,
"llhq")) {
555 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
557 }
else if (!strcmp(ctx->
preset,
"default")) {
558 encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
560 av_log(avctx,
AV_LOG_FATAL,
"Preset \"%s\" is unknown! Supported presets: hp, hq, bd, ll, llhp, llhq, default\n", ctx->
preset);
566 nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->
nvencoder, NV_ENC_CODEC_H264_GUID, encoder_preset, &preset_config);
567 if (nv_status != NV_ENC_SUCCESS) {
592 if (avctx->
width == 720 &&
605 num_mbs = ((avctx->
width + 15) >> 4) * ((avctx->
height + 15) >> 4);
617 if (avctx->
refs >= 0) {
633 ctx->
encode_config.encodeCodecConfig.h264Config.idrPeriod = 1;
648 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
649 }
else if (ctx->
twopass == 1 || isLL) {
650 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
652 ctx->
encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
653 ctx->
encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
656 av_log(avctx,
AV_LOG_WARNING,
"Twopass mode is only known to work with low latency (ll, llhq, llhp) presets.\n");
658 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
661 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
668 }
else if (avctx->
qmin >= 0 && avctx->
qmax >= 0) {
669 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
687 ctx->
encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
689 ctx->
encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
694 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
697 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
701 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
705 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
709 ctx->
encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1;
710 ctx->
encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1;
714 ctx->
encode_config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->
color_trc;
722 if (nv_status != NV_ENC_SUCCESS) {
743 NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
744 NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
745 allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
746 allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
748 allocSurf.width = (avctx->
width + 31) & ~31;
749 allocSurf.height = (avctx->
height + 31) & ~31;
751 allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
755 allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
759 allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
763 allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
/* Create the input surface and take the error path on any non-success status. */
772 nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->
nvencoder, &allocSurf);
/* Must be a comparison: `=` would overwrite nv_status with NV_ENC_SUCCESS and
 * the failure branch would never be taken. */
773 if (nv_status != NV_ENC_SUCCESS){
786 allocOut.size = 1024 * 1024;
788 allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
/* Create the output bitstream buffer and take the error path on any non-success status. */
790 nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->
nvencoder, &allocOut);
/* Must be a comparison: `=` would overwrite nv_status with NV_ENC_SUCCESS and
 * the failure branch would never be taken. */
791 if (nv_status != NV_ENC_SUCCESS) {
804 uint32_t outSize = 0;
806 NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
807 payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
809 payload.spsppsBuffer = tmpHeader;
810 payload.inBufferSize =
sizeof(tmpHeader);
811 payload.outSPSPPSPayloadSize = &outSize;
813 nv_status = p_nvenc->nvEncGetSequenceParams(ctx->
nvencoder, &payload);
814 if (nv_status != NV_ENC_SUCCESS) {
827 memcpy(avctx->
extradata, tmpHeader, outSize);
840 for (i = 0; i < surfaceCount; ++i) {
847 p_nvenc->nvEncDestroyEncoder(ctx->
nvencoder);
866 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
879 p_nvenc->nvEncDestroyEncoder(ctx->
nvencoder);
896 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
898 uint32_t *slice_offsets =
av_mallocz(ctx->
encode_config.encodeCodecConfig.h264Config.sliceModeData *
sizeof(*slice_offsets));
899 NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
900 NVENCSTATUS nv_status;
906 lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
908 lock_params.doNotWait = 0;
910 lock_params.sliceOffsets = slice_offsets;
912 nv_status = p_nvenc->nvEncLockBitstream(ctx->
nvencoder, &lock_params);
913 if (nv_status != NV_ENC_SUCCESS) {
924 memcpy(pkt->
data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);
927 if (nv_status != NV_ENC_SUCCESS)
928 av_log(avctx,
AV_LOG_ERROR,
"Failed unlocking bitstream buffer, expect the gates of mordor to open\n");
930 switch (lock_params.pictureType) {
931 case NV_ENC_PIC_TYPE_IDR:
933 case NV_ENC_PIC_TYPE_I:
936 case NV_ENC_PIC_TYPE_P:
939 case NV_ENC_PIC_TYPE_B:
942 case NV_ENC_PIC_TYPE_BI:
946 av_log(avctx,
AV_LOG_ERROR,
"Unknown picture type encountered, expect the output to be broken.\n");
947 av_log(avctx,
AV_LOG_ERROR,
"Please report this error and include as much information on how to reproduce it as possible.\n");
952 pkt->
pts = lock_params.outputTimeStamp;
982 NVENCSTATUS nv_status;
988 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
990 NV_ENC_PIC_PARAMS pic_params = { 0 };
991 pic_params.version = NV_ENC_PIC_PARAMS_VER;
994 NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
1008 lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
1011 nv_status = p_nvenc->nvEncLockInputBuffer(ctx->
nvencoder, &lockBufferParams);
1012 if (nv_status != NV_ENC_SUCCESS) {
1018 uint8_t *
buf = lockBufferParams.bufferDataPtr;
1024 buf += inSurf->
height * lockBufferParams.pitch;
1030 buf += (inSurf->
height * lockBufferParams.pitch) >> 2;
1036 uint8_t *
buf = lockBufferParams.bufferDataPtr;
1042 buf += inSurf->
height * lockBufferParams.pitch;
1048 uint8_t *
buf = lockBufferParams.bufferDataPtr;
1054 buf += inSurf->
height * lockBufferParams.pitch;
1060 buf += inSurf->
height * lockBufferParams.pitch;
1071 if (nv_status != NV_ENC_SUCCESS) {
1089 pic_params.bufferFmt = inSurf->
format;
1090 pic_params.inputWidth = avctx->
width;
1091 pic_params.inputHeight = avctx->
height;
1093 pic_params.completionEvent = 0;
1097 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
1099 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
1102 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
1105 pic_params.encodePicFlags = 0;
1106 pic_params.inputTimeStamp = frame->
pts;
1107 pic_params.inputDuration = 0;
1108 pic_params.codecPicParams.h264PicParams.sliceMode = ctx->
encode_config.encodeCodecConfig.h264Config.sliceMode;
1109 pic_params.codecPicParams.h264PicParams.sliceModeData = ctx->
encode_config.encodeCodecConfig.h264Config.sliceModeData;
1111 #if NVENCAPI_MAJOR_VERSION < 5
1112 memcpy(&pic_params.rcParams, &ctx->
encode_config.rcParams,
sizeof(NV_ENC_RC_PARAMS));
1120 pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
1123 nv_status = p_nvenc->nvEncEncodePicture(ctx->
nvencoder, &pic_params);
1125 if (frame && nv_status == NV_ENC_ERR_NEED_MORE_INPUT) {
1134 if (nv_status != NV_ENC_SUCCESS && nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
1139 if (nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
1166 tmpoutsurf->
busy = 0;
/* Byte offset of member x within NvencContext; used by the option entries that follow. */
1183 #define OFFSET(x) offsetof(NvencContext, x)
/* Flag set applied to the option entries that follow. Parenthesized so the `|`
 * cannot regroup with operators around the expansion site. */
1184 #define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1186 {
"preset",
"Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)",
OFFSET(
preset),
AV_OPT_TYPE_STRING, { .str =
"hq" }, 0, 0,
VE },
1188 {
"2pass",
"Use 2pass cbr encoding mode (low latency mode only)",
OFFSET(twopass),
AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1,
VE },
1189 {
"gpu",
"Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",
OFFSET(gpu),
AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX,
VE },
1220 .priv_class = &nvenc_class,