/**
 * @file src/platform/windows/display_vram.cpp
 * @brief Display capture and color conversion using D3D11 VRAM textures (Desktop Duplication).
 */
#include <algorithm>
#include <cmath>
#include <codecvt>

#include <boost/algorithm/string/predicate.hpp>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext_d3d11va.h>
}

#include "display.h"
#include "misc.h"
#include "src/config.h"
#include "src/main.h"
#include "src/nvenc/nvenc_config.h"
#include "src/nvenc/nvenc_d3d11.h"
#include "src/nvenc/nvenc_utils.h"
#include "src/video.h"

#include <AMF/core/Factory.h>
#include <AMF/core/Version.h>

#define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx"

namespace platf {
  using namespace std::literals;
}

static void
free_frame(AVFrame *frame) {
  av_frame_free(&frame);
}

using frame_t = util::safe_ptr<AVFrame, free_frame>;

namespace platf::dxgi {

  template <class T>
  buf_t
  make_buffer(device_t::pointer device, const T &t) {
    static_assert(sizeof(T) % 16 == 0, "Buffer needs to be aligned on a 16-byte alignment");

    D3D11_BUFFER_DESC buffer_desc {
      sizeof(T),
      D3D11_USAGE_IMMUTABLE,
      D3D11_BIND_CONSTANT_BUFFER
    };

    D3D11_SUBRESOURCE_DATA init_data {
      &t
    };

    buf_t::pointer buf_p;
    auto status = device->CreateBuffer(&buffer_desc, &init_data, &buf_p);
    if (status) {
      BOOST_LOG(error) << "Failed to create buffer: [0x"sv << util::hex(status).to_string_view() << ']';
      return nullptr;
    }

    return buf_t { buf_p };
  }

  blend_t
  make_blend(device_t::pointer device, bool enable, bool invert) {
    D3D11_BLEND_DESC bdesc {};
    auto &rt = bdesc.RenderTarget[0];
    rt.BlendEnable = enable;
    rt.RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;

    if (enable) {
      rt.BlendOp = D3D11_BLEND_OP_ADD;
      rt.BlendOpAlpha = D3D11_BLEND_OP_ADD;

      if (invert) {
        // Invert colors
        rt.SrcBlend = D3D11_BLEND_INV_DEST_COLOR;
        rt.DestBlend = D3D11_BLEND_INV_SRC_COLOR;
      }
      else {
        // Regular alpha blending
        rt.SrcBlend = D3D11_BLEND_SRC_ALPHA;
        rt.DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
      }

      rt.SrcBlendAlpha = D3D11_BLEND_ZERO;
      rt.DestBlendAlpha = D3D11_BLEND_ZERO;
    }

    blend_t blend;
    auto status = device->CreateBlendState(&bdesc, &blend);
    if (status) {
      BOOST_LOG(error) << "Failed to create blend state: [0x"sv << util::hex(status).to_string_view() << ']';
      return nullptr;
    }

    return blend;
  }

  blob_t convert_UV_vs_hlsl;
  blob_t convert_UV_ps_hlsl;
  blob_t convert_UV_linear_ps_hlsl;
  blob_t convert_UV_PQ_ps_hlsl;
  blob_t scene_vs_hlsl;
  blob_t cursor_vs_hlsl;
  blob_t convert_Y_ps_hlsl;
  blob_t convert_Y_linear_ps_hlsl;
  blob_t convert_Y_PQ_ps_hlsl;
  blob_t scene_ps_hlsl;
  blob_t scene_NW_ps_hlsl;

  struct img_d3d_t: public platf::img_t {
    // These objects are owned by the display_t's ID3D11Device
    texture2d_t capture_texture;
    render_target_t capture_rt;
    keyed_mutex_t capture_mutex;

    // This is the shared handle used by hwdevice_t to open capture_texture
    HANDLE encoder_texture_handle = {};

    // Set to true if the image corresponds to a dummy texture used prior to
    // the first successful capture of a desktop frame
    bool dummy = false;

    // Unique identifier for this image
    uint32_t id = 0;

    // DXGI format of this image texture
    DXGI_FORMAT format;

    virtual ~img_d3d_t() override {
      if (encoder_texture_handle) {
        CloseHandle(encoder_texture_handle);
      }
    };
  };
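  // RAII wrapper around IDXGIKeyedMutex. The capture device and each encoder device
  // share the same underlying texture, and DXGI keyed mutexes are the synchronization
  // primitive that works across ID3D11Device instances. Everything in this file
  // acquires and releases with key 0, so the mutex simply serializes access rather
  // than enforcing a producer/consumer ordering. Typical usage looks like this
  // (illustrative sketch, not an actual call site from this file):
  //
  //   texture_lock_helper lock_helper { img->capture_mutex.get() };
  //   if (lock_helper.lock()) {
  //     device_ctx->CopyResource(img->capture_texture.get(), src.get());
  //   }  // ReleaseSync(0) happens in the destructor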
  struct texture_lock_helper {
    keyed_mutex_t _mutex;
    bool _locked = false;

    texture_lock_helper(const texture_lock_helper &) = delete;
    texture_lock_helper &
    operator=(const texture_lock_helper &) = delete;

    texture_lock_helper(texture_lock_helper &&other) {
      _mutex.reset(other._mutex.release());
      _locked = other._locked;
      other._locked = false;
    }

    texture_lock_helper &
    operator=(texture_lock_helper &&other) {
      if (_locked) _mutex->ReleaseSync(0);
      _mutex.reset(other._mutex.release());
      _locked = other._locked;
      other._locked = false;
      return *this;
    }

    texture_lock_helper(IDXGIKeyedMutex *mutex):
        _mutex(mutex) {
      if (_mutex) _mutex->AddRef();
    }

    ~texture_lock_helper() {
      if (_locked) _mutex->ReleaseSync(0);
    }

    bool
    lock() {
      if (_locked) return true;
      HRESULT status = _mutex->AcquireSync(0, INFINITE);
      if (status == S_OK) {
        _locked = true;
      }
      else {
        BOOST_LOG(error) << "Failed to acquire texture mutex [0x"sv << util::hex(status).to_string_view() << ']';
      }
      return _locked;
    }
  };

  util::buffer_t<std::uint8_t>
  make_cursor_xor_image(const util::buffer_t<std::uint8_t> &img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info) {
    constexpr std::uint32_t inverted = 0xFFFFFFFF;
    constexpr std::uint32_t transparent = 0;

    switch (shape_info.Type) {
      case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
        // This type doesn't require any XOR-blending
        return {};
      case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR: {
        util::buffer_t<std::uint8_t> cursor_img = img_data;
        std::for_each((std::uint32_t *) std::begin(cursor_img), (std::uint32_t *) std::end(cursor_img), [](auto &pixel) {
          auto alpha = (std::uint8_t)((pixel >> 24) & 0xFF);
          if (alpha == 0xFF) {
            // Pixels with 0xFF alpha will be XOR-blended as is.
          }
          else if (alpha == 0x00) {
            // Pixels with 0x00 alpha will be blended by make_cursor_alpha_image().
            // We make them transparent for the XOR-blended cursor image.
            pixel = transparent;
          }
          else {
            // Other alpha values are illegal in masked color cursors
            BOOST_LOG(warning) << "Illegal alpha value in masked color cursor: " << (int) alpha;
          }
        });
        return cursor_img;
      }
      case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME:
        // Monochrome is handled below
        break;
      default:
        BOOST_LOG(error) << "Invalid cursor shape type: " << shape_info.Type;
        return {};
    }

    shape_info.Height /= 2;

    util::buffer_t<std::uint8_t> cursor_img { shape_info.Width * shape_info.Height * 4 };

    auto bytes = shape_info.Pitch * shape_info.Height;
    auto pixel_begin = (std::uint32_t *) std::begin(cursor_img);
    auto pixel_data = pixel_begin;
    auto and_mask = std::begin(img_data);
    auto xor_mask = std::begin(img_data) + bytes;

    for (auto x = 0; x < bytes; ++x) {
      for (auto c = 7; c >= 0; --c) {
        auto bit = 1 << c;
        auto color_type = ((*and_mask & bit) ? 1 : 0) + ((*xor_mask & bit) ? 2 : 0);

        switch (color_type) {
          case 0:  // Opaque black (handled by alpha-blending)
          case 2:  // Opaque white (handled by alpha-blending)
          case 1:  // Color of screen (transparent)
            *pixel_data = transparent;
            break;
          case 3:  // Inverse of screen
            *pixel_data = inverted;
            break;
        }

        ++pixel_data;
      }
      ++and_mask;
      ++xor_mask;
    }

    return cursor_img;
  }
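  // A monochrome cursor shape is two packed 1-bit-per-pixel masks (AND mask followed
  // by XOR mask). For each pixel, the (AND, XOR) bit pair selects one of the four
  // classic GDI cursor behaviors:
  //
  //   AND  XOR  meaning           alpha image    XOR image
  //    0    0   opaque black      opaque black   transparent
  //    0    1   opaque white      opaque white   transparent
  //    1    0   screen unchanged  transparent    transparent
  //    1    1   inverted screen   transparent    0xFFFFFFFF (inverts via XOR blend)
  //
  // make_cursor_xor_image() above produces the XOR layer; make_cursor_alpha_image()
  // below produces the alpha layer. Blending the alpha layer first and the XOR layer
  // second reproduces the original cursor appearance.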
  util::buffer_t<std::uint8_t>
  make_cursor_alpha_image(const util::buffer_t<std::uint8_t> &img_data, DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info) {
    constexpr std::uint32_t black = 0xFF000000;
    constexpr std::uint32_t white = 0xFFFFFFFF;
    constexpr std::uint32_t transparent = 0;

    switch (shape_info.Type) {
      case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR: {
        util::buffer_t<std::uint8_t> cursor_img = img_data;
        std::for_each((std::uint32_t *) std::begin(cursor_img), (std::uint32_t *) std::end(cursor_img), [](auto &pixel) {
          auto alpha = (std::uint8_t)((pixel >> 24) & 0xFF);
          if (alpha == 0xFF) {
            // Pixels with 0xFF alpha will be XOR-blended by make_cursor_xor_image().
            // We make them transparent for the alpha-blended cursor image.
            pixel = transparent;
          }
          else if (alpha == 0x00) {
            // Pixels with 0x00 alpha will be blended as opaque with the alpha-blended image.
            pixel |= 0xFF000000;
          }
          else {
            // Other alpha values are illegal in masked color cursors
            BOOST_LOG(warning) << "Illegal alpha value in masked color cursor: " << (int) alpha;
          }
        });
        return cursor_img;
      }
      case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
        // Color cursors are just an ARGB bitmap which requires no processing.
        return img_data;
      case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME:
        // Monochrome cursors are handled below.
        break;
      default:
        BOOST_LOG(error) << "Invalid cursor shape type: " << shape_info.Type;
        return {};
    }

    shape_info.Height /= 2;

    util::buffer_t<std::uint8_t> cursor_img { shape_info.Width * shape_info.Height * 4 };

    auto bytes = shape_info.Pitch * shape_info.Height;
    auto pixel_begin = (std::uint32_t *) std::begin(cursor_img);
    auto pixel_data = pixel_begin;
    auto and_mask = std::begin(img_data);
    auto xor_mask = std::begin(img_data) + bytes;

    for (auto x = 0; x < bytes; ++x) {
      for (auto c = 7; c >= 0; --c) {
        auto bit = 1 << c;
        auto color_type = ((*and_mask & bit) ? 1 : 0) + ((*xor_mask & bit) ? 2 : 0);

        switch (color_type) {
          case 0:  // Opaque black
            *pixel_data = black;
            break;
          case 2:  // Opaque white
            *pixel_data = white;
            break;
          case 3:  // Inverse of screen (handled by XOR blending)
          case 1:  // Color of screen (transparent)
            *pixel_data = transparent;
            break;
        }

        ++pixel_data;
      }
      ++and_mask;
      ++xor_mask;
    }

    return cursor_img;
  }

  blob_t
  compile_shader(LPCSTR file, LPCSTR entrypoint, LPCSTR shader_model) {
    blob_t::pointer msg_p = nullptr;
    blob_t::pointer compiled_p;

    DWORD flags = D3DCOMPILE_ENABLE_STRICTNESS;

#ifndef NDEBUG
    flags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#endif

    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;

    auto wFile = converter.from_bytes(file);
    auto status = D3DCompileFromFile(wFile.c_str(), nullptr, nullptr, entrypoint, shader_model, flags, 0, &compiled_p, &msg_p);

    if (msg_p) {
      BOOST_LOG(warning) << std::string_view { (const char *) msg_p->GetBufferPointer(), msg_p->GetBufferSize() - 1 };
      msg_p->Release();
    }

    if (status) {
      BOOST_LOG(error) << "Couldn't compile ["sv << file << "] [0x"sv << util::hex(status).to_string_view() << ']';
      return nullptr;
    }

    return blob_t { compiled_p };
  }

  blob_t
  compile_pixel_shader(LPCSTR file) {
    return compile_shader(file, "main_ps", "ps_5_0");
  }

  blob_t
  compile_vertex_shader(LPCSTR file) {
    return compile_shader(file, "main_vs", "vs_5_0");
  }
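  // Converts captured RGB(A) textures into the NV12/P010 layout the encoders expect.
  // The conversion runs as two render passes over the output texture: one pass
  // renders the luma (Y) plane and one renders the interleaved chroma (UV) plane,
  // each issued with Draw(3, 0). Drawing three vertices with no vertex buffer bound
  // is the usual "fullscreen triangle" technique: the vertex shader is expected to
  // derive a triangle covering the whole viewport from the vertex index, so no
  // vertex data needs to be uploaded. (The exact vertex-shader math lives in the
  // HLSL sources under SUNSHINE_SHADERS_DIR and is not shown here.)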
  class d3d_base_encode_device final {
  public:
    int
    convert(platf::img_t &img_base) {
      // Garbage collect mapped capture images whose weak references have expired
      for (auto it = img_ctx_map.begin(); it != img_ctx_map.end();) {
        if (it->second.img_weak.expired()) {
          it = img_ctx_map.erase(it);
        }
        else {
          it++;
        }
      }

      auto &img = (img_d3d_t &) img_base;
      auto &img_ctx = img_ctx_map[img.id];

      // Open the shared capture texture with our ID3D11Device
      if (initialize_image_context(img, img_ctx)) {
        return -1;
      }

      // Acquire encoder mutex to synchronize with capture code
      auto status = img_ctx.encoder_mutex->AcquireSync(0, INFINITE);
      if (status != S_OK) {
        BOOST_LOG(error) << "Failed to acquire encoder mutex [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      device_ctx->OMSetRenderTargets(1, &nv12_Y_rt, nullptr);
      device_ctx->VSSetShader(scene_vs.get(), nullptr, 0);
      device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_fp16_ps.get() : convert_Y_ps.get(), nullptr, 0);
      device_ctx->RSSetViewports(1, &outY_view);
      device_ctx->PSSetShaderResources(0, 1, &img_ctx.encoder_input_res);
      device_ctx->Draw(3, 0);

      device_ctx->OMSetRenderTargets(1, &nv12_UV_rt, nullptr);
      device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0);
      device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_UV_fp16_ps.get() : convert_UV_ps.get(), nullptr, 0);
      device_ctx->RSSetViewports(1, &outUV_view);
      device_ctx->Draw(3, 0);

      // Release encoder mutex to allow capture code to reuse this image
      img_ctx.encoder_mutex->ReleaseSync(0);

      ID3D11ShaderResourceView *emptyShaderResourceView = nullptr;
      device_ctx->PSSetShaderResources(0, 1, &emptyShaderResourceView);

      return 0;
    }

    void
    apply_colorspace(const ::video::sunshine_colorspace_t &colorspace) {
      auto color_vectors = ::video::color_vectors_from_colorspace(colorspace);

      if (!color_vectors) {
        BOOST_LOG(error) << "No vector data for colorspace"sv;
        return;
      }

      auto color_matrix = make_buffer(device.get(), *color_vectors);
      if (!color_matrix) {
        BOOST_LOG(warning) << "Failed to create color matrix"sv;
        return;
      }

      device_ctx->PSSetConstantBuffers(0, 1, &color_matrix);
      this->color_matrix = std::move(color_matrix);
    }

    int
    init_output(ID3D11Texture2D *frame_texture, int width, int height) {
      // The underlying frame pool owns the texture, so we must reference it for ourselves
      frame_texture->AddRef();
      output_texture.reset(frame_texture);

      auto out_width = width;
      auto out_height = height;

      float in_width = display->width;
      float in_height = display->height;

      // Ensure aspect ratio is maintained
      auto scalar = std::fminf(out_width / in_width, out_height / in_height);
      auto out_width_f = in_width * scalar;
      auto out_height_f = in_height * scalar;

      // result is always positive
      auto offsetX = (out_width - out_width_f) / 2;
      auto offsetY = (out_height - out_height_f) / 2;

      outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };
      outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };

      float info_in[16 / sizeof(float)] { 1.0f / (float) out_width_f };  // aligned to 16-byte
      info_scene = make_buffer(device.get(), info_in);

      if (!info_scene) {
        BOOST_LOG(error) << "Failed to create info scene buffer"sv;
        return -1;
      }
      device_ctx->VSSetConstantBuffers(0, 1, &info_scene);

      {
        int32_t rotation_modifier = display->display_rotation == DXGI_MODE_ROTATION_UNSPECIFIED ? 0 : display->display_rotation - 1;
        int32_t rotation_data[16 / sizeof(int32_t)] { -rotation_modifier };  // aligned to 16-byte
        auto rotation = make_buffer(device.get(), rotation_data);
        if (!rotation) {
          BOOST_LOG(error) << "Failed to create display rotation vertex constant buffer";
          return -1;
        }
        device_ctx->VSSetConstantBuffers(1, 1, &rotation);
      }
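      // NV12 and P010 are planar formats, but D3D11 lets each plane be aliased as its
      // own render target: the Y plane binds as R8_UNORM (R16_UNORM for P010) and the
      // interleaved UV plane binds as R8G8_UNORM (R16G16_UNORM for P010). Rendering
      // into these two views writes directly into the final planar output texture.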
      D3D11_RENDER_TARGET_VIEW_DESC nv12_rt_desc {
        format == DXGI_FORMAT_P010 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM,
        D3D11_RTV_DIMENSION_TEXTURE2D
      };

      auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;

      status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      // Clear the RTVs to ensure the aspect ratio padding is black
      const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f };
      device_ctx->ClearRenderTargetView(nv12_Y_rt.get(), y_black);
      const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f };
      device_ctx->ClearRenderTargetView(nv12_UV_rt.get(), uv_black);

      return 0;
    }

    int
    init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
      D3D_FEATURE_LEVEL featureLevels[] {
        D3D_FEATURE_LEVEL_11_1,
        D3D_FEATURE_LEVEL_11_0,
        D3D_FEATURE_LEVEL_10_1,
        D3D_FEATURE_LEVEL_10_0,
        D3D_FEATURE_LEVEL_9_3,
        D3D_FEATURE_LEVEL_9_2,
        D3D_FEATURE_LEVEL_9_1
      };

      HRESULT status = D3D11CreateDevice(
        adapter_p,
        D3D_DRIVER_TYPE_UNKNOWN,
        nullptr,
        D3D11_CREATE_DEVICE_FLAGS,
        featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL),
        D3D11_SDK_VERSION,
        &device,
        nullptr,
        &device_ctx);

      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to create encoder D3D11 device [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      dxgi::dxgi_t dxgi;
      status = device->QueryInterface(IID_IDXGIDevice, (void **) &dxgi);
      if (FAILED(status)) {
        BOOST_LOG(warning) << "Failed to query DXGI interface from device [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      status = dxgi->SetGPUThreadPriority(7);
      if (FAILED(status)) {
        BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
      }

      format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);
      status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
      if (status) {
        BOOST_LOG(error) << "Failed to create scene vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      status = device->CreateVertexShader(convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(), nullptr, &convert_UV_vs);
      if (status) {
        BOOST_LOG(error) << "Failed to create convertUV vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      // If the display is in HDR and we're streaming HDR, we'll be converting scRGB to SMPTE 2084 PQ.
      if (format == DXGI_FORMAT_P010 && display->is_hdr()) {
        status = device->CreatePixelShader(convert_Y_PQ_ps_hlsl->GetBufferPointer(), convert_Y_PQ_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps);
        if (status) {
          BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
          return -1;
        }

        status = device->CreatePixelShader(convert_UV_PQ_ps_hlsl->GetBufferPointer(), convert_UV_PQ_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps);
        if (status) {
          BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
          return -1;
        }
      }
      else {
        // If the display is in Advanced Color mode, the desktop format will be scRGB FP16.
        // scRGB uses linear gamma, so we must use our linear to sRGB conversion shaders.
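        // Note: in scRGB, (1.0, 1.0, 1.0) nominally corresponds to the 80-nit sRGB
        // white point, with values above 1.0 (and below 0.0) representing colors
        // outside the sRGB range.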
        status = device->CreatePixelShader(convert_Y_linear_ps_hlsl->GetBufferPointer(), convert_Y_linear_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_fp16_ps);
        if (status) {
          BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
          return -1;
        }

        status = device->CreatePixelShader(convert_UV_linear_ps_hlsl->GetBufferPointer(), convert_UV_linear_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_fp16_ps);
        if (status) {
          BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
          return -1;
        }
      }

      // These shaders consume standard 8-bit sRGB input
      status = device->CreatePixelShader(convert_Y_ps_hlsl->GetBufferPointer(), convert_Y_ps_hlsl->GetBufferSize(), nullptr, &convert_Y_ps);
      if (status) {
        BOOST_LOG(error) << "Failed to create convertY pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      status = device->CreatePixelShader(convert_UV_ps_hlsl->GetBufferPointer(), convert_UV_ps_hlsl->GetBufferSize(), nullptr, &convert_UV_ps);
      if (status) {
        BOOST_LOG(error) << "Failed to create convertUV pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
      if (!default_color_vectors) {
        BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
        return -1;
      }

      color_matrix = make_buffer(device.get(), *default_color_vectors);
      if (!color_matrix) {
        BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
        return -1;
      }
      device_ctx->PSSetConstantBuffers(0, 1, &color_matrix);

      D3D11_INPUT_ELEMENT_DESC layout_desc {
        "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0
      };

      status = device->CreateInputLayout(
        &layout_desc, 1,
        convert_UV_vs_hlsl->GetBufferPointer(), convert_UV_vs_hlsl->GetBufferSize(),
        &input_layout);

      this->display = std::dynamic_pointer_cast<display_base_t>(display);
      if (!this->display) {
        return -1;
      }
      display = nullptr;

      blend_disable = make_blend(device.get(), false, false);
      if (!blend_disable) {
        return -1;
      }

      D3D11_SAMPLER_DESC sampler_desc {};
      sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
      sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
      sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
      sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
      sampler_desc.ComparisonFunc = D3D11_COMPARISON_NEVER;
      sampler_desc.MinLOD = 0;
      sampler_desc.MaxLOD = D3D11_FLOAT32_MAX;

      status = device->CreateSamplerState(&sampler_desc, &sampler_linear);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to create linear sampler state [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      device_ctx->IASetInputLayout(input_layout.get());
      device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
      device_ctx->PSSetSamplers(0, 1, &sampler_linear);
      device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

      return 0;
    }
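    // Shared D3D11 resources are opened per-device: a texture created on the capture
    // device cannot be bound on an encoder device directly, so each encoder device
    // opens the capture texture's shared NT handle and creates its own SRV and keyed
    // mutex interface for it. encoder_img_ctx_t caches these per-image, per-device
    // objects so the handle is only opened once per image.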
    struct encoder_img_ctx_t {
      // Used to determine if the underlying texture changes.
      // Not safe for actual use by the encoder!
      texture2d_t::const_pointer capture_texture_p;

      texture2d_t encoder_texture;
      shader_res_t encoder_input_res;
      keyed_mutex_t encoder_mutex;

      std::weak_ptr<const platf::img_t> img_weak;

      void
      reset() {
        capture_texture_p = nullptr;
        encoder_texture.reset();
        encoder_input_res.reset();
        encoder_mutex.reset();
        img_weak.reset();
      }
    };

    int
    initialize_image_context(const img_d3d_t &img, encoder_img_ctx_t &img_ctx) {
      // If we've already opened the shared texture, we're done
      if (img_ctx.encoder_texture && img.capture_texture.get() == img_ctx.capture_texture_p) {
        return 0;
      }

      // Reset this image context in case it was used before with a different texture.
      // Textures can change when transitioning from a dummy image to a real image.
      img_ctx.reset();

      device1_t device1;
      auto status = device->QueryInterface(__uuidof(ID3D11Device1), (void **) &device1);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to query ID3D11Device1 [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      // Open a handle to the shared texture
      status = device1->OpenSharedResource1(img.encoder_texture_handle, __uuidof(ID3D11Texture2D), (void **) &img_ctx.encoder_texture);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to open shared image texture [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      // Get the keyed mutex to synchronize with the capture code
      status = img_ctx.encoder_texture->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **) &img_ctx.encoder_mutex);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to query IDXGIKeyedMutex [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      // Create the SRV for the encoder texture
      status = device->CreateShaderResourceView(img_ctx.encoder_texture.get(), nullptr, &img_ctx.encoder_input_res);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to create shader resource view for encoding [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      img_ctx.capture_texture_p = img.capture_texture.get();

      img_ctx.img_weak = img.weak_from_this();

      return 0;
    }

    ::video::color_t *color_p;

    buf_t info_scene;
    buf_t color_matrix;

    input_layout_t input_layout;

    blend_t blend_disable;
    sampler_state_t sampler_linear;

    render_target_t nv12_Y_rt;
    render_target_t nv12_UV_rt;

    // d3d_img_t::id -> encoder_img_ctx_t
    // These store the encoder textures for each img_t that passes through
    // convert(). We can't store them in the img_t itself because it is shared
    // amongst multiple hwdevice_t objects (and therefore multiple ID3D11Devices).
    std::map<uint32_t, encoder_img_ctx_t> img_ctx_map;

    std::shared_ptr<display_base_t> display;

    vs_t convert_UV_vs;
    ps_t convert_UV_ps;
    ps_t convert_UV_fp16_ps;
    ps_t convert_Y_ps;
    ps_t convert_Y_fp16_ps;

    vs_t scene_vs;

    D3D11_VIEWPORT outY_view;
    D3D11_VIEWPORT outUV_view;

    DXGI_FORMAT format;

    device_t device;
    device_ctx_t device_ctx;

    texture2d_t output_texture;
  };

  class d3d_avcodec_encode_device_t: public avcodec_encode_device_t {
  public:
    int
    init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
      int result = base.init(display, adapter_p, pix_fmt);
      data = base.device.get();
      return result;
    }

    int
    convert(platf::img_t &img_base) override {
      return base.convert(img_base);
    }

    void
    apply_colorspace() override {
      base.apply_colorspace(colorspace);
    }

    void
    init_hwframes(AVHWFramesContext *frames) override {
      // We may be called with a QSV or D3D11VA context
      if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
        auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;

        // The encoder requires textures with D3D11_BIND_RENDER_TARGET set
        d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
        d3d11_frames->MiscFlags = 0;
      }

      // We require a single texture
      frames->initial_pool_size = 1;
    }

    int
    prepare_to_derive_context(int hw_device_type) override {
      // QuickSync requires our device to be multithread-protected
      if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
        multithread_t mt;

        auto status = base.device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
        if (FAILED(status)) {
          BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
          return -1;
        }

        mt->SetMultithreadProtected(TRUE);
      }

      return 0;
    }
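    // When the encoder runs on a derived hardware context (e.g. QSV), the frames
    // FFmpeg hands us are not AV_PIX_FMT_D3D11. In that case, av_hwframe_map() with
    // AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE requests a writable D3D11 view
    // of the same underlying memory, letting the conversion passes render straight
    // into the frame the encoder will consume.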
    int
    set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
      this->hwframe.reset(frame);
      this->frame = frame;

      // Populate this frame with a hardware buffer if one isn't there already
      if (!frame->buf[0]) {
        auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
        if (err) {
          char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
          BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
          return -1;
        }
      }

      // If this is a frame from a derived context, we'll need to map it to D3D11
      ID3D11Texture2D *frame_texture;
      if (frame->format != AV_PIX_FMT_D3D11) {
        frame_t d3d11_frame { av_frame_alloc() };

        d3d11_frame->format = AV_PIX_FMT_D3D11;

        auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
        if (err) {
          char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
          BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
          return -1;
        }

        // Get the texture from the mapped frame
        frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
      }
      else {
        // Otherwise, we can just use the texture inside the original frame
        frame_texture = (ID3D11Texture2D *) frame->data[0];
      }

      return base.init_output(frame_texture, frame->width, frame->height);
    }

  private:
    d3d_base_encode_device base;
    frame_t hwframe;
  };

  class d3d_nvenc_encode_device_t: public nvenc_encode_device_t {
  public:
    bool
    init_device(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
      buffer_format = nvenc::nvenc_format_from_sunshine_format(pix_fmt);
      if (buffer_format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
        BOOST_LOG(error) << "Unexpected pixel format for NvENC ["sv << from_pix_fmt(pix_fmt) << ']';
        return false;
      }

      if (base.init(display, adapter_p, pix_fmt)) return false;

      nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11>(base.device.get());
      nvenc = nvenc_d3d.get();

      return true;
    }

    bool
    init_encoder(const ::video::config_t &client_config, const ::video::sunshine_colorspace_t &colorspace) override {
      if (!nvenc_d3d) return false;

      auto nvenc_colorspace = nvenc::nvenc_colorspace_from_sunshine_colorspace(colorspace);
      if (!nvenc_d3d->create_encoder(config::video.nv, client_config, nvenc_colorspace, buffer_format)) return false;

      base.apply_colorspace(colorspace);
      return base.init_output(nvenc_d3d->get_input_texture(), client_config.width, client_config.height) == 0;
    }

    int
    convert(platf::img_t &img_base) override {
      return base.convert(img_base);
    }

  private:
    d3d_base_encode_device base;
    std::unique_ptr<nvenc::nvenc_d3d11> nvenc_d3d;
    NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
  };

  bool
  set_cursor_texture(device_t::pointer device, gpu_cursor_t &cursor, util::buffer_t<std::uint8_t> &&cursor_img, DXGI_OUTDUPL_POINTER_SHAPE_INFO &shape_info) {
    // This cursor image may not be used
    if (cursor_img.size() == 0) {
      cursor.input_res.reset();
      cursor.set_texture(0, 0, nullptr);
      return true;
    }

    D3D11_SUBRESOURCE_DATA data {
      std::begin(cursor_img),
      4 * shape_info.Width,
      0
    };

    // Create texture for cursor
    D3D11_TEXTURE2D_DESC t {};
    t.Width = shape_info.Width;
    t.Height = cursor_img.size() / data.SysMemPitch;
    t.MipLevels = 1;
    t.ArraySize = 1;
    t.SampleDesc.Count = 1;
    t.Usage = D3D11_USAGE_IMMUTABLE;
    t.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
    t.BindFlags = D3D11_BIND_SHADER_RESOURCE;

    texture2d_t texture;
    auto status = device->CreateTexture2D(&t, &data, &texture);
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to create mouse texture [0x"sv << util::hex(status).to_string_view() << ']';
      return false;
    }

    // Free resources before allocating on the next line.
    cursor.input_res.reset();
    status = device->CreateShaderResourceView(texture.get(), nullptr, &cursor.input_res);
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to create cursor shader resource view [0x"sv << util::hex(status).to_string_view() << ']';
      return false;
    }

    cursor.set_texture(t.Width, t.Height, std::move(texture));
    return true;
  }

  capture_e
  display_vram_t::snapshot(const pull_free_image_cb_t &pull_free_image_cb, std::shared_ptr<platf::img_t> &img_out, std::chrono::milliseconds timeout, bool cursor_visible) {
    HRESULT status;

    DXGI_OUTDUPL_FRAME_INFO frame_info;

    resource_t::pointer res_p {};
    auto capture_status = dup.next_frame(frame_info, timeout, &res_p);
    resource_t res { res_p };

    if (capture_status != capture_e::ok) {
      return capture_status;
    }

    const bool mouse_update_flag = frame_info.LastMouseUpdateTime.QuadPart != 0 || frame_info.PointerShapeBufferSize > 0;
    const bool frame_update_flag = frame_info.LastPresentTime.QuadPart != 0;
    const bool update_flag = mouse_update_flag || frame_update_flag;

    if (!update_flag) {
      return capture_e::timeout;
    }

    std::optional<std::chrono::steady_clock::time_point> frame_timestamp;
    if (auto qpc_displayed = std::max(frame_info.LastPresentTime.QuadPart, frame_info.LastMouseUpdateTime.QuadPart)) {
      // Translate QueryPerformanceCounter() value to steady_clock time point
      frame_timestamp = std::chrono::steady_clock::now() - qpc_time_difference(qpc_counter(), qpc_displayed);
    }
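    // The subtraction above is effectively:
    //   steady_clock::now() - (qpc_counter() - qpc_displayed) / qpc_frequency
    // i.e. we measure how long ago (in QPC ticks) the frame was displayed and rebase
    // that offset onto the steady_clock timeline. qpc_time_difference() is assumed
    // to perform the tick-to-duration conversion.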
    if (frame_info.PointerShapeBufferSize > 0) {
      DXGI_OUTDUPL_POINTER_SHAPE_INFO shape_info {};

      util::buffer_t<std::uint8_t> img_data { frame_info.PointerShapeBufferSize };

      UINT dummy;
      status = dup.dup->GetFramePointerShape(img_data.size(), std::begin(img_data), &dummy, &shape_info);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to get new pointer shape [0x"sv << util::hex(status).to_string_view() << ']';
        return capture_e::error;
      }

      auto alpha_cursor_img = make_cursor_alpha_image(img_data, shape_info);
      auto xor_cursor_img = make_cursor_xor_image(img_data, shape_info);

      if (!set_cursor_texture(device.get(), cursor_alpha, std::move(alpha_cursor_img), shape_info) ||
          !set_cursor_texture(device.get(), cursor_xor, std::move(xor_cursor_img), shape_info)) {
        return capture_e::error;
      }
    }

    if (frame_info.LastMouseUpdateTime.QuadPart) {
      cursor_alpha.set_pos(frame_info.PointerPosition.Position.x, frame_info.PointerPosition.Position.y, width, height, display_rotation, frame_info.PointerPosition.Visible);
      cursor_xor.set_pos(frame_info.PointerPosition.Position.x, frame_info.PointerPosition.Position.y, width, height, display_rotation, frame_info.PointerPosition.Visible);
    }

    const bool blend_mouse_cursor_flag = (cursor_alpha.visible || cursor_xor.visible) && cursor_visible;

    texture2d_t src {};
    if (frame_update_flag) {
      // Get the texture object from this frame
      status = res->QueryInterface(IID_ID3D11Texture2D, (void **) &src);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Couldn't query interface [0x"sv << util::hex(status).to_string_view() << ']';
        return capture_e::error;
      }

      D3D11_TEXTURE2D_DESC desc;
      src->GetDesc(&desc);

      // It's possible for our display enumeration to race with mode changes and result in
      // mismatched image pool and desktop texture sizes. If this happens, just reinit again.
      if (desc.Width != width_before_rotation || desc.Height != height_before_rotation) {
        BOOST_LOG(info) << "Capture size changed ["sv << width << 'x' << height << " -> "sv << desc.Width << 'x' << desc.Height << ']';
        return capture_e::reinit;
      }

      // If we don't know the capture format yet, grab it from this texture
      if (capture_format == DXGI_FORMAT_UNKNOWN) {
        capture_format = desc.Format;
        BOOST_LOG(info) << "Capture format ["sv << dxgi_format_to_string(capture_format) << ']';
      }

      // It's also possible for the capture format to change on the fly. If that happens,
      // reinitialize capture to try format detection again and create new images.
      if (capture_format != desc.Format) {
        BOOST_LOG(info) << "Capture format changed ["sv << dxgi_format_to_string(capture_format) << " -> "sv << dxgi_format_to_string(desc.Format) << ']';
        return capture_e::reinit;
      }
    }

    enum class lfa {
      nothing,
      replace_surface_with_img,
      replace_img_with_surface,
      copy_src_to_img,
      copy_src_to_surface,
    };

    enum class ofa {
      forward_last_img,
      copy_last_surface_and_blend_cursor,
      dummy_fallback,
    };
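    // Decision matrix for the two actions, derived from the branches below:
    //
    //   new frame?  cursor blend?  last_frame_action           out_frame_action
    //   ----------  -------------  --------------------------  ----------------------------------
    //   yes         yes            copy_src_to_surface         copy_last_surface_and_blend_cursor
    //   yes         no             copy_src_to_img             forward_last_img
    //   no          yes            replace_img_with_surface*   copy_last_surface_and_blend_cursor
    //   no          no             replace_surface_with_img*   forward_last_img
    //
    //   (* only when the stored variant currently holds the other alternative;
    //      an unknown capture format always falls back to a dummy image)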
    auto last_frame_action = lfa::nothing;
    auto out_frame_action = ofa::dummy_fallback;

    if (capture_format == DXGI_FORMAT_UNKNOWN) {
      // We don't know the final capture format yet, so we will encode a black dummy image
      last_frame_action = lfa::nothing;
      out_frame_action = ofa::dummy_fallback;
    }
    else {
      if (src) {
        // We got a new frame from DesktopDuplication...
        if (blend_mouse_cursor_flag) {
          // ...and we need to blend the mouse cursor onto it.
          // Copy the frame to an intermediate surface so we can blend this and future mouse cursor updates
          // without new frames from DesktopDuplication. We use a direct3d surface directly here and not
          // an image from pull_free_image_cb mainly because it's lighter (surface sharing between
          // direct3d devices produces significant memory overhead).
          last_frame_action = lfa::copy_src_to_surface;
          // Copy the intermediate surface to a new image from pull_free_image_cb and blend the mouse cursor onto it.
          out_frame_action = ofa::copy_last_surface_and_blend_cursor;
        }
        else {
          // ...and we don't need to blend the mouse cursor.
          // Copy the frame to a new image from pull_free_image_cb and save the shared pointer to the image
          // in case the mouse cursor appears without a new frame from DesktopDuplication.
          last_frame_action = lfa::copy_src_to_img;
          // Use the saved last image shared pointer as the output image, evading a copy.
          out_frame_action = ofa::forward_last_img;
        }
      }
      else if (!std::holds_alternative<std::monostate>(last_frame_variant)) {
        // We didn't get a new frame from DesktopDuplication...
        if (blend_mouse_cursor_flag) {
          // ...but we need to blend the mouse cursor.
          if (std::holds_alternative<std::shared_ptr<platf::img_t>>(last_frame_variant)) {
            // We have the shared pointer of the last image, replace it with an intermediate surface
            // while copying contents so we can blend this and future mouse cursor updates.
            last_frame_action = lfa::replace_img_with_surface;
          }
          // Copy the intermediate surface which contains the last DesktopDuplication frame
          // to a new image from pull_free_image_cb and blend the mouse cursor onto it.
          out_frame_action = ofa::copy_last_surface_and_blend_cursor;
        }
        else {
          // ...and we don't need to blend the mouse cursor.
          // This happens when the mouse cursor disappears from the screen,
          // or when there's a mouse cursor on screen but its drawing is disabled in sunshine.
          if (std::holds_alternative<texture2d_t>(last_frame_variant)) {
            // We have the intermediate surface that was used as the mouse cursor blending base.
            // Replace it with an image from pull_free_image_cb, copying contents and freeing up the surface memory.
            // Save the shared pointer to the image in case the mouse cursor reappears.
            last_frame_action = lfa::replace_surface_with_img;
          }
          // Use the saved last image shared pointer as the output image, evading a copy.
          out_frame_action = ofa::forward_last_img;
        }
      }
    }

    auto create_surface = [&](texture2d_t &surface) -> bool {
      // Try to reuse the old surface if it hasn't been destroyed yet.
      if (old_surface_delayed_destruction) {
        surface.reset(old_surface_delayed_destruction.release());
        return true;
      }

      // Otherwise create a new surface.
      D3D11_TEXTURE2D_DESC t {};
      t.Width = width_before_rotation;
      t.Height = height_before_rotation;
      t.MipLevels = 1;
      t.ArraySize = 1;
      t.SampleDesc.Count = 1;
      t.Usage = D3D11_USAGE_DEFAULT;
      t.Format = capture_format;
      t.BindFlags = 0;
      status = device->CreateTexture2D(&t, nullptr, &surface);
      if (FAILED(status)) {
        BOOST_LOG(error) << "Failed to create frame copy texture [0x"sv << util::hex(status).to_string_view() << ']';
        return false;
      }

      return true;
    };
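    // The intermediate surface is kept alive for a grace period after it stops being
    // needed (see old_surface_delayed_destruction below) because cursor visibility
    // tends to toggle rapidly; reusing the surface avoids reallocating a full-frame
    // texture every time the cursor disappears and reappears.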
    auto get_locked_d3d_img = [&](std::shared_ptr<platf::img_t> &img, bool dummy = false) -> std::tuple<std::shared_ptr<img_d3d_t>, texture_lock_helper> {
      auto d3d_img = std::static_pointer_cast<img_d3d_t>(img);

      // Finish creating the image (if it hasn't happened already);
      // this also creates synchronization primitives for shared access from multiple direct3d devices.
      if (complete_img(d3d_img.get(), dummy)) return { nullptr, nullptr };

      // This image is shared between the capture direct3d device and the encoders' direct3d devices,
      // so we must acquire the lock before doing anything to it.
      texture_lock_helper lock_helper(d3d_img->capture_mutex.get());
      if (!lock_helper.lock()) {
        BOOST_LOG(error) << "Failed to lock capture texture";
        return { nullptr, nullptr };
      }

      return { std::move(d3d_img), std::move(lock_helper) };
    };

    switch (last_frame_action) {
      case lfa::nothing: {
        break;
      }

      case lfa::replace_surface_with_img: {
        auto p_surface = std::get_if<texture2d_t>(&last_frame_variant);
        if (!p_surface) {
          BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
          return capture_e::error;
        }

        std::shared_ptr<platf::img_t> img;
        if (!pull_free_image_cb(img)) return capture_e::interrupted;

        auto [d3d_img, lock] = get_locked_d3d_img(img);
        if (!d3d_img) return capture_e::error;

        device_ctx->CopyResource(d3d_img->capture_texture.get(), p_surface->get());

        // We delay the destruction of the intermediate surface in case the mouse cursor reappears shortly.
        old_surface_delayed_destruction.reset(p_surface->release());
        old_surface_timestamp = std::chrono::steady_clock::now();

        last_frame_variant = img;
        break;
      }

      case lfa::replace_img_with_surface: {
        auto p_img = std::get_if<std::shared_ptr<platf::img_t>>(&last_frame_variant);
        if (!p_img) {
          BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
          return capture_e::error;
        }
        auto [d3d_img, lock] = get_locked_d3d_img(*p_img);
        if (!d3d_img) return capture_e::error;

        p_img = nullptr;
        last_frame_variant = texture2d_t {};

        auto &surface = std::get<texture2d_t>(last_frame_variant);
        if (!create_surface(surface)) return capture_e::error;

        device_ctx->CopyResource(surface.get(), d3d_img->capture_texture.get());
        break;
      }

      case lfa::copy_src_to_img: {
        last_frame_variant = {};

        std::shared_ptr<platf::img_t> img;
        if (!pull_free_image_cb(img)) return capture_e::interrupted;

        auto [d3d_img, lock] = get_locked_d3d_img(img);
        if (!d3d_img) return capture_e::error;

        device_ctx->CopyResource(d3d_img->capture_texture.get(), src.get());

        last_frame_variant = img;
        break;
      }

      case lfa::copy_src_to_surface: {
        auto p_surface = std::get_if<texture2d_t>(&last_frame_variant);
        if (!p_surface) {
          last_frame_variant = texture2d_t {};
          p_surface = std::get_if<texture2d_t>(&last_frame_variant);

          if (!create_surface(*p_surface)) return capture_e::error;
        }
        device_ctx->CopyResource(p_surface->get(), src.get());
        break;
      }
    }

    auto blend_cursor = [&](img_d3d_t &d3d_img) {
      device_ctx->VSSetShader(cursor_vs.get(), nullptr, 0);
      device_ctx->PSSetShader(cursor_ps.get(), nullptr, 0);
      device_ctx->OMSetRenderTargets(1, &d3d_img.capture_rt, nullptr);

      if (cursor_alpha.texture.get()) {
        // Perform an alpha blending operation
        device_ctx->OMSetBlendState(blend_alpha.get(), nullptr, 0xFFFFFFFFu);

        device_ctx->PSSetShaderResources(0, 1, &cursor_alpha.input_res);
        device_ctx->RSSetViewports(1, &cursor_alpha.cursor_view);
        device_ctx->Draw(3, 0);
      }

      if (cursor_xor.texture.get()) {
        // Perform an invert blending without touching alpha values
        device_ctx->OMSetBlendState(blend_invert.get(), nullptr, 0x00FFFFFFu);

        device_ctx->PSSetShaderResources(0, 1, &cursor_xor.input_res);
        device_ctx->RSSetViewports(1, &cursor_xor.cursor_view);
        device_ctx->Draw(3, 0);
      }

      device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);

      ID3D11RenderTargetView *emptyRenderTarget = nullptr;
      device_ctx->OMSetRenderTargets(1, &emptyRenderTarget, nullptr);
      device_ctx->RSSetViewports(0, nullptr);
      ID3D11ShaderResourceView *emptyShaderResourceView = nullptr;
      device_ctx->PSSetShaderResources(0, 1, &emptyShaderResourceView);
    };
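    // blend_cursor draws the alpha layer first (standard SrcAlpha/InvSrcAlpha
    // blending), then the XOR layer with the color-inverting blend state from
    // make_blend(). The inverting pass uses INV_DEST_COLOR/INV_SRC_COLOR, which for
    // a pure white source pixel computes 1 - dest, i.e. an XOR-style screen
    // inversion, while transparent (black) source pixels leave the destination
    // unchanged.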
    switch (out_frame_action) {
      case ofa::forward_last_img: {
        auto p_img = std::get_if<std::shared_ptr<platf::img_t>>(&last_frame_variant);
        if (!p_img) {
          BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
          return capture_e::error;
        }

        img_out = *p_img;
        break;
      }

      case ofa::copy_last_surface_and_blend_cursor: {
        auto p_surface = std::get_if<texture2d_t>(&last_frame_variant);
        if (!p_surface) {
          BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
          return capture_e::error;
        }
        if (!blend_mouse_cursor_flag) {
          BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__;
          return capture_e::error;
        }

        if (!pull_free_image_cb(img_out)) return capture_e::interrupted;

        auto [d3d_img, lock] = get_locked_d3d_img(img_out);
        if (!d3d_img) return capture_e::error;

        device_ctx->CopyResource(d3d_img->capture_texture.get(), p_surface->get());
        blend_cursor(*d3d_img);
        break;
      }

      case ofa::dummy_fallback: {
        if (!pull_free_image_cb(img_out)) return capture_e::interrupted;

        // Clear the image if it has been used as a dummy.
        // It can have the mouse cursor blended onto it.
        auto old_d3d_img = (img_d3d_t *) img_out.get();
        bool reclear_dummy = old_d3d_img->dummy && old_d3d_img->capture_texture;

        auto [d3d_img, lock] = get_locked_d3d_img(img_out, true);
        if (!d3d_img) return capture_e::error;

        if (reclear_dummy) {
          auto dummy_data = std::make_unique<std::uint8_t[]>(d3d_img->row_pitch * d3d_img->height);
          std::fill_n(dummy_data.get(), d3d_img->row_pitch * d3d_img->height, 0);
          device_ctx->UpdateSubresource(d3d_img->capture_texture.get(), 0, nullptr, dummy_data.get(), d3d_img->row_pitch, 0);
        }

        if (blend_mouse_cursor_flag) {
          blend_cursor(*d3d_img);
        }

        break;
      }
    }

    // Perform delayed destruction of the unused surface if the time is due.
    if (old_surface_delayed_destruction && old_surface_timestamp + 10s < std::chrono::steady_clock::now()) {
      old_surface_delayed_destruction.reset();
    }

    if (img_out) {
      img_out->frame_timestamp = frame_timestamp;
    }

    return capture_e::ok;
  }

  int
  display_vram_t::init(const ::video::config_t &config, const std::string &display_name) {
    if (display_base_t::init(config, display_name)) {
      return -1;
    }

    D3D11_SAMPLER_DESC sampler_desc {};
    sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
    sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
    sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
    sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
    sampler_desc.ComparisonFunc = D3D11_COMPARISON_NEVER;
    sampler_desc.MinLOD = 0;
    sampler_desc.MaxLOD = D3D11_FLOAT32_MAX;

    auto status = device->CreateSamplerState(&sampler_desc, &sampler_linear);
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to create linear sampler state [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }

    status = device->CreateVertexShader(cursor_vs_hlsl->GetBufferPointer(), cursor_vs_hlsl->GetBufferSize(), nullptr, &cursor_vs);
    if (status) {
      BOOST_LOG(error) << "Failed to create cursor vertex shader [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }
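    // DXGI_MODE_ROTATION encodes IDENTITY..ROTATE270 as 1..4, so subtracting 1 turns
    // the enum into the number of 90-degree rotation steps the vertex shader must
    // apply (0 for unspecified/identity). The encoder side passes the negated value
    // to undo the rotation instead.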
    {
      int32_t rotation_modifier = display_rotation == DXGI_MODE_ROTATION_UNSPECIFIED ? 0 : display_rotation - 1;
      int32_t rotation_data[16 / sizeof(int32_t)] { rotation_modifier };  // aligned to 16-byte
      auto rotation = make_buffer(device.get(), rotation_data);
      if (!rotation) {
        BOOST_LOG(error) << "Failed to create display rotation vertex constant buffer";
        return -1;
      }
      device_ctx->VSSetConstantBuffers(2, 1, &rotation);
    }

    if (config.dynamicRange && is_hdr()) {
      // This shader will normalize scRGB white levels to a user-defined white level
      status = device->CreatePixelShader(scene_NW_ps_hlsl->GetBufferPointer(), scene_NW_ps_hlsl->GetBufferSize(), nullptr, &cursor_ps);
      if (status) {
        BOOST_LOG(error) << "Failed to create scene pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }

      // Use a 300 nit target for the mouse cursor. We should really get
      // the user's SDR white level in nits, but there is no API that
      // provides that information to Win32 apps.
      float sdr_multiplier_data[16 / sizeof(float)] { 300.0f / 80.f };  // aligned to 16-byte
      auto sdr_multiplier = make_buffer(device.get(), sdr_multiplier_data);
      if (!sdr_multiplier) {
        BOOST_LOG(warning) << "Failed to create SDR multiplier"sv;
        return -1;
      }

      device_ctx->PSSetConstantBuffers(1, 1, &sdr_multiplier);
    }
    else {
      status = device->CreatePixelShader(scene_ps_hlsl->GetBufferPointer(), scene_ps_hlsl->GetBufferSize(), nullptr, &cursor_ps);
      if (status) {
        BOOST_LOG(error) << "Failed to create scene pixel shader [0x"sv << util::hex(status).to_string_view() << ']';
        return -1;
      }
    }

    blend_alpha = make_blend(device.get(), true, false);
    blend_invert = make_blend(device.get(), true, true);
    blend_disable = make_blend(device.get(), false, false);

    if (!blend_disable || !blend_alpha || !blend_invert) {
      return -1;
    }

    device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu);
    device_ctx->PSSetSamplers(0, 1, &sampler_linear);
    device_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

    return 0;
  }

  std::shared_ptr<platf::img_t>
  display_vram_t::alloc_img() {
    auto img = std::make_shared<img_d3d_t>();

    // Initialize format-independent fields
    img->width = width_before_rotation;
    img->height = height_before_rotation;
    img->id = next_image_id++;

    return img;
  }

  // This cannot use ID3D11DeviceContext because it can be called concurrently by the encoding thread
  int
  display_vram_t::complete_img(platf::img_t *img_base, bool dummy) {
    auto img = (img_d3d_t *) img_base;

    // If this already has a capture texture and it's not switching dummy state, nothing to do
    if (img->capture_texture && img->dummy == dummy) {
      return 0;
    }

    // If this is not a dummy image, we must know the format by now
    if (!dummy && capture_format == DXGI_FORMAT_UNKNOWN) {
      BOOST_LOG(error) << "display_vram_t::complete_img() called with unknown capture format!";
      return -1;
    }

    // Reset the image (in case this was previously a dummy)
    img->capture_texture.reset();
    img->capture_rt.reset();
    img->capture_mutex.reset();
    img->data = nullptr;
    if (img->encoder_texture_handle) {
      CloseHandle(img->encoder_texture_handle);
      img->encoder_texture_handle = NULL;
    }

    // Initialize format-dependent fields
    img->pixel_pitch = get_pixel_pitch();
    img->row_pitch = img->pixel_pitch * img->width;
    img->dummy = dummy;
    img->format = (capture_format == DXGI_FORMAT_UNKNOWN) ? DXGI_FORMAT_B8G8R8A8_UNORM : capture_format;
    D3D11_TEXTURE2D_DESC t {};
    t.Width = img->width;
    t.Height = img->height;
    t.MipLevels = 1;
    t.ArraySize = 1;
    t.SampleDesc.Count = 1;
    t.Usage = D3D11_USAGE_DEFAULT;
    t.Format = img->format;
    t.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET;
    t.MiscFlags = D3D11_RESOURCE_MISC_SHARED_NTHANDLE | D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX;

    HRESULT status;
    if (dummy) {
      auto dummy_data = std::make_unique<std::uint8_t[]>(img->row_pitch * img->height);
      std::fill_n(dummy_data.get(), img->row_pitch * img->height, 0);
      D3D11_SUBRESOURCE_DATA initial_data {
        dummy_data.get(),
        (UINT) img->row_pitch,
        0
      };
      status = device->CreateTexture2D(&t, &initial_data, &img->capture_texture);
    }
    else {
      status = device->CreateTexture2D(&t, nullptr, &img->capture_texture);
    }
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to create img buf texture [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }

    status = device->CreateRenderTargetView(img->capture_texture.get(), nullptr, &img->capture_rt);
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }

    // Get the keyed mutex to synchronize with the encoding code
    status = img->capture_texture->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **) &img->capture_mutex);
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to query IDXGIKeyedMutex [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }

    resource1_t resource;
    status = img->capture_texture->QueryInterface(__uuidof(IDXGIResource1), (void **) &resource);
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to query IDXGIResource1 [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }

    // Create a handle for the encoder device to use to open this texture
    status = resource->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ, nullptr, &img->encoder_texture_handle);
    if (FAILED(status)) {
      BOOST_LOG(error) << "Failed to create shared texture handle [0x"sv << util::hex(status).to_string_view() << ']';
      return -1;
    }

    img->data = (std::uint8_t *) img->capture_texture.get();

    return 0;
  }

  // This cannot use ID3D11DeviceContext because it can be called concurrently by the encoding thread
  int
  display_vram_t::dummy_img(platf::img_t *img_base) {
    return complete_img(img_base, true);
  }

  std::vector<DXGI_FORMAT>
  display_vram_t::get_supported_capture_formats() {
    return {
      // scRGB FP16 is the ideal format for Wide Color Gamut and Advanced Color
      // displays (both SDR and HDR). This format uses linear gamma, so we will
      // use a linear->PQ shader for HDR and a linear->sRGB shader for SDR.
      DXGI_FORMAT_R16G16B16A16_FLOAT,

      // DXGI_FORMAT_R10G10B10A2_UNORM seems like it might give us frames already
      // converted to SMPTE 2084 PQ, however it seems to actually just clamp the
      // scRGB FP16 values that DWM is using when the desktop format is scRGB FP16.
      //
      // If there is a case where the desktop format is really SMPTE 2084 PQ, it
      // might make sense to support capturing it without conversion to scRGB,
      // but we avoid it for now.

      // We include the 8-bit modes too for when the display is in SDR mode,
      // while the client stream is HDR-capable. These UNORM formats can
      // use our normal pixel shaders that expect sRGB input.
      DXGI_FORMAT_B8G8R8A8_UNORM,
      DXGI_FORMAT_B8G8R8X8_UNORM,
      DXGI_FORMAT_R8G8B8A8_UNORM,
    };
  }
  /**
   * @brief Checks that a given codec is supported by the display device.
   * @param name The FFmpeg codec name (or similar for non-FFmpeg codecs).
   * @param config The codec configuration.
   * @return true if supported, false otherwise.
   */
  bool
  display_vram_t::is_codec_supported(std::string_view name, const ::video::config_t &config) {
    DXGI_ADAPTER_DESC adapter_desc;
    adapter->GetDesc(&adapter_desc);

    if (adapter_desc.VendorId == 0x1002) {  // AMD
      // If it's not an AMF encoder, it's not compatible with an AMD GPU
      if (!boost::algorithm::ends_with(name, "_amf")) {
        return false;
      }

      // Perform AMF version checks if we're using an AMD GPU. This check is placed in display_vram_t
      // to avoid hitting the display_ram_t path which uses software encoding and doesn't touch AMF.
      HMODULE amfrt = LoadLibraryW(AMF_DLL_NAME);
      if (amfrt) {
        auto unload_amfrt = util::fail_guard([amfrt]() {
          FreeLibrary(amfrt);
        });

        auto fnAMFQueryVersion = (AMFQueryVersion_Fn) GetProcAddress(amfrt, AMF_QUERY_VERSION_FUNCTION_NAME);
        if (fnAMFQueryVersion) {
          amf_uint64 version;
          auto result = fnAMFQueryVersion(&version);
          if (result == AMF_OK) {
            if (config.videoFormat == 2 && version < AMF_MAKE_FULL_VERSION(1, 4, 30, 0)) {
              // AMF 1.4.30 adds ultra low latency mode for AV1. Don't use AV1 on earlier versions.
              // This corresponds to driver version 23.5.2 (23.10.01.45) or newer.
              BOOST_LOG(warning) << "AV1 encoding is disabled on AMF version "sv
                                 << AMF_GET_MAJOR_VERSION(version) << '.'
                                 << AMF_GET_MINOR_VERSION(version) << '.'
                                 << AMF_GET_SUBMINOR_VERSION(version) << '.'
                                 << AMF_GET_BUILD_VERSION(version);
              BOOST_LOG(warning) << "If your AMD GPU supports AV1 encoding, update your graphics drivers!"sv;
              return false;
            }
            else if (config.dynamicRange && version < AMF_MAKE_FULL_VERSION(1, 4, 23, 0)) {
              // Older versions of the AMD AMF runtime can crash when fed P010 surfaces.
              // Fail if the AMF version is below 1.4.23 where HEVC Main10 encoding was introduced.
              // AMF 1.4.23 corresponds to driver version 21.12.1 (21.40.11.03) or newer.
              BOOST_LOG(warning) << "HDR encoding is disabled on AMF version "sv
                                 << AMF_GET_MAJOR_VERSION(version) << '.'
                                 << AMF_GET_MINOR_VERSION(version) << '.'
                                 << AMF_GET_SUBMINOR_VERSION(version) << '.'
                                 << AMF_GET_BUILD_VERSION(version);
              BOOST_LOG(warning) << "If your AMD GPU supports HEVC Main10 encoding, update your graphics drivers!"sv;
              return false;
            }
          }
          else {
            BOOST_LOG(warning) << "AMFQueryVersion() failed: "sv << result;
          }
        }
        else {
          BOOST_LOG(warning) << "AMF DLL missing export: "sv << AMF_QUERY_VERSION_FUNCTION_NAME;
        }
      }
      else {
        BOOST_LOG(warning) << "Detected AMD GPU but AMF failed to load"sv;
      }
    }
    else if (adapter_desc.VendorId == 0x8086) {  // Intel
      // If it's not a QSV encoder, it's not compatible with an Intel GPU
      if (!boost::algorithm::ends_with(name, "_qsv")) {
        return false;
      }
    }
    else if (adapter_desc.VendorId == 0x10de) {  // Nvidia
      // If it's not an NVENC encoder, it's not compatible with an Nvidia GPU
      if (!boost::algorithm::ends_with(name, "_nvenc")) {
        return false;
      }
    }
    else {
      BOOST_LOG(warning) << "Unknown GPU vendor ID: " << util::hex(adapter_desc.VendorId).to_string_view();
    }

    return true;
  }

  std::unique_ptr<avcodec_encode_device_t>
  display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) {
    if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
      BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']';
      return nullptr;
    }

    auto device = std::make_unique<d3d_avcodec_encode_device_t>();

    auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt);
    if (ret) {
      return nullptr;
    }

    return device;
  }

  std::unique_ptr<nvenc_encode_device_t>
  display_vram_t::make_nvenc_encode_device(pix_fmt_e pix_fmt) {
    auto device = std::make_unique<d3d_nvenc_encode_device_t>();
    if (!device->init_device(shared_from_this(), adapter.get(), pix_fmt)) {
      return nullptr;
    }
    return device;
  }

  int
  init() {
    BOOST_LOG(info) << "Compiling shaders..."sv;

    scene_vs_hlsl = compile_vertex_shader(SUNSHINE_SHADERS_DIR "/SceneVS.hlsl");
    if (!scene_vs_hlsl) {
      return -1;
    }

    cursor_vs_hlsl = compile_vertex_shader(SUNSHINE_SHADERS_DIR "/CursorVS.hlsl");
    if (!cursor_vs_hlsl) {
      return -1;
    }

    convert_Y_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertYPS.hlsl");
    if (!convert_Y_ps_hlsl) {
      return -1;
    }

    convert_Y_PQ_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertYPS_PQ.hlsl");
    if (!convert_Y_PQ_ps_hlsl) {
      return -1;
    }

    convert_Y_linear_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertYPS_Linear.hlsl");
    if (!convert_Y_linear_ps_hlsl) {
      return -1;
    }

    convert_UV_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertUVPS.hlsl");
    if (!convert_UV_ps_hlsl) {
      return -1;
    }

    convert_UV_PQ_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertUVPS_PQ.hlsl");
    if (!convert_UV_PQ_ps_hlsl) {
      return -1;
    }

    convert_UV_linear_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ConvertUVPS_Linear.hlsl");
    if (!convert_UV_linear_ps_hlsl) {
      return -1;
    }

    convert_UV_vs_hlsl = compile_vertex_shader(SUNSHINE_SHADERS_DIR "/ConvertUVVS.hlsl");
    if (!convert_UV_vs_hlsl) {
      return -1;
    }

    scene_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ScenePS.hlsl");
    if (!scene_ps_hlsl) {
      return -1;
    }

    scene_NW_ps_hlsl = compile_pixel_shader(SUNSHINE_SHADERS_DIR "/ScenePS_NW.hlsl");
    if (!scene_NW_ps_hlsl) {
      return -1;
    }

    BOOST_LOG(info) << "Compiled shaders"sv;

    return 0;
  }
}  // namespace platf::dxgi