From bb912786bdbc65b08aeb5ae508f2d481ce4b2bf6 Mon Sep 17 00:00:00 2001 From: loki-47-6F-64 Date: Wed, 22 Sep 2021 11:36:59 +0200 Subject: [PATCH] Added NvFBC on Linux --- sunshine/platform/common.h | 7 + sunshine/platform/linux/cuda.cpp | 703 +++++++++++++++++----------- sunshine/platform/linux/cuda.cu | 92 ++-- sunshine/platform/linux/cuda.h | 36 +- sunshine/platform/linux/misc.cpp | 29 +- sunshine/platform/linux/x11grab.cpp | 4 +- sunshine/video.cpp | 2 +- 7 files changed, 546 insertions(+), 327 deletions(-) diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h index 7482f0ea..4b702a9b 100644 --- a/sunshine/platform/common.h +++ b/sunshine/platform/common.h @@ -141,6 +141,13 @@ public: struct img_t { public: + img_t() = default; + + img_t(img_t &&) = delete; + img_t(const img_t &) = delete; + img_t &operator=(img_t &&) = delete; + img_t &operator=(const img_t &) = delete; + std::uint8_t *data {}; std::int32_t width {}; std::int32_t height {}; diff --git a/sunshine/platform/linux/cuda.cpp b/sunshine/platform/linux/cuda.cpp index e57f914d..25d88518 100644 --- a/sunshine/platform/linux/cuda.cpp +++ b/sunshine/platform/linux/cuda.cpp @@ -1,3 +1,5 @@ +#include + #include #include @@ -55,96 +57,9 @@ inline static int check(CUresult result, const std::string_view &sv) { return 0; } -class ctx_t { +class img_t : public platf::img_t { public: - ctx_t(CUcontext ctx) { - CU_CHECK_IGNORE(cdf->cuCtxPushCurrent(ctx), "Couldn't push cuda context"); - } - - ~ctx_t() { - CUcontext dummy; - - CU_CHECK_IGNORE(cdf->cuCtxPopCurrent(&dummy), "Couldn't pop cuda context"); - } -}; - -void free_res(CUgraphicsResource res) { - cdf->cuGraphicsUnregisterResource(res); -} - -using res_internal_t = util::safe_ptr; - -template -class res_t { -public: - res_t() : resources {}, mapped { false } {} - res_t(res_t &&other) noexcept : resources { other.resources }, array_p { other.array_p }, ctx { other.ctx }, stream { other.stream } { - other.resources = std::array {}; - } - - res_t &operator=(res_t &&other) { - for(auto x = 0; x < N; ++x) { - std::swap(resources[x], other.resources[x]); - std::swap(array_p[x], other.array_p[x]); - } - - std::swap(ctx, other.ctx); - std::swap(stream, other.stream); - std::swap(mapped, other.mapped); - - return *this; - } - - res_t(CUcontext ctx, CUstream stream) : resources {}, ctx { ctx }, stream { stream }, mapped { false } {} - - int bind(gl::tex_t &tex) { - ctx_t ctx { this->ctx }; - - CU_CHECK(cdf->cuGraphicsGLRegisterImage(&resources[0], tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y image"); - CU_CHECK(cdf->cuGraphicsGLRegisterImage(&resources[1], tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register uv image"); - - return 0; - } - - int map() { - ctx_t ctx { this->ctx }; - - CU_CHECK(cdf->cuGraphicsMapResources(resources.size(), resources.data(), stream), "Coudn't map cuda resources"); - - mapped = true; - - CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&array_p[0], resources[0], 0, 0), "Couldn't get mapped subresource [0]"); - CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&array_p[1], resources[1], 0, 0), "Couldn't get mapped subresource [1]"); - - return 0; - } - - void unmap() { - // Either all or none are mapped - if(mapped) { - ctx_t ctx { this->ctx }; - - CU_CHECK_IGNORE(cdf->cuGraphicsUnmapResources(resources.size(), resources.data(), stream), "Couldn't unmap cuda resources"); - - mapped = false; - } - } - - inline CUarray &operator[](std::size_t index) { - return array_p[index]; - } - - ~res_t() { - unmap(); - } - - std::array resources; - std::array array_p; - - CUcontext ctx; - CUstream stream; - - bool mapped; + tex_t tex; }; int init() { @@ -160,139 +75,8 @@ int init() { return 0; } -class opengl_t : public platf::hwdevice_t { -public: - int init(int in_width, int in_height, platf::x11::xdisplay_t::pointer xdisplay) { - if(!cdf) { - BOOST_LOG(warning) << "cuda not initialized"sv; - return -1; - } - - this->data = (void *)0x1; - - display = egl::make_display(xdisplay); - if(!display) { - return -1; - } - - auto ctx_opt = egl::make_ctx(display.get()); - if(!ctx_opt) { - return -1; - } - - ctx = std::move(*ctx_opt); - - width = in_width; - height = in_height; - - return 0; - } - - int set_frame(AVFrame *frame) override { - auto cuda_ctx = (AVCUDADeviceContext *)((AVHWFramesContext *)frame->hw_frames_ctx->data)->device_ctx->hwctx; - - tex = gl::tex_t::make(2); - fb = gl::frame_buf_t::make(2); - - gl::ctx.BindTexture(GL_TEXTURE_2D, tex[0]); - gl::ctx.TexImage2D(GL_TEXTURE_2D, 0, GL_RED, frame->width, frame->height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr); - gl::ctx.BindTexture(GL_TEXTURE_2D, tex[1]); - gl::ctx.TexImage2D(GL_TEXTURE_2D, 0, GL_RG, frame->width / 2, frame->height / 2, 0, GL_RG, GL_UNSIGNED_BYTE, nullptr); - gl::ctx.BindTexture(GL_TEXTURE_2D, 0); - - fb.bind(std::begin(tex), std::end(tex)); - - res = res_t<2> { cuda_ctx->cuda_ctx, cuda_ctx->stream }; - - if(res.bind(tex)) { - return -1; - } - - this->hwframe.reset(frame); - this->frame = frame; - - if(av_hwframe_get_buffer(frame->hw_frames_ctx, frame, 0)) { - BOOST_LOG(error) << "Couldn't get hwframe for NVENC"sv; - - return -1; - } - - auto sws_opt = egl::sws_t::make(width, height, frame->width, frame->height); - if(!sws_opt) { - return -1; - } - - sws = std::move(*sws_opt); - return sws.blank(fb, 0, 0, frame->width, frame->height); - } - - int convert(platf::img_t &img) override { - sws.load_ram(img); - - if(sws.convert(fb)) { - return -1; - } - - if(res.map()) { - return -1; - } - - // Push and pop cuda context - ctx_t ctx { res.ctx }; - for(auto x = 0; x < 2; ++x) { - CUDA_MEMCPY2D desc {}; - - auto shift = x; - - desc.srcPitch = frame->width; - desc.dstPitch = frame->linesize[x]; - desc.Height = frame->height >> shift; - desc.WidthInBytes = std::min(desc.srcPitch, desc.dstPitch); - - desc.srcMemoryType = CU_MEMORYTYPE_ARRAY; - desc.dstMemoryType = CU_MEMORYTYPE_DEVICE; - - desc.srcArray = res[x]; - desc.dstDevice = (CUdeviceptr)frame->data[x]; - - CU_CHECK(cdf->cuMemcpy2DAsync(&desc, res.stream), "Couldn't copy from OpenGL to cuda"); - } - - res.unmap(); - - return 0; - } - - void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { - sws.set_colorspace(colorspace, color_range); - } - - frame_t hwframe; - - egl::display_t display; - egl::ctx_t ctx; - - egl::sws_t sws; - - gl::tex_t tex; - gl::frame_buf_t fb; - - res_t<2> res; - - int width, height; -}; - class cuda_t : public platf::hwdevice_t { public: - ~cuda_t() override { - // sws_t needs to be destroyed while the context is active - if(sws) { - ctx_t ctx { cuda_ctx }; - - sws.reset(); - } - } - int init(int in_width, int in_height) { if(!cdf) { BOOST_LOG(warning) << "cuda not initialized"sv; @@ -322,78 +106,111 @@ public: return -1; } - cuda_ctx = ((AVCUDADeviceContext *)((AVHWFramesContext *)frame->hw_frames_ctx->data)->device_ctx->hwctx)->cuda_ctx; - - ctx_t ctx { cuda_ctx }; - sws = sws_t::make(width, height, frame->width, frame->height, width * 4); - - if(!sws) { + auto sws_opt = sws_t::make(width, height, frame->width, frame->height, width * 4); + if(!sws_opt) { return -1; } + sws = std::move(*sws_opt); + return 0; } - int convert(platf::img_t &img) override { - ctx_t ctx { cuda_ctx }; - - return sws->load_ram(img) || sws->convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1]); - } - void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { - ctx_t ctx { cuda_ctx }; - sws->set_colorspace(colorspace, color_range); + sws.set_colorspace(colorspace, color_range); + auto tex = tex_t::make(height, width * 4); + if(!tex) { + return; + } // The default green color is ugly. // Update the background color platf::img_t img; - img.width = frame->width; - img.height = frame->height; + img.width = width; + img.height = height; img.pixel_pitch = 4; - img.row_pitch = img.width * img.pixel_pitch; + img.row_pitch = img.width * img.pixel_pitch; std::vector image_data; image_data.resize(img.row_pitch * img.height); img.data = image_data.data(); - if(sws->load_ram(img)) { + if(sws.load_ram(img, tex->array)) { return; } - sws->convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], { - frame->width, frame->height, 0, 0 - }); + sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture, { frame->width, frame->height, 0, 0 }); } frame_t hwframe; - std::unique_ptr sws; - int width, height; - CUcontext cuda_ctx; + sws_t sws; }; -std::shared_ptr make_hwdevice(int width, int height, platf::x11::xdisplay_t::pointer xdisplay) { +class cuda_ram_t : public cuda_t { +public: + int convert(platf::img_t &img) override { + return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex.texture); + } + + int set_frame(AVFrame *frame) { + if(cuda_t::set_frame(frame)) { + return -1; + } + + auto tex_opt = tex_t::make(height, width * 4); + if(!tex_opt) { + return -1; + } + + tex = std::move(*tex_opt); + + return 0; + } + + tex_t tex; +}; + +class cuda_vram_t : public cuda_t { +public: + int convert(platf::img_t &img) override { + return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], (cudaTextureObject_t)img.data); + } +}; + +std::shared_ptr make_hwdevice(int width, int height, bool vram) { if(init()) { return nullptr; } - auto cuda = std::make_shared(); + std::shared_ptr cuda; + + if(vram) { + cuda = std::make_shared(); + } + else { + cuda = std::make_shared(); + } + if(cuda->init(width, height)) { return nullptr; } return cuda; } -} // namespace cuda -namespace platf::nvfbc { +namespace nvfbc { static PNVFBCCREATEINSTANCE createInstance {}; static NVFBC_API_FUNCTION_LIST func { NVFBC_VERSION }; +static constexpr inline NVFBC_BOOL nv_bool(bool b) { + return b ? NVFBC_TRUE : NVFBC_FALSE; +} + static void *handle { nullptr }; int init() { static bool funcs_loaded = false; @@ -418,49 +235,65 @@ int init() { return -1; } + auto status = cuda::nvfbc::createInstance(&cuda::nvfbc::func); + if(status) { + BOOST_LOG(error) << "Unable to create NvFBC instance"sv; + + dlclose(handle); + handle = nullptr; + return -1; + } + funcs_loaded = true; return 0; } class handle_t { - KITTY_USING_MOVE_T(session_t, NVFBC_SESSION_HANDLE, std::numeric_limits::max(), { - if(el == std::numeric_limits::max()) { - return; - } - NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER }; - - auto status = func.nvFBCDestroyHandle(el, ¶ms); - if(status) { - BOOST_LOG(error) << "Failed to destroy nvfbc handle: "sv << func.nvFBCGetLastErrorStr(el); - } - }); + enum flag_e { + SESSION_HANDLE, + SESSION_CAPTURE, + MAX_FLAGS, + }; public: + handle_t() = default; + handle_t(handle_t &&other) : handle_flags { other.handle_flags }, handle { other.handle } { + other.handle_flags.reset(); + } + + handle_t &operator=(handle_t &&other) { + std::swap(handle_flags, other.handle_flags); + std::swap(handle, other.handle); + + return *this; + } + static std::optional make() { NVFBC_CREATE_HANDLE_PARAMS params { NVFBC_CREATE_HANDLE_PARAMS_VER }; - session_t session; - auto status = func.nvFBCCreateHandle(&session.el, ¶ms); + handle_t handle; + auto status = func.nvFBCCreateHandle(&handle.handle, ¶ms); if(status) { - BOOST_LOG(error) << "Failed to create session: "sv << func.nvFBCGetLastErrorStr(session.el); - session.release(); + BOOST_LOG(error) << "Failed to create session: "sv << handle.last_error(); return std::nullopt; } - return handle_t { std::move(session) }; + handle.handle_flags[SESSION_HANDLE] = true; + + return std::move(handle); } const char *last_error() { - return func.nvFBCGetLastErrorStr(session.el); + return func.nvFBCGetLastErrorStr(handle); } std::optional status() { NVFBC_GET_STATUS_PARAMS params { NVFBC_GET_STATUS_PARAMS_VER }; - auto status = func.nvFBCGetStatus(session.el, ¶ms); + auto status = func.nvFBCGetStatus(handle, ¶ms); if(status) { - BOOST_LOG(error) << "Failed to create session: "sv << last_error(); + BOOST_LOG(error) << "Failed to get NvFBC status: "sv << last_error(); return std::nullopt; } @@ -468,23 +301,328 @@ public: return params; } - session_t session; + int capture(NVFBC_CREATE_CAPTURE_SESSION_PARAMS &capture_params) { + if(func.nvFBCCreateCaptureSession(handle, &capture_params)) { + BOOST_LOG(error) << "Failed to start capture session: "sv << last_error(); + return -1; + } + + handle_flags[SESSION_CAPTURE] = true; + + NVFBC_TOCUDA_SETUP_PARAMS setup_params { + NVFBC_TOCUDA_SETUP_PARAMS_VER, + NVFBC_BUFFER_FORMAT_BGRA, + }; + + if(func.nvFBCToCudaSetUp(handle, &setup_params)) { + BOOST_LOG(error) << "Failed to setup cuda interop with nvFBC: "sv << last_error(); + return -1; + } + return 0; + } + + int stop() { + if(!handle_flags[SESSION_CAPTURE]) { + return 0; + } + + NVFBC_DESTROY_CAPTURE_SESSION_PARAMS params { NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER }; + + if(func.nvFBCDestroyCaptureSession(handle, ¶ms)) { + BOOST_LOG(error) << "Couldn't destroy capture session: "sv << last_error(); + + return -1; + } + + handle_flags[SESSION_CAPTURE] = false; + + return 0; + } + + ~handle_t() { + if(!handle_flags[SESSION_HANDLE]) { + return; + } + + if(handle_flags[SESSION_CAPTURE]) { + NVFBC_DESTROY_CAPTURE_SESSION_PARAMS params { NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER }; + + if(func.nvFBCDestroyCaptureSession(handle, ¶ms)) { + BOOST_LOG(error) << "Couldn't destroy capture session: "sv << func.nvFBCGetLastErrorStr(handle); + } + } + + NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER }; + + if(func.nvFBCDestroyHandle(handle, ¶ms)) { + BOOST_LOG(error) << "Couldn't destroy session handle: "sv << func.nvFBCGetLastErrorStr(handle); + } + } + + std::bitset handle_flags; + + NVFBC_SESSION_HANDLE handle; }; +class display_t : public platf::display_t { +public: + int init(const std::string_view &display_name, int framerate) { + auto handle = handle_t::make(); + if(!handle) { + return -1; + } + + auto status_params = handle->status(); + if(!status_params) { + return -1; + } + + int streamedMonitor = -1; + if(!display_name.empty()) { + if(status_params->bXRandRAvailable) { + auto monitor_nr = util::from_view(display_name); + + if(monitor_nr < 0 || monitor_nr >= status_params->dwOutputNum) { + BOOST_LOG(warning) << "Can't stream monitor ["sv << monitor_nr << "], it needs to be between [0] and ["sv << status_params->dwOutputNum - 1 << "], defaulting to virtual desktop"sv; + } + else { + streamedMonitor = monitor_nr; + } + } + else { + BOOST_LOG(warning) << "XrandR not available, streaming entire virtual desktop"sv; + } + } + + capture_params = NVFBC_CREATE_CAPTURE_SESSION_PARAMS { NVFBC_CREATE_CAPTURE_SESSION_PARAMS_VER }; + + capture_params.eCaptureType = NVFBC_CAPTURE_SHARED_CUDA; + capture_params.bDisableAutoModesetRecovery = nv_bool(true); + + capture_params.dwSamplingRateMs = 1000 /* ms */ / framerate; + + if(streamedMonitor != -1) { + auto &output = status_params->outputs[streamedMonitor]; + + width = output.trackedBox.w; + height = output.trackedBox.h; + offset_x = output.trackedBox.x; + offset_y = output.trackedBox.y; + + capture_params.eTrackingType = NVFBC_TRACKING_OUTPUT; + capture_params.dwOutputId = output.dwId; + } + else { + capture_params.eTrackingType = NVFBC_TRACKING_SCREEN; + + width = status_params->screenSize.w; + height = status_params->screenSize.h; + } + + env_width = status_params->screenSize.w; + env_height = status_params->screenSize.h; + + this->handle = std::move(*handle); + return 0; + } + + platf::capture_e capture(snapshot_cb_t &&snapshot_cb, std::shared_ptr img, bool *cursor) override { + // Force display_t::capture to initialize handle_t::capture + cursor_visible = !*cursor; + + auto fg = util::fail_guard([&]() { + handle.stop(); + }); + + while(img) { + auto status = snapshot(img.get(), 500ms, *cursor); + switch(status) { + case platf::capture_e::reinit: + case platf::capture_e::error: + return status; + case platf::capture_e::timeout: + std::this_thread::sleep_for(1ms); + continue; + case platf::capture_e::ok: + img = snapshot_cb(img); + break; + default: + BOOST_LOG(error) << "Unrecognized capture status ["sv << (int)status << ']'; + return status; + } + } + + return platf::capture_e::ok; + } + + // Reinitialize the capture session. + platf::capture_e reinit(bool cursor) { + if(handle.stop()) { + return platf::capture_e::error; + } + + cursor_visible = cursor; + if(false && cursor) { + capture_params.bPushModel = nv_bool(false); + capture_params.bWithCursor = nv_bool(true); + capture_params.bAllowDirectCapture = nv_bool(false); + } + else { + capture_params.bPushModel = nv_bool(true); + capture_params.bWithCursor = nv_bool(false); + capture_params.bAllowDirectCapture = nv_bool(true); + } + + if(handle.capture(capture_params)) { + return platf::capture_e::error; + } + + // If trying to capture directly, test if it actually does. + if(capture_params.bAllowDirectCapture) { + CUdeviceptr device_ptr; + NVFBC_FRAME_GRAB_INFO info; + + NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab { + NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER, + NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT_IF_NEW_FRAME_READY, + &device_ptr, + &info, + 0, + }; + + // Direct Capture may fail the first few times, even if it's possible + for(int x = 0; x < 3; ++x) { + if(auto status = func.nvFBCToCudaGrabFrame(handle.handle, &grab)) { + if(status == NVFBC_ERR_MUST_RECREATE) { + return platf::capture_e::reinit; + } + + BOOST_LOG(error) << "Couldn't capture nvFramebuffer: "sv << handle.last_error(); + + return platf::capture_e::error; + } + + if(info.bDirectCapture) { + break; + } + + BOOST_LOG(debug) << "Direct capture failed attempt ["sv << x << ']'; + } + + if(!info.bDirectCapture) { + BOOST_LOG(debug) << "Direct capture failed, trying the extra copy method"sv; + // Direct capture failed + capture_params.bPushModel = nv_bool(false); + capture_params.bWithCursor = nv_bool(false); + capture_params.bAllowDirectCapture = nv_bool(false); + + if(handle.stop() || handle.capture(capture_params)) { + return platf::capture_e::error; + } + } + } + + return platf::capture_e::ok; + } + + platf::capture_e snapshot(platf::img_t *img, std::chrono::milliseconds timeout, bool cursor) { + if(cursor != cursor_visible) { + auto status = reinit(cursor); + if(status != platf::capture_e::ok) { + return status; + } + } + + CUdeviceptr device_ptr; + NVFBC_FRAME_GRAB_INFO info; + + NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab { + NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER, + NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT_IF_NEW_FRAME_READY, + &device_ptr, + &info, + (std::uint32_t)timeout.count(), + }; + + if(auto status = func.nvFBCToCudaGrabFrame(handle.handle, &grab)) { + if(status == NVFBC_ERR_MUST_RECREATE) { + return platf::capture_e::reinit; + } + + BOOST_LOG(error) << "Couldn't capture nvFramebuffer: "sv << handle.last_error(); + return platf::capture_e::error; + } + + if(!info.bIsNewFrame) { + return platf::capture_e::timeout; + } + + if(((img_t *)img)->tex.copy((std::uint8_t *)device_ptr, img->height, img->row_pitch)) { + return platf::capture_e::error; + } + + return platf::capture_e::ok; + } + + std::shared_ptr make_hwdevice(platf::pix_fmt_e pix_fmt) override { + return ::cuda::make_hwdevice(width, height, true); + } + + std::shared_ptr alloc_img() override { + auto img = std::make_shared(); + + img->width = width; + img->height = height; + img->pixel_pitch = 4; + img->row_pitch = img->width * img->pixel_pitch; + + auto tex_opt = tex_t::make(height, width * img->pixel_pitch); + if(!tex_opt) { + return nullptr; + } + + img->tex = std::move(*tex_opt); + img->data = (std::uint8_t *)img->tex.texture; + + return img; + }; + + int dummy_img(platf::img_t *) override { + return 0; + } + + bool cursor_visible; + handle_t handle; + + NVFBC_CREATE_CAPTURE_SESSION_PARAMS capture_params; +}; +} // namespace nvfbc +} // namespace cuda + +namespace platf { +std::shared_ptr nvfbc_display(mem_type_e hwdevice_type, const std::string &display_name, int framerate) { + if(hwdevice_type != mem_type_e::cuda) { + BOOST_LOG(error) << "Could not initialize nvfbc display with the given hw device type"sv; + return nullptr; + } + + auto display = std::make_shared(); + + if(display->init(display_name, framerate)) { + return nullptr; + } + + return display; +} + std::vector nvfbc_display_names() { - if(init()) { + if(cuda::init() || cuda::nvfbc::init()) { return {}; } std::vector display_names; - auto status = createInstance(&func); - if(status) { - BOOST_LOG(error) << "Unable to create NvFBC instance"sv; - return {}; - } - - auto handle = handle_t::make(); + auto handle = cuda::nvfbc::handle_t::make(); if(!handle) { return {}; } @@ -500,7 +638,18 @@ std::vector nvfbc_display_names() { BOOST_LOG(info) << "Found ["sv << status_params->dwOutputNum << "] outputs"sv; BOOST_LOG(info) << "Virtual Desktop: "sv << status_params->screenSize.w << 'x' << status_params->screenSize.h; + BOOST_LOG(info) << "XrandR: "sv << (status_params->bXRandRAvailable ? "available"sv : "unavailable"sv); + + for(auto x = 0; x < status_params->dwOutputNum; ++x) { + auto &output = status_params->outputs[x]; + BOOST_LOG(info) << "-- Output --"sv; + BOOST_LOG(debug) << " ID: "sv << output.dwId; + BOOST_LOG(debug) << " Name: "sv << output.name; + BOOST_LOG(info) << " Resolution: "sv << output.trackedBox.w << 'x' << output.trackedBox.h; + BOOST_LOG(info) << " Offset: "sv << output.trackedBox.x << 'x' << output.trackedBox.y; + display_names.emplace_back(std::to_string(x)); + } return display_names; } -} // namespace platf::nvfbc \ No newline at end of file +} // namespace platf \ No newline at end of file diff --git a/sunshine/platform/linux/cuda.cu b/sunshine/platform/linux/cuda.cu index e310964c..68368095 100644 --- a/sunshine/platform/linux/cuda.cu +++ b/sunshine/platform/linux/cuda.cu @@ -21,6 +21,9 @@ using namespace std::literals; #define CU_CHECK_PTR(x, y) \ if(check((x), SUNSHINE_STRINGVIEW(y ": "))) return nullptr; +#define CU_CHECK_OPT(x, y) \ + if(check((x), SUNSHINE_STRINGVIEW(y ": "))) return std::nullopt; + #define CU_CHECK_IGNORE(x, y) \ check((x), SUNSHINE_STRINGVIEW(y ": ")) @@ -165,15 +168,21 @@ __global__ void RGBA_to_NV12( dstY[1] = calcY(rgb_r, color_matrix) * 245.0f; // 245.0f is a magic number to ensure slight changes in luminosity are more visisble } -sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int pitch, int threadsPerBlock, ptr_t &&color_matrix) - : array {}, texture { INVALID_TEXTURE }, threadsPerBlock { threadsPerBlock }, color_matrix { std::move(color_matrix) } { - auto format = cudaCreateChannelDesc(); +int tex_t::copy(std::uint8_t *src, int height, int pitch) { + CU_CHECK(cudaMemcpy2DToArray(array, 0, 0, src, pitch, pitch, height, cudaMemcpyDeviceToDevice), "Couldn't copy to cuda array from deviceptr"); - CU_CHECK_VOID(cudaMallocArray(&array, &format, pitch, in_height, cudaArrayDefault), "Couldn't allocate cuda array"); + return 0; +} + +std::optional tex_t::make(int height, int pitch) { + tex_t tex; + + auto format = cudaCreateChannelDesc(); + CU_CHECK_OPT(cudaMallocArray(&tex.array, &format, pitch, height, cudaArrayDefault), "Couldn't allocate cuda array"); cudaResourceDesc res {}; res.resType = cudaResourceTypeArray; - res.res.array.array = array; + res.res.array.array = tex.array; cudaTextureDesc desc {}; @@ -183,9 +192,40 @@ sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int pit std::fill_n(std::begin(desc.addressMode), 2, cudaAddressModeClamp); - CU_CHECK_VOID(cudaCreateTextureObject(&texture, &res, &desc, nullptr), "Couldn't create cuda texture"); + CU_CHECK_OPT(cudaCreateTextureObject(&tex.texture, &res, &desc, nullptr), "Couldn't create cuda texture"); + return std::move(tex); +} +tex_t::tex_t() : array { }, texture { INVALID_TEXTURE } {} +tex_t::tex_t(tex_t &&other) : array { other.array }, texture { other.texture } { + other.array = 0; + other.texture = INVALID_TEXTURE; +} + +tex_t &tex_t::operator=(tex_t &&other) { + std::swap(array, other.array); + std::swap(texture, other.texture); + + return *this; +} + +tex_t::~tex_t() { + if(texture != INVALID_TEXTURE) { + CU_CHECK_IGNORE(cudaDestroyTextureObject(texture), "Couldn't deallocate cuda texture"); + + texture = INVALID_TEXTURE; + } + + if(array) { + CU_CHECK_IGNORE(cudaFreeArray(array), "Couldn't deallocate cuda array"); + + array = cudaArray_t {}; + } +} + +sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int pitch, int threadsPerBlock, ptr_t &&color_matrix) + : threadsPerBlock { threadsPerBlock }, color_matrix { std::move(color_matrix) } { // Ensure aspect ratio is maintained auto scalar = std::fminf(out_width / (float)in_width, out_height / (float)in_height); auto out_width_f = in_width * scalar; @@ -202,52 +242,32 @@ sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int pit viewport.offsetY = offsetY_f; } -sws_t::~sws_t() { - if(texture != INVALID_TEXTURE) { - CU_CHECK_IGNORE(cudaDestroyTextureObject(texture), "Couldn't deallocate cuda texture"); - - texture = INVALID_TEXTURE; - } - - if(array) { - CU_CHECK_IGNORE(cudaFreeArray(array), "Couldn't deallocate cuda array"); - - array = cudaArray_t {}; - } -} - -std::unique_ptr sws_t::make(int in_width, int in_height, int out_width, int out_height, int pitch) { +std::optional sws_t::make(int in_width, int in_height, int out_width, int out_height, int pitch) { cudaDeviceProp props; int device; - CU_CHECK_PTR(cudaGetDevice(&device), "Couldn't get cuda device"); - CU_CHECK_PTR(cudaGetDeviceProperties(&props, device), "Couldn't get cuda device properties"); + CU_CHECK_OPT(cudaGetDevice(&device), "Couldn't get cuda device"); + CU_CHECK_OPT(cudaGetDeviceProperties(&props, device), "Couldn't get cuda device properties"); auto ptr = make_ptr(); if(!ptr) { - return nullptr; + return std::nullopt; } - auto sws = std::make_unique(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor / 2, std::move(ptr)); - - if(sws->texture == INVALID_TEXTURE) { - return nullptr; - } - - return sws; + return std::make_optional(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor / 2, std::move(ptr)); } -int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV) { - return convert(Y, UV, pitchY, pitchUV, viewport); +int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture) { + return convert(Y, UV, pitchY, pitchUV, texture, viewport); } -int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, const viewport_t &viewport) { +int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport) { int threadsX = viewport.width / 2; int threadsY = viewport.height; dim3 block(threadsPerBlock, threadsPerBlock); dim3 grid(div_align(threadsX, threadsPerBlock), div_align(threadsY, threadsPerBlock)); - RGBA_to_NV12<<>>(texture, Y, UV, pitchY, pitchUV, viewport, (video::color_t*)color_matrix.get()); + RGBA_to_NV12<<>>(texture, Y, UV, pitchY, pitchUV, viewport, (video::color_t *)color_matrix.get()); return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed"); } @@ -274,7 +294,7 @@ void sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) CU_CHECK_IGNORE(cudaMemcpy(color_matrix.get(), color_p, sizeof(video::color_t), cudaMemcpyHostToDevice), "Couldn't copy color matrix to cuda"); } -int sws_t::load_ram(platf::img_t &img) { +int sws_t::load_ram(platf::img_t &img, cudaArray_t array) { return CU_CHECK_IGNORE(cudaMemcpy2DToArray(array, 0, 0, img.data, img.row_pitch, img.width * img.pixel_pitch, img.height, cudaMemcpyHostToDevice), "Couldn't copy to cuda array"); } diff --git a/sunshine/platform/linux/cuda.h b/sunshine/platform/linux/cuda.h index 27dede07..08cb0156 100644 --- a/sunshine/platform/linux/cuda.h +++ b/sunshine/platform/linux/cuda.h @@ -1,15 +1,16 @@ #ifndef SUNSHINE_PLATFORM_CUDA_H #define SUNSHINE_PLATFORM_CUDA_H -#include - #ifndef __NVCC__ #include "sunshine/platform/common.h" #include "x11grab.h" namespace cuda { -std::shared_ptr make_hwdevice(int width, int height, platf::x11::xdisplay_t::pointer xdisplay); +namespace nvfbc { +std::vector display_names(); +} +std::shared_ptr make_hwdevice(int width, int height, bool vram); int init(); } // namespace cuda @@ -41,9 +42,26 @@ struct viewport_t { int offsetX, offsetY; }; +class tex_t { +public: + static std::optional make(int height, int pitch); + + tex_t(); + tex_t(tex_t &&); + + tex_t &operator=(tex_t &&other); + + ~tex_t(); + + int copy(std::uint8_t *src, int height, int pitch); + + cudaArray_t array; + cudaTextureObject_t texture; +}; + class sws_t { public: - ~sws_t(); + sws_t() = default; sws_t(int in_width, int in_height, int out_width, int out_height, int pitch, int threadsPerBlock, ptr_t &&color_matrix); /** @@ -52,19 +70,17 @@ public: * * pitch -- The size of a single row of pixels in bytes */ - static std::unique_ptr make(int in_width, int in_height, int out_width, int out_height, int pitch); + static std::optional make(int in_width, int in_height, int out_width, int out_height, int pitch); // Converts loaded image into a CUDevicePtr - int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV); - int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, const viewport_t &viewport); + int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture); + int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport); void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range); - int load_ram(platf::img_t &img); + int load_ram(platf::img_t &img, cudaArray_t array); ptr_t color_matrix; - cudaArray_t array; - cudaTextureObject_t texture; int threadsPerBlock; diff --git a/sunshine/platform/linux/misc.cpp b/sunshine/platform/linux/misc.cpp index ad5afc5a..e10242ff 100644 --- a/sunshine/platform/linux/misc.cpp +++ b/sunshine/platform/linux/misc.cpp @@ -141,6 +141,9 @@ std::string get_mac_address(const std::string_view &address) { } enum class source_e { + // #ifdef SUNSHINE_BUILD_CUDA + NVFBC, +// #endif #ifdef SUNSHINE_BUILD_WAYLAND WAYLAND, #endif @@ -153,6 +156,15 @@ enum class source_e { }; static source_e source; +// #ifdef SUNSHINE_BUILD_CUDA +std::vector nvfbc_display_names(); +std::shared_ptr nvfbc_display(mem_type_e hwdevice_type, const std::string &display_name, int framerate); + +bool verify_nvfbc() { + return !nvfbc_display_names().empty(); +} +// #endif + #ifdef SUNSHINE_BUILD_WAYLAND std::vector wl_display_names(); std::shared_ptr wl_display(mem_type_e hwdevice_type, const std::string &display_name, int framerate); @@ -182,6 +194,10 @@ bool verify_x11() { std::vector display_names() { switch(source) { + // #ifdef SUNSHINE_BUILD_CUDA + case source_e::NVFBC: + return nvfbc_display_names(); + // #endif #ifdef SUNSHINE_BUILD_WAYLAND case source_e::WAYLAND: return wl_display_names(); @@ -201,6 +217,10 @@ std::vector display_names() { std::shared_ptr display(mem_type_e hwdevice_type, const std::string &display_name, int framerate) { switch(source) { + // #ifdef SUNSHINE_BUILD_CUDA + case source_e::NVFBC: + return nvfbc_display(hwdevice_type, display_name, framerate); + // #endif #ifdef SUNSHINE_BUILD_WAYLAND case source_e::WAYLAND: return wl_display(hwdevice_type, display_name, framerate); @@ -229,7 +249,7 @@ std::unique_ptr init() { window_system = window_system_e::WAYLAND; } #endif -#ifdef SUNSHINE_BUILD_X11 +#if defined(SUNSHINE_BUILD_X11) // || defined(SUNSHINE_BUILD_CUDA) if(std::getenv("DISPLAY") && window_system != window_system_e::WAYLAND) { if(std::getenv("WAYLAND_DISPLAY")) { BOOST_LOG(warning) << "Wayland detected, yet sunshine will use X11 for screencasting, screencasting will only work on XWayland applications"sv; @@ -238,6 +258,13 @@ std::unique_ptr init() { window_system = window_system_e::X11; } #endif +// #ifdef SUNSHINE_BUILD_CUDA + if(verify_nvfbc()) { + BOOST_LOG(info) << "Using nvFBC for screencasting"sv; + source = source_e::NVFBC; + goto found_source; + } +// #endif #ifdef SUNSHINE_BUILD_WAYLAND if(verify_wl()) { BOOST_LOG(info) << "Using Wayland for screencasting"sv; diff --git a/sunshine/platform/linux/x11grab.cpp b/sunshine/platform/linux/x11grab.cpp index c8bc70f0..decf24aa 100644 --- a/sunshine/platform/linux/x11grab.cpp +++ b/sunshine/platform/linux/x11grab.cpp @@ -518,7 +518,7 @@ struct x11_attr_t : public display_t { } if(mem_type == mem_type_e::cuda) { - return cuda::make_hwdevice(width, height, xdisplay.get()); + return cuda::make_hwdevice(width, height, false); } return std::make_shared(); @@ -678,7 +678,7 @@ struct shm_attr_t : public x11_attr_t { std::shared_ptr x11_display(platf::mem_type_e hwdevice_type, const std::string &display_name, int framerate) { if(hwdevice_type != platf::mem_type_e::system && hwdevice_type != platf::mem_type_e::vaapi && hwdevice_type != platf::mem_type_e::cuda) { - BOOST_LOG(error) << "Could not initialize display with the given hw device type."sv; + BOOST_LOG(error) << "Could not initialize x11 display with the given hw device type"sv; return nullptr; } diff --git a/sunshine/video.cpp b/sunshine/video.cpp index 3b143cbe..6faf82ee 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -1699,7 +1699,7 @@ util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *base) { util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *base) { buffer_t hw_device_buf; - auto status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 0); + auto status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 1 /* AV_CUDA_USE_PRIMARY_CONTEXT */); if(status < 0) { char string[AV_ERROR_MAX_STRING_SIZE]; BOOST_LOG(error) << "Failed to create a CUDA device: "sv << av_make_error_string(string, AV_ERROR_MAX_STRING_SIZE, status);