diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h index d8a8171e..e1b3c500 100644 --- a/sunshine/platform/common.h +++ b/sunshine/platform/common.h @@ -29,9 +29,17 @@ constexpr std::uint16_t B = 0x2000; constexpr std::uint16_t X = 0x4000; constexpr std::uint16_t Y = 0x8000; +enum class dev_type_e { + none, + dxgi, + unknown +}; + enum class pix_fmt_e { yuv420p, - yuv420p10 + yuv420p10, + nv12, + unknown }; struct gamepad_state_t { @@ -114,7 +122,7 @@ std::string from_sockaddr(const sockaddr *const); std::pair from_sockaddr_ex(const sockaddr *const); std::unique_ptr microphone(std::uint32_t sample_rate); -std::shared_ptr display(int hwdevice_type); +std::shared_ptr display(dev_type_e hwdevice_type); input_t input(); void move_mouse(input_t &input, int deltaX, int deltaY); diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp index 9e95942a..3988b344 100644 --- a/sunshine/platform/windows_dxgi.cpp +++ b/sunshine/platform/windows_dxgi.cpp @@ -38,6 +38,7 @@ using output1_t = util::safe_ptr>; using dup_t = util::safe_ptr>; using texture2d_t = util::safe_ptr>; using resource_t = util::safe_ptr>; +using multithread_t = util::safe_ptr>; namespace video { using device_t = util::safe_ptr>; @@ -894,6 +895,12 @@ public: } std::shared_ptr make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) override { + if(pix_fmt != platf::pix_fmt_e::nv12) { + BOOST_LOG(error) << "display_gpu_t doesn't support pixel format ["sv << (int)pix_fmt << ']'; + + return nullptr; + } + auto hwdevice = std::make_shared(); auto ret = hwdevice->init( @@ -909,6 +916,24 @@ public: return hwdevice; } + + int init() { + if(display_base_t::init()) { + return -1; + } + + multithread_t::pointer multithread_p {}; + auto status = device->QueryInterface(__uuidof(multithread_t::element_type), (void**)&multithread_p); + multithread_t multithread { multithread_p }; + + if(FAILED(status)) { + BOOST_LOG(error) << "Couldn't query Multithread interface [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + multithread->SetMultithreadProtected(true); + + return 0; + } }; const char *format_str[] = { @@ -1039,15 +1064,15 @@ const char *format_str[] = { } namespace platf { -std::shared_ptr display(int hwdevice_type) { - if(hwdevice_type == AV_HWDEVICE_TYPE_D3D11VA) { +std::shared_ptr display(platf::dev_type_e hwdevice_type) { + if(hwdevice_type == platf::dev_type_e::dxgi) { auto disp = std::make_shared(); if(!disp->init()) { return disp; } } - else { + else if(hwdevice_type == platf::dev_type_e::none) { auto disp = std::make_shared(); if(!disp->init()) { diff --git a/sunshine/thread_safe.h b/sunshine/thread_safe.h index c593fcff..1d4f12e6 100644 --- a/sunshine/thread_safe.h +++ b/sunshine/thread_safe.h @@ -33,7 +33,7 @@ public: // pop and view shoud not be used interchangebly status_t pop() { - std::unique_lock ul{_lock}; + std::unique_lock ul{ _lock }; if (!_continue) { return util::false_v; @@ -55,7 +55,7 @@ public: // pop and view shoud not be used interchangebly template status_t pop(std::chrono::duration delay) { - std::unique_lock ul{_lock}; + std::unique_lock ul{ _lock }; if (!_continue) { return util::false_v; @@ -74,7 +74,7 @@ public: // pop and view shoud not be used interchangebly const status_t &view() { - std::unique_lock ul{_lock}; + std::unique_lock ul{ _lock }; if (!_continue) { return util::false_v; @@ -98,7 +98,7 @@ public: } void stop() { - std::lock_guard lg{_lock}; + std::lock_guard lg{ _lock }; _continue = false; @@ -106,7 +106,7 @@ public: } void reset() { - std::lock_guard lg{_lock}; + std::lock_guard lg{ _lock }; _continue = true; @@ -118,8 +118,8 @@ public: } private: - bool _continue{true}; - status_t _status; + bool _continue { true }; + status_t _status { util::false_v }; std::condition_variable _cv; std::mutex _lock; @@ -170,7 +170,7 @@ public: } status_t pop() { - std::unique_lock ul{_lock}; + std::unique_lock ul{ _lock }; if (!_continue) { return util::false_v; @@ -191,11 +191,12 @@ public: } std::vector &unsafe() { + std::lock_guard { _lock }; return _queue; } void stop() { - std::lock_guard lg{_lock}; + std::lock_guard lg{ _lock }; _continue = false; @@ -208,7 +209,7 @@ public: private: - bool _continue{true}; + bool _continue{ true }; std::mutex _lock; std::condition_variable _cv; @@ -274,9 +275,8 @@ public: void release() { std::lock_guard lg { owner->_lock }; - auto c = owner->_count.fetch_sub(1, std::memory_order_acquire); - if(c - 1 == 0) { + if(!--owner->_count) { owner->_destruct(*get()); (*this)->~element_type(); } @@ -296,10 +296,9 @@ public: template shared_t(FC && fc, FD &&fd) : _construct { std::forward(fc) }, _destruct { std::forward(fd) } {} [[nodiscard]] ptr_t ref() { - auto c = _count.fetch_add(1, std::memory_order_acquire); - if(!c) { - std::lock_guard lg { _lock }; + std::lock_guard lg { _lock }; + if(!_count++) { new(_object_buf.data()) element_type; if(_construct(*reinterpret_cast(_object_buf.data()))) { return ptr_t { nullptr }; @@ -314,7 +313,7 @@ private: std::array _object_buf; - std::atomic _count; + std::uint32_t _count; std::mutex _lock; }; diff --git a/sunshine/video.cpp b/sunshine/video.cpp index 3530f481..d4bbc936 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -37,6 +37,36 @@ void free_packet(AVPacket *packet) { av_packet_free(&packet); } +namespace nv { +enum class preset_e : int { + _default = 0, + slow, + medium, + fast, + hp, + hq, + bd, + ll_default, + llhq, + llhp, + lossless_default, // lossless presets must be the last ones + lossless_hp, +}; + +enum class profile_h264_e : int { + baseline, + main, + high, + high_444p, +}; + +enum class profile_hevc_e : int { + main, + main_10, + rext, +}; +} + using ctx_t = util::safe_ptr; using frame_t = util::safe_ptr; using buffer_t = util::safe_ptr; @@ -104,8 +134,41 @@ struct session_t { int sws_color_format; }; +struct encode_session_ctx_t { + safe::signal_t *shutdown_event; + safe::signal_t *join_event; + packet_queue_t packets; + idr_event_t idr_events; + config_t config; + int frame_nr; + int key_frame_nr; + void *channel_data; +}; + +struct encode_session_t { + encode_session_ctx_t *ctx; + + std::chrono::steady_clock::time_point next_frame; + std::chrono::milliseconds delay; + + platf::img_t *img_tmp; + std::shared_ptr hwdevice; + session_t session; +}; + +using encode_session_ctx_queue_t = safe::queue_t; +using encode_e = platf::capture_e; + +struct capture_synced_ctx_t { + encode_session_ctx_queue_t encode_session_ctx_queue; +}; + +int start_capture_sync(capture_synced_ctx_t &ctx); +void end_capture_sync(capture_synced_ctx_t &ctx); +auto capture_thread_sync = safe::make_shared(start_capture_sync, end_capture_sync); + static encoder_t nvenc { - { 2, 0, 1 }, + { (int)nv::profile_h264_e::high, (int)nv::profile_hevc_e::main, (int)nv::profile_hevc_e::main_10 }, AV_HWDEVICE_TYPE_D3D11VA, AV_PIX_FMT_D3D11, AV_PIX_FMT_NV12, AV_PIX_FMT_NV12, @@ -115,9 +178,7 @@ static encoder_t nvenc { { { { "forced-idr"s, 1}, - { "profile"s, "high"s }, - { "preset"s , "llhp" }, - { "rc"s, "cbr_ld_hq"s }, + { "preset"s , (int)nv::preset_e::llhq }, }, "h264_nvenc"s }, false, @@ -172,11 +233,39 @@ struct capture_thread_ctx_t { util::sync_t> display_wp; }; +platf::dev_type_e map_dev_type(AVHWDeviceType type) { + switch(type) { + case AV_HWDEVICE_TYPE_D3D11VA: + return platf::dev_type_e::dxgi; + case AV_PICTURE_TYPE_NONE: + return platf::dev_type_e::none; + default: + return platf::dev_type_e::unknown; + } + + return platf::dev_type_e::unknown; +} + +platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt) { + switch(fmt) { + case AV_PIX_FMT_YUV420P10: + return platf::pix_fmt_e::yuv420p10; + case AV_PIX_FMT_YUV420P: + return platf::pix_fmt_e::yuv420p; + case AV_PIX_FMT_NV12: + return platf::pix_fmt_e::nv12; + default: + return platf::pix_fmt_e::unknown; + } + + return platf::pix_fmt_e::unknown; +} + void reset_display(std::shared_ptr &disp, AVHWDeviceType type) { // We try this twice, in case we still get an error on reinitialization for(int x = 0; x < 2; ++x) { disp.reset(); - disp = platf::display(type); + disp = platf::display(map_dev_type(type)); if(disp) { break; } @@ -207,7 +296,7 @@ void captureThread( std::chrono::nanoseconds delay = 1s; - auto disp = platf::display(encoder.dev_type); + auto disp = platf::display(map_dev_type(encoder.dev_type)); if(!disp) { return; } @@ -585,7 +674,6 @@ void encode_run( if(!session) { return; } - hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range); auto delay = std::chrono::floor(1s) / config.framerate; @@ -671,47 +759,41 @@ void encode_run( } } -void capture( - safe::signal_t *shutdown_event, - packet_queue_t packets, - idr_event_t idr_events, - config_t config, - void *channel_data) { +std::optional make_session_from_ctx(platf::display_t *disp, const encoder_t &encoder, platf::img_t &img, encode_session_ctx_t &ctx) { + encode_session_t encode_session; - auto lg = util::fail_guard([&]() { - shutdown_event->raise(true); - }); + encode_session.ctx = &ctx; + encode_session.next_frame = std::chrono::steady_clock::now(); - const auto &encoder = encoders.front(); - auto disp = platf::display(encoder.dev_type); - if(!disp) { - return; - } + encode_session.delay = 1000ms / ctx.config.framerate; - auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10; - auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt); + auto pix_fmt = ctx.config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt); + auto hwdevice_ctx = disp->make_hwdevice_ctx(ctx.config.width, ctx.config.height, pix_fmt); if(!hwdevice_ctx) { - return; + return std::nullopt; } - auto session = make_session(encoder, config, hwdevice_ctx.get()); + auto session = make_session(encoder, ctx.config, hwdevice_ctx.get()); if(!session) { - return; + return std::nullopt; } hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range); - auto img = disp->alloc_img(); - if(disp->dummy_img(img.get())) { - return; - } + encode_session.img_tmp = &img; + encode_session.hwdevice = std::move(hwdevice_ctx); + encode_session.session = std::move(*session); - const platf::img_t* img_p = hwdevice_ctx->convert(*img); - if(!img_p) { - return; - } + return std::move(encode_session); +} - sws_t sws; - encoder.img_to_frame(sws, *img_p, session->frame); +encode_e encode_run_sync(std::vector> &encode_session_ctxs, encode_session_ctx_queue_t &encode_session_ctx_queue) { + const auto &encoder = encoders.front(); + + std::shared_ptr disp; + reset_display(disp, encoder.dev_type); + if(!disp) { + return encode_e::error; + } std::vector> imgs(12); for(auto &img : imgs) { @@ -719,26 +801,41 @@ void capture( } auto round_robin = util::make_round_robin>(std::begin(imgs), std::end(imgs)); - - int frame_nr = 1; - int key_frame_nr = 1; - - auto max_delay = 1000ms / config.framerate; - std::shared_ptr img_tmp; - auto next_frame = std::chrono::steady_clock::now(); - while(!shutdown_event->peek()) { - if(idr_events->peek()) { - session->frame->pict_type = AV_PICTURE_TYPE_I; + auto dummy_img = disp->alloc_img(); + auto img_tmp = dummy_img.get(); + if(disp->dummy_img(img_tmp)) { + return encode_e::error; + } - auto event = idr_events->pop(); - TUPLE_2D_REF(_, end, *event); - - frame_nr = end; - key_frame_nr = end + config.framerate; + std::vector encode_sessions; + for(auto &ctx : encode_session_ctxs) { + auto encode_session = make_session_from_ctx(disp.get(), encoder, *dummy_img, *ctx); + if(!encode_session) { + return encode_e::error; } - else if(frame_nr == key_frame_nr) { - session->frame->pict_type = AV_PICTURE_TYPE_I; + + encode_sessions.emplace_back(std::move(*encode_session)); + } + + auto next_frame = std::chrono::steady_clock::now(); + while(encode_session_ctx_queue.running()) { + while(encode_session_ctx_queue.peek()) { + auto encode_session_ctx = encode_session_ctx_queue.pop(); + if(!encode_session_ctx) { + return encode_e::ok; + } + + encode_session_ctxs.emplace_back(std::make_unique(std::move(*encode_session_ctx))); + + auto encode_session = make_session_from_ctx(disp.get(), encoder, *dummy_img, *encode_session_ctxs.back()); + if(!encode_session) { + return encode_e::error; + } + + encode_sessions.emplace_back(std::move(*encode_session)); + + next_frame = std::chrono::steady_clock::now(); } auto delay = std::max(0ms, std::chrono::duration_cast(next_frame - std::chrono::steady_clock::now())); @@ -746,35 +843,144 @@ void capture( auto status = disp->snapshot(round_robin->get(), delay, display_cursor); switch(status) { case platf::capture_e::reinit: - return; case platf::capture_e::error: - return; + return status; case platf::capture_e::timeout: - next_frame += max_delay; - if(!img_tmp && frame_nr > (key_frame_nr + config.framerate)) { - continue; - } - break; case platf::capture_e::ok: - img_tmp = *round_robin++; + img_tmp = round_robin->get(); + ++round_robin; break; } - - if(img_tmp) { - img_p = hwdevice_ctx->convert(*img_tmp); - img_tmp.reset(); - } - if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) { - BOOST_LOG(fatal) << "Could not encode video packet"sv; - log_flush(); - std::abort(); + auto now = std::chrono::steady_clock::now(); + + next_frame = now + 1s; + {auto pos = std::begin(encode_sessions);while( pos != std::end(encode_sessions)) { + auto ctx = pos->ctx; + if(ctx->shutdown_event->peek()) { + // Let waiting thread know it can delete shutdown_event + ctx->join_event->raise(true); + + //FIXME: Causes segfault even if (pos + 1) != std::end() + // *pos = std::move(*(pos + 1)); + + {encode_session_t t { std::move(*pos) };} + + //FIXME: encode_session_t = std::move(encode_session_t) <=> segfault + pos = encode_sessions.erase(pos); + encode_session_ctxs.erase(std::find_if(std::begin(encode_session_ctxs), std::end(encode_session_ctxs), [&ctx_p=ctx](auto &ctx) { + return ctx.get() == ctx_p; + })); + + if(encode_sessions.empty()) { + return encode_e::ok; + } + + continue; + } + + if(ctx->idr_events->peek()) { + pos->session.frame->pict_type = AV_PICTURE_TYPE_I; + + auto event = ctx->idr_events->pop(); + auto end = event->second; + + ctx->frame_nr = end; + ctx->key_frame_nr = end + ctx->config.framerate; + } + else if(ctx->frame_nr == ctx->key_frame_nr) { + pos->session.frame->pict_type = AV_PICTURE_TYPE_I; + } + + if(img_tmp) { + pos->img_tmp = img_tmp; + } + + auto timeout = now > pos->next_frame; + if(timeout) { + pos->next_frame += pos->delay; + } + + next_frame = std::min(next_frame, pos->next_frame); + + if(!timeout) { + ++pos; + continue; + } + + sws_t sws; + if(pos->img_tmp) { + auto img_p = pos->hwdevice->convert(*pos->img_tmp); + pos->img_tmp = nullptr; + + encoder.img_to_frame(sws, *img_p, pos->session.frame); + } + + if(encode(ctx->frame_nr++, pos->session.ctx, pos->session.frame, ctx->packets, ctx->channel_data)) { + BOOST_LOG(fatal) << "Could not encode video packet"sv; + log_flush(); + std::abort(); + } + + pos->session.frame->pict_type = AV_PICTURE_TYPE_NONE; + + ++pos; + }} + + img_tmp = nullptr; + } + + return encode_e::ok; +} + +void captureThreadSync() { + auto ref = capture_thread_sync.ref(); + + std::vector> encode_session_ctxs; + + auto &ctx = ref->encode_session_ctx_queue; + auto lg = util::fail_guard([&]() { + ctx.stop(); + + for(auto &ctx : encode_session_ctxs) { + ctx->shutdown_event->raise(true); + ctx->join_event->raise(true); } - session->frame->pict_type = AV_PICTURE_TYPE_NONE; - } + for(auto &ctx : ctx.unsafe()) { + ctx.shutdown_event->raise(true); + ctx.join_event->raise(true); + } + }); + + while(encode_run_sync(encode_session_ctxs, ctx) == encode_e::reinit); } + +int start_capture_sync(capture_synced_ctx_t &ctx) { + std::thread { &captureThreadSync }.detach(); + return 0; +} + +void end_capture_sync(capture_synced_ctx_t &ctx) {} + +void capture( + safe::signal_t *shutdown_event, + packet_queue_t packets, + idr_event_t idr_events, + config_t config, + void *channel_data) { + + safe::signal_t join_event; + auto ref = capture_thread_sync.ref(); + ref->encode_session_ctx_queue.raise(encode_session_ctx_t { + shutdown_event, &join_event, packets, idr_events, config, 1, 1, channel_data + }); + + // Wait for join signal + join_event.view(); +} + void capture_async( safe::signal_t *shutdown_event, packet_queue_t packets, @@ -840,7 +1046,7 @@ bool validate_config(std::shared_ptr &disp, const encoder_t &e return false; } - auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10; + auto pix_fmt = config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt); auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt); if(!hwdevice_ctx) { return false;