Skip to content

Commit 997095c

Browse files
committed
Resolve merge conflicts
2 parents ef11145 + 8b9cd51 commit 997095c

File tree

10 files changed

+144
-47
lines changed

10 files changed

+144
-47
lines changed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ Install the following:
2020

2121
#### Common
2222
```
23-
sudo apt install cmake gcc-10 g++-10 libssl-dev libavdevice-dev libboost-thread-dev libboost-filesystem-dev libboost-log-dev libpulse-dev libopus-dev
23+
sudo apt install cmake gcc-10 g++-10 libssl-dev libavdevice-dev libboost-thread-dev libboost-filesystem-dev libboost-log-dev libpulse-dev libopus-dev libevdev-dev
2424
```
2525
#### X11
2626
```
27-
sudo apt install libxtst-dev libx11-dev libxrandr-dev libxfixes-dev libevdev-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev
27+
sudo apt install libxtst-dev libx11-dev libxrandr-dev libxfixes-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev
2828
```
2929

3030
#### KMS

gen-deb.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Architecture: amd64
3838
Maintainer: @loki
3939
Priority: optional
4040
Version: 0.11.0
41-
Depends: libssl1.1, libavdevice58, libboost-thread1.67.0 | libboost-thread1.71.0, libboost-filesystem1.67.0 | libboost-filesystem1.71.0, libboost-log1.67.0 | libboost-log1.71.0, libpulse0, libopus0, libxcb-shm0, libxcb-xfixes0, libxtst6, libevdev2, libdrm2, libcap2
41+
Depends: libssl1.1, libavdevice58, libboost-thread1.67.0 | libboost-thread1.71.0 | libboost-thread1.74.0, libboost-filesystem1.67.0 | libboost-filesystem1.71.0 | libboost-filesystem1.74.0, libboost-log1.67.0 | libboost-log1.71.0 | libboost-log1.74.0, libpulse0, libopus0, libxcb-shm0, libxcb-xfixes0, libxtst6, libevdev2, libdrm2, libcap2
4242
Description: Gamestream host for Moonlight
4343
EOF
4444

sunshine/config.cpp

+29-6
Original file line numberDiff line numberDiff line change
@@ -99,16 +99,22 @@ enum quality_e : int {
9999
_default = 0,
100100
speed,
101101
balanced,
102-
//quality2,
103102
};
104103

105-
enum rc_e : int {
104+
enum class rc_hevc_e : int {
106105
constqp, /**< Constant QP mode */
107106
vbr_latency, /**< Latency Constrained Variable Bitrate */
108107
vbr_peak, /**< Peak Contrained Variable Bitrate */
109108
cbr, /**< Constant bitrate mode */
110109
};
111110

111+
enum class rc_h264_e : int {
112+
constqp, /**< Constant QP mode */
113+
cbr, /**< Constant bitrate mode */
114+
vbr_peak, /**< Peak Contrained Variable Bitrate */
115+
vbr_latency, /**< Latency Constrained Variable Bitrate */
116+
};
117+
112118
enum coder_e : int {
113119
_auto = 0,
114120
cabac,
@@ -120,15 +126,25 @@ std::optional<quality_e> quality_from_view(const std::string_view &quality) {
120126
if(quality == #x##sv) return x
121127
_CONVERT_(speed);
122128
_CONVERT_(balanced);
123-
//_CONVERT_(quality2);
124129
if(quality == "default"sv) return _default;
125130
#undef _CONVERT_
126131
return std::nullopt;
127132
}
128133

129-
std::optional<rc_e> rc_from_view(const std::string_view &rc) {
134+
std::optional<int> rc_h264_from_view(const std::string_view &rc) {
130135
#define _CONVERT_(x) \
131-
if(rc == #x##sv) return x
136+
if(rc == #x##sv) return (int)rc_h264_e::x
137+
_CONVERT_(constqp);
138+
_CONVERT_(vbr_latency);
139+
_CONVERT_(vbr_peak);
140+
_CONVERT_(cbr);
141+
#undef _CONVERT_
142+
return std::nullopt;
143+
}
144+
145+
std::optional<int> rc_hevc_from_view(const std::string_view &rc) {
146+
#define _CONVERT_(x) \
147+
if(rc == #x##sv) return (int)rc_hevc_e::x
132148
_CONVERT_(constqp);
133149
_CONVERT_(vbr_latency);
134150
_CONVERT_(vbr_peak);
@@ -165,6 +181,7 @@ video_t video {
165181
{
166182
amd::balanced,
167183
std::nullopt,
184+
std::nullopt,
168185
-1 }, // amd
169186

170187
{}, // encoder
@@ -659,8 +676,14 @@ void apply_config(std::unordered_map<std::string, std::string> &&vars) {
659676
int_f(vars, "nv_coder", video.nv.coder, nv::coder_from_view);
660677

661678
int_f(vars, "amd_quality", video.amd.quality, amd::quality_from_view);
662-
int_f(vars, "amd_rc", video.amd.rc, amd::rc_from_view);
679+
680+
std::string rc;
681+
string_f(vars, "amd_rc", rc);
663682
int_f(vars, "amd_coder", video.amd.coder, amd::coder_from_view);
683+
if(!rc.empty()) {
684+
video.amd.rc_h264 = amd::rc_h264_from_view(rc);
685+
video.amd.rc_hevc = amd::rc_hevc_from_view(rc);
686+
}
664687

665688
string_f(vars, "encoder", video.encoder);
666689
string_f(vars, "adapter_name", video.adapter_name);

sunshine/config.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ struct video_t {
2929

3030
struct {
3131
std::optional<int> quality;
32-
std::optional<int> rc;
32+
std::optional<int> rc_h264;
33+
std::optional<int> rc_hevc;
3334
int coder;
3435
} amd;
3536

sunshine/platform/linux/cuda.cpp

+56-14
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ inline static int check(CUresult result, const std::string_view &sv) {
5656
return 0;
5757
}
5858

59+
void freeStream(CUstream stream) {
60+
CU_CHECK_IGNORE(cdf->cuStreamDestroy(stream), "Couldn't destroy cuda stream");
61+
}
62+
5963
class img_t : public platf::img_t {
6064
public:
6165
tex_t tex;
@@ -94,7 +98,8 @@ class cuda_t : public platf::hwdevice_t {
9498
this->hwframe.reset(frame);
9599
this->frame = frame;
96100

97-
if(((AVHWFramesContext *)frame->hw_frames_ctx->data)->sw_format != AV_PIX_FMT_NV12) {
101+
auto hwframe_ctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
102+
if(hwframe_ctx->sw_format != AV_PIX_FMT_NV12) {
98103
BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12"sv;
99104
return -1;
100105
}
@@ -105,6 +110,15 @@ class cuda_t : public platf::hwdevice_t {
105110
return -1;
106111
}
107112

113+
auto cuda_ctx = (AVCUDADeviceContext *)hwframe_ctx->device_ctx->hwctx;
114+
115+
stream = make_stream();
116+
if(!stream) {
117+
return -1;
118+
}
119+
120+
cuda_ctx->stream = stream.get();
121+
108122
auto sws_opt = sws_t::make(width, height, frame->width, frame->height, width * 4);
109123
if(!sws_opt) {
110124
return -1;
@@ -142,13 +156,14 @@ class cuda_t : public platf::hwdevice_t {
142156
return;
143157
}
144158

145-
sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, { frame->width, frame->height, 0, 0 });
159+
sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), { frame->width, frame->height, 0, 0 });
146160
}
147161

148162
cudaTextureObject_t tex_obj(const tex_t &tex) const {
149163
return linear_interpolation ? tex.texture.linear : tex.texture.point;
150164
}
151165

166+
stream_t stream;
152167
frame_t hwframe;
153168

154169
int width, height;
@@ -162,7 +177,7 @@ class cuda_t : public platf::hwdevice_t {
162177
class cuda_ram_t : public cuda_t {
163178
public:
164179
int convert(platf::img_t &img) override {
165-
return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex));
180+
return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex), stream.get());
166181
}
167182

168183
int set_frame(AVFrame *frame) {
@@ -186,7 +201,7 @@ class cuda_ram_t : public cuda_t {
186201
class cuda_vram_t : public cuda_t {
187202
public:
188203
int convert(platf::img_t &img) override {
189-
return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *)&img)->tex));
204+
return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *)&img)->tex), stream.get());
190205
}
191206
};
192207

@@ -256,6 +271,28 @@ int init() {
256271
return 0;
257272
}
258273

274+
class ctx_t {
275+
public:
276+
ctx_t(NVFBC_SESSION_HANDLE handle) {
277+
NVFBC_BIND_CONTEXT_PARAMS params { NVFBC_BIND_CONTEXT_PARAMS_VER };
278+
279+
if(func.nvFBCBindContext(handle, &params)) {
280+
BOOST_LOG(error) << "Couldn't bind NvFBC context to current thread: " << func.nvFBCGetLastErrorStr(handle);
281+
}
282+
283+
this->handle = handle;
284+
}
285+
286+
~ctx_t() {
287+
NVFBC_RELEASE_CONTEXT_PARAMS params { NVFBC_RELEASE_CONTEXT_PARAMS_VER };
288+
if(func.nvFBCReleaseContext(handle, &params)) {
289+
BOOST_LOG(error) << "Couldn't release NvFBC context from current thread: " << func.nvFBCGetLastErrorStr(handle);
290+
}
291+
}
292+
293+
NVFBC_SESSION_HANDLE handle;
294+
};
295+
259296
class handle_t {
260297
enum flag_e {
261298
SESSION_HANDLE,
@@ -347,24 +384,26 @@ class handle_t {
347384
return 0;
348385
}
349386

350-
~handle_t() {
387+
int reset() {
351388
if(!handle_flags[SESSION_HANDLE]) {
352-
return;
389+
return 0;
353390
}
354391

355-
if(handle_flags[SESSION_CAPTURE]) {
356-
NVFBC_DESTROY_CAPTURE_SESSION_PARAMS params { NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER };
357-
358-
if(func.nvFBCDestroyCaptureSession(handle, &params)) {
359-
BOOST_LOG(error) << "Couldn't destroy capture session: "sv << func.nvFBCGetLastErrorStr(handle);
360-
}
361-
}
392+
stop();
362393

363394
NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER };
364395

365396
if(func.nvFBCDestroyHandle(handle, &params)) {
366397
BOOST_LOG(error) << "Couldn't destroy session handle: "sv << func.nvFBCGetLastErrorStr(handle);
367398
}
399+
400+
handle_flags[SESSION_HANDLE] = false;
401+
402+
return 0;
403+
}
404+
405+
~handle_t() {
406+
reset();
368407
}
369408

370409
std::bitset<MAX_FLAGS> handle_flags;
@@ -380,6 +419,8 @@ class display_t : public platf::display_t {
380419
return -1;
381420
}
382421

422+
ctx_t ctx { handle->handle };
423+
383424
auto status_params = handle->status();
384425
if(!status_params) {
385426
return -1;
@@ -442,8 +483,9 @@ class display_t : public platf::display_t {
442483
// Force display_t::capture to initialize handle_t::capture
443484
cursor_visible = !*cursor;
444485

486+
ctx_t ctx { handle.handle };
445487
auto fg = util::fail_guard([&]() {
446-
handle.stop();
488+
handle.reset();
447489
});
448490

449491
while(img) {

sunshine/platform/linux/cuda.cu

+23-6
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,23 @@ void freeCudaPtr_t::operator()(void *ptr) {
110110
CU_CHECK_IGNORE(cudaFree(ptr), "Couldn't free cuda device pointer");
111111
}
112112

113+
void freeCudaStream_t::operator()(cudaStream_t ptr) {
114+
CU_CHECK_IGNORE(cudaStreamDestroy(ptr), "Couldn't free cuda stream");
115+
}
116+
117+
stream_t make_stream(int flags) {
118+
cudaStream_t stream;
119+
120+
if(!flags) {
121+
CU_CHECK_PTR(cudaStreamCreate(&stream), "Couldn't create cuda stream");
122+
}
123+
else {
124+
CU_CHECK_PTR(cudaStreamCreateWithFlags(&stream, flags), "Couldn't create cuda stream with flags");
125+
}
126+
127+
return stream_t { stream };
128+
}
129+
113130
inline __device__ float3 bgra_to_rgb(uchar4 vec) {
114131
return make_float3((float)vec.z, (float)vec.y, (float)vec.x);
115132
}
@@ -203,8 +220,8 @@ std::optional<tex_t> tex_t::make(int height, int pitch) {
203220

204221
tex_t::tex_t() : array {}, texture { INVALID_TEXTURE } {}
205222
tex_t::tex_t(tex_t &&other) : array { other.array }, texture { other.texture } {
206-
other.array = 0;
207-
other.texture.point = INVALID_TEXTURE;
223+
other.array = 0;
224+
other.texture.point = INVALID_TEXTURE;
208225
other.texture.linear = INVALID_TEXTURE;
209226
}
210227

@@ -269,18 +286,18 @@ std::optional<sws_t> sws_t::make(int in_width, int in_height, int out_width, int
269286
return std::make_optional<sws_t>(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor, std::move(ptr));
270287
}
271288

272-
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture) {
273-
return convert(Y, UV, pitchY, pitchUV, texture, viewport);
289+
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream) {
290+
return convert(Y, UV, pitchY, pitchUV, texture, stream, viewport);
274291
}
275292

276-
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport) {
293+
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport) {
277294
int threadsX = viewport.width / 2;
278295
int threadsY = viewport.height;
279296

280297
dim3 block(threadsPerBlock);
281298
dim3 grid(div_align(threadsX, threadsPerBlock), threadsY);
282299

283-
RGBA_to_NV12<<<grid, block>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
300+
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
284301

285302
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
286303
}

sunshine/platform/linux/cuda.h

+19-7
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
#if !defined(SUNSHINE_PLATFORM_CUDA_H) && defined(SUNSHINE_BUILD_CUDA)
22
#define SUNSHINE_PLATFORM_CUDA_H
33

4-
#include <vector>
54
#include <memory>
65
#include <optional>
6+
#include <string>
7+
#include <vector>
78

89
namespace platf {
9-
class hwdevice_t;
10-
class img_t;
11-
}
10+
class hwdevice_t;
11+
class img_t;
12+
} // namespace platf
1213

1314
namespace cuda {
15+
1416
namespace nvfbc {
1517
std::vector<std::string> display_names();
1618
}
@@ -21,8 +23,10 @@ int init();
2123
typedef struct cudaArray *cudaArray_t;
2224

2325
#if !defined(__CUDACC__)
26+
typedef struct CUstream_st *cudaStream_t;
2427
typedef unsigned long long cudaTextureObject_t;
2528
#else /* defined(__CUDACC__) */
29+
typedef __location__(device_builtin) struct CUstream_st *cudaStream_t;
2630
typedef __location__(device_builtin) unsigned long long cudaTextureObject_t;
2731
#endif /* !defined(__CUDACC__) */
2832

@@ -33,7 +37,15 @@ class freeCudaPtr_t {
3337
void operator()(void *ptr);
3438
};
3539

36-
using ptr_t = std::unique_ptr<void, freeCudaPtr_t>;
40+
class freeCudaStream_t {
41+
public:
42+
void operator()(cudaStream_t ptr);
43+
};
44+
45+
using ptr_t = std::unique_ptr<void, freeCudaPtr_t>;
46+
using stream_t = std::unique_ptr<CUstream_st, freeCudaStream_t>;
47+
48+
stream_t make_stream(int flags = 0);
3749

3850
struct viewport_t {
3951
int width, height;
@@ -75,8 +87,8 @@ class sws_t {
7587
static std::optional<sws_t> make(int in_width, int in_height, int out_width, int out_height, int pitch);
7688

7789
// Converts loaded image into a CUDevicePtr
78-
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture);
79-
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport);
90+
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream);
91+
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport);
8092

8193
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
8294

0 commit comments

Comments
 (0)