Skip to content

Commit

Permalink
fix: automatic selection for hybrid GPU and IDDSampleDriver users (Li…
Browse files Browse the repository at this point in the history
…zardByte#3002)

* Fix frame capture and output duplication for dual GPU setups and virtual displays

- Added `test_frame_capture` function to verify if frames are successfully captured and not empty.
- Fixes issues with virtual displays such as IDDSampleDriver when using more than one GPU.

Co-authored-by: ReenigneArcher <[email protected]>
Co-authored-by: Cameron Gutman <[email protected]>
  • Loading branch information
3 people authored and KuleRucket committed Oct 9, 2024
1 parent 1f1bbcf commit 1c3a14e
Show file tree
Hide file tree
Showing 2 changed files with 208 additions and 37 deletions.
70 changes: 44 additions & 26 deletions src/platform/windows/display_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <initguid.h>
#include <thread>

#include <boost/algorithm/string/join.hpp>
#include <boost/process.hpp>

// We have to include boost/process.hpp before display.h due to WinSock.h,
Expand Down Expand Up @@ -351,24 +352,8 @@ namespace platf::dxgi {
return true;
}

// On hybrid graphics systems, Windows will change the order of GPUs reported by
// DXGI in accordance with the user's GPU preference. If the selected GPU is a
// render-only device with no displays, DXGI will add virtual outputs to
// that device to avoid confusing applications. While this works properly for most
// applications, it breaks the Desktop Duplication API because DXGI doesn't proxy
// the virtual DXGIOutput to the real GPU it is attached to. When trying to call
// DuplicateOutput() on one of these virtual outputs, it fails with DXGI_ERROR_UNSUPPORTED
// (even if you try sneaky stuff like passing the ID3D11Device for the iGPU and the
// virtual DXGIOutput from the dGPU). Because the GPU preference is once-per-process,
// we spawn a helper tool to probe for us before we set our own GPU preference.
bool
probe_for_gpu_preference(const std::string &display_name) {
// If we've already been through here, there's nothing to do this time.
static bool set_gpu_preference = false;
if (set_gpu_preference) {
return true;
}

validate_and_test_gpu_preference(const std::string &display_name, bool verify_frame_capture) {
std::string cmd = "tools\\ddprobe.exe";

// We start at 1 because 0 is automatic selection which can be overridden by
Expand All @@ -378,38 +363,71 @@ namespace platf::dxgi {
for (int i = 1; i < 5; i++) {
// Run the probe tool. It returns the status of DuplicateOutput().
//
// Arg format: [GPU preference] [Display name]
// Arg format: [GPU preference] [Display name] [--verify-frame-capture]
HRESULT result;
std::vector<std::string> args = { std::to_string(i), display_name };
try {
result = bp::system(cmd, std::to_string(i), display_name, bp::std_out > bp::null, bp::std_err > bp::null);
if (verify_frame_capture) {
args.emplace_back("--verify-frame-capture");
}
result = bp::system(cmd, bp::args(args), bp::std_out > bp::null, bp::std_err > bp::null);
}
catch (bp::process_error &e) {
BOOST_LOG(error) << "Failed to start ddprobe.exe: "sv << e.what();
return false;
}

BOOST_LOG(info) << "ddprobe.exe ["sv << i << "] ["sv << display_name << "] returned: 0x"sv << util::hex(result).to_string_view();
BOOST_LOG(info) << "ddprobe.exe " << boost::algorithm::join(args, " ") << " returned 0x"
<< util::hex(result).to_string_view();

// E_ACCESSDENIED can happen at the login screen. If we get this error,
// we know capture would have been supported, because DXGI_ERROR_UNSUPPORTED
// would have been raised first if it wasn't.
if (result == S_OK || result == E_ACCESSDENIED) {
// We found a working GPU preference, so set ourselves to use that.
if (set_gpu_preference_on_self(i)) {
set_gpu_preference = true;
return true;
}
else {
return false;
}
}
else {
// This configuration didn't work, so continue testing others
continue;
}
}

// If none of the manual options worked, leave the GPU preference alone
// If no valid configuration was found, return false
return false;
}

// On hybrid graphics systems, Windows will change the order of GPUs reported by
// DXGI in accordance with the user's GPU preference. If the selected GPU is a
// render-only device with no displays, DXGI will add virtual outputs to that
// device to avoid confusing applications. While this works properly for most
// applications, it breaks the Desktop Duplication API because DXGI doesn't proxy
// the virtual DXGIOutput to the real GPU it is attached to. When trying to call
// DuplicateOutput() on one of these virtual outputs, it fails with DXGI_ERROR_UNSUPPORTED
// (even if you try sneaky stuff like passing the ID3D11Device for the iGPU and the
// virtual DXGIOutput from the dGPU). Because the GPU preference is once-per-process,
// we spawn a helper tool to probe for us before we set our own GPU preference.
//
// display_name: name of the display to probe; forwarded unchanged to the helper.
//
// Returns true if a GPU preference was selected, either by this call or by an
// earlier successful call. Returns false if neither probing pass succeeded.
bool
probe_for_gpu_preference(const std::string &display_name) {
  // Remembers a successful probe for the lifetime of the process.
  static bool set_gpu_preference = false;

  // If we've already been through here, there's nothing to do this time.
  if (set_gpu_preference) {
    return true;
  }

  // First pass: require ddprobe to also verify that captured frames are not
  // empty. Second pass (only if the first fails): accept a successful
  // DuplicateOutput() alone, so a failed frame test cannot block GPU selection.
  //
  // Record the outcome in the static flag. Without this the early return above
  // never triggers and every call would spawn the helper process again.
  set_gpu_preference = validate_and_test_gpu_preference(display_name, true) ||
                       validate_and_test_gpu_preference(display_name, false);

  return set_gpu_preference;
}

Expand Down
175 changes: 164 additions & 11 deletions tools/ddprobe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
#include <iostream>
#include <locale>
#include <string>
#include <wrl.h>

#include "src/utility.h"

using Microsoft::WRL::ComPtr;
using namespace std::literals;
namespace dxgi {
template <class T>
Expand Down Expand Up @@ -69,8 +71,128 @@ syncThreadDesktop() {
CloseDesktop(hDesk);
}

/**
 * @brief Reports whether a mapped frame contains at least one pixel that is not dark.
 *
 * The frame is walked row by row, treating every pixel as 4 bytes. Only the first
 * three bytes of each pixel are inspected; the fourth byte is ignored. A pixel counts
 * as "not dark" as soon as any of those three bytes is strictly greater than the
 * threshold. The scan stops at the first such pixel.
 *
 * @param mappedResource Mapped subresource whose `pData` points at the pixel data and whose `RowPitch` is the byte distance between rows.
 * @param frameDesc Texture description supplying the `Width` and `Height` of the frame in pixels.
 * @param darknessThreshold Fraction of the 0-255 channel range at or below which a channel value is considered dark. Defaults to 0.1f.
 * @return `true` if any inspected channel exceeds the threshold; `false` if every pixel is dark (including a frame with no pixels).
 */
bool
is_valid_frame(const D3D11_MAPPED_SUBRESOURCE &mappedResource, const D3D11_TEXTURE2D_DESC &frameDesc, float darknessThreshold = 0.1f) {
  // Each pixel occupies 4 bytes (8 bits per channel); the 4th byte is never compared.
  // Per the original author, HDR does not raise black levels enough to need special handling here.
  const int pixelSize = 4;
  const int rowPitch = mappedResource.RowPitch;
  const int columns = frameDesc.Width;
  const int rows = frameDesc.Height;

  // Scale the fractional threshold to the 0-255 channel range (truncating).
  const auto limit = static_cast<int>(darknessThreshold * 255);

  const auto *base = static_cast<const uint8_t *>(mappedResource.pData);

  for (int row = 0; row < rows; ++row) {
    for (int col = 0; col < columns; ++col) {
      // RowPitch, not Width * pixelSize, is used to step between rows.
      const uint8_t *texel = base + row * rowPitch + col * pixelSize;

      const bool dark = texel[0] <= limit && texel[1] <= limit && texel[2] <= limit;
      if (!dark) {
        // Found visible content; no need to look further.
        return true;
      }
    }
  }

  // No channel of any pixel exceeded the threshold.
  return false;
}

/**
 * @brief Tries up to 10 times to capture a frame with visible content from an output duplication.
 *
 * Each attempt acquires the next frame (500 ms timeout), copies it into a CPU-readable
 * staging texture, maps that texture and passes it to `is_valid_frame`. The first frame
 * that is not entirely dark ends the test successfully. Any failing D3D/DXGI call,
 * including a failed acquire, aborts the test immediately with that call's `HRESULT`.
 * Progress and errors are written to standard output.
 *
 * @param dup Output duplication object used to acquire (and release) frames.
 * @param device Direct3D 11 device used to create the staging texture and obtain the immediate context.
 * @return `S_OK` once a non-empty frame is seen, `E_FAIL` if all 10 frames were empty, otherwise the failing call's `HRESULT`.
 */
HRESULT
test_frame_capture(dxgi::dup_t &dup, ComPtr<ID3D11Device> device) {
  for (int attempt = 1; attempt <= 10; ++attempt) {
    std::cout << "Attempting to acquire frame " << attempt << " of 10..." << std::endl;

    // Declared up front so these objects outlive both cleanup guards below.
    ComPtr<IDXGIResource> acquired;
    DXGI_OUTDUPL_FRAME_INFO info;
    ComPtr<ID3D11DeviceContext> ctx;
    ComPtr<ID3D11Texture2D> staging;

    HRESULT hr = dup->AcquireNextFrame(500, &info, &acquired);
    device->GetImmediateContext(&ctx);

    if (FAILED(hr)) {
      std::cout << "Error: Failed to acquire next frame [0x"sv << util::hex(hr).to_string_view() << ']' << std::endl;
      return hr;
    }

    // Hands the acquired frame back to the duplication object when this
    // iteration's scope is left (relies on util::fail_guard, defined elsewhere).
    auto releaseFrame = util::fail_guard([&dup]() {
      dup->ReleaseFrame();
    });

    std::cout << "Frame acquired successfully." << std::endl;

    ComPtr<ID3D11Texture2D> source;
    hr = acquired->QueryInterface(IID_PPV_ARGS(&source));
    if (FAILED(hr)) {
      std::cout << "Error: Failed to query texture interface from frame resource [0x"sv << util::hex(hr).to_string_view() << ']' << std::endl;
      return hr;
    }

    // Reuse the captured texture's description, changed only so the copy
    // can be read back on the CPU.
    D3D11_TEXTURE2D_DESC desc;
    source->GetDesc(&desc);
    desc.BindFlags = 0;
    desc.MiscFlags = 0;
    desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    desc.Usage = D3D11_USAGE_STAGING;

    hr = device->CreateTexture2D(&desc, nullptr, &staging);
    if (FAILED(hr)) {
      std::cout << "Error: Failed to create staging texture [0x"sv << util::hex(hr).to_string_view() << ']' << std::endl;
      return hr;
    }

    ctx->CopyResource(staging.Get(), source.Get());

    D3D11_MAPPED_SUBRESOURCE mapped;
    hr = ctx->Map(staging.Get(), 0, D3D11_MAP_READ, 0, &mapped);
    if (FAILED(hr)) {
      std::cout << "Error: Failed to map the staging texture for inspection [0x"sv << util::hex(hr).to_string_view() << ']' << std::endl;
      return hr;
    }

    // Undo the Map() above once the pixels have been inspected.
    auto unmapStaging = util::fail_guard([&ctx, &staging]() {
      ctx->Unmap(staging.Get(), 0);
    });

    const bool hasContent = is_valid_frame(mapped, desc);
    if (hasContent) {
      std::cout << "Frame " << attempt << " is non-empty (contains visible content)." << std::endl;
      return S_OK;
    }

    std::cout << "Frame " << attempt << " is empty (no visible content)." << std::endl;
  }

  // All frames were empty, indicating potential capture issues.
  return E_FAIL;
}

HRESULT
test_dxgi_duplication(dxgi::adapter_t &adapter, dxgi::output_t &output) {
test_dxgi_duplication(dxgi::adapter_t &adapter, dxgi::output_t &output, bool verify_frame_capture) {
D3D_FEATURE_LEVEL featureLevels[] {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
Expand Down Expand Up @@ -107,29 +229,60 @@ test_dxgi_duplication(dxgi::adapter_t &adapter, dxgi::output_t &output) {
// Ensure we can duplicate the current display
syncThreadDesktop();

// Return the result of DuplicateOutput() to Sunshine
// Attempt to duplicate the output
dxgi::dup_t dup;
return output1->DuplicateOutput((IUnknown *) device.get(), &dup);
ComPtr<ID3D11Device> device_ptr(device.get());
HRESULT result = output1->DuplicateOutput(device_ptr.Get(), &dup);

if (FAILED(result)) {
std::cout << "Failed to duplicate output [0x"sv << util::hex(result).to_string_view() << "]" << std::endl;
return result;
}

// To prevent false negatives, we'll make it optional to test for frame capture.
if (verify_frame_capture) {
HRESULT captureResult = test_frame_capture(dup, device_ptr.Get());
if (FAILED(captureResult)) {
std::cout << "Frame capture test failed [0x"sv << util::hex(captureResult).to_string_view() << "]" << std::endl;
return captureResult;
}
}

return S_OK;
}

int
main(int argc, char *argv[]) {
HRESULT status;

// Display name may be omitted
if (argc != 2 && argc != 3) {
std::cout << "ddprobe.exe [GPU preference value] [display name]"sv << std::endl;
// Usage message
if (argc < 2 || argc > 4) {
std::cout << "Usage: ddprobe.exe [GPU preference value] [display name] [--verify-frame-capture]"sv << std::endl;
return -1;
}

std::wstring display_name;
if (argc == 3) {
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
display_name = converter.from_bytes(argv[2]);
bool verify_frame_capture = false;

// Parse GPU preference value (required)
int gpu_preference = atoi(argv[1]);

// Parse optional arguments
for (int i = 2; i < argc; ++i) {
std::string arg = argv[i];

if (arg == "--verify-frame-capture") {
verify_frame_capture = true;
}
else {
// Assume any other argument is the display name
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
display_name = converter.from_bytes(arg);
}
}

// We must set the GPU preference before making any DXGI/D3D calls
status = set_gpu_preference(atoi(argv[1]));
status = set_gpu_preference(gpu_preference);
if (status != ERROR_SUCCESS) {
return status;
}
Expand Down Expand Up @@ -173,7 +326,7 @@ main(int argc, char *argv[]) {
}

// We found the matching output. Test it and return the result.
return test_dxgi_duplication(adapter, output);
return test_dxgi_duplication(adapter, output, verify_frame_capture);
}
}

Expand Down

0 comments on commit 1c3a14e

Please sign in to comment.