mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-11-25 12:56:27 -05:00
Merge pull request #13149 from liamwhite/per-channel-program
video_core: make gpu context aware of rendering program
This commit is contained in:
commit
ce62fa6f7b
12 changed files with 39 additions and 52 deletions
|
@ -435,8 +435,6 @@ struct Values {
|
||||||
linkage, false, "disable_shader_loop_safety_checks", Category::RendererDebug};
|
linkage, false, "disable_shader_loop_safety_checks", Category::RendererDebug};
|
||||||
Setting<bool> enable_renderdoc_hotkey{linkage, false, "renderdoc_hotkey",
|
Setting<bool> enable_renderdoc_hotkey{linkage, false, "renderdoc_hotkey",
|
||||||
Category::RendererDebug};
|
Category::RendererDebug};
|
||||||
// TODO: remove this once AMDVLK supports VK_EXT_depth_bias_control
|
|
||||||
bool renderer_amdvlk_depth_bias_workaround{};
|
|
||||||
Setting<bool> disable_buffer_reorder{linkage, false, "disable_buffer_reorder",
|
Setting<bool> disable_buffer_reorder{linkage, false, "disable_buffer_reorder",
|
||||||
Category::RendererDebug};
|
Category::RendererDebug};
|
||||||
|
|
||||||
|
|
|
@ -425,11 +425,6 @@ struct System::Impl {
|
||||||
room_member->SendGameInfo(game_info);
|
room_member->SendGameInfo(game_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Workarounds:
|
|
||||||
// Activate this in Super Smash Brothers Ultimate, it only affects AMD cards using AMDVLK
|
|
||||||
Settings::values.renderer_amdvlk_depth_bias_workaround =
|
|
||||||
params.program_id == 0x1006A800016E000ULL;
|
|
||||||
|
|
||||||
status = SystemResultStatus::Success;
|
status = SystemResultStatus::Success;
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
@ -489,9 +484,6 @@ struct System::Impl {
|
||||||
room_member->SendGameInfo(game_info);
|
room_member->SendGameInfo(game_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Workarounds
|
|
||||||
Settings::values.renderer_amdvlk_depth_bias_workaround = false;
|
|
||||||
|
|
||||||
LOG_DEBUG(Core, "Shutdown OK");
|
LOG_DEBUG(Core, "Shutdown OK");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
|
#include "core/hle/kernel/k_process.h"
|
||||||
#include "core/hle/service/nvdrv/core/container.h"
|
#include "core/hle/service/nvdrv/core/container.h"
|
||||||
#include "core/hle/service/nvdrv/core/nvmap.h"
|
#include "core/hle/service/nvdrv/core/nvmap.h"
|
||||||
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
|
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
|
||||||
|
@ -75,7 +76,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
|
||||||
case 0xd:
|
case 0xd:
|
||||||
return WrapFixed(this, &nvhost_gpu::SetChannelPriority, input, output);
|
return WrapFixed(this, &nvhost_gpu::SetChannelPriority, input, output);
|
||||||
case 0x1a:
|
case 0x1a:
|
||||||
return WrapFixed(this, &nvhost_gpu::AllocGPFIFOEx2, input, output);
|
return WrapFixed(this, &nvhost_gpu::AllocGPFIFOEx2, input, output, fd);
|
||||||
case 0x1b:
|
case 0x1b:
|
||||||
return WrapFixedVariable(this, &nvhost_gpu::SubmitGPFIFOBase1, input, output, true);
|
return WrapFixedVariable(this, &nvhost_gpu::SubmitGPFIFOBase1, input, output, true);
|
||||||
case 0x1d:
|
case 0x1d:
|
||||||
|
@ -120,8 +121,13 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
|
||||||
return NvResult::NotImplemented;
|
return NvResult::NotImplemented;
|
||||||
}
|
}
|
||||||
|
|
||||||
void nvhost_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
|
void nvhost_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
|
||||||
void nvhost_gpu::OnClose(DeviceFD fd) {}
|
sessions[fd] = session_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
void nvhost_gpu::OnClose(DeviceFD fd) {
|
||||||
|
sessions.erase(fd);
|
||||||
|
}
|
||||||
|
|
||||||
NvResult nvhost_gpu::SetNVMAPfd(IoctlSetNvmapFD& params) {
|
NvResult nvhost_gpu::SetNVMAPfd(IoctlSetNvmapFD& params) {
|
||||||
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
|
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
|
||||||
|
@ -161,7 +167,7 @@ NvResult nvhost_gpu::SetChannelPriority(IoctlChannelSetPriority& params) {
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
}
|
}
|
||||||
|
|
||||||
NvResult nvhost_gpu::AllocGPFIFOEx2(IoctlAllocGpfifoEx2& params) {
|
NvResult nvhost_gpu::AllocGPFIFOEx2(IoctlAllocGpfifoEx2& params, DeviceFD fd) {
|
||||||
LOG_WARNING(Service_NVDRV,
|
LOG_WARNING(Service_NVDRV,
|
||||||
"(STUBBED) called, num_entries={:X}, flags={:X}, unk0={:X}, "
|
"(STUBBED) called, num_entries={:X}, flags={:X}, unk0={:X}, "
|
||||||
"unk1={:X}, unk2={:X}, unk3={:X}",
|
"unk1={:X}, unk2={:X}, unk3={:X}",
|
||||||
|
@ -173,7 +179,12 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(IoctlAllocGpfifoEx2& params) {
|
||||||
return NvResult::AlreadyAllocated;
|
return NvResult::AlreadyAllocated;
|
||||||
}
|
}
|
||||||
|
|
||||||
system.GPU().InitChannel(*channel_state);
|
u64 program_id{};
|
||||||
|
if (auto* const session = core.GetSession(sessions[fd]); session != nullptr) {
|
||||||
|
program_id = session->process->GetProgramId();
|
||||||
|
}
|
||||||
|
|
||||||
|
system.GPU().InitChannel(*channel_state, program_id);
|
||||||
|
|
||||||
params.fence_out = syncpoint_manager.GetSyncpointFence(channel_syncpoint);
|
params.fence_out = syncpoint_manager.GetSyncpointFence(channel_syncpoint);
|
||||||
|
|
||||||
|
|
|
@ -192,7 +192,7 @@ private:
|
||||||
NvResult ZCullBind(IoctlZCullBind& params);
|
NvResult ZCullBind(IoctlZCullBind& params);
|
||||||
NvResult SetErrorNotifier(IoctlSetErrorNotifier& params);
|
NvResult SetErrorNotifier(IoctlSetErrorNotifier& params);
|
||||||
NvResult SetChannelPriority(IoctlChannelSetPriority& params);
|
NvResult SetChannelPriority(IoctlChannelSetPriority& params);
|
||||||
NvResult AllocGPFIFOEx2(IoctlAllocGpfifoEx2& params);
|
NvResult AllocGPFIFOEx2(IoctlAllocGpfifoEx2& params, DeviceFD fd);
|
||||||
NvResult AllocateObjectContext(IoctlAllocObjCtx& params);
|
NvResult AllocateObjectContext(IoctlAllocObjCtx& params);
|
||||||
|
|
||||||
NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandList&& entries);
|
NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandList&& entries);
|
||||||
|
@ -210,6 +210,7 @@ private:
|
||||||
NvCore::SyncpointManager& syncpoint_manager;
|
NvCore::SyncpointManager& syncpoint_manager;
|
||||||
NvCore::NvMap& nvmap;
|
NvCore::NvMap& nvmap;
|
||||||
std::shared_ptr<Tegra::Control::ChannelState> channel_state;
|
std::shared_ptr<Tegra::Control::ChannelState> channel_state;
|
||||||
|
std::unordered_map<DeviceFD, NvCore::SessionId> sessions;
|
||||||
u32 channel_syncpoint;
|
u32 channel_syncpoint;
|
||||||
std::mutex channel_mutex;
|
std::mutex channel_mutex;
|
||||||
|
|
||||||
|
|
|
@ -16,8 +16,9 @@ namespace Tegra::Control {
|
||||||
|
|
||||||
ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
|
ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
|
||||||
|
|
||||||
void ChannelState::Init(Core::System& system, GPU& gpu) {
|
void ChannelState::Init(Core::System& system, GPU& gpu, u64 program_id_) {
|
||||||
ASSERT(memory_manager);
|
ASSERT(memory_manager);
|
||||||
|
program_id = program_id_;
|
||||||
dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
|
dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
|
||||||
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
|
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
|
||||||
fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
|
fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
|
||||||
|
|
|
@ -40,11 +40,12 @@ struct ChannelState {
|
||||||
ChannelState(ChannelState&& other) noexcept = default;
|
ChannelState(ChannelState&& other) noexcept = default;
|
||||||
ChannelState& operator=(ChannelState&& other) noexcept = default;
|
ChannelState& operator=(ChannelState&& other) noexcept = default;
|
||||||
|
|
||||||
void Init(Core::System& system, GPU& gpu);
|
void Init(Core::System& system, GPU& gpu, u64 program_id);
|
||||||
|
|
||||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||||
|
|
||||||
s32 bind_id = -1;
|
s32 bind_id = -1;
|
||||||
|
u64 program_id = 0;
|
||||||
/// 3D engine
|
/// 3D engine
|
||||||
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
|
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
|
||||||
/// 2D engine
|
/// 2D engine
|
||||||
|
|
|
@ -7,7 +7,7 @@ namespace VideoCommon {
|
||||||
|
|
||||||
ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
|
ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
|
||||||
: maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
|
: maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
|
||||||
gpu_memory{*channel_state.memory_manager} {}
|
gpu_memory{*channel_state.memory_manager}, program_id{channel_state.program_id} {}
|
||||||
|
|
||||||
template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
|
template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,7 @@ public:
|
||||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||||
Tegra::MemoryManager& gpu_memory;
|
Tegra::MemoryManager& gpu_memory;
|
||||||
|
u64 program_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -77,9 +78,10 @@ protected:
|
||||||
P* channel_state;
|
P* channel_state;
|
||||||
size_t current_channel_id{UNSET_CHANNEL};
|
size_t current_channel_id{UNSET_CHANNEL};
|
||||||
size_t current_address_space{};
|
size_t current_address_space{};
|
||||||
Tegra::Engines::Maxwell3D* maxwell3d;
|
Tegra::Engines::Maxwell3D* maxwell3d{};
|
||||||
Tegra::Engines::KeplerCompute* kepler_compute;
|
Tegra::Engines::KeplerCompute* kepler_compute{};
|
||||||
Tegra::MemoryManager* gpu_memory;
|
Tegra::MemoryManager* gpu_memory{};
|
||||||
|
u64 program_id{};
|
||||||
|
|
||||||
std::deque<P> channel_storage;
|
std::deque<P> channel_storage;
|
||||||
std::deque<size_t> free_channel_ids;
|
std::deque<size_t> free_channel_ids;
|
||||||
|
|
|
@ -58,6 +58,7 @@ void ChannelSetupCaches<P>::BindToChannel(s32 id) {
|
||||||
maxwell3d = &channel_state->maxwell3d;
|
maxwell3d = &channel_state->maxwell3d;
|
||||||
kepler_compute = &channel_state->kepler_compute;
|
kepler_compute = &channel_state->kepler_compute;
|
||||||
gpu_memory = &channel_state->gpu_memory;
|
gpu_memory = &channel_state->gpu_memory;
|
||||||
|
program_id = channel_state->program_id;
|
||||||
current_address_space = gpu_memory->GetID();
|
current_address_space = gpu_memory->GetID();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,6 +77,7 @@ void ChannelSetupCaches<P>::EraseChannel(s32 id) {
|
||||||
maxwell3d = nullptr;
|
maxwell3d = nullptr;
|
||||||
kepler_compute = nullptr;
|
kepler_compute = nullptr;
|
||||||
gpu_memory = nullptr;
|
gpu_memory = nullptr;
|
||||||
|
program_id = 0;
|
||||||
} else if (current_channel_id != UNSET_CHANNEL) {
|
} else if (current_channel_id != UNSET_CHANNEL) {
|
||||||
channel_state = &channel_storage[current_channel_id];
|
channel_state = &channel_storage[current_channel_id];
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,8 +67,8 @@ struct GPU::Impl {
|
||||||
return CreateChannel(new_channel_id++);
|
return CreateChannel(new_channel_id++);
|
||||||
}
|
}
|
||||||
|
|
||||||
void InitChannel(Control::ChannelState& to_init) {
|
void InitChannel(Control::ChannelState& to_init, u64 program_id) {
|
||||||
to_init.Init(system, gpu);
|
to_init.Init(system, gpu, program_id);
|
||||||
to_init.BindRasterizer(rasterizer);
|
to_init.BindRasterizer(rasterizer);
|
||||||
rasterizer->InitializeChannel(to_init);
|
rasterizer->InitializeChannel(to_init);
|
||||||
}
|
}
|
||||||
|
@ -412,8 +412,8 @@ std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
|
||||||
return impl->AllocateChannel();
|
return impl->AllocateChannel();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU::InitChannel(Control::ChannelState& to_init) {
|
void GPU::InitChannel(Control::ChannelState& to_init, u64 program_id) {
|
||||||
impl->InitChannel(to_init);
|
impl->InitChannel(to_init, program_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU::BindChannel(s32 channel_id) {
|
void GPU::BindChannel(s32 channel_id) {
|
||||||
|
|
|
@ -149,7 +149,7 @@ public:
|
||||||
|
|
||||||
std::shared_ptr<Control::ChannelState> AllocateChannel();
|
std::shared_ptr<Control::ChannelState> AllocateChannel();
|
||||||
|
|
||||||
void InitChannel(Control::ChannelState& to_init);
|
void InitChannel(Control::ChannelState& to_init, u64 program_id);
|
||||||
|
|
||||||
void BindChannel(s32 channel_id);
|
void BindChannel(s32 channel_id);
|
||||||
|
|
||||||
|
|
|
@ -1054,37 +1054,16 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||||
regs.zeta.format == Tegra::DepthFormat::X8Z24_UNORM ||
|
regs.zeta.format == Tegra::DepthFormat::X8Z24_UNORM ||
|
||||||
regs.zeta.format == Tegra::DepthFormat::S8Z24_UNORM ||
|
regs.zeta.format == Tegra::DepthFormat::S8Z24_UNORM ||
|
||||||
regs.zeta.format == Tegra::DepthFormat::V8Z24_UNORM;
|
regs.zeta.format == Tegra::DepthFormat::V8Z24_UNORM;
|
||||||
bool force_unorm = ([&] {
|
if (is_d24 && !device.SupportsD24DepthBuffer() && program_id == 0x1006A800016E000ULL) {
|
||||||
if (!is_d24 || device.SupportsD24DepthBuffer()) {
|
// Only activate this in Super Smash Brothers Ultimate
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (device.IsExtDepthBiasControlSupported()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (!Settings::values.renderer_amdvlk_depth_bias_workaround) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// the base formulas can be obtained from here:
|
// the base formulas can be obtained from here:
|
||||||
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
|
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
|
||||||
const double rescale_factor =
|
const double rescale_factor =
|
||||||
static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
|
static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
|
||||||
units = static_cast<float>(static_cast<double>(units) * rescale_factor);
|
units = static_cast<float>(static_cast<double>(units) * rescale_factor);
|
||||||
return false;
|
|
||||||
})();
|
|
||||||
scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
|
|
||||||
factor = regs.slope_scale_depth_bias, force_unorm,
|
|
||||||
precise = device.HasExactDepthBiasControl()](vk::CommandBuffer cmdbuf) {
|
|
||||||
if (force_unorm) {
|
|
||||||
VkDepthBiasRepresentationInfoEXT info{
|
|
||||||
.sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
|
|
||||||
.pNext = nullptr,
|
|
||||||
.depthBiasRepresentation =
|
|
||||||
VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
|
|
||||||
.depthBiasExact = precise ? VK_TRUE : VK_FALSE,
|
|
||||||
};
|
|
||||||
cmdbuf.SetDepthBias(constant, clamp, factor, &info);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
|
||||||
|
factor = regs.slope_scale_depth_bias](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.SetDepthBias(constant, clamp, factor);
|
cmdbuf.SetDepthBias(constant, clamp, factor);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue