mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-11-22 19:00:14 -05:00
Merge pull request #2237 from bunnei/cache-host-addr
gpu: Use host address for caching instead of guest address.
This commit is contained in:
commit
47b622825c
26 changed files with 393 additions and 293 deletions
|
@ -10,6 +10,7 @@
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
|
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvmap.h"
|
#include "core/hle/service/nvdrv/devices/nvmap.h"
|
||||||
|
#include "core/memory.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
|
@ -178,7 +179,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
|
||||||
auto& gpu = system_instance.GPU();
|
auto& gpu = system_instance.GPU();
|
||||||
auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
|
auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
|
||||||
ASSERT(cpu_addr);
|
ASSERT(cpu_addr);
|
||||||
gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
|
gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size);
|
||||||
|
|
||||||
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
|
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
|
||||||
|
|
||||||
|
|
|
@ -67,8 +67,11 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
|
||||||
LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
|
LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
|
||||||
(base + size) * PAGE_SIZE);
|
(base + size) * PAGE_SIZE);
|
||||||
|
|
||||||
RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
|
// During boot, current_page_table might not be set yet, in which case we need not flush
|
||||||
FlushMode::FlushAndInvalidate);
|
if (current_page_table) {
|
||||||
|
RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
|
||||||
|
FlushMode::FlushAndInvalidate);
|
||||||
|
}
|
||||||
|
|
||||||
VAddr end = base + size;
|
VAddr end = base + size;
|
||||||
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
|
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
|
||||||
|
@ -359,13 +362,13 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
|
||||||
auto& gpu = system_instance.GPU();
|
auto& gpu = system_instance.GPU();
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case FlushMode::Flush:
|
case FlushMode::Flush:
|
||||||
gpu.FlushRegion(overlap_start, overlap_size);
|
gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
|
||||||
break;
|
break;
|
||||||
case FlushMode::Invalidate:
|
case FlushMode::Invalidate:
|
||||||
gpu.InvalidateRegion(overlap_start, overlap_size);
|
gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
|
||||||
break;
|
break;
|
||||||
case FlushMode::FlushAndInvalidate:
|
case FlushMode::FlushAndInvalidate:
|
||||||
gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
|
gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "video_core/engines/kepler_memory.h"
|
#include "video_core/engines/kepler_memory.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
#include "video_core/renderer_base.h"
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
|
||||||
|
@ -48,7 +49,8 @@ void KeplerMemory::ProcessData(u32 data) {
|
||||||
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
|
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
|
||||||
// We do this before actually writing the new data because the destination address might contain
|
// We do this before actually writing the new data because the destination address might contain
|
||||||
// a dirty surface that will have to be written back to memory.
|
// a dirty surface that will have to be written back to memory.
|
||||||
Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
|
system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)),
|
||||||
|
sizeof(u32));
|
||||||
|
|
||||||
Memory::Write32(*dest_address, data);
|
Memory::Write32(*dest_address, data);
|
||||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||||
|
|
|
@ -396,7 +396,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
|
||||||
const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
|
const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
|
||||||
ASSERT_MSG(address, "Invalid GPU address");
|
ASSERT_MSG(address, "Invalid GPU address");
|
||||||
|
|
||||||
Memory::Write32(*address, value);
|
u8* ptr{Memory::GetPointer(*address)};
|
||||||
|
rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
|
||||||
|
std::memcpy(ptr, &value, sizeof(u32));
|
||||||
|
|
||||||
dirty_flags.OnMemoryWrite();
|
dirty_flags.OnMemoryWrite();
|
||||||
|
|
||||||
// Increment the current buffer position.
|
// Increment the current buffer position.
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/maxwell_dma.h"
|
#include "video_core/engines/maxwell_dma.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
#include "video_core/renderer_base.h"
|
||||||
#include "video_core/textures/decoders.h"
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
@ -92,12 +93,14 @@ void MaxwellDMA::HandleCopy() {
|
||||||
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
|
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
|
||||||
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
|
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
|
||||||
// copying.
|
// copying.
|
||||||
Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
|
Core::System::GetInstance().Renderer().Rasterizer().FlushRegion(
|
||||||
|
ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size);
|
||||||
|
|
||||||
// We have to invalidate the destination region to evict any outdated surfaces from the
|
// We have to invalidate the destination region to evict any outdated surfaces from the
|
||||||
// cache. We do this before actually writing the new data because the destination address
|
// cache. We do this before actually writing the new data because the destination address
|
||||||
// might contain a dirty surface that will have to be written back to memory.
|
// might contain a dirty surface that will have to be written back to memory.
|
||||||
Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
|
Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion(
|
||||||
|
ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size);
|
||||||
};
|
};
|
||||||
|
|
||||||
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
|
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
|
||||||
|
|
|
@ -11,6 +11,11 @@
|
||||||
#include "video_core/dma_pusher.h"
|
#include "video_core/dma_pusher.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
|
/// Integer type used to key cache entries; wide enough to hold a host pointer value.
using CacheAddr = std::uintptr_t;

/// Reinterprets a host pointer as the integer key used for cache lookups.
/// @param host_ptr Host pointer to convert; it is not dereferenced.
/// @return The pointer value as a CacheAddr.
inline CacheAddr ToCacheAddr(const void* host_ptr) {
    const auto cache_key = reinterpret_cast<CacheAddr>(host_ptr);
    return cache_key;
}
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
class System;
|
class System;
|
||||||
}
|
}
|
||||||
|
@ -209,13 +214,13 @@ public:
|
||||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
|
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||||
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||||
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void ProcessBindMethod(const MethodCall& method_call);
|
void ProcessBindMethod(const MethodCall& method_call);
|
||||||
|
|
|
@ -22,15 +22,15 @@ void GPUAsynch::SwapBuffers(
|
||||||
gpu_thread.SwapBuffers(std::move(framebuffer));
|
gpu_thread.SwapBuffers(std::move(framebuffer));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
|
void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
|
||||||
gpu_thread.FlushRegion(addr, size);
|
gpu_thread.FlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
|
void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
gpu_thread.InvalidateRegion(addr, size);
|
gpu_thread.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
gpu_thread.FlushAndInvalidateRegion(addr, size);
|
gpu_thread.FlushAndInvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,9 +26,9 @@ public:
|
||||||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||||
void SwapBuffers(
|
void SwapBuffers(
|
||||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
||||||
void FlushRegion(VAddr addr, u64 size) override;
|
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
GPUThread::ThreadManager gpu_thread;
|
GPUThread::ThreadManager gpu_thread;
|
||||||
|
|
|
@ -22,15 +22,15 @@ void GPUSynch::SwapBuffers(
|
||||||
renderer.SwapBuffers(std::move(framebuffer));
|
renderer.SwapBuffers(std::move(framebuffer));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
|
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
|
||||||
renderer.Rasterizer().FlushRegion(addr, size);
|
renderer.Rasterizer().FlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
|
void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
renderer.Rasterizer().InvalidateRegion(addr, size);
|
renderer.Rasterizer().InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
|
renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,9 +21,9 @@ public:
|
||||||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||||
void SwapBuffers(
|
void SwapBuffers(
|
||||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
||||||
void FlushRegion(VAddr addr, u64 size) override;
|
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -5,7 +5,6 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "core/frontend/scope_acquire_window_context.h"
|
#include "core/frontend/scope_acquire_window_context.h"
|
||||||
#include "core/settings.h"
|
|
||||||
#include "video_core/dma_pusher.h"
|
#include "video_core/dma_pusher.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/gpu_thread.h"
|
#include "video_core/gpu_thread.h"
|
||||||
|
@ -13,38 +12,13 @@
|
||||||
|
|
||||||
namespace VideoCommon::GPUThread {
|
namespace VideoCommon::GPUThread {
|
||||||
|
|
||||||
/// Executes a single GPU thread command
|
|
||||||
static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
|
|
||||||
Tegra::DmaPusher& dma_pusher) {
|
|
||||||
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
|
|
||||||
dma_pusher.Push(std::move(submit_list->entries));
|
|
||||||
dma_pusher.DispatchCalls();
|
|
||||||
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
|
|
||||||
renderer.SwapBuffers(data->framebuffer);
|
|
||||||
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
|
|
||||||
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
|
||||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
|
|
||||||
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
|
|
||||||
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
|
|
||||||
renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
|
|
||||||
} else {
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Runs the GPU thread
|
/// Runs the GPU thread
|
||||||
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
|
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
|
||||||
SynchState& state) {
|
SynchState& state) {
|
||||||
|
|
||||||
MicroProfileOnThreadCreate("GpuThread");
|
MicroProfileOnThreadCreate("GpuThread");
|
||||||
|
|
||||||
auto WaitForWakeup = [&]() {
|
|
||||||
std::unique_lock<std::mutex> lock{state.signal_mutex};
|
|
||||||
state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
|
|
||||||
};
|
|
||||||
|
|
||||||
// Wait for first GPU command before acquiring the window context
|
// Wait for first GPU command before acquiring the window context
|
||||||
WaitForWakeup();
|
state.WaitForCommands();
|
||||||
|
|
||||||
// If emulation was stopped during disk shader loading, abort before trying to acquire context
|
// If emulation was stopped during disk shader loading, abort before trying to acquire context
|
||||||
if (!state.is_running) {
|
if (!state.is_running) {
|
||||||
|
@ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
|
||||||
|
|
||||||
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
|
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
|
||||||
|
|
||||||
|
CommandDataContainer next;
|
||||||
while (state.is_running) {
|
while (state.is_running) {
|
||||||
if (!state.is_running) {
|
state.WaitForCommands();
|
||||||
return;
|
while (!state.queue.Empty()) {
|
||||||
|
state.queue.Pop(next);
|
||||||
|
if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
|
||||||
|
dma_pusher.Push(std::move(submit_list->entries));
|
||||||
|
dma_pusher.DispatchCalls();
|
||||||
|
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
|
||||||
|
state.DecrementFramesCounter();
|
||||||
|
renderer.SwapBuffers(std::move(data->framebuffer));
|
||||||
|
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
|
||||||
|
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
||||||
|
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||||
|
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
|
||||||
|
} else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
|
||||||
// Thread has been woken up, so make the previous write queue the next read queue
|
|
||||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
|
||||||
std::swap(state.push_queue, state.pop_queue);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Execute all of the GPU commands
|
|
||||||
while (!state.pop_queue->empty()) {
|
|
||||||
ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
|
|
||||||
state.pop_queue->pop();
|
|
||||||
}
|
|
||||||
|
|
||||||
state.UpdateIdleState();
|
|
||||||
|
|
||||||
// Signal that the GPU thread has finished processing commands
|
|
||||||
if (state.is_idle) {
|
|
||||||
state.idle_condition.notify_one();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for CPU thread to send more GPU commands
|
|
||||||
WaitForWakeup();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
|
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
|
||||||
: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
|
: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
|
||||||
std::ref(dma_pusher), std::ref(state)},
|
std::ref(dma_pusher), std::ref(state)} {}
|
||||||
thread_id{thread.get_id()} {}
|
|
||||||
|
|
||||||
ThreadManager::~ThreadManager() {
|
ThreadManager::~ThreadManager() {
|
||||||
{
|
// Notify GPU thread that a shutdown is pending
|
||||||
// Notify GPU thread that a shutdown is pending
|
PushCommand(EndProcessingCommand());
|
||||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
|
||||||
state.is_running = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
state.signal_condition.notify_one();
|
|
||||||
thread.join();
|
thread.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
||||||
if (entries.empty()) {
|
PushCommand(SubmitListCommand(std::move(entries)));
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
PushCommand(SubmitListCommand(std::move(entries)), false, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::SwapBuffers(
|
void ThreadManager::SwapBuffers(
|
||||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
||||||
PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
|
state.IncrementFramesCounter();
|
||||||
|
PushCommand(SwapBuffersCommand(std::move(framebuffer)));
|
||||||
|
state.WaitForFrames();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
|
||||||
// Block the CPU when using accurate emulation
|
PushCommand(FlushRegionCommand(addr, size));
|
||||||
PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
PushCommand(InvalidateRegionCommand(addr, size), true, true);
|
if (state.queue.Empty()) {
|
||||||
|
// It's quicker to invalidate a single region on the CPU if the queue is already empty
|
||||||
|
renderer.Rasterizer().InvalidateRegion(addr, size);
|
||||||
|
} else {
|
||||||
|
PushCommand(InvalidateRegionCommand(addr, size));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
|
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
|
||||||
InvalidateRegion(addr, size);
|
InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
|
void ThreadManager::PushCommand(CommandData&& command_data) {
|
||||||
{
|
state.queue.Push(CommandDataContainer(std::move(command_data)));
|
||||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
state.SignalCommands();
|
||||||
|
|
||||||
if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
|
|
||||||
// Execute the command synchronously on the current thread
|
|
||||||
ExecuteCommand(&command_data, renderer, dma_pusher);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Push the command to the GPU thread
|
|
||||||
state.UpdateIdleState();
|
|
||||||
state.push_queue->emplace(command_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Signal the GPU thread that commands are pending
|
|
||||||
state.signal_condition.notify_one();
|
|
||||||
|
|
||||||
if (wait_for_idle) {
|
|
||||||
// Wait for the GPU to be idle (all commands to be executed)
|
|
||||||
std::unique_lock<std::mutex> lock{state.idle_mutex};
|
|
||||||
state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace VideoCommon::GPUThread
|
} // namespace VideoCommon::GPUThread
|
||||||
|
|
|
@ -13,6 +13,9 @@
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <variant>
|
#include <variant>
|
||||||
|
|
||||||
|
#include "common/threadsafe_queue.h"
|
||||||
|
#include "video_core/gpu.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
struct FramebufferConfig;
|
struct FramebufferConfig;
|
||||||
class DmaPusher;
|
class DmaPusher;
|
||||||
|
@ -24,6 +27,9 @@ class RendererBase;
|
||||||
|
|
||||||
namespace VideoCommon::GPUThread {
|
namespace VideoCommon::GPUThread {
|
||||||
|
|
||||||
|
/// Command to signal to the GPU thread that processing has ended
|
||||||
|
struct EndProcessingCommand final {};
|
||||||
|
|
||||||
/// Command to signal to the GPU thread that a command list is ready for processing
|
/// Command to signal to the GPU thread that a command list is ready for processing
|
||||||
struct SubmitListCommand final {
|
struct SubmitListCommand final {
|
||||||
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
|
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
|
||||||
|
@ -36,59 +42,110 @@ struct SwapBuffersCommand final {
|
||||||
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
|
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
|
||||||
: framebuffer{std::move(framebuffer)} {}
|
: framebuffer{std::move(framebuffer)} {}
|
||||||
|
|
||||||
std::optional<const Tegra::FramebufferConfig> framebuffer;
|
std::optional<Tegra::FramebufferConfig> framebuffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Command to signal to the GPU thread to flush a region
|
/// Command to signal to the GPU thread to flush a region
|
||||||
struct FlushRegionCommand final {
|
struct FlushRegionCommand final {
|
||||||
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||||
|
|
||||||
const VAddr addr;
|
CacheAddr addr;
|
||||||
const u64 size;
|
u64 size;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Command to signal to the GPU thread to invalidate a region
|
/// Command to signal to the GPU thread to invalidate a region
|
||||||
struct InvalidateRegionCommand final {
|
struct InvalidateRegionCommand final {
|
||||||
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||||
|
|
||||||
const VAddr addr;
|
CacheAddr addr;
|
||||||
const u64 size;
|
u64 size;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Command to signal to the GPU thread to flush and invalidate a region
|
/// Command to signal to the GPU thread to flush and invalidate a region
|
||||||
struct FlushAndInvalidateRegionCommand final {
|
struct FlushAndInvalidateRegionCommand final {
|
||||||
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
|
explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
|
||||||
: addr{addr}, size{size} {}
|
: addr{addr}, size{size} {}
|
||||||
|
|
||||||
const VAddr addr;
|
CacheAddr addr;
|
||||||
const u64 size;
|
u64 size;
|
||||||
};
|
};
|
||||||
|
|
||||||
using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
using CommandData =
|
||||||
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
|
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
||||||
|
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
|
||||||
|
|
||||||
|
struct CommandDataContainer {
|
||||||
|
CommandDataContainer() = default;
|
||||||
|
|
||||||
|
CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
|
||||||
|
|
||||||
|
CommandDataContainer& operator=(const CommandDataContainer& t) {
|
||||||
|
data = std::move(t.data);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
CommandData data;
|
||||||
|
};
|
||||||
|
|
||||||
/// Struct used to synchronize the GPU thread
|
/// Struct used to synchronize the GPU thread
|
||||||
struct SynchState final {
|
struct SynchState final {
|
||||||
std::atomic<bool> is_running{true};
|
std::atomic_bool is_running{true};
|
||||||
std::atomic<bool> is_idle{true};
|
std::atomic_int queued_frame_count{};
|
||||||
std::condition_variable signal_condition;
|
std::mutex frames_mutex;
|
||||||
std::mutex signal_mutex;
|
std::mutex commands_mutex;
|
||||||
std::condition_variable idle_condition;
|
std::condition_variable commands_condition;
|
||||||
std::mutex idle_mutex;
|
std::condition_variable frames_condition;
|
||||||
|
|
||||||
// We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
|
void IncrementFramesCounter() {
|
||||||
// one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
|
std::lock_guard<std::mutex> lock{frames_mutex};
|
||||||
// empty. This allows for efficient thread-safe access, as it does not require any copies.
|
++queued_frame_count;
|
||||||
|
|
||||||
using CommandQueue = std::queue<CommandData>;
|
|
||||||
std::array<CommandQueue, 2> command_queues;
|
|
||||||
CommandQueue* push_queue{&command_queues[0]};
|
|
||||||
CommandQueue* pop_queue{&command_queues[1]};
|
|
||||||
|
|
||||||
void UpdateIdleState() {
|
|
||||||
std::lock_guard<std::mutex> lock{idle_mutex};
|
|
||||||
is_idle = command_queues[0].empty() && command_queues[1].empty();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DecrementFramesCounter() {
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock{frames_mutex};
|
||||||
|
--queued_frame_count;
|
||||||
|
|
||||||
|
if (queued_frame_count) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
frames_condition.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
void WaitForFrames() {
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock{frames_mutex};
|
||||||
|
if (!queued_frame_count) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the GPU to be idle (all commands to be executed)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock{frames_mutex};
|
||||||
|
frames_condition.wait(lock, [this] { return !queued_frame_count; });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void SignalCommands() {
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock{commands_mutex};
|
||||||
|
if (queue.Empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
commands_condition.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
void WaitForCommands() {
|
||||||
|
std::unique_lock<std::mutex> lock{commands_mutex};
|
||||||
|
commands_condition.wait(lock, [this] { return !queue.Empty(); });
|
||||||
|
}
|
||||||
|
|
||||||
|
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
|
||||||
|
CommandQueue queue;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Class used to manage the GPU thread
|
/// Class used to manage the GPU thread
|
||||||
|
@ -105,22 +162,17 @@ public:
|
||||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
|
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||||
void FlushRegion(VAddr addr, u64 size);
|
void FlushRegion(CacheAddr addr, u64 size);
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||||
void InvalidateRegion(VAddr addr, u64 size);
|
void InvalidateRegion(CacheAddr addr, u64 size);
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Pushes a command to be executed by the GPU thread
|
/// Pushes a command to be executed by the GPU thread
|
||||||
void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
|
void PushCommand(CommandData&& command_data);
|
||||||
|
|
||||||
/// Returns true if this is called by the GPU thread
|
|
||||||
bool IsGpuThread() const {
|
|
||||||
return std::this_thread::get_id() == thread_id;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SynchState state;
|
SynchState state;
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
|
@ -12,14 +13,26 @@
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "core/settings.h"
|
#include "core/settings.h"
|
||||||
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
|
||||||
class RasterizerCacheObject {
|
class RasterizerCacheObject {
|
||||||
public:
|
public:
|
||||||
|
explicit RasterizerCacheObject(const u8* host_ptr)
|
||||||
|
: host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
|
||||||
|
|
||||||
virtual ~RasterizerCacheObject();
|
virtual ~RasterizerCacheObject();
|
||||||
|
|
||||||
|
CacheAddr GetCacheAddr() const {
|
||||||
|
return cache_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u8* GetHostPtr() const {
|
||||||
|
return host_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
/// Gets the address of the shader in guest memory, required for cache management
|
/// Gets the address of the shader in guest memory, required for cache management
|
||||||
virtual VAddr GetAddr() const = 0;
|
virtual VAddr GetCpuAddr() const = 0;
|
||||||
|
|
||||||
/// Gets the size of the shader in guest memory, required for cache management
|
/// Gets the size of the shader in guest memory, required for cache management
|
||||||
virtual std::size_t GetSizeInBytes() const = 0;
|
virtual std::size_t GetSizeInBytes() const = 0;
|
||||||
|
@ -58,6 +71,8 @@ private:
|
||||||
bool is_registered{}; ///< Whether the object is currently registered with the cache
|
bool is_registered{}; ///< Whether the object is currently registered with the cache
|
||||||
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
|
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
|
||||||
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
|
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
|
||||||
|
CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
|
||||||
|
const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
@ -68,7 +83,9 @@ public:
|
||||||
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
|
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
|
||||||
|
|
||||||
/// Write any cached resources overlapping the specified region back to memory
|
/// Write any cached resources overlapping the specified region back to memory
|
||||||
void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
|
void FlushRegion(CacheAddr addr, std::size_t size) {
|
||||||
|
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||||
|
|
||||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
||||||
for (auto& object : objects) {
|
for (auto& object : objects) {
|
||||||
FlushObject(object);
|
FlushObject(object);
|
||||||
|
@ -76,7 +93,9 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mark the specified region as being invalidated
|
/// Mark the specified region as being invalidated
|
||||||
void InvalidateRegion(VAddr addr, u64 size) {
|
void InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
|
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||||
|
|
||||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
||||||
for (auto& object : objects) {
|
for (auto& object : objects) {
|
||||||
if (!object->IsRegistered()) {
|
if (!object->IsRegistered()) {
|
||||||
|
@ -89,48 +108,60 @@ public:
|
||||||
|
|
||||||
/// Invalidates everything in the cache
|
/// Invalidates everything in the cache
|
||||||
void InvalidateAll() {
|
void InvalidateAll() {
|
||||||
|
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||||
|
|
||||||
while (interval_cache.begin() != interval_cache.end()) {
|
while (interval_cache.begin() != interval_cache.end()) {
|
||||||
Unregister(*interval_cache.begin()->second.begin());
|
Unregister(*interval_cache.begin()->second.begin());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/// Tries to get an object from the cache with the specified address
|
/// Tries to get an object from the cache with the specified cache address
|
||||||
T TryGet(VAddr addr) const {
|
T TryGet(CacheAddr addr) const {
|
||||||
const auto iter = map_cache.find(addr);
|
const auto iter = map_cache.find(addr);
|
||||||
if (iter != map_cache.end())
|
if (iter != map_cache.end())
|
||||||
return iter->second;
|
return iter->second;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
T TryGet(const void* addr) const {
|
||||||
|
const auto iter = map_cache.find(ToCacheAddr(addr));
|
||||||
|
if (iter != map_cache.end())
|
||||||
|
return iter->second;
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
/// Register an object into the cache
|
/// Register an object into the cache
|
||||||
void Register(const T& object) {
|
void Register(const T& object) {
|
||||||
|
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||||
|
|
||||||
object->SetIsRegistered(true);
|
object->SetIsRegistered(true);
|
||||||
interval_cache.add({GetInterval(object), ObjectSet{object}});
|
interval_cache.add({GetInterval(object), ObjectSet{object}});
|
||||||
map_cache.insert({object->GetAddr(), object});
|
map_cache.insert({object->GetCacheAddr(), object});
|
||||||
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
|
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Unregisters an object from the cache
|
/// Unregisters an object from the cache
|
||||||
void Unregister(const T& object) {
|
void Unregister(const T& object) {
|
||||||
object->SetIsRegistered(false);
|
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||||
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
|
|
||||||
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
|
|
||||||
if (Settings::values.use_accurate_gpu_emulation) {
|
|
||||||
FlushObject(object);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
object->SetIsRegistered(false);
|
||||||
|
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
|
||||||
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
|
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
|
||||||
map_cache.erase(object->GetAddr());
|
map_cache.erase(object->GetCacheAddr());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a ticks counter used for tracking when cached objects were last modified
|
/// Returns a ticks counter used for tracking when cached objects were last modified
|
||||||
u64 GetModifiedTicks() {
|
u64 GetModifiedTicks() {
|
||||||
|
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||||
|
|
||||||
return ++modified_ticks;
|
return ++modified_ticks;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Flushes the specified object, updating appropriate cache state as needed
|
/// Flushes the specified object, updating appropriate cache state as needed
|
||||||
void FlushObject(const T& object) {
|
void FlushObject(const T& object) {
|
||||||
|
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||||
|
|
||||||
if (!object->IsDirty()) {
|
if (!object->IsDirty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -140,7 +171,7 @@ protected:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Returns a list of cached objects from the specified memory region, ordered by access time
|
/// Returns a list of cached objects from the specified memory region, ordered by access time
|
||||||
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
|
std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
@ -164,17 +195,18 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
using ObjectSet = std::set<T>;
|
using ObjectSet = std::set<T>;
|
||||||
using ObjectCache = std::unordered_map<VAddr, T>;
|
using ObjectCache = std::unordered_map<CacheAddr, T>;
|
||||||
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
|
using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
|
||||||
using ObjectInterval = typename IntervalCache::interval_type;
|
using ObjectInterval = typename IntervalCache::interval_type;
|
||||||
|
|
||||||
static auto GetInterval(const T& object) {
|
static auto GetInterval(const T& object) {
|
||||||
return ObjectInterval::right_open(object->GetAddr(),
|
return ObjectInterval::right_open(object->GetCacheAddr(),
|
||||||
object->GetAddr() + object->GetSizeInBytes());
|
object->GetCacheAddr() + object->GetSizeInBytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
ObjectCache map_cache;
|
ObjectCache map_cache;
|
||||||
IntervalCache interval_cache; ///< Cache of objects
|
IntervalCache interval_cache; ///< Cache of objects
|
||||||
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
|
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
std::recursive_mutex mutex;
|
||||||
};
|
};
|
||||||
|
|
|
@ -35,14 +35,14 @@ public:
|
||||||
virtual void FlushAll() = 0;
|
virtual void FlushAll() = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||||
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||||
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||||
/// and invalidated
|
/// and invalidated
|
||||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Attempt to use a faster method to perform a surface copy
|
/// Attempt to use a faster method to perform a surface copy
|
||||||
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||||
|
@ -63,7 +63,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Increase/decrease the number of object in pages touching the specified region
|
/// Increase/decrease the number of object in pages touching the specified region
|
||||||
virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
|
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
|
||||||
|
|
||||||
/// Initialize disk cached resources for the game being emulated
|
/// Initialize disk cached resources for the game being emulated
|
||||||
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
|
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
|
||||||
|
|
|
@ -13,6 +13,11 @@
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
|
||||||
|
std::size_t alignment, u8* host_ptr)
|
||||||
|
: cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
|
||||||
|
host_ptr} {}
|
||||||
|
|
||||||
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
|
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
|
||||||
: RasterizerCache{rasterizer}, stream_buffer(size, true) {}
|
: RasterizerCache{rasterizer}, stream_buffer(size, true) {}
|
||||||
|
|
||||||
|
@ -26,11 +31,12 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
|
||||||
// TODO: Figure out which size is the best for given games.
|
// TODO: Figure out which size is the best for given games.
|
||||||
cache &= size >= 2048;
|
cache &= size >= 2048;
|
||||||
|
|
||||||
|
const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
|
||||||
if (cache) {
|
if (cache) {
|
||||||
auto entry = TryGet(*cpu_addr);
|
auto entry = TryGet(host_ptr);
|
||||||
if (entry) {
|
if (entry) {
|
||||||
if (entry->size >= size && entry->alignment == alignment) {
|
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
|
||||||
return entry->offset;
|
return entry->GetOffset();
|
||||||
}
|
}
|
||||||
Unregister(entry);
|
Unregister(entry);
|
||||||
}
|
}
|
||||||
|
@ -39,17 +45,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
|
||||||
AlignBuffer(alignment);
|
AlignBuffer(alignment);
|
||||||
const GLintptr uploaded_offset = buffer_offset;
|
const GLintptr uploaded_offset = buffer_offset;
|
||||||
|
|
||||||
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
|
if (!host_ptr) {
|
||||||
|
return uploaded_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memcpy(buffer_ptr, host_ptr, size);
|
||||||
buffer_ptr += size;
|
buffer_ptr += size;
|
||||||
buffer_offset += size;
|
buffer_offset += size;
|
||||||
|
|
||||||
if (cache) {
|
if (cache) {
|
||||||
auto entry = std::make_shared<CachedBufferEntry>();
|
auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
|
||||||
entry->offset = uploaded_offset;
|
alignment, host_ptr);
|
||||||
entry->size = size;
|
|
||||||
entry->alignment = alignment;
|
|
||||||
entry->addr = *cpu_addr;
|
|
||||||
Register(entry);
|
Register(entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,22 +17,39 @@ namespace OpenGL {
|
||||||
|
|
||||||
class RasterizerOpenGL;
|
class RasterizerOpenGL;
|
||||||
|
|
||||||
struct CachedBufferEntry final : public RasterizerCacheObject {
|
class CachedBufferEntry final : public RasterizerCacheObject {
|
||||||
VAddr GetAddr() const override {
|
public:
|
||||||
return addr;
|
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
|
||||||
|
std::size_t alignment, u8* host_ptr);
|
||||||
|
|
||||||
|
VAddr GetCpuAddr() const override {
|
||||||
|
return cpu_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t GetSizeInBytes() const override {
|
std::size_t GetSizeInBytes() const override {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::size_t GetSize() const {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
GLintptr GetOffset() const {
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t GetAlignment() const {
|
||||||
|
return alignment;
|
||||||
|
}
|
||||||
|
|
||||||
// We do not have to flush this cache as things in it are never modified by us.
|
// We do not have to flush this cache as things in it are never modified by us.
|
||||||
void Flush() override {}
|
void Flush() override {}
|
||||||
|
|
||||||
VAddr addr;
|
private:
|
||||||
std::size_t size;
|
VAddr cpu_addr{};
|
||||||
GLintptr offset;
|
std::size_t size{};
|
||||||
std::size_t alignment;
|
GLintptr offset{};
|
||||||
|
std::size_t alignment{};
|
||||||
};
|
};
|
||||||
|
|
||||||
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
|
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
|
||||||
|
|
|
@ -15,12 +15,13 @@
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
|
CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
|
||||||
|
: cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} {
|
||||||
buffer.Create();
|
buffer.Create();
|
||||||
// Bind and unbind the buffer so it gets allocated by the driver
|
// Bind and unbind the buffer so it gets allocated by the driver
|
||||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
|
||||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
|
||||||
LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
|
LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
|
||||||
}
|
}
|
||||||
|
|
||||||
void CachedGlobalRegion::Reload(u32 size_) {
|
void CachedGlobalRegion::Reload(u32 size_) {
|
||||||
|
@ -35,7 +36,7 @@ void CachedGlobalRegion::Reload(u32 size_) {
|
||||||
|
|
||||||
// TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
|
// TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
|
||||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
|
||||||
glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
|
glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
|
||||||
}
|
}
|
||||||
|
|
||||||
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
|
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
|
||||||
|
@ -46,11 +47,11 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
|
||||||
return search->second;
|
return search->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
|
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) {
|
||||||
GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
|
GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
|
||||||
if (!region) {
|
if (!region) {
|
||||||
// No reserved surface available, create a new one and reserve it
|
// No reserved surface available, create a new one and reserve it
|
||||||
region = std::make_shared<CachedGlobalRegion>(addr, size);
|
region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr);
|
||||||
ReserveGlobalRegion(region);
|
ReserveGlobalRegion(region);
|
||||||
}
|
}
|
||||||
region->Reload(size);
|
region->Reload(size);
|
||||||
|
@ -58,7 +59,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
|
||||||
}
|
}
|
||||||
|
|
||||||
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
|
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
|
||||||
reserve.insert_or_assign(region->GetAddr(), std::move(region));
|
reserve.insert_or_assign(region->GetCpuAddr(), std::move(region));
|
||||||
}
|
}
|
||||||
|
|
||||||
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
|
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
|
||||||
|
@ -80,11 +81,12 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
|
||||||
ASSERT(actual_addr);
|
ASSERT(actual_addr);
|
||||||
|
|
||||||
// Look up global region in the cache based on address
|
// Look up global region in the cache based on address
|
||||||
GlobalRegion region = TryGet(*actual_addr);
|
const auto& host_ptr{Memory::GetPointer(*actual_addr)};
|
||||||
|
GlobalRegion region{TryGet(host_ptr)};
|
||||||
|
|
||||||
if (!region) {
|
if (!region) {
|
||||||
// No global region found - create a new one
|
// No global region found - create a new one
|
||||||
region = GetUncachedGlobalRegion(*actual_addr, size);
|
region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr);
|
||||||
Register(region);
|
Register(region);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
|
||||||
|
|
||||||
class CachedGlobalRegion final : public RasterizerCacheObject {
|
class CachedGlobalRegion final : public RasterizerCacheObject {
|
||||||
public:
|
public:
|
||||||
explicit CachedGlobalRegion(VAddr addr, u32 size);
|
explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
|
||||||
|
|
||||||
/// Gets the address of the shader in guest memory, required for cache management
|
VAddr GetCpuAddr() const override {
|
||||||
VAddr GetAddr() const override {
|
return cpu_addr;
|
||||||
return addr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Gets the size of the shader in guest memory, required for cache management
|
|
||||||
std::size_t GetSizeInBytes() const override {
|
std::size_t GetSizeInBytes() const override {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
@ -53,9 +51,8 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
VAddr addr{};
|
VAddr cpu_addr{};
|
||||||
u32 size{};
|
u32 size{};
|
||||||
|
|
||||||
OGLBuffer buffer;
|
OGLBuffer buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -69,7 +66,7 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
|
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
|
||||||
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
|
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr);
|
||||||
void ReserveGlobalRegion(GlobalRegion region);
|
void ReserveGlobalRegion(GlobalRegion region);
|
||||||
|
|
||||||
std::unordered_map<VAddr, GlobalRegion> reserve;
|
std::unordered_map<VAddr, GlobalRegion> reserve;
|
||||||
|
|
|
@ -449,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
|
||||||
return boost::make_iterator_range(map.equal_range(interval));
|
return boost::make_iterator_range(map.equal_range(interval));
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
|
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
|
||||||
const u64 page_start{addr >> Memory::PAGE_BITS};
|
const u64 page_start{addr >> Memory::PAGE_BITS};
|
||||||
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
|
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
|
||||||
|
|
||||||
|
@ -747,12 +747,12 @@ void RasterizerOpenGL::DrawArrays() {
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAll() {}
|
void RasterizerOpenGL::FlushAll() {}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
res_cache.FlushRegion(addr, size);
|
res_cache.FlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
res_cache.InvalidateRegion(addr, size);
|
res_cache.InvalidateRegion(addr, size);
|
||||||
shader_cache.InvalidateRegion(addr, size);
|
shader_cache.InvalidateRegion(addr, size);
|
||||||
|
@ -760,7 +760,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
buffer_cache.InvalidateRegion(addr, size);
|
buffer_cache.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
FlushRegion(addr, size);
|
FlushRegion(addr, size);
|
||||||
InvalidateRegion(addr, size);
|
InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
@ -782,7 +782,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
|
|
||||||
const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
|
const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
|
||||||
if (!surface) {
|
if (!surface) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,9 +57,9 @@ public:
|
||||||
void DrawArrays() override;
|
void DrawArrays() override;
|
||||||
void Clear() override;
|
void Clear() override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
void FlushRegion(VAddr addr, u64 size) override;
|
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||||
const Common::Rectangle<u32>& src_rect,
|
const Common::Rectangle<u32>& src_rect,
|
||||||
|
@ -67,7 +67,7 @@ public:
|
||||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||||
u32 pixel_stride) override;
|
u32 pixel_stride) override;
|
||||||
bool AccelerateDrawBatch(bool is_indexed) override;
|
bool AccelerateDrawBatch(bool is_indexed) override;
|
||||||
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
|
void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
|
||||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||||
|
|
||||||
|
|
|
@ -61,6 +61,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
|
||||||
|
|
||||||
addr = cpu_addr ? *cpu_addr : 0;
|
addr = cpu_addr ? *cpu_addr : 0;
|
||||||
gpu_addr = gpu_addr_;
|
gpu_addr = gpu_addr_;
|
||||||
|
host_ptr = Memory::GetPointer(addr);
|
||||||
size_in_bytes = SizeInBytesRaw();
|
size_in_bytes = SizeInBytesRaw();
|
||||||
|
|
||||||
if (IsPixelFormatASTC(pixel_format)) {
|
if (IsPixelFormatASTC(pixel_format)) {
|
||||||
|
@ -563,8 +564,8 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
|
||||||
}
|
}
|
||||||
|
|
||||||
CachedSurface::CachedSurface(const SurfaceParams& params)
|
CachedSurface::CachedSurface(const SurfaceParams& params)
|
||||||
: params(params), gl_target(SurfaceTargetToGL(params.target)),
|
: params{params}, gl_target{SurfaceTargetToGL(params.target)},
|
||||||
cached_size_in_bytes(params.size_in_bytes) {
|
cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
|
||||||
texture.Create(gl_target);
|
texture.Create(gl_target);
|
||||||
|
|
||||||
// TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
|
// TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
|
||||||
|
@ -633,10 +634,9 @@ void CachedSurface::LoadGLBuffer() {
|
||||||
const u32 bpp = params.GetFormatBpp() / 8;
|
const u32 bpp = params.GetFormatBpp() / 8;
|
||||||
const u32 copy_size = params.width * bpp;
|
const u32 copy_size = params.width * bpp;
|
||||||
if (params.pitch == copy_size) {
|
if (params.pitch == copy_size) {
|
||||||
std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
|
std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
|
||||||
params.size_in_bytes_gl);
|
|
||||||
} else {
|
} else {
|
||||||
const u8* start = Memory::GetPointer(params.addr);
|
const u8* start{params.host_ptr};
|
||||||
u8* write_to = gl_buffer[0].data();
|
u8* write_to = gl_buffer[0].data();
|
||||||
for (u32 h = params.height; h > 0; h--) {
|
for (u32 h = params.height; h > 0; h--) {
|
||||||
std::memcpy(write_to, start, copy_size);
|
std::memcpy(write_to, start, copy_size);
|
||||||
|
@ -680,8 +680,6 @@ void CachedSurface::FlushGLBuffer() {
|
||||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||||
Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
|
Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
|
||||||
params.height, params.depth, true, true);
|
params.height, params.depth, true, true);
|
||||||
const u8* const texture_src_data = Memory::GetPointer(params.addr);
|
|
||||||
ASSERT(texture_src_data);
|
|
||||||
if (params.is_tiled) {
|
if (params.is_tiled) {
|
||||||
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
|
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
|
||||||
params.block_width, static_cast<u32>(params.target));
|
params.block_width, static_cast<u32>(params.target));
|
||||||
|
@ -691,9 +689,9 @@ void CachedSurface::FlushGLBuffer() {
|
||||||
const u32 bpp = params.GetFormatBpp() / 8;
|
const u32 bpp = params.GetFormatBpp() / 8;
|
||||||
const u32 copy_size = params.width * bpp;
|
const u32 copy_size = params.width * bpp;
|
||||||
if (params.pitch == copy_size) {
|
if (params.pitch == copy_size) {
|
||||||
std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
|
std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
|
||||||
} else {
|
} else {
|
||||||
u8* start = Memory::GetPointer(params.addr);
|
u8* start{params.host_ptr};
|
||||||
const u8* read_to = gl_buffer[0].data();
|
const u8* read_to = gl_buffer[0].data();
|
||||||
for (u32 h = params.height; h > 0; h--) {
|
for (u32 h = params.height; h > 0; h--) {
|
||||||
std::memcpy(start, read_to, copy_size);
|
std::memcpy(start, read_to, copy_size);
|
||||||
|
@ -932,7 +930,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look up surface in the cache based on address
|
// Look up surface in the cache based on address
|
||||||
Surface surface{TryGet(params.addr)};
|
Surface surface{TryGet(params.host_ptr)};
|
||||||
if (surface) {
|
if (surface) {
|
||||||
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
|
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
|
||||||
// Use the cached surface as-is unless it's not synced with memory
|
// Use the cached surface as-is unless it's not synced with memory
|
||||||
|
@ -986,7 +984,7 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
|
||||||
for (u32 layer = 0; layer < dst_params.depth; layer++) {
|
for (u32 layer = 0; layer < dst_params.depth; layer++) {
|
||||||
for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
|
for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
|
||||||
const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
|
const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
|
||||||
const Surface& copy = TryGet(sub_address);
|
const Surface& copy = TryGet(Memory::GetPointer(sub_address));
|
||||||
if (!copy)
|
if (!copy)
|
||||||
continue;
|
continue;
|
||||||
const auto& src_params{copy->GetSurfaceParams()};
|
const auto& src_params{copy->GetSurfaceParams()};
|
||||||
|
@ -1163,7 +1161,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
|
||||||
const auto& dst_params{dst_surface->GetSurfaceParams()};
|
const auto& dst_params{dst_surface->GetSurfaceParams()};
|
||||||
|
|
||||||
// Flush enough memory for both the source and destination surface
|
// Flush enough memory for both the source and destination surface
|
||||||
FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
|
FlushRegion(ToCacheAddr(src_params.host_ptr),
|
||||||
|
std::max(src_params.MemorySize(), dst_params.MemorySize()));
|
||||||
|
|
||||||
LoadSurface(dst_surface);
|
LoadSurface(dst_surface);
|
||||||
}
|
}
|
||||||
|
@ -1215,8 +1214,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
|
||||||
return new_surface;
|
return new_surface;
|
||||||
}
|
}
|
||||||
|
|
||||||
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
|
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
|
||||||
return TryGet(addr);
|
return TryGet(host_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
|
void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
|
||||||
|
@ -1267,7 +1266,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
|
||||||
src_params.height == dst_params.MipHeight(*level) &&
|
src_params.height == dst_params.MipHeight(*level) &&
|
||||||
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
|
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
|
||||||
const std::optional<u32> slot =
|
const std::optional<u32> slot =
|
||||||
TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
|
TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level);
|
||||||
if (slot.has_value()) {
|
if (slot.has_value()) {
|
||||||
glCopyImageSubData(render_surface->Texture().handle,
|
glCopyImageSubData(render_surface->Texture().handle,
|
||||||
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
|
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
|
||||||
|
@ -1283,8 +1282,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
|
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
|
||||||
const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
|
const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
|
||||||
const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
|
const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
|
||||||
if (bound2 > bound1)
|
if (bound2 > bound1)
|
||||||
return true;
|
return true;
|
||||||
const auto& dst_params = blitted_surface->GetSurfaceParams();
|
const auto& dst_params = blitted_surface->GetSurfaceParams();
|
||||||
|
@ -1327,7 +1326,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() {
|
||||||
void RasterizerCacheOpenGL::SignalPostDrawCall() {
|
void RasterizerCacheOpenGL::SignalPostDrawCall() {
|
||||||
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
|
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
|
||||||
if (current_color_buffers[i] != nullptr) {
|
if (current_color_buffers[i] != nullptr) {
|
||||||
Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
|
Surface intersect =
|
||||||
|
CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
|
||||||
if (intersect != nullptr) {
|
if (intersect != nullptr) {
|
||||||
PartialReinterpretSurface(current_color_buffers[i], intersect);
|
PartialReinterpretSurface(current_color_buffers[i], intersect);
|
||||||
texception = true;
|
texception = true;
|
||||||
|
|
|
@ -297,6 +297,7 @@ struct SurfaceParams {
|
||||||
bool srgb_conversion;
|
bool srgb_conversion;
|
||||||
// Parameters used for caching
|
// Parameters used for caching
|
||||||
VAddr addr;
|
VAddr addr;
|
||||||
|
u8* host_ptr;
|
||||||
Tegra::GPUVAddr gpu_addr;
|
Tegra::GPUVAddr gpu_addr;
|
||||||
std::size_t size_in_bytes;
|
std::size_t size_in_bytes;
|
||||||
std::size_t size_in_bytes_gl;
|
std::size_t size_in_bytes_gl;
|
||||||
|
@ -345,9 +346,9 @@ class RasterizerOpenGL;
|
||||||
|
|
||||||
class CachedSurface final : public RasterizerCacheObject {
|
class CachedSurface final : public RasterizerCacheObject {
|
||||||
public:
|
public:
|
||||||
CachedSurface(const SurfaceParams& params);
|
explicit CachedSurface(const SurfaceParams& params);
|
||||||
|
|
||||||
VAddr GetAddr() const override {
|
VAddr GetCpuAddr() const override {
|
||||||
return params.addr;
|
return params.addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -449,7 +450,7 @@ public:
|
||||||
Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
|
Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
|
||||||
|
|
||||||
/// Tries to find a framebuffer using on the provided CPU address
|
/// Tries to find a framebuffer using on the provided CPU address
|
||||||
Surface TryFindFramebufferSurface(VAddr addr) const;
|
Surface TryFindFramebufferSurface(const u8* host_ptr) const;
|
||||||
|
|
||||||
/// Copies the contents of one surface to another
|
/// Copies the contents of one surface to another
|
||||||
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
|
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
|
||||||
|
@ -506,12 +507,12 @@ private:
|
||||||
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
|
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
|
||||||
Surface last_depth_buffer;
|
Surface last_depth_buffer;
|
||||||
|
|
||||||
using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
|
using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
|
||||||
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
|
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
|
||||||
|
|
||||||
static auto GetReinterpretInterval(const Surface& object) {
|
static auto GetReinterpretInterval(const Surface& object) {
|
||||||
return SurfaceInterval::right_open(object->GetAddr() + 1,
|
return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
|
||||||
object->GetAddr() + object->GetMemorySize() - 1);
|
object->GetCacheAddr() + object->GetMemorySize() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reinterpreted surfaces are very fragil as the game may keep rendering into them.
|
// Reinterpreted surfaces are very fragil as the game may keep rendering into them.
|
||||||
|
@ -523,7 +524,7 @@ private:
|
||||||
reinterpret_surface->MarkReinterpreted();
|
reinterpret_surface->MarkReinterpreted();
|
||||||
}
|
}
|
||||||
|
|
||||||
Surface CollideOnReinterpretedSurface(VAddr addr) const {
|
Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
|
||||||
const SurfaceInterval interval{addr};
|
const SurfaceInterval interval{addr};
|
||||||
for (auto& pair :
|
for (auto& pair :
|
||||||
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
|
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
|
||||||
|
|
|
@ -42,9 +42,9 @@ VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Gets the shader program code from memory for the specified address
|
/// Gets the shader program code from memory for the specified address
|
||||||
ProgramCode GetShaderCode(VAddr addr) {
|
ProgramCode GetShaderCode(const u8* host_ptr) {
|
||||||
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
|
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
|
||||||
Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
|
std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
|
||||||
return program_code;
|
return program_code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -214,12 +214,13 @@ std::set<GLenum> GetSupportedFormats() {
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
|
||||||
ShaderDiskCacheOpenGL& disk_cache,
|
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
|
||||||
const PrecompiledPrograms& precompiled_programs,
|
const PrecompiledPrograms& precompiled_programs,
|
||||||
ProgramCode&& program_code, ProgramCode&& program_code_b)
|
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
|
||||||
: addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
|
: host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier},
|
||||||
disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
|
program_type{program_type}, disk_cache{disk_cache},
|
||||||
|
precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
|
||||||
|
|
||||||
const std::size_t code_size = CalculateProgramSize(program_code);
|
const std::size_t code_size = CalculateProgramSize(program_code);
|
||||||
const std::size_t code_size_b =
|
const std::size_t code_size_b =
|
||||||
|
@ -243,12 +244,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro
|
||||||
disk_cache.SaveRaw(raw);
|
disk_cache.SaveRaw(raw);
|
||||||
}
|
}
|
||||||
|
|
||||||
CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
|
||||||
ShaderDiskCacheOpenGL& disk_cache,
|
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
|
||||||
const PrecompiledPrograms& precompiled_programs,
|
const PrecompiledPrograms& precompiled_programs,
|
||||||
GLShader::ProgramResult result)
|
GLShader::ProgramResult result, u8* host_ptr)
|
||||||
: addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
|
: guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type},
|
||||||
disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
|
disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
|
||||||
|
host_ptr} {
|
||||||
|
|
||||||
code = std::move(result.first);
|
code = std::move(result.first);
|
||||||
entries = result.second;
|
entries = result.second;
|
||||||
|
@ -271,7 +273,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
|
||||||
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
||||||
}
|
}
|
||||||
|
|
||||||
LabelGLObject(GL_PROGRAM, program->handle, addr);
|
LabelGLObject(GL_PROGRAM, program->handle, guest_addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
handle = program->handle;
|
handle = program->handle;
|
||||||
|
@ -323,7 +325,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
|
||||||
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
||||||
}
|
}
|
||||||
|
|
||||||
LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
|
LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name);
|
||||||
|
|
||||||
return target_program->handle;
|
return target_program->handle;
|
||||||
};
|
};
|
||||||
|
@ -489,14 +491,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||||
const VAddr program_addr{GetShaderAddress(program)};
|
const VAddr program_addr{GetShaderAddress(program)};
|
||||||
|
|
||||||
// Look up shader in the cache based on address
|
// Look up shader in the cache based on address
|
||||||
Shader shader{TryGet(program_addr)};
|
const auto& host_ptr{Memory::GetPointer(program_addr)};
|
||||||
|
Shader shader{TryGet(host_ptr)};
|
||||||
|
|
||||||
if (!shader) {
|
if (!shader) {
|
||||||
// No shader found - create a new one
|
// No shader found - create a new one
|
||||||
ProgramCode program_code = GetShaderCode(program_addr);
|
const auto& host_ptr{Memory::GetPointer(program_addr)};
|
||||||
|
ProgramCode program_code{GetShaderCode(host_ptr)};
|
||||||
ProgramCode program_code_b;
|
ProgramCode program_code_b;
|
||||||
if (program == Maxwell::ShaderProgram::VertexA) {
|
if (program == Maxwell::ShaderProgram::VertexA) {
|
||||||
program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
|
program_code_b = GetShaderCode(
|
||||||
|
Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
|
||||||
}
|
}
|
||||||
const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
|
const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
|
||||||
|
|
||||||
|
@ -504,11 +509,11 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||||
if (found != precompiled_shaders.end()) {
|
if (found != precompiled_shaders.end()) {
|
||||||
shader =
|
shader =
|
||||||
std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
|
std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
|
||||||
precompiled_programs, found->second);
|
precompiled_programs, found->second, host_ptr);
|
||||||
} else {
|
} else {
|
||||||
shader = std::make_shared<CachedShader>(
|
shader = std::make_shared<CachedShader>(
|
||||||
program_addr, unique_identifier, program, disk_cache, precompiled_programs,
|
program_addr, unique_identifier, program, disk_cache, precompiled_programs,
|
||||||
std::move(program_code), std::move(program_code_b));
|
std::move(program_code), std::move(program_code_b), host_ptr);
|
||||||
}
|
}
|
||||||
Register(shader);
|
Register(shader);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
|
||||||
|
|
||||||
class CachedShader final : public RasterizerCacheObject {
|
class CachedShader final : public RasterizerCacheObject {
|
||||||
public:
|
public:
|
||||||
explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
|
||||||
ShaderDiskCacheOpenGL& disk_cache,
|
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
|
||||||
const PrecompiledPrograms& precompiled_programs,
|
const PrecompiledPrograms& precompiled_programs,
|
||||||
ProgramCode&& program_code, ProgramCode&& program_code_b);
|
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
|
||||||
|
|
||||||
explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
|
||||||
ShaderDiskCacheOpenGL& disk_cache,
|
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
|
||||||
const PrecompiledPrograms& precompiled_programs,
|
const PrecompiledPrograms& precompiled_programs,
|
||||||
GLShader::ProgramResult result);
|
GLShader::ProgramResult result, u8* host_ptr);
|
||||||
|
|
||||||
VAddr GetAddr() const override {
|
VAddr GetCpuAddr() const override {
|
||||||
return addr;
|
return guest_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t GetSizeInBytes() const override {
|
std::size_t GetSizeInBytes() const override {
|
||||||
|
@ -91,7 +91,8 @@ private:
|
||||||
|
|
||||||
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
|
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
|
||||||
|
|
||||||
VAddr addr{};
|
u8* host_ptr{};
|
||||||
|
VAddr guest_addr{};
|
||||||
u64 unique_identifier{};
|
u64 unique_identifier{};
|
||||||
Maxwell::ShaderProgram program_type{};
|
Maxwell::ShaderProgram program_type{};
|
||||||
ShaderDiskCacheOpenGL& disk_cache;
|
ShaderDiskCacheOpenGL& disk_cache;
|
||||||
|
|
|
@ -17,6 +17,11 @@
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
|
||||||
|
std::size_t alignment, u8* host_ptr)
|
||||||
|
: cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
|
||||||
|
host_ptr} {}
|
||||||
|
|
||||||
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
|
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
|
||||||
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
|
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
|
||||||
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
|
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
|
||||||
|
@ -37,16 +42,18 @@ VKBufferCache::~VKBufferCache() = default;
|
||||||
u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
|
u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
|
||||||
bool cache) {
|
bool cache) {
|
||||||
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
|
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
|
||||||
ASSERT(cpu_addr);
|
ASSERT_MSG(cpu_addr, "Invalid GPU address");
|
||||||
|
|
||||||
// Cache management is a big overhead, so only cache entries with a given size.
|
// Cache management is a big overhead, so only cache entries with a given size.
|
||||||
// TODO: Figure out which size is the best for given games.
|
// TODO: Figure out which size is the best for given games.
|
||||||
cache &= size >= 2048;
|
cache &= size >= 2048;
|
||||||
|
|
||||||
|
const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
|
||||||
if (cache) {
|
if (cache) {
|
||||||
if (auto entry = TryGet(*cpu_addr); entry) {
|
auto entry = TryGet(host_ptr);
|
||||||
if (entry->size >= size && entry->alignment == alignment) {
|
if (entry) {
|
||||||
return entry->offset;
|
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
|
||||||
|
return entry->GetOffset();
|
||||||
}
|
}
|
||||||
Unregister(entry);
|
Unregister(entry);
|
||||||
}
|
}
|
||||||
|
@ -55,17 +62,17 @@ u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64
|
||||||
AlignBuffer(alignment);
|
AlignBuffer(alignment);
|
||||||
const u64 uploaded_offset = buffer_offset;
|
const u64 uploaded_offset = buffer_offset;
|
||||||
|
|
||||||
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
|
if (!host_ptr) {
|
||||||
|
return uploaded_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memcpy(buffer_ptr, host_ptr, size);
|
||||||
buffer_ptr += size;
|
buffer_ptr += size;
|
||||||
buffer_offset += size;
|
buffer_offset += size;
|
||||||
|
|
||||||
if (cache) {
|
if (cache) {
|
||||||
auto entry = std::make_shared<CachedBufferEntry>();
|
auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
|
||||||
entry->offset = uploaded_offset;
|
alignment, host_ptr);
|
||||||
entry->size = size;
|
|
||||||
entry->alignment = alignment;
|
|
||||||
entry->addr = *cpu_addr;
|
|
||||||
Register(entry);
|
Register(entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,22 +24,39 @@ class VKFence;
|
||||||
class VKMemoryManager;
|
class VKMemoryManager;
|
||||||
class VKStreamBuffer;
|
class VKStreamBuffer;
|
||||||
|
|
||||||
struct CachedBufferEntry final : public RasterizerCacheObject {
|
class CachedBufferEntry final : public RasterizerCacheObject {
|
||||||
VAddr GetAddr() const override {
|
public:
|
||||||
return addr;
|
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
|
||||||
|
u8* host_ptr);
|
||||||
|
|
||||||
|
VAddr GetCpuAddr() const override {
|
||||||
|
return cpu_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t GetSizeInBytes() const override {
|
std::size_t GetSizeInBytes() const override {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::size_t GetSize() const {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 GetOffset() const {
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t GetAlignment() const {
|
||||||
|
return alignment;
|
||||||
|
}
|
||||||
|
|
||||||
// We do not have to flush this cache as things in it are never modified by us.
|
// We do not have to flush this cache as things in it are never modified by us.
|
||||||
void Flush() override {}
|
void Flush() override {}
|
||||||
|
|
||||||
VAddr addr;
|
private:
|
||||||
std::size_t size;
|
VAddr cpu_addr{};
|
||||||
u64 offset;
|
std::size_t size{};
|
||||||
std::size_t alignment;
|
u64 offset{};
|
||||||
|
std::size_t alignment{};
|
||||||
};
|
};
|
||||||
|
|
||||||
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
|
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
|
||||||
|
|
Loading…
Reference in a new issue