// suyu/src/video_core/engines/maxwell_dma.cpp

// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"

namespace Tegra::Engines {

// Constructs the DMA engine, initialising its `system`, `rasterizer` and `memory_manager`
// members from the arguments of the same name.
MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                       MemoryManager& memory_manager)
    : system(system), rasterizer(rasterizer), memory_manager(memory_manager) {}

// Handles a single method call sent to this engine: the argument is stored in the register
// array at the index given by the method, and a write to the `exec` register additionally
// triggers HandleCopy().
void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
    // Index of the `exec` register, expressed in 32-bit words from the start of Regs.
    constexpr auto exec_index = offsetof(Regs, exec) / sizeof(u32);

    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
               "Invalid MaxwellDMA register, increase the size of the Regs structure");

    // The register is always updated first so that HandleCopy() sees the new `exec` value.
    regs.reg_array[method_call.method] = method_call.argument;

    if (method_call.method == exec_index) {
        HandleCopy();
    }
}

// Performs the copy described by the current register state.
//
// Handled cases:
//  - linear -> linear: one CopyBlock of length `x_count` when `enable_2d` is clear, otherwise
//    `y_count` CopyBlock calls of length `x_count`, advancing by `src_pitch` / `dst_pitch`.
//  - tiled -> linear: source and destination are read into the scratch buffers, the requested
//    subrect is unswizzled into the destination buffer, and the destination is written back.
//  - linear -> tiled: source and destination are read into the scratch buffers, the source is
//    swizzled into the destination layer selected by `dst_params.pos_z`, and the destination
//    is written back.
//  - tiled -> tiled: not implemented; hits UNREACHABLE_MSG and returns without copying.
//
// The per-pixel size in the conversion paths is derived by dividing `src_pitch` by a register
// value; copies where that divisor is zero are logged and skipped instead of dividing by zero.
void MaxwellDMA::HandleCopy() {
    LOG_WARNING(HW_GPU, "Requested a DMA copy");

    const GPUVAddr source = regs.src_address.Address();
    const GPUVAddr dest = regs.dst_address.Address();

    // TODO(Subv): Perform more research and implement all features of this engine.
    ASSERT(regs.exec.enable_swizzle == 0);
    ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
    ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
    ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
    ASSERT(regs.dst_params.pos_x == 0);
    ASSERT(regs.dst_params.pos_y == 0);

    if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
        // If both the source and the destination are in block layout, assert.
        UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented");
        return;
    }

    // All copies here update the main memory, so mark all rasterizer states as invalid.
    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();

    if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
        // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
        // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
        // y_count).
        if (!regs.exec.enable_2d) {
            memory_manager.CopyBlock(dest, source, regs.x_count);
            return;
        }

        // If both the source and the destination are in linear layout, perform a line-by-line
        // copy. We're going to take a subrect of size (x_count, y_count) from the source
        // rectangle. There is no need to manually flush/invalidate the regions because
        // CopyBlock does that for us.
        for (u32 line = 0; line < regs.y_count; ++line) {
            // Convert the line index to the address type before multiplying so that the byte
            // offset is computed at address width rather than at the width of `line`.
            const GPUVAddr source_line = source + static_cast<GPUVAddr>(line) * regs.src_pitch;
            const GPUVAddr dest_line = dest + static_cast<GPUVAddr>(line) * regs.dst_pitch;
            memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
        }
        return;
    }

    ASSERT(regs.exec.enable_2d == 1);

    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
        ASSERT(regs.src_params.size_z == 1);

        // The bytes-per-pixel computation below divides by the source width.
        if (regs.src_params.size_x == 0) {
            LOG_WARNING(HW_GPU, "Ignoring tiled->linear DMA copy with a zero-width source");
            return;
        }

        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
        const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
        const std::size_t src_size = Texture::CalculateSize(
            true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
            regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());

        // Multiply at std::size_t width so the product is not truncated before being stored.
        const std::size_t dst_size = static_cast<std::size_t>(regs.dst_pitch) * regs.y_count;

        // The scratch buffers only ever grow; they are reused across copies.
        if (read_buffer.size() < src_size) {
            read_buffer.resize(src_size);
        }

        if (write_buffer.size() < dst_size) {
            write_buffer.resize(dst_size);
        }

        // The destination is read as well so that bytes outside the copied subrect are
        // preserved when the whole destination range is written back.
        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);

        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
                                  regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
                                  write_buffer.data(), regs.src_params.BlockHeight(),
                                  regs.src_params.pos_x, regs.src_params.pos_y);

        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
    } else {
        ASSERT(regs.dst_params.BlockDepth() == 0);

        // The bytes-per-pixel computation below divides by the line length.
        if (regs.x_count == 0) {
            LOG_WARNING(HW_GPU, "Ignoring linear->tiled DMA copy with x_count == 0");
            return;
        }

        const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;

        const std::size_t dst_size = Texture::CalculateSize(
            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
            regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());

        // Size of a single destination layer, used to locate layer `pos_z` in the buffer.
        const std::size_t dst_layer_size = Texture::CalculateSize(
            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
            regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());

        // Multiply at std::size_t width so the product is not truncated before being stored.
        const std::size_t src_size = static_cast<std::size_t>(regs.src_pitch) * regs.y_count;

        if (read_buffer.size() < src_size) {
            read_buffer.resize(src_size);
        }

        if (write_buffer.size() < dst_size) {
            write_buffer.resize(dst_size);
        }

        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);

        // If the input is linear and the output is tiled, swizzle the input and copy it over.
        // NOTE(review): nothing here checks that pos_z is below size_z, so the layer offset is
        // presumably trusted to stay inside write_buffer - confirm against the callers.
        Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
                                src_bytes_per_pixel,
                                write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
                                read_buffer.data(), regs.dst_params.BlockHeight());

        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
    }
}

} // namespace Tegra::Engines
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00			`// Copyright 2018 yuzu Emulator Project`
			`// Licensed under GPLv2 or any later version`
			`// Refer to the license.txt file included.`

video_core: Remove usages of System::GetInstance() within the engines Avoids the use of the global accessor in favor of explicitly making the system a dependency within the interface. 2019-02-15 22:05:17 -05:00			`#include "common/assert.h"`
video_core/engines: Remove unnecessary includes Removes a few unnecessary dependencies on core-related machinery, such as the core.h and memory.h, which reduces the amount of rebuilding necessary if those files change. This also uncovered some indirect dependencies within other source files. This also fixes those. 2019-03-05 20:25:01 -05:00			`#include "common/logging/log.h"`
gl_rasterizer: Skip VB upload if the state is clean. 2018-11-06 15:26:27 -05:00			`#include "core/core.h"`
			`#include "video_core/engines/maxwell_3d.h"`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00			`#include "video_core/engines/maxwell_dma.h"`
video_core/engines: Remove unnecessary inclusions where applicable Replaces header inclusions with forward declarations where applicable and also removes unused headers within the cpp file. This reduces a few more dependencies on core/memory.h 2019-04-05 18:21:15 -04:00			`#include "video_core/memory_manager.h"`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00			`#include "video_core/rasterizer_interface.h"`
gpu: Use host address for caching instead of guest address. 2019-02-18 20:58:32 -05:00			`#include "video_core/renderer_base.h"`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00			`#include "video_core/textures/decoders.h"`

engines/maxwell_*: Use nested namespace specifiers where applicable These three source files are the only ones within the engines directory that don't use nested namespaces. We may as well change these over to keep things consistent. 2018-10-20 15:58:06 -04:00			`namespace Tegra::Engines {`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00
video_core: Remove usages of System::GetInstance() within the engines Avoids the use of the global accessor in favor of explicitly making the system a dependency within the interface. 2019-02-15 22:05:17 -05:00			`MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,`
			`MemoryManager& memory_manager)`
video_core/engines: Make memory manager members private These aren't used externally by anything, so they can be made private data members. 2019-04-05 18:25:20 -04:00			`: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00
gpu: Rewrite GPU command list processing with DmaPusher class. - More accurate impl., fixes Undertale (among other games). 2018-11-23 23:20:56 -05:00			`void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {`
			`ASSERT_MSG(method_call.method < Regs::NUM_REGS,`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00			`"Invalid MaxwellDMA register, increase the size of the Regs structure");`

gpu: Rewrite GPU command list processing with DmaPusher class. - More accurate impl., fixes Undertale (among other games). 2018-11-23 23:20:56 -05:00			`regs.reg_array[method_call.method] = method_call.argument;`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00
			`#define MAXWELLDMA_REG_INDEX(field_name) \`
			`(offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))`

gpu: Rewrite GPU command list processing with DmaPusher class. - More accurate impl., fixes Undertale (among other games). 2018-11-23 23:20:56 -05:00			`switch (method_call.method) {`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00			`case MAXWELLDMA_REG_INDEX(exec): {`
			`HandleCopy();`
			`break;`
			`}`
			`}`

			`#undef MAXWELLDMA_REG_INDEX`
			`}`

			`void MaxwellDMA::HandleCopy() {`
Rename logging macro back to LOG_* 2018-07-02 12:13:26 -04:00			`LOG_WARNING(HW_GPU, "Requested a DMA copy");`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00
			`const GPUVAddr source = regs.src_address.Address();`
			`const GPUVAddr dest = regs.dst_address.Address();`

			`// TODO(Subv): Perform more research and implement all features of this engine.`
			`ASSERT(regs.exec.enable_swizzle == 0);`
			`ASSERT(regs.exec.query_mode == Regs::QueryMode::None);`
			`ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);`
			`ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);`
			`ASSERT(regs.dst_params.pos_x == 0);`
			`ASSERT(regs.dst_params.pos_y == 0);`
GPU: Directly copy the pixels when performing a same-layout DMA. 2018-07-02 10:46:33 -04:00
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00			`if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) {`
			`// If both the source and the destination are in block layout, assert.`
			`UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented");`
			`return;`
			`}`
GPU/DMA: Partially implemented the 'enable_2d' bit in the DMA engine. When not set, this tells the GPU to only use the X size when performing a DMA copy. This is only implemented for linear->linear and tiled->tiled copies. Conversion copies still retain the assert. This bit is unset by some games for various purposes, and by nouveau when copying the vertex buffers. 2018-09-08 17:02:16 -04:00
gl_rasterizer: Skip VB upload if the state is clean. 2018-11-06 15:26:27 -05:00			`// All copies here update the main memory, so mark all rasterizer states as invalid.`
video_core: Remove usages of System::GetInstance() within the engines Avoids the use of the global accessor in favor of explicitly making the system a dependency within the interface. 2019-02-15 22:05:17 -05:00			`system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();`
gl_rasterizer: Skip VB upload if the state is clean. 2018-11-06 15:26:27 -05:00
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00			`if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {`
GPU/DMA: Partially implemented the 'enable_2d' bit in the DMA engine. When not set, this tells the GPU to only use the X size when performing a DMA copy. This is only implemented for linear->linear and tiled->tiled copies. Conversion copies still retain the assert. This bit is unset by some games for various purposes, and by nouveau when copying the vertex buffers. 2018-09-08 17:02:16 -04:00			`// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00			// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
			`// y_count).`
			`if (!regs.exec.enable_2d) {`
video_core: Refactor to use MemoryManager interface for all memory access. # Conflicts: # src/video_core/engines/kepler_memory.cpp # src/video_core/engines/maxwell_3d.cpp # src/video_core/morton.cpp # src/video_core/morton.h # src/video_core/renderer_opengl/gl_global_cache.cpp # src/video_core/renderer_opengl/gl_global_cache.h # src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 2019-02-24 00:15:35 -05:00			`memory_manager.CopyBlock(dest, source, regs.x_count);`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00			`return;`
GPU/DMA: Partially implemented the 'enable_2d' bit in the DMA engine. When not set, this tells the GPU to only use the X size when performing a DMA copy. This is only implemented for linear->linear and tiled->tiled copies. Conversion copies still retain the assert. This bit is unset by some games for various purposes, and by nouveau when copying the vertex buffers. 2018-09-08 17:02:16 -04:00			`}`

GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00			`// If both the source and the destination are in linear layout, perform a line-by-line`
			`// copy. We're going to take a subrect of size (x_count, y_count) from the source`
			`// rectangle. There is no need to manually flush/invalidate the regions because`
			`// CopyBlock does that for us.`
			`for (u32 line = 0; line < regs.y_count; ++line) {`
video_core: Refactor to use MemoryManager interface for all memory access. # Conflicts: # src/video_core/engines/kepler_memory.cpp # src/video_core/engines/maxwell_3d.cpp # src/video_core/morton.cpp # src/video_core/morton.h # src/video_core/renderer_opengl/gl_global_cache.cpp # src/video_core/renderer_opengl/gl_global_cache.h # src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 2019-02-24 00:15:35 -05:00			`const GPUVAddr source_line = source + line * regs.src_pitch;`
			`const GPUVAddr dest_line = dest + line * regs.dst_pitch;`
			`memory_manager.CopyBlock(dest_line, source_line, regs.x_count);`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00			`}`
GPU: Directly copy the pixels when performing a same-layout DMA. 2018-07-02 10:46:33 -04:00			`return;`
			`}`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00
GPU/DMA: Partially implemented the 'enable_2d' bit in the DMA engine. When not set, this tells the GPU to only use the X size when performing a DMA copy. This is only implemented for linear->linear and tiled->tiled copies. Conversion copies still retain the assert. This bit is unset by some games for various purposes, and by nouveau when copying the vertex buffers. 2018-09-08 17:02:16 -04:00			`ASSERT(regs.exec.enable_2d == 1);`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {`
			`ASSERT(regs.src_params.size_z == 1);`
			`// If the input is tiled and the output is linear, deswizzle the input and copy it over.`
			`const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;`
			`const std::size_t src_size = Texture::CalculateSize(`
			`true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,`
			`regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`const std::size_t dst_size = regs.dst_pitch * regs.y_count;`
video_core: Refactor to use MemoryManager interface for all memory access. # Conflicts: # src/video_core/engines/kepler_memory.cpp # src/video_core/engines/maxwell_3d.cpp # src/video_core/morton.cpp # src/video_core/morton.h # src/video_core/renderer_opengl/gl_global_cache.cpp # src/video_core/renderer_opengl/gl_global_cache.h # src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 2019-02-24 00:15:35 -05:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`if (read_buffer.size() < src_size) {`
			`read_buffer.resize(src_size);`
			`}`
maxwell_dma: Check for valid source in destination before copy. - Avoid a crash in Octopath Traveler. 2019-03-09 14:36:52 -05:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`if (write_buffer.size() < dst_size) {`
			`write_buffer.resize(dst_size);`
			`}`
maxwell_dma: Check for valid source in destination before copy. - Avoid a crash in Octopath Traveler. 2019-03-09 14:36:52 -05:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`memory_manager.ReadBlock(source, read_buffer.data(), src_size);`
			`memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,`
			`regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),`
			`write_buffer.data(), regs.src_params.BlockHeight(),`
			`regs.src_params.pos_x, regs.src_params.pos_y);`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);`
			`} else {`
surface: Correct format S8Z24 2019-06-14 16:40:04 -04:00			`ASSERT(regs.dst_params.BlockDepth() == 0);`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`const std::size_t dst_size = Texture::CalculateSize(`
			`true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,`
			`regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`const std::size_t dst_layer_size = Texture::CalculateSize(`
			`true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,`
			`regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`const std::size_t src_size = regs.src_pitch * regs.y_count;`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`if (read_buffer.size() < src_size) {`
			`read_buffer.resize(src_size);`
			`}`

			`if (write_buffer.size() < dst_size) {`
			`write_buffer.resize(dst_size);`
			`}`

			`memory_manager.ReadBlock(source, read_buffer.data(), src_size);`
			`memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00			`// If the input is linear and the output is tiled, swizzle the input and copy it over.`
GPU: Improved implementation of maxwell DMA (Subv). 2018-10-17 21:29:10 -04:00			`Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,`
Fixes and Corrections to DMA Engine 2019-04-23 12:41:55 -04:00			`src_bytes_per_pixel,`
			`write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,`
			`read_buffer.data(), regs.dst_params.BlockHeight());`

			`memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);`
GPU: Partially implemented the Maxwell DMA engine. Only tiled->linear and linear->tiled copies that aren't offsetted are supported for now. Queries are not supported. Swizzled copies are not supported. 2018-06-10 18:02:33 -04:00			`}`
			`}`

engines/maxwell_*: Use nested namespace specifiers where applicable These three source files are the only ones within the engines directory that don't use nested namespaces. We may as well change these over to keep things consistent. 2018-10-20 15:58:06 -04:00			`} // namespace Tegra::Engines`