mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-11-24 12:26:26 -05:00
Merge pull request #2306 from ReinUsesLisp/aoffi
shader_ir: Implement AOFFI for TEX and TLD4
This commit is contained in:
commit
8aaf418bd6
4 changed files with 207 additions and 73 deletions
|
@ -21,6 +21,8 @@
|
|||
|
||||
namespace OpenGL::GLShader {
|
||||
|
||||
namespace {
|
||||
|
||||
using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::AttributeUse;
|
||||
using Tegra::Shader::Header;
|
||||
|
@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
|||
using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
|
||||
using Operation = const OperationNode&;
|
||||
|
||||
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
|
||||
|
||||
struct TextureAoffi {};
|
||||
using TextureArgument = std::pair<Type, Node>;
|
||||
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
|
||||
|
||||
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
||||
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
|
||||
constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
|
||||
static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
|
||||
|
||||
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
|
||||
|
||||
class ShaderWriter {
|
||||
public:
|
||||
void AddExpression(std::string_view text) {
|
||||
|
@ -91,7 +97,7 @@ private:
|
|||
};
|
||||
|
||||
/// Generates code to use for a swizzle operation.
|
||||
static std::string GetSwizzle(u32 elem) {
|
||||
std::string GetSwizzle(u32 elem) {
|
||||
ASSERT(elem <= 3);
|
||||
std::string swizzle = ".";
|
||||
swizzle += "xyzw"[elem];
|
||||
|
@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
|
|||
}
|
||||
|
||||
/// Translate topology
|
||||
static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
|
||||
std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
|
||||
switch (topology) {
|
||||
case Tegra::Shader::OutputTopology::PointList:
|
||||
return "points";
|
||||
|
@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
|
|||
}
|
||||
|
||||
/// Returns true if an object has to be treated as precise
|
||||
static bool IsPrecise(Operation operand) {
|
||||
bool IsPrecise(Operation operand) {
|
||||
const auto& meta = operand.GetMeta();
|
||||
|
||||
if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
|
||||
|
@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool IsPrecise(Node node) {
|
||||
bool IsPrecise(Node node) {
|
||||
if (const auto operation = std::get_if<OperationNode>(node)) {
|
||||
return IsPrecise(*operation);
|
||||
}
|
||||
|
@ -723,8 +729,8 @@ private:
|
|||
result_type));
|
||||
}
|
||||
|
||||
std::string GenerateTexture(Operation operation, const std::string& func,
|
||||
const std::vector<std::pair<Type, Node>>& extras) {
|
||||
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
|
||||
const std::vector<TextureIR>& extras) {
|
||||
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
|
||||
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
|
@ -734,11 +740,11 @@ private:
|
|||
const bool has_array = meta->sampler.IsArray();
|
||||
const bool has_shadow = meta->sampler.IsShadow();
|
||||
|
||||
std::string expr = func;
|
||||
expr += '(';
|
||||
expr += GetSampler(meta->sampler);
|
||||
expr += ", ";
|
||||
|
||||
std::string expr = "texture" + function_suffix;
|
||||
if (!meta->aoffi.empty()) {
|
||||
expr += "Offset";
|
||||
}
|
||||
expr += '(' + GetSampler(meta->sampler) + ", ";
|
||||
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
|
||||
expr += '(';
|
||||
for (std::size_t i = 0; i < count; ++i) {
|
||||
|
@ -756,38 +762,76 @@ private:
|
|||
}
|
||||
expr += ')';
|
||||
|
||||
for (const auto& extra_pair : extras) {
|
||||
const auto [type, operand] = extra_pair;
|
||||
if (operand == nullptr) {
|
||||
continue;
|
||||
}
|
||||
expr += ", ";
|
||||
|
||||
switch (type) {
|
||||
case Type::Int:
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
|
||||
// Inline the string as an immediate integer in GLSL (some extra arguments are
|
||||
// required to be constant)
|
||||
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
|
||||
} else {
|
||||
expr += "ftoi(" + Visit(operand) + ')';
|
||||
}
|
||||
break;
|
||||
case Type::Float:
|
||||
expr += Visit(operand);
|
||||
break;
|
||||
default: {
|
||||
const auto type_int = static_cast<u32>(type);
|
||||
UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
|
||||
expr += '0';
|
||||
break;
|
||||
}
|
||||
for (const auto& variant : extras) {
|
||||
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
|
||||
expr += GenerateTextureArgument(*argument);
|
||||
} else if (std::get_if<TextureAoffi>(&variant)) {
|
||||
expr += GenerateTextureAoffi(meta->aoffi);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
return expr + ')';
|
||||
}
|
||||
|
||||
/// Builds the GLSL text for one optional extra argument of a texture call.
/// @param argument Pair of the GLSL type the argument must have and the node producing it.
/// @return An empty string when the node is null; otherwise ", " followed by the argument
///         expression, so the result can be appended directly to a partial call string.
std::string GenerateTextureArgument(TextureArgument argument) {
    const Type kind = argument.first;
    const Node node = argument.second;
    if (node == nullptr) {
        // Absent optional argument: contribute nothing, not even the separator
        return {};
    }

    const std::string separator = ", ";

    if (kind == Type::Float) {
        return separator + Visit(node);
    }

    if (kind == Type::Int) {
        const auto constant = std::get_if<ImmediateNode>(node);
        if (constant != nullptr) {
            // Emit immediates as literal signed integers in the GLSL source (some extra
            // arguments are required to be constant)
            return separator + std::to_string(static_cast<s32>(constant->GetValue()));
        }
        return separator + "ftoi(" + Visit(node) + ')';
    }

    // Any other type is not handled yet: report it and emit a placeholder zero
    const auto kind_value = static_cast<u32>(kind);
    UNIMPLEMENTED_MSG("Unimplemented extra type={}", kind_value);
    return separator + '0';
}
|
||||
|
||||
std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
|
||||
if (aoffi.empty()) {
|
||||
return {};
|
||||
}
|
||||
constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
|
||||
std::string expr = ", ";
|
||||
expr += coord_constructors.at(aoffi.size() - 1);
|
||||
expr += '(';
|
||||
|
||||
for (std::size_t index = 0; index < aoffi.size(); ++index) {
|
||||
const auto operand{aoffi.at(index)};
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
|
||||
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
|
||||
// to be constant by the standard).
|
||||
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
|
||||
} else {
|
||||
expr += "ftoi(" + Visit(operand) + ')';
|
||||
}
|
||||
if (index + 1 < aoffi.size()) {
|
||||
expr += ", ";
|
||||
}
|
||||
}
|
||||
expr += ')';
|
||||
|
||||
return expr;
|
||||
}
|
||||
|
||||
std::string Assign(Operation operation) {
|
||||
const Node dest = operation[0];
|
||||
const Node src = operation[1];
|
||||
|
@ -1164,7 +1208,8 @@ private:
|
|||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
|
||||
std::string expr = GenerateTexture(
|
||||
operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
|
||||
if (meta->sampler.IsShadow()) {
|
||||
expr = "vec4(" + expr + ')';
|
||||
}
|
||||
|
@ -1175,7 +1220,8 @@ private:
|
|||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
|
||||
std::string expr = GenerateTexture(
|
||||
operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
|
||||
if (meta->sampler.IsShadow()) {
|
||||
expr = "vec4(" + expr + ')';
|
||||
}
|
||||
|
@ -1187,7 +1233,8 @@ private:
|
|||
ASSERT(meta);
|
||||
|
||||
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
|
||||
return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
|
||||
return GenerateTexture(operation, "Gather",
|
||||
{TextureArgument{type, meta->component}, TextureAoffi{}}) +
|
||||
GetSwizzle(meta->element);
|
||||
}
|
||||
|
||||
|
@ -1217,8 +1264,8 @@ private:
|
|||
ASSERT(meta);
|
||||
|
||||
if (meta->element < 2) {
|
||||
return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
|
||||
" * vec2(256))" + GetSwizzle(meta->element) + "))";
|
||||
return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
|
||||
GetSwizzle(meta->element) + "))";
|
||||
}
|
||||
return "0";
|
||||
}
|
||||
|
@ -1571,6 +1618,8 @@ private:
|
|||
ShaderWriter code;
|
||||
};
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
std::string GetCommonDeclarations() {
|
||||
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
|
||||
const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
|
||||
|
|
|
@ -7,7 +7,9 @@
|
|||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
|
@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
|||
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::TEX: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
|
||||
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const TextureType texture_type{instr.tex.texture_type};
|
||||
const bool is_array = instr.tex.array != 0;
|
||||
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.tex.GetTextureProcessMode();
|
||||
WriteTexInstructionFloat(
|
||||
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
|
||||
bb, instr,
|
||||
GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TEXS: {
|
||||
|
@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
|||
}
|
||||
case OpCode::Id::TLD4: {
|
||||
ASSERT(instr.tld4.array == 0);
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
|
||||
|
@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
|||
const auto texture_type = instr.tld4.texture_type.Value();
|
||||
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
||||
const bool is_array = instr.tld4.array != 0;
|
||||
WriteTexInstructionFloat(bb, instr,
|
||||
GetTld4Code(instr, texture_type, depth_compare, is_array));
|
||||
const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||
WriteTexInstructionFloat(
|
||||
bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4S: {
|
||||
|
@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
|||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
|
@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
|||
if (!instr.txq.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||
const Node value =
|
||||
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
|
||||
SetTemporal(bb, indexer++, value);
|
||||
|
@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
|||
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
auto params = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||
SetTemporal(bb, element, value);
|
||||
}
|
||||
|
@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
|
|||
|
||||
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, std::vector<Node> coords,
|
||||
Node array, Node depth_compare, u32 bias_offset) {
|
||||
Node array, Node depth_compare, u32 bias_offset,
|
||||
std::vector<Node> aoffi) {
|
||||
const bool is_array = array;
|
||||
const bool is_shadow = depth_compare;
|
||||
|
||||
|
@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
|||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto copy_coords = coords;
|
||||
MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
|
||||
MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
|
||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||
}
|
||||
|
||||
|
@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
|||
}
|
||||
|
||||
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array,
|
||||
bool is_aoffi) {
|
||||
const bool lod_bias_enabled{
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
|
||||
|
||||
u64 parameter_register = instr.gpr20.Value();
|
||||
if (lod_bias_enabled) {
|
||||
++parameter_register;
|
||||
}
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
||||
|
@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
|||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
|
||||
std::vector<Node> aoffi;
|
||||
if (is_aoffi) {
|
||||
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
|
||||
}
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||
// or bias are used
|
||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
||||
dc = GetRegister(depth_register);
|
||||
dc = GetRegister(parameter_register++);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||
|
@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
|||
dc = GetRegister(depth_register);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
||||
bool is_array) {
|
||||
bool is_array, bool is_aoffi) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
||||
|
@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
|
|||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (size_t i = 0; i < coord_count; ++i)
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
}
|
||||
|
||||
u64 parameter_register = instr.gpr20.Value();
|
||||
std::vector<Node> aoffi;
|
||||
if (is_aoffi) {
|
||||
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
|
||||
}
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
dc = GetRegister(parameter_register++);
|
||||
}
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
|
||||
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
|
@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
|
|||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
|
||||
MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
return values;
|
||||
|
@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
|
|||
return {coord_count, total_coord_count};
|
||||
}
|
||||
|
||||
/// Unpacks the per-coordinate texture offsets stored in an AOFFI register.
/// @param aoffi_reg   Node of the register holding the packed offsets.
/// @param coord_count Number of offsets to extract (at most three fields are described).
/// @param is_tld4     Selects the TLD4 field layout instead of the TEX one.
/// @return One node per coordinate: immediates when the register value could be tracked to a
///         constant, otherwise operations that decode the field at runtime.
std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
                                                bool is_tld4) {
    // Packed field layout: TLD4 uses 6-bit fields at bits 0/8/16, TEX uses 4-bit fields at
    // bits 0/4/8. Values at or above the threshold are negative and get the range subtracted.
    const std::array<u32, 3> field_shifts =
        is_tld4 ? std::array<u32, 3>{0, 8, 16} : std::array<u32, 3>{0, 4, 8};
    const u32 field_bits = is_tld4 ? 6 : 4;
    const s32 sign_threshold = is_tld4 ? 32 : 8;
    const s32 sign_range = is_tld4 ? 64 : 16;
    const u32 field_mask = (1U << field_bits) - 1;

    std::vector<Node> offsets;
    offsets.reserve(coord_count);

    const auto folded =
        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()));
    if (folded) {
        // The register holds a known constant: decode every field right now
        for (std::size_t coord = 0; coord < coord_count; ++coord) {
            s32 value = (*folded >> field_shifts.at(coord)) & field_mask;
            if (value >= sign_threshold) {
                value -= sign_range;
            }
            offsets.push_back(Immediate(value));
        }
        return offsets;
    }

    // Variable access, not supported on AMD.
    LOG_WARNING(HW_GPU,
                "AOFFI constant folding failed, some hardware might have graphical issues");
    for (std::size_t coord = 0; coord < coord_count; ++coord) {
        const Node raw = BitfieldExtract(aoffi_reg, field_shifts.at(coord), field_bits);
        const Node is_negative =
            Operation(OperationCode::LogicalIGreaterEqual, raw, Immediate(sign_threshold));
        const Node wrapped = Operation(OperationCode::IAdd, raw, Immediate(-sign_range));
        offsets.push_back(Operation(OperationCode::Select, is_negative, wrapped, raw));
    }
    return offsets;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
|
@ -7,6 +7,7 @@
|
|||
#include <array>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
|
@ -290,6 +291,7 @@ struct MetaTexture {
|
|||
const Sampler& sampler;
|
||||
Node array{};
|
||||
Node depth_compare{};
|
||||
std::vector<Node> aoffi;
|
||||
Node bias{};
|
||||
Node lod{};
|
||||
Node component{};
|
||||
|
@ -741,14 +743,14 @@ private:
|
|||
|
||||
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
||||
bool is_array);
|
||||
bool is_array, bool is_aoffi);
|
||||
|
||||
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
||||
bool is_array);
|
||||
|
||||
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
bool depth_compare, bool is_array);
|
||||
bool depth_compare, bool is_array, bool is_aoffi);
|
||||
|
||||
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
bool is_array);
|
||||
|
@ -757,9 +759,11 @@ private:
|
|||
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
|
||||
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
|
||||
|
||||
std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
|
||||
|
||||
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
|
||||
Node array, Node depth_compare, u32 bias_offset);
|
||||
Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
|
||||
|
||||
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
|
||||
u64 byte_height);
|
||||
|
@ -773,6 +777,8 @@ private:
|
|||
|
||||
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
|
||||
|
||||
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
|
||||
|
||||
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
|
||||
|
||||
template <typename... T>
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
@ -14,7 +15,7 @@ namespace {
|
|||
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
||||
OperationCode operation_code) {
|
||||
for (; cursor >= 0; --cursor) {
|
||||
const Node node = code[cursor];
|
||||
const Node node = code.at(cursor);
|
||||
if (const auto operation = std::get_if<OperationNode>(node)) {
|
||||
if (operation->GetCode() == operation_code)
|
||||
return {node, cursor};
|
||||
|
@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
/// Tries to resolve the value held by a register to a compile-time immediate.
/// @param tracked Node to resolve; only general purpose register nodes can be tracked.
/// @param code    Node block to search backwards through.
/// @param cursor  Position in code to start searching from (the entry at cursor itself is
///                skipped).
/// @return The immediate value written to the register, or an empty optional when the node is
///         not a register, no write is found, or the written value is not an immediate.
std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
    // std::get would throw on a non-register node (and dereference a null one); report
    // "not an immediate" instead so callers can take their non-constant path.
    const auto gpr = std::get_if<GprNode>(tracked);
    if (gpr == nullptr) {
        return {};
    }

    // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
    // register that it uses as operand
    const Node found = TrackRegister(gpr, code, cursor - 1).first;
    if (!found) {
        return {};
    }
    if (const auto immediate = std::get_if<ImmediateNode>(found)) {
        return immediate->GetValue();
    }
    return {};
}
|
||||
|
||||
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
||||
s64 cursor) {
|
||||
for (; cursor >= 0; --cursor) {
|
||||
|
|
Loading…
Reference in a new issue