mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-11-25 04:46:27 -05:00
4a493cb10f
Fix some typos reported by Lintian
494 lines
17 KiB
C++
494 lines
17 KiB
C++
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include <algorithm>
|
|
#include <string>
|
|
#include <tuple>
|
|
|
|
#include "common/div_ceil.h"
|
|
#include "common/settings.h"
|
|
#include "shader_recompiler/backend/bindings.h"
|
|
#include "shader_recompiler/backend/glasm/emit_glasm.h"
|
|
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
|
|
#include "shader_recompiler/backend/glasm/glasm_emit_context.h"
|
|
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
|
#include "shader_recompiler/frontend/ir/program.h"
|
|
#include "shader_recompiler/profile.h"
|
|
#include "shader_recompiler/runtime_info.h"
|
|
|
|
namespace Shader::Backend::GLASM {
|
|
namespace {
|
|
template <class Func>
|
|
struct FuncTraits {};
|
|
|
|
template <class ReturnType_, class... Args>
|
|
struct FuncTraits<ReturnType_ (*)(Args...)> {
|
|
using ReturnType = ReturnType_;
|
|
|
|
static constexpr size_t NUM_ARGS = sizeof...(Args);
|
|
|
|
template <size_t I>
|
|
using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
|
|
};
|
|
|
|
template <typename T>
|
|
struct Identity {
|
|
Identity(T data_) : data{data_} {}
|
|
|
|
T Extract() {
|
|
return data;
|
|
}
|
|
|
|
T data;
|
|
};
|
|
|
|
template <bool scalar>
|
|
class RegWrapper {
|
|
public:
|
|
RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} {
|
|
const Value value{reg_alloc.Peek(ir_value)};
|
|
if (value.type == Type::Register) {
|
|
inst = ir_value.InstRecursive();
|
|
reg = Register{value};
|
|
} else {
|
|
reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg();
|
|
}
|
|
switch (value.type) {
|
|
case Type::Register:
|
|
case Type::Void:
|
|
break;
|
|
case Type::U32:
|
|
ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32);
|
|
break;
|
|
case Type::U64:
|
|
ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64);
|
|
break;
|
|
}
|
|
}
|
|
|
|
auto Extract() {
|
|
if (inst) {
|
|
reg_alloc.Unref(*inst);
|
|
} else {
|
|
reg_alloc.FreeReg(reg);
|
|
}
|
|
return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}};
|
|
}
|
|
|
|
private:
|
|
RegAlloc& reg_alloc;
|
|
IR::Inst* inst{};
|
|
Register reg{};
|
|
};
|
|
|
|
template <typename ArgType>
|
|
class ValueWrapper {
|
|
public:
|
|
ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
|
|
: reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}
|
|
|
|
ArgType Extract() {
|
|
if (!ir_value.IsImmediate()) {
|
|
reg_alloc.Unref(*ir_value.InstRecursive());
|
|
}
|
|
return value;
|
|
}
|
|
|
|
private:
|
|
RegAlloc& reg_alloc;
|
|
const IR::Value& ir_value;
|
|
ArgType value;
|
|
};
|
|
|
|
template <typename ArgType>
|
|
auto Arg(EmitContext& ctx, const IR::Value& arg) {
|
|
if constexpr (std::is_same_v<ArgType, Register>) {
|
|
return RegWrapper<false>{ctx, arg};
|
|
} else if constexpr (std::is_same_v<ArgType, ScalarRegister>) {
|
|
return RegWrapper<true>{ctx, arg};
|
|
} else if constexpr (std::is_base_of_v<Value, ArgType>) {
|
|
return ValueWrapper<ArgType>{ctx, arg};
|
|
} else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
|
|
return Identity<const IR::Value&>{arg};
|
|
} else if constexpr (std::is_same_v<ArgType, u32>) {
|
|
return Identity{arg.U32()};
|
|
} else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
|
|
return Identity{arg.Attribute()};
|
|
} else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
|
|
return Identity{arg.Patch()};
|
|
} else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
|
|
return Identity{arg.Reg()};
|
|
}
|
|
}
|
|
|
|
template <auto func, bool is_first_arg_inst>
|
|
struct InvokeCall {
|
|
template <typename... Args>
|
|
InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
|
|
if constexpr (is_first_arg_inst) {
|
|
func(ctx, *inst, args.Extract()...);
|
|
} else {
|
|
func(ctx, args.Extract()...);
|
|
}
|
|
}
|
|
};
|
|
|
|
template <auto func, bool is_first_arg_inst, size_t... I>
|
|
void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
|
|
using Traits = FuncTraits<decltype(func)>;
|
|
if constexpr (is_first_arg_inst) {
|
|
InvokeCall<func, is_first_arg_inst>{
|
|
ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
|
|
} else {
|
|
InvokeCall<func, is_first_arg_inst>{
|
|
ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
|
|
}
|
|
}
|
|
|
|
template <auto func>
|
|
void Invoke(EmitContext& ctx, IR::Inst* inst) {
|
|
using Traits = FuncTraits<decltype(func)>;
|
|
static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
|
|
if constexpr (Traits::NUM_ARGS == 1) {
|
|
Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
|
|
} else {
|
|
using FirstArgType = typename Traits::template ArgType<1>;
|
|
static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
|
|
using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
|
|
Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
|
|
}
|
|
}
|
|
|
|
void EmitInst(EmitContext& ctx, IR::Inst* inst) {
|
|
switch (inst->GetOpcode()) {
|
|
#define OPCODE(name, result_type, ...) \
|
|
case IR::Opcode::name: \
|
|
return Invoke<&Emit##name>(ctx, inst);
|
|
#include "shader_recompiler/frontend/ir/opcodes.inc"
|
|
#undef OPCODE
|
|
}
|
|
throw LogicError("Invalid opcode {}", inst->GetOpcode());
|
|
}
|
|
|
|
bool IsReference(IR::Inst& inst) {
|
|
return inst.GetOpcode() == IR::Opcode::Reference;
|
|
}
|
|
|
|
void PrecolorInst(IR::Inst& phi) {
|
|
// Insert phi moves before references to avoid overwriting other phis
|
|
const size_t num_args{phi.NumArgs()};
|
|
for (size_t i = 0; i < num_args; ++i) {
|
|
IR::Block& phi_block{*phi.PhiBlock(i)};
|
|
auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
|
|
IR::IREmitter ir{phi_block, it};
|
|
const IR::Value arg{phi.Arg(i)};
|
|
if (arg.IsImmediate()) {
|
|
ir.PhiMove(phi, arg);
|
|
} else {
|
|
ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
|
|
}
|
|
}
|
|
for (size_t i = 0; i < num_args; ++i) {
|
|
IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
|
|
}
|
|
}
|
|
|
|
void Precolor(const IR::Program& program) {
|
|
for (IR::Block* const block : program.blocks) {
|
|
for (IR::Inst& phi : block->Instructions()) {
|
|
if (!IR::IsPhi(phi)) {
|
|
break;
|
|
}
|
|
PrecolorInst(phi);
|
|
}
|
|
}
|
|
}
|
|
|
|
void EmitCode(EmitContext& ctx, const IR::Program& program) {
|
|
const auto eval{
|
|
[&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
|
|
for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
|
|
switch (node.type) {
|
|
case IR::AbstractSyntaxNode::Type::Block:
|
|
for (IR::Inst& inst : node.data.block->Instructions()) {
|
|
EmitInst(ctx, &inst);
|
|
}
|
|
break;
|
|
case IR::AbstractSyntaxNode::Type::If:
|
|
ctx.Add("MOV.S.CC RC,{};"
|
|
"IF NE.x;",
|
|
eval(node.data.if_node.cond));
|
|
break;
|
|
case IR::AbstractSyntaxNode::Type::EndIf:
|
|
ctx.Add("ENDIF;");
|
|
break;
|
|
case IR::AbstractSyntaxNode::Type::Loop:
|
|
ctx.Add("REP;");
|
|
break;
|
|
case IR::AbstractSyntaxNode::Type::Repeat:
|
|
if (!Settings::values.disable_shader_loop_safety_checks) {
|
|
const u32 loop_index{ctx.num_safety_loop_vars++};
|
|
const u32 vector_index{loop_index / 4};
|
|
const char component{"xyzw"[loop_index % 4]};
|
|
ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;"
|
|
"BRK(LT.{});",
|
|
vector_index, component, vector_index, component, component);
|
|
}
|
|
if (node.data.repeat.cond.IsImmediate()) {
|
|
if (node.data.repeat.cond.U1()) {
|
|
ctx.Add("ENDREP;");
|
|
} else {
|
|
ctx.Add("BRK;"
|
|
"ENDREP;");
|
|
}
|
|
} else {
|
|
ctx.Add("MOV.S.CC RC,{};"
|
|
"BRK(EQ.x);"
|
|
"ENDREP;",
|
|
eval(node.data.repeat.cond));
|
|
}
|
|
break;
|
|
case IR::AbstractSyntaxNode::Type::Break:
|
|
if (node.data.break_node.cond.IsImmediate()) {
|
|
if (node.data.break_node.cond.U1()) {
|
|
ctx.Add("BRK;");
|
|
}
|
|
} else {
|
|
ctx.Add("MOV.S.CC RC,{};"
|
|
"BRK (NE.x);",
|
|
eval(node.data.break_node.cond));
|
|
}
|
|
break;
|
|
case IR::AbstractSyntaxNode::Type::Return:
|
|
case IR::AbstractSyntaxNode::Type::Unreachable:
|
|
ctx.Add("RET;");
|
|
break;
|
|
}
|
|
}
|
|
if (!ctx.reg_alloc.IsEmpty()) {
|
|
LOG_WARNING(Shader_GLASM, "Register leak after generating code");
|
|
}
|
|
}
|
|
|
|
void SetupOptions(const IR::Program& program, const Profile& profile,
|
|
const RuntimeInfo& runtime_info, std::string& header) {
|
|
const Info& info{program.info};
|
|
const Stage stage{program.stage};
|
|
|
|
// TODO: Track the shared atomic ops
|
|
header += "OPTION NV_internal;"
|
|
"OPTION NV_shader_storage_buffer;"
|
|
"OPTION NV_gpu_program_fp64;";
|
|
if (info.uses_int64_bit_atomics) {
|
|
header += "OPTION NV_shader_atomic_int64;";
|
|
}
|
|
if (info.uses_atomic_f32_add) {
|
|
header += "OPTION NV_shader_atomic_float;";
|
|
}
|
|
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
|
|
header += "OPTION NV_shader_atomic_fp16_vector;";
|
|
}
|
|
if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
|
|
info.uses_fswzadd) {
|
|
header += "OPTION NV_shader_thread_group;";
|
|
}
|
|
if (info.uses_subgroup_shuffles) {
|
|
header += "OPTION NV_shader_thread_shuffle;";
|
|
}
|
|
if (info.uses_sparse_residency) {
|
|
header += "OPTION EXT_sparse_texture2;";
|
|
}
|
|
const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] ||
|
|
info.stores[IR::Attribute::Layer]};
|
|
if ((stage != Stage::Geometry && stores_viewport_layer) ||
|
|
info.stores[IR::Attribute::ViewportMask]) {
|
|
if (profile.support_viewport_index_layer_non_geometry) {
|
|
header += "OPTION NV_viewport_array2;";
|
|
}
|
|
}
|
|
if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) {
|
|
header += "OPTION NV_geometry_shader_passthrough;";
|
|
}
|
|
if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
|
|
header += "OPTION EXT_shader_image_load_formatted;";
|
|
}
|
|
if (profile.support_derivative_control) {
|
|
header += "OPTION ARB_derivative_control;";
|
|
}
|
|
if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
|
|
header += "OPTION NV_early_fragment_tests;";
|
|
}
|
|
if (stage == Stage::Fragment) {
|
|
header += "OPTION ARB_draw_buffers;";
|
|
}
|
|
}
|
|
|
|
std::string_view StageHeader(Stage stage) {
|
|
switch (stage) {
|
|
case Stage::VertexA:
|
|
case Stage::VertexB:
|
|
return "!!NVvp5.0\n";
|
|
case Stage::TessellationControl:
|
|
return "!!NVtcp5.0\n";
|
|
case Stage::TessellationEval:
|
|
return "!!NVtep5.0\n";
|
|
case Stage::Geometry:
|
|
return "!!NVgp5.0\n";
|
|
case Stage::Fragment:
|
|
return "!!NVfp5.0\n";
|
|
case Stage::Compute:
|
|
return "!!NVcp5.0\n";
|
|
}
|
|
throw InvalidArgument("Invalid stage {}", stage);
|
|
}
|
|
|
|
std::string_view InputPrimitive(InputTopology topology) {
|
|
switch (topology) {
|
|
case InputTopology::Points:
|
|
return "POINTS";
|
|
case InputTopology::Lines:
|
|
return "LINES";
|
|
case InputTopology::LinesAdjacency:
|
|
return "LINES_ADJACENCY";
|
|
case InputTopology::Triangles:
|
|
return "TRIANGLES";
|
|
case InputTopology::TrianglesAdjacency:
|
|
return "TRIANGLES_ADJACENCY";
|
|
}
|
|
throw InvalidArgument("Invalid input topology {}", topology);
|
|
}
|
|
|
|
std::string_view OutputPrimitive(OutputTopology topology) {
|
|
switch (topology) {
|
|
case OutputTopology::PointList:
|
|
return "POINTS";
|
|
case OutputTopology::LineStrip:
|
|
return "LINE_STRIP";
|
|
case OutputTopology::TriangleStrip:
|
|
return "TRIANGLE_STRIP";
|
|
}
|
|
throw InvalidArgument("Invalid output topology {}", topology);
|
|
}
|
|
|
|
std::string_view GetTessMode(TessPrimitive primitive) {
|
|
switch (primitive) {
|
|
case TessPrimitive::Triangles:
|
|
return "TRIANGLES";
|
|
case TessPrimitive::Quads:
|
|
return "QUADS";
|
|
case TessPrimitive::Isolines:
|
|
return "ISOLINES";
|
|
}
|
|
throw InvalidArgument("Invalid tessellation primitive {}", primitive);
|
|
}
|
|
|
|
std::string_view GetTessSpacing(TessSpacing spacing) {
|
|
switch (spacing) {
|
|
case TessSpacing::Equal:
|
|
return "EQUAL";
|
|
case TessSpacing::FractionalOdd:
|
|
return "FRACTIONAL_ODD";
|
|
case TessSpacing::FractionalEven:
|
|
return "FRACTIONAL_EVEN";
|
|
}
|
|
throw InvalidArgument("Invalid tessellation spacing {}", spacing);
|
|
}
|
|
} // Anonymous namespace
|
|
|
|
std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
|
|
Bindings& bindings) {
|
|
EmitContext ctx{program, bindings, profile, runtime_info};
|
|
Precolor(program);
|
|
EmitCode(ctx, program);
|
|
std::string header{StageHeader(program.stage)};
|
|
SetupOptions(program, profile, runtime_info, header);
|
|
switch (program.stage) {
|
|
case Stage::TessellationControl:
|
|
header += fmt::format("VERTICES_OUT {};", program.invocations);
|
|
break;
|
|
case Stage::TessellationEval:
|
|
header += fmt::format("TESS_MODE {};"
|
|
"TESS_SPACING {};"
|
|
"TESS_VERTEX_ORDER {};",
|
|
GetTessMode(runtime_info.tess_primitive),
|
|
GetTessSpacing(runtime_info.tess_spacing),
|
|
runtime_info.tess_clockwise ? "CW" : "CCW");
|
|
break;
|
|
case Stage::Geometry:
|
|
header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology));
|
|
if (program.is_geometry_passthrough) {
|
|
if (profile.support_geometry_shader_passthrough) {
|
|
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
|
|
if (program.info.passthrough.Generic(index)) {
|
|
header += fmt::format("PASSTHROUGH result.attrib[{}];", index);
|
|
}
|
|
}
|
|
if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
|
|
header += "PASSTHROUGH result.position;";
|
|
}
|
|
} else {
|
|
LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported");
|
|
}
|
|
} else {
|
|
header +=
|
|
fmt::format("VERTICES_OUT {};"
|
|
"PRIMITIVE_OUT {};",
|
|
program.output_vertices, OutputPrimitive(program.output_topology));
|
|
}
|
|
break;
|
|
case Stage::Compute:
|
|
header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
|
|
program.workgroup_size[1], program.workgroup_size[2]);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
if (program.shared_memory_size > 0) {
|
|
header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
|
|
header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
|
|
}
|
|
if (program.info.uses_rescaling_uniform) {
|
|
header += "PARAM scaling[1]={program.local[0..0]};";
|
|
}
|
|
header += "TEMP ";
|
|
for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
|
|
header += fmt::format("R{},", index);
|
|
}
|
|
if (program.local_memory_size > 0) {
|
|
header += fmt::format("lmem[{}],", program.local_memory_size);
|
|
}
|
|
if (program.info.uses_fswzadd) {
|
|
header += "FSWZA[4],FSWZB[4],";
|
|
}
|
|
const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)};
|
|
for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
|
|
header += fmt::format("loop{},", index);
|
|
}
|
|
header += "RC;"
|
|
"LONG TEMP ";
|
|
for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
|
|
header += fmt::format("D{},", index);
|
|
}
|
|
header += "DC;";
|
|
if (program.info.uses_fswzadd) {
|
|
header += "MOV.F FSWZA[0],-1;"
|
|
"MOV.F FSWZA[1],1;"
|
|
"MOV.F FSWZA[2],-1;"
|
|
"MOV.F FSWZA[3],0;"
|
|
"MOV.F FSWZB[0],-1;"
|
|
"MOV.F FSWZB[1],-1;"
|
|
"MOV.F FSWZB[2],1;"
|
|
"MOV.F FSWZB[3],-1;";
|
|
}
|
|
for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
|
|
header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index);
|
|
}
|
|
if (ctx.uses_y_direction) {
|
|
header += "PARAM y_direction[1]={state.material.front.ambient};";
|
|
}
|
|
ctx.code.insert(0, header);
|
|
ctx.code += "END";
|
|
return ctx.code;
|
|
}
|
|
|
|
} // namespace Shader::Backend::GLASM
|