suyu/src/video_core/shader/node.h
ReinUsesLisp e2b67a868b shader/other: Implement thread comparisons (NV_shader_thread_group)
Hardware S2R special registers match gl_Thread*MaskNV. We can trivially
implement these using Nvidia's extension on OpenGL or naively stubbing
them with the ARB instructions to match. This might cause issues if the
host device warp size doesn't match Nvidia's. That said, this is
unlikely on proper shaders.

Refer to the attached url for more documentation about these flags.
https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_thread_group.txt
2020-05-21 23:18:37 -03:00

709 lines
23 KiB
C++

// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <variant>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
namespace VideoCommon::Shader {
enum class OperationCode {
Assign, /// (float& dest, float src) -> void
Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
FAdd, /// (MetaArithmetic, float a, float b) -> float
FMul, /// (MetaArithmetic, float a, float b) -> float
FDiv, /// (MetaArithmetic, float a, float b) -> float
FFma, /// (MetaArithmetic, float a, float b, float c) -> float
FNegate, /// (MetaArithmetic, float a) -> float
FAbsolute, /// (MetaArithmetic, float a) -> float
FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
FMin, /// (MetaArithmetic, float a, float b) -> float
FMax, /// (MetaArithmetic, float a, float b) -> float
FCos, /// (MetaArithmetic, float a) -> float
FSin, /// (MetaArithmetic, float a) -> float
FExp2, /// (MetaArithmetic, float a) -> float
FLog2, /// (MetaArithmetic, float a) -> float
FInverseSqrt, /// (MetaArithmetic, float a) -> float
FSqrt, /// (MetaArithmetic, float a) -> float
FRoundEven, /// (MetaArithmetic, float a) -> float
FFloor, /// (MetaArithmetic, float a) -> float
FCeil, /// (MetaArithmetic, float a) -> float
FTrunc, /// (MetaArithmetic, float a) -> float
FCastInteger, /// (MetaArithmetic, int a) -> float
FCastUInteger, /// (MetaArithmetic, uint a) -> float
FSwizzleAdd, /// (float a, float b, uint mask) -> float
IAdd, /// (MetaArithmetic, int a, int b) -> int
IMul, /// (MetaArithmetic, int a, int b) -> int
IDiv, /// (MetaArithmetic, int a, int b) -> int
INegate, /// (MetaArithmetic, int a) -> int
IAbsolute, /// (MetaArithmetic, int a) -> int
IMin, /// (MetaArithmetic, int a, int b) -> int
IMax, /// (MetaArithmetic, int a, int b) -> int
ICastFloat, /// (MetaArithmetic, float a) -> int
ICastUnsigned, /// (MetaArithmetic, uint a) -> int
ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int
ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int
IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int
IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int
IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int
IBitwiseNot, /// (MetaArithmetic, int a) -> int
IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int
IBitCount, /// (MetaArithmetic, int) -> int
IBitMSB, /// (MetaArithmetic, int) -> int
UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
UMul, /// (MetaArithmetic, uint a, uint b) -> uint
UDiv, /// (MetaArithmetic, uint a, uint b) -> uint
UMin, /// (MetaArithmetic, uint a, uint b) -> uint
UMax, /// (MetaArithmetic, uint a, uint b) -> uint
UCastFloat, /// (MetaArithmetic, float a) -> uint
UCastSigned, /// (MetaArithmetic, int a) -> uint
ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint
ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint
UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint
UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint
UBitwiseNot, /// (MetaArithmetic, uint a) -> uint
UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
UBitCount, /// (MetaArithmetic, uint) -> uint
UBitMSB, /// (MetaArithmetic, uint) -> uint
HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
HAbsolute, /// (f16vec2 a) -> f16vec2
HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
HMergeF32, /// (f16vec2 src) -> float
HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
HPack2, /// (float a, float b) -> f16vec2
LogicalAssign, /// (bool& dst, bool src) -> void
LogicalAnd, /// (bool a, bool b) -> bool
LogicalOr, /// (bool a, bool b) -> bool
LogicalXor, /// (bool a, bool b) -> bool
LogicalNegate, /// (bool a) -> bool
LogicalPick2, /// (bool2 pair, uint index) -> bool
LogicalAnd2, /// (bool2 a) -> bool
LogicalFOrdLessThan, /// (float a, float b) -> bool
LogicalFOrdEqual, /// (float a, float b) -> bool
LogicalFOrdLessEqual, /// (float a, float b) -> bool
LogicalFOrdGreaterThan, /// (float a, float b) -> bool
LogicalFOrdNotEqual, /// (float a, float b) -> bool
LogicalFOrdGreaterEqual, /// (float a, float b) -> bool
LogicalFOrdered, /// (float a, float b) -> bool
LogicalFUnordered, /// (float a, float b) -> bool
LogicalFUnordLessThan, /// (float a, float b) -> bool
LogicalFUnordEqual, /// (float a, float b) -> bool
LogicalFUnordLessEqual, /// (float a, float b) -> bool
LogicalFUnordGreaterThan, /// (float a, float b) -> bool
LogicalFUnordNotEqual, /// (float a, float b) -> bool
LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
LogicalILessThan, /// (int a, int b) -> bool
LogicalIEqual, /// (int a, int b) -> bool
LogicalILessEqual, /// (int a, int b) -> bool
LogicalIGreaterThan, /// (int a, int b) -> bool
LogicalINotEqual, /// (int a, int b) -> bool
LogicalIGreaterEqual, /// (int a, int b) -> bool
LogicalULessThan, /// (uint a, uint b) -> bool
LogicalUEqual, /// (uint a, uint b) -> bool
LogicalULessEqual, /// (uint a, uint b) -> bool
LogicalUGreaterThan, /// (uint a, uint b) -> bool
LogicalUNotEqual, /// (uint a, uint b) -> bool
LogicalUGreaterEqual, /// (uint a, uint b) -> bool
LogicalAddCarry, /// (uint a, uint b) -> bool
Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Texture, /// (MetaTexture, float[N] coords) -> float4
TextureLod, /// (MetaTexture, float[N] coords) -> float4
TextureGather, /// (MetaTexture, float[N] coords) -> float4
TextureQueryDimensions, /// (MetaTexture, float a) -> float4
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
TexelFetch, /// (MetaTexture, int[N], int) -> float4
TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
ImageLoad, /// (MetaImage, int[N] coords) -> void
ImageStore, /// (MetaImage, int[N] coords) -> void
AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
AtomicImageOr, /// (MetaImage, int[N] coords) -> void
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
AtomicUExchange, /// (memory, uint) -> uint
AtomicUAdd, /// (memory, uint) -> uint
AtomicUMin, /// (memory, uint) -> uint
AtomicUMax, /// (memory, uint) -> uint
AtomicUAnd, /// (memory, uint) -> uint
AtomicUOr, /// (memory, uint) -> uint
AtomicUXor, /// (memory, uint) -> uint
AtomicIExchange, /// (memory, int) -> int
AtomicIAdd, /// (memory, int) -> int
AtomicIMin, /// (memory, int) -> int
AtomicIMax, /// (memory, int) -> int
AtomicIAnd, /// (memory, int) -> int
AtomicIOr, /// (memory, int) -> int
AtomicIXor, /// (memory, int) -> int
ReduceUAdd, /// (memory, uint) -> void
ReduceUMin, /// (memory, uint) -> void
ReduceUMax, /// (memory, uint) -> void
ReduceUAnd, /// (memory, uint) -> void
ReduceUOr, /// (memory, uint) -> void
ReduceUXor, /// (memory, uint) -> void
ReduceIAdd, /// (memory, int) -> void
ReduceIMin, /// (memory, int) -> void
ReduceIMax, /// (memory, int) -> void
ReduceIAnd, /// (memory, int) -> void
ReduceIOr, /// (memory, int) -> void
ReduceIXor, /// (memory, int) -> void
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
PopFlowStack, /// () -> void
Exit, /// () -> void
Discard, /// () -> void
EmitVertex, /// () -> void
EndPrimitive, /// () -> void
InvocationId, /// () -> int
YNegate, /// () -> float
LocalInvocationIdX, /// () -> uint
LocalInvocationIdY, /// () -> uint
LocalInvocationIdZ, /// () -> uint
WorkGroupIdX, /// () -> uint
WorkGroupIdY, /// () -> uint
WorkGroupIdZ, /// () -> uint
BallotThread, /// (bool) -> uint
VoteAll, /// (bool) -> bool
VoteAny, /// (bool) -> bool
VoteEqual, /// (bool) -> bool
ThreadId, /// () -> uint
ThreadEqMask, /// () -> uint
ThreadGeMask, /// () -> uint
ThreadGtMask, /// () -> uint
ThreadLeMask, /// () -> uint
ThreadLtMask, /// () -> uint
ShuffleIndexed, /// (uint value, uint index) -> uint
MemoryBarrierGL, /// () -> void
Amount,
};
enum class InternalFlag {
Zero = 0,
Sign = 1,
Carry = 2,
Overflow = 3,
Amount = 4,
};
enum class MetaStackClass {
Ssy,
Pbk,
};
class OperationNode;
class ConditionalNode;
class GprNode;
class CustomVarNode;
class ImmediateNode;
class InternalFlagNode;
class PredicateNode;
class AbufNode;
class CbufNode;
class LmemNode;
class PatchNode;
class SmemNode;
class GmemNode;
class CommentNode;
using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
LmemNode, SmemNode, GmemNode, CommentNode>;
using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
class BindlessSamplerNode;
class ArraySamplerNode;
using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
using TrackSampler = std::shared_ptr<TrackSamplerData>;
struct Sampler {
/// Bound samplers constructor
constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
is_buffer{is_buffer}, is_indexed{is_indexed} {}
/// Bindless samplers constructor
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
u32 index = 0; ///< Emulated index given for the this sampler.
u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
u32 size = 1; ///< Size of the sampler.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
};
/// Represents a tracked bindless sampler into a direct const buffer
class ArraySamplerNode final {
public:
explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
: index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
constexpr u32 GetIndex() const {
return index;
}
constexpr u32 GetBaseOffset() const {
return base_offset;
}
constexpr u32 GetIndexVar() const {
return bindless_var;
}
private:
u32 index;
u32 base_offset;
u32 bindless_var;
};
/// Represents a tracked bindless sampler into a direct const buffer
class BindlessSamplerNode final {
public:
explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
constexpr u32 GetIndex() const {
return index;
}
constexpr u32 GetOffset() const {
return offset;
}
private:
u32 index;
u32 offset;
};
struct Image {
public:
/// Bound images constructor
constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type)
: index{index}, offset{offset}, type{type} {}
/// Bindless samplers constructor
constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type)
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {}
void MarkWrite() {
is_written = true;
}
void MarkRead() {
is_read = true;
}
void MarkAtomic() {
MarkWrite();
MarkRead();
is_atomic = true;
}
u32 index = 0;
u32 offset = 0;
u32 buffer = 0;
Tegra::Shader::ImageType type{};
bool is_bindless = false;
bool is_written = false;
bool is_read = false;
bool is_atomic = false;
};
struct GlobalMemoryBase {
u32 cbuf_index = 0;
u32 cbuf_offset = 0;
bool operator<(const GlobalMemoryBase& rhs) const {
return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
}
};
/// Parameters describing an arithmetic operation
struct MetaArithmetic {
bool precise{}; ///< Whether the operation can be constraint or not
};
/// Parameters describing a texture sampler
struct MetaTexture {
Sampler sampler;
Node array;
Node depth_compare;
std::vector<Node> aoffi;
std::vector<Node> ptp;
std::vector<Node> derivates;
Node bias;
Node lod;
Node component;
u32 element{};
Node index;
};
struct MetaImage {
const Image& image;
std::vector<Node> values;
u32 element{};
};
/// Parameters that modify an operation but are not part of any particular operand
using Meta =
std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
class AmendNode {
public:
std::optional<std::size_t> GetAmendIndex() const {
if (amend_index == amend_null_index) {
return std::nullopt;
}
return {amend_index};
}
void SetAmendIndex(std::size_t index) {
amend_index = index;
}
void ClearAmend() {
amend_index = amend_null_index;
}
private:
static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
std::size_t amend_index{amend_null_index};
};
/// Holds any kind of operation that can be done in the IR
class OperationNode final : public AmendNode {
public:
explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
explicit OperationNode(OperationCode code, Meta meta)
: OperationNode(code, std::move(meta), std::vector<Node>{}) {}
explicit OperationNode(OperationCode code, std::vector<Node> operands)
: OperationNode(code, Meta{}, std::move(operands)) {}
explicit OperationNode(OperationCode code, Meta meta, std::vector<Node> operands)
: code{code}, meta{std::move(meta)}, operands{std::move(operands)} {}
template <typename... Args>
explicit OperationNode(OperationCode code, Meta meta, Args&&... operands)
: code{code}, meta{std::move(meta)}, operands{operands...} {}
OperationCode GetCode() const {
return code;
}
const Meta& GetMeta() const {
return meta;
}
std::size_t GetOperandsCount() const {
return operands.size();
}
const Node& operator[](std::size_t operand_index) const {
return operands.at(operand_index);
}
private:
OperationCode code{};
Meta meta{};
std::vector<Node> operands;
};
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
class ConditionalNode final : public AmendNode {
public:
explicit ConditionalNode(Node condition, std::vector<Node>&& code)
: condition{std::move(condition)}, code{std::move(code)} {}
const Node& GetCondition() const {
return condition;
}
const std::vector<Node>& GetCode() const {
return code;
}
private:
Node condition; ///< Condition to be satisfied
std::vector<Node> code; ///< Code to execute
};
/// A general purpose register
class GprNode final {
public:
explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {}
u32 GetIndex() const {
return static_cast<u32>(index);
}
private:
Tegra::Shader::Register index{};
};
/// A custom variable
class CustomVarNode final {
public:
explicit constexpr CustomVarNode(u32 index) : index{index} {}
constexpr u32 GetIndex() const {
return index;
}
private:
u32 index{};
};
/// A 32-bits value that represents an immediate value
class ImmediateNode final {
public:
explicit constexpr ImmediateNode(u32 value) : value{value} {}
u32 GetValue() const {
return value;
}
private:
u32 value{};
};
/// One of Maxwell's internal flags
class InternalFlagNode final {
public:
explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {}
InternalFlag GetFlag() const {
return flag;
}
private:
InternalFlag flag{};
};
/// A predicate register, it can be negated without additional nodes
class PredicateNode final {
public:
explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated)
: index{index}, negated{negated} {}
Tegra::Shader::Pred GetIndex() const {
return index;
}
bool IsNegated() const {
return negated;
}
private:
Tegra::Shader::Pred index{};
bool negated{};
};
/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
class AbufNode final {
public:
// Initialize for standard attributes (index is explicit).
explicit AbufNode(Tegra::Shader::Attribute::Index index, u32 element, Node buffer = {})
: buffer{std::move(buffer)}, index{index}, element{element} {}
// Initialize for physical attributes (index is a variable value).
explicit AbufNode(Node physical_address, Node buffer = {})
: physical_address{std::move(physical_address)}, buffer{std::move(buffer)} {}
Tegra::Shader::Attribute::Index GetIndex() const {
return index;
}
u32 GetElement() const {
return element;
}
const Node& GetBuffer() const {
return buffer;
}
bool IsPhysicalBuffer() const {
return static_cast<bool>(physical_address);
}
const Node& GetPhysicalAddress() const {
return physical_address;
}
private:
Node physical_address;
Node buffer;
Tegra::Shader::Attribute::Index index{};
u32 element{};
};
/// Patch memory (used to communicate tessellation stages).
class PatchNode final {
public:
explicit PatchNode(u32 offset) : offset{offset} {}
u32 GetOffset() const {
return offset;
}
private:
u32 offset{};
};
/// Constant buffer node, usually mapped to uniform buffers in GLSL
class CbufNode final {
public:
explicit CbufNode(u32 index, Node offset) : index{index}, offset{std::move(offset)} {}
u32 GetIndex() const {
return index;
}
const Node& GetOffset() const {
return offset;
}
private:
u32 index{};
Node offset;
};
/// Local memory node
class LmemNode final {
public:
explicit LmemNode(Node address) : address{std::move(address)} {}
const Node& GetAddress() const {
return address;
}
private:
Node address;
};
/// Shared memory node
class SmemNode final {
public:
explicit SmemNode(Node address) : address{std::move(address)} {}
const Node& GetAddress() const {
return address;
}
private:
Node address;
};
/// Global memory node
class GmemNode final {
public:
explicit GmemNode(Node real_address, Node base_address, const GlobalMemoryBase& descriptor)
: real_address{std::move(real_address)}, base_address{std::move(base_address)},
descriptor{descriptor} {}
const Node& GetRealAddress() const {
return real_address;
}
const Node& GetBaseAddress() const {
return base_address;
}
const GlobalMemoryBase& GetDescriptor() const {
return descriptor;
}
private:
Node real_address;
Node base_address;
GlobalMemoryBase descriptor;
};
/// Commentary, can be dropped
class CommentNode final {
public:
explicit CommentNode(std::string text) : text{std::move(text)} {}
const std::string& GetText() const {
return text;
}
private:
std::string text;
};
} // namespace VideoCommon::Shader