Merge pull request #6900 from ameerj/attr-reorder

structured_control_flow: Add DemoteCombinationPass
This commit is contained in:
bunnei 2021-09-01 17:36:26 -07:00 committed by GitHub
commit b2572a56d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 140 additions and 10 deletions

View file

@ -20,6 +20,7 @@
#include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
#include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/object_pool.h" #include "shader_recompiler/object_pool.h"
namespace Shader::Maxwell { namespace Shader::Maxwell {
@ -652,7 +653,7 @@ class TranslatePass {
public: public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_) IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
syntax_list{syntax_list_} { syntax_list{syntax_list_} {
Visit(root_stmt, nullptr, nullptr); Visit(root_stmt, nullptr, nullptr);
@ -660,6 +661,9 @@ public:
IR::Block& first_block{*syntax_list.front().data.block}; IR::Block& first_block{*syntax_list.front().data.block};
IR::IREmitter ir(first_block, first_block.begin()); IR::IREmitter ir(first_block, first_block.begin());
ir.Prologue(); ir.Prologue();
if (uses_demote_to_helper && host_info.needs_demote_reorder) {
DemoteCombinationPass();
}
} }
private: private:
@ -809,7 +813,14 @@ private:
} }
case StatementType::Return: { case StatementType::Return: {
ensure_block(); ensure_block();
IR::IREmitter{*current_block}.Epilogue(); IR::Block* return_block{block_pool.Create(inst_pool)};
IR::IREmitter{*return_block}.Epilogue();
current_block->AddBranch(return_block);
auto& merge{syntax_list.emplace_back()};
merge.type = IR::AbstractSyntaxNode::Type::Block;
merge.data.block = return_block;
current_block = nullptr; current_block = nullptr;
syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
break; break;
@ -824,6 +835,7 @@ private:
auto& merge{syntax_list.emplace_back()}; auto& merge{syntax_list.emplace_back()};
merge.type = IR::AbstractSyntaxNode::Type::Block; merge.type = IR::AbstractSyntaxNode::Type::Block;
merge.data.block = demote_block; merge.data.block = demote_block;
uses_demote_to_helper = true;
break; break;
} }
case StatementType::Unreachable: { case StatementType::Unreachable: {
@ -855,11 +867,117 @@ private:
return block_pool.Create(inst_pool); return block_pool.Create(inst_pool);
} }
void DemoteCombinationPass() {
using Type = IR::AbstractSyntaxNode::Type;
std::vector<IR::Block*> demote_blocks;
std::vector<IR::U1> demote_conds;
u32 num_epilogues{};
u32 branch_depth{};
for (const IR::AbstractSyntaxNode& node : syntax_list) {
if (node.type == Type::If) {
++branch_depth;
}
if (node.type == Type::EndIf) {
--branch_depth;
}
if (node.type != Type::Block) {
continue;
}
if (branch_depth > 1) {
// Skip reordering nested demote branches.
continue;
}
for (const IR::Inst& inst : node.data.block->Instructions()) {
const IR::Opcode op{inst.GetOpcode()};
if (op == IR::Opcode::DemoteToHelperInvocation) {
demote_blocks.push_back(node.data.block);
break;
}
if (op == IR::Opcode::Epilogue) {
++num_epilogues;
}
}
}
if (demote_blocks.size() == 0) {
return;
}
if (num_epilogues > 1) {
LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented.");
return;
}
s64 last_iterator_offset{};
auto& asl{syntax_list};
for (const IR::Block* demote_block : demote_blocks) {
const auto start_it{asl.begin() + last_iterator_offset};
auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
return asn.type == Type::If && asn.data.if_node.body == demote_block;
})};
if (asl_it == asl.end()) {
// Demote without a conditional branch.
// No need to proceed since all fragment instances will be demoted regardless.
return;
}
const IR::Block* const end_if = asl_it->data.if_node.merge;
demote_conds.push_back(asl_it->data.if_node.cond);
last_iterator_offset = std::distance(asl.begin(), asl_it);
asl_it = asl.erase(asl_it);
asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
return asn.type == Type::Block && asn.data.block == demote_block;
});
asl_it = asl.erase(asl_it);
asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
return asn.type == Type::EndIf && asn.data.end_if.merge == end_if;
});
asl_it = asl.erase(asl_it);
}
const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) {
if (asn.type != Type::Block) {
return false;
}
for (const auto& inst : asn.data.block->Instructions()) {
if (inst.GetOpcode() == IR::Opcode::Epilogue) {
return true;
}
}
return false;
}};
const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)};
const auto return_block_it{(reverse_it + 1).base()};
IR::IREmitter ir{*(return_block_it - 1)->data.block};
IR::U1 cond(IR::Value(false));
for (const auto& demote_cond : demote_conds) {
cond = ir.LogicalOr(cond, demote_cond);
}
cond.Inst()->DestructiveAddUsage(1);
IR::AbstractSyntaxNode demote_if_node{};
demote_if_node.type = Type::If;
demote_if_node.data.if_node.cond = cond;
demote_if_node.data.if_node.body = demote_blocks[0];
demote_if_node.data.if_node.merge = return_block_it->data.block;
IR::AbstractSyntaxNode demote_node{};
demote_node.type = Type::Block;
demote_node.data.block = demote_blocks[0];
IR::AbstractSyntaxNode demote_endif_node{};
demote_endif_node.type = Type::EndIf;
demote_endif_node.data.end_if.merge = return_block_it->data.block;
asl.insert(return_block_it, demote_endif_node);
asl.insert(return_block_it, demote_node);
asl.insert(return_block_it, demote_if_node);
}
ObjectPool<Statement>& stmt_pool; ObjectPool<Statement>& stmt_pool;
ObjectPool<IR::Inst>& inst_pool; ObjectPool<IR::Inst>& inst_pool;
ObjectPool<IR::Block>& block_pool; ObjectPool<IR::Block>& block_pool;
Environment& env; Environment& env;
IR::AbstractSyntaxList& syntax_list; IR::AbstractSyntaxList& syntax_list;
bool uses_demote_to_helper{};
// TODO: C++20 Remove this when all compilers support constexpr std::vector // TODO: C++20 Remove this when all compilers support constexpr std::vector
#if __cpp_lib_constexpr_vector >= 201907 #if __cpp_lib_constexpr_vector >= 201907
@ -871,12 +989,13 @@ private:
} // Anonymous namespace } // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
Environment& env, Flow::CFG& cfg) { Environment& env, Flow::CFG& cfg,
const HostTranslateInfo& host_info) {
ObjectPool<Statement> stmt_pool{64}; ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool}; GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()}; Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list; IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};
return syntax_list; return syntax_list;
} }

View file

@ -11,10 +11,13 @@
#include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h" #include "shader_recompiler/object_pool.h"
namespace Shader::Maxwell { namespace Shader {
struct HostTranslateInfo;
namespace Maxwell {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Environment& env, ObjectPool<IR::Block>& block_pool, Environment& env,
Flow::CFG& cfg); Flow::CFG& cfg, const HostTranslateInfo& host_info);
} // namespace Shader::Maxwell } // namespace Maxwell
} // namespace Shader

View file

@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
IR::Program program; IR::Program program;
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
program.blocks = GenerateBlocks(program.syntax_list); program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = PostOrder(program.syntax_list.front()); program.post_order_blocks = PostOrder(program.syntax_list.front());
program.stage = env.ShaderStage(); program.stage = env.ShaderStage();

View file

@ -13,6 +13,7 @@ namespace Shader {
struct HostTranslateInfo { struct HostTranslateInfo {
bool support_float16{}; ///< True when the device supports 16-bit floats bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int64{}; ///< True when the device supports 64-bit integers bool support_int64{}; ///< True when the device supports 64-bit integers
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
}; };
} // namespace Shader } // namespace Shader

View file

@ -156,6 +156,10 @@ public:
return shader_backend; return shader_backend;
} }
bool IsAmd() const {
return vendor_name == "ATI Technologies Inc.";
}
private: private:
static bool TestVariableAoffi(); static bool TestVariableAoffi();
static bool TestPreciseBug(); static bool TestPreciseBug();

View file

@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
host_info{ host_info{
.support_float16 = false, .support_float16 = false,
.support_int64 = device.HasShaderInt64(), .support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
} { } {
if (use_asynchronous_shaders) { if (use_asynchronous_shaders) {
workers = CreateWorkers(); workers = CreateWorkers();

View file

@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
host_info = Shader::HostTranslateInfo{ host_info = Shader::HostTranslateInfo{
.support_float16 = device.IsFloat16Supported(), .support_float16 = device.IsFloat16Supported(),
.support_int64 = device.IsShaderInt64Supported(), .support_int64 = device.IsShaderInt64Supported(),
.needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
}; };
} }