Add an early `TailMerge` pass (#2721)
* Add an early `TailMerge` pass

  Some translations can contain a lot of guest calls, and each guest call has a call guard which may return, so this can produce a lot of epilogue code for returns. This pass merges the epilogues into a single block.

  ```
  Using filter 'hcq'.
  Using metric 'code size'.
  Total diff: -1648111 (-7.19 %) (bytes):
  Base: 22913847
  Diff: 21265736
  Improved: 4567, regressed: 14, unchanged: 144
  ```
* Set PTC version
* Address feedback
* Handle `void` returning functions
* Actually handle `void` returning functions
* Fix `RegisterToLocal` logging
This commit is contained in:
parent
d512ce122c
commit
fbf40424f4
7 changed files with 148 additions and 26 deletions
|
@ -17,7 +17,7 @@ namespace ARMeilleure.CodeGen.Optimizations
|
||||||
BasicBlock lastBlock = cfg.Blocks.Last;
|
BasicBlock lastBlock = cfg.Blocks.Last;
|
||||||
|
|
||||||
// Move cold blocks at the end of the list, so that they are emitted away from hot code.
|
// Move cold blocks at the end of the list, so that they are emitted away from hot code.
|
||||||
for (block = cfg.Blocks.First; block != lastBlock; block = nextBlock)
|
for (block = cfg.Blocks.First; block != null; block = nextBlock)
|
||||||
{
|
{
|
||||||
nextBlock = block.ListNext;
|
nextBlock = block.ListNext;
|
||||||
|
|
||||||
|
@ -26,6 +26,11 @@ namespace ARMeilleure.CodeGen.Optimizations
|
||||||
cfg.Blocks.Remove(block);
|
cfg.Blocks.Remove(block);
|
||||||
cfg.Blocks.AddLast(block);
|
cfg.Blocks.AddLast(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (block == lastBlock)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (block = cfg.Blocks.First; block != null; block = nextBlock)
|
for (block = cfg.Blocks.First; block != null; block = nextBlock)
|
||||||
|
|
83
ARMeilleure/CodeGen/Optimizations/TailMerge.cs
Normal file
83
ARMeilleure/CodeGen/Optimizations/TailMerge.cs
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
using ARMeilleure.IntermediateRepresentation;
|
||||||
|
using ARMeilleure.Translation;
|
||||||
|
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
|
||||||
|
|
||||||
|
namespace ARMeilleure.CodeGen.Optimizations
|
||||||
|
{
|
||||||
|
/// <summary>
/// Pass that replaces every <see cref="Instruction.Return"/> terminating a block with a jump to one shared
/// return block, so that the function ends up with a single return operation.
/// </summary>
static class TailMerge
{
    /// <summary>
    /// Runs the pass over the control flow graph of <paramref name="cctx"/>.
    /// </summary>
    /// <remarks>
    /// A new block holding the only remaining <see cref="Instruction.Return"/> is created, marked as
    /// <see cref="BasicBlockFrequency.Cold"/> and added at the end of the block list. For functions that
    /// return a value, each original return is turned into a copy of its source into a shared local which
    /// the merged return then returns.
    /// </remarks>
    /// <param name="cctx">Compiler context providing the graph and the function return type</param>
    public static void RunPass(in CompilerContext cctx)
    {
        ControlFlowGraph cfg = cctx.Cfg;

        bool returnsVoid = cctx.FuncReturnType == OperandType.None;

        BasicBlock mergedReturn = new(cfg.Blocks.Count);

        Operand returnValue;
        Operation returnOp;

        if (returnsVoid)
        {
            returnValue = default;
            returnOp = Operation(Instruction.Return, default);
        }
        else
        {
            // All returning paths write their result to this local before reaching the merged return.
            returnValue = cfg.AllocateLocal(cctx.FuncReturnType);
            returnOp = Operation(Instruction.Return, default, returnValue);
        }

        mergedReturn.Frequency = BasicBlockFrequency.Cold;
        mergedReturn.Operations.AddLast(returnOp);

        for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
        {
            Operation op = block.Operations.Last;

            if (op == default || op.Instruction != Instruction.Return)
            {
                continue;
            }

            block.Operations.Remove(op);

            BasicBlock mergeSource = PrepareMerge(block, mergedReturn);

            if (!returnsVoid)
            {
                mergeSource.Append(Operation(Instruction.Copy, returnValue, op.GetSource(0)));
            }
        }

        // The merged block is only added after the scan, so the loop above never visits it.
        cfg.Blocks.AddLast(mergedReturn);
        cfg.Update();
    }

    /// <summary>
    /// Links the returning block <paramref name="from"/> to the merged return block <paramref name="to"/>.
    /// </summary>
    /// <param name="from">Block whose trailing return was removed</param>
    /// <param name="to">Merged return block</param>
    /// <returns>The block that now flows into <paramref name="to"/>; code for the return path must be appended to it</returns>
    private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to)
    {
        BasicBlock fromPred = from.Predecessors.Count == 1 ? from.Predecessors[0] : null;

        // If the block is empty, we can try to append to the predecessor and avoid unnecessary jumps.
        // This is only valid when `from` is the predecessor's sole successor: the caller appends the
        // return value copy to the block returned here, and code placed in a predecessor with several
        // successors would run on every outgoing path. Two empty returning blocks sharing such a
        // predecessor would also have their copies placed in the same block, the later overwriting
        // the earlier.
        if (from.Operations.Count == 0 && fromPred != null && fromPred.SuccessorsCount == 1)
        {
            for (int i = 0; i < fromPred.SuccessorsCount; i++)
            {
                if (fromPred.GetSuccessor(i) == from)
                {
                    fromPred.SetSuccessor(i, to);
                }
            }

            // NOTE: `from` becomes unreachable and the call to `cfg.Update()` will remove it.
            return fromPred;
        }

        from.AddSuccessor(to);

        return from;
    }
}
|
||||||
|
}
|
|
@ -5,8 +5,10 @@ namespace ARMeilleure.Diagnostics
|
||||||
Decoding,
|
Decoding,
|
||||||
Translation,
|
Translation,
|
||||||
RegisterUsage,
|
RegisterUsage,
|
||||||
|
TailMerge,
|
||||||
Dominance,
|
Dominance,
|
||||||
SsaConstruction,
|
SsaConstruction,
|
||||||
|
RegisterToLocal,
|
||||||
Optimization,
|
Optimization,
|
||||||
PreAllocation,
|
PreAllocation,
|
||||||
RegisterAllocation,
|
RegisterAllocation,
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
using ARMeilleure.CodeGen;
|
using ARMeilleure.CodeGen;
|
||||||
|
using ARMeilleure.CodeGen.Optimizations;
|
||||||
using ARMeilleure.CodeGen.X86;
|
using ARMeilleure.CodeGen.X86;
|
||||||
using ARMeilleure.Diagnostics;
|
using ARMeilleure.Diagnostics;
|
||||||
using ARMeilleure.IntermediateRepresentation;
|
using ARMeilleure.IntermediateRepresentation;
|
||||||
|
@ -13,31 +14,41 @@ namespace ARMeilleure.Translation
|
||||||
OperandType retType,
|
OperandType retType,
|
||||||
CompilerOptions options)
|
CompilerOptions options)
|
||||||
{
|
{
|
||||||
Logger.StartPass(PassName.Dominance);
|
CompilerContext cctx = new(cfg, argTypes, retType, options);
|
||||||
|
|
||||||
if ((options & CompilerOptions.SsaForm) != 0)
|
if (options.HasFlag(CompilerOptions.Optimize))
|
||||||
{
|
{
|
||||||
Dominance.FindDominators(cfg);
|
Logger.StartPass(PassName.TailMerge);
|
||||||
Dominance.FindDominanceFrontiers(cfg);
|
|
||||||
|
TailMerge.RunPass(cctx);
|
||||||
|
|
||||||
|
Logger.EndPass(PassName.TailMerge, cfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
Logger.EndPass(PassName.Dominance);
|
if (options.HasFlag(CompilerOptions.SsaForm))
|
||||||
|
|
||||||
Logger.StartPass(PassName.SsaConstruction);
|
|
||||||
|
|
||||||
if ((options & CompilerOptions.SsaForm) != 0)
|
|
||||||
{
|
{
|
||||||
|
Logger.StartPass(PassName.Dominance);
|
||||||
|
|
||||||
|
Dominance.FindDominators(cfg);
|
||||||
|
Dominance.FindDominanceFrontiers(cfg);
|
||||||
|
|
||||||
|
Logger.EndPass(PassName.Dominance);
|
||||||
|
|
||||||
|
Logger.StartPass(PassName.SsaConstruction);
|
||||||
|
|
||||||
Ssa.Construct(cfg);
|
Ssa.Construct(cfg);
|
||||||
|
|
||||||
|
Logger.EndPass(PassName.SsaConstruction, cfg);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
Logger.StartPass(PassName.RegisterToLocal);
|
||||||
|
|
||||||
RegisterToLocal.Rename(cfg);
|
RegisterToLocal.Rename(cfg);
|
||||||
|
|
||||||
|
Logger.EndPass(PassName.RegisterToLocal, cfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
Logger.EndPass(PassName.SsaConstruction, cfg);
|
|
||||||
|
|
||||||
CompilerContext cctx = new(cfg, argTypes, retType, options);
|
|
||||||
|
|
||||||
return CodeGenerator.Generate(cctx);
|
return CodeGenerator.Generate(cctx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,7 @@ namespace ARMeilleure.Translation
|
||||||
private BasicBlock[] _postOrderBlocks;
|
private BasicBlock[] _postOrderBlocks;
|
||||||
private int[] _postOrderMap;
|
private int[] _postOrderMap;
|
||||||
|
|
||||||
public int LocalsCount { get; }
|
public int LocalsCount { get; private set; }
|
||||||
public BasicBlock Entry { get; }
|
public BasicBlock Entry { get; }
|
||||||
public IntrusiveList<BasicBlock> Blocks { get; }
|
public IntrusiveList<BasicBlock> Blocks { get; }
|
||||||
public BasicBlock[] PostOrderBlocks => _postOrderBlocks;
|
public BasicBlock[] PostOrderBlocks => _postOrderBlocks;
|
||||||
|
@ -25,6 +25,15 @@ namespace ARMeilleure.Translation
|
||||||
Update();
|
Update();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates a local operand of the given type, increments <see cref="LocalsCount"/> and passes the
/// incremented count to the operand's <c>NumberLocal</c>.
/// </summary>
/// <param name="type">Type of the local to create</param>
/// <returns>The newly created local operand</returns>
public Operand AllocateLocal(OperandType type)
{
    Operand local = Operand.Factory.Local(type);

    // Bump the count first so the new local receives the incremented value.
    LocalsCount += 1;
    local.NumberLocal(LocalsCount);

    return local;
}
|
||||||
|
|
||||||
public void Update()
|
public void Update()
|
||||||
{
|
{
|
||||||
RemoveUnreachableBlocks(Blocks);
|
RemoveUnreachableBlocks(Blocks);
|
||||||
|
|
|
@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||||
|
|
||||||
private const uint InternalVersion = 2680; //! To be incremented manually for each change to the ARMeilleure project.
|
private const uint InternalVersion = 2721; //! To be incremented manually for each change to the ARMeilleure project.
|
||||||
|
|
||||||
private const string ActualDir = "0";
|
private const string ActualDir = "0";
|
||||||
private const string BackupDir = "1";
|
private const string BackupDir = "1";
|
||||||
|
|
|
@ -203,12 +203,18 @@ namespace ARMeilleure.Translation
|
||||||
// It always needs a context load as it is the first block to run.
|
// It always needs a context load as it is the first block to run.
|
||||||
if (block.Predecessors.Count == 0 || hasContextLoad)
|
if (block.Predecessors.Count == 0 || hasContextLoad)
|
||||||
{
|
{
|
||||||
arg = Local(OperandType.I64);
|
long vecMask = globalInputs[block.Index].VecMask;
|
||||||
|
long intMask = globalInputs[block.Index].IntMask;
|
||||||
|
|
||||||
Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
|
if (vecMask != 0 || intMask != 0)
|
||||||
|
{
|
||||||
|
arg = Local(OperandType.I64);
|
||||||
|
|
||||||
LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector, mode, loadArg, arg);
|
Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
|
||||||
LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer, mode, loadArg, arg);
|
|
||||||
|
LoadLocals(block, vecMask, RegisterType.Vector, mode, loadArg, arg);
|
||||||
|
LoadLocals(block, intMask, RegisterType.Integer, mode, loadArg, arg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool hasContextStore = HasContextStore(block);
|
bool hasContextStore = HasContextStore(block);
|
||||||
|
@ -220,15 +226,21 @@ namespace ARMeilleure.Translation
|
||||||
|
|
||||||
if (EndsWithReturn(block) || hasContextStore)
|
if (EndsWithReturn(block) || hasContextStore)
|
||||||
{
|
{
|
||||||
if (arg == default)
|
long vecMask = globalOutputs[block.Index].VecMask;
|
||||||
|
long intMask = globalOutputs[block.Index].IntMask;
|
||||||
|
|
||||||
|
if (vecMask != 0 || intMask != 0)
|
||||||
{
|
{
|
||||||
arg = Local(OperandType.I64);
|
if (arg == default)
|
||||||
|
{
|
||||||
|
arg = Local(OperandType.I64);
|
||||||
|
|
||||||
block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
|
block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
|
||||||
|
}
|
||||||
|
|
||||||
|
StoreLocals(block, intMask, RegisterType.Integer, mode, arg);
|
||||||
|
StoreLocals(block, vecMask, RegisterType.Vector, mode, arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, mode, arg);
|
|
||||||
StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, mode, arg);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue