New shader translator implementation (#654)
* Start implementing a new shader translator
* Fix shift instructions and a typo
* Small refactoring on StructuredProgram, move RemovePhis method to a separate class
* Initial geometry shader support
* Implement TLD4
* Fix -- There's no negation on FMUL32I
* Add constant folding and algebraic simplification optimizations, nits
* Some leftovers from constant folding
* Avoid cast for constant assignments
* Add a branch elimination pass, and misc small fixes
* Remove redundant branches, add expression propagation and other improvements on the code
* Small leftovers -- add missing break and continue, remove unused properties, other improvements
* Add null check to handle empty block cases on block visitor
* Add HADD2 and HMUL2 half float shader instructions
* Optimize pack/unpack sequences, some fixes related to half float instructions
* Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen
* Fix copy paste mistake that caused RZ to be ignored on the AST instruction
* Add workaround for conditional exit, and fix half float instruction with constant buffer
* Add missing 0.0 source for TLDS.LZ variants
* Simplify the switch for TLDS.LZ
* Texture instructions related fixes
* Implement the HFMA instruction, and some misc. fixes
* Enable constant folding on UnpackHalf2x16 instructions
* Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
* Remove the old shader translator
* Remove ShaderDeclInfo and other unused things
* Add dual vertex shader support
* Add ShaderConfig, used to pass shader type and maximum cbuffer size
* Move and rename some instruction enums
* Move texture instructions into a separate file
* Move operand GetExpression and locals management to OperandManager
* Optimize opcode decoding using a simple list and binary search
* Add missing condition for do-while on goto elimination
* Misc. fixes on texture instructions
* Simplify TLDS switch
* Address PR feedback, and a nit
2019-04-17 19:57:08 -04:00
|
|
|
using Ryujinx.Graphics.Shader.Instructions;
|
|
|
|
using System;
|
2019-10-13 03:02:07 -03:00
|
|
|
using System.Buffers.Binary;
|
New shader translator implementation (#654)
* Start implementing a new shader translator
* Fix shift instructions and a typo
* Small refactoring on StructuredProgram, move RemovePhis method to a separate class
* Initial geometry shader support
* Implement TLD4
* Fix -- There's no negation on FMUL32I
* Add constant folding and algebraic simplification optimizations, nits
* Some leftovers from constant folding
* Avoid cast for constant assignments
* Add a branch elimination pass, and misc small fixes
* Remove redundant branches, add expression propagation and other improvements on the code
* Small leftovers -- add missing break and continue, remove unused properties, other improvements
* Add null check to handle empty block cases on block visitor
* Add HADD2 and HMUL2 half float shader instructions
* Optimize pack/unpack sequences, some fixes related to half float instructions
* Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen
* Fix copy paste mistake that caused RZ to be ignored on the AST instruction
* Add workaround for conditional exit, and fix half float instruction with constant buffer
* Add missing 0.0 source for TLDS.LZ variants
* Simplify the switch for TLDS.LZ
* Texture instructions related fixes
* Implement the HFMA instruction, and some misc. fixes
* Enable constant folding on UnpackHalf2x16 instructions
* Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
* Remove the old shader translator
* Remove ShaderDeclInfo and other unused things
* Add dual vertex shader support
* Add ShaderConfig, used to pass shader type and maximum cbuffer size
* Move and rename some instruction enums
* Move texture instructions into a separate file
* Move operand GetExpression and locals management to OperandManager
* Optimize opcode decoding using a simple list and binary search
* Add missing condition for do-while on goto elimination
* Misc. fixes on texture instructions
* Simplify TLDS switch
* Address PR feedback, and a nit
2019-04-17 19:57:08 -04:00
|
|
|
using System.Collections.Concurrent;
|
|
|
|
using System.Collections.Generic;
|
|
|
|
using System.Linq;
|
|
|
|
using System.Reflection.Emit;
|
|
|
|
|
|
|
|
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
|
|
|
|
|
|
|
namespace Ryujinx.Graphics.Shader.Decoders
|
|
|
|
{
|
|
|
|
static class Decoder
|
|
|
|
{
|
|
|
|
// Signature of a factory taking an instruction emitter, an address and a raw 64-bit op code, and returning an object.
// NOTE(review): presumably used to instantiate OpCode-derived types without per-call reflection; the code creating these delegates is not in view - confirm.
private delegate object OpActivator(InstEmitter emitter, ulong address, long opCode);
|
|
|
|
|
|
|
|
// Maps a Type to its OpActivator delegate. Created empty by the static constructor.
// NOTE(review): presumably keyed by op code class type and filled lazily on first use - confirm against the code that populates it.
private static ConcurrentDictionary<Type, OpActivator> _opActivators;
|
|
|
|
|
|
|
|
/// <summary>
/// Type initializer: creates the (initially empty) activator cache.
/// </summary>
static Decoder() => _opActivators = new ConcurrentDictionary<Type, OpActivator>();
|
|
|
|
|
2019-10-13 03:02:07 -03:00
|
|
|
/// <summary>
/// Decodes shader code into basic blocks, following branch and SSY targets
/// from the first block located at <paramref name="headerSize"/>.
/// </summary>
/// <param name="code">Shader binary to decode; its length is the default upper limit for a block.</param>
/// <param name="headerSize">Address of the first block; also forwarded to FillBlock as the start address.</param>
/// <returns>The decoded blocks, kept sorted by address.</returns>
/// <exception cref="InvalidOperationException">
/// A queued block has the same address as a block that is already on the list.
/// </exception>
public static Block[] Decode(Span<byte> code, ulong headerSize)
{
    var sortedBlocks = new List<Block>();
    var pending = new Queue<Block>();
    var blocksByAddress = new Dictionary<ulong, Block>();

    // Returns the block registered for the address, creating and queueing
    // it for decoding when it was not seen before.
    Block GetBlock(ulong blkAddress)
    {
        if (blocksByAddress.TryGetValue(blkAddress, out Block known))
        {
            return known;
        }

        Block created = new Block(blkAddress);

        pending.Enqueue(created);
        blocksByAddress.Add(blkAddress, created);

        return created;
    }

    GetBlock(headerSize);

    while (pending.TryDequeue(out Block current))
    {
        // When the address falls inside an already decoded block, that
        // block is split instead of decoding the same code twice.
        bool insideExisting = BinarySearch(sortedBlocks, current.Address, out int nearIndex);

        if (insideExisting)
        {
            Block container = sortedBlocks[nearIndex];

            if (container.Address == current.Address)
            {
                throw new InvalidOperationException("Found duplicate block address on the list.");
            }

            container.Split(current);
            sortedBlocks.Insert(nearIndex + 1, current);

            continue;
        }

        // Decoding stops at the end of the code, or at the start of the
        // closest block placed after the current one (if any).
        ulong limitAddress = (ulong)code.Length;

        if (nearIndex != sortedBlocks.Count)
        {
            Block neighbour = sortedBlocks[nearIndex];
            int followingIndex = nearIndex + 1;

            if (neighbour.Address < current.Address && followingIndex < sortedBlocks.Count)
            {
                limitAddress = sortedBlocks[followingIndex].Address;
            }
            else if (neighbour.Address > current.Address)
            {
                limitAddress = neighbour.Address;
            }
        }

        FillBlock(code, current, limitAddress, headerSize);

        if (current.OpCodes.Count != 0)
        {
            // Every SSY target is a block of its own.
            foreach (OpCodeSsy ssyOp in current.SsyOpCodes)
            {
                GetBlock(ssyOp.GetAbsoluteAddress());
            }

            // Link the successors: "Branch" is the branch target (when taken),
            // "Next" is the block at the following address (branch not taken).
            // "Next" stays unset after an unconditional branch.
            OpCode lastOp = current.GetLastOp();

            if (lastOp is OpCodeBranch branchOp)
            {
                current.Branch = GetBlock(branchOp.GetAbsoluteAddress());
            }

            if (!IsUnconditionalBranch(lastOp))
            {
                current.Next = GetBlock(current.EndAddress);
            }
        }

        // Keep the list sorted by address when adding the new block.
        if (sortedBlocks.Count == 0)
        {
            sortedBlocks.Add(current);
        }
        else
        {
            int insertIndex = sortedBlocks[nearIndex].Address < current.Address
                ? nearIndex + 1
                : nearIndex;

            sortedBlocks.Insert(insertIndex, current);
        }
    }

    foreach (Block candidate in sortedBlocks)
    {
        if (candidate.SsyOpCodes.Count == 0)
        {
            continue;
        }

        for (int ssyIndex = 0; ssyIndex < candidate.SsyOpCodes.Count; ssyIndex++)
        {
            PropagateSsy(blocksByAddress, candidate, ssyIndex);
        }
    }

    return sortedBlocks.ToArray();
}
|
|
|
|
|
|
|
|
/// <summary>
/// Searches the address-sorted block list for the block whose
/// [Address, EndAddress) range contains <paramref name="address"/>.
/// </summary>
/// <param name="blocks">Blocks to search.</param>
/// <param name="address">Address to look for.</param>
/// <param name="index">
/// Index of the last block examined: the containing block on success,
/// otherwise the last probed position (0 when the list is empty).
/// </param>
/// <returns>True when a block containing the address was found, false otherwise.</returns>
private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
{
    index = 0;

    int low = 0;
    int high = blocks.Count - 1;

    while (low <= high)
    {
        int mid = low + ((high - low) >> 1);

        Block probe = blocks[mid];

        index = mid;

        if (address < probe.Address)
        {
            // Target lies before this block, continue on the lower half.
            high = mid - 1;
        }
        else if (address < probe.EndAddress)
        {
            // Address is at or after the start and before the end: found.
            return true;
        }
        else
        {
            // Target lies at or past the end of this block, continue on the upper half.
            low = mid + 1;
        }
    }

    return false;
}
|
|
|
|
|
|
|
|
private static void FillBlock(
|
2019-10-13 03:02:07 -03:00
|
|
|
Span<byte> code,
|
New shader translator implementation (#654)
* Start implementing a new shader translator
* Fix shift instructions and a typo
* Small refactoring on StructuredProgram, move RemovePhis method to a separate class
* Initial geometry shader support
* Implement TLD4
* Fix -- There's no negation on FMUL32I
* Add constant folding and algebraic simplification optimizations, nits
* Some leftovers from constant folding
* Avoid cast for constant assignments
* Add a branch elimination pass, and misc small fixes
* Remove redundant branches, add expression propagation and other improvements on the code
* Small leftovers -- add missing break and continue, remove unused properties, other improvements
* Add null check to handle empty block cases on block visitor
* Add HADD2 and HMUL2 half float shader instructions
* Optimize pack/unpack sequences, some fixes related to half float instructions
* Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen
* Fix copy paste mistake that caused RZ to be ignored on the AST instruction
* Add workaround for conditional exit, and fix half float instruction with constant buffer
* Add missing 0.0 source for TLDS.LZ variants
* Simplify the switch for TLDS.LZ
* Texture instructions related fixes
* Implement the HFMA instruction, and some misc. fixes
* Enable constant folding on UnpackHalf2x16 instructions
* Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
* Remove the old shader translator
* Remove ShaderDeclInfo and other unused things
* Add dual vertex shader support
* Add ShaderConfig, used to pass shader type and maximum cbuffer size
* Move and rename some instruction enums
* Move texture instructions into a separate file
* Move operand GetExpression and locals management to OperandManager
* Optimize opcode decoding using a simple list and binary search
* Add missing condition for do-while on goto elimination
* Misc. fixes on texture instructions
* Simplify TLDS switch
* Address PR feedback, and a nit
2019-04-17 19:57:08 -04:00
|
|
|
Block block,
|
|
|
|
ulong limitAddress,
|
|
|
|
ulong startAddress)
|
|
|
|
{
|
|
|
|
ulong address = block.Address;
|
|
|
|
|
|
|
|
do
|
|
|
|
{
|
|
|
|
if (address >= limitAddress)
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2019-07-01 22:39:22 -04:00
|
|
|
// Ignore scheduling instructions, which are written every 32 bytes.
|
New shader translator implementation (#654)
* Start implementing a new shader translator
* Fix shift instructions and a typo
* Small refactoring on StructuredProgram, move RemovePhis method to a separate class
* Initial geometry shader support
* Implement TLD4
* Fix -- There's no negation on FMUL32I
* Add constant folding and algebraic simplification optimizations, nits
* Some leftovers from constant folding
* Avoid cast for constant assignments
* Add a branch elimination pass, and misc small fixes
* Remove redundant branches, add expression propagation and other improvements on the code
* Small leftovers -- add missing break and continue, remove unused properties, other improvements
* Add null check to handle empty block cases on block visitor
* Add HADD2 and HMUL2 half float shader instructions
* Optimize pack/unpack sequences, some fixes related to half float instructions
* Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen
* Fix copy paste mistake that caused RZ to be ignored on the AST instruction
* Add workaround for conditional exit, and fix half float instruction with constant buffer
* Add missing 0.0 source for TLDS.LZ variants
* Simplify the switch for TLDS.LZ
* Texture instructions related fixes
* Implement the HFMA instruction, and some misc. fixes
* Enable constant folding on UnpackHalf2x16 instructions
* Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
* Remove the old shader translator
* Remove ShaderDeclInfo and other unused things
* Add dual vertex shader support
* Add ShaderConfig, used to pass shader type and maximum cbuffer size
* Move and rename some instruction enums
* Move texture instructions into a separate file
* Move operand GetExpression and locals management to OperandManager
* Optimize opcode decoding using a simple list and binary search
* Add missing condition for do-while on goto elimination
* Misc. fixes on texture instructions
* Simplify TLDS switch
* Address PR feedback, and a nit
2019-04-17 19:57:08 -04:00
|
|
|
if (((address - startAddress) & 0x1f) == 0)
|
|
|
|
{
|
|
|
|
address += 8;
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-10-13 03:02:07 -03:00
|
|
|
uint word0 = BinaryPrimitives.ReadUInt32LittleEndian(code.Slice((int)address));
|
|
|
|
uint word1 = BinaryPrimitives.ReadUInt32LittleEndian(code.Slice((int)address + 4));
|
New shader translator implementation (#654)
* Start implementing a new shader translator
* Fix shift instructions and a typo
* Small refactoring on StructuredProgram, move RemovePhis method to a separate class
* Initial geometry shader support
* Implement TLD4
* Fix -- There's no negation on FMUL32I
* Add constant folding and algebraic simplification optimizations, nits
* Some leftovers from constant folding
* Avoid cast for constant assignments
* Add a branch elimination pass, and misc small fixes
* Remove redundant branches, add expression propagation and other improvements on the code
* Small leftovers -- add missing break and continue, remove unused properties, other improvements
* Add null check to handle empty block cases on block visitor
* Add HADD2 and HMUL2 half float shader instructions
* Optimize pack/unpack sequences, some fixes related to half float instructions
* Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen
* Fix copy paste mistake that caused RZ to be ignored on the AST instruction
* Add workaround for conditional exit, and fix half float instruction with constant buffer
* Add missing 0.0 source for TLDS.LZ variants
* Simplify the switch for TLDS.LZ
* Texture instructions related fixes
* Implement the HFMA instruction, and some misc. fixes
* Enable constant folding on UnpackHalf2x16 instructions
* Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
* Remove the old shader translator
* Remove ShaderDeclInfo and other unused things
* Add dual vertex shader support
* Add ShaderConfig, used to pass shader type and maximum cbuffer size
* Move and rename some instruction enums
* Move texture instructions into a separate file
* Move operand GetExpression and locals management to OperandManager
* Optimize opcode decoding using a simple list and binary search
* Add missing condition for do-while on goto elimination
* Misc. fixes on texture instructions
* Simplify TLDS switch
* Address PR feedback, and a nit
2019-04-17 19:57:08 -04:00
|
|
|
|
|
|
|
ulong opAddress = address;
|
|
|
|
|
|
|
|
address += 8;
|
|
|
|
|
|
|
|
long opCode = word0 | (long)word1 << 32;
|
|
|
|
|
|
|
|
(InstEmitter emitter, Type opCodeType) = OpCodeTable.GetEmitter(opCode);
|
|
|
|
|
|
|
|
if (emitter == null)
|
|
|
|
{
|
2019-07-01 22:39:22 -04:00
|
|
|
// TODO: Warning, illegal encoding.
|
2019-10-13 03:02:07 -03:00
|
|
|
|
|
|
|
block.OpCodes.Add(new OpCode(null, opAddress, opCode));
|
|
|
|
|
New shader translator implementation (#654)
* Start implementing a new shader translator
* Fix shift instructions and a typo
* Small refactoring on StructuredProgram, move RemovePhis method to a separate class
* Initial geometry shader support
* Implement TLD4
* Fix -- There's no negation on FMUL32I
* Add constant folding and algebraic simplification optimizations, nits
* Some leftovers from constant folding
* Avoid cast for constant assignments
* Add a branch elimination pass, and misc small fixes
* Remove redundant branches, add expression propagation and other improvements on the code
* Small leftovers -- add missing break and continue, remove unused properties, other improvements
* Add null check to handle empty block cases on block visitor
* Add HADD2 and HMUL2 half float shader instructions
* Optimize pack/unpack sequences, some fixes related to half float instructions
* Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen
* Fix copy paste mistake that caused RZ to be ignored on the AST instruction
* Add workaround for conditional exit, and fix half float instruction with constant buffer
* Add missing 0.0 source for TLDS.LZ variants
* Simplify the switch for TLDS.LZ
* Texture instructions related fixes
* Implement the HFMA instruction, and some misc. fixes
* Enable constant folding on UnpackHalf2x16 instructions
* Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
* Remove the old shader translator
* Remove ShaderDeclInfo and other unused things
* Add dual vertex shader support
* Add ShaderConfig, used to pass shader type and maximum cbuffer size
* Move and rename some instruction enums
* Move texture instructions into a separate file
* Move operand GetExpression and locals management to OperandManager
* Optimize opcode decoding using a simple list and binary search
* Add missing condition for do-while on goto elimination
* Misc. fixes on texture instructions
* Simplify TLDS switch
* Address PR feedback, and a nit
2019-04-17 19:57:08 -04:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
OpCode op = MakeOpCode(opCodeType, emitter, opAddress, opCode);
|
|
|
|
|
|
|
|
block.OpCodes.Add(op);
|
|
|
|
}
|
|
|
|
while (!IsBranch(block.GetLastOp()));
|
|
|
|
|
|
|
|
block.EndAddress = address;
|
|
|
|
|
|
|
|
block.UpdateSsyOpCodes();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// <summary>
/// Tells whether an instruction is both unconditional and a branch.
/// </summary>
/// <param name="opCode">Decoded instruction to test</param>
/// <returns>True when <see cref="IsUnconditional"/> and <see cref="IsBranch"/> both hold for the instruction</returns>
private static bool IsUnconditionalBranch(OpCode opCode)
{
    // The branch test is only reached for unconditional instructions,
    // which mirrors the short-circuit order of a plain "&&".
    if (!IsUnconditional(opCode))
    {
        return false;
    }

    return IsBranch(opCode);
}
|
|
|
|
|
|
|
|
/// <summary>
/// Tells whether an instruction executes regardless of any predicate or exit condition.
/// </summary>
/// <param name="opCode">Decoded instruction to test</param>
/// <returns>
/// False for an exit instruction whose condition is not <c>Condition.Always</c>;
/// otherwise true only when the predicate is the "true" predicate register and is not inverted
/// </returns>
private static bool IsUnconditional(OpCode opCode)
{
    // Exit instructions carry their own condition in addition to the predicate.
    if (opCode is OpCodeExit exitOp)
    {
        if (exitOp.Condition != Condition.Always)
        {
            return false;
        }
    }

    bool usesTruePredicate = opCode.Predicate.Index == RegisterConsts.PredicateTrueIndex;

    return usesTruePredicate && !opCode.InvertPredicate;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Tells whether an instruction is treated as a branch by the decoder.
/// </summary>
/// <param name="opCode">Decoded instruction to test</param>
/// <returns>
/// True for sync and exit instructions, and for branch-class instructions
/// whose emitter is not <c>InstEmit.Ssy</c>; false otherwise
/// </returns>
private static bool IsBranch(OpCode opCode)
{
    if (opCode is OpCodeSync || opCode is OpCodeExit)
    {
        return true;
    }

    // Branch-class instructions emitted by Ssy are explicitly excluded.
    return opCode is OpCodeBranch && opCode.Emitter != InstEmit.Ssy;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Creates an instance of the given opcode class for one decoded instruction.
/// </summary>
/// <param name="type">Opcode class to instantiate</param>
/// <param name="emitter">Emitter passed to the opcode constructor</param>
/// <param name="address">Address passed to the opcode constructor</param>
/// <param name="opCode">Raw 64-bit instruction word passed to the opcode constructor</param>
/// <returns>The newly created opcode object</returns>
/// <exception cref="ArgumentNullException"><paramref name="type"/> is null</exception>
private static OpCode MakeOpCode(Type type, InstEmitter emitter, ulong address, long opCode)
{
    if (type is null)
    {
        throw new ArgumentNullException(nameof(type));
    }

    // The activator delegate is built once per type and then served from the cache.
    OpActivator activator = _opActivators.GetOrAdd(type, CacheOpActivator);

    var instance = activator(emitter, address, opCode);

    return (OpCode)instance;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Builds a delegate that invokes the (InstEmitter, ulong, long) constructor of an opcode class.
/// </summary>
/// <param name="type">Opcode class whose constructor should be wrapped</param>
/// <returns>A delegate that constructs a new instance of <paramref name="type"/></returns>
private static OpActivator CacheOpActivator(Type type)
{
    Type[] ctorSignature = { typeof(InstEmitter), typeof(ulong), typeof(long) };

    DynamicMethod factory = new DynamicMethod($"Make{type.Name}", type, ctorSignature);

    var ctor = type.GetConstructor(ctorSignature);

    ILGenerator il = factory.GetILGenerator();

    // Forward the three delegate arguments, in order, to the constructor.
    foreach (var loadArg in new[] { OpCodes.Ldarg_0, OpCodes.Ldarg_1, OpCodes.Ldarg_2 })
    {
        il.Emit(loadArg);
    }

    il.Emit(OpCodes.Newobj, ctor);
    il.Emit(OpCodes.Ret);

    return (OpActivator)factory.CreateDelegate(typeof(OpActivator));
}
|
|
|
|
|
|
|
|
/// <summary>
/// Work item used by the SSY propagation walk. An item either names a block to visit,
/// or records an adjustment to apply to the SSY address stack when it is popped.
/// </summary>
private struct PathBlockState
{
    /// <summary>
    /// Kind of stack adjustment carried by a work item.
    /// </summary>
    private enum RestoreType
    {
        // No adjustment, the item is a block to visit.
        None,
        // Pop entries until the stack is back at a recorded size.
        PopSsy,
        // Push a recorded address back onto the stack.
        PushSync
    }

    /// <summary>
    /// Block to visit, or null when the item only carries a stack adjustment.
    /// </summary>
    public Block Block { get; }

    private RestoreType _restoreType;

    // Stack size for PopSsy, address for PushSync, zero otherwise.
    private ulong _restoreValue;

    /// <summary>
    /// True when the item carries a stack adjustment rather than a block to visit.
    /// </summary>
    public bool ReturningFromVisit => _restoreType != RestoreType.None;

    // Single place where every field is assigned; the public constructors all chain here.
    private PathBlockState(Block block, RestoreType restoreType, ulong restoreValue)
    {
        Block         = block;
        _restoreType  = restoreType;
        _restoreValue = restoreValue;
    }

    /// <summary>
    /// Creates a work item that visits a block.
    /// </summary>
    /// <param name="block">Block to visit</param>
    public PathBlockState(Block block) : this(block, RestoreType.None, 0)
    {
    }

    /// <summary>
    /// Creates a work item that trims the SSY stack back to a previous size.
    /// </summary>
    /// <param name="oldSsyStackSize">Number of entries the stack should be trimmed back to</param>
    public PathBlockState(int oldSsyStackSize) : this(null, RestoreType.PopSsy, (ulong)oldSsyStackSize)
    {
    }

    /// <summary>
    /// Creates a work item that pushes an address back onto the SSY stack.
    /// </summary>
    /// <param name="syncAddress">Address to push back</param>
    public PathBlockState(ulong syncAddress) : this(null, RestoreType.PushSync, syncAddress)
    {
    }

    /// <summary>
    /// Applies the stack adjustment carried by this item, if any.
    /// </summary>
    /// <param name="ssyStack">SSY address stack to adjust</param>
    public void RestoreStackState(Stack<ulong> ssyStack)
    {
        switch (_restoreType)
        {
            case RestoreType.PushSync:
            {
                ssyStack.Push(_restoreValue);

                break;
            }

            case RestoreType.PopSsy:
            {
                uint targetSize = (uint)_restoreValue;

                while (ssyStack.Count > targetSize)
                {
                    ssyStack.Pop();
                }

                break;
            }
        }
    }
}
|
|
|
|
|
|
|
|
/// <summary>
/// Walks the blocks reachable from an SSY instruction and links that instruction
/// with the sync instructions that pop its address.
/// </summary>
/// <param name="blocks">Blocks of the program, keyed by address</param>
/// <param name="ssyBlock">Block holding the SSY instruction to propagate</param>
/// <param name="ssyIndex">Index of that instruction inside <c>ssyBlock.SsyOpCodes</c></param>
private static void PropagateSsy(Dictionary<ulong, Block> blocks, Block ssyBlock, int ssyIndex)
{
    OpCodeSsy sourceSsy = ssyBlock.SsyOpCodes[ssyIndex];

    Stack<PathBlockState> pending    = new Stack<PathBlockState>();
    HashSet<Block>        seenBlocks = new HashSet<Block>();
    Stack<ulong>          addresses  = new Stack<ulong>();

    // Items without a block (stack adjustments) are always queued,
    // items with a block are queued only the first time the block is seen.
    void Enqueue(PathBlockState item)
    {
        if (item.Block != null && !seenBlocks.Add(item.Block))
        {
            return;
        }

        pending.Push(item);
    }

    Enqueue(new PathBlockState(ssyBlock));

    // Only the first visited block starts at the requested SSY index,
    // every later block contributes all of its SSY instructions.
    int firstSsyIndex = ssyIndex;

    while (pending.TryPop(out PathBlockState item))
    {
        if (item.ReturningFromVisit)
        {
            item.RestoreStackState(addresses);

            continue;
        }

        Block block = item.Block;

        int ssyCount = block.SsyOpCodes.Count;

        if (ssyCount != 0)
        {
            // Queued before the successors, so it is popped after them
            // and trims the stack back to its current size.
            Enqueue(new PathBlockState(addresses.Count));

            for (int i = firstSsyIndex; i < ssyCount; i++)
            {
                addresses.Push(block.SsyOpCodes[i].GetAbsoluteAddress());
            }
        }

        firstSsyIndex = 0;

        if (block.Next != null)
        {
            Enqueue(new PathBlockState(block.Next));
        }

        if (block.Branch != null)
        {
            Enqueue(new PathBlockState(block.Branch));
        }
        else if (block.GetLastOp() is OpCodeSync sync)
        {
            ulong syncAddress = addresses.Pop();

            if (addresses.Count != 0)
            {
                // The popped address was pushed by another SSY: keep walking from its
                // target, and put the address back once that walk has been processed.
                Enqueue(new PathBlockState(syncAddress));
                Enqueue(new PathBlockState(blocks[syncAddress]));
            }
            else
            {
                // The popped address was the only one on the stack, so this sync
                // is linked with the SSY being propagated.
                addresses.Push(syncAddress);

                sync.Targets.Add(sourceSsy, sync.Targets.Count);

                sourceSsy.Syncs.TryAdd(sync, Local());
            }
        }
    }
}
|
|
|
|
}
|
|
|
|
}
|