Cemu/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp

#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/HW/Latte/Core/Latte.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
/*
* Return index of used color attachment based on shader pixel export index (0-7)
*/
sint32 LatteDecompiler_getColorOutputIndexFromExportIndex(LatteDecompilerShaderContext* shaderContext, sint32 exportIndex)
{
sint32 colorOutputIndex = -1;
sint32 outputCounter = 0;
uint32 cbShaderMask = shaderContext->contextRegisters[mmCB_SHADER_MASK];
uint32 cbShaderControl = shaderContext->contextRegisters[mmCB_SHADER_CONTROL];
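// CB_SHADER_MASK stores a 4-bit component write mask per color attachment; pixel exports are assigned to the enabled attachments in ascending order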
for(sint32 m=0; m<8; m++)
{
uint32 outputMask = (cbShaderMask>>(m*4))&0xF;
if( outputMask == 0 )
continue;
cemu_assert_debug(outputMask == 0xF); // partial component masks are not supported
if( outputCounter == exportIndex )
{
colorOutputIndex = m;
break;
}
outputCounter++;
}
cemu_assert_debug(colorOutputIndex != -1); // real outputs and outputs defined via mask do not match up
return colorOutputIndex;
}
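/*
* Track a uniform access and assign it a compact remapped index (used by the remapped uniform mode)
* Accesses that were already registered keep their previously assigned index
*/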
void _remapUniformAccess(LatteDecompilerShaderContext* shaderContext, bool isRegisterUniform, uint32 kcacheBankId, uint32 uniformIndex)
{
auto& list_uniformMapping = shaderContext->shader->list_remappedUniformEntries;
for(uint32 i=0; i<list_uniformMapping.size(); i++)
{
LatteDecompilerRemappedUniformEntry_t* ufMapping = list_uniformMapping.data()+i;
if( isRegisterUniform )
{
if( ufMapping->isRegister == true && ufMapping->index == uniformIndex )
{
return;
}
}
else
{
if( ufMapping->isRegister == false && ufMapping->kcacheBankId == kcacheBankId && ufMapping->index == uniformIndex )
{
return;
}
}
}
// add new mapping
LatteDecompilerRemappedUniformEntry_t newMapping = {0};
newMapping.isRegister = isRegisterUniform;
if( !isRegisterUniform )
newMapping.kcacheBankId = kcacheBankId;
newMapping.index = uniformIndex;
newMapping.mappedIndex = (uint32)list_uniformMapping.size();
list_uniformMapping.emplace_back(newMapping);
}
/*
* Returns true if the instruction takes integer operands or returns an integer value
*/
bool _isIntegerInstruction(const LatteDecompilerALUInstruction& aluInstruction)
{
if (aluInstruction.isOP3 == false)
{
// OP2
switch (aluInstruction.opcode)
{
case ALU_OP2_INST_ADD:
case ALU_OP2_INST_MUL:
case ALU_OP2_INST_MUL_IEEE:
case ALU_OP2_INST_MAX:
case ALU_OP2_INST_MIN:
case ALU_OP2_INST_FLOOR:
case ALU_OP2_INST_FRACT:
case ALU_OP2_INST_TRUNC:
case ALU_OP2_INST_MOV:
case ALU_OP2_INST_NOP:
case ALU_OP2_INST_DOT4:
case ALU_OP2_INST_DOT4_IEEE:
case ALU_OP2_INST_CUBE:
case ALU_OP2_INST_EXP_IEEE:
case ALU_OP2_INST_LOG_CLAMPED:
case ALU_OP2_INST_LOG_IEEE:
case ALU_OP2_INST_SQRT_IEEE:
case ALU_OP2_INST_SIN:
case ALU_OP2_INST_COS:
case ALU_OP2_INST_RNDNE:
case ALU_OP2_INST_MAX_DX10:
case ALU_OP2_INST_MIN_DX10:
case ALU_OP2_INST_SETGT:
case ALU_OP2_INST_SETGE:
case ALU_OP2_INST_SETNE:
case ALU_OP2_INST_SETE:
case ALU_OP2_INST_PRED_SETE:
case ALU_OP2_INST_PRED_SETGT:
case ALU_OP2_INST_PRED_SETGE:
case ALU_OP2_INST_PRED_SETNE:
case ALU_OP2_INST_KILLE:
case ALU_OP2_INST_KILLGT:
case ALU_OP2_INST_KILLGE:
case ALU_OP2_INST_RECIP_FF:
case ALU_OP2_INST_RECIP_IEEE:
case ALU_OP2_INST_RECIPSQRT_CLAMPED:
case ALU_OP2_INST_RECIPSQRT_FF:
case ALU_OP2_INST_RECIPSQRT_IEEE:
return false;
case ALU_OP2_INST_FLT_TO_INT:
case ALU_OP2_INST_INT_TO_FLOAT:
case ALU_OP2_INST_UINT_TO_FLOAT:
case ALU_OP2_INST_ASHR_INT:
case ALU_OP2_INST_LSHR_INT:
case ALU_OP2_INST_LSHL_INT:
case ALU_OP2_INST_MULLO_INT:
case ALU_OP2_INST_MULLO_UINT:
case ALU_OP2_INST_FLT_TO_UINT:
case ALU_OP2_INST_AND_INT:
case ALU_OP2_INST_OR_INT:
case ALU_OP2_INST_XOR_INT:
case ALU_OP2_INST_NOT_INT:
case ALU_OP2_INST_ADD_INT:
case ALU_OP2_INST_SUB_INT:
case ALU_OP2_INST_MAX_INT:
case ALU_OP2_INST_MIN_INT:
case ALU_OP2_INST_MAX_UINT:
case ALU_OP2_INST_MIN_UINT:
case ALU_OP2_INST_SETE_INT:
case ALU_OP2_INST_SETGT_INT:
case ALU_OP2_INST_SETGE_INT:
case ALU_OP2_INST_SETNE_INT:
case ALU_OP2_INST_SETGT_UINT:
case ALU_OP2_INST_SETGE_UINT:
case ALU_OP2_INST_PRED_SETE_INT:
case ALU_OP2_INST_PRED_SETGT_INT:
case ALU_OP2_INST_PRED_SETGE_INT:
case ALU_OP2_INST_PRED_SETNE_INT:
case ALU_OP2_INST_KILLE_INT:
case ALU_OP2_INST_KILLGT_INT:
case ALU_OP2_INST_KILLNE_INT:
case ALU_OP2_INST_MOVA_FLOOR:
case ALU_OP2_INST_MOVA_INT:
return true;
// these return an integer result but are usually used only for conditionals
case ALU_OP2_INST_SETE_DX10:
case ALU_OP2_INST_SETGT_DX10:
case ALU_OP2_INST_SETGE_DX10:
case ALU_OP2_INST_SETNE_DX10:
return true;
default:
#ifdef CEMU_DEBUG_ASSERT
debug_printf("_isIntegerInstruction(): OP3=%s opcode=%02x\n", aluInstruction.isOP3 ? "true" : "false", aluInstruction.opcode);
cemu_assert_debug(false);
#endif
break;
}
}
else
{
// OP3
switch (aluInstruction.opcode)
{
case ALU_OP3_INST_MULADD:
case ALU_OP3_INST_MULADD_D2:
case ALU_OP3_INST_MULADD_M2:
case ALU_OP3_INST_MULADD_M4:
case ALU_OP3_INST_MULADD_IEEE:
case ALU_OP3_INST_CMOVE:
case ALU_OP3_INST_CMOVGT:
case ALU_OP3_INST_CMOVGE:
return false;
case ALU_OP3_INST_CNDE_INT:
case ALU_OP3_INST_CNDGT_INT:
case ALU_OP3_INST_CMOVGE_INT:
return true;
default:
#ifdef CEMU_DEBUG_ASSERT
debug_printf("_isIntegerInstruction(): OP3=%s opcode=%02x\n", aluInstruction.isOP3?"true":"false", aluInstruction.opcode);
#endif
break;
}
}
return false;
}
/*
* Analyze ALU CF instruction and all instructions within the ALU clause
*/
void LatteDecompiler_analyzeALUClause(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
// check if this shader has any clause that potentially modifies the pixel execution state
if( cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE || cfInstruction->type == GPU7_CF_INST_ALU_POP_AFTER || cfInstruction->type == GPU7_CF_INST_ALU_POP2_AFTER || cfInstruction->type == GPU7_CF_INST_ALU_BREAK || cfInstruction->type == GPU7_CF_INST_ALU_ELSE_AFTER )
{
shaderContext->analyzer.modifiesPixelActiveState = true;
}
// analyze ALU instructions
for(auto& aluInstruction : cfInstruction->instructionsALU)
{
// ignore NOP instruction
if( !aluInstruction.isOP3 && aluInstruction.opcode == ALU_OP2_INST_NOP )
continue;
// check for CUBE instruction
if( !aluInstruction.isOP3 && aluInstruction.opcode == ALU_OP2_INST_CUBE )
{
shaderContext->analyzer.hasRedcCUBE = true;
}
// check for integer instruction
if (_isIntegerInstruction(aluInstruction))
shaderContext->analyzer.usesIntegerValues = true;
// process all available operands (inputs)
for(sint32 f=0; f<3; f++)
{
// check input for uniform access
if( aluInstruction.sourceOperand[f].sel == 0xFFFFFFFF )
continue; // source operand not set/used
// about uniform register and buffer access tracking:
// for absolute indices we can determine a maximum size that is accessed
// relative accesses are tricky because the upper bound of accessed indices is unknown
// worst case we have to load the full file (256 * 16 byte entries) or for buffers an arbitrary upper bound (64KB in our case)
if( GPU7_ALU_SRC_IS_CFILE(aluInstruction.sourceOperand[f].sel) )
{
if (aluInstruction.sourceOperand[f].rel)
{
shaderContext->analyzer.uniformRegisterAccessTracker.TrackAccess(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), true);
}
else
{
_remapUniformAccess(shaderContext, true, 0, GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel));
shaderContext->analyzer.uniformRegisterAccessTracker.TrackAccess(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), false);
}
}
else if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction.sourceOperand[f].sel) )
{
// uniform bank 0 (uniform buffer with index cfInstruction->cBank0Index)
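// the CF instruction supplies the constant bank binding and base address; the operand's sel field only encodes the offset within that window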
uint32 uniformBufferIndex = cfInstruction->cBank0Index;
cemu_assert(uniformBufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS);
uint32 offset = GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank0AddrBase;
_remapUniformAccess(shaderContext, false, uniformBufferIndex, offset);
shaderContext->analyzer.uniformBufferAccessTracker[uniformBufferIndex].TrackAccess(offset, aluInstruction.sourceOperand[f].rel);
}
else if( GPU7_ALU_SRC_IS_CBANK1(aluInstruction.sourceOperand[f].sel) )
{
// uniform bank 1 (uniform buffer with index cfInstruction->cBank1Index)
uint32 uniformBufferIndex = cfInstruction->cBank1Index;
cemu_assert(uniformBufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS);
uint32 offset = GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank1AddrBase;
_remapUniformAccess(shaderContext, false, uniformBufferIndex, offset);
shaderContext->analyzer.uniformBufferAccessTracker[uniformBufferIndex].TrackAccess(offset, aluInstruction.sourceOperand[f].rel);
}
else if( GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel) )
{
sint32 gprIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction.sourceOperand[f].sel);
shaderContext->analyzer.gprUseMask[gprIndex/8] |= (1<<(gprIndex%8));
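// example: GPR 13 sets bit 5 of gprUseMask[1] (13/8 = 1, 13%8 = 5)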
if( aluInstruction.sourceOperand[f].rel != 0 )
{
// if indexed register access is used, all possibly referenced registers are stored to a separate array at the beginning of the group
shaderContext->analyzer.usesRelativeGPRRead = true;
continue;
}
}
}
if( aluInstruction.destRel != 0 )
shaderContext->analyzer.usesRelativeGPRWrite = true;
shaderContext->analyzer.gprUseMask[aluInstruction.destGpr/8] |= (1<<(aluInstruction.destGpr%8));
}
}
/*
* Analyze TEX CF instruction and all instructions within the TEX clause
*/
void LatteDecompiler_analyzeTEXClause(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
LatteDecompilerShader* shader = shaderContext->shader;
for(auto& texInstruction : cfInstruction->instructionsTEX)
{
if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_L ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LB ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LZ ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ ||
texInstruction.opcode == GPU7_TEX_INST_FETCH4 ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_G ||
texInstruction.opcode == GPU7_TEX_INST_LD )
{
if (texInstruction.textureFetch.textureIndex < 0 || texInstruction.textureFetch.textureIndex >= LATTE_NUM_MAX_TEX_UNITS)
{
cemuLog_logDebug(LogType::Force, "Shader {:16x} has out of bounds texture access (texture {})", shaderContext->shader->baseHash, (sint32)texInstruction.textureFetch.textureIndex);
continue;
}
if( texInstruction.textureFetch.samplerIndex < 0 || texInstruction.textureFetch.samplerIndex >= 0x12 )
cemu_assert_debug(false);
if(shaderContext->output->textureUnitMask[texInstruction.textureFetch.textureIndex] && shader->textureUnitSamplerAssignment[texInstruction.textureFetch.textureIndex] != texInstruction.textureFetch.samplerIndex && shader->textureUnitSamplerAssignment[texInstruction.textureFetch.textureIndex] != LATTE_DECOMPILER_SAMPLER_NONE )
{
cemu_assert_debug(false);
}
shaderContext->output->textureUnitMask[texInstruction.textureFetch.textureIndex] = true;
shader->textureUnitSamplerAssignment[texInstruction.textureFetch.textureIndex] = texInstruction.textureFetch.samplerIndex;
if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ)
shader->textureUsesDepthCompare[texInstruction.textureFetch.textureIndex] = true;
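// LD always addresses the texture with integer texel coordinates; SAMPLE does too when every coordinate channel is flagged as unnormalized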
bool useTexelCoords = false;
if (texInstruction.opcode == GPU7_TEX_INST_SAMPLE && (texInstruction.textureFetch.unnormalized[0] && texInstruction.textureFetch.unnormalized[1] && texInstruction.textureFetch.unnormalized[2] && texInstruction.textureFetch.unnormalized[3]))
useTexelCoords = true;
else if (texInstruction.opcode == GPU7_TEX_INST_LD)
useTexelCoords = true;
if (useTexelCoords)
{
shaderContext->analyzer.texUnitUsesTexelCoordinates.set(texInstruction.textureFetch.textureIndex);
}
}
else if( texInstruction.opcode == GPU7_TEX_INST_GET_COMP_TEX_LOD || texInstruction.opcode == GPU7_TEX_INST_GET_TEXTURE_RESINFO )
{
if( texInstruction.textureFetch.textureIndex < 0 || texInstruction.textureFetch.textureIndex >= LATTE_NUM_MAX_TEX_UNITS )
debugBreakpoint();
if( texInstruction.textureFetch.samplerIndex != 0 )
debugBreakpoint(); // sampler is ignored and should be 0
shaderContext->output->textureUnitMask[texInstruction.textureFetch.textureIndex] = true;
}
else if( texInstruction.opcode == GPU7_TEX_INST_SET_CUBEMAP_INDEX )
{
// no analysis required
}
else if (texInstruction.opcode == GPU7_TEX_INST_GET_GRADIENTS_H || texInstruction.opcode == GPU7_TEX_INST_GET_GRADIENTS_V)
{
// no analysis required
}
else if (texInstruction.opcode == GPU7_TEX_INST_SET_GRADIENTS_H || texInstruction.opcode == GPU7_TEX_INST_SET_GRADIENTS_V)
{
shaderContext->analyzer.hasGradientLookup = true;
}
else if( texInstruction.opcode == GPU7_TEX_INST_VFETCH )
{
// VFETCH is used to access uniform buffers dynamically
if( texInstruction.textureFetch.textureIndex >= 0x80 && texInstruction.textureFetch.textureIndex <= 0x8F )
{
uint32 uniformBufferIndex = texInstruction.textureFetch.textureIndex - 0x80;
shaderContext->analyzer.uniformBufferAccessTracker[uniformBufferIndex].TrackAccess(0, true);
}
else if( texInstruction.textureFetch.textureIndex == 0x9F && shader->shaderType == LatteConst::ShaderType::Geometry )
{
// instruction to read geometry shader input from ringbuffer
}
else
debugBreakpoint();
}
else if (texInstruction.opcode == GPU7_TEX_INST_MEM)
{
// SSBO access
shaderContext->analyzer.hasSSBORead = true;
}
else
debugBreakpoint();
// mark read and written registers as used
if(texInstruction.dstGpr < LATTE_NUM_GPR)
shaderContext->analyzer.gprUseMask[texInstruction.dstGpr/8] |= (1<<(texInstruction.dstGpr%8));
if(texInstruction.srcGpr < LATTE_NUM_GPR)
shaderContext->analyzer.gprUseMask[texInstruction.srcGpr/8] |= (1<<(texInstruction.srcGpr%8));
}
}
/*
* Analyze export CF instruction
*/
void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
LatteDecompilerShader* shader = shaderContext->shader;
if( shader->shaderType == LatteConst::ShaderType::Pixel )
{
if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase < 8 )
{
// remember color outputs that are written
for(uint32 i=0; i<(cfInstruction->exportBurstCount+1); i++)
{
sint32 colorOutputIndex = LatteDecompiler_getColorOutputIndexFromExportIndex(shaderContext, cfInstruction->exportArrayBase+i);
shader->pixelColorOutputMask |= (1<<colorOutputIndex);
}
}
else if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61 )
{
// writes pixel depth
}
else
debugBreakpoint();
}
else if (shader->shaderType == LatteConst::ShaderType::Vertex)
{
if (cfInstruction->exportType == 2 && cfInstruction->exportArrayBase < 32)
{
shaderContext->shader->outputParameterMask |= (1<<cfInstruction->exportArrayBase);
}
else if (cfInstruction->exportType == 1 && cfInstruction->exportArrayBase == GPU7_DECOMPILER_CF_EXPORT_POINT_SIZE)
{
shaderContext->analyzer.writesPointSize = true;
}
}
// mark input GPRs as used
for(uint32 i=0; i<(cfInstruction->exportBurstCount+1); i++)
{
shaderContext->analyzer.gprUseMask[(cfInstruction->exportSourceGPR+i)/8] |= (1<<((cfInstruction->exportSourceGPR+i)%8));
}
}
void LatteDecompiler_analyzeSubroutine(LatteDecompilerShaderContext* shaderContext, uint32 cfAddr)
{
// analyze CF and clauses from cfAddr up to the RET statement
// todo - find the cfInstruction index that corresponds to cfAddr; for now every CF instruction is visited
cemu_assert_debug(false);
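// note: LatteDecompiler_analyze never populates its subroutine list yet, so this function should currently be unreachable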
for(auto& cfInstruction : shaderContext->cfInstructions)
{
if (cfInstruction.type == GPU7_CF_INST_ALU || cfInstruction.type == GPU7_CF_INST_ALU_PUSH_BEFORE || cfInstruction.type == GPU7_CF_INST_ALU_POP_AFTER || cfInstruction.type == GPU7_CF_INST_ALU_POP2_AFTER || cfInstruction.type == GPU7_CF_INST_ALU_BREAK || cfInstruction.type == GPU7_CF_INST_ALU_ELSE_AFTER)
{
LatteDecompiler_analyzeALUClause(shaderContext, &cfInstruction);
}
else if (cfInstruction.type == GPU7_CF_INST_TEX)
{
LatteDecompiler_analyzeTEXClause(shaderContext, &cfInstruction);
}
else if (cfInstruction.type == GPU7_CF_INST_EXPORT || cfInstruction.type == GPU7_CF_INST_EXPORT_DONE)
{
LatteDecompiler_analyzeExport(shaderContext, &cfInstruction);
}
else if (cfInstruction.type == GPU7_CF_INST_ELSE || cfInstruction.type == GPU7_CF_INST_POP)
{
shaderContext->analyzer.modifiesPixelActiveState = true;
}
else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END ||
cfInstruction.type == GPU7_CF_INST_LOOP_START_NO_AL)
{
shaderContext->analyzer.modifiesPixelActiveState = true;
}
else if (cfInstruction.type == GPU7_CF_INST_LOOP_BREAK)
{
shaderContext->analyzer.modifiesPixelActiveState = true;
}
else if (cfInstruction.type == GPU7_CF_INST_EMIT_VERTEX)
{
// nothing to analyze
}
else if (cfInstruction.type == GPU7_CF_INST_CALL)
{
cemu_assert_debug(false); // CALLs inside subroutines are still todo
}
else
{
cemu_assert_unimplemented();
}
}
}
namespace LatteDecompiler
{
void _initTextureBindingPointsGL(LatteDecompilerShaderContext* decompilerContext)
{
// for OpenGL we use the relative texture unit index
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
{
if (!decompilerContext->output->textureUnitMask[i])
continue;
sint32 textureBindingPoint = 0; // default to 0, one of the shader types below should always match
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
textureBindingPoint = i + LATTE_CEMU_VS_TEX_UNIT_BASE;
else if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
textureBindingPoint = i + LATTE_CEMU_GS_TEX_UNIT_BASE;
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
textureBindingPoint = i + LATTE_CEMU_PS_TEX_UNIT_BASE;
decompilerContext->output->resourceMappingGL.textureUnitToBindingPoint[i] = textureBindingPoint;
}
}
void _initTextureBindingPointsVK(LatteDecompilerShaderContext* decompilerContext)
{
// for Vulkan we use consecutive indices
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
{
if (!decompilerContext->output->textureUnitMask[i])
continue;
decompilerContext->output->resourceMappingVK.textureUnitToBindingPoint[i] = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++;
}
}
void _initHasUniformVarBlock(LatteDecompilerShaderContext* decompilerContext)
{
decompilerContext->hasUniformVarBlock = false;
if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED)
decompilerContext->hasUniformVarBlock = true;
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
decompilerContext->hasUniformVarBlock = true;
bool hasAnyViewportScaleDisabled =
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() ||
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_SCALE_ENA() ||
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_SCALE_ENA();
// we currently only support all-on or all-off; toggling scale/offset for individual components is not supported
cemu_assert_debug(decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() == !hasAnyViewportScaleDisabled);
cemu_assert_debug(decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_SCALE_ENA() == !hasAnyViewportScaleDisabled);
cemu_assert_debug(decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_SCALE_ENA() == !hasAnyViewportScaleDisabled);
cemu_assert_debug(decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA() == !hasAnyViewportScaleDisabled);
cemu_assert_debug(decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_OFFSET_ENA() == !hasAnyViewportScaleDisabled);
cemu_assert_debug(decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_OFFSET_ENA() == !hasAnyViewportScaleDisabled);
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && hasAnyViewportScaleDisabled)
decompilerContext->hasUniformVarBlock = true; // uf_windowSpaceToClipSpaceTransform
bool alphaTestEnable = decompilerContext->contextRegistersNew->SX_ALPHA_TEST_CONTROL.get_ALPHA_TEST_ENABLE();
if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel && alphaTestEnable != 0)
decompilerContext->hasUniformVarBlock = true; // uf_alphaTestRef
if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
decompilerContext->hasUniformVarBlock = true; // uf_fragCoordScale
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false)
decompilerContext->hasUniformVarBlock = true; // uf_pointSize
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry && decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false)
decompilerContext->hasUniformVarBlock = true; // uf_pointSize
if (decompilerContext->analyzer.useSSBOForStreamout &&
((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
(decompilerContext->shaderType == LatteConst::ShaderType::Geometry)))
{
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance and uf_streamoutBufferBase*
}
}
void _initUniformBindingPoints(LatteDecompilerShaderContext* decompilerContext)
{
// check if uniform vars block has at least one variable
_initHasUniformVarBlock(decompilerContext);
if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
{
for (sint32 t = 0; t < LATTE_NUM_MAX_TEX_UNITS; t++)
{
if (decompilerContext->analyzer.texUnitUsesTexelCoordinates.test(t) == false)
continue;
decompilerContext->hasUniformVarBlock = true; // uf_tex%dScale
}
}
// assign binding point to uniform var block
decompilerContext->output->resourceMappingGL.uniformVarsBufferBindingPoint = -1; // OpenGL currently doesn't use a uniform block
if (decompilerContext->hasUniformVarBlock)
{
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++;
}
else
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = -1;
// assign binding points to uniform buffers
if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
{
// for Vulkan we use consecutive indices
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
{
if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
continue;
decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++;
}
// for OpenGL we use the relative buffer index
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
{
if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
continue;
sint32 uniformBindingPoint = i;
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
uniformBindingPoint += 64;
else if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
uniformBindingPoint += 0;
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
uniformBindingPoint += 32;
decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i] = uniformBindingPoint;
}
}
// shader storage buffer for alternative transform feedback path
if (decompilerContext->analyzer.useSSBOForStreamout)
{
decompilerContext->output->resourceMappingVK.tfStorageBindingPoint = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++;
}
}
void _initAttributeBindingPoints(LatteDecompilerShaderContext* decompilerContext)
{
if (decompilerContext->shaderType != LatteConst::ShaderType::Vertex)
return;
// create input attribute binding mapping
// OpenGL and Vulkan use consecutive indices starting at 0
sint8 bindingIndex = 0;
for (sint32 i = 0; i < LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS; i++)
{
if (decompilerContext->analyzer.inputAttributSemanticMask[i])
{
decompilerContext->output->resourceMappingGL.attributeMapping[i] = bindingIndex;
decompilerContext->output->resourceMappingVK.attributeMapping[i] = bindingIndex;
bindingIndex++;
}
}
}
}
/*
* Analyze the shader program
* This will help to determine:
* 1) Uniform usage
* 2) Texture usage
* 3) Data types
* 4) CF stack and execution flow
*/
void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader)
{
// analyze render state
shaderContext->analyzer.isPointsPrimitive = shaderContext->contextRegistersNew->VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE() == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS;
shaderContext->analyzer.hasStreamoutEnable = shaderContext->contextRegisters[mmVGT_STRMOUT_EN] != 0; // set if the shader is used for transform feedback operations
if (shaderContext->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader)
shaderContext->analyzer.outputPointSize = shaderContext->analyzer.isPointsPrimitive;
else if (shaderContext->shaderType == LatteConst::ShaderType::Geometry)
{
uint32 gsOutPrimType = shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE];
if (gsOutPrimType == 0) // points
shaderContext->analyzer.outputPointSize = true;
}
// analyze input attributes for vertex/geometry shader
if (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)
{
if(shaderContext->fetchShader)
{
LatteFetchShader* parsedFetchShader = shaderContext->fetchShader;
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
{
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
{
uint8 semanticId = bufferGroup.attrib[i].semanticId;
if (semanticId == 0xFF)
{
// unused attribute? Found in Hot Wheels: World's best driver
continue;
}
cemu_assert_debug(semanticId < 0x80);
shaderContext->analyzer.inputAttributSemanticMask[semanticId] = true;
}
}
}
}
// list of subroutines (call destinations)
std::vector<uint32> list_subroutineAddrs;
// analyze CF and clauses
for(auto& cfInstruction : shaderContext->cfInstructions)
{
if (cfInstruction.type == GPU7_CF_INST_ALU || cfInstruction.type == GPU7_CF_INST_ALU_PUSH_BEFORE || cfInstruction.type == GPU7_CF_INST_ALU_POP_AFTER || cfInstruction.type == GPU7_CF_INST_ALU_POP2_AFTER || cfInstruction.type == GPU7_CF_INST_ALU_BREAK || cfInstruction.type == GPU7_CF_INST_ALU_ELSE_AFTER)
{
LatteDecompiler_analyzeALUClause(shaderContext, &cfInstruction);
}
else if (cfInstruction.type == GPU7_CF_INST_TEX)
{
LatteDecompiler_analyzeTEXClause(shaderContext, &cfInstruction);
}
else if (cfInstruction.type == GPU7_CF_INST_EXPORT || cfInstruction.type == GPU7_CF_INST_EXPORT_DONE)
{
LatteDecompiler_analyzeExport(shaderContext, &cfInstruction);
}
else if (cfInstruction.type == GPU7_CF_INST_ELSE || cfInstruction.type == GPU7_CF_INST_POP)
{
shaderContext->analyzer.modifiesPixelActiveState = true;
}
else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END ||
cfInstruction.type == GPU7_CF_INST_LOOP_START_NO_AL)
{
shaderContext->analyzer.modifiesPixelActiveState = true;
shaderContext->analyzer.hasLoops = true;
}
else if (cfInstruction.type == GPU7_CF_INST_LOOP_BREAK)
{
shaderContext->analyzer.modifiesPixelActiveState = true;
shaderContext->analyzer.hasLoops = true;
}
else if (cfInstruction.type == GPU7_CF_INST_MEM_STREAM0_WRITE ||
cfInstruction.type == GPU7_CF_INST_MEM_STREAM1_WRITE)
{
uint32 streamoutBufferIndex = 0;
if (cfInstruction.type == GPU7_CF_INST_MEM_STREAM0_WRITE)
streamoutBufferIndex = 0;
else if (cfInstruction.type == GPU7_CF_INST_MEM_STREAM1_WRITE)
streamoutBufferIndex = 1;
else
cemu_assert_debug(false);
shaderContext->analyzer.hasStreamoutWrite = true;
cemu_assert(streamoutBufferIndex < shaderContext->output->streamoutBufferWriteMask.size());
shaderContext->output->streamoutBufferWriteMask[streamoutBufferIndex] = true;
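// derive the written vector size from the component mask and capture the buffer strides; the stride registers are spaced 4 registers apart and hold the stride in dwords, hence the shift by 2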
uint32 vectorWriteSize = 0;
for (sint32 f = 0; f < 4; f++)
{
if ((cfInstruction.memWriteCompMask & (1 << f)) != 0)
vectorWriteSize = (f + 1) * 4;
shaderContext->output->streamoutBufferStride[f] = shaderContext->contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + f * 4] << 2;
}
cemu_assert_debug((cfInstruction.exportArrayBase * 4 + vectorWriteSize) <= shaderContext->output->streamoutBufferStride[streamoutBufferIndex]);
}
else if (cfInstruction.type == GPU7_CF_INST_MEM_RING_WRITE)
{
// track number of parameters that are output (simplified by just tracking the offset of the last one)
if (cfInstruction.memWriteElemSize != 3)
debugBreakpoint(); // only full XYZW ring writes are supported
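// ring exports write full XYZW vectors, so a burst of (exportBurstCount+1) vectors covers that many 4-dword slots; exportArrayBase is also counted in dwords, so dividing by 4 yields the vec4 parameter count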
uint32 dwordWriteCount = (cfInstruction.exportBurstCount + 1) * 4;
uint32 numRingParameter = (cfInstruction.exportArrayBase + dwordWriteCount) / 4;
shader->ringParameterCount = std::max(shader->ringParameterCount, numRingParameter);
// mark input GPRs as used
for (uint32 i = 0; i < (cfInstruction.exportBurstCount + 1); i++)
{
shaderContext->analyzer.gprUseMask[(cfInstruction.exportSourceGPR + i) / 8] |= (1 << ((cfInstruction.exportSourceGPR + i) % 8));
}
}
else if (cfInstruction.type == GPU7_CF_INST_EMIT_VERTEX)
{
shaderContext->analyzer.numEmitVertex++;
}
else if (cfInstruction.type == GPU7_CF_INST_CALL)
{
// CALL instruction does not need analyzing
// and subroutines are analyzed separately
}
else
cemu_assert_unimplemented();
}
// analyze subroutines
for (auto subroutineAddr : list_subroutineAddrs)
{
LatteDecompiler_analyzeSubroutine(shaderContext, subroutineAddr);
}
// decide which uniform mode to use
bool hasAnyDynamicBufferAccess = false;
bool hasAnyBufferAccess = false;
for(auto& it : shaderContext->analyzer.uniformBufferAccessTracker)
{
if( it.HasRelativeAccess() )
hasAnyDynamicBufferAccess = true;
if( it.HasAccess() )
hasAnyBufferAccess = true;
}
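// precedence: dynamically indexed buffer access forces binding the complete uniform buffers, dynamically indexed register access forces uploading the full 256-entry constant file, otherwise statically referenced uniforms are packed into a compact remapped block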
if (hasAnyDynamicBufferAccess)
{
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
}
else if(shaderContext->analyzer.uniformRegisterAccessTracker.HasRelativeAccess() )
{
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE;
}
else if(hasAnyBufferAccess || shaderContext->analyzer.uniformRegisterAccessTracker.HasAccess() )
{
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED;
}
else
{
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_NONE;
}
// generate compact list of uniform buffers (for faster access)
cemu_assert_debug(shader->list_quickBufferList.empty());
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
{
if( !shaderContext->analyzer.uniformBufferAccessTracker[i].HasAccess() )
continue;
LatteDecompilerShader::QuickBufferEntry entry;
entry.index = i;
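// DetermineSize() returns the accessed size in vec4 entries, multiply by 16 to get bytes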
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(shaderContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
shader->list_quickBufferList.push_back(entry);
}
// get dimension of each used texture
_LatteRegisterSetTextureUnit* texRegs = nullptr;
if( shader->shaderType == LatteConst::ShaderType::Vertex )
texRegs = shaderContext->contextRegistersNew->SQ_TEX_START_VS;
else if( shader->shaderType == LatteConst::ShaderType::Pixel )
texRegs = shaderContext->contextRegistersNew->SQ_TEX_START_PS;
else if( shader->shaderType == LatteConst::ShaderType::Geometry )
texRegs = shaderContext->contextRegistersNew->SQ_TEX_START_GS;
for(sint32 i=0; i<LATTE_NUM_MAX_TEX_UNITS; i++)
{
if (!shaderContext->output->textureUnitMask[i])
{
// texture unit not used
shader->textureUnitDim[i] = (Latte::E_DIM)0xFF;
continue;
}
auto& texUnit = texRegs[i];
auto dim = texUnit.word0.get_DIM();
shader->textureUnitDim[i] = dim;
if(dim == Latte::E_DIM::DIM_CUBEMAP)
shaderContext->analyzer.hasCubeMapTexture = true;
shader->textureIsIntegerFormat[i] = texUnit.word4.get_NUM_FORM_ALL() == Latte::LATTE_SQ_TEX_RESOURCE_WORD4_N::E_NUM_FORMAT_ALL::NUM_FORMAT_INT;
}
// generate list of used texture units
shader->textureUnitListCount = 0;
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
{
if (shaderContext->output->textureUnitMask[i])
{
shader->textureUnitList[shader->textureUnitListCount] = i;
shader->textureUnitListCount++;
}
}
// for geometry shaders check the copy shader for stream writes
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)
{
shaderContext->analyzer.hasStreamoutWrite = true;
if (shaderContext->contextRegisters[mmVGT_STRMOUT_EN] != 0)
shaderContext->analyzer.hasStreamoutEnable = true;
for (auto& it : shaderContext->parsedGSCopyShader->list_streamWrites)
{
shaderContext->output->streamoutBufferWriteMask[it.bufferIndex] = true;
uint32 vectorWriteSize = 0;
for (sint32 f = 0; f < 4; f++)
{
if ((it.memWriteCompMask&(1 << f)) != 0)
vectorWriteSize = (f + 1) * 4;
}
shaderContext->output->streamoutBufferStride[it.bufferIndex] = std::max(shaderContext->output->streamoutBufferStride[it.bufferIndex], it.exportArrayBase * 4 + vectorWriteSize);
}
}
// analyze input attributes again (if shader has relative GPR read)
if(shaderContext->analyzer.usesRelativeGPRRead && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry) )
{
if(shaderContext->fetchShader)
{
LatteFetchShader* parsedFetchShader = shaderContext->fetchShader;
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
{
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
{
uint32 registerIndex;
// get register index based on vtx semantic table
uint32 attributeShaderLoc = 0xFFFFFFFF;
for (sint32 f = 0; f < 32; f++)
{
if (shaderContext->contextRegisters[mmSQ_VTX_SEMANTIC_0 + f] == bufferGroup.attrib[i].semanticId)
{
attributeShaderLoc = f;
break;
}
}
if (attributeShaderLoc == 0xFFFFFFFF)
continue; // attribute is not mapped to VS input
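// GPR0 holds the vertex index, so the attribute at semantic table slot N is loaded into GPR N+1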
registerIndex = attributeShaderLoc + 1;
shaderContext->analyzer.gprUseMask[registerIndex / 8] |= (1 << (registerIndex % 8));
}
}
}
}
else if (shaderContext->analyzer.usesRelativeGPRRead && shader->shaderType == LatteConst::ShaderType::Pixel)
{
// mark pixel shader inputs as used if there is any relative GPR access
LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();
for (sint32 i = 0; i < psInputTable->count; i++)
{
shaderContext->analyzer.gprUseMask[i / 8] |= (1 << (i % 8));
}
}
// analyze CF stack
sint32 cfCurrentStackDepth = 0;
sint32 cfCurrentMaxStackDepth = 0;
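// each CF instruction records the stack depth at which it executes; PUSH_BEFORE stores the depth after its push, while the POP variants store the depth before popping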
for(auto& cfInstruction : shaderContext->cfInstructions)
{
if (cfInstruction.type == GPU7_CF_INST_ALU)
{
// no effect on stack depth
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_ALU_PUSH_BEFORE )
{
cfCurrentStackDepth++;
cfCurrentMaxStackDepth = std::max(cfCurrentMaxStackDepth, cfCurrentStackDepth);
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_ALU_POP_AFTER)
{
cfInstruction.activeStackDepth = cfCurrentStackDepth;
cfCurrentStackDepth--;
}
else if (cfInstruction.type == GPU7_CF_INST_ALU_POP2_AFTER)
{
cfInstruction.activeStackDepth = cfCurrentStackDepth;
cfCurrentStackDepth -= 2;
}
else if (cfInstruction.type == GPU7_CF_INST_ALU_BREAK )
{
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_ALU_ELSE_AFTER)
{
if (cfInstruction.popCount != 0)
debugBreakpoint();
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_ELSE )
{
//if (cfInstruction.popCount != 0)
// debugBreakpoint(); -> Only relevant when ELSE jump is taken
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_POP)
{
cfInstruction.activeStackDepth = cfCurrentStackDepth;
cfCurrentStackDepth -= cfInstruction.popCount;
if (cfCurrentStackDepth < 0)
debugBreakpoint();
}
else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END ||
cfInstruction.type == GPU7_CF_INST_LOOP_START_NO_AL)
{
// no effect on stack depth
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_LOOP_BREAK)
{
// since we assume that the break is not taken (for all pixels), we also don't need to worry about the stack depth adjustment
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_TEX)
{
// no effect on stack depth
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_EXPORT || cfInstruction.type == GPU7_CF_INST_EXPORT_DONE)
{
// no effect on stack depth
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_MEM_STREAM0_WRITE ||
cfInstruction.type == GPU7_CF_INST_MEM_STREAM1_WRITE)
{
// no effect on stack depth
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_MEM_RING_WRITE)
{
// no effect on stack depth
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_EMIT_VERTEX)
{
// no effect on stack depth
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else if (cfInstruction.type == GPU7_CF_INST_CALL)
{
// no effect on stack depth
cfInstruction.activeStackDepth = cfCurrentStackDepth;
}
else
{
cemu_assert_debug(false);
}
}
shaderContext->analyzer.activeStackMaxDepth = cfCurrentMaxStackDepth;
if (cfCurrentStackDepth != 0)
{
debug_printf("cfCurrentStackDepth is not zero after all CF instructions. depth is %d\n", cfCurrentStackDepth);
cemu_assert_debug(false);
}
if(list_subroutineAddrs.empty() == false)
cemuLog_logDebug(LogType::Force, "Todo - analyze shader subroutine CF stack");
// TF mode
if (shaderContext->options->useTFViaSSBO && shaderContext->output->streamoutBufferWriteMask.any())
{
shaderContext->analyzer.useSSBOForStreamout = true;
}
// assign binding points
if (shaderContext->shaderType == LatteConst::ShaderType::Vertex)
shaderContext->output->resourceMappingVK.setIndex = 0;
else if (shaderContext->shaderType == LatteConst::ShaderType::Pixel)
shaderContext->output->resourceMappingVK.setIndex = 1;
else if (shaderContext->shaderType == LatteConst::ShaderType::Geometry)
shaderContext->output->resourceMappingVK.setIndex = 2;
LatteDecompiler::_initTextureBindingPointsGL(shaderContext);
LatteDecompiler::_initTextureBindingPointsVK(shaderContext);
LatteDecompiler::_initUniformBindingPoints(shaderContext);
LatteDecompiler::_initAttributeBindingPoints(shaderContext);
}