Latte: Optimize uniform register array size for known shaders

This commit is contained in:
Exzap 2024-02-19 12:07:03 +01:00
parent 96bbd3bd25
commit 72ce4838ea
4 changed files with 15 additions and 11 deletions

View file

@ -652,7 +652,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
}
else
{
shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsVK.count_uniformRegister;
shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsGL.count_uniformRegister;
}
// calculate aux hash
if (calculateAuxHash)

View file

@ -787,7 +787,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
continue;
LatteDecompilerShader::QuickBufferEntry entry;
entry.index = i;
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(shaderContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
shader->list_quickBufferList.push_back(entry);
}
// get dimension of each used texture

View file

@ -37,7 +37,7 @@ namespace LatteDecompiler
}
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
{
uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(256);
uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256);
// full or partial uniform register file has to be present
if (shaderType == LatteConst::ShaderType::Vertex)
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize);
@ -156,7 +156,7 @@ namespace LatteDecompiler
shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i);
shaderSrc->add("{" _CRLF);
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
shaderSrc->add("};" _CRLF _CRLF);
shaderSrc->add(_CRLF);
}

View file

@ -157,19 +157,23 @@ struct LatteDecompilerBufferAccessTracker
}
}
sint32 DetermineSize(sint32 maximumSize) const
sint32 DetermineSize(uint64 shaderBaseHash, sint32 maximumSize) const
{
// here we try to predict the accessed range so we dont have to upload the whole buffer
// potential risky optimization: assume that if there is a fixed-index access on an index higher than any other non-zero relative accesses, it bounds the prior relative access
// here we try to predict the accessed byte range so we don't have to upload the whole buffer
// if no bound can be determined, then return maximumSize
// for some known shaders we use hand-tuned values instead of the maximumSize fallback value that those shaders would normally use
if(shaderBaseHash == 0x8ff56afdf1a2f837) // XCX text rendering
return 24;
if(shaderBaseHash == 0x37b9100c1310d3bb) // BotW UI backdrops 1
return 24;
if(shaderBaseHash == 0xf7ba548c1fefe24a) // BotW UI backdrops 2
return 30;
sint32 highestAccessIndex = -1;
if(hasStaticIndexAccess)
{
highestAccessIndex = highestAccessStaticIndex;
}
if(hasDynamicIndexAccess)
{
return maximumSize; // dynamic index exists and no bound can be determined
}
if (highestAccessIndex < 0)
return 1; // no access at all? But avoid zero as a size
return highestAccessIndex + 1;