Latte: Optimize uniform register array size for known shaders
This commit is contained in:
parent
96bbd3bd25
commit
72ce4838ea
4 changed files with 15 additions and 11 deletions
|
@ -652,7 +652,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
|
|||
}
|
||||
else
|
||||
{
|
||||
shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsVK.count_uniformRegister;
|
||||
shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsGL.count_uniformRegister;
|
||||
}
|
||||
// calculate aux hash
|
||||
if (calculateAuxHash)
|
||||
|
|
|
@ -787,7 +787,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
|||
continue;
|
||||
LatteDecompilerShader::QuickBufferEntry entry;
|
||||
entry.index = i;
|
||||
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
|
||||
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(shaderContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
|
||||
shader->list_quickBufferList.push_back(entry);
|
||||
}
|
||||
// get dimension of each used texture
|
||||
|
|
|
@ -37,7 +37,7 @@ namespace LatteDecompiler
|
|||
}
|
||||
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
|
||||
{
|
||||
uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(256);
|
||||
uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256);
|
||||
// full or partial uniform register file has to be present
|
||||
if (shaderType == LatteConst::ShaderType::Vertex)
|
||||
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize);
|
||||
|
@ -156,7 +156,7 @@ namespace LatteDecompiler
|
|||
|
||||
shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i);
|
||||
shaderSrc->add("{" _CRLF);
|
||||
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
|
||||
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
|
||||
shaderSrc->add("};" _CRLF _CRLF);
|
||||
shaderSrc->add(_CRLF);
|
||||
}
|
||||
|
|
|
@ -157,19 +157,23 @@ struct LatteDecompilerBufferAccessTracker
|
|||
}
|
||||
}
|
||||
|
||||
sint32 DetermineSize(sint32 maximumSize) const
|
||||
sint32 DetermineSize(uint64 shaderBaseHash, sint32 maximumSize) const
|
||||
{
|
||||
// here we try to predict the accessed range so we dont have to upload the whole buffer
|
||||
// potential risky optimization: assume that if there is a fixed-index access on an index higher than any other non-zero relative accesses, it bounds the prior relative access
|
||||
// here we try to predict the accessed byte range so we don't have to upload the whole buffer
|
||||
// if no bound can be determined then return maximumSize
|
||||
// for some known shaders we use hand-tuned values instead of the maximumSize fallback value that those shaders would normally use
|
||||
if(shaderBaseHash == 0x8ff56afdf1a2f837) // XCX text rendering
|
||||
return 24;
|
||||
if(shaderBaseHash == 0x37b9100c1310d3bb) // BotW UI backdrops 1
|
||||
return 24;
|
||||
if(shaderBaseHash == 0xf7ba548c1fefe24a) // BotW UI backdrops 2
|
||||
return 30;
|
||||
|
||||
sint32 highestAccessIndex = -1;
|
||||
if(hasStaticIndexAccess)
|
||||
{
|
||||
highestAccessIndex = highestAccessStaticIndex;
|
||||
}
|
||||
if(hasDynamicIndexAccess)
|
||||
{
|
||||
return maximumSize; // dynamic index exists and no bound can be determined
|
||||
}
|
||||
if (highestAccessIndex < 0)
|
||||
return 1; // no access at all? But avoid zero as a size
|
||||
return highestAccessIndex + 1;
|
||||
|
|
Loading…
Reference in a new issue