[Bayonetta 2] Fix aspect ratio patch to not break Vulkan

Fixes https://github.com/slashiee/cemu_graphic_packs/issues/437

The aspect ratio mod apparently relied on a shotgun approach (redirecting several addresses to the same code cave), which caused major issues with Vulkan.
Using @getdls's addresses, I found out that 2 of the addresses that were being patched to jump to the code cave were unrelated to the aspect ratio, so I removed those.

Those random jumps probably didn't cause issues because they would jump to code that would just utilize floating point registers that were in-use and then put the result in the wrong register too. While the picked registers would work with the 3D rendering aspect ratio instruction, it wouldn't fail for the other registers since they weren't specific to those.

Anyway, the proper fix was to add a second code cave for the aspect ratio that outputs the culling ratio in the proper register, and to make sure it doesn't use registers that are already in use.

I also reverted some of the shader code changes, since they were inconsistent with how we've done them for all the other packs, and made the shaders more compatible with ultrawide resolutions.
This commit is contained in:
Crementif 2020-05-24 18:09:28 +02:00
parent 7e56ff1272
commit 3f87a42f4f
No known key found for this signature in database
GPG key ID: 11A98E7DE0412B33
6 changed files with 79 additions and 59 deletions

View file

@ -2,6 +2,10 @@
#extension GL_ARB_texture_gather : enable
#extension GL_ARB_separate_shader_objects : enable
// shader 43a2239f07af804e
// Used for: Horizontal Blur
float resXScale = float($width)/float($gameWidth);
// start of shader inputs/outputs, predetermined by Cemu. Do not touch
#ifdef VULKAN
#define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location)
@ -27,7 +31,6 @@ uniform vec2 uf_fragCoordScale;
TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0;
layout(location = 0) in vec4 passParameterSem133;
layout(location = 0) out vec4 passPixelColor0;
const float resScale = ($gameWidth / $width);
// end of shader inputs/outputs
int clampFI32(int v)
{
@ -64,17 +67,17 @@ R4f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw);
R5f.x = intBitsToFloat(uf_remappedPS[0].x) + -(intBitsToFloat(uf_remappedPS[1].x));
R5f.w = intBitsToFloat(uf_remappedPS[0].y) + -(intBitsToFloat(uf_remappedPS[1].y));
// 1
R1f.x = R0f.x + intBitsToFloat(0x3c333333)*resScale;
R1f.x = R0f.x + (intBitsToFloat(0x3c333333)/resXScale);
R1f.y = R0f.y;
R5f.z = intBitsToFloat(uf_remappedPS[0].z) + -(intBitsToFloat(uf_remappedPS[1].z));
R2f.w = R0f.y;
R2f.x = R0f.x + intBitsToFloat(0x3b99999a)*resScale;
R2f.x = R0f.x + (intBitsToFloat(0x3b99999a)/resXScale);
PS1f = R2f.x;
// 2
backupReg0f = R0f.x;
R3f.x = R0f.x + -(intBitsToFloat(0x3c333333)*resScale);
R3f.x = R0f.x + -(intBitsToFloat(0x3c333333)/resXScale);
R3f.y = R0f.y;
R0f.x = backupReg0f + -(intBitsToFloat(0x3b99999a)*resScale);
R0f.x = backupReg0f + -(intBitsToFloat(0x3b99999a)/resXScale);
PS0f = R0f.x;
R1f.xyzw = (texture(textureUnitPS0, R1f.xy).xyzw);
R2f.xyzw = (texture(textureUnitPS0, R2f.xw).xyzw);

View file

@ -1,7 +1,11 @@
#version 430
#extension GL_ARB_texture_gather : enable
#extension GL_ARB_separate_shader_objects : enable
// shader 75387173950c1793//bloom vert
// shader 75387173950c1793
// Used for: Vertical Blur
float resYScale = float($height)/float($gameHeight);
// start of shader inputs/outputs, predetermined by Cemu. Do not touch
#ifdef VULKAN
#define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location)
@ -28,7 +32,6 @@ TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0;
layout(location = 0) in vec4 passParameterSem133;
layout(location = 0) out vec4 passPixelColor0;
// end of shader inputs/outputs
const float resScale = float($gameWidth / $width);
int clampFI32(int v)
{
if( v == 0x7FFFFFFF )
@ -60,14 +63,14 @@ R0f = passParameterSem133;
R4f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw);
// 0
R1f.x = R0f.x;
R1f.y = R0f.y + intBitsToFloat(0x3ca2e8ba)*resScale;
R1f.y = R0f.y + (intBitsToFloat(0x3ca2e8ba)/resYScale);
R2f.z = R0f.x;
R2f.y = R0f.y + intBitsToFloat(0x3c0ba2e9)*resScale;
R2f.y = R0f.y + (intBitsToFloat(0x3c0ba2e9)/resYScale);
PS0f = R2f.y;
// 1
R3f.x = R0f.x;
R3f.y = R0f.y + -(intBitsToFloat(0x3ca2e8ba))*resScale;
R0f.z = R0f.y + -(intBitsToFloat(0x3c0ba2e9))*resScale;
R3f.y = R0f.y + -(intBitsToFloat(0x3ca2e8ba)/resYScale);
R0f.z = R0f.y + -(intBitsToFloat(0x3c0ba2e9)/resYScale);
R1f.xyzw = (texture(textureUnitPS0, R1f.xy).xyzw);
R2f.xyzw = (texture(textureUnitPS0, R2f.zy).xyzw);
R3f.xyzw = (texture(textureUnitPS0, R3f.xy).xyzw);

View file

@ -1,7 +1,12 @@
#version 430
#extension GL_ARB_texture_gather : enable
#extension GL_ARB_separate_shader_objects : enable
// shader 78a2659662685d55 //menu AA
// shader 78a2659662685d55
// Used for: Menu Anti-Aliasing and Specular Highlight?
float resXScale = float($width)/float($gameWidth);
float resYScale = float($height)/float($gameHeight);
// start of shader inputs/outputs, predetermined by Cemu. Do not touch
#ifdef VULKAN
#define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location)
@ -25,7 +30,7 @@ uniform vec2 uf_fragCoordScale;
TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0;
layout(location = 0) in vec4 passParameterSem133;
layout(location = 0) out vec4 passPixelColor0;
const float resScale = ($gameWidth / $width);
// end of shader inputs/outputs
int clampFI32(int v)
{
@ -91,14 +96,14 @@ int cubeMapFaceId;
R0i = floatBitsToInt(passParameterSem133);
if( activeMaskStackC[1] == true ) {
// 0
R1i.xyz = floatBitsToInt(vec3(intBitsToFloat(R0i.x),intBitsToFloat(R0i.y),intBitsToFloat(R0i.x)) + vec3(0.0,0.0,intBitsToFloat(0xba99999a)*resScale));
R1i.xyz = floatBitsToInt(vec3(intBitsToFloat(R0i.x),intBitsToFloat(R0i.y),intBitsToFloat(R0i.x)) + vec3(0.0,0.0,(intBitsToFloat(0xba99999a)/resXScale)));
R1i.w = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0);
R2i.x = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0x3a99999a)*resScale);
R2i.x = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0x3a99999a)/resXScale)); // horizontal texel offset, shrunk by the X upscale factor
PS0i = R2i.x;
// 1
R3i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0);
R2i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0);
R3i.zwy = floatBitsToInt(vec3(intBitsToFloat(R0i.y),intBitsToFloat(R0i.x),intBitsToFloat(R0i.y)) + vec3(intBitsToFloat(0xbb088889)*resScale,0.0,intBitsToFloat(0x3b088889)*resScale));
// Both non-zero offsets are added to R0i.y (vertical, +/-1.5/720), so both use the Y upscale factor.
R3i.zwy = floatBitsToInt(vec3(intBitsToFloat(R0i.y),intBitsToFloat(R0i.x),intBitsToFloat(R0i.y)) + vec3((intBitsToFloat(0xbb088889)/resYScale),0.0,(intBitsToFloat(0x3b088889)/resYScale)));
PS1i = R3i.y;
}
if( activeMaskStackC[1] == true ) {
@ -118,7 +123,7 @@ R127i.z = floatBitsToInt(intBitsToFloat(R8i.y) + intBitsToFloat(R9i.y));
R127i.z = floatBitsToInt(intBitsToFloat(R127i.z) * 2.0);
R127i.w = floatBitsToInt(intBitsToFloat(R8i.x) + intBitsToFloat(R9i.x));
R127i.w = floatBitsToInt(intBitsToFloat(R127i.w) * 2.0);
R3i.x = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0xba4ccccd)*resScale);
R3i.x = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0xba4ccccd)/resXScale));
PS0i = R3i.x;
// 1
R5i.x = floatBitsToInt(intBitsToFloat(R10i.z) + intBitsToFloat(R11i.z));
@ -132,15 +137,15 @@ R4i.w = floatBitsToInt(intBitsToFloat(R4i.w) * 2.0);
R3i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0);
PS1i = R3i.y;
// 2
R2i.x = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0x3a4ccccd)*resScale);
R2i.x = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0x3a4ccccd)/resXScale));
R2i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0);
R3i.z = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0);
R3i.w = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0xbab60b61)*resScale);
R3i.w = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0xbab60b61)/resYScale));
R1i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0);
PS0i = R1i.x;
// 3
R4i.x = floatBitsToInt((intBitsToFloat(R7i.y) * 2.0 + intBitsToFloat(R127i.z)));
R1i.y = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0x3ab60b61)*resScale);
R1i.y = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0x3ab60b61)/resYScale));
R1i.z = floatBitsToInt((intBitsToFloat(R7i.x) * 2.0 + intBitsToFloat(R127i.w)));
R1i.w = floatBitsToInt((intBitsToFloat(R7i.z) * 2.0 + intBitsToFloat(R127i.y)));
R2i.w = floatBitsToInt((intBitsToFloat(R7i.w) * 2.0 + intBitsToFloat(R127i.x)));
@ -224,7 +229,7 @@ R124i.y = floatBitsToInt(intBitsToFloat(R125i.y) * intBitsToFloat(0x3e2aaaab));
R125i.z = floatBitsToInt(intBitsToFloat(R126i.x) * intBitsToFloat(0x3e2aaaab));
R123i.w = floatBitsToInt((intBitsToFloat(R7i.w) * 2.0 + intBitsToFloat(R4i.y)));
PV0i.w = R123i.w;
R126i.w = floatBitsToInt((intBitsToFloat(backupReg1i) * intBitsToFloat(0x40400000) + -(intBitsToFloat(0x3dcccccd)*resScale)));
R126i.w = floatBitsToInt((intBitsToFloat(backupReg1i) * intBitsToFloat(0x40400000) + -(intBitsToFloat(0x3dcccccd)/resYScale)));
PS0i = R126i.w;
// 9
backupReg0i = R125i.x;
@ -232,7 +237,7 @@ backupReg1i = R127i.w;
R125i.x = floatBitsToInt(-(intBitsToFloat(R7i.y)) + intBitsToFloat(R1i.y));
R125i.y = floatBitsToInt(-(intBitsToFloat(R7i.z)) + intBitsToFloat(R127i.z));
R124i.z = floatBitsToInt(intBitsToFloat(PV0i.w) * intBitsToFloat(0x3e2aaaab));
R127i.w = floatBitsToInt((intBitsToFloat(backupReg0i) * intBitsToFloat(0x40400000) + -(intBitsToFloat(0x3dcccccd)*resScale)));
R127i.w = floatBitsToInt((intBitsToFloat(backupReg0i) * intBitsToFloat(0x40400000) + -(intBitsToFloat(0x3dcccccd)/resYScale)));
R126i.z = floatBitsToInt(-(intBitsToFloat(R7i.w)) + intBitsToFloat(backupReg1i));
PS1i = R126i.z;
// 10
@ -255,8 +260,8 @@ PS1i = floatBitsToInt(1.0 / intBitsToFloat(PV0i.x));
R124i.x = floatBitsToInt(intBitsToFloat(R126i.w) * intBitsToFloat(PS1i));
R124i.x = clampFI32(R124i.x);
R1i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0);
R4i.z = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0x3b333333)*resScale);
R1i.w = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0x3b8ccccd)*resScale);
R4i.z = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0x3b333333)/resXScale));
R1i.w = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0x3b8ccccd)/resXScale)); // offset is applied to the X coordinate, so scale by the X factor
PS0i = floatBitsToInt(1.0 / intBitsToFloat(PV1i.x));
// 13
backupReg0i = R0i.x;
@ -312,15 +317,15 @@ if( activeMaskStackC[1] == true ) {
// 0
R5i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0);
R6i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0);
R5i.z = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0x3b9f49f5)*resScale);
R5i.z = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0x3b9f49f5)/resYScale)); // offset is applied to the Y coordinate, so scale by the Y factor
PV0i.w = floatBitsToInt(intBitsToFloat(R9i.w) + intBitsToFloat(R17i.w));
R3i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0);
PS0i = R3i.x;
// 1
R1i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0);
R3i.y = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0x3bfa4fa5)*resScale);
R3i.y = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0x3bfa4fa5)/resYScale));
R2i.z = floatBitsToInt(intBitsToFloat(R18i.w) + intBitsToFloat(PV0i.w));
R1i.w = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0x3c2aaaab)*resScale);
R1i.w = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0x3c2aaaab)/resYScale)); // offset is applied to the Y coordinate, so scale by the Y factor
R4i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0);
PS1i = R4i.x;
}
@ -337,14 +342,14 @@ backupReg1i = R0i.x;
backupReg0i = R0i.y;
PV0i.x = floatBitsToInt(intBitsToFloat(R11i.w) + intBitsToFloat(R6i.w));
PV0i.y = floatBitsToInt(intBitsToFloat(R19i.w) + intBitsToFloat(R2i.z));
R4i.z = floatBitsToInt(intBitsToFloat(backupReg0i) + intBitsToFloat(0xbb9f49f5)*resScale);
R4i.z = floatBitsToInt(intBitsToFloat(backupReg0i) + (intBitsToFloat(0xbb9f49f5)/resYScale)); // backupReg0i holds R0i.y, so scale by the Y factor
R1i.w = floatBitsToInt(intBitsToFloat(backupReg1i) + 0.0);
R1i.y = floatBitsToInt(intBitsToFloat(backupReg0i) + intBitsToFloat(0xbbfa4fa5)*resScale);
R1i.y = floatBitsToInt(intBitsToFloat(backupReg0i) + (intBitsToFloat(0xbbfa4fa5)/resYScale));
PS0i = R1i.y;
// 1
backupReg0i = R3i.w;
R1i.x = floatBitsToInt(intBitsToFloat(R8i.w) + intBitsToFloat(PV0i.y));
R2i.y = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0xbc2aaaab)*resScale);
R2i.y = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0xbc2aaaab)/resYScale));
R2i.z = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0);
R3i.w = floatBitsToInt(intBitsToFloat(backupReg0i) + intBitsToFloat(PV0i.x));
}

View file

@ -1,7 +1,12 @@
#version 430
#extension GL_ARB_texture_gather : enable
#extension GL_ARB_separate_shader_objects : enable
// shader 8a0efcdc3f556942 //frambuffer
// shader 8a0efcdc3f556942
// Used for: Pyramid Blur
float resXScale = float($width)/float($gameWidth);
float resYScale = float($height)/float($gameHeight);
// start of shader inputs/outputs, predetermined by Cemu. Do not touch
#ifdef VULKAN
#define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location)
@ -28,7 +33,6 @@ TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0;
layout(location = 0) in vec4 passParameterSem133;
layout(location = 0) out vec4 passPixelColor0;
// end of shader inputs/outputs
const float resScale = ($gameWidth / $width);
int clampFI32(int v)
{
if( v == 0x7FFFFFFF )
@ -63,10 +67,10 @@ backupReg0f = R0f.x;
backupReg1f = R0f.y;
backupReg0f = R0f.x;
backupReg1f = R0f.y;
R0f.x = (intBitsToFloat(uf_remappedPS[0].z)*resScale * 2.0 + backupReg0f);
R0f.y = (intBitsToFloat(uf_remappedPS[0].w)*resScale * 2.0 + backupReg1f);
R0f.z = (-(intBitsToFloat(uf_remappedPS[0].z)*resScale) * 2.0 + backupReg0f);
R0f.w = (-(intBitsToFloat(uf_remappedPS[0].w)*resScale) * 2.0 + backupReg1f);
R0f.x = (intBitsToFloat(uf_remappedPS[0].z)/resXScale * 2.0 + backupReg0f);
R0f.y = (intBitsToFloat(uf_remappedPS[0].w)/resYScale * 2.0 + backupReg1f);
R0f.z = (-(intBitsToFloat(uf_remappedPS[0].z)/resXScale) * 2.0 + backupReg0f);
R0f.w = (-(intBitsToFloat(uf_remappedPS[0].w)/resYScale) * 2.0 + backupReg1f);
R1f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw);
R0f.xyzw = (texture(textureUnitPS0, R0f.zw).xyzw);
// 0

View file

@ -1,23 +0,0 @@
[Bayo2USv0] #v5 asm
moduleMatches = 0xAF5D1A85
.origin = codecave all
_widthScaleRatio:
.float $width
_heightScaleRatio:
.float $height
_Cave:
lis r7, _widthScaleRatio@ha
lfs f13, _widthScaleRatio@l(r7)
lis r7, _heightScaleRatio@ha
lfs f31, _heightScaleRatio@l(r7)
fdivs f2, f13, f31
blr
0x032F2E6C = bla _Cave
0x032F2044 = bla _Cave
0x032F207C = bla _Cave
0x032F2084 = bla _Cave

View file

@ -0,0 +1,28 @@
# Aspect ratio patch: replaces two instructions in the game module with calls
# to small routines that compute $width / $height.
[Bayo2_AspectRatio_V0]
moduleMatches = 0xAF5D1A85
# Everything below up to the address patches is placed in a code cave.
.origin = codecave
# $width and $height stored as floats (substituted before assembly, presumably
# from the selected preset). Despite the "ScaleRatio" names these hold the raw
# width and height; the ratio itself is computed in the routines below.
_widthScaleRatio:
.float $width
_heightScaleRatio:
.float $height
# Computes $width / $height and leaves the result in f2.
# Overwrites r7, f13 and f31 as scratch.
# NOTE(review): f31 is normally a callee-saved register in the PowerPC ABI -
# confirm it is not live at the patched call site.
_calculateARForRendering:
lis r7, _widthScaleRatio@ha
lfs f13, _widthScaleRatio@l(r7)
lis r7, _heightScaleRatio@ha
lfs f31, _heightScaleRatio@l(r7)
fdivs f2, f13, f31
blr
# Same computation, but the result is left in f10.
# Overwrites r7, f13 and f1 as scratch.
_calculateARForCulling:
lis r7, _widthScaleRatio@ha
lfs f13, _widthScaleRatio@l(r7)
lis r7, _heightScaleRatio@ha
lfs f1, _heightScaleRatio@l(r7)
fdivs f10, f13, f1
blr
# Overwrite the instruction at each address with a branch-and-link to the
# matching routine; each routine returns to the following instruction via blr.
0x032F2E6C = bla _calculateARForRendering
0x032F2044 = bla _calculateARForCulling