mirror of
https://github.com/zhaobot/yuzu.git
synced 2025-01-12 09:02:29 -03:00
maxwell_3d: Restructure macro upload to use a single macro code memory.
- Fixes an issue where macros could be skipped.
- Fixes rendering of distant objects in Super Mario Odyssey.
This commit is contained in:
parent
d08457f879
commit
de0ab806df
4 changed files with 55 additions and 27 deletions
|
@ -43,15 +43,17 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
|
|||
// Reset the current macro.
|
||||
executing_macro = 0;
|
||||
|
||||
// The requested macro must have been uploaded already.
|
||||
auto macro_code = uploaded_macros.find(method);
|
||||
if (macro_code == uploaded_macros.end()) {
|
||||
LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
|
||||
// Lookup the macro offset
|
||||
const u32 entry{(method - MacroRegistersStart) >> 1};
|
||||
const auto& search{macro_offsets.find(entry)};
|
||||
if (search == macro_offsets.end()) {
|
||||
LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
|
||||
UNREACHABLE();
|
||||
return;
|
||||
}
|
||||
|
||||
// Execute the current macro.
|
||||
macro_interpreter.Execute(macro_code->second, std::move(parameters));
|
||||
macro_interpreter.Execute(search->second, std::move(parameters));
|
||||
}
|
||||
|
||||
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
||||
|
@ -97,6 +99,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
|||
ProcessMacroUpload(value);
|
||||
break;
|
||||
}
|
||||
case MAXWELL3D_REG_INDEX(macros.bind): {
|
||||
ProcessMacroBind(value);
|
||||
break;
|
||||
}
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
|
||||
|
@ -158,9 +164,13 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
|||
}
|
||||
|
||||
void Maxwell3D::ProcessMacroUpload(u32 data) {
|
||||
// Store the uploaded macro code to interpret them when they're called.
|
||||
auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart];
|
||||
macro.push_back(data);
|
||||
ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
|
||||
"upload_address exceeded macro_memory size!");
|
||||
macro_memory[regs.macros.upload_address++] = data;
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacroBind(u32 data) {
|
||||
macro_offsets[regs.macros.entry] = data;
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessQueryGet() {
|
||||
|
|
|
@ -475,12 +475,13 @@ public:
|
|||
INSERT_PADDING_WORDS(0x45);
|
||||
|
||||
struct {
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 upload_address;
|
||||
u32 data;
|
||||
u32 entry;
|
||||
u32 bind;
|
||||
} macros;
|
||||
|
||||
INSERT_PADDING_WORDS(0x189);
|
||||
INSERT_PADDING_WORDS(0x188);
|
||||
|
||||
u32 tfb_enabled;
|
||||
|
||||
|
@ -994,12 +995,25 @@ public:
|
|||
/// Returns the texture information for a specific texture in a specific shader stage.
|
||||
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
|
||||
|
||||
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
||||
/// we've seen used.
|
||||
using MacroMemory = std::array<u32, 0x40000>;
|
||||
|
||||
/// Gets a reference to macro memory.
|
||||
const MacroMemory& GetMacroMemory() const {
|
||||
return macro_memory;
|
||||
}
|
||||
|
||||
private:
|
||||
void InitializeRegisterDefaults();
|
||||
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
|
||||
std::unordered_map<u32, std::vector<u32>> uploaded_macros;
|
||||
/// Start offsets of each macro in macro_memory
|
||||
std::unordered_map<u32, u32> macro_offsets;
|
||||
|
||||
/// Memory for macro code
|
||||
MacroMemory macro_memory;
|
||||
|
||||
/// Macro method that is currently being executed / being fed parameters.
|
||||
u32 executing_macro = 0;
|
||||
|
@ -1022,9 +1036,12 @@ private:
|
|||
*/
|
||||
void CallMacroMethod(u32 method, std::vector<u32> parameters);
|
||||
|
||||
/// Handles writes to the macro uploading registers.
|
||||
/// Handles writes to the macro uploading register.
|
||||
void ProcessMacroUpload(u32 data);
|
||||
|
||||
/// Handles writes to the macro bind register.
|
||||
void ProcessMacroBind(u32 data);
|
||||
|
||||
/// Handles a write to the CLEAR_BUFFERS register.
|
||||
void ProcessClearBuffers();
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ namespace Tegra {
|
|||
|
||||
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
|
||||
|
||||
void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) {
|
||||
void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
|
||||
Reset();
|
||||
registers[1] = parameters[0];
|
||||
this->parameters = std::move(parameters);
|
||||
|
@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
|
|||
// Execute the code until we hit an exit condition.
|
||||
bool keep_executing = true;
|
||||
while (keep_executing) {
|
||||
keep_executing = Step(code, false);
|
||||
keep_executing = Step(offset, false);
|
||||
}
|
||||
|
||||
// Assert that the macro used all the input parameters
|
||||
|
@ -37,10 +37,10 @@ void MacroInterpreter::Reset() {
|
|||
next_parameter_index = 1;
|
||||
}
|
||||
|
||||
bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
|
||||
bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||
u32 base_address = pc;
|
||||
|
||||
Opcode opcode = GetOpcode(code);
|
||||
Opcode opcode = GetOpcode(offset);
|
||||
pc += 4;
|
||||
|
||||
// Update the program counter if we were delayed
|
||||
|
@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
|
|||
|
||||
delayed_pc = base_address + opcode.GetBranchTarget();
|
||||
// Execute one more instruction due to the delay slot.
|
||||
return Step(code, true);
|
||||
return Step(offset, true);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
|
|||
// Exit has a delay slot, execute the next instruction
|
||||
// Note: Executing an exit during a branch delay slot will cause the instruction at the
|
||||
// branch target to be executed before exiting.
|
||||
Step(code, true);
|
||||
Step(offset, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const {
|
||||
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
|
||||
const auto& macro_memory{maxwell3d.GetMacroMemory()};
|
||||
ASSERT((pc % sizeof(u32)) == 0);
|
||||
ASSERT(pc < code.size() * sizeof(u32));
|
||||
return {code[pc / sizeof(u32)]};
|
||||
ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
|
||||
return {macro_memory[offset + pc / sizeof(u32)]};
|
||||
}
|
||||
|
||||
u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {
|
||||
|
|
|
@ -22,10 +22,10 @@ public:
|
|||
|
||||
/**
|
||||
* Executes the macro code with the specified input parameters.
|
||||
* @param code The macro byte code to execute
|
||||
* @param parameters The parameters of the macro
|
||||
* @param offset Offset to start execution at.
|
||||
* @param parameters The parameters of the macro.
|
||||
*/
|
||||
void Execute(const std::vector<u32>& code, std::vector<u32> parameters);
|
||||
void Execute(u32 offset, std::vector<u32> parameters);
|
||||
|
||||
private:
|
||||
enum class Operation : u32 {
|
||||
|
@ -110,11 +110,11 @@ private:
|
|||
/**
|
||||
* Executes a single macro instruction located at the current program counter. Returns whether
|
||||
* the interpreter should keep running.
|
||||
* @param code The macro code to execute.
|
||||
* @param offset Offset to start execution at.
|
||||
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
|
||||
* previous instruction.
|
||||
*/
|
||||
bool Step(const std::vector<u32>& code, bool is_delay_slot);
|
||||
bool Step(u32 offset, bool is_delay_slot);
|
||||
|
||||
/// Calculates the result of an ALU operation. src_a OP src_b;
|
||||
u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
|
||||
|
@ -127,7 +127,7 @@ private:
|
|||
bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
|
||||
|
||||
/// Reads an opcode at the current program counter location.
|
||||
Opcode GetOpcode(const std::vector<u32>& code) const;
|
||||
Opcode GetOpcode(u32 offset) const;
|
||||
|
||||
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
|
||||
u32 GetRegister(u32 register_id) const;
|
||||
|
|
Loading…
Reference in a new issue