Added temp for switch case operand, fixed compilation issues in output

This commit is contained in:
Mr-Wiseguy 2022-11-15 19:55:48 -05:00
parent 2300a4b6c9
commit 5d9ea96abc
5 changed files with 132 additions and 33 deletions

View file

@ -24,6 +24,7 @@ namespace RecompPort {
uint32_t addend_reg; uint32_t addend_reg;
uint32_t rom; uint32_t rom;
uint32_t lw_vram; uint32_t lw_vram;
uint32_t addu_vram;
uint32_t jr_vram; uint32_t jr_vram;
std::vector<uint32_t> entries; std::vector<uint32_t> entries;
}; };
@ -33,6 +34,7 @@ namespace RecompPort {
uint32_t rom; uint32_t rom;
const std::span<const uint32_t> words; const std::span<const uint32_t> words;
std::string name; std::string name;
bool ignored;
}; };
struct FunctionStats { struct FunctionStats {

View file

@ -2,6 +2,7 @@
#define __RECOMP_H__ #define __RECOMP_H__
#include <stdint.h> #include <stdint.h>
#include <math.h>
#if 0 // treat GPRs as 32-bit, should be better codegen #if 0 // treat GPRs as 32-bit, should be better codegen
typedef uint32_t gpr; typedef uint32_t gpr;
@ -22,22 +23,26 @@ typedef uint64_t gpr;
((gpr)(int32_t)((a) - (b))) ((gpr)(int32_t)((a) - (b)))
#define MEM_D(offset, reg) \ #define MEM_D(offset, reg) \
(*(int64_t*)((rdram) + (((reg) + (offset)) ^ 3))) (*(int64_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF)))
#define MEM_W(offset, reg) \ #define MEM_W(offset, reg) \
(*(int32_t*)((rdram) + (((reg) + (offset)) ^ 3))) (*(int32_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF)))
#define MEM_H(offset, reg) \ #define MEM_H(offset, reg) \
(*(int16_t*)((rdram) + (((reg) + (offset)) ^ 3))) (*(int16_t*)(rdram + ((((reg) + (offset)) ^ 2) & 0x3FFFFFF)))
#define MEM_B(offset, reg) \ #define MEM_B(offset, reg) \
(*(int8_t*)((rdram) + (((reg) + (offset)) ^ 3))) (*(int8_t*)(rdram + ((((reg) + (offset)) ^ 3) & 0x3FFFFFF)))
#define MEM_HU(offset, reg) \ #define MEM_HU(offset, reg) \
(*(uint16_t*)((rdram) + (((reg) + (offset)) ^ 3))) (*(uint16_t*)(rdram + ((((reg) + (offset)) ^ 2) & 0x3FFFFFF)))
#define MEM_BU(offset, reg) \ #define MEM_BU(offset, reg) \
(*(uint8_t*)((rdram) + (((reg) + (offset)) ^ 3))) (*(uint8_t*)(rdram + ((((reg) + (offset)) ^ 3) & 0x3FFFFFF)))
// TODO proper lwl/lwr/swl/swr
#define MEM_WL(offset, reg) \
(*(int32_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF)))
#define S32(val) \ #define S32(val) \
((int32_t)(val)) ((int32_t)(val))
@ -104,6 +109,23 @@ typedef struct {
uint64_t hi, lo; uint64_t hi, lo;
} recomp_context; } recomp_context;
#ifdef __cplusplus
#define restrict __restrict
extern "C" {
#endif
void switch_error(const char* func, uint32_t vram, uint32_t jtbl); void switch_error(const char* func, uint32_t vram, uint32_t jtbl);
void do_break(uint32_t vram);
typedef void (recomp_func_t)(uint8_t* restrict rdram, recomp_context* restrict ctx);
recomp_func_t* get_function(uint32_t vram);
#define LOOKUP_FUNC(val) \
get_function(val)
#ifdef __cplusplus
}
#endif
#endif #endif

View file

@ -12,14 +12,16 @@ extern "C" const char* RabbitizerRegister_getNameGpr(uint8_t regValue);
struct RegState { struct RegState {
// For tracking a register that will be used to load from RAM // For tracking a register that will be used to load from RAM
uint32_t prev_lui; uint32_t prev_lui;
uint32_t prev_addiu; uint32_t prev_addiu_vram;
uint32_t prev_addu_vram;
uint8_t prev_addend_reg; uint8_t prev_addend_reg;
bool valid_lui; bool valid_lui;
bool valid_addiu; bool valid_addiu;
bool valid_addend; bool valid_addend;
// For tracking a register that has been loaded from RAM // For tracking a register that has been loaded from RAM
uint32_t loaded_lw_addr; uint32_t loaded_lw_vram;
uint32_t loaded_addr; uint32_t loaded_addu_vram;
uint32_t loaded_address;
uint8_t loaded_addend_reg; uint8_t loaded_addend_reg;
bool valid_loaded; bool valid_loaded;
@ -27,15 +29,17 @@ struct RegState {
void invalidate() { void invalidate() {
prev_lui = 0; prev_lui = 0;
prev_addiu = 0; prev_addiu_vram = 0;
prev_addu_vram = 0;
prev_addend_reg = 0; prev_addend_reg = 0;
valid_lui = false; valid_lui = false;
valid_addiu = false; valid_addiu = false;
valid_addend = false; valid_addend = false;
loaded_lw_addr = 0; loaded_lw_vram = 0;
loaded_addr = 0; loaded_addu_vram = 0;
loaded_address = 0;
loaded_addend_reg = 0; loaded_addend_reg = 0;
valid_loaded = false; valid_loaded = false;
@ -82,7 +86,7 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
reg_states[rt] = reg_states[rs]; reg_states[rt] = reg_states[rs];
// Set the addiu state if and only if there hasn't been an addiu already // Set the addiu state if and only if there hasn't been an addiu already
if (!reg_states[rt].valid_addiu) { if (!reg_states[rt].valid_addiu) {
reg_states[rt].prev_addiu = (int16_t)imm; reg_states[rt].prev_addiu_vram = (int16_t)imm;
reg_states[rt].valid_addiu = true; reg_states[rt].valid_addiu = true;
} else { } else {
// Otherwise, there have been 2 or more consecutive addius so invalidate the whole register // Otherwise, there have been 2 or more consecutive addius so invalidate the whole register
@ -102,6 +106,7 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
temp = reg_states[valid_lui_reg]; temp = reg_states[valid_lui_reg];
temp.valid_addend = true; temp.valid_addend = true;
temp.prev_addend_reg = addend_reg; temp.prev_addend_reg = addend_reg;
temp.prev_addu_vram = instr.getVram();
} else { } else {
// Check if this is a move // Check if this is a move
check_move(); check_move();
@ -124,14 +129,15 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
if (nonzero_immediate) { if (nonzero_immediate) {
lo16 = (int16_t)imm; lo16 = (int16_t)imm;
} else { } else {
lo16 = reg_states[base].prev_addiu; lo16 = reg_states[base].prev_addiu_vram;
} }
uint32_t address = reg_states[base].prev_lui + lo16; uint32_t address = reg_states[base].prev_lui + lo16;
temp.valid_loaded = true; temp.valid_loaded = true;
temp.loaded_lw_addr = instr.getVram(); temp.loaded_lw_vram = instr.getVram();
temp.loaded_addr = address; temp.loaded_address = address;
temp.loaded_addend_reg = reg_states[base].prev_addend_reg; temp.loaded_addend_reg = reg_states[base].prev_addend_reg;
temp.loaded_addu_vram = reg_states[base].prev_addu_vram;
} }
} }
reg_states[rt] = temp; reg_states[rt] = temp;
@ -144,10 +150,11 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
// Check if the source reg has a valid loaded state and if so record that as a jump table // Check if the source reg has a valid loaded state and if so record that as a jump table
if (reg_states[rs].valid_loaded) { if (reg_states[rs].valid_loaded) {
stats.jump_tables.emplace_back( stats.jump_tables.emplace_back(
reg_states[rs].loaded_addr, reg_states[rs].loaded_address,
reg_states[rs].loaded_addend_reg, reg_states[rs].loaded_addend_reg,
0, 0,
reg_states[rs].loaded_lw_addr, reg_states[rs].loaded_lw_vram,
reg_states[rs].loaded_addu_vram,
instr.getVram(), instr.getVram(),
std::vector<uint32_t>{} std::vector<uint32_t>{}
); );

View file

@ -6,6 +6,7 @@
#include "rabbitizer.hpp" #include "rabbitizer.hpp"
#include "elfio/elfio.hpp" #include "elfio/elfio.hpp"
#include "fmt/format.h" #include "fmt/format.h"
#include "fmt/ostream.h"
#include "recomp_port.h" #include "recomp_port.h"
@ -221,7 +222,16 @@ std::unordered_set<std::string> ignored_funcs {
"__osSetConfig", "__osSetConfig",
"__osGetConfig", "__osGetConfig",
"__osSetWatchLo", "__osSetWatchLo",
"__osGetWatchLo" "__osGetWatchLo",
// Cache funcs
"osInvalDCache",
"osInvalICache",
"osWritebackDCache",
"osWritebackDCacheAll"
};
std::unordered_set<std::string> renamed_funcs{
"sincosf"
}; };
int main(int argc, char** argv) { int main(int argc, char** argv) {
@ -305,6 +315,14 @@ int main(int argc, char** argv) {
// Check if this symbol is a function or has no type (like a regular glabel would) // Check if this symbol is a function or has no type (like a regular glabel would)
// Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls // Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls
if (type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE) { if (type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE) {
bool ignored = false;
if (renamed_funcs.contains(name)) {
name = "_" + name;
}
if (ignored_funcs.contains(name)) {
name = name + "_recomp";
ignored = true;
}
if (section_index < section_rom_addrs.size()) { if (section_index < section_rom_addrs.size()) {
auto section_rom_addr = section_rom_addrs[section_index]; auto section_rom_addr = section_rom_addrs[section_index];
auto section_offset = value - elf_file.sections[section_index]->get_address(); auto section_offset = value - elf_file.sections[section_index]->get_address();
@ -316,7 +334,8 @@ int main(int argc, char** argv) {
vram, vram,
static_cast<uint32_t>(section_offset + section_rom_addr), static_cast<uint32_t>(section_offset + section_rom_addr),
std::span{ words, num_instructions }, std::span{ words, num_instructions },
std::move(name) std::move(name),
ignored
); );
} else { } else {
uint32_t vram = static_cast<uint32_t>(value); uint32_t vram = static_cast<uint32_t>(value);
@ -325,7 +344,8 @@ int main(int argc, char** argv) {
vram, vram,
0, 0,
std::span<const uint32_t>{}, std::span<const uint32_t>{},
std::move(name) std::move(name),
ignored
); );
} }
} }
@ -333,16 +353,52 @@ int main(int argc, char** argv) {
fmt::print("Function count: {}\n", context.functions.size()); fmt::print("Function count: {}\n", context.functions.size());
std::ofstream func_lookup_file{ "out/funcs/lookup.cpp" };
std::ofstream func_header_file{ "out/funcs/funcs.h" };
fmt::print(func_lookup_file,
"#include <utility>\n"
"#include \"recomp.h\"\n"
"#include \"funcs.h\"\n"
"\n"
"std::pair<uint32_t, recomp_func_t*> funcs[] {{\n"
);
fmt::print(func_header_file,
"#include \"recomp.h\"\n"
"\n"
"#ifdef __cplusplus\n"
"extern \"C\" {{\n"
"#endif\n"
"\n"
);
//#pragma omp parallel for //#pragma omp parallel for
for (size_t i = 0; i < context.functions.size(); i++) { for (size_t i = 0; i < context.functions.size(); i++) {
const auto& func = context.functions[i]; const auto& func = context.functions[i];
if (!ignored_funcs.contains(func.name) && func.words.size() != 0) { if (!func.ignored && func.words.size() != 0) {
if (RecompPort::recompile_function(context, func, "out/" + func.name + ".c") == false) { fmt::print(func_header_file,
"void {}(uint8_t* restrict rdram, recomp_context* restrict ctx);\n", func.name);
fmt::print(func_lookup_file,
" {{ 0x{:08X}u, {} }},\n", func.vram, func.name);
if (RecompPort::recompile_function(context, func, "out/funcs/" + func.name + ".c") == false) {
func_lookup_file.clear();
fmt::print(stderr, "Error recompiling {}\n", func.name); fmt::print(stderr, "Error recompiling {}\n", func.name);
std::exit(EXIT_FAILURE); std::exit(EXIT_FAILURE);
} }
} }
} }
fmt::print(func_lookup_file,
"}};\n"
"extern const size_t num_funcs = sizeof(funcs) / sizeof(funcs[0]);\n"
);
fmt::print(func_header_file,
"\n"
"#ifdef __cplusplus\n"
"}}\n"
"#endif\n"
);
return 0; return 0;
} }

View file

@ -31,7 +31,9 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
fmt::print(output_file, " // {}\n", instr.disassemble(0)); fmt::print(output_file, " // {}\n", instr.disassemble(0));
} }
if (skipped_insns.contains(instr.getVram())) { uint32_t instr_vram = instr.getVram();
if (skipped_insns.contains(instr_vram)) {
return true; return true;
} }
@ -105,6 +107,18 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_line("{}{} = {:#X} << 16", ctx_gpr_prefix(rt), rt, imm); print_line("{}{} = {:#X} << 16", ctx_gpr_prefix(rt), rt, imm);
break; break;
case InstrId::cpu_addu: case InstrId::cpu_addu:
{
// Check if this addu belongs to a jump table load
auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
[instr_vram](const RecompPort::JumpTable& jtbl) {
return jtbl.addu_vram == instr_vram;
});
// If so, create a temp to preserve the addend register's value
if (find_result != stats.jump_tables.end()) {
const RecompPort::JumpTable& cur_jtbl = *find_result;
print_line("gpr jr_addend_{:08X} = {}{}", cur_jtbl.jr_vram, ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg);
}
}
print_line("{}{} = ADD32({}{}, {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); print_line("{}{} = ADD32({}{}, {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break; break;
case InstrId::cpu_daddu: case InstrId::cpu_daddu:
@ -169,10 +183,10 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_line("{}{} = {}{} < {:#X} ? 1 : 0", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, (int16_t)imm); print_line("{}{} = {}{} < {:#X} ? 1 : 0", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, (int16_t)imm);
break; break;
case InstrId::cpu_mult: case InstrId::cpu_mult:
print_line("uint64_t result = S64({}{}) * S64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); print_line("result = S64({}{}) * S64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break; break;
case InstrId::cpu_multu: case InstrId::cpu_multu:
print_line("uint64_t result = U64({}{}) * U64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); print_line("result = U64({}{}) * U64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break; break;
case InstrId::cpu_div: case InstrId::cpu_div:
// Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1 // Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1
@ -232,13 +246,13 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_line("{}{} = MEM_WL({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base); print_line("{}{} = MEM_WL({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base);
break; break;
case InstrId::cpu_lwr: case InstrId::cpu_lwr:
print_line("{}{} = MEM_WR({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base); print_line("//{}{} = MEM_WR({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base);
break; break;
case InstrId::cpu_swl: case InstrId::cpu_swl:
print_line("MEM_WL({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt); print_line("MEM_WL({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt);
break; break;
case InstrId::cpu_swr: case InstrId::cpu_swr:
print_line("MEM_WR({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt); print_line("//MEM_WR({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt);
break; break;
// Branches // Branches
@ -310,7 +324,6 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) { if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
print_unconditional_branch("return"); print_unconditional_branch("return");
} else { } else {
uint32_t instr_vram = instr.getVram();
auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(), auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
[instr_vram](const RecompPort::JumpTable& jtbl) { [instr_vram](const RecompPort::JumpTable& jtbl) {
return jtbl.jr_vram == instr_vram; return jtbl.jr_vram == instr_vram;
@ -322,8 +335,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
bool dummy_needs_link_branch, dummy_is_branch_likely; bool dummy_needs_link_branch, dummy_is_branch_likely;
process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely); process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely);
print_indent(); print_indent();
// TODO this will fail if the register holding the addend is mangled, add logic to emit a temp with the addend into the code fmt::print(output_file, "switch (jr_addend_{:08X} >> 2) {{\n", cur_jtbl.jr_vram);
fmt::print(output_file, "switch ({}{} >> 2) {{\n", ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg, cur_jtbl.vram);
for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) { for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) {
print_indent(); print_indent();
print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]); print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]);
@ -383,7 +395,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
break; break;
case InstrId::cpu_break: case InstrId::cpu_break:
print_line("do_break();"); print_line("do_break({})", instr_vram);
break; break;
// Cop1 loads/stores // Cop1 loads/stores
@ -731,7 +743,7 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re
"\n" "\n"
"void {}(uint8_t* restrict rdram, recomp_context* restrict ctx) {{\n" "void {}(uint8_t* restrict rdram, recomp_context* restrict ctx) {{\n"
// these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output // these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output
" uint64_t hi = 0, lo = 0;\n" " uint64_t hi = 0, lo = 0, result = 0;\n"
" int c1cs = 0; \n", // cop1 conditional signal " int c1cs = 0; \n", // cop1 conditional signal
func.name); func.name);