Added temp for switch case operand, fixed compilation issues in output

This commit is contained in:
Mr-Wiseguy 2022-11-15 19:55:48 -05:00
parent 2300a4b6c9
commit 5d9ea96abc
5 changed files with 132 additions and 33 deletions

View file

@ -24,6 +24,7 @@ namespace RecompPort {
uint32_t addend_reg;
uint32_t rom;
uint32_t lw_vram;
uint32_t addu_vram;
uint32_t jr_vram;
std::vector<uint32_t> entries;
};
@ -33,6 +34,7 @@ namespace RecompPort {
uint32_t rom;
const std::span<const uint32_t> words;
std::string name;
bool ignored;
};
struct FunctionStats {

View file

@ -2,6 +2,7 @@
#define __RECOMP_H__
#include <stdint.h>
#include <math.h>
#if 0 // treat GPRs as 32-bit, should be better codegen
typedef uint32_t gpr;
@ -22,22 +23,26 @@ typedef uint64_t gpr;
((gpr)(int32_t)((a) - (b)))
#define MEM_D(offset, reg) \
(*(int64_t*)((rdram) + (((reg) + (offset)) ^ 3)))
(*(int64_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF)))
#define MEM_W(offset, reg) \
(*(int32_t*)((rdram) + (((reg) + (offset)) ^ 3)))
(*(int32_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF)))
#define MEM_H(offset, reg) \
(*(int16_t*)((rdram) + (((reg) + (offset)) ^ 3)))
(*(int16_t*)(rdram + ((((reg) + (offset)) ^ 2) & 0x3FFFFFF)))
#define MEM_B(offset, reg) \
(*(int8_t*)((rdram) + (((reg) + (offset)) ^ 3)))
(*(int8_t*)(rdram + ((((reg) + (offset)) ^ 3) & 0x3FFFFFF)))
#define MEM_HU(offset, reg) \
(*(uint16_t*)((rdram) + (((reg) + (offset)) ^ 3)))
(*(uint16_t*)(rdram + ((((reg) + (offset)) ^ 2) & 0x3FFFFFF)))
#define MEM_BU(offset, reg) \
(*(uint8_t*)((rdram) + (((reg) + (offset)) ^ 3)))
(*(uint8_t*)(rdram + ((((reg) + (offset)) ^ 3) & 0x3FFFFFF)))
// TODO proper lwl/lwr/swl/swr
#define MEM_WL(offset, reg) \
(*(int32_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF)))
#define S32(val) \
((int32_t)(val))
@ -104,6 +109,23 @@ typedef struct {
uint64_t hi, lo;
} recomp_context;
#ifdef __cplusplus
#define restrict __restrict
extern "C" {
#endif
void switch_error(const char* func, uint32_t vram, uint32_t jtbl);
void do_break(uint32_t vram);
typedef void (recomp_func_t)(uint8_t* restrict rdram, recomp_context* restrict ctx);
recomp_func_t* get_function(uint32_t vram);
#define LOOKUP_FUNC(val) \
get_function(val)
#ifdef __cplusplus
}
#endif
#endif

View file

@ -12,14 +12,16 @@ extern "C" const char* RabbitizerRegister_getNameGpr(uint8_t regValue);
struct RegState {
// For tracking a register that will be used to load from RAM
uint32_t prev_lui;
uint32_t prev_addiu;
uint32_t prev_addiu_vram;
uint32_t prev_addu_vram;
uint8_t prev_addend_reg;
bool valid_lui;
bool valid_addiu;
bool valid_addend;
// For tracking a register that has been loaded from RAM
uint32_t loaded_lw_addr;
uint32_t loaded_addr;
uint32_t loaded_lw_vram;
uint32_t loaded_addu_vram;
uint32_t loaded_address;
uint8_t loaded_addend_reg;
bool valid_loaded;
@ -27,15 +29,17 @@ struct RegState {
void invalidate() {
prev_lui = 0;
prev_addiu = 0;
prev_addiu_vram = 0;
prev_addu_vram = 0;
prev_addend_reg = 0;
valid_lui = false;
valid_addiu = false;
valid_addend = false;
loaded_lw_addr = 0;
loaded_addr = 0;
loaded_lw_vram = 0;
loaded_addu_vram = 0;
loaded_address = 0;
loaded_addend_reg = 0;
valid_loaded = false;
@ -82,7 +86,7 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
reg_states[rt] = reg_states[rs];
// Set the addiu state if and only if there hasn't been an addiu already
if (!reg_states[rt].valid_addiu) {
reg_states[rt].prev_addiu = (int16_t)imm;
reg_states[rt].prev_addiu_vram = (int16_t)imm;
reg_states[rt].valid_addiu = true;
} else {
// Otherwise, there have been 2 or more consecutive addius so invalidate the whole register
@ -102,6 +106,7 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
temp = reg_states[valid_lui_reg];
temp.valid_addend = true;
temp.prev_addend_reg = addend_reg;
temp.prev_addu_vram = instr.getVram();
} else {
// Check if this is a move
check_move();
@ -124,14 +129,15 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
if (nonzero_immediate) {
lo16 = (int16_t)imm;
} else {
lo16 = reg_states[base].prev_addiu;
lo16 = reg_states[base].prev_addiu_vram;
}
uint32_t address = reg_states[base].prev_lui + lo16;
temp.valid_loaded = true;
temp.loaded_lw_addr = instr.getVram();
temp.loaded_addr = address;
temp.loaded_lw_vram = instr.getVram();
temp.loaded_address = address;
temp.loaded_addend_reg = reg_states[base].prev_addend_reg;
temp.loaded_addu_vram = reg_states[base].prev_addu_vram;
}
}
reg_states[rt] = temp;
@ -144,10 +150,11 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
// Check if the source reg has a valid loaded state and if so record that as a jump table
if (reg_states[rs].valid_loaded) {
stats.jump_tables.emplace_back(
reg_states[rs].loaded_addr,
reg_states[rs].loaded_address,
reg_states[rs].loaded_addend_reg,
0,
reg_states[rs].loaded_lw_addr,
reg_states[rs].loaded_lw_vram,
reg_states[rs].loaded_addu_vram,
instr.getVram(),
std::vector<uint32_t>{}
);

View file

@ -6,6 +6,7 @@
#include "rabbitizer.hpp"
#include "elfio/elfio.hpp"
#include "fmt/format.h"
#include "fmt/ostream.h"
#include "recomp_port.h"
@ -221,7 +222,16 @@ std::unordered_set<std::string> ignored_funcs {
"__osSetConfig",
"__osGetConfig",
"__osSetWatchLo",
"__osGetWatchLo"
"__osGetWatchLo",
// Cache funcs
"osInvalDCache",
"osInvalICache",
"osWritebackDCache",
"osWritebackDCacheAll"
};
std::unordered_set<std::string> renamed_funcs{
"sincosf"
};
int main(int argc, char** argv) {
@ -305,6 +315,14 @@ int main(int argc, char** argv) {
// Check if this symbol is a function or has no type (like a regular glabel would)
// Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls
if (type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE) {
bool ignored = false;
if (renamed_funcs.contains(name)) {
name = "_" + name;
}
if (ignored_funcs.contains(name)) {
name = name + "_recomp";
ignored = true;
}
if (section_index < section_rom_addrs.size()) {
auto section_rom_addr = section_rom_addrs[section_index];
auto section_offset = value - elf_file.sections[section_index]->get_address();
@ -316,7 +334,8 @@ int main(int argc, char** argv) {
vram,
static_cast<uint32_t>(section_offset + section_rom_addr),
std::span{ words, num_instructions },
std::move(name)
std::move(name),
ignored
);
} else {
uint32_t vram = static_cast<uint32_t>(value);
@ -325,7 +344,8 @@ int main(int argc, char** argv) {
vram,
0,
std::span<const uint32_t>{},
std::move(name)
std::move(name),
ignored
);
}
}
@ -333,16 +353,52 @@ int main(int argc, char** argv) {
fmt::print("Function count: {}\n", context.functions.size());
std::ofstream func_lookup_file{ "out/funcs/lookup.cpp" };
std::ofstream func_header_file{ "out/funcs/funcs.h" };
fmt::print(func_lookup_file,
"#include <utility>\n"
"#include \"recomp.h\"\n"
"#include \"funcs.h\"\n"
"\n"
"std::pair<uint32_t, recomp_func_t*> funcs[] {{\n"
);
fmt::print(func_header_file,
"#include \"recomp.h\"\n"
"\n"
"#ifdef __cplusplus\n"
"extern \"C\" {{\n"
"#endif\n"
"\n"
);
//#pragma omp parallel for
for (size_t i = 0; i < context.functions.size(); i++) {
const auto& func = context.functions[i];
if (!ignored_funcs.contains(func.name) && func.words.size() != 0) {
if (RecompPort::recompile_function(context, func, "out/" + func.name + ".c") == false) {
if (!func.ignored && func.words.size() != 0) {
fmt::print(func_header_file,
"void {}(uint8_t* restrict rdram, recomp_context* restrict ctx);\n", func.name);
fmt::print(func_lookup_file,
" {{ 0x{:08X}u, {} }},\n", func.vram, func.name);
if (RecompPort::recompile_function(context, func, "out/funcs/" + func.name + ".c") == false) {
func_lookup_file.clear();
fmt::print(stderr, "Error recompiling {}\n", func.name);
std::exit(EXIT_FAILURE);
}
}
}
fmt::print(func_lookup_file,
"}};\n"
"extern const size_t num_funcs = sizeof(funcs) / sizeof(funcs[0]);\n"
);
fmt::print(func_header_file,
"\n"
"#ifdef __cplusplus\n"
"}}\n"
"#endif\n"
);
return 0;
}

View file

@ -31,7 +31,9 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
fmt::print(output_file, " // {}\n", instr.disassemble(0));
}
if (skipped_insns.contains(instr.getVram())) {
uint32_t instr_vram = instr.getVram();
if (skipped_insns.contains(instr_vram)) {
return true;
}
@ -105,6 +107,18 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_line("{}{} = {:#X} << 16", ctx_gpr_prefix(rt), rt, imm);
break;
case InstrId::cpu_addu:
{
// Check if this addu belongs to a jump table load
auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
[instr_vram](const RecompPort::JumpTable& jtbl) {
return jtbl.addu_vram == instr_vram;
});
// If so, create a temp to preserve the addend register's value
if (find_result != stats.jump_tables.end()) {
const RecompPort::JumpTable& cur_jtbl = *find_result;
print_line("gpr jr_addend_{:08X} = {}{}", cur_jtbl.jr_vram, ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg);
}
}
print_line("{}{} = ADD32({}{}, {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_daddu:
@ -169,10 +183,10 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_line("{}{} = {}{} < {:#X} ? 1 : 0", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, (int16_t)imm);
break;
case InstrId::cpu_mult:
print_line("uint64_t result = S64({}{}) * S64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
print_line("result = S64({}{}) * S64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_multu:
print_line("uint64_t result = U64({}{}) * U64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
print_line("result = U64({}{}) * U64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_div:
// Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1
@ -232,13 +246,13 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_line("{}{} = MEM_WL({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base);
break;
case InstrId::cpu_lwr:
print_line("{}{} = MEM_WR({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base);
print_line("//{}{} = MEM_WR({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base);
break;
case InstrId::cpu_swl:
print_line("MEM_WL({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_swr:
print_line("MEM_WR({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt);
print_line("//MEM_WR({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt);
break;
// Branches
@ -310,7 +324,6 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
print_unconditional_branch("return");
} else {
uint32_t instr_vram = instr.getVram();
auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
[instr_vram](const RecompPort::JumpTable& jtbl) {
return jtbl.jr_vram == instr_vram;
@ -322,8 +335,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
bool dummy_needs_link_branch, dummy_is_branch_likely;
process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely);
print_indent();
// TODO this will fail if the register holding the addend is mangled, add logic to emit a temp with the addend into the code
fmt::print(output_file, "switch ({}{} >> 2) {{\n", ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg, cur_jtbl.vram);
fmt::print(output_file, "switch (jr_addend_{:08X} >> 2) {{\n", cur_jtbl.jr_vram);
for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) {
print_indent();
print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]);
@ -383,7 +395,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
break;
case InstrId::cpu_break:
print_line("do_break();");
print_line("do_break({})", instr_vram);
break;
// Cop1 loads/stores
@ -731,7 +743,7 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re
"\n"
"void {}(uint8_t* restrict rdram, recomp_context* restrict ctx) {{\n"
// these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output
" uint64_t hi = 0, lo = 0;\n"
" uint64_t hi = 0, lo = 0, result = 0;\n"
" int c1cs = 0; \n", // cop1 conditional signal
func.name);