Implemented RSP microcode recompilation

This commit is contained in:
Mr-Wiseguy 2023-02-14 01:06:41 -05:00
parent 877524cf94
commit 217a30b032
10 changed files with 2424 additions and 39 deletions

View file

@ -1,33 +1,583 @@
#include <optional>
#include <fstream>
#include <array>
#include <unordered_set>
#include <unordered_map>
#include <cassert>
#include "rabbitizer.hpp"
#include "fmt/format.h"
#include "fmt/ostream.h"
int main() {
//rabbitizer::InstructionRsp instr{ 0xE9DD3801, 0x040013E0 }; // suv $v29[0], 0x8($14)
rabbitizer::InstructionRsp instr{ 0xEAF70B84, 0x04001624 }; // ssv $v23[7], 0x8($23)
//rabbitizer::InstructionRsp instr{ 0x4B5E888F, 0x04001414 }; // vmadh $v2, $v17, $v30[2]
bool has_element = false;
int element = 0;
// Shorthand for rabbitizer's unique instruction identifier enum
using InstrId = rabbitizer::InstrId::UniqueId;
// Shorthand for rabbitizer's enum of RSP coprocessor 0 registers
using Cop0Reg = rabbitizer::Registers::Rsp::Cop0;
// Size in bytes of a single instruction word
constexpr size_t instr_size = sizeof(uint32_t);
fmt::print("{}\n", instr.disassemble(0));
fmt::print("{}\n", instr.getOpcodeName());
fmt::print("{}\n", instr.disassembleOperands());
// Can't use rabbitizer's operand types because we need to be able to provide a register reference or a register index
// Each value selects how one argument of a generated vector-unit call is written by process_instruction.
enum class RspOperand {
None, // No argument is emitted for this slot
Vt, // Reference to the vt vector register: rsp.vpu.r[vt]
VtIndex, // The vt register number itself
Vd, // Reference to the vd vector register: rsp.vpu.r[vd]
Vs, // Reference to the vs vector register: rsp.vpu.r[vs]
VsIndex, // The vs register number itself
De, // The instruction's de field, emitted as a number
Rt, // The rt general-purpose register
Rs, // The rs general-purpose register
Imm7, // The immediate, sign extended from 7 bits
};
if (instr.hasOperand(rabbitizer::OperandType::rsp_vt_elementhigh)) {
element = instr.GetRsp_elementhigh();
has_element = true;
} else if (instr.hasOperand(rabbitizer::OperandType::rsp_vt_elementlow)) {
if (has_element) {
fmt::print(stderr, "Instruction cannot have two element values {}\n", instr.disassemble(0));
std::exit(EXIT_FAILURE);
// Maps each RSP vector-unit instruction to the (up to three) operands that process_instruction
// passes, in order, to the generated `rsp.NAME(...)` call for that instruction.
// Instructions that are not in this table are translated directly by process_instruction.
std::unordered_map<InstrId, std::array<RspOperand, 3>> vector_operands{
// Vt, Rs, Imm
{ InstrId::rsp_lbv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_ldv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_lfv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_lhv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_llv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_lpv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_lqv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_lrv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_lsv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_luv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
// { InstrId::rsp_lwv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}}, // Not in rabbitizer
{ InstrId::rsp_sbv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_sdv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_sfv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_shv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_slv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_spv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_sqv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_srv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_ssv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_suv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_swv, {RspOperand::Vt, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_stv, {RspOperand::VtIndex, RspOperand::Rs, RspOperand::Imm7}},
{ InstrId::rsp_ltv, {RspOperand::VtIndex, RspOperand::Rs, RspOperand::Imm7}},
// Vd, Vs, Vt
{ InstrId::rsp_vabs, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vadd, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vaddc, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vand, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vch, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vcl, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vcr, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_veq, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vge, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vlt, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmacf, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmacu, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmadh, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmadl, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmadm, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmadn, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmrg, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmudh, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmudl, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmudm, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmudn, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vne, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vnor, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vnxor, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vor, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vsub, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vsubc, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmulf, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmulu, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vmulq, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vnand, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vxor, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}},
{ InstrId::rsp_vsar, {RspOperand::Vd, RspOperand::Vs, RspOperand::None}},
{ InstrId::rsp_vmacq, {RspOperand::Vd, RspOperand::None, RspOperand::None}},
// { InstrId::rsp_vzero, {RspOperand::Vd, RspOperand::Vs, RspOperand::Vt}}, unused pseudo
{ InstrId::rsp_vrndn, {RspOperand::Vd, RspOperand::VsIndex, RspOperand::Vt}},
{ InstrId::rsp_vrndp, {RspOperand::Vd, RspOperand::VsIndex, RspOperand::Vt}},
// Vd, De, Vt
{ InstrId::rsp_vmov, {RspOperand::Vd, RspOperand::De, RspOperand::Vt}},
{ InstrId::rsp_vrcp, {RspOperand::Vd, RspOperand::De, RspOperand::Vt}},
{ InstrId::rsp_vrcpl, {RspOperand::Vd, RspOperand::De, RspOperand::Vt}},
{ InstrId::rsp_vrcph, {RspOperand::Vd, RspOperand::De, RspOperand::Vt}},
{ InstrId::rsp_vrsq, {RspOperand::Vd, RspOperand::De, RspOperand::Vt}},
{ InstrId::rsp_vrsql, {RspOperand::Vd, RspOperand::De, RspOperand::Vt}},
{ InstrId::rsp_vrsqh, {RspOperand::Vd, RspOperand::De, RspOperand::Vt}},
// Rt, Vs
{ InstrId::rsp_mfc2, {RspOperand::Rt, RspOperand::Vs, RspOperand::None}},
{ InstrId::rsp_mtc2, {RspOperand::Rt, RspOperand::Vs, RspOperand::None}},
// Nop
{ InstrId::rsp_vnop, {RspOperand::None, RspOperand::None, RspOperand::None}}
};
// Returns the prefix used when naming general-purpose register `reg` in the generated code.
// Callers print this prefix immediately followed by the register number, so register 0
// (empty prefix) is emitted as the literal `0` and any other register N is emitted as `rN`.
std::string_view ctx_gpr_prefix(int reg) {
    return (reg != 0) ? "r" : "";
}
if (has_element) {
fmt::print("element: 0x{:X}\n", element);
uint32_t expected_c0_reg_value(int cop0_reg) {
switch (static_cast<Cop0Reg>(cop0_reg)) {
case Cop0Reg::RSP_COP0_SP_STATUS:
return 0; // None of the flags in RSP status are set
case Cop0Reg::RSP_COP0_SP_DMA_FULL:
return 0; // Pretend DMAs complete instantly
case Cop0Reg::RSP_COP0_SP_DMA_BUSY:
return 0; // Pretend DMAs complete instantly
case Cop0Reg::RSP_COP0_SP_SEMAPHORE:
return 0; // Always acquire the semaphore
}
fmt::print(stderr, "Unhandled mfc0: {}\n", cop0_reg);
assert(false);
return 0;
}
std::string_view c0_reg_write_action(int cop0_reg) {
switch (static_cast<Cop0Reg>(cop0_reg)) {
case Cop0Reg::RSP_COP0_SP_SEMAPHORE:
return ""; // Ignore semaphore functionality
case Cop0Reg::RSP_COP0_SP_STATUS:
return ""; // Ignore writes to the status flags since yielding is ignored
case Cop0Reg::RSP_COP0_SP_DRAM_ADDR:
return "SET_DMA_DRAM";
case Cop0Reg::RSP_COP0_SP_MEM_ADDR:
return "SET_DMA_DMEM";
case Cop0Reg::RSP_COP0_SP_RD_LEN:
return "DO_DMA_READ";
case Cop0Reg::RSP_COP0_SP_WR_LEN:
return "DO_DMA_WRITE";
}
fmt::print(stderr, "Unhandled mtc0: {}\n", cop0_reg);
assert(false);
return "";
}
// Extracts the element field of an RSP instruction.
// The "high" element is used when the instruction has an rsp_vt_elementhigh operand; otherwise the
// "low" element is used when it has an rsp_vt_elementlow or rsp_vs_index operand.
// Returns std::nullopt when none of those operands are present.
std::optional<int> get_rsp_element(const rabbitizer::InstructionRsp& instr) {
    std::optional<int> element = std::nullopt;

    if (instr.hasOperand(rabbitizer::OperandType::rsp_vt_elementhigh)) {
        element = instr.GetRsp_elementhigh();
    } else {
        const bool uses_low_element =
            instr.hasOperand(rabbitizer::OperandType::rsp_vt_elementlow) ||
            instr.hasOperand(rabbitizer::OperandType::rsp_vs_index);
        if (uses_low_element) {
            element = instr.GetRsp_elementlow();
        }
    }

    return element;
}
// Reports whether the generated call for `id` is emitted without an element template argument.
bool rsp_ignores_element(InstrId id) {
    switch (id) {
        case InstrId::rsp_vmacq:
        case InstrId::rsp_vnop:
            return true;
        default:
            return false;
    }
}
// Addresses in the microcode that need a label in the generated code.
struct BranchTargets {
// Targets of branches and of jumps that encode their address
std::unordered_set<uint32_t> direct_targets;
// Return addresses of linking instructions; these become the cases of the indirect jump switch
std::unordered_set<uint32_t> indirect_targets;
};
// Scans the decoded microcode and gathers every address that needs a label:
//  - the destination of each branch or address-encoding jump (direct targets)
//  - the address two instructions past each linking instruction, i.e. the value it
//    stores as its return address (indirect targets)
BranchTargets get_branch_targets(const std::vector<rabbitizer::InstructionRsp>& instrs) {
    BranchTargets targets;

    for (size_t i = 0; i < instrs.size(); i++) {
        const rabbitizer::InstructionRsp& current = instrs[i];

        const bool has_encoded_target = current.isJumpWithAddress() || current.isBranch();
        if (has_encoded_target) {
            targets.direct_targets.insert(current.getBranchVramGeneric());
        }

        if (current.doesLink()) {
            targets.indirect_targets.insert(current.getVram() + 2 * instr_size);
        }
    }

    return targets;
}
// Writes the C++ translation of the instruction at `instr_index` to `output_file`.
//
// The output is, in order: an `L_XXXXXXXX:` label when the instruction's address is a known
// branch target, a comment holding the disassembled instruction, and the translated statement(s).
// Branches and jumps recursively emit the instruction that follows them (the delay slot) before
// emitting their `goto`; the caller's loop still emits that following instruction again at its
// own position.
//
// Parameters:
//   instr_index    - index into `instructions` of the instruction to translate
//   instructions   - the decoded microcode
//   output_file    - stream receiving the generated code
//   branch_targets - addresses that require a label
//   indent         - when true, an extra indent is printed before the translated statement
//                    (passed for a delay slot nested inside a conditional branch's braces)
//
// Returns false if the instruction is not handled, true otherwise. The result of translating a
// delay slot through the recursive call is not propagated.
bool process_instruction(size_t instr_index, const std::vector<rabbitizer::InstructionRsp>& instructions, std::ofstream& output_file, const BranchTargets& branch_targets, bool indent) {
    const auto& instr = instructions[instr_index];
    uint32_t instr_vram = instr.getVram();
    InstrId instr_id = instr.getUniqueId();

    // Print a label if one exists here
    if (branch_targets.direct_targets.contains(instr_vram) || branch_targets.indirect_targets.contains(instr_vram)) {
        fmt::print(output_file, "L_{:08X}:\n", instr_vram);
    }

    // Output a comment with the original instruction
    if (instr.isBranch() || instr_id == InstrId::rsp_j) {
        fmt::print(output_file, " // {}\n", instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric())));
    } else if (instr_id == InstrId::rsp_jal) {
        fmt::print(output_file, " // {}\n", instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric())));
    } else {
        fmt::print(output_file, " // {}\n", instr.disassemble(0));
    }

    auto print_indent = [&]() {
        fmt::print(output_file, " ");
    };

    // Emits one indented statement terminated by a semicolon
    auto print_line = [&]<typename... Ts>(fmt::format_string<Ts...> fmt_str, Ts ...args) {
        print_indent();
        fmt::print(output_file, fmt_str, args...);
        fmt::print(output_file, ";\n");
    };

    // Emits the `if (...)` part of a conditional branch; print_branch supplies the braces
    auto print_branch_condition = [&]<typename... Ts>(fmt::format_string<Ts...> fmt_str, Ts ...args) {
        fmt::print(output_file, fmt_str, args...);
        fmt::print(output_file, " ");
    };

    // Emits the delay slot instruction (if there is one) followed by the given statement
    auto print_unconditional_branch = [&]<typename... Ts>(fmt::format_string<Ts...> fmt_str, Ts ...args) {
        if (instr_index < instructions.size() - 1) {
            process_instruction(instr_index + 1, instructions, output_file, branch_targets, false);
        }
        print_indent();
        fmt::print(output_file, fmt_str, args...);
        fmt::print(output_file, ";\n");
    };

    // Emits a braced block holding the delay slot instruction (if there is one) and the given statement
    auto print_branch = [&]<typename... Ts>(fmt::format_string<Ts...> fmt_str, Ts ...args) {
        fmt::print(output_file, "{{\n ");
        if (instr_index < instructions.size() - 1) {
            process_instruction(instr_index + 1, instructions, output_file, branch_targets, true);
        }
        fmt::print(output_file, " ");
        fmt::print(output_file, fmt_str, args...);
        fmt::print(output_file, ";\n }}\n");
    };

    if (indent) {
        print_indent();
    }

    int rd = (int)instr.GetO32_rd();
    int rs = (int)instr.GetO32_rs();
    int base = rs;
    int rt = (int)instr.GetO32_rt();
    int sa = (int)instr.Get_sa();

    uint16_t imm = instr.Get_immediate();

    std::string unsigned_imm_string = fmt::format("{:#X}", imm);
    std::string signed_imm_string = fmt::format("{:#X}", (int16_t)imm);

    auto rsp_element = get_rsp_element(instr);

    // If this instruction is in the vector operand table then emit the appropriate function call for its implementation
    auto operand_find_it = vector_operands.find(instr_id);
    if (operand_find_it != vector_operands.end()) {
        const auto& operands = operand_find_it->second;
        int vd = (int)instr.GetRsp_vd();
        int vs = (int)instr.GetRsp_vs();
        int vt = (int)instr.GetRsp_vt();
        std::string operand_string = "";
        for (RspOperand operand : operands) {
            switch (operand) {
                case RspOperand::None:
                    // Unused operand slot, nothing to emit
                    break;
                case RspOperand::Vt:
                    operand_string += fmt::format("rsp.vpu.r[{}], ", vt);
                    break;
                case RspOperand::VtIndex:
                    operand_string += fmt::format("{}, ", vt);
                    break;
                case RspOperand::Vd:
                    operand_string += fmt::format("rsp.vpu.r[{}], ", vd);
                    break;
                case RspOperand::Vs:
                    operand_string += fmt::format("rsp.vpu.r[{}], ", vs);
                    break;
                case RspOperand::VsIndex:
                    operand_string += fmt::format("{}, ", vs);
                    break;
                case RspOperand::De:
                    operand_string += fmt::format("{}, ", instr.GetRsp_de());
                    break;
                case RspOperand::Rt:
                    operand_string += fmt::format("{}{}, ", ctx_gpr_prefix(rt), rt);
                    break;
                case RspOperand::Rs:
                    operand_string += fmt::format("{}{}, ", ctx_gpr_prefix(rs), rs);
                    break;
                case RspOperand::Imm7:
                    // Sign extend the 7-bit immediate
                    operand_string += fmt::format("{:#X}, ", ((int8_t)(imm << 1)) >> 1);
                    break;
            }
        }
        // Trim the trailing comma off the operands
        if (operand_string.size() > 0) {
            operand_string = operand_string.substr(0, operand_string.size() - 2);
        }

        std::string uppercase_name = "";
        std::string lowercase_name = instr.getOpcodeName();
        uppercase_name.reserve(lowercase_name.size() + 1);
        for (char c : lowercase_name) {
            // std::toupper requires a value representable as unsigned char
            uppercase_name += static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
        }

        if (rsp_ignores_element(instr_id)) {
            print_line("rsp.{}({})", uppercase_name, operand_string);
        } else {
            print_line("rsp.{}<{}>({})", uppercase_name, rsp_element.value(), operand_string);
        }
    }
    // Otherwise, implement the instruction directly
    else {
        switch (instr_id) {
            case InstrId::rsp_nop:
                fmt::print(output_file, "\n");
                break;
            // Arithmetic
            case InstrId::rsp_lui:
                print_line("{}{} = S32({} << 16)", ctx_gpr_prefix(rt), rt, unsigned_imm_string);
                break;
            case InstrId::rsp_add:
            case InstrId::rsp_addu:
                print_line("{}{} = RSP_ADD32({}{}, {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_negu: // pseudo instruction for subu x, 0, y
            case InstrId::rsp_sub:
            case InstrId::rsp_subu:
                print_line("{}{} = RSP_SUB32({}{}, {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_addi:
            case InstrId::rsp_addiu:
                print_line("{}{} = RSP_ADD32({}{}, {})", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, signed_imm_string);
                break;
            case InstrId::rsp_and:
                print_line("{}{} = {}{} & {}{}", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_andi:
                print_line("{}{} = {}{} & {}", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, unsigned_imm_string);
                break;
            case InstrId::rsp_or:
                print_line("{}{} = {}{} | {}{}", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_ori:
                print_line("{}{} = {}{} | {}", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, unsigned_imm_string);
                break;
            case InstrId::rsp_nor:
                print_line("{}{} = ~({}{} | {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_xor:
                print_line("{}{} = {}{} ^ {}{}", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_xori:
                print_line("{}{} = {}{} ^ {}", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, unsigned_imm_string);
                break;
            case InstrId::rsp_sll:
                print_line("{}{} = S32({}{}) << {}", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rt), rt, sa);
                break;
            case InstrId::rsp_sllv:
                print_line("{}{} = S32({}{}) << ({}{} & 31)", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs);
                break;
            case InstrId::rsp_sra:
                print_line("{}{} = S32(RSP_SIGNED({}{}) >> {})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rt), rt, sa);
                break;
            case InstrId::rsp_srav:
                print_line("{}{} = S32(RSP_SIGNED({}{}) >> ({}{} & 31))", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs);
                break;
            case InstrId::rsp_srl:
                print_line("{}{} = S32(U32({}{}) >> {})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rt), rt, sa);
                break;
            case InstrId::rsp_srlv:
                print_line("{}{} = S32(U32({}{}) >> ({}{} & 31))", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs);
                break;
            case InstrId::rsp_slt:
                print_line("{}{} = RSP_SIGNED({}{}) < RSP_SIGNED({}{}) ? 1 : 0", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_slti:
                print_line("{}{} = RSP_SIGNED({}{}) < {} ? 1 : 0", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, signed_imm_string);
                break;
            case InstrId::rsp_sltu:
                print_line("{}{} = {}{} < {}{} ? 1 : 0", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_sltiu:
                print_line("{}{} = {}{} < {} ? 1 : 0", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, signed_imm_string);
                break;
            // Loads
            // TODO ld
            case InstrId::rsp_lw:
                print_line("{}{} = RSP_MEM_W({}, {}{})", ctx_gpr_prefix(rt), rt, signed_imm_string, ctx_gpr_prefix(base), base);
                break;
            case InstrId::rsp_lh:
                print_line("{}{} = RSP_MEM_H({}, {}{})", ctx_gpr_prefix(rt), rt, signed_imm_string, ctx_gpr_prefix(base), base);
                break;
            case InstrId::rsp_lb:
                print_line("{}{} = RSP_MEM_B({}, {}{})", ctx_gpr_prefix(rt), rt, signed_imm_string, ctx_gpr_prefix(base), base);
                break;
            case InstrId::rsp_lhu:
                print_line("{}{} = RSP_MEM_HU({}, {}{})", ctx_gpr_prefix(rt), rt, signed_imm_string, ctx_gpr_prefix(base), base);
                break;
            case InstrId::rsp_lbu:
                print_line("{}{} = RSP_MEM_BU({}, {}{})", ctx_gpr_prefix(rt), rt, signed_imm_string, ctx_gpr_prefix(base), base);
                break;
            // Stores
            case InstrId::rsp_sw:
                print_line("RSP_MEM_W({}, {}{}) = {}{}", signed_imm_string, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_sh:
                print_line("RSP_MEM_H({}, {}{}) = {}{}", signed_imm_string, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt);
                break;
            case InstrId::rsp_sb:
                print_line("RSP_MEM_B({}, {}{}) = {}{}", signed_imm_string, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt);
                break;
            // Branches
            case InstrId::rsp_j:
            case InstrId::rsp_b:
                print_unconditional_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
                break;
            case InstrId::rsp_jal:
                print_line("{}{} = 0x{:08X}", ctx_gpr_prefix(31), 31, instr_vram + 2 * instr_size);
                print_unconditional_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
                break;
            case InstrId::rsp_jr:
                print_line("jump_target = {}{}", ctx_gpr_prefix(rs), rs);
                print_unconditional_branch("goto do_indirect_jump");
                break;
            case InstrId::rsp_jalr:
                // The link value must be zero padded: space padding would split the hex literal
                print_line("jump_target = {}{}; {}{} = 0x{:08X}", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rd), rd, instr_vram + 2 * instr_size);
                print_unconditional_branch("goto do_indirect_jump");
                break;
            case InstrId::rsp_bne:
                print_indent();
                print_branch_condition("if ({}{} != {}{})", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
                break;
            case InstrId::rsp_beq:
                print_indent();
                print_branch_condition("if ({}{} == {}{})", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
                print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
                break;
            case InstrId::rsp_bgez:
                print_indent();
                print_branch_condition("if (RSP_SIGNED({}{}) >= 0)", ctx_gpr_prefix(rs), rs);
                print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
                break;
            case InstrId::rsp_bgtz:
                print_indent();
                print_branch_condition("if (RSP_SIGNED({}{}) > 0)", ctx_gpr_prefix(rs), rs);
                print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
                break;
            case InstrId::rsp_blez:
                print_indent();
                print_branch_condition("if (RSP_SIGNED({}{}) <= 0)", ctx_gpr_prefix(rs), rs);
                print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
                break;
            case InstrId::rsp_bltz:
                print_indent();
                print_branch_condition("if (RSP_SIGNED({}{}) < 0)", ctx_gpr_prefix(rs), rs);
                print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
                break;
            case InstrId::rsp_break:
                print_line("return RspExitReason::Broke");
                break;
            case InstrId::rsp_mfc0:
                print_line("{}{} = {}", ctx_gpr_prefix(rt), rt, expected_c0_reg_value(rd));
                break;
            case InstrId::rsp_mtc0:
                {
                    // An empty action means the write is dropped from the generated code
                    std::string_view write_action = c0_reg_write_action(rd);
                    if (!write_action.empty()) {
                        print_line("{}({}{})", write_action, ctx_gpr_prefix(rt), rt);
                    }
                    break;
                }
            default:
                fmt::print(stderr, "Unhandled instruction: {}\n", instr.getOpcodeName());
                assert(false);
                return false;
        }
    }

    return true;
}
// Writes the `do_indirect_jump` section of the generated function: a switch on `jump_target`
// with one case per indirect branch target, followed by a return of
// RspExitReason::UnhandledJumpTarget for values that match no case.
void write_indirect_jumps(std::ofstream& output_file, const BranchTargets& branch_targets) {
    // Label and switch header
    fmt::print(output_file,
        "do_indirect_jump:\n"
        " switch (jump_target) {{ \n");

    // One case per known return address
    const auto& return_addresses = branch_targets.indirect_targets;
    for (auto it = return_addresses.begin(); it != return_addresses.end(); ++it) {
        const uint32_t target = *it;
        fmt::print(output_file, " case 0x{0:08X}: goto L_{0:08X};\n", target);
    }

    // Switch footer and fallback return
    fmt::print(output_file,
        " }}\n"
        " return RspExitReason::UnhandledJumpTarget;\n");
}
// TODO de-hardcode these
// Byte offset within the ROM file at which the microcode text is read
constexpr size_t rsp_text_offset = 0xB8BAD0;
// Number of bytes of microcode text to read
constexpr size_t rsp_text_size = 0xAF0;
// Address assigned to the first decoded instruction
constexpr size_t rsp_text_address = 0x04001080;
// ROM file the microcode is read from
std::string rom_file_path = "../test/oot_mq_debug.z64";
// File the generated C++ is written to
std::string output_file_path = "../test/rsp/njpgdspMain.cpp";
// Name given to the generated function
std::string output_function_name = "njpgdspMain";
// Reverses the byte order of a 32-bit value.
// Written with plain shifts and masks so that it is constexpr and does not depend on
// compiler-specific intrinsics.
constexpr uint32_t byteswap(uint32_t val) {
    return ((val & 0x000000FFu) << 24) |
           ((val & 0x0000FF00u) << 8) |
           ((val & 0x00FF0000u) >> 8) |
           ((val & 0xFF000000u) >> 24);
}
// The microcode text is decoded one whole instruction word at a time
static_assert(rsp_text_size % instr_size == 0, "RSP microcode must be a multiple of the instruction size");
int main() {
std::array<uint32_t, rsp_text_size / sizeof(uint32_t)> instr_words{};
{
std::ifstream rom_file{ rom_file_path, std::ios_base::binary };
if (!rom_file.good()) {
fmt::print(stderr, "Failed to open rom file\n");
return EXIT_FAILURE;
}
rom_file.seekg(rsp_text_offset);
rom_file.read(reinterpret_cast<char*>(instr_words.data()), rsp_text_size);
}
// Disable appropriate pseudo instructions
RabbitizerConfig_Cfg.pseudos.pseudoMove = false;
RabbitizerConfig_Cfg.pseudos.pseudoBeqz = false;
RabbitizerConfig_Cfg.pseudos.pseudoBnez = false;
RabbitizerConfig_Cfg.pseudos.pseudoNot = false;
// Decode the instruction words into instructions
std::vector<rabbitizer::InstructionRsp> instrs{};
instrs.reserve(instr_words.size());
uint32_t vram = rsp_text_address;
for (uint32_t instr_word : instr_words) {
const rabbitizer::InstructionRsp& instr = instrs.emplace_back(byteswap(instr_word), vram);
vram += instr_size;
}
// Collect indirect jump targets (return addresses for linked jumps)
BranchTargets branch_targets = get_branch_targets(instrs);
// Open output file and write beginning
std::ofstream output_file(output_file_path);
fmt::print(output_file,
"#include \"../src/rsp.h\"\n"
"#include \"../src/rsp_vu_impl.h\"\n"
"RspExitReason {}(uint8_t* rdram) {{\n"
" uint32_t r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0, r6 = 0, r7 = 0;\n"
" uint32_t r8 = 0, r9 = 0, r10 = 0, r11 = 0, r12 = 0, r13 = 0, r14 = 0, r15 = 0;\n"
" uint32_t r16 = 0, r17 = 0, r18 = 0, r19 = 0, r20 = 0, r21 = 0, r22 = 0, r23 = 0;\n"
" uint32_t r24 = 0, r25 = 0, r26 = 0, r27 = 0, r28 = 0, r29 = 0, r30 = 0, r31 = 0;\n"
" uint32_t dma_dmem_address = 0, dma_dram_address = 0, jump_target = 0;\n"
" RSP rsp{{}};\n"
" r1 = 0xFC0;\n", output_function_name);
// Write each instruction
for (size_t instr_index = 0; instr_index < instrs.size(); instr_index++) {
process_instruction(instr_index, instrs, output_file, branch_targets, false);
}
// Terminate instruction code with a return to indicate that the microcode has run past its end
fmt::print(output_file, " return RspExitReason::ImemOverrun;\n");
// Write the section containing the indirect jump table
write_indirect_jumps(output_file, branch_targets);
// End the file
fmt::print(output_file, "}}\n");
return 0;
}

@ -1 +1 @@
Subproject commit 54f997607c62d8c1c5316ef414adf17f5c060797
Subproject commit b9a39f6ec0a3ff6690ef2925e6275cf6578602cc

View file

@ -960,6 +960,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
return false;
}
// TODO is this used?
if (emit_link_branch) {
fmt::print(output_file, " after_{}:\n", link_branch_index);
}

View file

@ -162,6 +162,7 @@ XCOPY "$(ProjectDir)Lib\SDL2-2.24.0\lib\$(Platform)\SDL2.dll" "$(TargetDir)" /S
<ClCompile Include="portultra\task_pthreads.cpp" />
<ClCompile Include="portultra\task_win32.cpp" />
<ClCompile Include="portultra\threads.cpp" />
<ClCompile Include="rsp\njpgdspMain.cpp" />
<ClCompile Include="RT64\rt64_layer.cpp" />
<ClCompile Include="src\ai.cpp" />
<ClCompile Include="src\cont.cpp" />

View file

@ -30234,6 +30234,9 @@
<ClCompile Include="funcs\lookup.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="rsp\njpgdspMain.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="portultra\platform_specific.h">

View file

@ -15,6 +15,7 @@
#include "ultra64.h"
#include "multilibultra.hpp"
#include "recomp.h"
#include "../src/rsp.h"
struct SpTaskAction {
OSTask task;
@ -203,6 +204,44 @@ int sdl_event_filter(void* userdata, SDL_Event* event) {
return 1;
}
// Storage accessed by the RSP_MEM_* macros and DMA helpers declared in rsp.h (0x1000 bytes)
uint8_t dmem[0x1000];
// Lookup tables filled in by rsp_constants_init
uint16_t rspReciprocals[512];
uint16_t rspInverseSquareRoots[512];
// Signature of a recompiled RSP microcode entry point
using RspUcodeFunc = RspExitReason(uint8_t* rdram);
// Recompiled microcode function; defined outside this file
extern RspUcodeFunc njpgdspMain;
// From Ares emulator. For license details, see rsp_vu.h
void rsp_constants_init() {
rspReciprocals[0] = u16(~0);
for (u16 index = 1; index < 512; index++) {
u64 a = index + 512;
u64 b = (u64(1) << 34) / a;
rspReciprocals[index] = u16(b + 1 >> 8);
}
for (u16 index = 0; index < 512; index++) {
u64 a = index + 512 >> ((index % 2 == 1) ? 1 : 0);
u64 b = 1 << 17;
//find the largest b where b < 1.0 / sqrt(a)
while (a * (b + 1) * (b + 1) < (u64(1) << 44)) b++;
rspInverseSquareRoots[index] = u16(b >> 1);
}
}
// Runs a recompiled RSP microcode
void run_rsp_microcode(uint8_t* rdram, const OSTask* task, RspUcodeFunc* ucode_func) {
// Load the OSTask into DMEM
memcpy(&dmem[0xFC0], task, sizeof(OSTask));
// Load the ucode data into DMEM
dma_rdram_to_dmem(rdram, 0x0000, task->t.ucode_data, 0xF80 - 1);
// Run the ucode
RspExitReason exit_reason = ucode_func(rdram);
// Ensure that the ucode exited correctly
assert(exit_reason == RspExitReason::Broke);
sp_complete();
}
void event_thread_func(uint8_t* rdram, uint8_t* rom) {
using namespace std::chrono_literals;
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) {
@ -216,6 +255,8 @@ void event_thread_func(uint8_t* rdram, uint8_t* rom) {
SDL_SetWindowTitle(window, "Recomp");
//SDL_SetEventFilter(sdl_event_filter, nullptr);
rsp_constants_init();
while (true) {
// Try to pull an action from the queue
Action action;
@ -230,20 +271,7 @@ void event_thread_func(uint8_t* rdram, uint8_t* rom) {
} else if (task_action->task.t.type == M_AUDTASK) {
sp_complete();
} else if (task_action->task.t.type == M_NJPEGTASK) {
uint32_t* jpeg_task = TO_PTR(uint32_t, (int32_t)(0x80000000 | task_action->task.t.data_ptr));
int32_t address = jpeg_task[0] | 0x80000000;
size_t mbCount = jpeg_task[1];
uint32_t mode = jpeg_task[2];
//int32_t qTableYPtr = jpeg_task[3] | 0x80000000;
//int32_t qTableUPtr = jpeg_task[4] | 0x80000000;
//int32_t qTableVPtr = jpeg_task[5] | 0x80000000;
//uint32_t mbSize = jpeg_task[6];
if (mode == 0) {
memset(TO_PTR(void, address), 0, mbCount * 0x40 * sizeof(uint16_t) * 4);
} else {
memset(TO_PTR(void, address), 0, mbCount * 0x40 * sizeof(uint16_t) * 6);
}
sp_complete();
run_rsp_microcode(rdram, &task_action->task, njpgdspMain);
} else {
fprintf(stderr, "Unknown task type: %" PRIu32 "\n", task_action->task.t.type);
assert(false);

1
test/rsp/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
njpgdspMain.cpp

65
test/src/rsp.h Normal file
View file

@ -0,0 +1,65 @@
#ifndef __RSP_H__
#define __RSP_H__
#include "rsp_vu.h"
#include "recomp.h"
// Reason a recompiled microcode function returned to its caller.
enum class RspExitReason {
Invalid, // Not returned by any code visible here; presumably a placeholder value
Broke, // Returned by the translation of a `break` instruction
ImemOverrun, // Returned when execution runs past the last translated instruction
UnhandledJumpTarget // Returned when an indirect jump target matches no known label
};
extern uint8_t dmem[];
extern uint16_t rspReciprocals[512];
extern uint16_t rspInverseSquareRoots[512];
// Accessors for `dmem` at byte address (offset + addr). Each expands to an lvalue, so it can be
// read or assigned. The halfword and byte forms XOR the address with 2 and 3 respectively
// (presumably because dmem holds host-endian 32-bit words; confirm against the loader).
// Word access
#define RSP_MEM_W(offset, addr) \
(*reinterpret_cast<uint32_t*>(dmem + (offset) + (addr)))
// Signed halfword access
#define RSP_MEM_H(offset, addr) \
(*reinterpret_cast<int16_t*>(dmem + (((offset) + (addr)) ^ 2)))
// Unsigned halfword access
#define RSP_MEM_HU(offset, addr) \
(*reinterpret_cast<uint16_t*>(dmem + (((offset) + (addr)) ^ 2)))
// Signed byte access
#define RSP_MEM_B(offset, addr) \
(*reinterpret_cast<int8_t*>(dmem + (((offset) + (addr)) ^ 3)))
// Unsigned byte access
#define RSP_MEM_BU(offset, addr) \
(*reinterpret_cast<uint8_t*>(dmem + (((offset) + (addr)) ^ 3)))
// Sum of a and b, converted to int32_t
#define RSP_ADD32(a, b) \
((int32_t)((a) + (b)))
// Difference of a and b, converted to int32_t
#define RSP_SUB32(a, b) \
((int32_t)((a) - (b)))
// Reinterprets val as a signed 32-bit value
#define RSP_SIGNED(val) \
((int32_t)(val))
// DMA helpers. These expand to code that uses the `dma_dmem_address`, `dma_dram_address` and
// `rdram` names, which must be in scope at the point of use.
#define SET_DMA_DMEM(dmem_addr) dma_dmem_address = (dmem_addr)
#define SET_DMA_DRAM(dram_addr) dma_dram_address = (dram_addr)
#define DO_DMA_READ(rd_len) dma_rdram_to_dmem(rdram, dma_dmem_address, dma_dram_address, (rd_len))
#define DO_DMA_WRITE(wr_len) dma_dmem_to_rdram(rdram, dma_dmem_address, dma_dram_address, (wr_len))
// Copies rd_len + 1 bytes from RDRAM into dmem, one byte at a time.
//   rdram     - RDRAM base pointer (used by the MEM_B macro)
//   dmem_addr - destination byte address within dmem
//   dram_addr - source address; masked with 0xFFFFF8 before use
//   rd_len    - number of bytes to copy, minus one
// Asserts that the destination range ends at or before 0x1000.
static inline void dma_rdram_to_dmem(uint8_t* rdram, uint32_t dmem_addr, uint32_t dram_addr, uint32_t rd_len) {
    const uint32_t byte_count = rd_len + 1; // Read length is inclusive
    const uint32_t source_base = dram_addr & 0xFFFFF8;
    assert(dmem_addr + byte_count <= 0x1000);

    uint32_t offset = 0;
    while (offset < byte_count) {
        RSP_MEM_B(offset, dmem_addr) = MEM_B(0, (int64_t)(int32_t)(source_base + offset + 0x80000000));
        offset++;
    }
}
// Copies wr_len + 1 bytes from dmem into RDRAM, one byte at a time.
//   rdram     - RDRAM base pointer (used by the MEM_B macro)
//   dmem_addr - source byte address within dmem
//   dram_addr - destination address; masked with 0xFFFFF8 before use
//   wr_len    - number of bytes to copy, minus one
// Asserts that the source range ends at or before 0x1000.
static inline void dma_dmem_to_rdram(uint8_t* rdram, uint32_t dmem_addr, uint32_t dram_addr, uint32_t wr_len) {
    const uint32_t byte_count = wr_len + 1; // Write length is inclusive
    const uint32_t destination_base = dram_addr & 0xFFFFF8;
    assert(dmem_addr + byte_count <= 0x1000);

    uint32_t offset = 0;
    while (offset < byte_count) {
        MEM_B(0, (int64_t)(int32_t)(destination_base + offset + 0x80000000)) = RSP_MEM_B(offset, dmem_addr);
        offset++;
    }
}
#endif

199
test/src/rsp_vu.h Normal file
View file

@ -0,0 +1,199 @@
// This file is modified from the Ares N64 emulator core. Ares can
// be found at https://github.com/ares-emulator/ares. The original license
// for this portion of Ares is as follows:
// ----------------------------------------------------------------------
// ares
//
// Copyright(c) 2004 - 2021 ares team, Near et al
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------
#include <cstdint>
#define ARCHITECTURE_AMD64
#define ARCHITECTURE_SUPPORTS_SSE4_1 1
#if defined(ARCHITECTURE_AMD64)
#include <nmmintrin.h>
using v128 = __m128i;
#elif defined(ARCHITECTURE_ARM64)
#include <sse2neon.h>
using v128 = __m128i;
#endif
// Compile-time selection between the SIMD and scalar (SISD) code paths.
// Exactly one of the two flags is true, chosen by ARCHITECTURE_SUPPORTS_SSE4_1.
namespace Accuracy::RSP {
#if ARCHITECTURE_SUPPORTS_SSE4_1
  constexpr bool SIMD = true;
#else
  constexpr bool SIMD = false;
#endif
  constexpr bool SISD = !SIMD;
}
// Short names for the fixed-width integer types
using u8 = uint8_t;
using s8 = int8_t;
using u16 = uint16_t;
using s16 = int16_t;
using u32 = uint32_t;
using s32 = int32_t;
using u64 = uint64_t;
using s64 = int64_t;
// Two 64-bit words making up a 128-bit quantity
using uint128_t = uint64_t[2];

// Clamps x to the range of a signed integer that is `bits` wide,
// i.e. to [-2^(bits-1), 2^(bits-1) - 1].
template<u32 bits> inline auto sclamp(s64 x) -> s64 {
  constexpr s64 magnitude = static_cast<s64>(1ull << (bits - 1));
  constexpr s64 highest = magnitude - 1;
  constexpr s64 lowest = -magnitude;
  if (x > highest) {
    return highest;
  }
  if (x < lowest) {
    return lowest;
  }
  return x;
}
// Declarations for the RSP vector unit: the 128-bit register type, the vector
// unit state (vpu), and one member function per vector/coprocessor-2
// instruction. Apart from the small inline accessors and forwarding wrappers
// below, the members are only declared here; their definitions are not part of
// this struct body.
struct RSP {
// 32-bit scalar register value, and its const form for read-only operands.
using r32 = uint32_t;
using cr32 = const r32;
// 128-bit vector register. The same 16 bytes are viewed as two 64-bit words
// (u128) and, when SSE4.1 support is enabled, as a single __m128i (v128).
union r128 {
struct { uint64_t u128[2]; };
#if ARCHITECTURE_SUPPORTS_SSE4_1
struct { __m128i v128; };
// Implicit conversion so an r128 can be passed straight to SSE intrinsics.
operator __m128i() const { return v128; }
// Stores a raw __m128i. The deduced return type is void, so this assignment
// cannot be chained.
auto operator=(__m128i value) { v128 = value; }
#endif
// Byte accessors with reversed indexing: index i refers to storage byte 15 - i.
auto byte(u32 index) -> uint8_t& { return ((uint8_t*)&u128)[15 - index]; }
auto byte(u32 index) const -> uint8_t { return ((uint8_t*)&u128)[15 - index]; }
// 16-bit lane accessors with reversed indexing: lane i refers to storage lane 7 - i.
auto element(u32 index) -> uint16_t& { return ((uint16_t*)&u128)[7 - index]; }
auto element(u32 index) const -> uint16_t { return ((uint16_t*)&u128)[7 - index]; }
// Typed views using the same index mapping as byte()/element().
// Note: inside this union the names u8, s16 and u16 refer to these member
// functions rather than the global integer aliases, which is why the return
// types here are spelled uint8_t / int16_t / uint16_t.
auto u8(u32 index) -> uint8_t& { return ((uint8_t*)&u128)[15 - index]; }
auto u8(u32 index) const -> uint8_t { return ((uint8_t*)&u128)[15 - index]; }
auto s16(u32 index) -> int16_t& { return ((int16_t*)&u128)[7 - index]; }
auto s16(u32 index) const -> int16_t { return ((int16_t*)&u128)[7 - index]; }
auto u16(u32 index) -> uint16_t& { return ((uint16_t*)&u128)[7 - index]; }
auto u16(u32 index) const -> uint16_t { return ((uint16_t*)&u128)[7 - index]; }
// Flag-style access used for the VCx registers: each 16-bit lane holds one flag.
// get() reports whether the lane is non-zero.
auto get(u32 index) const -> bool { return u16(index) != 0; }
// set() writes 0 - value into the lane (all ones for true, zero for false)
// and returns the value that was passed in.
auto set(u32 index, bool value) -> bool { return u16(index) = 0 - value, value; }
// Declared only; per the original note the definition lives in vu-registers.cpp.
// NOTE(review): presumably returns a copy rearranged according to `index`
// (an element selector) - confirm against the definition.
auto operator()(u32 index) const -> r128;
};
// Read-only vector operand.
using cr128 = const r128;
// Complete vector unit state; a single instance is held in `vpu`.
struct VU {
// The 32 vector registers.
r128 r[32];
// Accumulator, stored as three separate vectors.
// NOTE(review): the names suggest high / middle / low slices - confirm.
r128 acch, accm, accl;
r128 vcoh, vcol; //16-bit little endian
r128 vcch, vccl; //16-bit little endian
r128 vce; // 8-bit little endian
// NOTE(review): presumably state carried between the divide/reciprocal
// instructions (VRCP*/VRSQ*) - confirm against their definitions.
s16 divin;
s16 divout;
bool divdp;
} vpu;
// Constant vectors: all bits clear, and all bits set in both 64-bit words.
static constexpr r128 zero{0};
static constexpr r128 invert{(uint64_t)-1, (uint64_t)-1};
// Accumulator helpers (declared only).
auto accumulatorGet(u32 index) const -> u64;
auto accumulatorSet(u32 index, u64 value) -> void;
auto accumulatorSaturate(u32 index, bool slice, u16 negative, u16 positive) const -> u16;
// Control register transfers (declared only). CFC2 takes rt by mutable
// reference, CTC2 by const reference.
auto CFC2(r32& rt, u8 rd) -> void;
auto CTC2(cr32& rt, u8 rd) -> void;
// NOTE(review): in the templates below, `e` is presumably the element field
// taken from the instruction encoding - confirm against the definitions.
// L*V family: vt is taken by mutable reference, rs and imm are inputs.
// LTV is the exception and takes vt as a u8 (presumably a register index).
template<u8 e> auto LBV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LDV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LFV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LHV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LLV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LPV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LQV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LRV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LSV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LTV(u8 vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LUV(r128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto LWV(r128& vt, cr32& rs, s8 imm) -> void;
// Scalar <-> vector register moves: MFC2 writes rt, MTC2 writes vs.
template<u8 e> auto MFC2(r32& rt, cr128& vs) -> void;
template<u8 e> auto MTC2(cr32& rt, r128& vs) -> void;
// S*V family: vt is taken by const reference, rs and imm are inputs.
// STV is the exception and takes vt as a u8 (presumably a register index).
template<u8 e> auto SBV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SDV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SFV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SHV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SLV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SPV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SQV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SRV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SSV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto STV(u8 vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SUV(cr128& vt, cr32& rs, s8 imm) -> void;
template<u8 e> auto SWV(cr128& vt, cr32& rs, s8 imm) -> void;
// Vector computational operations. Unless noted, each writes its first
// parameter and reads vs and vt.
template<u8 e> auto VABS(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VADD(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VADDC(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VAND(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VCH(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VCL(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VCR(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VEQ(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VGE(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VLT(r128& vd, cr128& vs, cr128& vt) -> void;
// Shared implementation for VMACF / VMACU, selected by U
// (VMACF forwards U = 0, VMACU forwards U = 1).
template<bool U, u8 e>
auto VMACF(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VMACF(r128& vd, cr128& vs, cr128& vt) -> void { VMACF<0, e>(vd, vs, vt); }
template<u8 e> auto VMACU(r128& vd, cr128& vs, cr128& vt) -> void { VMACF<1, e>(vd, vs, vt); }
// Not a template and takes only the destination register.
auto VMACQ(r128& vd) -> void;
template<u8 e> auto VMADH(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VMADL(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VMADM(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VMADN(r128& vd, cr128& vs, cr128& vt) -> void;
// Takes a u8 `de` in place of a vs register operand.
template<u8 e> auto VMOV(r128& vd, u8 de, cr128& vt) -> void;
template<u8 e> auto VMRG(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VMUDH(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VMUDL(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VMUDM(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VMUDN(r128& vd, cr128& vs, cr128& vt) -> void;
// Shared implementation for VMULF / VMULU, selected by U
// (VMULF forwards U = 0, VMULU forwards U = 1).
// The destination parameter is named rd here rather than vd.
template<bool U, u8 e>
auto VMULF(r128& rd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VMULF(r128& rd, cr128& vs, cr128& vt) -> void { VMULF<0, e>(rd, vs, vt); }
template<u8 e> auto VMULU(r128& rd, cr128& vs, cr128& vt) -> void { VMULF<1, e>(rd, vs, vt); }
template<u8 e> auto VMULQ(r128& rd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VNAND(r128& rd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VNE(r128& vd, cr128& vs, cr128& vt) -> void;
// Takes no operands.
auto VNOP() -> void;
template<u8 e> auto VNOR(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VNXOR(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VOR(r128& vd, cr128& vs, cr128& vt) -> void;
// Shared implementation for VRCP / VRCPL, selected by L
// (VRCP forwards L = 0, VRCPL forwards L = 1). VRCPH is declared separately.
template<bool L, u8 e>
auto VRCP(r128& vd, u8 de, cr128& vt) -> void;
template<u8 e> auto VRCP(r128& vd, u8 de, cr128& vt) -> void { VRCP<0, e>(vd, de, vt); }
template<u8 e> auto VRCPL(r128& vd, u8 de, cr128& vt) -> void { VRCP<1, e>(vd, de, vt); }
template<u8 e> auto VRCPH(r128& vd, u8 de, cr128& vt) -> void;
// Shared implementation for VRNDN / VRNDP, selected by D
// (VRNDN forwards D = 0, VRNDP forwards D = 1). Here vs is a u8, not a
// register reference.
template<bool D, u8 e>
auto VRND(r128& vd, u8 vs, cr128& vt) -> void;
template<u8 e> auto VRNDN(r128& vd, u8 vs, cr128& vt) -> void { VRND<0, e>(vd, vs, vt); }
template<u8 e> auto VRNDP(r128& vd, u8 vs, cr128& vt) -> void { VRND<1, e>(vd, vs, vt); }
// Shared implementation for VRSQ / VRSQL, selected by L
// (VRSQ forwards L = 0, VRSQL forwards L = 1). VRSQH is declared separately.
template<bool L, u8 e>
auto VRSQ(r128& vd, u8 de, cr128& vt) -> void;
template<u8 e> auto VRSQ(r128& vd, u8 de, cr128& vt) -> void { VRSQ<0, e>(vd, de, vt); }
template<u8 e> auto VRSQL(r128& vd, u8 de, cr128& vt) -> void { VRSQ<1, e>(vd, de, vt); }
template<u8 e> auto VRSQH(r128& vd, u8 de, cr128& vt) -> void;
// Takes only vd and vs (no vt operand).
template<u8 e> auto VSAR(r128& vd, cr128& vs) -> void;
template<u8 e> auto VSUB(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VSUBC(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VXOR(r128& rd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VZERO(r128& rd, cr128& vs, cr128& vt) -> void;
};

1537
test/src/rsp_vu_impl.h Normal file

File diff suppressed because it is too large Load diff