Diffstat (limited to 'deps/lightrec/lightrec.c')
-rw-r--r--  deps/lightrec/lightrec.c | 1265
1 file changed, 1265 insertions, 0 deletions
diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c
new file mode 100644
index 0000000..47c49c8
--- /dev/null
+++ b/deps/lightrec/lightrec.c
@@ -0,0 +1,1265 @@
+/*
+ * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "blockcache.h"
+#include "config.h"
+#include "debug.h"
+#include "disassembler.h"
+#include "emitter.h"
+#include "interpreter.h"
+#include "lightrec.h"
+#include "memmanager.h"
+#include "recompiler.h"
+#include "regcache.h"
+#include "optimizer.h"
+
+#include <errno.h>
+#include <lightning.h>
+#include <limits.h>
+#if ENABLE_THREADED_COMPILER
+#include <stdatomic.h>
+#endif
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#if ENABLE_TINYMM
+#include <tinymm.h>
+#endif
+
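+/* Build a mask with bits l..h (inclusive) set, like the Linux kernel's
+ * GENMASK(): e.g. GENMASK(31, 8) == 0xffffff00. */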
+#define GENMASK(h, l) \
+ (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h))))
+
+static struct block * lightrec_precompile_block(struct lightrec_state *state,
+ u32 pc);
+
+static void __segfault_cb(struct lightrec_state *state, u32 addr)
+{
+ lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
+ pr_err("Segmentation fault in recompiled code: invalid "
+ "load/store at address 0x%08x\n", addr);
+}
+
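+/* Dispatch a memory access to the map's custom I/O callbacks (used for
+ * hardware registers). Stores return 0; loads return the value read,
+ * sign-extended for LB/LH. */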
+static u32 lightrec_rw_ops(struct lightrec_state *state, union code op,
+ const struct lightrec_mem_map_ops *ops, u32 addr, u32 data)
+{
+ switch (op.i.op) {
+ case OP_SB:
+ ops->sb(state, addr, (u8) data);
+ return 0;
+ case OP_SH:
+ ops->sh(state, addr, (u16) data);
+ return 0;
+ case OP_SWL:
+ case OP_SWR:
+ case OP_SW:
+ ops->sw(state, addr, data);
+ return 0;
+ case OP_LB:
+ return (s32) (s8) ops->lb(state, addr);
+ case OP_LBU:
+ return ops->lb(state, addr);
+ case OP_LH:
+ return (s32) (s16) ops->lh(state, addr);
+ case OP_LHU:
+ return ops->lh(state, addr);
+ case OP_LW:
+ default:
+ return ops->lw(state, addr);
+ }
+}
+
+static void lightrec_invalidate_map(struct lightrec_state *state,
+ const struct lightrec_mem_map *map, u32 addr)
+{
+ if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM])
+ state->code_lut[lut_offset(addr)] = NULL;
+}
+
+static const struct lightrec_mem_map *
+lightrec_get_map(struct lightrec_state *state, u32 kaddr)
+{
+ unsigned int i;
+
+ for (i = 0; i < state->nb_maps; i++) {
+ const struct lightrec_mem_map *map = &state->maps[i];
+
+ if (kaddr >= map->pc && kaddr < map->pc + map->length)
+ return map;
+ }
+
+ return NULL;
+}
+
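+/* Perform one memory access for the given I-type opcode. 'addr' holds the
+ * base register's value; the sign-extended 16-bit immediate is added to it.
+ * Returns the loaded value for loads, 0 for stores. If 'flags' is non-NULL,
+ * the opcode is tagged as HW I/O or direct I/O for later recompilation. */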
+u32 lightrec_rw(struct lightrec_state *state, union code op,
+ u32 addr, u32 data, u16 *flags)
+{
+ const struct lightrec_mem_map *map;
+ u32 shift, mem_data, mask, pc;
+ uintptr_t new_addr;
+ u32 kaddr;
+
+ addr += (s16) op.i.imm;
+ kaddr = kunseg(addr);
+
+ map = lightrec_get_map(state, kaddr);
+ if (!map) {
+ __segfault_cb(state, addr);
+ return 0;
+ }
+
+ pc = map->pc;
+
+ if (unlikely(map->ops)) {
+ if (flags)
+ *flags |= LIGHTREC_HW_IO;
+
+ return lightrec_rw_ops(state, op, map->ops, addr, data);
+ }
+
+ while (map->mirror_of)
+ map = map->mirror_of;
+
+ if (flags)
+ *flags |= LIGHTREC_DIRECT_IO;
+
+ kaddr -= pc;
+ new_addr = (uintptr_t) map->address + kaddr;
+
+ switch (op.i.op) {
+ case OP_SB:
+ *(u8 *) new_addr = (u8) data;
+ if (!state->invalidate_from_dma_only)
+ lightrec_invalidate_map(state, map, kaddr);
+ return 0;
+ case OP_SH:
+ *(u16 *) new_addr = HTOLE16((u16) data);
+ if (!state->invalidate_from_dma_only)
+ lightrec_invalidate_map(state, map, kaddr);
+ return 0;
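+	/* SWL/SWR write the unaligned part of a word: load the aligned
+	 * word, merge in the register bytes selected by the low address
+	 * bits, and store the word back. */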
+ case OP_SWL:
+ shift = kaddr & 3;
+ mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
+ mask = GENMASK(31, (shift + 1) * 8);
+
+ *(u32 *)(new_addr & ~3) = HTOLE32((data >> ((3 - shift) * 8))
+ | (mem_data & mask));
+ if (!state->invalidate_from_dma_only)
+ lightrec_invalidate_map(state, map, kaddr & ~0x3);
+ return 0;
+ case OP_SWR:
+ shift = kaddr & 3;
+ mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
+ mask = (1 << (shift * 8)) - 1;
+
+ *(u32 *)(new_addr & ~3) = HTOLE32((data << (shift * 8))
+ | (mem_data & mask));
+ if (!state->invalidate_from_dma_only)
+ lightrec_invalidate_map(state, map, kaddr & ~0x3);
+ return 0;
+ case OP_SW:
+ *(u32 *) new_addr = HTOLE32(data);
+ if (!state->invalidate_from_dma_only)
+ lightrec_invalidate_map(state, map, kaddr);
+ return 0;
+ case OP_SWC2:
+ *(u32 *) new_addr = HTOLE32(state->ops.cop2_ops.mfc(state,
+ op.i.rt));
+ if (!state->invalidate_from_dma_only)
+ lightrec_invalidate_map(state, map, kaddr);
+ return 0;
+ case OP_LB:
+ return (s32) *(s8 *) new_addr;
+ case OP_LBU:
+ return *(u8 *) new_addr;
+ case OP_LH:
+ return (s32)(s16) LE16TOH(*(u16 *) new_addr);
+ case OP_LHU:
+ return LE16TOH(*(u16 *) new_addr);
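+	/* LWL/LWR likewise merge the unaligned part of the loaded word
+	 * into the current value of the target register ('data'). */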
+ case OP_LWL:
+ shift = kaddr & 3;
+ mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
+ mask = (1 << (24 - shift * 8)) - 1;
+
+ return (data & mask) | (mem_data << (24 - shift * 8));
+ case OP_LWR:
+ shift = kaddr & 3;
+ mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
+ mask = GENMASK(31, 32 - shift * 8);
+
+ return (data & mask) | (mem_data >> (shift * 8));
+ case OP_LWC2:
+ state->ops.cop2_ops.mtc(state, op.i.rt,
+ LE32TOH(*(u32 *) new_addr));
+ return 0;
+ case OP_LW:
+ default:
+ return LE32TOH(*(u32 *) new_addr);
+ }
+}
+
+static void lightrec_rw_helper(struct lightrec_state *state,
+ union code op, u16 *flags)
+{
+ u32 ret = lightrec_rw(state, op,
+ state->native_reg_cache[op.i.rs],
+ state->native_reg_cache[op.i.rt], flags);
+
+ switch (op.i.op) {
+ case OP_LB:
+ case OP_LBU:
+ case OP_LH:
+ case OP_LHU:
+ case OP_LWL:
+ case OP_LWR:
+ case OP_LW:
+ if (op.i.rt)
+ state->native_reg_cache[op.i.rt] = ret;
+ default: /* fall-through */
+ break;
+ }
+}
+
+static void lightrec_rw_cb(struct lightrec_state *state, union code op)
+{
+ lightrec_rw_helper(state, op, NULL);
+}
+
+static void lightrec_rw_generic_cb(struct lightrec_state *state,
+ struct opcode *op, struct block *block)
+{
+ bool was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
+
+ lightrec_rw_helper(state, op->c, &op->flags);
+
+ if (!was_tagged) {
+ pr_debug("Opcode of block at PC 0x%08x offset 0x%x has been "
+ "tagged - flag for recompilation\n",
+ block->pc, op->offset << 2);
+
+ lightrec_mark_for_recompilation(state->block_cache, block);
+ }
+}
+
+u32 lightrec_mfc(struct lightrec_state *state, union code op)
+{
+ bool is_cfc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CFC0) ||
+ (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CFC2);
+ u32 (*func)(struct lightrec_state *, u8);
+ const struct lightrec_cop_ops *ops;
+
+ if (op.i.op == OP_CP0)
+ ops = &state->ops.cop0_ops;
+ else
+ ops = &state->ops.cop2_ops;
+
+ if (is_cfc)
+ func = ops->cfc;
+ else
+ func = ops->mfc;
+
+ return (*func)(state, op.r.rd);
+}
+
+static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
+{
+ u32 rt = lightrec_mfc(state, op);
+
+ if (op.r.rt)
+ state->native_reg_cache[op.r.rt] = rt;
+}
+
+void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
+{
+ bool is_ctc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CTC0) ||
+ (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CTC2);
+ void (*func)(struct lightrec_state *, u8, u32);
+ const struct lightrec_cop_ops *ops;
+
+ if (op.i.op == OP_CP0)
+ ops = &state->ops.cop0_ops;
+ else
+ ops = &state->ops.cop2_ops;
+
+ if (is_ctc)
+ func = ops->ctc;
+ else
+ func = ops->mtc;
+
+ (*func)(state, op.r.rd, data);
+}
+
+static void lightrec_mtc_cb(struct lightrec_state *state, union code op)
+{
+ lightrec_mtc(state, op, state->native_reg_cache[op.r.rt]);
+}
+
+static void lightrec_rfe_cb(struct lightrec_state *state, union code op)
+{
+ u32 status;
+
+ /* Read CP0 Status register (r12) */
+ status = state->ops.cop0_ops.mfc(state, 12);
+
+	/* Pop the mode/interrupt-enable stack: bits 5..2 of the Status
+	 * register shift down into bits 3..0 */
+	status = ((status & 0x3c) >> 2) | (status & ~0xf);
+
+ /* Write it back */
+ state->ops.cop0_ops.ctc(state, 12, status);
+}
+
+static void lightrec_cp_cb(struct lightrec_state *state, union code op)
+{
+ void (*func)(struct lightrec_state *, u32);
+
+ if ((op.opcode >> 25) & 1)
+ func = state->ops.cop2_ops.op;
+ else
+ func = state->ops.cop0_ops.op;
+
+ (*func)(state, op.opcode);
+}
+
+static void lightrec_syscall_cb(struct lightrec_state *state, union code op)
+{
+ lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL);
+}
+
+static void lightrec_break_cb(struct lightrec_state *state, union code op)
+{
+ lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK);
+}
+
+struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
+{
+ struct block *block = lightrec_find_block(state->block_cache, pc);
+
+ if (block && lightrec_block_is_outdated(block)) {
+ pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
+
+ /* Make sure the recompiler isn't processing the block we'll
+ * destroy */
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_recompiler_remove(state->rec, block);
+
+ lightrec_unregister_block(state->block_cache, block);
+ lightrec_free_block(block);
+ block = NULL;
+ }
+
+ if (!block) {
+ block = lightrec_precompile_block(state, pc);
+ if (!block) {
+ pr_err("Unable to recompile block at PC 0x%x\n", pc);
+ lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
+ return NULL;
+ }
+
+ lightrec_register_block(state->block_cache, block);
+ }
+
+ return block;
+}
+
+static void * get_next_block_func(struct lightrec_state *state, u32 pc)
+{
+ struct block *block;
+ bool should_recompile;
+ void *func;
+
+ for (;;) {
+ func = state->code_lut[lut_offset(pc)];
+ if (func && func != state->get_next_block)
+ return func;
+
+ block = lightrec_get_block(state, pc);
+
+ if (unlikely(!block))
+ return NULL;
+
+ should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE;
+
+ if (unlikely(should_recompile)) {
+ pr_debug("Block at PC 0x%08x should recompile"
+ " - freeing old code\n", pc);
+
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_recompiler_remove(state->rec, block);
+
+ remove_from_code_lut(state->block_cache, block);
+ lightrec_unregister(MEM_FOR_CODE, block->code_size);
+ if (block->_jit)
+ _jit_destroy_state(block->_jit);
+ block->_jit = NULL;
+ block->function = NULL;
+ block->flags &= ~BLOCK_SHOULD_RECOMPILE;
+ }
+
+ if (ENABLE_THREADED_COMPILER && likely(!should_recompile))
+ func = lightrec_recompiler_run_first_pass(block, &pc);
+ else
+ func = block->function;
+
+ if (likely(func))
+ return func;
+
+ /* Block wasn't compiled yet - run the interpreter */
+ if (!ENABLE_THREADED_COMPILER &&
+ ((ENABLE_FIRST_PASS && likely(!should_recompile)) ||
+ unlikely(block->flags & BLOCK_NEVER_COMPILE)))
+ pc = lightrec_emulate_block(block, pc);
+
+ if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) {
+ /* Then compile it using the profiled data */
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_recompiler_add(state->rec, block);
+ else
+ lightrec_compile_block(block);
+ }
+
+ if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
+ state->current_cycle >= state->target_cycle) {
+ state->next_pc = pc;
+ return NULL;
+ }
+ }
+}
+
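+/* Recompiled code keeps a cycle countdown in LIGHTREC_REG_CYCLE
+ * (target_cycle - current_cycle), while the C side works with the absolute
+ * current_cycle counter. These wrappers convert between the two
+ * representations around a C callback, which may itself modify
+ * current_cycle or target_cycle. */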
+static s32 c_generic_function_wrapper(struct lightrec_state *state,
+ s32 cycles_delta,
+ void (*f)(struct lightrec_state *,
+ struct opcode *,
+ struct block *),
+ struct opcode *op, struct block *block)
+{
+ state->current_cycle = state->target_cycle - cycles_delta;
+
+ (*f)(state, op, block);
+
+ return state->target_cycle - state->current_cycle;
+}
+
+static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
+ void (*f)(struct lightrec_state *, union code),
+ union code op)
+{
+ state->current_cycle = state->target_cycle - cycles_delta;
+
+ (*f)(state, op);
+
+ return state->target_cycle - state->current_cycle;
+}
+
+static struct block * generate_wrapper(struct lightrec_state *state,
+ void *f, bool generic)
+{
+ struct block *block;
+ jit_state_t *_jit;
+ unsigned int i;
+ int stack_ptr;
+ jit_word_t code_size;
+ jit_node_t *to_tramp, *to_fn_epilog;
+
+ block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
+ if (!block)
+ goto err_no_mem;
+
+ _jit = jit_new_state();
+ if (!_jit)
+ goto err_free_block;
+
+ jit_name("RW wrapper");
+ jit_note(__FILE__, __LINE__);
+
+ /* Wrapper entry point */
+ jit_prolog();
+
+ stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS);
+
+ for (i = 0; i < NUM_TEMPS; i++)
+ jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));
+
+ /* Jump to the trampoline */
+ to_tramp = jit_jmpi();
+
+ /* The trampoline will jump back here */
+ to_fn_epilog = jit_label();
+
+ for (i = 0; i < NUM_TEMPS; i++)
+ jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t));
+
+ jit_ret();
+ jit_epilog();
+
+	/* Trampoline entry point.
+	 * The sole purpose of the trampoline is to trick Lightning into not
+	 * saving/restoring the callee-saved register LIGHTREC_REG_CYCLE, since
+	 * we do want to return to the caller with this register modified. */
+ jit_prolog();
+ jit_tramp(256);
+ jit_patch(to_tramp);
+
+ jit_prepare();
+ jit_pushargr(LIGHTREC_REG_STATE);
+ jit_pushargr(LIGHTREC_REG_CYCLE);
+ jit_pushargi((uintptr_t)f);
+ jit_pushargr(JIT_R0);
+ if (generic) {
+ jit_pushargr(JIT_R1);
+ jit_finishi(c_generic_function_wrapper);
+ } else {
+ jit_finishi(c_function_wrapper);
+ }
+
+#if __WORDSIZE == 64
+ jit_retval_i(LIGHTREC_REG_CYCLE);
+#else
+ jit_retval(LIGHTREC_REG_CYCLE);
+#endif
+
+ jit_patch_at(jit_jmpi(), to_fn_epilog);
+ jit_epilog();
+
+ block->state = state;
+ block->_jit = _jit;
+ block->function = jit_emit();
+ block->opcode_list = NULL;
+ block->flags = 0;
+ block->nb_ops = 0;
+
+ jit_get_code(&code_size);
+ lightrec_register(MEM_FOR_CODE, code_size);
+
+ block->code_size = code_size;
+
+ if (ENABLE_DISASSEMBLER) {
+ pr_debug("Wrapper block:\n");
+ jit_disassemble();
+ }
+
+ jit_clear_state();
+ return block;
+
+err_free_block:
+ lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
+err_no_mem:
+ pr_err("Unable to compile wrapper: Out of memory\n");
+ return NULL;
+}
+
+static struct block * generate_dispatcher(struct lightrec_state *state)
+{
+ struct block *block;
+ jit_state_t *_jit;
+ jit_node_t *to_end, *to_end2, *to_c, *loop, *addr, *addr2;
+ unsigned int i;
+ u32 offset, ram_len;
+ jit_word_t code_size;
+
+ block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
+ if (!block)
+ goto err_no_mem;
+
+ _jit = jit_new_state();
+ if (!_jit)
+ goto err_free_block;
+
+ jit_name("dispatcher");
+ jit_note(__FILE__, __LINE__);
+
+ jit_prolog();
+ jit_frame(256);
+
+ jit_getarg(JIT_R0, jit_arg());
+#if __WORDSIZE == 64
+ jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
+#else
+ jit_getarg(LIGHTREC_REG_CYCLE, jit_arg());
+#endif
+
+ /* Force all callee-saved registers to be pushed on the stack */
+ for (i = 0; i < NUM_REGS; i++)
+ jit_movr(JIT_V(i), JIT_V(i));
+
+ /* Pass lightrec_state structure to blocks, using the last callee-saved
+ * register that Lightning provides */
+ jit_movi(LIGHTREC_REG_STATE, (intptr_t) state);
+
+ loop = jit_label();
+
+ /* Call the block's code */
+ jit_jmpr(JIT_R0);
+
+ /* The block will jump here, with the number of cycles remaining in
+ * LIGHTREC_REG_CYCLE */
+ addr2 = jit_indirect();
+
+	/* Exit the dispatch loop when no cycles remain, i.e. when
+	 * state->target_cycle <= state->current_cycle */
+	to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
+
+ /* Convert next PC to KUNSEG and avoid mirrors */
+ ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length;
+ jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1));
+ to_c = jit_bgei(JIT_R0, ram_len);
+
+ /* Fast path: code is running from RAM, use the code LUT */
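+	/* JIT_R0 holds the kunseg'd address. The LUT has one pointer-sized
+	 * entry per 32-bit word, so the byte offset into it is kaddr on
+	 * 32-bit hosts and kaddr * 2 on 64-bit hosts, hence the extra
+	 * left shift below. */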
+#if __WORDSIZE == 64
+ jit_lshi(JIT_R0, JIT_R0, 1);
+#endif
+ jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
+ jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut));
+
+ /* If we get non-NULL, loop */
+ jit_patch_at(jit_bnei(JIT_R0, 0), loop);
+
+ /* Slow path: call C function get_next_block_func() */
+ jit_patch(to_c);
+
+ if (ENABLE_FIRST_PASS) {
+ /* We may call the interpreter - update state->current_cycle */
+ jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, target_cycle));
+ jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE);
+ jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
+ LIGHTREC_REG_STATE, JIT_R1);
+ }
+
+ /* The code LUT will be set to this address when the block at the target
+ * PC has been preprocessed but not yet compiled by the threaded
+ * recompiler */
+ addr = jit_indirect();
+
+ /* Get the next block */
+ jit_prepare();
+ jit_pushargr(LIGHTREC_REG_STATE);
+ jit_pushargr(JIT_V0);
+ jit_finishi(&get_next_block_func);
+ jit_retval(JIT_R0);
+
+ if (ENABLE_FIRST_PASS) {
+ /* The interpreter may have updated state->current_cycle and
+ * state->target_cycle - recalc the delta */
+ jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, current_cycle));
+ jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, target_cycle));
+ jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
+ }
+
+ /* If we get non-NULL, loop */
+ jit_patch_at(jit_bnei(JIT_R0, 0), loop);
+
+ to_end2 = jit_jmpi();
+
+ /* When exiting, the recompiled code will jump to that address */
+ jit_note(__FILE__, __LINE__);
+ jit_patch(to_end);
+
+ /* Store back the next_pc to the lightrec_state structure */
+ offset = offsetof(struct lightrec_state, next_pc);
+ jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
+
+ jit_patch(to_end2);
+
+ jit_retr(LIGHTREC_REG_CYCLE);
+ jit_epilog();
+
+ block->state = state;
+ block->_jit = _jit;
+ block->function = jit_emit();
+ block->opcode_list = NULL;
+ block->flags = 0;
+ block->nb_ops = 0;
+
+ jit_get_code(&code_size);
+ lightrec_register(MEM_FOR_CODE, code_size);
+
+ block->code_size = code_size;
+
+ state->eob_wrapper_func = jit_address(addr2);
+ state->get_next_block = jit_address(addr);
+
+ if (ENABLE_DISASSEMBLER) {
+ pr_debug("Dispatcher block:\n");
+ jit_disassemble();
+ }
+
+ /* We're done! */
+ jit_clear_state();
+ return block;
+
+err_free_block:
+ lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
+err_no_mem:
+ pr_err("Unable to compile dispatcher: Out of memory\n");
+ return NULL;
+}
+
+union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
+{
+ u32 addr, kunseg_pc = kunseg(pc);
+ const u32 *code;
+ const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
+
+ addr = kunseg_pc - map->pc;
+
+ while (map->mirror_of)
+ map = map->mirror_of;
+
+ code = map->address + addr;
+
+ return (union code) *code;
+}
+
+static struct block * lightrec_precompile_block(struct lightrec_state *state,
+ u32 pc)
+{
+ struct opcode *list;
+ struct block *block;
+ const u32 *code;
+ u32 addr, kunseg_pc = kunseg(pc);
+ const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
+ unsigned int length;
+
+ if (!map)
+ return NULL;
+
+ addr = kunseg_pc - map->pc;
+
+ while (map->mirror_of)
+ map = map->mirror_of;
+
+ code = map->address + addr;
+
+ block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
+ if (!block) {
+ pr_err("Unable to recompile block: Out of memory\n");
+ return NULL;
+ }
+
+ list = lightrec_disassemble(state, code, &length);
+ if (!list) {
+ lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
+ return NULL;
+ }
+
+ block->pc = pc;
+ block->state = state;
+ block->_jit = NULL;
+ block->function = NULL;
+ block->opcode_list = list;
+ block->map = map;
+ block->next = NULL;
+ block->flags = 0;
+ block->code_size = 0;
+#if ENABLE_THREADED_COMPILER
+ block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT;
+#endif
+ block->nb_ops = length / sizeof(u32);
+
+ lightrec_optimize(block);
+
+ length = block->nb_ops * sizeof(u32);
+
+ lightrec_register(MEM_FOR_MIPS_CODE, length);
+
+ if (ENABLE_DISASSEMBLER) {
+ pr_debug("Disassembled block at PC: 0x%x\n", block->pc);
+ lightrec_print_disassembly(block, code, length);
+ }
+
+	pr_debug("Block size: %u opcodes\n", (unsigned int)block->nb_ops);
+
+ /* If the first opcode is an 'impossible' branch, never compile the
+ * block */
+ if (list->flags & LIGHTREC_EMULATE_BRANCH)
+ block->flags |= BLOCK_NEVER_COMPILE;
+
+ block->hash = lightrec_calculate_block_hash(block);
+
+ return block;
+}
+
+static bool lightrec_block_is_fully_tagged(struct block *block)
+{
+ struct opcode *op;
+
+ for (op = block->opcode_list; op; op = op->next) {
+		/* Check all loads/stores of the opcode list and mark the
+		 * block as fully compiled if they all have been tagged. */
+ switch (op->c.i.op) {
+ case OP_LB:
+ case OP_LH:
+ case OP_LWL:
+ case OP_LW:
+ case OP_LBU:
+ case OP_LHU:
+ case OP_LWR:
+ case OP_SB:
+ case OP_SH:
+ case OP_SWL:
+ case OP_SW:
+ case OP_SWR:
+ case OP_LWC2:
+ case OP_SWC2:
+ if (!(op->flags & (LIGHTREC_DIRECT_IO |
+ LIGHTREC_HW_IO)))
+ return false;
+ default: /* fall-through */
+ continue;
+ }
+ }
+
+ return true;
+}
+
+int lightrec_compile_block(struct block *block)
+{
+ struct lightrec_state *state = block->state;
+ bool op_list_freed = false, fully_tagged = false;
+ struct opcode *elm;
+ jit_state_t *_jit;
+ jit_node_t *start_of_block;
+ bool skip_next = false;
+ jit_word_t code_size;
+ unsigned int i, j;
+ u32 next_pc;
+
+ fully_tagged = lightrec_block_is_fully_tagged(block);
+ if (fully_tagged)
+ block->flags |= BLOCK_FULLY_TAGGED;
+
+ _jit = jit_new_state();
+ if (!_jit)
+ return -ENOMEM;
+
+ block->_jit = _jit;
+
+ lightrec_regcache_reset(state->reg_cache);
+ state->cycles = 0;
+ state->nb_branches = 0;
+ state->nb_local_branches = 0;
+ state->nb_targets = 0;
+
+ jit_prolog();
+ jit_tramp(256);
+
+ start_of_block = jit_label();
+
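+	/* Emit native code for each opcode in program order. A branch's
+	 * delay slot is compiled together with the branch itself, so the
+	 * following opcode is skipped via skip_next unless the branch was
+	 * flagged LIGHTREC_NO_DS. */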
+ for (elm = block->opcode_list; elm; elm = elm->next) {
+ next_pc = block->pc + elm->offset * sizeof(u32);
+
+ if (skip_next) {
+ skip_next = false;
+ continue;
+ }
+
+ state->cycles += lightrec_cycles_of_opcode(elm->c);
+
+ if (elm->flags & LIGHTREC_EMULATE_BRANCH) {
+ pr_debug("Branch at offset 0x%x will be emulated\n",
+ elm->offset << 2);
+ lightrec_emit_eob(block, elm, next_pc);
+ skip_next = !(elm->flags & LIGHTREC_NO_DS);
+ } else if (elm->opcode) {
+ lightrec_rec_opcode(block, elm, next_pc);
+ skip_next = has_delay_slot(elm->c) &&
+ !(elm->flags & LIGHTREC_NO_DS);
+#if _WIN32
+ /* FIXME: GNU Lightning on Windows seems to use our
+ * mapped registers as temporaries. Until the actual bug
+ * is found and fixed, unconditionally mark our
+ * registers as live here. */
+ lightrec_regcache_mark_live(state->reg_cache, _jit);
+#endif
+ }
+ }
+
+ for (i = 0; i < state->nb_branches; i++)
+ jit_patch(state->branches[i]);
+
+ for (i = 0; i < state->nb_local_branches; i++) {
+ struct lightrec_branch *branch = &state->local_branches[i];
+
+ pr_debug("Patch local branch to offset 0x%x\n",
+ branch->target << 2);
+
+ if (branch->target == 0) {
+ jit_patch_at(branch->branch, start_of_block);
+ continue;
+ }
+
+ for (j = 0; j < state->nb_targets; j++) {
+ if (state->targets[j].offset == branch->target) {
+ jit_patch_at(branch->branch,
+ state->targets[j].label);
+ break;
+ }
+ }
+
+ if (j == state->nb_targets)
+ pr_err("Unable to find branch target\n");
+ }
+
+ jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, eob_wrapper_func));
+
+ jit_jmpr(JIT_R0);
+
+ jit_ret();
+ jit_epilog();
+
+ block->function = jit_emit();
+
+ /* Add compiled function to the LUT */
+ state->code_lut[lut_offset(block->pc)] = block->function;
+
+ jit_get_code(&code_size);
+ lightrec_register(MEM_FOR_CODE, code_size);
+
+ block->code_size = code_size;
+
+ if (ENABLE_DISASSEMBLER) {
+ pr_debug("Compiling block at PC: 0x%x\n", block->pc);
+ jit_disassemble();
+ }
+
+ jit_clear_state();
+
+#if ENABLE_THREADED_COMPILER
+ if (fully_tagged)
+ op_list_freed = atomic_flag_test_and_set(&block->op_list_freed);
+#endif
+ if (fully_tagged && !op_list_freed) {
+ pr_debug("Block PC 0x%08x is fully tagged"
+ " - free opcode list\n", block->pc);
+ lightrec_free_opcode_list(state, block->opcode_list);
+ block->opcode_list = NULL;
+ }
+
+ return 0;
+}
+
+u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
+{
+ s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
+ void *block_trace;
+ s32 cycles_delta;
+
+ state->exit_flags = LIGHTREC_EXIT_NORMAL;
+
+ /* Handle the cycle counter overflowing */
+ if (unlikely(target_cycle < state->current_cycle))
+ target_cycle = UINT_MAX;
+
+ state->target_cycle = target_cycle;
+
+ block_trace = get_next_block_func(state, pc);
+ if (block_trace) {
+ cycles_delta = state->target_cycle - state->current_cycle;
+
+ cycles_delta = (*func)(block_trace, cycles_delta);
+
+ state->current_cycle = state->target_cycle - cycles_delta;
+ }
+
+ return state->next_pc;
+}
+
+u32 lightrec_execute_one(struct lightrec_state *state, u32 pc)
+{
+ return lightrec_execute(state, pc, state->current_cycle);
+}
+
+u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
+{
+ struct block *block = lightrec_get_block(state, pc);
+ if (!block)
+ return 0;
+
+ state->exit_flags = LIGHTREC_EXIT_NORMAL;
+
+ return lightrec_emulate_block(block, pc);
+}
+
+void lightrec_free_block(struct block *block)
+{
+ lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
+ if (block->opcode_list)
+ lightrec_free_opcode_list(block->state, block->opcode_list);
+ if (block->_jit)
+ _jit_destroy_state(block->_jit);
+ lightrec_unregister(MEM_FOR_CODE, block->code_size);
+ lightrec_free(block->state, MEM_FOR_IR, sizeof(*block), block);
+}
+
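+/* A minimal usage sketch from the embedder's point of view. The memory map
+ * table, ops table and cycle values below are placeholders that the
+ * embedding emulator must provide:
+ *
+ *   struct lightrec_state *state =
+ *           lightrec_init(argv[0], psx_maps, nb_maps, &psx_ops);
+ *   u32 pc = 0xbfc00000;    (e.g. the PS1 BIOS reset vector)
+ *   while (emulator_running())
+ *           pc = lightrec_execute(state, pc, next_event_cycle);
+ *   lightrec_destroy(state);
+ */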
+struct lightrec_state * lightrec_init(char *argv0,
+ const struct lightrec_mem_map *map,
+ size_t nb,
+ const struct lightrec_ops *ops)
+{
+ struct lightrec_state *state;
+
+ /* Sanity-check ops */
+ if (!ops ||
+ !ops->cop0_ops.mfc || !ops->cop0_ops.cfc || !ops->cop0_ops.mtc ||
+ !ops->cop0_ops.ctc || !ops->cop0_ops.op ||
+ !ops->cop2_ops.mfc || !ops->cop2_ops.cfc || !ops->cop2_ops.mtc ||
+ !ops->cop2_ops.ctc || !ops->cop2_ops.op) {
+ pr_err("Missing callbacks in lightrec_ops structure\n");
+ return NULL;
+ }
+
+ init_jit(argv0);
+
+ state = calloc(1, sizeof(*state) +
+ sizeof(*state->code_lut) * CODE_LUT_SIZE);
+ if (!state)
+ goto err_finish_jit;
+
+ lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) +
+ sizeof(*state->code_lut) * CODE_LUT_SIZE);
+
+#if ENABLE_TINYMM
+ state->tinymm = tinymm_init(malloc, free, 4096);
+ if (!state->tinymm)
+ goto err_free_state;
+#endif
+
+ state->block_cache = lightrec_blockcache_init(state);
+ if (!state->block_cache)
+ goto err_free_tinymm;
+
+ state->reg_cache = lightrec_regcache_init(state);
+ if (!state->reg_cache)
+ goto err_free_block_cache;
+
+ if (ENABLE_THREADED_COMPILER) {
+ state->rec = lightrec_recompiler_init(state);
+ if (!state->rec)
+ goto err_free_reg_cache;
+ }
+
+ state->nb_maps = nb;
+ state->maps = map;
+
+ memcpy(&state->ops, ops, sizeof(*ops));
+
+ state->dispatcher = generate_dispatcher(state);
+ if (!state->dispatcher)
+ goto err_free_recompiler;
+
+ state->rw_generic_wrapper = generate_wrapper(state,
+ lightrec_rw_generic_cb,
+ true);
+ if (!state->rw_generic_wrapper)
+ goto err_free_dispatcher;
+
+ state->rw_wrapper = generate_wrapper(state, lightrec_rw_cb, false);
+ if (!state->rw_wrapper)
+ goto err_free_generic_rw_wrapper;
+
+ state->mfc_wrapper = generate_wrapper(state, lightrec_mfc_cb, false);
+ if (!state->mfc_wrapper)
+ goto err_free_rw_wrapper;
+
+ state->mtc_wrapper = generate_wrapper(state, lightrec_mtc_cb, false);
+ if (!state->mtc_wrapper)
+ goto err_free_mfc_wrapper;
+
+ state->rfe_wrapper = generate_wrapper(state, lightrec_rfe_cb, false);
+ if (!state->rfe_wrapper)
+ goto err_free_mtc_wrapper;
+
+ state->cp_wrapper = generate_wrapper(state, lightrec_cp_cb, false);
+ if (!state->cp_wrapper)
+ goto err_free_rfe_wrapper;
+
+ state->syscall_wrapper = generate_wrapper(state, lightrec_syscall_cb,
+ false);
+ if (!state->syscall_wrapper)
+ goto err_free_cp_wrapper;
+
+ state->break_wrapper = generate_wrapper(state, lightrec_break_cb,
+ false);
+ if (!state->break_wrapper)
+ goto err_free_syscall_wrapper;
+
+ state->rw_generic_func = state->rw_generic_wrapper->function;
+ state->rw_func = state->rw_wrapper->function;
+ state->mfc_func = state->mfc_wrapper->function;
+ state->mtc_func = state->mtc_wrapper->function;
+ state->rfe_func = state->rfe_wrapper->function;
+ state->cp_func = state->cp_wrapper->function;
+ state->syscall_func = state->syscall_wrapper->function;
+ state->break_func = state->break_wrapper->function;
+
+ map = &state->maps[PSX_MAP_BIOS];
+ state->offset_bios = (uintptr_t)map->address - map->pc;
+
+ map = &state->maps[PSX_MAP_SCRATCH_PAD];
+ state->offset_scratch = (uintptr_t)map->address - map->pc;
+
+ map = &state->maps[PSX_MAP_KERNEL_USER_RAM];
+ state->offset_ram = (uintptr_t)map->address - map->pc;
+
+ if (state->maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 &&
+ state->maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 &&
+ state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
+ state->mirrors_mapped = true;
+
+ return state;
+
+err_free_syscall_wrapper:
+ lightrec_free_block(state->syscall_wrapper);
+err_free_cp_wrapper:
+ lightrec_free_block(state->cp_wrapper);
+err_free_rfe_wrapper:
+ lightrec_free_block(state->rfe_wrapper);
+err_free_mtc_wrapper:
+ lightrec_free_block(state->mtc_wrapper);
+err_free_mfc_wrapper:
+ lightrec_free_block(state->mfc_wrapper);
+err_free_rw_wrapper:
+ lightrec_free_block(state->rw_wrapper);
+err_free_generic_rw_wrapper:
+ lightrec_free_block(state->rw_generic_wrapper);
+err_free_dispatcher:
+ lightrec_free_block(state->dispatcher);
+err_free_recompiler:
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_free_recompiler(state->rec);
+err_free_reg_cache:
+ lightrec_free_regcache(state->reg_cache);
+err_free_block_cache:
+ lightrec_free_block_cache(state->block_cache);
+err_free_tinymm:
+#if ENABLE_TINYMM
+ tinymm_shutdown(state->tinymm);
+err_free_state:
+#endif
+ lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
+ sizeof(*state->code_lut) * CODE_LUT_SIZE);
+ free(state);
+err_finish_jit:
+ finish_jit();
+ return NULL;
+}
+
+void lightrec_destroy(struct lightrec_state *state)
+{
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_free_recompiler(state->rec);
+
+ lightrec_free_regcache(state->reg_cache);
+ lightrec_free_block_cache(state->block_cache);
+ lightrec_free_block(state->dispatcher);
+ lightrec_free_block(state->rw_generic_wrapper);
+ lightrec_free_block(state->rw_wrapper);
+ lightrec_free_block(state->mfc_wrapper);
+ lightrec_free_block(state->mtc_wrapper);
+ lightrec_free_block(state->rfe_wrapper);
+ lightrec_free_block(state->cp_wrapper);
+ lightrec_free_block(state->syscall_wrapper);
+ lightrec_free_block(state->break_wrapper);
+ finish_jit();
+
+#if ENABLE_TINYMM
+ tinymm_shutdown(state->tinymm);
+#endif
+ lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
+ sizeof(*state->code_lut) * CODE_LUT_SIZE);
+ free(state);
+}
+
+void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
+{
+ u32 kaddr = kunseg(addr & ~0x3);
+ const struct lightrec_mem_map *map = lightrec_get_map(state, kaddr);
+
+ if (map) {
+ while (map->mirror_of)
+ map = map->mirror_of;
+
+ if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM])
+ return;
+
+ /* Handle mirrors */
+ kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1);
+
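+		/* Invalidate every word that [addr, addr + len) touches;
+		 * the extra call after the loop covers the final word. */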
+ for (; len > 4; len -= 4, kaddr += 4)
+ lightrec_invalidate_map(state, map, kaddr);
+
+ lightrec_invalidate_map(state, map, kaddr);
+ }
+}
+
+void lightrec_invalidate_all(struct lightrec_state *state)
+{
+ memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE);
+}
+
+void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only)
+{
+ if (state->invalidate_from_dma_only != dma_only)
+ lightrec_invalidate_all(state);
+
+ state->invalidate_from_dma_only = dma_only;
+}
+
+void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags)
+{
+ if (flags != LIGHTREC_EXIT_NORMAL) {
+ state->exit_flags |= flags;
+ state->target_cycle = state->current_cycle;
+ }
+}
+
+u32 lightrec_exit_flags(struct lightrec_state *state)
+{
+ return state->exit_flags;
+}
+
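+/* The 34 registers are the 32 MIPS general-purpose registers plus the
+ * LO/HI multiply-divide registers. */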
+void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34])
+{
+ memcpy(regs, state->native_reg_cache, sizeof(state->native_reg_cache));
+}
+
+void lightrec_restore_registers(struct lightrec_state *state, u32 regs[34])
+{
+ memcpy(state->native_reg_cache, regs, sizeof(state->native_reg_cache));
+}
+
+u32 lightrec_current_cycle_count(const struct lightrec_state *state)
+{
+ return state->current_cycle;
+}
+
+void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles)
+{
+ state->current_cycle = cycles;
+
+ if (state->target_cycle < cycles)
+ state->target_cycle = cycles;
+}
+
+void lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles)
+{
+ if (state->exit_flags == LIGHTREC_EXIT_NORMAL) {
+ if (cycles < state->current_cycle)
+ cycles = state->current_cycle;
+
+ state->target_cycle = cycles;
+ }
+}