summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAutechre2021-03-23 20:13:44 +0100
committerGitHub2021-03-23 20:13:44 +0100
commite5c685583ed8808580ca33c5a59d554bfbb989ad (patch)
tree11399685ea3766006b09d33f983cfae5b98c4f20
parentf31fa6a57be67619ce10279152d7c9dbf6c2307b (diff)
parentff510e7f7a0c04c7862e598e8bfc75747f3bf7d1 (diff)
downloadpicogpsp-e5c685583ed8808580ca33c5a59d554bfbb989ad.tar.gz
picogpsp-e5c685583ed8808580ca33c5a59d554bfbb989ad.tar.bz2
picogpsp-e5c685583ed8808580ca33c5a59d554bfbb989ad.zip
Merge pull request #118 from davidgfnet/master
-rw-r--r--arm/arm_stub.S128
-rw-r--r--cpu.h19
-rw-r--r--cpu_threaded.c18
-rw-r--r--gpsp_config.h22
-rw-r--r--psp/mips_emit.h9
-rw-r--r--psp/mips_stub.S21
-rw-r--r--x86/x86_stub.S85
7 files changed, 137 insertions, 165 deletions
diff --git a/arm/arm_stub.S b/arm/arm_stub.S
index f5fceb0..f0b7f52 100644
--- a/arm/arm_stub.S
+++ b/arm/arm_stub.S
@@ -1,14 +1,14 @@
-.align 2
-.globl invalidate_icache_region
-.globl invalidate_cache_region
+#include "../gpsp_config.h"
+
+#define defsymbl(symbol) \
+.global symbol ; \
+.global _##symbol ; \
+symbol: \
+_##symbol:
-.globl memory_map_read
-.globl reg
-.globl palette_ram
-.globl palette_ram_converted
-.globl reg_mode
-.globl spsr
+.text
+.align 2
#define REG_R0 (0 * 4)
#define REG_R1 (1 * 4)
@@ -178,10 +178,7 @@
#define arm_update_gba_builder(name, mode, return_op) ;\
;\
.align 2 ;\
-.globl arm_update_gba_##name ;\
-.globl _arm_update_gba_##name ;\
-arm_update_gba_##name: ;\
-_arm_update_gba_##name: ;\
+defsymbl(arm_update_gba_##name) ;\
load_pc_##return_op() ;\
str r0, [reg_base, #REG_PC] /* write out the PC */;\
;\
@@ -243,30 +240,21 @@ arm_update_gba_builder(idle_thumb, thumb, add)
@ r0: PC to branch to
.align 2
-.globl arm_indirect_branch_arm
-.globl _arm_indirect_branch_arm
-arm_indirect_branch_arm:
-_arm_indirect_branch_arm:
+defsymbl(arm_indirect_branch_arm)
save_flags()
call_c_function(block_lookup_address_arm)
restore_flags()
bx r0
.align 2
-.globl arm_indirect_branch_thumb
-.globl _arm_indirect_branch_thumb
-arm_indirect_branch_thumb:
-_arm_indirect_branch_thumb:
+defsymbl(arm_indirect_branch_thumb)
save_flags()
call_c_function(block_lookup_address_thumb)
restore_flags()
bx r0
.align 2
-.globl arm_indirect_branch_dual_arm
-.globl _arm_indirect_branch_dual_arm
-arm_indirect_branch_dual_arm:
-_arm_indirect_branch_dual_arm:
+defsymbl(arm_indirect_branch_dual_arm)
save_flags()
tst r0, #0x01 @ check lower bit
bne 1f @ if set going to Thumb mode
@@ -286,10 +274,7 @@ _arm_indirect_branch_dual_arm:
bx r0 @ return
.align 2
-.globl arm_indirect_branch_dual_thumb
-.globl _arm_indirect_branch_dual_thumb
-arm_indirect_branch_dual_thumb:
-_arm_indirect_branch_dual_thumb:
+defsymbl(arm_indirect_branch_dual_thumb)
save_flags()
tst r0, #0x01 @ check lower bit
beq 1f @ if clear going to ARM mode
@@ -317,10 +302,7 @@ _arm_indirect_branch_dual_thumb:
@ r2: current PC
.align 2
-.globl execute_store_cpsr
-.globl _execute_store_cpsr
-execute_store_cpsr:
-_execute_store_cpsr:
+defsymbl(execute_store_cpsr)
save_flags()
and reg_flags, r0, r1 @ reg_flags = new_cpsr & store_mask
ldr r0, [reg_base, #REG_CPSR] @ r0 = cpsr
@@ -354,10 +336,7 @@ _execute_store_cpsr:
@ r1: bitmask of which bits in spsr to update
.align 2
-.globl execute_store_spsr
-.globl _execute_store_spsr
-execute_store_spsr:
-_execute_store_spsr:
+defsymbl(execute_store_spsr)
ldr r1, =spsr @ r1 = spsr
ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE
str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr
@@ -369,10 +348,7 @@ _execute_store_spsr:
@ r0: spsr
.align 2
-.globl execute_read_spsr
-.globl _execute_read_spsr
-execute_read_spsr:
-_execute_read_spsr:
+defsymbl(execute_read_spsr)
ldr r0, =spsr @ r0 = spsr
ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE
ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE]
@@ -385,10 +361,7 @@ _execute_read_spsr:
@ r0: current pc
.align 2
-.globl execute_spsr_restore
-.globl _execute_spsr_restore
-execute_spsr_restore:
-_execute_spsr_restore:
+defsymbl(execute_spsr_restore)
save_flags()
ldr r1, =spsr @ r1 = spsr
ldr r2, [reg_base, #CPU_MODE] @ r2 = cpu_mode
@@ -425,10 +398,7 @@ _execute_spsr_restore:
#define execute_swi_builder(mode) ;\
;\
.align 2 ;\
-.globl execute_swi_##mode ;\
-.globl _execute_swi_##mode ;\
-execute_swi_##mode: ;\
-_execute_swi_##mode: ;\
+defsymbl(execute_swi_##mode) ;\
save_flags() ;\
ldr r1, =reg_mode /* r1 = reg_mode */;\
/* reg_mode[MODE_SUPERVISOR][6] = pc */;\
@@ -460,10 +430,7 @@ execute_swi_builder(thumb)
#define execute_swi_function_builder(swi_function, mode) ;\
;\
.align 2 ;\
-.globl execute_swi_hle_##swi_function##_##mode ;\
-.globl _execute_swi_hle_##swi_function##_##mode ;\
-execute_swi_hle_##swi_function##_##mode: ;\
-_execute_swi_hle_##swi_function##_##mode: ;\
+defsymbl(execute_swi_hle_##swi_function##_##mode) ;\
save_flags() ;\
store_registers_##mode() ;\
call_c_function(execute_swi_hle_##swi_function##_c) ;\
@@ -485,10 +452,7 @@ execute_swi_function_builder(div, thumb)
@ Uses sp as reg_base; must hold consistently true.
.align 2
-.globl execute_arm_translate
-.globl _execute_arm_translate
-execute_arm_translate:
-_execute_arm_translate:
+defsymbl(execute_arm_translate)
@ save the registers to be able to return later
stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr }
@@ -615,10 +579,7 @@ ext_store_ignore:
#define execute_store_builder(store_type, store_op, load_op) ;\
;\
.align 2 ;\
-.globl execute_store_u##store_type ;\
-.globl _execute_store_u##store_type ;\
-execute_store_u##store_type: ;\
-_execute_store_u##store_type: ;\
+defsymbl(execute_store_u##store_type) ;\
execute_store_body(store_type, store_op) ;\
;\
ext_store_u##store_type: ;\
@@ -676,10 +637,7 @@ execute_store_builder(32, str, ldr)
@ This is a store that is executed in a strm case (so no SMC checks in-between)
-.globl execute_store_u32_safe
-.globl _execute_store_u32_safe
-execute_store_u32_safe:
-_execute_store_u32_safe:
+defsymbl(execute_store_u32_safe)
execute_store_body(32_safe, str)
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return
@@ -822,10 +780,7 @@ lookup_pc_arm:
#define execute_load_builder(load_type, load_function, load_op, mask) ;\
;\
.align 2 ;\
-.globl execute_load_##load_type ;\
-.globl _execute_load_##load_type ;\
-execute_load_##load_type: ;\
-_execute_load_##load_type: ;\
+defsymbl(execute_load_##load_type) ;\
save_flags() ;\
tst r0, mask /* make sure address is in range */;\
bne ext_load_##load_type /* if not do ext load */;\
@@ -859,19 +814,38 @@ execute_load_builder(u32, 32, ldrne, #0xF0000000)
.data
-memory_map_read:
+defsymbl(memory_map_read)
.space 0x8000
-palette_ram:
+defsymbl(palette_ram)
.space 0x400
-palette_ram_converted:
+defsymbl(palette_ram_converted)
.space 0x400
-spsr:
+defsymbl(spsr)
.space 24
-reg_mode:
+defsymbl(reg_mode)
.space 196
-.globl reg
-.globl _reg
-reg:
+defsymbl(reg)
.space 0x100, 0
+@ Vita and 3DS (and of course mmap) map their own cache sections through some
+@ platform-specific mechanisms.
+#if !defined(HAVE_MMAP) && !defined(VITA) && !defined(_3DS)
+
+@ Make this section executable!
+.text
+#ifdef __ANDROID__
+@ Unfortunately Android builds don't like nobits, so we ship a ton of zeros
+@ TODO: Revisit this whenever we upgrade to the latest clang NDK
+.section .jit,"awx",%progbits
+#else
+.section .jit,"awx",%nobits
+#endif
+.align 4
+defsymbl(rom_translation_cache)
+ .space ROM_TRANSLATION_CACHE_SIZE
+defsymbl(ram_translation_cache)
+ .space RAM_TRANSLATION_CACHE_SIZE
+
+#endif
+
diff --git a/cpu.h b/cpu.h
index faa3bc1..fc57626 100644
--- a/cpu.h
+++ b/cpu.h
@@ -20,6 +20,8 @@
#ifndef CPU_H
#define CPU_H
+#include "gpsp_config.h"
+
// System mode and user mode are represented as the same here
typedef enum
@@ -120,18 +122,6 @@ s32 translate_block_arm(u32 pc, translation_region_type translation_region,
s32 translate_block_thumb(u32 pc, translation_region_type translation_region,
u32 smc_enable);
-#if defined(PSP)
- #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4)
- #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384)
- #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024)
-#else
- #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5)
- #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2)
- #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32)
-#endif
-
-#define STUB_ARENA_SIZE (4*1024)
-
#if defined(HAVE_MMAP)
extern u8* rom_translation_cache;
extern u8* ram_translation_cache;
@@ -147,8 +137,8 @@ extern int sceBlock;
#else
extern u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE];
extern u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE];
-extern u32 stub_arena[STUB_ARENA_SIZE];
#endif
+extern u32 stub_arena[STUB_ARENA_SIZE / 4];
extern u8 *rom_translation_ptr;
extern u8 *ram_translation_ptr;
@@ -162,9 +152,6 @@ extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES];
extern u32 in_interrupt;
-#define ROM_BRANCH_HASH_SIZE (1024 * 64)
-
-/* EDIT: Shouldn't this be extern ?! */
extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE];
void flush_translation_cache_rom(void);
diff --git a/cpu_threaded.c b/cpu_threaded.c
index 555b9c6..7f12b4f 100644
--- a/cpu_threaded.c
+++ b/cpu_threaded.c
@@ -47,26 +47,10 @@ u8* ram_translation_cache_ptr;
u8 *rom_translation_ptr = rom_translation_cache;
u8 *ram_translation_ptr = ram_translation_cache;
#else
-
-#ifdef __ANDROID__
-// Workaround for 'attempt to map x bytes at offset y'
-__asm__(".section .jit,\"awx\",%progbits");
-#else
-__asm__(".section .jit,\"awx\",%nobits");
-#endif
-
-u32 stub_arena[STUB_ARENA_SIZE]
- __attribute__ ((aligned(4),section(".jit")));
-u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]
- __attribute__ ((aligned(4),section(".jit")));
u8 *rom_translation_ptr = rom_translation_cache;
-
-u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE]
- __attribute__ ((aligned(4),section(".jit")));
u8 *ram_translation_ptr = ram_translation_cache;
-
-__asm__(".section .text");
#endif
+/* Note, see stub files for more cache definitions */
u32 iwram_code_min = 0xFFFFFFFF;
u32 iwram_code_max = 0xFFFFFFFF;
diff --git a/gpsp_config.h b/gpsp_config.h
new file mode 100644
index 0000000..ea8db95
--- /dev/null
+++ b/gpsp_config.h
@@ -0,0 +1,22 @@
+
+#ifndef GPSP_CONFIG_H
+#define GPSP_CONFIG_H
+
+/* Cache sizes and their config knobs */
+#if defined(PSP)
+ #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4)
+ #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384)
+ #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024)
+#else
+ #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5)
+ #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2)
+ #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32)
+#endif
+
+/* This is MIPS specific for now */
+#define STUB_ARENA_SIZE (16*1024)
+
+/* Hash table size for ROM trans cache lookups */
+#define ROM_BRANCH_HASH_SIZE (1024 * 64)
+
+#endif
diff --git a/psp/mips_emit.h b/psp/mips_emit.h
index b75f7f5..b996f2b 100644
--- a/psp/mips_emit.h
+++ b/psp/mips_emit.h
@@ -2618,11 +2618,7 @@ static void emit_mem_access_loadop(
#define genccall(fn) mips_emit_jal(((u32)fn) >> 2);
#endif
-// Stub memory map:
-// 0 .. 63 First patch handler [#0]
-// 448 .. 511 Last patch handler [#7]
-// 512+ smc_write handler
-#define SMC_WRITE_OFF32 160
+#define SMC_WRITE_OFF32 (10*16) /* 10 handlers (16 insts) */
// Describes a "plain" memory area, that is, an area that is just accessed
// as normal memory (with some caveats though).
@@ -2862,8 +2858,7 @@ static void emit_pmemst_stub(
}
// If the data is non zero, we just wrote over code
// Local-jump to the smc_write handler (which lives at stub_arena[SMC_WRITE_OFF32])
- unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1));
- mips_emit_b(bne, reg_zero, reg_temp, instoffset);
+ mips_emit_b(bne, reg_zero, reg_temp, branch_offset(&stub_arena[SMC_WRITE_OFF32]));
}
// Store the data (delay slot from the SMC branch)
diff --git a/psp/mips_stub.S b/psp/mips_stub.S
index 5e5a479..3d046d8 100644
--- a/psp/mips_stub.S
+++ b/psp/mips_stub.S
@@ -16,6 +16,8 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#include "../gpsp_config.h"
+
.set mips32r2
.align 4
@@ -645,3 +647,22 @@ fnptrs:
.long execute_spsr_restore_body # 6
.long execute_store_cpsr_body # 7
+#if !defined(HAVE_MMAP)
+
+# Make this section executable!
+.text
+.section .jit,"awx",%nobits
+.align 2
+.global stub_arena
+.global rom_translation_cache
+.global ram_translation_cache
+
+stub_arena:
+ .space STUB_ARENA_SIZE
+rom_translation_cache:
+ .space ROM_TRANSLATION_CACHE_SIZE
+ram_translation_cache:
+ .space RAM_TRANSLATION_CACHE_SIZE
+
+#endif
+
diff --git a/x86/x86_stub.S b/x86/x86_stub.S
index 595a789..9dd3fdd 100644
--- a/x86/x86_stub.S
+++ b/x86/x86_stub.S
@@ -16,21 +16,18 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#include "../gpsp_config.h"
+
.align 4
+#define defsymbl(symbol) \
+.global symbol ; \
+.global _##symbol ; \
+symbol: \
+_##symbol:
+
#ifndef _WIN32
-#define _x86_update_gba x86_update_gba
-#define _x86_indirect_branch_arm x86_indirect_branch_arm
-#define _x86_indirect_branch_thumb x86_indirect_branch_thumb
-#define _x86_indirect_branch_dual x86_indirect_branch_dual
-#define _execute_store_u8 execute_store_u8
-#define _execute_store_u16 execute_store_u16
-#define _execute_store_u32 execute_store_u32
-#define _execute_store_cpsr execute_store_cpsr
-#define _execute_arm_translate execute_arm_translate
-#define _memory_map_read memory_map_read
-#define _reg reg
-#define _reg_mode reg_mode
+# External symbols (data + functions)
#define _oam_update oam_update
#define _iwram iwram
#define _ewram ewram
@@ -38,7 +35,6 @@
#define _oam_ram oam_ram
#define _bios_rom bios_rom
#define _io_registers io_registers
-#define _spsr spsr
#define _update_gba update_gba
#define _block_lookup_address_arm block_lookup_address_arm
@@ -47,8 +43,6 @@
#define _write_io_register8 write_io_register8
#define _write_io_register16 write_io_register16
#define _write_io_register32 write_io_register32
-#define _palette_ram palette_ram
-#define _palette_ram_converted palette_ram_converted
#define _flush_translation_cache_ram flush_translation_cache_ram
#define _write_eeprom write_eeprom
#define _write_backup write_backup
@@ -56,25 +50,7 @@
#define _execute_store_cpsr_body execute_store_cpsr_body
#endif
-.global _x86_update_gba
-.global _x86_indirect_branch_arm
-.global _x86_indirect_branch_thumb
-.global _x86_indirect_branch_dual
-.global _execute_store_u8
-.global _execute_store_u16
-.global _execute_store_u32
-.global _execute_store_cpsr
-.global _execute_arm_translate
-
-.global _memory_map_read
-.global _reg
-.global _reg_mode
-.global _spsr
-.global _palette_ram
-.global _palette_ram_converted
-
.global _oam_update
-
.global _iwram
.global _ewram
.global _vram
@@ -147,7 +123,7 @@
st:
.asciz "u\n"
-_x86_update_gba:
+defsymbl(x86_update_gba)
mov %eax, REG_PC(%ebx) # current PC = eax
collapse_flags # update cpsr, trashes ecx and edx
@@ -171,14 +147,14 @@ _x86_update_gba:
# eax: GBA address to branch to
# edi: Cycle counter
-_x86_indirect_branch_arm:
+defsymbl(x86_indirect_branch_arm)
call _block_lookup_address_arm
jmp *%eax
# For indirect branches that'll definitely go to Thumb. In
# Thumb mode any indirect branches except for BX.
-_x86_indirect_branch_thumb:
+defsymbl(x86_indirect_branch_thumb)
call _block_lookup_address_thumb
jmp *%eax
@@ -186,7 +162,7 @@ _x86_indirect_branch_thumb:
# mainly BX (also data processing to PC with S bit set, be
# sure to adjust the target with a 1 in the lowest bit for this)
-_x86_indirect_branch_dual:
+defsymbl(x86_indirect_branch_dual)
call _block_lookup_address_dual
jmp *%eax
@@ -297,7 +273,7 @@ ext_store_u8_jtable:
# edx: value to write
# ecx: current pc
-_execute_store_u8:
+defsymbl(execute_store_u8)
mov %ecx, REG_PC(%ebx) # write out the PC
mov %eax, %ecx # ecx = address
shr $24, %ecx # ecx = address >> 24
@@ -383,7 +359,7 @@ ext_store_u16_jtable:
.long ext_store_eeprom # 0x0D EEPROM (possibly)
.long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit
-_execute_store_u16:
+defsymbl(execute_store_u16)
mov %ecx, REG_PC(%ebx) # write out the PC
and $~0x01, %eax # fix alignment
mov %eax, %ecx # ecx = address
@@ -400,6 +376,7 @@ ext_store_iwram32:
and $0x7FFF, %eax # wrap around address
mov %edx, (_iwram+0x8000)(%eax) # perform store
cmpl $0, _iwram(%eax) # Check SMC mirror
+
jne smc_write
ret
@@ -456,7 +433,7 @@ ext_store_u32_jtable:
.long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit
-_execute_store_u32:
+defsymbl(execute_store_u32)
mov %ecx, REG_PC(%ebx) # write out the PC
and $~0x03, %eax # fix alignment
mov %eax, %ecx # ecx = address
@@ -470,7 +447,7 @@ _execute_store_u32:
# %eax = new_cpsr
# %edx = store_mask
-_execute_store_cpsr:
+defsymbl(execute_store_cpsr)
mov %edx, REG_SAVE(%ebx) # save store_mask
mov %ecx, REG_SAVE2(%ebx) # save PC too
@@ -515,7 +492,7 @@ lookup_pc_arm:
# eax: cycle counter
-_execute_arm_translate:
+defsymbl(execute_arm_translate)
# Save main context, since we need to return gracefully
pushl %ebx
pushl %esi
@@ -556,18 +533,30 @@ return_to_main:
.data
.align 64
-_reg:
+defsymbl(reg)
.space 0x100, 0
-_palette_ram:
+defsymbl(palette_ram)
.space 0x400
-_palette_ram_converted:
+defsymbl(palette_ram_converted)
.space 0x400
-_spsr:
+defsymbl(spsr)
.space 24
-_reg_mode:
+defsymbl(reg_mode)
.space 196
-_memory_map_read:
+defsymbl(memory_map_read)
.space 0x8000
+#if !defined(HAVE_MMAP)
+
+# Make this section executable!
+.text
+.section .jit,"awx",%nobits
+.align 4
+defsymbl(rom_translation_cache)
+ .space ROM_TRANSLATION_CACHE_SIZE
+defsymbl(ram_translation_cache)
+ .space RAM_TRANSLATION_CACHE_SIZE
+
+#endif