From f6f3a910397afb769f84d1332014c4eda1fedebe Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Mon, 22 Mar 2021 21:45:52 +0100 Subject: Adding Normmatt's BIOS as a built-in BIOS Add options to select whether to boot from BIOS (default is no, as it is now) and whether to use the original bios or the builtin one (default is auto, which tries to use the official but falls back to the builtin if not found). --- Makefile.common | 2 +- bios/README.md | 7 ++++ bios/open_gba_bios.bin | Bin 0 -> 16384 bytes bios_data.S | 7 ++++ cpu.c | 18 +++++++-- gba_memory.h | 1 + libretro.c | 100 ++++++++++++++++++++++++++++++++++++++---------- libretro_core_options.h | 23 +++++++++++ main.h | 14 +++++++ 9 files changed, 147 insertions(+), 25 deletions(-) create mode 100644 bios/README.md create mode 100644 bios/open_gba_bios.bin create mode 100644 bios_data.S diff --git a/Makefile.common b/Makefile.common index 7e70627..0dcbedc 100644 --- a/Makefile.common +++ b/Makefile.common @@ -1,6 +1,6 @@ INCFLAGS := -I$(CORE_DIR)/libretro -I$(CORE_DIR)/src -SOURCES_ASM := +SOURCES_ASM := $(CORE_DIR)/bios_data.S ifeq ($(HAVE_GRIFFIN), 1) SOURCES_C := $(CORE_DIR)/gpsp_griffin.c diff --git a/bios/README.md b/bios/README.md new file mode 100644 index 0000000..64d6679 --- /dev/null +++ b/bios/README.md @@ -0,0 +1,7 @@ + +This BIOS is an open source replacement for Nintendo's official BIOS. 
+It was written originally by Normmatt and the VBA/VBA-M team, and its source +code can be found at https://github.com/Nebuleon/ReGBA/tree/master/bios + +It is distributed under the GPL2 license (see repo) + diff --git a/bios/open_gba_bios.bin b/bios/open_gba_bios.bin new file mode 100644 index 0000000..802982e Binary files /dev/null and b/bios/open_gba_bios.bin differ diff --git a/bios_data.S b/bios_data.S new file mode 100644 index 0000000..9de9d8a --- /dev/null +++ b/bios_data.S @@ -0,0 +1,7 @@ + +.globl open_gba_bios_rom + +.data +open_gba_bios_rom: + .incbin "bios/open_gba_bios.bin" + diff --git a/cpu.c b/cpu.c index 2c93f38..11c947f 100644 --- a/cpu.c +++ b/cpu.c @@ -4297,13 +4297,23 @@ void init_cpu(void) for(i = 0; i < 16; i++) reg[i] = 0; - reg[REG_SP] = 0x03007F00; - reg[REG_PC] = 0x08000000; - reg[REG_CPSR] = 0x0000001F; reg[CPU_HALT_STATE] = CPU_ACTIVE; - reg[CPU_MODE] = MODE_USER; reg[CHANGED_PC_STATUS] = 0; + if (selected_boot_mode == boot_game) { + reg[REG_SP] = 0x03007F00; + reg[REG_PC] = 0x08000000; + reg[REG_CPSR] = 0x0000001F; // system mode + reg[CPU_MODE] = MODE_USER; + } else { + reg[REG_SP] = 0x03007F00; + reg[REG_PC] = 0x00000000; + reg[REG_CPSR] = 0x00000013 | 0xC0; // supervisor + reg[CPU_MODE] = MODE_SUPERVISOR; + } + + // Stack pointers are set by BIOS, we set them + // nevertheless, should we not boot from BIOS reg_mode[MODE_USER][5] = 0x03007F00; reg_mode[MODE_IRQ][5] = 0x03007FA0; reg_mode[MODE_FIQ][5] = 0x03007FA0; diff --git a/gba_memory.h b/gba_memory.h index 9352e88..eaa5bab 100644 --- a/gba_memory.h +++ b/gba_memory.h @@ -201,6 +201,7 @@ extern u32 gbc_sound_update; extern u32 gbc_sound_wave_update; extern dma_transfer_type dma[4]; +extern u8 open_gba_bios_rom[1024*16]; extern u32 bios_read_protect; extern u16 palette_ram[512]; extern u16 oam_ram[512]; diff --git a/libretro.c b/libretro.c index bc61977..334864c 100644 --- a/libretro.c +++ b/libretro.c @@ -65,6 +65,7 @@ static unsigned audio_buff_occupancy = 0; static bool 
audio_buff_underrun = false; static unsigned audio_latency = 0; static bool update_audio_latency = false; +static bios_type selected_bios = auto_detect; static retro_log_printf_t log_cb; static retro_video_refresh_t video_cb; @@ -75,6 +76,7 @@ struct retro_perf_callback perf_cb; int dynarec_enable; int use_libretro_save_method = 0; +boot_mode selected_boot_mode = boot_game; u32 idle_loop_target_pc = 0xFFFFFFFF; u32 iwram_stack_optimize = 1; @@ -114,6 +116,25 @@ static void info_msg(const char* text) log_cb(RETRO_LOG_INFO, "[gpSP]: %s\n", text); } +static void show_warning_message(const char* text, unsigned durationms) { + unsigned ifversion = 0; + if (environ_cb(RETRO_ENVIRONMENT_GET_MESSAGE_INTERFACE_VERSION, &ifversion) && ifversion >= 1) { + /* Frontend reported message interface v1+, use the extended API; a failed query means version 0, so the legacy call below is used instead */ + struct retro_message_ext msg = { + .msg = text, .duration = durationms, + .priority = 2, .level = RETRO_LOG_WARN, + .target = RETRO_MESSAGE_TARGET_ALL, + .type = RETRO_MESSAGE_TYPE_NOTIFICATION, + .progress = -1, + }; + environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE_EXT, &msg); + } + else { + struct retro_message msg = {.msg = text, .frames = durationms / 17}; + environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE, &msg); + } +} + /* Frameskip START */ static void audio_buff_status_cb( @@ -348,8 +369,8 @@ static void video_run(void) sceGuTexMode(GU_PSM_5650, 0, 0, GU_FALSE); sceGuCopyImage(GU_PSM_5650, 0, 0, GBA_SCREEN_WIDTH, GBA_SCREEN_HEIGHT, GBA_SCREEN_WIDTH, gba_screen_pixels_buf, 0, 0, GBA_SCREEN_WIDTH, texture_vram_p); - sceGuTexImage(0, next_pow2(GBA_SCREEN_WIDTH), next_pow2(GBA_SCREEN_HEIGHT), GBA_SCREEN_WIDTH, texture_vram_p); - sceGuTexFunc(GU_TFX_REPLACE, GU_TCC_RGB); + sceGuTexImage(0, next_pow2(GBA_SCREEN_WIDTH), next_pow2(GBA_SCREEN_HEIGHT), GBA_SCREEN_WIDTH, texture_vram_p); + sceGuTexFunc(GU_TFX_REPLACE, GU_TCC_RGB); sceGuDisable(GU_BLEND); sceGuFinish(); @@ -484,6 +505,8 @@ void retro_init(void) audio_buff_underrun = false; audio_latency = 0; update_audio_latency = false; +
selected_bios = auto_detect; + selected_boot_mode = boot_game; } void retro_deinit(void) @@ -660,6 +683,32 @@ static void check_variables(int started_from_load) dynarec_enable = 0; #endif + if (started_from_load) { + var.key = "gpsp_bios"; + var.value = 0; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (!strcmp(var.value, "auto")) + selected_bios = auto_detect; + else if (!strcmp(var.value, "builtin")) + selected_bios = builtin_bios; + else if (!strcmp(var.value, "official")) + selected_bios = official_bios; + } + + var.key = "gpsp_boot_mode"; + var.value = 0; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (!strcmp(var.value, "game")) + selected_boot_mode = boot_game; + else if (!strcmp(var.value, "bios")) + selected_boot_mode = boot_bios; + } + } + var.key = "gpsp_frameskip"; var.value = 0; frameskip_type_prev = current_frameskip_type; @@ -795,30 +844,41 @@ bool retro_load_game(const struct retro_game_info* info) extract_directory(main_path, info->path, sizeof(main_path)); - if (environ_cb(RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY, &dir) && dir) - strcpy(filename_bios, dir); - else - strcpy(filename_bios, main_path); - - strcat(filename_bios, "/gba_bios.bin"); - - if (environ_cb(RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY, &dir) && dir) strcpy(save_path, dir); else strcpy(save_path, main_path); - if (load_bios(filename_bios) != 0) + if (environ_cb(RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY, &dir) && dir) + strcpy(filename_bios, dir); + else + strcpy(filename_bios, main_path); + + bool bios_loaded = false; + log_cb(RETRO_LOG_DEBUG, "[gpSP]: selected BIOS option: %d\n", (int)selected_bios); + if (selected_bios == auto_detect || selected_bios == official_bios) { + bios_loaded = true; + strcat(filename_bios, "/gba_bios.bin"); + + if (load_bios(filename_bios) != 0) + { + if (selected_bios == official_bios) + show_warning_message("Could not load BIOS image file, using built-in BIOS", 2500); +
bios_loaded = false; + } + + if (bios_loaded && bios_rom[0] != 0x18) + { + if (selected_bios == official_bios) + show_warning_message("BIOS image seems incorrect, using built-in BIOS", 2500); + bios_loaded = false; + } } - if (bios_rom[0] != 0x18) - { - info_msg("You have an incorrect BIOS image."); - info_msg("While many games will work fine, some will not."); - info_msg("It is strongly recommended that you obtain the correct BIOS file."); + if (!bios_loaded) { + /* Load the built-in BIOS */ + memcpy(bios_rom, open_gba_bios_rom, sizeof(bios_rom)); } memset(gamepak_backup, -1, sizeof(gamepak_backup)); @@ -921,8 +981,8 @@ void retro_run(void) input_poll_cb(); - /* Check whether current frame should - * be skipped */ + /* Check whether current frame should + * be skipped */ skip_next_frame = 0; if (current_frameskip_type != no_frameskip) diff --git a/libretro_core_options.h b/libretro_core_options.h index b5b138d..d2b95fa 100644 --- a/libretro_core_options.h +++ b/libretro_core_options.h @@ -53,6 +53,29 @@ extern "C" { * frontend language definition */ struct retro_core_option_definition option_defs_us[] = { + { + "gpsp_bios", + "BIOS", + "Choose the BIOS image to use. The official BIOS must be provided by the user. Using a non-official (or builtin) BIOS might result in incompatibility problems with some games. Best results are to be achieved with the official Nintendo BIOS.", + { + { "auto", "Auto select" }, + { "builtin", "Builtin BIOS" }, + { "official", "Original BIOS" }, + { NULL, NULL }, + }, + "auto" + }, + { + "gpsp_boot_mode", + "Boot mode", + "Choose whether to boot the BIOS before the game or not. 
There's not much difference in either modes.", + { + { "game", "Boot to game" }, + { "bios", "Boot to BIOS" }, + { NULL, NULL }, + }, + "game" + }, { "gpsp_frameskip", "Frameskip", diff --git a/main.h b/main.h index dbb839f..bdb29e3 100644 --- a/main.h +++ b/main.h @@ -64,6 +64,19 @@ typedef enum fixed_interval_frameskip } frameskip_type; +typedef enum +{ + auto_detect = 0, + builtin_bios, + official_bios +} bios_type; + +typedef enum +{ + boot_game = 0, + boot_bios +} boot_mode; + extern u32 cpu_ticks; extern u32 execute_cycles; extern u32 global_cycles_per_instruction; @@ -98,6 +111,7 @@ u32 file_length(FILE *fp); extern u32 num_skipped_frames; extern int dynarec_enable; +extern boot_mode selected_boot_mode; void change_ext(const char *src, char *buffer, const char *extension); -- cgit v1.2.3 From 8c18c8c421bae8d90485adeca8348957cef8a6cf Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Mon, 22 Mar 2021 23:17:24 +0100 Subject: Fix Windows compilers symbol names --- bios_data.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bios_data.S b/bios_data.S index 9de9d8a..bb46ded 100644 --- a/bios_data.S +++ b/bios_data.S @@ -1,7 +1,11 @@ -.globl open_gba_bios_rom +#ifndef _WIN32 +#define _open_gba_bios_rom open_gba_bios_rom +#endif + +.globl _open_gba_bios_rom .data -open_gba_bios_rom: +_open_gba_bios_rom: .incbin "bios/open_gba_bios.bin" -- cgit v1.2.3 From 55eaee631d7d273deadc65480428f8212c8ee585 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 00:50:34 +0100 Subject: Export double symbol to cover Win but also Apple compilers --- bios_data.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bios_data.S b/bios_data.S index bb46ded..ff3f2c8 100644 --- a/bios_data.S +++ b/bios_data.S @@ -1,11 +1,9 @@ -#ifndef _WIN32 -#define _open_gba_bios_rom open_gba_bios_rom -#endif - .globl _open_gba_bios_rom +.globl open_gba_bios_rom .data _open_gba_bios_rom: +open_gba_bios_rom: .incbin 
"bios/open_gba_bios.bin" -- cgit v1.2.3 From 11ec213c99d5d22905ff82cf3fb26ba6a8adf290 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:05:35 +0100 Subject: Make ewram memory lineal This saves a few cycles in MIPS and simplifies a bit the core. Removed the write map, only affects interpreter performance very minimally. Rewired ARM and x86 handlers to support direct access to I/EWRAM (and VRAM on ARM) to compensate. Overall performance is slightly better but code is cleaner and allows for further improvements in the dynarecs. --- arm/arm_stub.S | 168 +++++++++++++++++++++++++++++++++++++++++++------------- cpu.c | 30 ++-------- cpu.h | 4 -- cpu_threaded.c | 60 ++++++-------------- gba_memory.c | 121 +++++++++------------------------------- gba_memory.h | 1 - libretro.c | 11 +--- psp/mips_emit.h | 80 ++++++++------------------- psp/mips_stub.S | 4 -- x86/x86_emit.h | 16 +----- x86/x86_stub.S | 126 +++++++++++++++++++----------------------- 11 files changed, 262 insertions(+), 359 deletions(-) diff --git a/arm/arm_stub.S b/arm/arm_stub.S index e8f7316..f5fceb0 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -4,7 +4,6 @@ .globl invalidate_cache_region .globl memory_map_read -.globl memory_map_write .globl reg .globl palette_ram .globl palette_ram_converted @@ -533,40 +532,84 @@ return_to_main: bx lr +#define store_align_8() ;\ + and r1, r1, #0xff ;\ + +#define store_align_16() ;\ + bic r0, r0, #0x01 ;\ + extract_u16(r1, r1) ;\ + +#define store_align_32() ;\ + bic r0, r0, #0x03 ;\ + +#define mask_addr_8(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits) /* high bits are now clear */;\ + +#define mask_addr_16(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits + 1) /* high bits are now clear */;\ + mov r0, r0, lsl #1 /* LSB is also zero */;\ + +#define mask_addr_32(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom 
n bits in top */;\ + mov r0, r0, lsr #(32 - nbits + 2) /* high bits are now clear */;\ + mov r0, r0, lsl #2 /* 2 LSB are also zero */;\ + +@ Vram, OAM and palette memories can only be accessed at a 16 bit boundary +#define mask_addr_bus16_32(nbits) mask_addr_32(nbits) +#define mask_addr_bus16_16(nbits) mask_addr_16(nbits) +#define mask_addr_bus16_8(nbits) \ + mask_addr_16(nbits) \ + extract_u16(r1, r1) + + @ Write out to memory. @ Input: @ r0: address @ r1: value @ r2: current pc +@ +@ The instruction at LR is not an inst but a u32 data that contains the PC +@ Used for SMC. That's why return is essentially `pc = lr + 4` #define execute_store_body(store_type, store_op) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ + str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ tst r0, #0xF0000000 /* make sure address is in range */;\ bne ext_store_u##store_type /* if not do ext store */;\ ;\ - ldr r2, =memory_map_write /* r2 = memory_map_write */;\ - mov lr, r0, lsr #15 /* lr = page index of address */;\ - ldr r2, [r2, lr, lsl #2] /* r2 = memory page */;\ - ;\ - cmp r2, #0 /* see if map is ext */;\ - beq ext_store_u##store_type /* if so do ext store */;\ + ldr lr, =ptr_tbl_##store_type /* lr = ptr table */;\ + mov r4, r0, lsr #24 /* r4 = region number */;\ + ldr lr, [lr, r4, lsl #2] /* lr = function pointer */;\ + ldr r4, [reg_base, #REG_SAVE2] /* restore r4 */;\ + bx lr /* jump to handler */;\ ;\ - mov r0, r0, lsl #17 /* isolate bottom 15 bits in top */;\ - mov r0, r0, lsr #17 /* like performing and 0x7FFF */;\ - store_op r1, [r2, r0] /* store result */;\ - - -#define store_align_8() ;\ - and r1, r1, #0xff ;\ - -#define store_align_16() ;\ - bic r0, r0, #0x01 ;\ - extract_u16(r1, r1) ;\ - -#define store_align_32() ;\ - bic r0, r0, #0x03 ;\ +ptr_tbl_##store_type: ;\ + .word ext_store_ignore /* 0x00: BIOS, ignore */;\ + .word ext_store_ignore /* 0x01: ignore */;\ + .word ext_store_ewram_u##store_type /* 0x02: ewram */;\ + .word ext_store_iwram_u##store_type /* 
0x03: iwram */;\ + .word ext_store_u##store_type /* 0x04: I/O regs */;\ + .word ext_store_u##store_type /* 0x05: palette RAM */;\ + .word ext_store_vram_u##store_type /* 0x06: vram */;\ + .word ext_store_u##store_type /* 0x07: oam ram */;\ + .word ext_store_u##store_type /* 0x08: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x09: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0B: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0C: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0D: EEPROM */;\ + .word ext_store_u##store_type /* 0x0E: backup */;\ + .word ext_store_ignore /* 0x0F: ignore */;\ + +@ for ignored areas, just return +ext_store_ignore: + ldr lr, [reg_base, #REG_SAVE3] @ pop lr off of stack + restore_flags() + add pc, lr, #4 @ return #define execute_store_builder(store_type, store_op, load_op) ;\ @@ -577,20 +620,6 @@ return_to_main: execute_store_u##store_type: ;\ _execute_store_u##store_type: ;\ execute_store_body(store_type, store_op) ;\ - sub r2, r2, #0x8000 /* Pointer to code status data */;\ - load_op r0, [r2, r0] /* check code flag */;\ - ;\ - cmp r0, #0 /* see if it's not 0 */;\ - bne 2f /* if so perform smc write */;\ - ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ - restore_flags() ;\ - add pc, lr, #4 /* return */;\ - ;\ -2: ;\ - ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ - ldr r0, [lr] /* load PC */;\ - str r0, [reg_base, #REG_PC] /* write out PC */;\ - b smc_write /* perform smc write */;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -599,11 +628,53 @@ ext_store_u##store_type: ;\ store_align_##store_type() ;\ call_c_function(write_memory##store_type) ;\ b write_epilogue /* handle additional write stuff */;\ + ;\ +ext_store_iwram_u##store_type: ;\ + mask_addr_##store_type(15) /* Mask to mirror memory (+align)*/;\ + ldr r2, =(iwram+0x8000) /* r2 = iwram base */;\ + store_op r1, [r0, r2] 
/* store data */;\ + sub r2, r2, #0x8000 /* r2 = iwram smc base */;\ + load_op r1, [r0, r2] /* r1 = SMC sentinel */;\ + cmp r1, #0 /* see if it's not 0 */;\ + bne 3f /* if so perform smc write */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +ext_store_ewram_u##store_type: ;\ + mask_addr_##store_type(18) /* Mask to mirror memory (+align)*/;\ + ldr r2, =(ewram) /* r2 = ewram base */;\ + store_op r1, [r0, r2] /* store data */;\ + add r2, r2, #0x40000 /* r2 = ewram smc base */;\ + load_op r1, [r0, r2] /* r1 = SMC sentinel */;\ + cmp r1, #0 /* see if it's not 0 */;\ + bne 3f /* if so perform smc write */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +ext_store_vram_u##store_type: ;\ + mask_addr_bus16_##store_type(17) /* Mask to mirror memory (+align)*/;\ + cmp r0, #0x18000 /* Check if exceeds 96KB */;\ + subcs r0, r0, #0x8000 /* Mirror to the last bank */;\ + ldr r2, =(vram) /* r2 = vram base */;\ + store_op r1, [r0, r2] /* store data */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +3: ;\ + ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ + ldr r0, [lr] /* load PC */;\ + str r0, [reg_base, #REG_PC] /* write out PC */;\ + b smc_write /* perform smc write */;\ + execute_store_builder(8, strb, ldrb) execute_store_builder(16, strh, ldrh) execute_store_builder(32, str, ldr) +@ This is a store that is executed in a strm case (so no SMC checks in-between) .globl execute_store_u32_safe .globl _execute_store_u32_safe @@ -619,6 +690,28 @@ ext_store_u32_safe: restore_flags() bx lr @ Return +ext_store_iwram_u32_safe: + mask_addr_8(15) @ Mask to mirror memory (no need to align!) 
+ ldr r2, =(iwram+0x8000) @ r2 = iwram base + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + +ext_store_ewram_u32_safe: + mask_addr_8(18) @ Mask to mirror memory (no need to align!) + ldr r2, =(ewram) @ r2 = ewram base + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + +ext_store_vram_u32_safe: + mask_addr_8(17) @ Mask to mirror memory (no need to align!) + ldr r2, =(vram) @ r2 = vram base + cmp r0, #0x18000 @ Check if exceeds 96KB + subcs r0, r0, #0x8000 @ Mirror to the last bank + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return write_epilogue: cmp r0, #0 @ check if the write rose an alert @@ -756,6 +849,7 @@ ext_load_##load_type: ;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ +.pool execute_load_builder(u8, 8, ldrneb, #0xF0000000) execute_load_builder(s8, 8, ldrnesb, #0xF0000000) @@ -763,14 +857,10 @@ execute_load_builder(u16, 16, ldrneh, #0xF0000001) execute_load_builder(s16, 16_signed, ldrnesh, #0xF0000001) execute_load_builder(u32, 32, ldrne, #0xF0000000) -.pool - .data memory_map_read: .space 0x8000 -memory_map_write: - .space 0x8000 palette_ram: .space 0x400 palette_ram_converted: diff --git a/cpu.c b/cpu.c index 11c947f..ea0d69e 100644 --- a/cpu.c +++ b/cpu.c @@ -1003,7 +1003,6 @@ const u32 psr_masks[16] = #define fast_write_memory(size, type, address, value) \ { \ - u8 *map; \ u32 _address = (address) & ~(aligned_address_mask##size & 0x03); \ if(_address < 0x10000000) \ { \ @@ -1011,17 +1010,9 @@ const u32 psr_masks[16] = memory_writes_##type++; \ } \ \ - if(((_address & aligned_address_mask##size) == 0) && \ - (map = memory_map_write[_address >> 15])) \ - { \ - *((type *)((u8 *)map + (_address & 0x7FFF))) = value; \ - } \ - else \ - { \ - cpu_alert = write_memory##size(_address, value); \ - if(cpu_alert) \ - goto alert; \ - } \ + cpu_alert = write_memory##size(_address, value); \ + if(cpu_alert) \ + goto alert; \ } \ 
#define load_aligned32(address, dest) \ @@ -1046,22 +1037,14 @@ const u32 psr_masks[16] = #define store_aligned32(address, value) \ { \ u32 _address = address; \ - u8 *map = memory_map_write[_address >> 15]; \ if(_address < 0x10000000) \ { \ memory_region_access_write_u32[_address >> 24]++; \ memory_writes_u32++; \ } \ - if(map) \ - { \ - address32(map, _address & 0x7FFF) = value; \ - } \ - else \ - { \ - cpu_alert = write_memory32(_address, value); \ - if(cpu_alert) \ - goto alert; \ - } \ + cpu_alert = write_memory32(_address, value); \ + if(cpu_alert) \ + goto alert; \ } \ #define load_memory_u8(address, dest) \ @@ -1647,7 +1630,6 @@ void raise_interrupt(irq_type irq_raised) #ifndef HAVE_DYNAREC u8 *memory_map_read [8 * 1024]; -u8 *memory_map_write[8 * 1024]; u16 palette_ram[512]; u16 palette_ram_converted[512]; #endif diff --git a/cpu.h b/cpu.h index cde7b2f..faa3bc1 100644 --- a/cpu.h +++ b/cpu.h @@ -155,11 +155,7 @@ extern u8 *ram_translation_ptr; #define MAX_TRANSLATION_GATES 8 extern u32 idle_loop_target_pc; -extern u32 force_pc_update_target; extern u32 iwram_stack_optimize; -extern u32 allow_smc_ram_u8; -extern u32 allow_smc_ram_u16; -extern u32 allow_smc_ram_u32; extern u32 direct_map_vram; extern u32 translation_gate_targets; extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES]; diff --git a/cpu_threaded.c b/cpu_threaded.c index 519d7f0..555b9c6 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -76,12 +76,6 @@ u32 ewram_code_max = 0xFFFFFFFF; u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE]; -// Default -u32 force_pc_update_target = 0xFFFFFFFF; -u32 allow_smc_ram_u8 = 1; -u32 allow_smc_ram_u16 = 1; -u32 allow_smc_ram_u32 = 1; - typedef struct { u8 *block_offset; @@ -2813,7 +2807,7 @@ u8 function_cc *block_lookup_address_##type(u32 pc) \ switch(pc >> 24) \ { \ case 0x2: \ - location = (u16 *)(ewram + (pc & 0x7FFF) + ((pc & 0x38000) * 2)); \ + location = (u16 *)(ewram + (pc & 0x3FFFF) + 0x40000); \ block_lookup_translate(type, ram, 1); \ break; \ \ @@ 
-3119,18 +3113,22 @@ block_lookup_address_builder(dual); block_data_type block_data[MAX_BLOCK_SIZE]; block_exit_type block_exits[MAX_EXITS]; -#define smc_write_arm_yes() \ - if(address32(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) == 0x0000) \ +#define smc_write_arm_yes() { \ + int offset = (pc < 0x03000000) ? 0x40000 : -0x8000; \ + if(address32(pc_address_block, (block_end_pc & 0x7FFF) + offset) == 0) \ { \ - address32(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) = \ + address32(pc_address_block, (block_end_pc & 0x7FFF) + offset) = \ 0xFFFFFFFF; \ } \ +} -#define smc_write_thumb_yes() \ - if(address16(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) == 0x0000) \ +#define smc_write_thumb_yes() { \ + int offset = (pc < 0x03000000) ? 0x40000 : -0x8000; \ + if(address16(pc_address_block, (block_end_pc & 0x7FFF) + offset) == 0) \ { \ - address16(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) = 0xFFFF; \ + address16(pc_address_block, (block_end_pc & 0x7FFF) + offset) = 0xFFFF; \ } \ +} #define smc_write_arm_no() \ @@ -3428,7 +3426,6 @@ s32 translate_block_thumb(u32 pc, translation_region_type u32 opcode = 0; u32 last_opcode; u32 condition; - u32 last_condition; u32 pc_region = (pc >> 15); u32 new_pc_region; u8 *pc_address_block = memory_map_read[pc_region]; @@ -3514,8 +3511,6 @@ s32 translate_block_thumb(u32 pc, translation_region_type block_exit_position = 0; block_data_position = 0; - last_condition = 0x0E; - while(pc != block_end_pc) { block_data[block_data_position].block_offset = translation_ptr; @@ -3627,13 +3622,16 @@ s32 translate_block_thumb(u32 pc, translation_region_type void flush_translation_cache_ram(void) { flush_ram_count++; -/* printf("ram flush %d (pc %x), %x to %x, %x to %x\n", + /*printf("ram flush %d (pc %x), %x to %x, %x to %x\n", flush_ram_count, reg[REG_PC], iwram_code_min, iwram_code_max, - ewram_code_min, ewram_code_max); */ + ewram_code_min, ewram_code_max);*/ last_ram_translation_ptr = ram_translation_cache; 
ram_translation_ptr = ram_translation_cache; ram_block_tag_top = 0x0101; + + // Proceed to clean the SMC area if needed + // (also try to memset as little as possible for performance) if(iwram_code_min != 0xFFFFFFFF) { iwram_code_min &= 0x7FFF; @@ -3643,33 +3641,9 @@ void flush_translation_cache_ram(void) if(ewram_code_min != 0xFFFFFFFF) { - u32 ewram_code_min_page; - u32 ewram_code_max_page; - u32 ewram_code_min_offset; - u32 ewram_code_max_offset; - u32 i; - ewram_code_min &= 0x3FFFF; ewram_code_max &= 0x3FFFF; - - ewram_code_min_page = ewram_code_min >> 15; - ewram_code_max_page = ewram_code_max >> 15; - ewram_code_min_offset = ewram_code_min & 0x7FFF; - ewram_code_max_offset = ewram_code_max & 0x7FFF; - - if(ewram_code_min_page == ewram_code_max_page) - { - memset(ewram + (ewram_code_min_page * 0x10000) + - ewram_code_min_offset, 0, - ewram_code_max_offset - ewram_code_min_offset); - } - else - { - for(i = ewram_code_min_page + 1; i < ewram_code_max_page; i++) - memset(ewram + (i * 0x10000), 0, 0x8000); - - memset(ewram, 0, ewram_code_max_offset); - } + memset(&ewram[0x40000 + ewram_code_min], 0, ewram_code_max - ewram_code_min); } iwram_code_min = 0xFFFFFFFF; diff --git a/gba_memory.c b/gba_memory.c index 948bcc5..a51f183 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -596,8 +596,7 @@ u32 function_cc read_eeprom(void) \ case 0x02: \ /* external work RAM */ \ - address = (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000; \ - value = address##type(ewram, address); \ + value = address##type(ewram, (address & 0x3FFFF)); \ break; \ \ case 0x03: \ @@ -1907,8 +1906,7 @@ void function_cc write_rtc(u32 address, u32 value) { \ case 0x02: \ /* external work RAM */ \ - address = (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000; \ - address##type(ewram, address) = value; \ + address##type(ewram, (address & 0x3FFFF)) = value; \ break; \ \ case 0x03: \ @@ -2454,7 +2452,7 @@ s32 load_bios(char *name) // DMA memory regions can be one of the following: // IWRAM - 
32kb offset from the contiguous iwram region. -// EWRAM - like segmented but with self modifying code check. +// EWRAM - also contiguous but with self modifying code check mirror. // VRAM - 96kb offset from the contiguous vram region, should take care // Palette RAM - Converts palette entries when written to. // OAM RAM - Sets OAM modified flag to true. @@ -2527,11 +2525,8 @@ dma_region_type dma_region_map[16] = #define dma_vars_iwram(type) \ dma_smc_vars_##type() \ -#define dma_vars_vram(type) \ - -#define dma_vars_palette_ram(type) \ - -#define dma_oam_ram_src() \ +#define dma_vars_ewram(type) \ + dma_smc_vars_##type() #define dma_oam_ram_dest() \ oam_update = 1 \ @@ -2539,14 +2534,17 @@ dma_region_type dma_region_map[16] = #define dma_vars_oam_ram(type) \ dma_oam_ram_##type() \ -#define dma_vars_io(type) \ +#define dma_vars_io(type) +#define dma_vars_vram(type) +#define dma_vars_palette_ram(type) +#define dma_vars_bios(type) +#define dma_vars_ext(type) + +#define dma_oam_ram_src() #define dma_segmented_load_src() \ memory_map_read[src_current_region] \ -#define dma_segmented_load_dest() \ - memory_map_write[dest_current_region] \ - #define dma_vars_gamepak(type) \ u32 type##_new_region; \ u32 type##_current_region = type##_ptr >> 15; \ @@ -2558,24 +2556,6 @@ dma_region_type dma_region_map[16] = type##_address_block = load_gamepak_page(type##_current_region & 0x3FF); \ } \ -#define dma_vars_ewram(type) \ - dma_smc_vars_##type(); \ - u32 type##_new_region; \ - u32 type##_current_region = type##_ptr >> 15; \ - u8 *type##_address_block = dma_segmented_load_##type() \ - -#define dma_vars_bios(type) \ - -#define dma_vars_ext(type) \ - -#define dma_ewram_check_region(type) \ - type##_new_region = (type##_ptr >> 15); \ - if(type##_new_region != type##_current_region) \ - { \ - type##_current_region = type##_new_region; \ - type##_address_block = dma_segmented_load_##type(); \ - } \ - #define dma_gamepak_check_region(type) \ type##_new_region = (type##_ptr >> 15); \ 
if(type##_new_region != type##_current_region) \ @@ -2605,9 +2585,7 @@ dma_region_type dma_region_map[16] = read_value = address##transfer_size(palette_ram, type##_ptr & 0x3FF) \ #define dma_read_ewram(type, transfer_size) \ - dma_ewram_check_region(type); \ - read_value = address##transfer_size(type##_address_block, \ - type##_ptr & 0x7FFF) \ + read_value = address##transfer_size(ewram, type##_ptr & 0x3FFFF) \ #define dma_read_gamepak(type, transfer_size) \ dma_gamepak_check_region(type); \ @@ -2642,12 +2620,9 @@ dma_region_type dma_region_map[16] = write_memory##transfer_size(type##_ptr, read_value) \ #define dma_write_ewram(type, transfer_size) \ - dma_ewram_check_region(type); \ - \ - address##transfer_size(type##_address_block, type##_ptr & 0x7FFF) = \ - read_value; \ - smc_trigger |= address##transfer_size(type##_address_block, \ - (type##_ptr & 0x7FFF) - 0x8000) \ + address##transfer_size(ewram, type##_ptr & 0x3FFFF) = read_value; \ + smc_trigger |= address##transfer_size(ewram, \ + (type##_ptr & 0x3FFFF) + 0x40000) \ #define dma_epilogue_iwram() \ if(smc_trigger) \ @@ -3105,14 +3080,6 @@ cpu_alert_type dma_transfer(dma_transfer_type *dma) map_offset++) \ memory_map_##type[map_offset] = NULL; \ -#define map_ram_region(type, start, end, mirror_blocks, region) \ - for(map_offset = (start) / 0x8000; map_offset < \ - ((end) / 0x8000); map_offset++) \ - { \ - memory_map_##type[map_offset] = \ - ((u8 *)region) + ((map_offset % mirror_blocks) * 0x10000) + 0x8000; \ - } \ - #define map_vram(type) \ for(map_offset = 0x6000000 / 0x8000; map_offset < (0x7000000 / 0x8000); \ map_offset += 4) \ @@ -3274,8 +3241,8 @@ void init_memory(void) // Fill memory map regions, areas marked as NULL must be checked directly map_region(read, 0x0000000, 0x1000000, 1, bios_rom); map_null(read, 0x1000000, 0x2000000); - map_ram_region(read, 0x2000000, 0x3000000, 8, ewram); - map_ram_region(read, 0x3000000, 0x4000000, 1, iwram); + map_region(read, 0x2000000, 0x3000000, 8, ewram); + 
map_region(read, 0x3000000, 0x4000000, 1, &iwram[0x8000]); map_region(read, 0x4000000, 0x5000000, 1, io_registers); map_null(read, 0x5000000, 0x6000000); map_null(read, 0x6000000, 0x7000000); @@ -3284,45 +3251,12 @@ void init_memory(void) init_memory_gamepak(); map_null(read, 0xE000000, 0x10000000); - // Fill memory map regions, areas marked as NULL must be checked directly - map_null(write, 0x0000000, 0x2000000); - map_ram_region(write, 0x2000000, 0x3000000, 8, ewram); - map_ram_region(write, 0x3000000, 0x4000000, 1, iwram); - map_null(write, 0x4000000, 0x5000000); - map_null(write, 0x5000000, 0x6000000); - - // The problem here is that the current method of handling self-modifying code - // requires writeable memory to be proceeded by 32KB SMC data areas or be - // indirectly writeable. It's possible to get around this if you turn off the SMC - // check altogether, but this will make a good number of ROMs crash (perhaps most - // of the ones that actually need it? This has yet to be determined). - - // This is because VRAM cannot be efficiently made incontiguous, and still allow - // the renderer to work as efficiently. It would, at the very least, require a - // lot of hacking of the renderer which I'm not prepared to do. - // TODO(davidgfnet): add SMC VRAM detection - - // However, it IS possible to directly map the first page no matter what because - // there's 32kb of blank stuff sitting beneath it. 
- if(direct_map_vram) - { - map_vram(write); - } - else - { - map_null(write, 0x6000000, 0x7000000); - } - - map_null(write, 0x7000000, 0x8000000); - map_null(write, 0x8000000, 0xE000000); - map_null(write, 0xE000000, 0x10000000); - - memset(io_registers, 0, 0x8000); - memset(oam_ram, 0, 0x400); - memset(palette_ram, 0, 0x400); - memset(iwram, 0, 0x10000); - memset(ewram, 0, 0x80000); - memset(vram, 0, 0x18000); + memset(io_registers, 0, sizeof(io_registers)); + memset(oam_ram, 0, sizeof(oam_ram)); + memset(palette_ram, 0, sizeof(palette_ram)); + memset(iwram, 0, sizeof(iwram)); + memset(ewram, 0, sizeof(ewram)); + memset(vram, 0, sizeof(vram)); io_registers[REG_DISPCNT] = 0x80; io_registers[REG_P1] = 0x3FF; @@ -3426,8 +3360,6 @@ void gba_save_state(void* dst) #define memory_savestate_builder(type) \ void memory_##type##_savestate(void) \ { \ - u32 i; \ - \ state_mem_##type##_variable(backup_type); \ state_mem_##type##_variable(sram_size); \ state_mem_##type##_variable(flash_mode); \ @@ -3453,10 +3385,7 @@ void memory_##type##_savestate(void) \ state_mem_##type##_array(dma); \ \ state_mem_##type(iwram + 0x8000, 0x8000); \ - for(i = 0; i < 8; i++) \ - { \ - state_mem_##type(ewram + (i * 0x10000) + 0x8000, 0x8000); \ - } \ + state_mem_##type(ewram, 0x40000); \ state_mem_##type(vram, 0x18000); \ state_mem_##type(oam_ram, 0x400); \ state_mem_##type(palette_ram, 0x400); \ diff --git a/gba_memory.h b/gba_memory.h index eaa5bab..14c8394 100644 --- a/gba_memory.h +++ b/gba_memory.h @@ -214,7 +214,6 @@ extern u8 ewram[1024 * 256 * 2]; extern u8 iwram[1024 * 32 * 2]; extern u8 *memory_map_read[8 * 1024]; -extern u8 *memory_map_write[8 * 1024]; extern u32 reg[64]; diff --git a/libretro.c b/libretro.c index 334864c..d94ddcb 100644 --- a/libretro.c +++ b/libretro.c @@ -809,15 +809,8 @@ static void set_memory_descriptors(void) { const uint64_t mem = RETRO_MEMORY_SYSTEM_RAM; struct retro_memory_descriptor desc[9] = { - { mem, iwram, 0x00000 + 0x8000, 0x3000000, 0, 0, 0x8000, NULL 
}, - { mem, ewram, 0x00000 + 0x8000, 0x2000000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x10000 + 0x8000, 0x2008000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x20000 + 0x8000, 0x2010000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x30000 + 0x8000, 0x2018000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x40000 + 0x8000, 0x2020000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x50000 + 0x8000, 0x2028000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x60000 + 0x8000, 0x2030000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x70000 + 0x8000, 0x2038000, 0, 0, 0x8000, NULL } + { mem, iwram, 0x00000 + 0x8000, 0x3000000, 0, 0, 0x8000, NULL }, + { mem, ewram, 0x00000, 0x2000000, 0, 0, 0x40000, NULL }, }; struct retro_memory_map retromap = { desc, diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 8d1d8d8..b75f7f5 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -1010,47 +1010,10 @@ u32 generate_load_rm_sh_##flags_op(u32 rm) \ { \ u32 _address = (u32)(address); \ u32 _address_hi = (_address + 0x8000) >> 16; \ - generate_load_imm(ireg, address); \ mips_emit_lui(ireg, _address_hi >> 16) \ generate_load_memory_##type(ireg, _address - (_address_hi << 16)); \ } \ -#define generate_known_address_load_builder(type) \ - u32 generate_known_address_load_##type(u32 rd, u32 address) \ - { \ - switch(address >> 24) \ - { \ - /* Read from the BIOS ROM, can be converted to an immediate load. 
\ - Only really possible to do this from the BIOS but should be okay \ - to allow it everywhere */ \ - case 0x00: \ - u32 imm = read_memory_constant_##type(address); \ - generate_load_imm(arm_to_mips_reg[rd], imm); \ - return 1; \ - \ - /* Read from RAM, can be converted to a load */ \ - case 0x02: \ - generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)ewram + \ - (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000); \ - return 1; \ - \ - case 0x03: \ - generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)iwram + \ - (address & 0x7FFF) + 0x8000); \ - return 1; \ - \ - /* Read from gamepak ROM, this has to be an immediate load because \ - it might not actually be in memory anymore when we get to it. */ \ - case 0x08: \ - u32 imm = read_memory_constant_##type(address); \ - generate_load_imm(arm_to_mips_reg[rd], imm); \ - return 1; \ - \ - default: \ - return 0; \ - } \ - } \ - #define generate_block_extra_vars() \ u32 stored_pc = pc; \ u8 *update_trampoline \ @@ -1060,12 +1023,6 @@ u32 generate_load_rm_sh_##flags_op(u32 rm) \ generate_load_rm_sh_builder(flags); \ generate_load_rm_sh_builder(no_flags); \ \ -/* generate_known_address_load_builder(u8); \ - generate_known_address_load_builder(u16); \ - generate_known_address_load_builder(u32); \ - generate_known_address_load_builder(s8); \ - generate_known_address_load_builder(s16); */ \ - \ u32 generate_load_offset_sh(u32 rm) \ { \ switch((opcode >> 5) & 0x03) \ @@ -2787,12 +2744,13 @@ static void emit_pmemld_stub( mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); if (region == 2) { - // EWRAM is a bit special + // Can't do EWRAM with an `andi` instruction (18 bits mask) + mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff + if (!aligned && alignment != 0) { + mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + } // Need to insert a zero in the addr (due to how it's mapped) - mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) - mips_emit_ext(reg_a0, reg_a0, 15, 
3); // Gets the 3 higher bits (from the 18) - mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 - mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr } else if (region == 6) { // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 @@ -2862,12 +2820,13 @@ static void emit_pmemst_stub( } if (region == 2) { - // EWRAM is a bit special + // Can't do EWRAM with an `andi` instruction (18 bits mask) + mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff + if (!aligned && realsize != 0) { + mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + } // Need to insert a zero in the addr (due to how it's mapped) - mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) - mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18) - mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 - mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr } else if (region == 6) { // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 @@ -2888,7 +2847,12 @@ static void emit_pmemst_stub( // Generate SMC write and tracking // TODO: Should we have SMC checks here also for aligned? 
if (meminfo->check_smc && !aligned) { - mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer + if (region == 2) { + mips_emit_lui(reg_temp, 0x40000 >> 16); + mips_emit_addu(reg_temp, reg_rv, reg_temp); // SMC lives after the ewram + } else { + mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer + } if (realsize == 2) { mips_emit_lw(reg_temp, reg_temp, base_addr); } else if (realsize == 1) { @@ -3272,8 +3236,8 @@ void init_emitter() { const t_stub_meminfo ldinfo [] = { { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom }, // 1 Open load / Ignore store - { emit_pmemld_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] }, - { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above + { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram }, // memsize wrong on purpose + { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers }, { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram }, { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case @@ -3308,8 +3272,8 @@ void init_emitter() { } const t_stub_meminfo stinfo [] = { - { emit_pmemst_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] }, - { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above + { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram }, + { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // I/O is special and mapped with a function call { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram }, { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 2d40bf8..5e5a479 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -44,7 +44,6 @@ .global write_io_epilogue .global memory_map_read -.global memory_map_write .global tmemld .global 
tmemst .global tmemst @@ -609,9 +608,6 @@ execute_arm_translate_internal: .data .align 6 -memory_map_write: - .space 0x8000 - memory_map_read: .space 0x8000 diff --git a/x86/x86_emit.h b/x86/x86_emit.h index 67a3dc2..68930e1 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -1485,23 +1485,13 @@ u32 function_cc execute_aligned_load32(u32 address) return read_memory32(address); } -void function_cc execute_aligned_store32(u32 address, u32 source) -{ - u8 *map; - - if(!(address & 0xF0000000) && (map = memory_map_write[address >> 15])) - address32(map, address & 0x7FFF) = source; - else - write_memory32(address, source); -} - #define arm_block_memory_load() \ generate_function_call(execute_aligned_load32); \ generate_store_reg(rv, i) \ #define arm_block_memory_store() \ generate_load_reg_pc(a1, i, 8); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define arm_block_memory_final_load() \ arm_block_memory_load() \ @@ -1956,7 +1946,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift) #define thumb_block_memory_extra_push_lr(base_reg) \ generate_add_reg_reg_imm(a0, s0, (bit_count[reg_list] * 4)); \ generate_load_reg(a1, REG_LR); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define thumb_block_memory_load() \ generate_function_call(execute_aligned_load32); \ @@ -1964,7 +1954,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift) #define thumb_block_memory_store() \ generate_load_reg(a1, i); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define thumb_block_memory_final_load() \ thumb_block_memory_load() \ diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 1e338a4..595a789 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -29,7 +29,6 @@ #define _execute_store_cpsr execute_store_cpsr #define _execute_arm_translate execute_arm_translate #define _memory_map_read memory_map_read -#define _memory_map_write 
memory_map_write #define _reg reg #define _reg_mode reg_mode #define _oam_update oam_update @@ -68,7 +67,6 @@ .global _execute_arm_translate .global _memory_map_read -.global _memory_map_write .global _reg .global _reg_mode .global _spsr @@ -231,6 +229,20 @@ ext_store_eeprom: # 8bit ext memory routines +ext_store_iwram8: + and $0x7FFF, %eax # wrap around address + mov %dl, (_iwram+0x8000)(%eax) # perform store + cmpb $0, _iwram(%eax) # Check SMC mirror + jne smc_write + ret + +ext_store_ewram8: + and $0x3FFFF, %eax # wrap around address + mov %dl, _ewram(%eax) # perform store + cmpb $0, (_ewram+0x40000)(%eax) # Check SMC mirror + jne smc_write + ret + ext_store_io8: and $0x3FF, %eax # wrap around address and $0xFF, %edx @@ -267,8 +279,8 @@ ext_store_backup: ext_store_u8_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram8 # 0x02 EWRAM + .long ext_store_iwram8 # 0x03 IWRAM .long ext_store_io8 # 0x04 I/O registers .long ext_store_palette8 # 0x05 Palette RAM .long ext_store_vram8 # 0x06 VRAM @@ -281,7 +293,12 @@ ext_store_u8_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_backup # 0x0E Flash ROM/SRAM -ext_store_u8: +# eax: address to write to +# edx: value to write +# ecx: current pc + +_execute_store_u8: + mov %ecx, REG_PC(%ebx) # write out the PC mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -290,46 +307,21 @@ ext_store_u8: mov ext_store_u8_jtable(, %ecx, 4), %ecx jmp *%ecx # jump to table index -# eax: address to write to -# edx: value to write -# ecx: current pc +# 16bit ext memory routines -_execute_store_u8: - mov %ecx, REG_PC(%ebx) # write out the PC - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u8 # if above perform an extended write - shr $15, %ecx 
# ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u8 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %dl, (%eax, %ecx) # store the value - # check for self-modifying code - testb $0xFF, -32768(%eax, %ecx) +ext_store_iwram16: + and $0x7FFF, %eax # wrap around address + mov %dx, (_iwram+0x8000)(%eax) # perform store + cmpw $0, _iwram(%eax) # Check SMC mirror jne smc_write - ret # return + ret -_execute_store_u16: - mov %ecx, REG_PC(%ebx) # write out the PC - and $~0x01, %eax # fix alignment - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u16 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u16 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %dx, (%eax, %ecx) # store the value - # check for self-modifying code - testw $0xFFFF, -32768(%eax, %ecx) +ext_store_ewram16: + and $0x3FFFF, %eax # wrap around address + mov %dx, _ewram(%eax) # perform store + cmpw $0, (_ewram+0x40000)(%eax) # Check SMC mirror jne smc_write - ret # return - -# 16bit ext memory routines + ret ext_store_io16: and $0x3FF, %eax # wrap around address @@ -377,8 +369,8 @@ ext_store_rtc: ext_store_u16_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram16 # 0x02 EWRAM + .long ext_store_iwram16 # 0x03 IWRAM .long ext_store_io16 # 0x04 I/O registers .long ext_store_palette16 # 0x05 Palette RAM .long ext_store_vram16 # 0x06 VRAM @@ -391,7 +383,9 @@ 
ext_store_u16_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -ext_store_u16: +_execute_store_u16: + mov %ecx, REG_PC(%ebx) # write out the PC + and $~0x01, %eax # fix alignment mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -400,25 +394,21 @@ ext_store_u16: mov ext_store_u16_jtable(, %ecx, 4), %ecx jmp *%ecx # jump to table index -_execute_store_u32: - mov %ecx, REG_PC(%ebx) # write out the PC - and $~0x03, %eax # fix alignment - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u32 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u32 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %edx, (%eax, %ecx) # store the value - # check for self-modifying code - testl $0xFFFFFFFF, -32768(%eax, %ecx) +# 32bit ext memory routines + +ext_store_iwram32: + and $0x7FFF, %eax # wrap around address + mov %edx, (_iwram+0x8000)(%eax) # perform store + cmpl $0, _iwram(%eax) # Check SMC mirror jne smc_write - ret # return it + ret -# 32bit ext memory routines +ext_store_ewram32: + and $0x3FFFF, %eax # wrap around address + mov %edx, _ewram(%eax) # perform store + cmpl $0, (_ewram+0x40000)(%eax) # Check SMC mirror + jne smc_write + ret ext_store_io32: and $0x3FF, %eax # wrap around address @@ -451,8 +441,8 @@ ext_store_oam32: ext_store_u32_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram32 # 0x02 EWRAM + .long ext_store_iwram32 # 0x03 IWRAM .long ext_store_io32 # 0x04 I/O registers .long ext_store_palette32 # 0x05 
Palette RAM .long ext_store_vram32 # 0x06 VRAM @@ -466,7 +456,9 @@ ext_store_u32_jtable: .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -ext_store_u32: +_execute_store_u32: + mov %ecx, REG_PC(%ebx) # write out the PC + and $~0x03, %eax # fix alignment mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -507,8 +499,8 @@ smc_write: call _flush_translation_cache_ram lookup_pc: - add $4, %esp - movl $0, CHANGED_PC_STATUS(%ebx) + add $4, %esp # Can't return, discard addr + movl $0, CHANGED_PC_STATUS(%ebx) # Lookup new block and jump to it mov REG_PC(%ebx), %eax testl $0x20, REG_CPSR(%ebx) jz lookup_pc_arm @@ -577,7 +569,5 @@ _reg_mode: _memory_map_read: .space 0x8000 -_memory_map_write: - .space 0x8000 -- cgit v1.2.3 From ff510e7f7a0c04c7862e598e8bfc75747f3bf7d1 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:47:51 +0100 Subject: Move caches to stub files to get around gcc 10 Seems that using the __attribute__ magic for sections is not the best way of doing this, since it injects some default attributes that collide with the user defined ones. Using assembly is far easier in this case. Reworked definitions a bit to make it easier to import from assembly. Also wrapped stuff around macros for easy and less verbose implementation of the symbol prefix issue. 
--- arm/arm_stub.S | 128 ++++++++++++++++++++++---------------------------------- cpu.h | 19 ++------- cpu_threaded.c | 18 +------- gpsp_config.h | 22 ++++++++++ psp/mips_emit.h | 9 +--- psp/mips_stub.S | 21 ++++++++++ x86/x86_stub.S | 85 ++++++++++++++++--------------------- 7 files changed, 137 insertions(+), 165 deletions(-) create mode 100644 gpsp_config.h diff --git a/arm/arm_stub.S b/arm/arm_stub.S index f5fceb0..f0b7f52 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -1,14 +1,14 @@ -.align 2 -.globl invalidate_icache_region -.globl invalidate_cache_region +#include "../gpsp_config.h" + +#define defsymbl(symbol) \ +.global symbol ; \ +.global _##symbol ; \ +symbol: \ +_##symbol: -.globl memory_map_read -.globl reg -.globl palette_ram -.globl palette_ram_converted -.globl reg_mode -.globl spsr +.text +.align 2 #define REG_R0 (0 * 4) #define REG_R1 (1 * 4) @@ -178,10 +178,7 @@ #define arm_update_gba_builder(name, mode, return_op) ;\ ;\ .align 2 ;\ -.globl arm_update_gba_##name ;\ -.globl _arm_update_gba_##name ;\ -arm_update_gba_##name: ;\ -_arm_update_gba_##name: ;\ +defsymbl(arm_update_gba_##name) ;\ load_pc_##return_op() ;\ str r0, [reg_base, #REG_PC] /* write out the PC */;\ ;\ @@ -243,30 +240,21 @@ arm_update_gba_builder(idle_thumb, thumb, add) @ r0: PC to branch to .align 2 -.globl arm_indirect_branch_arm -.globl _arm_indirect_branch_arm -arm_indirect_branch_arm: -_arm_indirect_branch_arm: +defsymbl(arm_indirect_branch_arm) save_flags() call_c_function(block_lookup_address_arm) restore_flags() bx r0 .align 2 -.globl arm_indirect_branch_thumb -.globl _arm_indirect_branch_thumb -arm_indirect_branch_thumb: -_arm_indirect_branch_thumb: +defsymbl(arm_indirect_branch_thumb) save_flags() call_c_function(block_lookup_address_thumb) restore_flags() bx r0 .align 2 -.globl arm_indirect_branch_dual_arm -.globl _arm_indirect_branch_dual_arm -arm_indirect_branch_dual_arm: -_arm_indirect_branch_dual_arm: +defsymbl(arm_indirect_branch_dual_arm) save_flags() tst r0, 
#0x01 @ check lower bit bne 1f @ if set going to Thumb mode @@ -286,10 +274,7 @@ _arm_indirect_branch_dual_arm: bx r0 @ return .align 2 -.globl arm_indirect_branch_dual_thumb -.globl _arm_indirect_branch_dual_thumb -arm_indirect_branch_dual_thumb: -_arm_indirect_branch_dual_thumb: +defsymbl(arm_indirect_branch_dual_thumb) save_flags() tst r0, #0x01 @ check lower bit beq 1f @ if set going to ARM mode @@ -317,10 +302,7 @@ _arm_indirect_branch_dual_thumb: @ r2: current PC .align 2 -.globl execute_store_cpsr -.globl _execute_store_cpsr -execute_store_cpsr: -_execute_store_cpsr: +defsymbl(execute_store_cpsr) save_flags() and reg_flags, r0, r1 @ reg_flags = new_cpsr & store_mask ldr r0, [reg_base, #REG_CPSR] @ r0 = cpsr @@ -354,10 +336,7 @@ _execute_store_cpsr: @ r1: bitmask of which bits in spsr to update .align 2 -.globl execute_store_spsr -.globl _execute_store_spsr -execute_store_spsr: -_execute_store_spsr: +defsymbl(execute_store_spsr) ldr r1, =spsr @ r1 = spsr ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr @@ -369,10 +348,7 @@ _execute_store_spsr: @ r0: spsr .align 2 -.globl execute_read_spsr -.globl _execute_read_spsr -execute_read_spsr: -_execute_read_spsr: +defsymbl(execute_read_spsr) ldr r0, =spsr @ r0 = spsr ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE] @@ -385,10 +361,7 @@ _execute_read_spsr: @ r0: current pc .align 2 -.globl execute_spsr_restore -.globl _execute_spsr_restore -execute_spsr_restore: -_execute_spsr_restore: +defsymbl(execute_spsr_restore) save_flags() ldr r1, =spsr @ r1 = spsr ldr r2, [reg_base, #CPU_MODE] @ r2 = cpu_mode @@ -425,10 +398,7 @@ _execute_spsr_restore: #define execute_swi_builder(mode) ;\ ;\ .align 2 ;\ -.globl execute_swi_##mode ;\ -.globl _execute_swi_##mode ;\ -execute_swi_##mode: ;\ -_execute_swi_##mode: ;\ +defsymbl(execute_swi_##mode) ;\ save_flags() ;\ ldr r1, =reg_mode /* r1 = reg_mode */;\ /* reg_mode[MODE_SUPERVISOR][6] = pc 
*/;\ @@ -460,10 +430,7 @@ execute_swi_builder(thumb) #define execute_swi_function_builder(swi_function, mode) ;\ ;\ .align 2 ;\ -.globl execute_swi_hle_##swi_function##_##mode ;\ -.globl _execute_swi_hle_##swi_function##_##mode ;\ -execute_swi_hle_##swi_function##_##mode: ;\ -_execute_swi_hle_##swi_function##_##mode: ;\ +defsymbl(execute_swi_hle_##swi_function##_##mode) ;\ save_flags() ;\ store_registers_##mode() ;\ call_c_function(execute_swi_hle_##swi_function##_c) ;\ @@ -485,10 +452,7 @@ execute_swi_function_builder(div, thumb) @ Uses sp as reg_base; must hold consistently true. .align 2 -.globl execute_arm_translate -.globl _execute_arm_translate -execute_arm_translate: -_execute_arm_translate: +defsymbl(execute_arm_translate) @ save the registers to be able to return later stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } @@ -615,10 +579,7 @@ ext_store_ignore: #define execute_store_builder(store_type, store_op, load_op) ;\ ;\ .align 2 ;\ -.globl execute_store_u##store_type ;\ -.globl _execute_store_u##store_type ;\ -execute_store_u##store_type: ;\ -_execute_store_u##store_type: ;\ +defsymbl(execute_store_u##store_type) ;\ execute_store_body(store_type, store_op) ;\ ;\ ext_store_u##store_type: ;\ @@ -676,10 +637,7 @@ execute_store_builder(32, str, ldr) @ This is a store that is executed in a strm case (so no SMC checks in-between) -.globl execute_store_u32_safe -.globl _execute_store_u32_safe -execute_store_u32_safe: -_execute_store_u32_safe: +defsymbl(execute_store_u32_safe) execute_store_body(32_safe, str) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return @@ -822,10 +780,7 @@ lookup_pc_arm: #define execute_load_builder(load_type, load_function, load_op, mask) ;\ ;\ .align 2 ;\ -.globl execute_load_##load_type ;\ -.globl _execute_load_##load_type ;\ -execute_load_##load_type: ;\ -_execute_load_##load_type: ;\ +defsymbl(execute_load_##load_type) ;\ save_flags() ;\ tst r0, mask /* make sure address is in range */;\ bne ext_load_##load_type /* if not 
do ext load */;\ @@ -859,19 +814,38 @@ execute_load_builder(u32, 32, ldrne, #0xF0000000) .data -memory_map_read: +defsymbl(memory_map_read) .space 0x8000 -palette_ram: +defsymbl(palette_ram) .space 0x400 -palette_ram_converted: +defsymbl(palette_ram_converted) .space 0x400 -spsr: +defsymbl(spsr) .space 24 -reg_mode: +defsymbl(reg_mode) .space 196 -.globl reg -.globl _reg -reg: +defsymbl(reg) .space 0x100, 0 +@ Vita and 3DS (and of course mmap) map their own cache sections through some +@ platform-specific mechanisms. +#if !defined(HAVE_MMAP) && !defined(VITA) && !defined(_3DS) + +@ Make this section executable! +.text +#ifdef __ANDROID__ +@ Unfortunately Android builds don't like nobits, so we ship a ton of zeros +@ TODO: Revisit this whenever we upgrade to the latest clang NDK +.section .jit,"awx",%progbits +#else +.section .jit,"awx",%nobits +#endif +.align 4 +defsymbl(rom_translation_cache) + .space ROM_TRANSLATION_CACHE_SIZE +defsymbl(ram_translation_cache) + .space RAM_TRANSLATION_CACHE_SIZE + +#endif + diff --git a/cpu.h b/cpu.h index faa3bc1..fc57626 100644 --- a/cpu.h +++ b/cpu.h @@ -20,6 +20,8 @@ #ifndef CPU_H #define CPU_H +#include "gpsp_config.h" + // System mode and user mode are represented as the same here typedef enum @@ -120,18 +122,6 @@ s32 translate_block_arm(u32 pc, translation_region_type translation_region, s32 translate_block_thumb(u32 pc, translation_region_type translation_region, u32 smc_enable); -#if defined(PSP) - #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) - #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) - #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) -#else - #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5) - #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2) - #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) -#endif - -#define STUB_ARENA_SIZE (4*1024) - #if defined(HAVE_MMAP) extern u8* rom_translation_cache; extern u8* ram_translation_cache; @@ -147,8 +137,8 @@ extern int sceBlock; #else extern u8 
rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]; extern u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE]; -extern u32 stub_arena[STUB_ARENA_SIZE]; #endif +extern u32 stub_arena[STUB_ARENA_SIZE / 4]; extern u8 *rom_translation_ptr; extern u8 *ram_translation_ptr; @@ -162,9 +152,6 @@ extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES]; extern u32 in_interrupt; -#define ROM_BRANCH_HASH_SIZE (1024 * 64) - -/* EDIT: Shouldn't this be extern ?! */ extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE]; void flush_translation_cache_rom(void); diff --git a/cpu_threaded.c b/cpu_threaded.c index 555b9c6..7f12b4f 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -47,26 +47,10 @@ u8* ram_translation_cache_ptr; u8 *rom_translation_ptr = rom_translation_cache; u8 *ram_translation_ptr = ram_translation_cache; #else - -#ifdef __ANDROID__ -// Workaround for 'attempt to map x bytes at offset y' -__asm__(".section .jit,\"awx\",%progbits"); -#else -__asm__(".section .jit,\"awx\",%nobits"); -#endif - -u32 stub_arena[STUB_ARENA_SIZE] - __attribute__ ((aligned(4),section(".jit"))); -u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE] - __attribute__ ((aligned(4),section(".jit"))); u8 *rom_translation_ptr = rom_translation_cache; - -u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE] - __attribute__ ((aligned(4),section(".jit"))); u8 *ram_translation_ptr = ram_translation_cache; - -__asm__(".section .text"); #endif +/* Note, see stub files for more cache definitions */ u32 iwram_code_min = 0xFFFFFFFF; u32 iwram_code_max = 0xFFFFFFFF; diff --git a/gpsp_config.h b/gpsp_config.h new file mode 100644 index 0000000..ea8db95 --- /dev/null +++ b/gpsp_config.h @@ -0,0 +1,22 @@ + +#ifndef GPSP_CONFIG_H +#define GPSP_CONFIG_H + +/* Cache sizes and their config knobs */ +#if defined(PSP) + #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) + #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) +#else + #define ROM_TRANSLATION_CACHE_SIZE 
(1024 * 512 * 4 * 5) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2) + #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) +#endif + +/* This is MIPS specific for now */ +#define STUB_ARENA_SIZE (16*1024) + +/* Hash table size for ROM trans cache lookups */ +#define ROM_BRANCH_HASH_SIZE (1024 * 64) + +#endif diff --git a/psp/mips_emit.h b/psp/mips_emit.h index b75f7f5..b996f2b 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2618,11 +2618,7 @@ static void emit_mem_access_loadop( #define genccall(fn) mips_emit_jal(((u32)fn) >> 2); #endif -// Stub memory map: -// 0 .. 63 First patch handler [#0] -// 448 .. 511 Last patch handler [#7] -// 512+ smc_write handler -#define SMC_WRITE_OFF32 160 +#define SMC_WRITE_OFF32 (10*16) /* 10 handlers (16 insts) */ // Describes a "plain" memory are, that is, an area that is just accessed // as normal memory (with some caveats tho). @@ -2862,8 +2858,7 @@ static void emit_pmemst_stub( } // If the data is non zero, we just wrote over code // Local-jump to the smc_write (which lives at offset:0) - unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1)); - mips_emit_b(bne, reg_zero, reg_temp, instoffset); + mips_emit_b(bne, reg_zero, reg_temp, branch_offset(&stub_arena[SMC_WRITE_OFF32])); } // Store the data (delay slot from the SMC branch) diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 5e5a479..3d046d8 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -16,6 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#include "../gpsp_config.h" + .set mips32r2 .align 4 @@ -645,3 +647,22 @@ fnptrs: .long execute_spsr_restore_body # 6 .long execute_store_cpsr_body # 7 +#if !defined(HAVE_MMAP) + +# Make this section executable! 
+.text +.section .jit,"awx",%nobits +.align 2 +.global stub_arena +.global rom_translation_cache +.global ram_translation_cache + +stub_arena: + .space STUB_ARENA_SIZE +rom_translation_cache: + .space ROM_TRANSLATION_CACHE_SIZE +ram_translation_cache: + .space RAM_TRANSLATION_CACHE_SIZE + +#endif + diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 595a789..9dd3fdd 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -16,21 +16,18 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#include "../gpsp_config.h" + .align 4 +#define defsymbl(symbol) \ +.global symbol ; \ +.global _##symbol ; \ +symbol: \ +_##symbol: + #ifndef _WIN32 -#define _x86_update_gba x86_update_gba -#define _x86_indirect_branch_arm x86_indirect_branch_arm -#define _x86_indirect_branch_thumb x86_indirect_branch_thumb -#define _x86_indirect_branch_dual x86_indirect_branch_dual -#define _execute_store_u8 execute_store_u8 -#define _execute_store_u16 execute_store_u16 -#define _execute_store_u32 execute_store_u32 -#define _execute_store_cpsr execute_store_cpsr -#define _execute_arm_translate execute_arm_translate -#define _memory_map_read memory_map_read -#define _reg reg -#define _reg_mode reg_mode +# External symbols (data + functions) #define _oam_update oam_update #define _iwram iwram #define _ewram ewram @@ -38,7 +35,6 @@ #define _oam_ram oam_ram #define _bios_rom bios_rom #define _io_registers io_registers -#define _spsr spsr #define _update_gba update_gba #define _block_lookup_address_arm block_lookup_address_arm @@ -47,8 +43,6 @@ #define _write_io_register8 write_io_register8 #define _write_io_register16 write_io_register16 #define _write_io_register32 write_io_register32 -#define _palette_ram palette_ram -#define _palette_ram_converted palette_ram_converted #define _flush_translation_cache_ram flush_translation_cache_ram #define _write_eeprom write_eeprom #define _write_backup write_backup @@ -56,25 
+50,7 @@ #define _execute_store_cpsr_body execute_store_cpsr_body #endif -.global _x86_update_gba -.global _x86_indirect_branch_arm -.global _x86_indirect_branch_thumb -.global _x86_indirect_branch_dual -.global _execute_store_u8 -.global _execute_store_u16 -.global _execute_store_u32 -.global _execute_store_cpsr -.global _execute_arm_translate - -.global _memory_map_read -.global _reg -.global _reg_mode -.global _spsr -.global _palette_ram -.global _palette_ram_converted - .global _oam_update - .global _iwram .global _ewram .global _vram @@ -147,7 +123,7 @@ st: .asciz "u\n" -_x86_update_gba: +defsymbl(x86_update_gba) mov %eax, REG_PC(%ebx) # current PC = eax collapse_flags # update cpsr, trashes ecx and edx @@ -171,14 +147,14 @@ _x86_update_gba: # eax: GBA address to branch to # edi: Cycle counter -_x86_indirect_branch_arm: +defsymbl(x86_indirect_branch_arm) call _block_lookup_address_arm jmp *%eax # For indirect branches that'll definitely go to Thumb. In # Thumb mode any indirect branches except for BX. 
-_x86_indirect_branch_thumb: +defsymbl(x86_indirect_branch_thumb) call _block_lookup_address_thumb jmp *%eax @@ -186,7 +162,7 @@ _x86_indirect_branch_thumb: # mainly BX (also data processing to PC with S bit set, be # sure to adjust the target with a 1 in the lowest bit for this) -_x86_indirect_branch_dual: +defsymbl(x86_indirect_branch_dual) call _block_lookup_address_dual jmp *%eax @@ -297,7 +273,7 @@ ext_store_u8_jtable: # edx: value to write # ecx: current pc -_execute_store_u8: +defsymbl(execute_store_u8) mov %ecx, REG_PC(%ebx) # write out the PC mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 @@ -383,7 +359,7 @@ ext_store_u16_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -_execute_store_u16: +defsymbl(execute_store_u16) mov %ecx, REG_PC(%ebx) # write out the PC and $~0x01, %eax # fix alignment mov %eax, %ecx # ecx = address @@ -400,6 +376,7 @@ ext_store_iwram32: and $0x7FFF, %eax # wrap around address mov %edx, (_iwram+0x8000)(%eax) # perform store cmpl $0, _iwram(%eax) # Check SMC mirror + jne smc_write ret @@ -456,7 +433,7 @@ ext_store_u32_jtable: .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -_execute_store_u32: +defsymbl(execute_store_u32) mov %ecx, REG_PC(%ebx) # write out the PC and $~0x03, %eax # fix alignment mov %eax, %ecx # ecx = address @@ -470,7 +447,7 @@ _execute_store_u32: # %eax = new_cpsr # %edx = store_mask -_execute_store_cpsr: +defsymbl(execute_store_cpsr) mov %edx, REG_SAVE(%ebx) # save store_mask mov %ecx, REG_SAVE2(%ebx) # save PC too @@ -515,7 +492,7 @@ lookup_pc_arm: # eax: cycle counter -_execute_arm_translate: +defsymbl(execute_arm_translate) # Save main context, since we need to return gracefully pushl %ebx pushl %esi @@ -556,18 +533,30 @@ return_to_main: .data .align 64 -_reg: +defsymbl(reg) .space 0x100, 0 -_palette_ram: +defsymbl(palette_ram) .space 0x400 -_palette_ram_converted: +defsymbl(palette_ram_converted) .space 0x400 -_spsr: 
+defsymbl(spsr) .space 24 -_reg_mode: +defsymbl(reg_mode) .space 196 -_memory_map_read: +defsymbl(memory_map_read) .space 0x8000 +#if !defined(HAVE_MMAP) + +# Make this section executable! +.text +.section .jit,"awx",%nobits +.align 4 +defsymbl(rom_translation_cache) + .space ROM_TRANSLATION_CACHE_SIZE +defsymbl(ram_translation_cache) + .space RAM_TRANSLATION_CACHE_SIZE + +#endif -- cgit v1.2.3 From 6f2d0bbee840b9fb1e48958d3fdf140a650d512c Mon Sep 17 00:00:00 2001 From: twinaphex Date: Thu, 25 Mar 2021 21:21:21 +0100 Subject: Add od-beta --- .gitlab-ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 424847a..fdbedc4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -179,3 +179,9 @@ libretro-build-dingux-mips32: extends: - .libretro-dingux-mips32-make-default - .core-defs + +# OpenDingux Beta +libretro-build-dingux-odbeta-mips32: + extends: + - .libretro-dingux-odbeta-mips32-make-default + - .core-defs -- cgit v1.2.3 From 3db35eab70be425c679169fbda3a08bedc023107 Mon Sep 17 00:00:00 2001 From: jdgleaver Date: Fri, 26 Mar 2021 16:03:46 +0000 Subject: Fix OpenDingux Beta build --- .gitlab-ci.yml | 2 ++ Makefile | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fdbedc4..e3550f2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -185,3 +185,5 @@ libretro-build-dingux-odbeta-mips32: extends: - .libretro-dingux-odbeta-mips32-make-default - .core-defs + variables: + platform: gcw0-odbeta diff --git a/Makefile b/Makefile index eddfe36..a5f2649 100644 --- a/Makefile +++ b/Makefile @@ -379,6 +379,22 @@ else ifeq ($(platform), gcw0) HAVE_DYNAREC := 1 CPU_ARCH := mips +# GCW0 (OpenDingux Beta) +else ifeq ($(platform), gcw0-odbeta) + TARGET := $(TARGET_NAME)_libretro.so + CC = /opt/gcw0-toolchain/usr/bin/mipsel-linux-gcc + CXX = /opt/gcw0-toolchain/usr/bin/mipsel-linux-g++ + AR = /opt/gcw0-toolchain/usr/bin/mipsel-linux-ar + SHARED := -shared -nostdlib 
-Wl,--version-script=link.T + fpic := -fPIC -DPIC + CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float + # The ASM code and/or MIPS dynarec of GPSP does not respect + # MIPS calling conventions, so we must use '-fno-caller-saves' + # for the OpenDingux Beta build + CFLAGS += -fno-caller-saves + HAVE_DYNAREC := 1 + CPU_ARCH := mips + # Windows else TARGET := $(TARGET_NAME)_libretro.dll -- cgit v1.2.3 From a494a3f00ee3bd35ee9ab76f8cd4f164da080113 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Thu, 25 Mar 2021 21:02:06 +0100 Subject: Move OAM update flag to a register Fix a small bug in MIPS dynarec that affects non -G0 targets --- arm/arm_stub.S | 4 ++-- cpu.h | 4 ++-- gba_memory.c | 15 +++++---------- main.c | 2 +- psp/mips_emit.h | 8 ++++---- psp/mips_stub.S | 4 +++- video.c | 4 ++-- x86/x86_stub.S | 9 ++++----- 8 files changed, 23 insertions(+), 27 deletions(-) diff --git a/arm/arm_stub.S b/arm/arm_stub.S index f0b7f52..8e6cc9b 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -43,8 +43,8 @@ _##symbol: #define CPU_HALT_STATE (30 * 4) #define CHANGED_PC_STATUS (31 * 4) #define COMPLETED_FRAME (32 * 4) - -#define MAIN_THREAD_SP (33 * 4) +#define OAM_UPDATED (33 * 4) +#define MAIN_THREAD_SP (34 * 4) #define reg_a0 r0 #define reg_a1 r1 diff --git a/cpu.h b/cpu.h index fc57626..2b250ca 100644 --- a/cpu.h +++ b/cpu.h @@ -85,7 +85,8 @@ typedef enum CPU_MODE = 29, CPU_HALT_STATE = 30, CHANGED_PC_STATUS = 31, - COMPLETED_FRAME = 32 + COMPLETED_FRAME = 32, + OAM_UPDATED = 33 } ext_reg_numbers; typedef enum @@ -146,7 +147,6 @@ extern u8 *ram_translation_ptr; extern u32 idle_loop_target_pc; extern u32 iwram_stack_optimize; -extern u32 direct_map_vram; extern u32 translation_gate_targets; extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES]; diff --git a/gba_memory.c b/gba_memory.c index a51f183..e647304 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -342,14 +342,9 @@ gamepak_swap_entry_type *gamepak_memory_map; // a 
lot. FILE *gamepak_file_large = NULL; -u32 direct_map_vram = 0; - // Writes to these respective locations should trigger an update // so the related subsystem may react to it. -// If OAM is written to: -u32 oam_update = 1; - // If GBC audio is written to: u32 gbc_sound_update = 0; @@ -755,7 +750,7 @@ cpu_alert_type function_cc write_io_register8(u32 address, u32 value) u32 dispcnt = io_registers[REG_DISPCNT]; if((value & 0x07) != (dispcnt & 0x07)) - oam_update = 1; + reg[OAM_UPDATED] = 1; address8(io_registers, 0x00) = value; break; @@ -1171,7 +1166,7 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value) { u32 dispcnt = io_registers[REG_DISPCNT]; if((value & 0x07) != (dispcnt & 0x07)) - oam_update = 1; + reg[OAM_UPDATED] = 1; address16(io_registers, 0x00) = value; break; @@ -1934,7 +1929,7 @@ void function_cc write_rtc(u32 address, u32 value) \ case 0x07: \ /* OAM RAM */ \ - oam_update = 1; \ + reg[OAM_UPDATED] = 1; \ address##type(oam_ram, address & 0x3FF) = value; \ break; \ \ @@ -2529,7 +2524,7 @@ dma_region_type dma_region_map[16] = dma_smc_vars_##type() #define dma_oam_ram_dest() \ - oam_update = 1 \ + reg[OAM_UPDATED] = 1 \ #define dma_vars_oam_ram(type) \ dma_oam_ram_##type() \ @@ -3331,7 +3326,7 @@ void gba_load_state(const void* src) wipe_caches(); #endif - oam_update = 1; + reg[OAM_UPDATED] = 1; gbc_sound_update = 1; for(i = 0; i < 512; i++) diff --git a/main.c b/main.c index 73371e4..2a82338 100644 --- a/main.c +++ b/main.c @@ -158,7 +158,7 @@ u32 update_gba(void) if((dispstat & 0x01) == 0) { u32 i; - if(oam_update) + if(reg[OAM_UPDATED]) oam_update_count++; if(no_alpha) diff --git a/psp/mips_emit.h b/psp/mips_emit.h index b996f2b..d24c174 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2512,7 +2512,8 @@ u8 swi_hle_handle[256] = #define ReOff_SaveR1 (21*4) // 3 save scratch regs #define ReOff_SaveR2 (22*4) #define ReOff_SaveR3 (23*4) -#define ReOff_GP_Save (32*4) // GP_SAVE +#define ReOff_OamUpd (33*4) // OAM_UPDATED +#define 
ReOff_GP_Save (34*4) // GP_SAVE // Saves all regs to their right slot and loads gp #define emit_save_regs(save_a2) { \ @@ -2873,9 +2874,8 @@ static void emit_pmemst_stub( // Post processing store: // Signal that OAM was updated if (region == 7) { - u32 palcaddr = (u32)&oam_update; - mips_emit_lui(reg_temp, ((palcaddr + 0x8000) >> 16)); - mips_emit_sw(reg_base, reg_temp, palcaddr & 0xffff); // Write any nonzero data + // Write any nonzero data + mips_emit_sw(reg_base, reg_base, ReOff_OamUpd); generate_function_return_swap_delay(); } else { diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 3d046d8..cc3a220 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -52,6 +52,7 @@ .global reg .global spsr .global reg_mode +.global oam_update # MIPS register layout: @@ -116,7 +117,8 @@ .equ CPU_HALT_STATE, (30 * 4) .equ CHANGED_PC_STATUS, (31 * 4) .equ COMPLETED_FRAME, (32 * 4) -.equ GP_SAVE, (33 * 4) +.equ OAM_UPDATED, (33 * 4) +.equ GP_SAVE, (34 * 4) .equ SPSR_BASE, (0x900) .equ REGMODE_BASE, (0x900 + 24) diff --git a/video.c b/video.c index 23cd368..4221f25 100644 --- a/video.c +++ b/video.c @@ -4429,10 +4429,10 @@ void update_scanline(void) // If OAM has been modified since the last scanline has been updated then // reorder and reprofile the OBJ lists. 
- if(oam_update) + if(reg[OAM_UPDATED]) { order_obj(video_mode); - oam_update = 0; + reg[OAM_UPDATED] = 0; } order_layers((dispcnt >> 8) & active_layers[video_mode]); diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 9dd3fdd..948572c 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -28,7 +28,6 @@ _##symbol: #ifndef _WIN32 # External symbols (data + functions) -#define _oam_update oam_update #define _iwram iwram #define _ewram ewram #define _vram vram @@ -50,7 +49,6 @@ _##symbol: #define _execute_store_cpsr_body execute_store_cpsr_body #endif -.global _oam_update .global _iwram .global _ewram .global _vram @@ -75,6 +73,7 @@ _##symbol: .equ CPU_HALT_STATE, (30 * 4) .equ CHANGED_PC_STATUS, (31 * 4) .equ COMPLETED_FRAME, (32 * 4) +.equ OAM_UPDATED, (33 * 4) # destroys ecx and edx @@ -241,7 +240,7 @@ ext_store_vram8b: ret ext_store_oam8: - movl $1, _oam_update # flag OAM update + movl $1, OAM_UPDATED(%ebx) # flag OAM update and $0x3FE, %eax # wrap around address and align to 16bits mov %dl, %dh # copy lower 8bits of value into full 16bits mov %dx, _oam_ram(%eax) # perform 16bit store @@ -332,7 +331,7 @@ ext_store_vram16b: ret ext_store_oam16: - movl $1, _oam_update # flag OAM update + movl $1, OAM_UPDATED(%ebx) # flag OAM update and $0x3FF, %eax # wrap around address mov %dx, _oam_ram(%eax) # perform 16bit store ret @@ -410,7 +409,7 @@ ext_store_vram32b: ret ext_store_oam32: - movl $1, _oam_update # flag OAM update + movl $1, OAM_UPDATED(%ebx) # flag OAM update and $0x3FF, %eax # wrap around address mov %edx, _oam_ram(%eax) # perform 32bit store ret -- cgit v1.2.3 From 7ea6c5e247a742af6f7acfbf215c23264410451f Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Thu, 25 Mar 2021 23:01:20 +0100 Subject: Move OAM RAM to stubs also Makes accesses more efficient for MIPS. Make accesses also fast for palette reads. 
--- Makefile | 9 +++++++++ arm/arm_stub.S | 2 ++ cpu.c | 1 + gba_memory.c | 1 - psp/mips_emit.h | 50 ++++++++++++++++++++++++++++---------------------- psp/mips_stub.S | 10 +++++++--- x86/x86_stub.S | 2 ++ 7 files changed, 49 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index a5f2649..dfc7c37 100644 --- a/Makefile +++ b/Makefile @@ -362,6 +362,15 @@ else ifneq (,$(findstring armv,$(platform))) endif LDFLAGS := -Wl,--no-undefined +# MIPS +else ifeq ($(platform), mips32) + TARGET := $(TARGET_NAME)_libretro.so + SHARED := -shared -nostdlib -Wl,--version-script=link.T + fpic := -fPIC -DPIC + CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float + HAVE_DYNAREC := 1 + CPU_ARCH := mips + # emscripten else ifeq ($(platform), emscripten) TARGET := $(TARGET_NAME)_libretro_$(platform).bc diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 8e6cc9b..374daba 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -820,6 +820,8 @@ defsymbl(palette_ram) .space 0x400 defsymbl(palette_ram_converted) .space 0x400 +defsymbl(oam_ram) + .space 0x400 defsymbl(spsr) .space 24 defsymbl(reg_mode) diff --git a/cpu.c b/cpu.c index ea0d69e..badb9c2 100644 --- a/cpu.c +++ b/cpu.c @@ -1630,6 +1630,7 @@ void raise_interrupt(irq_type irq_raised) #ifndef HAVE_DYNAREC u8 *memory_map_read [8 * 1024]; +u16 oam_ram[512]; u16 palette_ram[512]; u16 palette_ram_converted[512]; #endif diff --git a/gba_memory.c b/gba_memory.c index e647304..b66dce7 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -305,7 +305,6 @@ u32 gamepak_waitstate_sequential[2][3][3] = } }; -u16 oam_ram[512]; u16 io_registers[1024 * 16]; u8 ewram[1024 * 256 * 2]; u8 iwram[1024 * 32 * 2]; diff --git a/psp/mips_emit.h b/psp/mips_emit.h index d24c174..818b724 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2630,6 +2630,7 @@ typedef struct { bool check_smc; // Whether the memory can contain code bool bus16; // Whether it can only be accessed at 16bit u32 baseptr; // Memory base address. 
+ u32 baseoff; // Offset from base_reg } t_stub_meminfo; // Generates the stub to access memory for a given region, access type, @@ -2738,7 +2739,11 @@ static void emit_pmemld_stub( } else { // Generate upper bits of the addr and do addr mirroring // (The address hi16 is rounded up since load uses signed offset) - mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + if (!meminfo->baseoff) { + mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + } else { + base_addr = meminfo->baseoff; + } if (region == 2) { // Can't do EWRAM with an `andi` instruction (18 bits mask) @@ -2761,8 +2766,9 @@ static void emit_pmemld_stub( mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset } else { // Generate regular (<=32KB) mirroring - mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring) - mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr + mips_reg_number breg = (meminfo->baseoff ? reg_base : reg_rv); + mips_emit_andi(reg_temp, reg_a0, memmask); // Clear upper bits (mirroring) + mips_emit_addu(reg_rv, breg, reg_temp); // Adds to base addr } } @@ -3154,7 +3160,7 @@ static void emit_phand( mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7) } - unsigned tbloff = 256 + 2048 + 220 + 4 * toff; // Skip regs and palettes + unsigned tbloff = 256 + 3*1024 + 220 + 4 * toff; // Skip regs and RAMs mips_emit_addu(reg_rv, reg_temp, reg_base); // Add to the base_reg the table offset mips_emit_lw(reg_rv, reg_rv, tbloff); // Read addr from table mips_emit_sll(reg_temp, reg_rv, 4); // 26 bit immediate to the MSB @@ -3229,21 +3235,21 @@ void init_emitter() { // Generate memory handlers const t_stub_meminfo ldinfo [] = { - { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom }, + { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom, 0}, // 1 Open load / Ignore store - { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram }, // memsize wrong on purpose - { emit_pmemld_stub, 3, 0x8000, true, false, 
(u32)&iwram[0x8000] }, - { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers }, - { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram }, - { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case - { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram }, - { emit_pmemld_stub, 8, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 9, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 10, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 11, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 12, 0x8000, false, false, 0 }, + { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram, 0 }, // memsize wrong on purpose + { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 }, + { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers, 0 }, + { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram, 0x100 }, + { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case + { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 }, + { emit_pmemld_stub, 8, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 9, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 10, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 11, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 12, 0x8000, false, false, 0, 0 }, // 13 is EEPROM mapped already (a bit special) - { emit_pmemld_stub, 14, 0, false, false, 0 }, // Mapped via function call + { emit_pmemld_stub, 14, 0, false, false, 0, 0 }, // Mapped via function call // 15 Open load / Ignore store }; @@ -3267,12 +3273,12 @@ void init_emitter() { } const t_stub_meminfo stinfo [] = { - { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram }, - { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, + { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram, 0 }, + { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 }, // I/O is special and mapped with a function call - { emit_palette_hdl, 5, 0x400, false, true, 
(u32)palette_ram }, - { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case - { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram }, + { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram, 0x100 }, + { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case + { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 }, }; // Store only for "regular"-ish mem regions diff --git a/psp/mips_stub.S b/psp/mips_stub.S index cc3a220..1c4ad4b 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -40,6 +40,7 @@ .global reg_check .global palette_ram .global palette_ram_converted +.global oam_ram .global init_emitter .global mips_lookup_pc .global smc_write @@ -120,11 +121,12 @@ .equ OAM_UPDATED, (33 * 4) .equ GP_SAVE, (34 * 4) -.equ SPSR_BASE, (0x900) -.equ REGMODE_BASE, (0x900 + 24) +.equ SPSR_BASE, (0x100 + 0x400 * 3) +.equ REGMODE_BASE, (SPSR_BASE + 24) .equ SUPERVISOR_SPSR, (3 * 4 + SPSR_BASE) .equ SUPERVISOR_LR, ((3 * (7 * 4)) + (6 * 4) + REGMODE_BASE) -.equ FNPTRS_BASE, (0x900 + 220 + 960) +.equ FNPTRS_MEMOPS, (REGMODE_BASE + 196) +.equ FNPTRS_BASE, (FNPTRS_MEMOPS + 960) .set noat .set noreorder @@ -625,6 +627,8 @@ palette_ram: .space 0x400 palette_ram_converted: .space 0x400 +oam_ram: + .space 0x400 spsr: .space 24 # u32[6] reg_mode: diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 948572c..333c8fd 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -538,6 +538,8 @@ defsymbl(palette_ram) .space 0x400 defsymbl(palette_ram_converted) .space 0x400 +defsymbl(oam_ram) + .space 0x400 defsymbl(spsr) .space 24 defsymbl(reg_mode) -- cgit v1.2.3 From d284c868e9e23fb210b8c448cdace39f394cb895 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 26 Mar 2021 13:00:08 +0100 Subject: Improve ARM store accesses --- arm/arm_stub.S | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 374daba..1db913e 100644 --- 
a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -559,7 +559,7 @@ ptr_tbl_##store_type: ;\ .word ext_store_u##store_type /* 0x04: I/O regs */;\ .word ext_store_u##store_type /* 0x05: palette RAM */;\ .word ext_store_vram_u##store_type /* 0x06: vram */;\ - .word ext_store_u##store_type /* 0x07: oam ram */;\ + .word ext_store_oam_ram_u##store_type /* 0x07: oam ram */;\ .word ext_store_u##store_type /* 0x08: gamepak: ignore */;\ .word ext_store_u##store_type /* 0x09: gamepak: ignore */;\ .word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\ @@ -624,6 +624,15 @@ ext_store_vram_u##store_type: ;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ ;\ +ext_store_oam_ram_u##store_type: ;\ + mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\ + add r2, reg_base, #256 /* r2 = oam ram base */;\ + store_op r1, [r0, r2] /* store data */;\ + str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ 3: ;\ ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ ldr r0, [lr] /* load PC */;\ @@ -671,6 +680,14 @@ ext_store_vram_u32_safe: restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return +ext_store_oam_ram_u32_safe: + mask_addr_8(10) @ Mask to mirror memory (no need to align!) 
+ add r2, reg_base, #256 @ r2 = oam ram base + str r1, [r0, r2] @ store data + str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + write_epilogue: cmp r0, #0 @ check if the write rose an alert beq 4f @ if not we can exit @@ -820,8 +837,6 @@ defsymbl(palette_ram) .space 0x400 defsymbl(palette_ram_converted) .space 0x400 -defsymbl(oam_ram) - .space 0x400 defsymbl(spsr) .space 24 defsymbl(reg_mode) @@ -829,6 +844,8 @@ defsymbl(reg_mode) defsymbl(reg) .space 0x100, 0 +defsymbl(oam_ram) + .space 0x400 @ Vita and 3DS (and of course mmap) map their own cache sections through some @ platform-speficic mechanisms. -- cgit v1.2.3 From 452ba76ba898c5fc6d176ae8f8e2d77cf15f64a2 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 26 Mar 2021 13:25:50 +0100 Subject: Fix 16 bit RAM stores (VRAM and OAM) in ARM --- arm/arm_stub.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 1db913e..5917e82 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -538,7 +538,7 @@ return_to_main: @ The instruction at LR is not an inst but a u32 data that contains the PC @ Used for SMC. 
That's why return is essentially `pc = lr + 4` -#define execute_store_body(store_type, store_op) ;\ +#define execute_store_body(store_type) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ @@ -576,11 +576,11 @@ ext_store_ignore: add pc, lr, #4 @ return -#define execute_store_builder(store_type, store_op, load_op) ;\ +#define execute_store_builder(store_type, store_op, store_op16, load_op) ;\ ;\ .align 2 ;\ defsymbl(execute_store_u##store_type) ;\ - execute_store_body(store_type, store_op) ;\ + execute_store_body(store_type) ;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -619,7 +619,7 @@ ext_store_vram_u##store_type: ;\ cmp r0, #0x18000 /* Check if exceeds 96KB */;\ subcs r0, r0, #0x8000 /* Mirror to the last bank */;\ ldr r2, =(vram) /* r2 = vram base */;\ - store_op r1, [r0, r2] /* store data */;\ + store_op16 r1, [r0, r2] /* store data */;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ @@ -627,7 +627,7 @@ ext_store_vram_u##store_type: ;\ ext_store_oam_ram_u##store_type: ;\ mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\ add r2, reg_base, #256 /* r2 = oam ram base */;\ - store_op r1, [r0, r2] /* store data */;\ + store_op16 r1, [r0, r2] /* store data */;\ str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ restore_flags() ;\ @@ -640,14 +640,14 @@ ext_store_oam_ram_u##store_type: ;\ b smc_write /* perform smc write */;\ -execute_store_builder(8, strb, ldrb) -execute_store_builder(16, strh, ldrh) -execute_store_builder(32, str, ldr) +execute_store_builder(8, strb, strh, ldrb) +execute_store_builder(16, strh, strh, ldrh) +execute_store_builder(32, str, str, ldr) @ This is a store that is executed in a strm case (so no SMC checks in-between) defsymbl(execute_store_u32_safe) - 
execute_store_body(32_safe, str) + execute_store_body(32_safe) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return -- cgit v1.2.3 From fd2079354572372f4516fcc13c51992ef4b4c715 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Sat, 27 Mar 2021 15:09:01 +0100 Subject: Add CROSS_COMPILE rules --- Makefile | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index dfc7c37..4a5806d 100644 --- a/Makefile +++ b/Makefile @@ -104,10 +104,18 @@ else ifeq ($(platform), osx) ifeq ($(HAVE_DYNAREC),1) HAVE_MMAP = 1 endif - ifndef ($(NOUNIVERSAL)) - CFLAGS += $(ARCHFLAGS) - LDFLAGS += $(ARCHFLAGS) - endif + + ifeq ($(CROSS_COMPILE),1) + TARGET_RULE = -target $(LIBRETRO_APPLE_PLATFORM) -isysroot $(LIBRETRO_APPLE_ISYSROOT) + CFLAGS += $(TARGET_RULE) + CPPFLAGS += $(TARGET_RULE) + CXXFLAGS += $(TARGET_RULE) + LDFLAGS += $(TARGET_RULE) + endif + + CFLAGS += $(ARCHFLAGS) + CXXFLAGS += $(ARCHFLAGS) + LDFLAGS += $(ARCHFLAGS) # iOS else ifneq (,$(findstring ios,$(platform))) -- cgit v1.2.3