summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Autechre, 2021-03-27 00:31:51 +0100
committer: GitHub, 2021-03-27 00:31:51 +0100
commit: 08d2fa1ebe9e259da228dfa5e5086bd5b62c4347 (patch)
tree: 73d1a6eba45d5ed80a698b254332c8e9de2b7bd2
parent: 53cc4a2475ebc8cc510dc97fe8db95939230cee9 (diff)
parent: 452ba76ba898c5fc6d176ae8f8e2d77cf15f64a2 (diff)
download: picogpsp-08d2fa1ebe9e259da228dfa5e5086bd5b62c4347.tar.gz
picogpsp-08d2fa1ebe9e259da228dfa5e5086bd5b62c4347.tar.bz2
picogpsp-08d2fa1ebe9e259da228dfa5e5086bd5b62c4347.zip
Merge pull request #120 from davidgfnet/master
Fixes and improvements for MIPS and ARM
-rw-r--r-- Makefile 9
-rw-r--r-- arm/arm_stub.S 41
-rw-r--r-- cpu.c 1
-rw-r--r-- cpu.h 4
-rw-r--r-- gba_memory.c 16
-rw-r--r-- main.c 2
-rw-r--r-- psp/mips_emit.h 58
-rw-r--r-- psp/mips_stub.S 14
-rw-r--r-- video.c 4
-rw-r--r-- x86/x86_stub.S 11
10 files changed, 98 insertions, 62 deletions
diff --git a/Makefile b/Makefile
index a5f2649..dfc7c37 100644
--- a/Makefile
+++ b/Makefile
@@ -362,6 +362,15 @@ else ifneq (,$(findstring armv,$(platform)))
endif
LDFLAGS := -Wl,--no-undefined
+# MIPS
+else ifeq ($(platform), mips32)
+ TARGET := $(TARGET_NAME)_libretro.so
+ SHARED := -shared -nostdlib -Wl,--version-script=link.T
+ fpic := -fPIC -DPIC
+ CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float
+ HAVE_DYNAREC := 1
+ CPU_ARCH := mips
+
# emscripten
else ifeq ($(platform), emscripten)
TARGET := $(TARGET_NAME)_libretro_$(platform).bc
diff --git a/arm/arm_stub.S b/arm/arm_stub.S
index f0b7f52..5917e82 100644
--- a/arm/arm_stub.S
+++ b/arm/arm_stub.S
@@ -43,8 +43,8 @@ _##symbol:
#define CPU_HALT_STATE (30 * 4)
#define CHANGED_PC_STATUS (31 * 4)
#define COMPLETED_FRAME (32 * 4)
-
-#define MAIN_THREAD_SP (33 * 4)
+#define OAM_UPDATED (33 * 4)
+#define MAIN_THREAD_SP (34 * 4)
#define reg_a0 r0
#define reg_a1 r1
@@ -538,7 +538,7 @@ return_to_main:
@ The instruction at LR is not an inst but a u32 data that contains the PC
@ Used for SMC. That's why return is essentially `pc = lr + 4`
-#define execute_store_body(store_type, store_op) ;\
+#define execute_store_body(store_type) ;\
save_flags() ;\
str lr, [reg_base, #REG_SAVE3] /* save lr */;\
str r4, [reg_base, #REG_SAVE2] /* save r4 */;\
@@ -559,7 +559,7 @@ ptr_tbl_##store_type: ;\
.word ext_store_u##store_type /* 0x04: I/O regs */;\
.word ext_store_u##store_type /* 0x05: palette RAM */;\
.word ext_store_vram_u##store_type /* 0x06: vram */;\
- .word ext_store_u##store_type /* 0x07: oam ram */;\
+ .word ext_store_oam_ram_u##store_type /* 0x07: oam ram */;\
.word ext_store_u##store_type /* 0x08: gamepak: ignore */;\
.word ext_store_u##store_type /* 0x09: gamepak: ignore */;\
.word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\
@@ -576,11 +576,11 @@ ext_store_ignore:
add pc, lr, #4 @ return
-#define execute_store_builder(store_type, store_op, load_op) ;\
+#define execute_store_builder(store_type, store_op, store_op16, load_op) ;\
;\
.align 2 ;\
defsymbl(execute_store_u##store_type) ;\
- execute_store_body(store_type, store_op) ;\
+ execute_store_body(store_type) ;\
;\
ext_store_u##store_type: ;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
@@ -619,7 +619,16 @@ ext_store_vram_u##store_type: ;\
cmp r0, #0x18000 /* Check if exceeds 96KB */;\
subcs r0, r0, #0x8000 /* Mirror to the last bank */;\
ldr r2, =(vram) /* r2 = vram base */;\
- store_op r1, [r0, r2] /* store data */;\
+ store_op16 r1, [r0, r2] /* store data */;\
+ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
+ restore_flags() ;\
+ add pc, lr, #4 /* return */;\
+ ;\
+ext_store_oam_ram_u##store_type: ;\
+ mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\
+ add r2, reg_base, #256 /* r2 = oam ram base */;\
+ store_op16 r1, [r0, r2] /* store data */;\
+ str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
restore_flags() ;\
add pc, lr, #4 /* return */;\
@@ -631,14 +640,14 @@ ext_store_vram_u##store_type: ;\
b smc_write /* perform smc write */;\
-execute_store_builder(8, strb, ldrb)
-execute_store_builder(16, strh, ldrh)
-execute_store_builder(32, str, ldr)
+execute_store_builder(8, strb, strh, ldrb)
+execute_store_builder(16, strh, strh, ldrh)
+execute_store_builder(32, str, str, ldr)
@ This is a store that is executed in a strm case (so no SMC checks in-between)
defsymbl(execute_store_u32_safe)
- execute_store_body(32_safe, str)
+ execute_store_body(32_safe)
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return
@@ -671,6 +680,14 @@ ext_store_vram_u32_safe:
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return
+ext_store_oam_ram_u32_safe:
+ mask_addr_8(10) @ Mask to mirror memory (no need to align!)
+ add r2, reg_base, #256 @ r2 = oam ram base
+ str r1, [r0, r2] @ store data
+ str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here
+ restore_flags()
+ ldr pc, [reg_base, #REG_SAVE3] @ return
+
write_epilogue:
cmp r0, #0 @ check if the write rose an alert
beq 4f @ if not we can exit
@@ -827,6 +844,8 @@ defsymbl(reg_mode)
defsymbl(reg)
.space 0x100, 0
+defsymbl(oam_ram)
+ .space 0x400
@ Vita and 3DS (and of course mmap) map their own cache sections through some
@ platform-speficic mechanisms.
diff --git a/cpu.c b/cpu.c
index ea0d69e..badb9c2 100644
--- a/cpu.c
+++ b/cpu.c
@@ -1630,6 +1630,7 @@ void raise_interrupt(irq_type irq_raised)
#ifndef HAVE_DYNAREC
u8 *memory_map_read [8 * 1024];
+u16 oam_ram[512];
u16 palette_ram[512];
u16 palette_ram_converted[512];
#endif
diff --git a/cpu.h b/cpu.h
index fc57626..2b250ca 100644
--- a/cpu.h
+++ b/cpu.h
@@ -85,7 +85,8 @@ typedef enum
CPU_MODE = 29,
CPU_HALT_STATE = 30,
CHANGED_PC_STATUS = 31,
- COMPLETED_FRAME = 32
+ COMPLETED_FRAME = 32,
+ OAM_UPDATED = 33
} ext_reg_numbers;
typedef enum
@@ -146,7 +147,6 @@ extern u8 *ram_translation_ptr;
extern u32 idle_loop_target_pc;
extern u32 iwram_stack_optimize;
-extern u32 direct_map_vram;
extern u32 translation_gate_targets;
extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES];
diff --git a/gba_memory.c b/gba_memory.c
index a51f183..b66dce7 100644
--- a/gba_memory.c
+++ b/gba_memory.c
@@ -305,7 +305,6 @@ u32 gamepak_waitstate_sequential[2][3][3] =
}
};
-u16 oam_ram[512];
u16 io_registers[1024 * 16];
u8 ewram[1024 * 256 * 2];
u8 iwram[1024 * 32 * 2];
@@ -342,14 +341,9 @@ gamepak_swap_entry_type *gamepak_memory_map;
// a lot.
FILE *gamepak_file_large = NULL;
-u32 direct_map_vram = 0;
-
// Writes to these respective locations should trigger an update
// so the related subsystem may react to it.
-// If OAM is written to:
-u32 oam_update = 1;
-
// If GBC audio is written to:
u32 gbc_sound_update = 0;
@@ -755,7 +749,7 @@ cpu_alert_type function_cc write_io_register8(u32 address, u32 value)
u32 dispcnt = io_registers[REG_DISPCNT];
if((value & 0x07) != (dispcnt & 0x07))
- oam_update = 1;
+ reg[OAM_UPDATED] = 1;
address8(io_registers, 0x00) = value;
break;
@@ -1171,7 +1165,7 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value)
{
u32 dispcnt = io_registers[REG_DISPCNT];
if((value & 0x07) != (dispcnt & 0x07))
- oam_update = 1;
+ reg[OAM_UPDATED] = 1;
address16(io_registers, 0x00) = value;
break;
@@ -1934,7 +1928,7 @@ void function_cc write_rtc(u32 address, u32 value)
\
case 0x07: \
/* OAM RAM */ \
- oam_update = 1; \
+ reg[OAM_UPDATED] = 1; \
address##type(oam_ram, address & 0x3FF) = value; \
break; \
\
@@ -2529,7 +2523,7 @@ dma_region_type dma_region_map[16] =
dma_smc_vars_##type()
#define dma_oam_ram_dest() \
- oam_update = 1 \
+ reg[OAM_UPDATED] = 1 \
#define dma_vars_oam_ram(type) \
dma_oam_ram_##type() \
@@ -3331,7 +3325,7 @@ void gba_load_state(const void* src)
wipe_caches();
#endif
- oam_update = 1;
+ reg[OAM_UPDATED] = 1;
gbc_sound_update = 1;
for(i = 0; i < 512; i++)
diff --git a/main.c b/main.c
index 73371e4..2a82338 100644
--- a/main.c
+++ b/main.c
@@ -158,7 +158,7 @@ u32 update_gba(void)
if((dispstat & 0x01) == 0)
{
u32 i;
- if(oam_update)
+ if(reg[OAM_UPDATED])
oam_update_count++;
if(no_alpha)
diff --git a/psp/mips_emit.h b/psp/mips_emit.h
index b996f2b..818b724 100644
--- a/psp/mips_emit.h
+++ b/psp/mips_emit.h
@@ -2512,7 +2512,8 @@ u8 swi_hle_handle[256] =
#define ReOff_SaveR1 (21*4) // 3 save scratch regs
#define ReOff_SaveR2 (22*4)
#define ReOff_SaveR3 (23*4)
-#define ReOff_GP_Save (32*4) // GP_SAVE
+#define ReOff_OamUpd (33*4) // OAM_UPDATED
+#define ReOff_GP_Save (34*4) // GP_SAVE
// Saves all regs to their right slot and loads gp
#define emit_save_regs(save_a2) { \
@@ -2629,6 +2630,7 @@ typedef struct {
bool check_smc; // Whether the memory can contain code
bool bus16; // Whether it can only be accessed at 16bit
u32 baseptr; // Memory base address.
+ u32 baseoff; // Offset from base_reg
} t_stub_meminfo;
// Generates the stub to access memory for a given region, access type,
@@ -2737,7 +2739,11 @@ static void emit_pmemld_stub(
} else {
// Generate upper bits of the addr and do addr mirroring
// (The address hi16 is rounded up since load uses signed offset)
- mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
+ if (!meminfo->baseoff) {
+ mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
+ } else {
+ base_addr = meminfo->baseoff;
+ }
if (region == 2) {
// Can't do EWRAM with an `andi` instruction (18 bits mask)
@@ -2760,8 +2766,9 @@ static void emit_pmemld_stub(
mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
} else {
// Generate regular (<=32KB) mirroring
- mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring)
- mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr
+ mips_reg_number breg = (meminfo->baseoff ? reg_base : reg_rv);
+ mips_emit_andi(reg_temp, reg_a0, memmask); // Clear upper bits (mirroring)
+ mips_emit_addu(reg_rv, breg, reg_temp); // Adds to base addr
}
}
@@ -2873,9 +2880,8 @@ static void emit_pmemst_stub(
// Post processing store:
// Signal that OAM was updated
if (region == 7) {
- u32 palcaddr = (u32)&oam_update;
- mips_emit_lui(reg_temp, ((palcaddr + 0x8000) >> 16));
- mips_emit_sw(reg_base, reg_temp, palcaddr & 0xffff); // Write any nonzero data
+ // Write any nonzero data
+ mips_emit_sw(reg_base, reg_base, ReOff_OamUpd);
generate_function_return_swap_delay();
}
else {
@@ -3154,7 +3160,7 @@ static void emit_phand(
mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7)
}
- unsigned tbloff = 256 + 2048 + 220 + 4 * toff; // Skip regs and palettes
+ unsigned tbloff = 256 + 3*1024 + 220 + 4 * toff; // Skip regs and RAMs
mips_emit_addu(reg_rv, reg_temp, reg_base); // Add to the base_reg the table offset
mips_emit_lw(reg_rv, reg_rv, tbloff); // Read addr from table
mips_emit_sll(reg_temp, reg_rv, 4); // 26 bit immediate to the MSB
@@ -3229,21 +3235,21 @@ void init_emitter() {
// Generate memory handlers
const t_stub_meminfo ldinfo [] = {
- { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom },
+ { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom, 0},
// 1 Open load / Ignore store
- { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram }, // memsize wrong on purpose
- { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] },
- { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers },
- { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram },
- { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case
- { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram },
- { emit_pmemld_stub, 8, 0x8000, false, false, 0 },
- { emit_pmemld_stub, 9, 0x8000, false, false, 0 },
- { emit_pmemld_stub, 10, 0x8000, false, false, 0 },
- { emit_pmemld_stub, 11, 0x8000, false, false, 0 },
- { emit_pmemld_stub, 12, 0x8000, false, false, 0 },
+ { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram, 0 }, // memsize wrong on purpose
+ { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 },
+ { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers, 0 },
+ { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram, 0x100 },
+ { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case
+ { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 },
+ { emit_pmemld_stub, 8, 0x8000, false, false, 0, 0 },
+ { emit_pmemld_stub, 9, 0x8000, false, false, 0, 0 },
+ { emit_pmemld_stub, 10, 0x8000, false, false, 0, 0 },
+ { emit_pmemld_stub, 11, 0x8000, false, false, 0, 0 },
+ { emit_pmemld_stub, 12, 0x8000, false, false, 0, 0 },
// 13 is EEPROM mapped already (a bit special)
- { emit_pmemld_stub, 14, 0, false, false, 0 }, // Mapped via function call
+ { emit_pmemld_stub, 14, 0, false, false, 0, 0 }, // Mapped via function call
// 15 Open load / Ignore store
};
@@ -3267,12 +3273,12 @@ void init_emitter() {
}
const t_stub_meminfo stinfo [] = {
- { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram },
- { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] },
+ { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram, 0 },
+ { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 },
// I/O is special and mapped with a function call
- { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram },
- { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case
- { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram },
+ { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram, 0x100 },
+ { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case
+ { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 },
};
// Store only for "regular"-ish mem regions
diff --git a/psp/mips_stub.S b/psp/mips_stub.S
index 3d046d8..1c4ad4b 100644
--- a/psp/mips_stub.S
+++ b/psp/mips_stub.S
@@ -40,6 +40,7 @@
.global reg_check
.global palette_ram
.global palette_ram_converted
+.global oam_ram
.global init_emitter
.global mips_lookup_pc
.global smc_write
@@ -52,6 +53,7 @@
.global reg
.global spsr
.global reg_mode
+.global oam_update
# MIPS register layout:
@@ -116,13 +118,15 @@
.equ CPU_HALT_STATE, (30 * 4)
.equ CHANGED_PC_STATUS, (31 * 4)
.equ COMPLETED_FRAME, (32 * 4)
-.equ GP_SAVE, (33 * 4)
+.equ OAM_UPDATED, (33 * 4)
+.equ GP_SAVE, (34 * 4)
-.equ SPSR_BASE, (0x900)
-.equ REGMODE_BASE, (0x900 + 24)
+.equ SPSR_BASE, (0x100 + 0x400 * 3)
+.equ REGMODE_BASE, (SPSR_BASE + 24)
.equ SUPERVISOR_SPSR, (3 * 4 + SPSR_BASE)
.equ SUPERVISOR_LR, ((3 * (7 * 4)) + (6 * 4) + REGMODE_BASE)
-.equ FNPTRS_BASE, (0x900 + 220 + 960)
+.equ FNPTRS_MEMOPS, (REGMODE_BASE + 196)
+.equ FNPTRS_BASE, (FNPTRS_MEMOPS + 960)
.set noat
.set noreorder
@@ -623,6 +627,8 @@ palette_ram:
.space 0x400
palette_ram_converted:
.space 0x400
+oam_ram:
+ .space 0x400
spsr:
.space 24 # u32[6]
reg_mode:
diff --git a/video.c b/video.c
index 23cd368..4221f25 100644
--- a/video.c
+++ b/video.c
@@ -4429,10 +4429,10 @@ void update_scanline(void)
// If OAM has been modified since the last scanline has been updated then
// reorder and reprofile the OBJ lists.
- if(oam_update)
+ if(reg[OAM_UPDATED])
{
order_obj(video_mode);
- oam_update = 0;
+ reg[OAM_UPDATED] = 0;
}
order_layers((dispcnt >> 8) & active_layers[video_mode]);
diff --git a/x86/x86_stub.S b/x86/x86_stub.S
index 9dd3fdd..333c8fd 100644
--- a/x86/x86_stub.S
+++ b/x86/x86_stub.S
@@ -28,7 +28,6 @@ _##symbol:
#ifndef _WIN32
# External symbols (data + functions)
-#define _oam_update oam_update
#define _iwram iwram
#define _ewram ewram
#define _vram vram
@@ -50,7 +49,6 @@ _##symbol:
#define _execute_store_cpsr_body execute_store_cpsr_body
#endif
-.global _oam_update
.global _iwram
.global _ewram
.global _vram
@@ -75,6 +73,7 @@ _##symbol:
.equ CPU_HALT_STATE, (30 * 4)
.equ CHANGED_PC_STATUS, (31 * 4)
.equ COMPLETED_FRAME, (32 * 4)
+.equ OAM_UPDATED, (33 * 4)
# destroys ecx and edx
@@ -241,7 +240,7 @@ ext_store_vram8b:
ret
ext_store_oam8:
- movl $1, _oam_update # flag OAM update
+ movl $1, OAM_UPDATED(%ebx) # flag OAM update
and $0x3FE, %eax # wrap around address and align to 16bits
mov %dl, %dh # copy lower 8bits of value into full 16bits
mov %dx, _oam_ram(%eax) # perform 16bit store
@@ -332,7 +331,7 @@ ext_store_vram16b:
ret
ext_store_oam16:
- movl $1, _oam_update # flag OAM update
+ movl $1, OAM_UPDATED(%ebx) # flag OAM update
and $0x3FF, %eax # wrap around address
mov %dx, _oam_ram(%eax) # perform 16bit store
ret
@@ -410,7 +409,7 @@ ext_store_vram32b:
ret
ext_store_oam32:
- movl $1, _oam_update # flag OAM update
+ movl $1, OAM_UPDATED(%ebx) # flag OAM update
and $0x3FF, %eax # wrap around address
mov %edx, _oam_ram(%eax) # perform 32bit store
ret
@@ -539,6 +538,8 @@ defsymbl(palette_ram)
.space 0x400
defsymbl(palette_ram_converted)
.space 0x400
+defsymbl(oam_ram)
+ .space 0x400
defsymbl(spsr)
.space 24
defsymbl(reg_mode)