From 56dc6ecb70e6fc76d32d6a7194acb273b76bfe0e Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Mon, 8 Mar 2021 18:44:03 +0100 Subject: Remove libco This removes libco and all the usages of it (+pthreads). Rewired all dynarecs and interpreter to return after every frame so that libretro can process events. This required making the dynarecs re-entrant. Dynarecs were updated to check for a new frame on every update (IRQ, cycle exhaustion, I/O write, etc). The performance impact of doing so should be minimal (and is definitely outweighed by the gains from dropping libco). While at it, fixed small issues to get a bit more perf: arm dynarec was not idling correctly, mips was using stack when not needed, etc. Tested on PSP (mips), OGA (armv7), Linux (x86 and interpreter). Not tested on Android though. --- Makefile | 11 -- Makefile.common | 6 - arm/arm_stub.S | 65 +++++++-- common.h | 2 - cpu.c | 47 ++++--- cpu.h | 3 +- gba_memory.h | 3 +- jni/Android.mk | 3 +- libco.h | 37 ----- libco/aarch64.c | 123 ---------------- libco/amd64.c | 161 --------------------- libco/armeabi.c | 95 ------------- libco/fiber.c | 58 -------- libco/libco.c | 27 ---- libco/ppc.c | 407 ----------------------------------------------------- libco/psp1.c | 45 ------ libco/psp2.c | 116 --------------- libco/scefiber.c | 96 ------------- libco/sjlj.c | 115 --------------- libco/ucontext.c | 81 ----------- libco/x86.c | 117 --------------- libretro.c | 87 +----------- main.c | 7 +- psp/mips_stub.S | 90 ++++++++---- retro_emu_thread.c | 175 ----------------------- retro_emu_thread.h | 51 ------- x86/x86_stub.S | 44 +++++- 27 files changed, 196 insertions(+), 1876 deletions(-) delete mode 100644 libco.h delete mode 100644 libco/aarch64.c delete mode 100644 libco/amd64.c delete mode 100644 libco/armeabi.c delete mode 100644 libco/fiber.c delete mode 100644 libco/libco.c delete mode 100644 libco/ppc.c delete mode 100644 libco/psp1.c delete mode 100644 libco/psp2.c delete mode 100644 libco/scefiber.c delete mode 100644 
libco/sjlj.c delete mode 100644 libco/ucontext.c delete mode 100644 libco/x86.c delete mode 100644 retro_emu_thread.c delete mode 100644 retro_emu_thread.h diff --git a/Makefile b/Makefile index e846b3a..fa6ae4f 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,6 @@ FRONTEND_SUPPORTS_RGB565=1 FORCE_32BIT_ARCH=0 HAVE_MMAP=0 HAVE_MMAP_WIN32=0 -USE_LIBCO=1 UNAME=$(shell uname -a) @@ -391,7 +390,6 @@ else ifeq ($(platform), gcw0) SHARED := -shared -nostdlib -Wl,--version-script=link.T fpic := -fPIC CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float - USE_LIBCO = 0 # Windows else @@ -430,12 +428,6 @@ OBJECTS := $(SOURCES_C:.c=.o) $(SOURCES_ASM:.S=.o) DEFINES := -DHAVE_STRINGS_H -DHAVE_STDINT_H -DHAVE_INTTYPES_H -D__LIBRETRO__ -DINLINE=inline -Wall -ifeq ($(USE_LIBCO), 1) -DEFINES += -DUSE_LIBCO -else -LDFLAGS += -lpthread -endif - ifeq ($(HAVE_DYNAREC), 1) DEFINES += -DHAVE_DYNAREC endif @@ -491,9 +483,6 @@ endif cpu_threaded.o: cpu_threaded.c $(CC) $(CFLAGS) -Wno-unused-variable -Wno-unused-label $(OPTIMIZE_SAFE) $(INCDIRS) -c -o $@ $< -libco/libco.o: libco/libco.c - $(CC) $(INCFLAGS) $(CFLAGS) $(OPTIMIZE) -c -o $@ $< - %.o: %.S $(CC) $(ASFLAGS) $(CFLAGS) $(OPTIMIZE) -c -o $@ $< diff --git a/Makefile.common b/Makefile.common index ea75b78..7e70627 100644 --- a/Makefile.common +++ b/Makefile.common @@ -15,12 +15,6 @@ SOURCES_C := $(CORE_DIR)/main.c \ $(CORE_DIR)/libretro.c \ $(CORE_DIR)/gba_cc_lut.c -ifeq ($(USE_LIBCO), 1) -SOURCES_C += $(CORE_DIR)/libco/libco.c -else -SOURCES_C += $(CORE_DIR)/retro_emu_thread.c -endif - ifeq ($(HAVE_DYNAREC), 1) SOURCES_C += $(CORE_DIR)/cpu_threaded.c endif diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 0de4cb4..7deffc0 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -39,9 +39,9 @@ #define CPU_MODE (29 * 4) #define CPU_HALT_STATE (30 * 4) #define CHANGED_PC_STATUS (31 * 4) +#define COMPLETED_FRAME (32 * 4) -#define REG_HOST_SP (32 * 4) - +#define MAIN_THREAD_SP (33 * 4) #define reg_a0 r0 #define 
reg_a1 r1 @@ -147,11 +147,11 @@ @ registers which are important to the dynarec. #define call_c_function(function) ;\ - ldr sp, [reg_base, #REG_HOST_SP] ;\ + ldr sp, [reg_base, #MAIN_THREAD_SP] ;\ stmdb sp!, { call_c_saved_regs } ;\ bl function ;\ ldmia sp!, { call_c_saved_regs } ;\ - ldr sp, =base_reg_area ;\ + ldr sp, =reg ;\ @ Update the GBA hardware (video, sound, input, etc) @@ -186,8 +186,17 @@ _arm_update_gba_##name: ;\ collapse_flags(r0) /* update the flags */;\ ;\ store_registers_##mode() /* save out registers */;\ +wait_halt_##name: ;\ call_c_function(update_gba) /* update GBA state */;\ ;\ + ldr r1, [reg_base, #COMPLETED_FRAME] /* return if new frame */;\ + cmp r1, #0 ;\ + bne return_to_main ;\ + ;\ + ldr r1, [reg_base, #CPU_HALT_STATE] /* keep iterating if halted */;\ + cmp r1, #0 ;\ + bne wait_halt_##name ;\ + ;\ mvn reg_cycles, r0 /* load new cycle count */;\ ;\ ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\ @@ -479,14 +488,20 @@ execute_swi_function_builder(div, thumb) .globl _execute_arm_translate execute_arm_translate: _execute_arm_translate: - ldr r1, =base_reg_area @ base_reg_area to r1 - str sp, [r1, #REG_HOST_SP] @ store the current sp - ldr sp, =base_reg_area @ reg_base = sp (loading addr) + + @ save the registers to be able to return later + stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } + + ldr r1, =reg @ reg to r1 + str sp, [r1, #MAIN_THREAD_SP] @ store the current sp + ldr sp, =reg @ reg_base = sp (loading addr) mvn reg_cycles, r0 @ load cycle counter - mov r0, reg_base @ load reg_base into first param - call_c_function(move_reg) @ make reg_base the new reg ptr + @ Check whether the CPU is sleeping already, we should just wait for IRQs + ldr r1, [reg_base, #CPU_HALT_STATE] + cmp r1, #0 + bne alert_loop ldr r0, [reg_base, #REG_PC] @ r0 = current pc ldr r1, [reg_base, #REG_CPSR] @ r1 = flags @@ -506,6 +521,16 @@ _execute_arm_translate: bx r0 @ jump to first Thumb block +@ Epilogue to return to the main thread 
(whatever called execute_arm_translate) + +return_to_main: + @ restore the stack pointer + ldr sp, [reg_base, #MAIN_THREAD_SP] + @ restore the saved regs and return + ldmia sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } + bx lr + + @ Write out to memory. @ Input: @@ -607,10 +632,22 @@ write_epilogue: bne 1f @ if so do Thumb update store_registers_arm() @ save ARM registers + b alert_loop -3: +1: + store_registers_thumb() @ save Thumb registers + +alert_loop: call_c_function(update_gba) @ update GBA until CPU isn't halted + ldr r1, [reg_base, #COMPLETED_FRAME] @ Check whether a frame was completed + cmp r1, #0 + bne return_to_main + + ldr r1, [reg_base, #CPU_HALT_STATE] @ Check whether the CPU is halted + cmp r1, #0 + bne alert_loop @ Keep looping until it is + mvn reg_cycles, r0 @ load new cycle count ldr r0, [reg_base, #REG_PC] @ load new PC ldr r1, [reg_base, #REG_CPSR] @ r1 = flags @@ -622,10 +659,6 @@ write_epilogue: restore_flags() bx r0 @ jump to new ARM block -1: - store_registers_thumb() @ save Thumb registers - b 3b - 2: load_registers_thumb() call_c_function(block_lookup_address_thumb) @@ -735,6 +768,8 @@ execute_load_builder(u32, 32, ldrne, #0xF0000000) .data -base_reg_area: +.globl reg +.globl _reg +reg: .space 0x100, 0 diff --git a/common.h b/common.h index a85bc57..f7c3624 100644 --- a/common.h +++ b/common.h @@ -135,8 +135,6 @@ #define GBA_SCREEN_HEIGHT (160) #define GBA_SCREEN_PITCH (240) -void switch_to_main_thread(void); - // These must be variables, not constants. #define file_read_variable(filename_tag, variable) \ diff --git a/cpu.c b/cpu.c index c555c0b..a15b432 100644 --- a/cpu.c +++ b/cpu.c @@ -1562,8 +1562,10 @@ u32 cpu_modes_cpsr[7] = { 0x10, 0x11, 0x12, 0x13, 0x17, 0x1B, 0x1F }; // When switching modes set spsr[new_mode] to cpsr. Modifying PC as the // target of a data proc instruction will set cpsr to spsr[cpu_mode]. 
-u32 initial_reg[64]; -u32 *reg = initial_reg; +#ifndef HAVE_DYNAREC +u32 reg[64]; +#endif + u32 spsr[6]; // ARM/Thumb mode is stored in the flags directly, this is simpler than @@ -1672,9 +1674,17 @@ void execute_arm(u32 cycles) if(!pc_address_block) pc_address_block = load_gamepak_page(pc_region & 0x3FF); + cycles_remaining = cycles; while(1) { - cycles_remaining = cycles; + /* Do not execute until CPU is active */ + while(reg[CPU_HALT_STATE] != CPU_ACTIVE) { + cycles_remaining = update_gba(); + + if (reg[COMPLETED_FRAME]) + return; + } + pc = reg[REG_PC]; extract_flags(); @@ -3292,7 +3302,9 @@ skip_instruction: } while(cycles_remaining > 0); collapse_flags(); - cycles = update_gba(); + cycles_remaining = update_gba(); + if (reg[COMPLETED_FRAME]) + return; continue; do @@ -4261,19 +4273,21 @@ thumb_loop: } while(cycles_remaining > 0); collapse_flags(); - cycles = update_gba(); + cycles_remaining = update_gba(); + if (reg[COMPLETED_FRAME]) + return; continue; alert: - if(cpu_alert == CPU_ALERT_IRQ) - cycles = cycles_remaining; - else - { + if(cpu_alert != CPU_ALERT_IRQ) { collapse_flags(); - while(reg[CPU_HALT_STATE] != CPU_ACTIVE) - cycles = update_gba(); + while(reg[CPU_HALT_STATE] != CPU_ACTIVE) { + cycles_remaining = update_gba(); + if (reg[COMPLETED_FRAME]) + return; + } } } } @@ -4298,17 +4312,6 @@ void init_cpu(void) reg_mode[MODE_SUPERVISOR][5] = 0x03007FE0; } -void move_reg(u32 *new_reg) -{ - u32 i; - - for(i = 0; i < 32; i++) - new_reg[i] = reg[i]; - - reg = new_reg; -} - - #define cpu_savestate_builder(type) \ void cpu_##type##_savestate(void) \ { \ diff --git a/cpu.h b/cpu.h index 3a0d85e..0d7553a 100644 --- a/cpu.h +++ b/cpu.h @@ -82,7 +82,8 @@ typedef enum REG_SAVE3 = 23, CPU_MODE = 29, CPU_HALT_STATE = 30, - CHANGED_PC_STATUS = 31 + CHANGED_PC_STATUS = 31, + COMPLETED_FRAME = 32 } ext_reg_numbers; typedef enum diff --git a/gba_memory.h b/gba_memory.h index 946ef37..a37de47 100644 --- a/gba_memory.h +++ b/gba_memory.h @@ -208,9 +208,10 @@ extern u8 
ewram[1024 * 256 * 2]; extern u8 iwram[1024 * 32 * 2]; extern u8 *memory_map_read[8 * 1024]; -extern u32 *reg; extern u8 *memory_map_write[8 * 1024]; +extern u32 reg[64]; + extern flash_device_id_type flash_device_id; extern const u8 *state_mem_read_ptr; diff --git a/jni/Android.mk b/jni/Android.mk index 27e43ac..dc86e69 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -5,9 +5,8 @@ CORE_DIR := $(LOCAL_PATH)/.. CORE_LDLIBS := CPU_ARCH := HAVE_DYNAREC := -USE_LIBCO := 1 -COREFLAGS := -DINLINE=inline -D__LIBRETRO__ -DFRONTEND_SUPPORTS_RGB565 -DUSE_LIBCO +COREFLAGS := -DINLINE=inline -D__LIBRETRO__ -DFRONTEND_SUPPORTS_RGB565 ifeq ($(TARGET_ARCH),arm) COREFLAGS += -DARM_ARCH -DARM_MEMORY_DYNAREC diff --git a/libco.h b/libco.h deleted file mode 100644 index 1464804..0000000 --- a/libco.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - libco - version: 0.16 (2010-12-24) - license: public domain -*/ - -#ifndef LIBCO_H -#define LIBCO_H - -#ifdef LIBCO_C - #ifdef LIBCO_MP - #define thread_local __thread - #else - #define thread_local - #endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void* cothread_t; - -cothread_t co_active(void); - -cothread_t co_create(unsigned int, void (*)(void)); - -void co_delete(cothread_t); - -void co_switch(cothread_t); - -#ifdef __cplusplus -} -#endif - -/* ifndef LIBCO_H */ -#endif diff --git a/libco/aarch64.c b/libco/aarch64.c deleted file mode 100644 index 2eb9ac6..0000000 --- a/libco/aarch64.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - libco.aarch64 (2017-06-26) - author: webgeek1234 - license: public domain -*/ - -#define LIBCO_C -#include "libco.h" -#include -#include -#include -#include - -#ifndef __APPLE__ -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -static thread_local uint64_t co_active_buffer[64]; -static thread_local cothread_t co_active_handle; - -asm ( - ".globl co_switch_aarch64\n" - ".globl _co_switch_aarch64\n" - "co_switch_aarch64:\n" - "_co_switch_aarch64:\n" - " stp x8, x9, [x1]\n" - " stp x10, x11, 
[x1, #16]\n" - " stp x12, x13, [x1, #32]\n" - " stp x14, x15, [x1, #48]\n" - " str x19, [x1, #72]\n" - " stp x20, x21, [x1, #80]\n" - " stp x22, x23, [x1, #96]\n" - " stp x24, x25, [x1, #112]\n" - " stp x26, x27, [x1, #128]\n" - " stp x28, x29, [x1, #144]\n" - " mov x16, sp\n" - " stp x16, x30, [x1, #160]\n" - - " ldp x8, x9, [x0]\n" - " ldp x10, x11, [x0, #16]\n" - " ldp x12, x13, [x0, #32]\n" - " ldp x14, x15, [x0, #48]\n" - " ldr x19, [x0, #72]\n" - " ldp x20, x21, [x0, #80]\n" - " ldp x22, x23, [x0, #96]\n" - " ldp x24, x25, [x0, #112]\n" - " ldp x26, x27, [x0, #128]\n" - " ldp x28, x29, [x0, #144]\n" - " ldp x16, x17, [x0, #160]\n" - " mov sp, x16\n" - " br x17\n" - ); - -/* ASM */ -void co_switch_aarch64(cothread_t handle, cothread_t current); - -cothread_t co_create(unsigned int size, void (*entrypoint)(void)) -{ - size = (size + 1023) & ~1023; - cothread_t handle = 0; -#if HAVE_POSIX_MEMALIGN >= 1 - if (posix_memalign(&handle, 1024, size + 512) < 0) - return 0; -#else - handle = memalign(1024, size + 512); -#endif - - if (!handle) - return handle; - - uint64_t *ptr = (uint64_t*)handle; - /* Non-volatiles. */ - ptr[0] = 0; /* x8 */ - ptr[1] = 0; /* x9 */ - ptr[2] = 0; /* x10 */ - ptr[3] = 0; /* x11 */ - ptr[4] = 0; /* x12 */ - ptr[5] = 0; /* x13 */ - ptr[6] = 0; /* x14 */ - ptr[7] = 0; /* x15 */ - ptr[8] = 0; /* padding */ - ptr[9] = 0; /* x19 */ - ptr[10] = 0; /* x20 */ - ptr[11] = 0; /* x21 */ - ptr[12] = 0; /* x22 */ - ptr[13] = 0; /* x23 */ - ptr[14] = 0; /* x24 */ - ptr[15] = 0; /* x25 */ - ptr[16] = 0; /* x26 */ - ptr[17] = 0; /* x27 */ - ptr[18] = 0; /* x28 */ - ptr[20] = (uintptr_t)ptr + size + 512 - 16; /* x30, stack pointer */ - ptr[19] = ptr[20]; /* x29, frame pointer */ - ptr[21] = (uintptr_t)entrypoint; /* PC (link register x31 gets saved here). 
*/ - return handle; -} - -cothread_t co_active(void) -{ - if (!co_active_handle) - co_active_handle = co_active_buffer; - return co_active_handle; -} - -void co_delete(cothread_t handle) -{ - free(handle); -} - -void co_switch(cothread_t handle) -{ - cothread_t co_previous_handle = co_active(); - co_switch_aarch64(co_active_handle = handle, co_previous_handle); -} - -#ifdef __cplusplus -} -#endif - diff --git a/libco/amd64.c b/libco/amd64.c deleted file mode 100644 index a9f6ee5..0000000 --- a/libco/amd64.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - libco.amd64 (2009-10-12) - author: byuu - license: public domain -*/ - -#define LIBCO_C -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static thread_local long long co_active_buffer[64]; -static thread_local cothread_t co_active_handle = 0; -static void (*co_swap)(cothread_t, cothread_t) = 0; - -#ifdef _WIN32 -//ABI: Win64 -static unsigned char co_swap_function[] = { - 0x48, 0x89, 0x22, /* mov [rdx],rsp */ - 0x48, 0x8b, 0x21, /* mov rsp,[rcx] */ - 0x58, /* pop rax */ - 0x48, 0x89, 0x6a, 0x08, /* mov [rdx+0x8],rbp */ - 0x48, 0x89, 0x72, 0x10, /* mov [rdx+0x10],rsi */ - 0x48, 0x89, 0x7a, 0x18, /* mov [rdx+0x18],rdi */ - 0x48, 0x89, 0x5a, 0x20, /* mov [rdx+0x20],rbx */ - 0x4c, 0x89, 0x62, 0x28, /* mov [rdx+0x28],r12 */ - 0x4c, 0x89, 0x6a, 0x30, /* mov [rdx+0x30],r13 */ - 0x4c, 0x89, 0x72, 0x38, /* mov [rdx+0x38],r14 */ - 0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+0x40],r15 */ - 0x48, 0x81, 0xc2, 0x80, 0x00, 0x00, 0x00, /* add rdx,0x80 */ - 0x48, 0x83, 0xe2, 0xf0, /* and rdx,-0x10 */ - 0x0f, 0x29, 0x32, /* movaps [rdx],xmm6 */ - 0x0f, 0x29, 0x7a, 0x10, /* movaps [rdx+0x10],xmm7 */ - 0x44, 0x0f, 0x29, 0x42, 0x20, /* movaps [rdx+0x20],xmm8 */ - 0x44, 0x0f, 0x29, 0x4a, 0x30, /* movaps [rdx+0x30],xmm9 */ - 0x44, 0x0f, 0x29, 0x52, 0x40, /* movaps [rdx+0x40],xmm10 */ - 0x44, 0x0f, 0x29, 0x5a, 0x50, /* movaps [rdx+0x50],xmm11 */ - 0x44, 0x0f, 0x29, 0x62, 0x60, /* movaps [rdx+0x60],xmm12 */ - 0x44, 0x0f, 0x29, 
0x6a, 0x70, /* movaps [rdx+0x70],xmm13 */ - 0x44, 0x0f, 0x29, 0xb2, 0x80, 0x00, 0x00, 0x00, /* movaps [rdx+0x80],xmm14 */ - 0x44, 0x0f, 0x29, 0xba, 0x90, 0x00, 0x00, 0x00, /* movaps [rdx+0x90],xmm15 */ - 0x48, 0x8b, 0x69, 0x08, /* mov rbp,[rcx+0x8] */ - 0x48, 0x8b, 0x71, 0x10, /* mov rsi,[rcx+0x10] */ - 0x48, 0x8b, 0x79, 0x18, /* mov rdi,[rcx+0x18] */ - 0x48, 0x8b, 0x59, 0x20, /* mov rbx,[rcx+0x20] */ - 0x4c, 0x8b, 0x61, 0x28, /* mov r12,[rcx+0x28] */ - 0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+0x30] */ - 0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+0x38] */ - 0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+0x40] */ - 0x48, 0x81, 0xc1, 0x80, 0x00, 0x00, 0x00, /* add rcx,0x80 */ - 0x48, 0x83, 0xe1, 0xf0, /* and rcx,-0x10 */ - 0x0f, 0x29, 0x31, /* movaps [rcx],xmm6 */ - 0x0f, 0x29, 0x79, 0x10, /* movaps [rcx+0x10],xmm7 */ - 0x44, 0x0f, 0x29, 0x41, 0x20, /* movaps [rcx+0x20],xmm8 */ - 0x44, 0x0f, 0x29, 0x49, 0x30, /* movaps [rcx+0x30],xmm9 */ - 0x44, 0x0f, 0x29, 0x51, 0x40, /* movaps [rcx+0x40],xmm10 */ - 0x44, 0x0f, 0x29, 0x59, 0x50, /* movaps [rcx+0x50],xmm11 */ - 0x44, 0x0f, 0x29, 0x61, 0x60, /* movaps [rcx+0x60],xmm12 */ - 0x44, 0x0f, 0x29, 0x69, 0x70, /* movaps [rcx+0x70],xmm13 */ - 0x44, 0x0f, 0x29, 0xb1, 0x80, 0x00, 0x00, 0x00, /* movaps [rcx+0x80],xmm14 */ - 0x44, 0x0f, 0x29, 0xb9, 0x90, 0x00, 0x00, 0x00, /* movaps [rcx+0x90],xmm15 */ - 0xff, 0xe0, /* jmp rax */ -}; - -#include - -void co_init(void) -{ - DWORD old_privileges; - VirtualProtect(co_swap_function, - sizeof(co_swap_function), PAGE_EXECUTE_READWRITE, &old_privileges); -} -#else -//ABI: SystemV -static unsigned char co_swap_function[] = { - 0x48, 0x89, 0x26, /* mov [rsi],rsp */ - 0x48, 0x8b, 0x27, /* mov rsp,[rdi] */ - 0x58, /* pop rax */ - 0x48, 0x89, 0x6e, 0x08, /* mov [rsi+0x08],rbp */ - 0x48, 0x89, 0x5e, 0x10, /* mov [rsi+0x10],rbx */ - 0x4c, 0x89, 0x66, 0x18, /* mov [rsi+0x18],r12 */ - 0x4c, 0x89, 0x6e, 0x20, /* mov [rsi+0x20],r13 */ - 0x4c, 0x89, 0x76, 0x28, /* mov [rsi+0x28],r14 */ - 0x4c, 0x89, 0x7e, 0x30, 
/* mov [rsi+0x30],r15 */ - 0x48, 0x8b, 0x6f, 0x08, /* mov rbp,[rdi+0x08] */ - 0x48, 0x8b, 0x5f, 0x10, /* mov rbx,[rdi+0x10] */ - 0x4c, 0x8b, 0x67, 0x18, /* mov r12,[rdi+0x18] */ - 0x4c, 0x8b, 0x6f, 0x20, /* mov r13,[rdi+0x20] */ - 0x4c, 0x8b, 0x77, 0x28, /* mov r14,[rdi+0x28] */ - 0x4c, 0x8b, 0x7f, 0x30, /* mov r15,[rdi+0x30] */ - 0xff, 0xe0, /* jmp rax */ -}; - -#include -#include - -void co_init(void) -{ - unsigned long long addr = (unsigned long long)co_swap_function; - unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE)); - unsigned long long size = (addr - base) + sizeof(co_swap_function); - mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); -} -#endif - -static void crash(void) -{ - assert(0); /* called only if cothread_t entrypoint returns */ -} - -cothread_t co_active(void) -{ - if (!co_active_handle) - co_active_handle = &co_active_buffer; - return co_active_handle; -} - -cothread_t co_create(unsigned int size, void (*entrypoint)(void)) -{ - cothread_t handle; - - if(!co_swap) - { - co_init(); - co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; - } - - if (!co_active_handle) - co_active_handle = &co_active_buffer; - size += 512; /* allocate additional space for storage */ - size &= ~15; /* align stack to 16-byte boundary */ - - if((handle = (cothread_t)malloc(size))) - { - long long *p = (long long*)((char*)handle + size); /* seek to top of stack */ - *--p = (long long)crash; /* crash if entrypoint returns */ - *--p = (long long)entrypoint; /* start of function */ - *(long long*)handle = (long long)p; /* stack pointer */ - } - - return handle; -} - -void co_delete(cothread_t handle) -{ - free(handle); -} - -void co_switch(cothread_t handle) -{ - register cothread_t co_previous_handle = co_active_handle; - co_swap(co_active_handle = handle, co_previous_handle); -} - -#ifdef __cplusplus -} -#endif diff --git a/libco/armeabi.c b/libco/armeabi.c deleted file mode 100644 index c9b68d0..0000000 --- a/libco/armeabi.c +++ 
/dev/null @@ -1,95 +0,0 @@ -/* - libco.armeabi (2013-04-05) - author: Themaister - license: public domain -*/ - -#define LIBCO_C -#include -#include -#include -#include -#include - -#ifndef IOS -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -static thread_local uint32_t co_active_buffer[64]; -static thread_local cothread_t co_active_handle; - -asm ( - ".arm\n" - ".align 4\n" - ".globl co_switch_arm\n" - ".globl _co_switch_arm\n" - "co_switch_arm:\n" - "_co_switch_arm:\n" - " stmia r1!, {r4, r5, r6, r7, r8, r9, r10, r11, sp, lr}\n" - " ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11, sp, pc}\n" - ); - -/* ASM */ -void co_switch_arm(cothread_t handle, cothread_t current); - -static void crash(void) -{ - /* Called only if cothread_t entrypoint returns. */ - assert(0); -} - -cothread_t co_create(unsigned int size, void (*entrypoint)(void)) -{ - size = (size + 1023) & ~1023; - cothread_t handle = 0; -#if HAVE_POSIX_MEMALIGN >= 1 - if (posix_memalign(&handle, 1024, size + 256) < 0) - return 0; -#else - handle = memalign(1024, size + 256); -#endif - - if (!handle) - return handle; - - uint32_t *ptr = (uint32_t*)handle; - /* Non-volatiles. */ - ptr[0] = 0; /* r4 */ - ptr[1] = 0; /* r5 */ - ptr[2] = 0; /* r6 */ - ptr[3] = 0; /* r7 */ - ptr[4] = 0; /* r8 */ - ptr[5] = 0; /* r9 */ - ptr[6] = 0; /* r10 */ - ptr[7] = 0; /* r11 */ - ptr[8] = (uintptr_t)ptr + size + 256 - 4; /* r13, stack pointer */ - ptr[9] = (uintptr_t)entrypoint; /* r15, PC (link register r14 gets saved here). 
*/ - return handle; -} - -cothread_t co_active(void) -{ - if (!co_active_handle) - co_active_handle = co_active_buffer; - return co_active_handle; -} - -void co_delete(cothread_t handle) -{ - free(handle); -} - -void co_switch(cothread_t handle) -{ - cothread_t co_previous_handle = co_active(); - co_switch_arm(co_active_handle = handle, co_previous_handle); -} - -#ifdef __cplusplus -} -#endif - diff --git a/libco/fiber.c b/libco/fiber.c deleted file mode 100644 index 90ba115..0000000 --- a/libco/fiber.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - libco.win (2008-01-28) - authors: Nach, byuu - license: public domain -*/ - -#define LIBCO_C -#include -#define WINVER 0x0400 -#define _WIN32_WINNT 0x0400 -#define WIN32_LEAN_AND_MEAN -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static thread_local cothread_t co_active_ = 0; - -static void __stdcall co_thunk(void *coentry) -{ - ((void (*)(void))coentry)(); -} - -cothread_t co_active(void) -{ - if(!co_active_) - { - ConvertThreadToFiber(0); - co_active_ = GetCurrentFiber(); - } - return co_active_; -} - -cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) -{ - if(!co_active_) - { - ConvertThreadToFiber(0); - co_active_ = GetCurrentFiber(); - } - return (cothread_t)CreateFiber(heapsize, co_thunk, (void*)coentry); -} - -void co_delete(cothread_t cothread) -{ - DeleteFiber(cothread); -} - -void co_switch(cothread_t cothread) -{ - co_active_ = cothread; - SwitchToFiber(cothread); -} - -#ifdef __cplusplus -} -#endif diff --git a/libco/libco.c b/libco/libco.c deleted file mode 100644 index 95a04f5..0000000 --- a/libco/libco.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - libco - auto-selection module - license: public domain -*/ - -#if defined(__GNUC__) && defined(__i386__) || (defined(_MSC_VER) && defined(_M_IX86)) - #include "x86.c" -#elif defined(__GNUC__) && defined(__amd64__) || (defined(_MSC_VER) && defined(_M_AMD64)) - #include "amd64.c" -#elif defined(__GNUC__) && defined(_ARCH_PPC) - #include "ppc.c" -#elif 
defined(VITA) - #include "scefiber.c" -#elif defined(PSP) - #include "psp1.c" -#elif defined(__GNUC__) && defined(__aarch64__) - #include "aarch64.c" -#elif defined(__GNUC__) && (defined(__ARM_EABI__) || defined(__arm__)) - #include "armeabi.c" -#elif defined(__GNUC__) - #include "sjlj.c" -#elif defined(_MSC_VER) - #include "fiber.c" -#else - #error "libco: unsupported processor, compiler or operating system" -#endif diff --git a/libco/ppc.c b/libco/ppc.c deleted file mode 100644 index f6cb536..0000000 --- a/libco/ppc.c +++ /dev/null @@ -1,407 +0,0 @@ -/* - libco.ppc (2010-10-17) - author: blargg - license: public domain -*/ - -/* PowerPC 32/64 using embedded or external asm, with optional -floating-point and AltiVec save/restore */ - -#define LIBCO_C -#include -#include -#include -#include - -#define LIBCO_MPROTECT (__unix__ && !LIBCO_PPC_ASM) - -#if LIBCO_MPROTECT - #include - #include -#endif - -/* State format (offsets in 32-bit words) - -+0 Pointer to swap code - Rest of function descriptor for entry function -+8 PC -+10 SP - Special regs - GPRs - FPRs - VRs - stack -*/ - -enum { state_size = 1024 }; -enum { above_stack = 2048 }; -enum { stack_align = 256 }; - -static thread_local cothread_t co_active_handle = 0; - -/**** Determine environment ****/ - -#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__) - -/* Whether function calls are indirect through a descriptor, -or are directly to function */ -#ifndef LIBCO_PPCDESC - #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64) - #define LIBCO_PPCDESC 1 - #endif -#endif - -#ifdef LIBCO_PPC_ASM - - #ifdef __cplusplus - extern "C" - #endif - - /* Swap code is in ppc.S */ - void co_swap_asm( cothread_t, cothread_t ); - #define CO_SWAP_ASM( x, y ) co_swap_asm( x, y ) - -#else - -/* Swap code is here in array. Please leave dieassembly comments, -as they make it easy to see what it does, and reorder instructions -if one wants to see whether that improves performance. 
*/ -static const uint32_t libco_ppc_code [] = { -#if LIBCO_PPC64 - 0x7d000026, /* mfcr r8 */ - 0xf8240028, /* std r1,40(r4) */ - 0x7d2802a6, /* mflr r9 */ - 0xf9c40048, /* std r14,72(r4) */ - 0xf9e40050, /* std r15,80(r4) */ - 0xfa040058, /* std r16,88(r4) */ - 0xfa240060, /* std r17,96(r4) */ - 0xfa440068, /* std r18,104(r4) */ - 0xfa640070, /* std r19,112(r4) */ - 0xfa840078, /* std r20,120(r4) */ - 0xfaa40080, /* std r21,128(r4) */ - 0xfac40088, /* std r22,136(r4) */ - 0xfae40090, /* std r23,144(r4) */ - 0xfb040098, /* std r24,152(r4) */ - 0xfb2400a0, /* std r25,160(r4) */ - 0xfb4400a8, /* std r26,168(r4) */ - 0xfb6400b0, /* std r27,176(r4) */ - 0xfb8400b8, /* std r28,184(r4) */ - 0xfba400c0, /* std r29,192(r4) */ - 0xfbc400c8, /* std r30,200(r4) */ - 0xfbe400d0, /* std r31,208(r4) */ - 0xf9240020, /* std r9,32(r4) */ - 0xe8e30020, /* ld r7,32(r3) */ - 0xe8230028, /* ld r1,40(r3) */ - 0x48000009, /* bl 1 */ - 0x7fe00008, /* trap */ - 0x91040030,/*1:stw r8,48(r4) */ - 0x80c30030, /* lwz r6,48(r3) */ - 0x7ce903a6, /* mtctr r7 */ - 0xe9c30048, /* ld r14,72(r3) */ - 0xe9e30050, /* ld r15,80(r3) */ - 0xea030058, /* ld r16,88(r3) */ - 0xea230060, /* ld r17,96(r3) */ - 0xea430068, /* ld r18,104(r3) */ - 0xea630070, /* ld r19,112(r3) */ - 0xea830078, /* ld r20,120(r3) */ - 0xeaa30080, /* ld r21,128(r3) */ - 0xeac30088, /* ld r22,136(r3) */ - 0xeae30090, /* ld r23,144(r3) */ - 0xeb030098, /* ld r24,152(r3) */ - 0xeb2300a0, /* ld r25,160(r3) */ - 0xeb4300a8, /* ld r26,168(r3) */ - 0xeb6300b0, /* ld r27,176(r3) */ - 0xeb8300b8, /* ld r28,184(r3) */ - 0xeba300c0, /* ld r29,192(r3) */ - 0xebc300c8, /* ld r30,200(r3) */ - 0xebe300d0, /* ld r31,208(r3) */ - 0x7ccff120, /* mtcr r6 */ -#else - 0x7d000026, /* mfcr r8 */ - 0x90240028, /* stw r1,40(r4) */ - 0x7d2802a6, /* mflr r9 */ - 0x91a4003c, /* stw r13,60(r4) */ - 0x91c40040, /* stw r14,64(r4) */ - 0x91e40044, /* stw r15,68(r4) */ - 0x92040048, /* stw r16,72(r4) */ - 0x9224004c, /* stw r17,76(r4) */ - 0x92440050, /* stw 
r18,80(r4) */ - 0x92640054, /* stw r19,84(r4) */ - 0x92840058, /* stw r20,88(r4) */ - 0x92a4005c, /* stw r21,92(r4) */ - 0x92c40060, /* stw r22,96(r4) */ - 0x92e40064, /* stw r23,100(r4) */ - 0x93040068, /* stw r24,104(r4) */ - 0x9324006c, /* stw r25,108(r4) */ - 0x93440070, /* stw r26,112(r4) */ - 0x93640074, /* stw r27,116(r4) */ - 0x93840078, /* stw r28,120(r4) */ - 0x93a4007c, /* stw r29,124(r4) */ - 0x93c40080, /* stw r30,128(r4) */ - 0x93e40084, /* stw r31,132(r4) */ - 0x91240020, /* stw r9,32(r4) */ - 0x80e30020, /* lwz r7,32(r3) */ - 0x80230028, /* lwz r1,40(r3) */ - 0x48000009, /* bl 1 */ - 0x7fe00008, /* trap */ - 0x91040030,/*1:stw r8,48(r4) */ - 0x80c30030, /* lwz r6,48(r3) */ - 0x7ce903a6, /* mtctr r7 */ - 0x81a3003c, /* lwz r13,60(r3) */ - 0x81c30040, /* lwz r14,64(r3) */ - 0x81e30044, /* lwz r15,68(r3) */ - 0x82030048, /* lwz r16,72(r3) */ - 0x8223004c, /* lwz r17,76(r3) */ - 0x82430050, /* lwz r18,80(r3) */ - 0x82630054, /* lwz r19,84(r3) */ - 0x82830058, /* lwz r20,88(r3) */ - 0x82a3005c, /* lwz r21,92(r3) */ - 0x82c30060, /* lwz r22,96(r3) */ - 0x82e30064, /* lwz r23,100(r3) */ - 0x83030068, /* lwz r24,104(r3) */ - 0x8323006c, /* lwz r25,108(r3) */ - 0x83430070, /* lwz r26,112(r3) */ - 0x83630074, /* lwz r27,116(r3) */ - 0x83830078, /* lwz r28,120(r3) */ - 0x83a3007c, /* lwz r29,124(r3) */ - 0x83c30080, /* lwz r30,128(r3) */ - 0x83e30084, /* lwz r31,132(r3) */ - 0x7ccff120, /* mtcr r6 */ -#endif - -#ifndef LIBCO_PPC_NOFP - 0xd9c400e0, /* stfd f14,224(r4) */ - 0xd9e400e8, /* stfd f15,232(r4) */ - 0xda0400f0, /* stfd f16,240(r4) */ - 0xda2400f8, /* stfd f17,248(r4) */ - 0xda440100, /* stfd f18,256(r4) */ - 0xda640108, /* stfd f19,264(r4) */ - 0xda840110, /* stfd f20,272(r4) */ - 0xdaa40118, /* stfd f21,280(r4) */ - 0xdac40120, /* stfd f22,288(r4) */ - 0xdae40128, /* stfd f23,296(r4) */ - 0xdb040130, /* stfd f24,304(r4) */ - 0xdb240138, /* stfd f25,312(r4) */ - 0xdb440140, /* stfd f26,320(r4) */ - 0xdb640148, /* stfd f27,328(r4) */ - 0xdb840150, /* 
stfd f28,336(r4) */ - 0xdba40158, /* stfd f29,344(r4) */ - 0xdbc40160, /* stfd f30,352(r4) */ - 0xdbe40168, /* stfd f31,360(r4) */ - 0xc9c300e0, /* lfd f14,224(r3) */ - 0xc9e300e8, /* lfd f15,232(r3) */ - 0xca0300f0, /* lfd f16,240(r3) */ - 0xca2300f8, /* lfd f17,248(r3) */ - 0xca430100, /* lfd f18,256(r3) */ - 0xca630108, /* lfd f19,264(r3) */ - 0xca830110, /* lfd f20,272(r3) */ - 0xcaa30118, /* lfd f21,280(r3) */ - 0xcac30120, /* lfd f22,288(r3) */ - 0xcae30128, /* lfd f23,296(r3) */ - 0xcb030130, /* lfd f24,304(r3) */ - 0xcb230138, /* lfd f25,312(r3) */ - 0xcb430140, /* lfd f26,320(r3) */ - 0xcb630148, /* lfd f27,328(r3) */ - 0xcb830150, /* lfd f28,336(r3) */ - 0xcba30158, /* lfd f29,344(r3) */ - 0xcbc30160, /* lfd f30,352(r3) */ - 0xcbe30168, /* lfd f31,360(r3) */ -#endif - -#ifdef __ALTIVEC__ - 0x7ca042a6, /* mfvrsave r5 */ - 0x39040180, /* addi r8,r4,384 */ - 0x39240190, /* addi r9,r4,400 */ - 0x70a00fff, /* andi. r0,r5,4095 */ - 0x90a40034, /* stw r5,52(r4) */ - 0x4182005c, /* beq- 2 */ - 0x7e8041ce, /* stvx v20,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7ea049ce, /* stvx v21,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7ec041ce, /* stvx v22,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7ee049ce, /* stvx v23,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f0041ce, /* stvx v24,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7f2049ce, /* stvx v25,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f4041ce, /* stvx v26,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7f6049ce, /* stvx v27,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f8041ce, /* stvx v28,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7fa049ce, /* stvx v29,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7fc041ce, /* stvx v30,r0,r8 */ - 0x7fe049ce, /* stvx v31,r0,r9 */ - 0x80a30034,/*2:lwz r5,52(r3) */ - 0x39030180, /* addi r8,r3,384 */ - 0x39230190, /* addi r9,r3,400 */ - 0x70a00fff, /* andi. 
r0,r5,4095 */ - 0x7ca043a6, /* mtvrsave r5 */ - 0x4d820420, /* beqctr */ - 0x7e8040ce, /* lvx v20,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7ea048ce, /* lvx v21,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7ec040ce, /* lvx v22,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7ee048ce, /* lvx v23,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f0040ce, /* lvx v24,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7f2048ce, /* lvx v25,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f4040ce, /* lvx v26,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7f6048ce, /* lvx v27,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f8040ce, /* lvx v28,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7fa048ce, /* lvx v29,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7fc040ce, /* lvx v30,r0,r8 */ - 0x7fe048ce, /* lvx v31,r0,r9 */ -#endif - - 0x4e800420, /* bctr */ -}; - - #if LIBCO_PPCDESC - /* Function call goes through indirect descriptor */ - #define CO_SWAP_ASM( x, y ) \ - ((void (*)( cothread_t, cothread_t )) (uintptr_t) x)( x, y ) - #else - /* Function call goes directly to code */ - #define CO_SWAP_ASM( x, y ) \ - ((void (*)( cothread_t, cothread_t )) (uintptr_t) libco_ppc_code)( x, y ) - #endif - -#endif - -static uint32_t* co_create_( unsigned size, uintptr_t entry ) -{ - uint32_t* t = (uint32_t*) malloc( size ); - - (void) entry; - - #if LIBCO_PPCDESC - if ( t ) - { - /* Copy entry's descriptor */ - memcpy( t, (void*) entry, sizeof (void*) * 3 ); - - /* Set function pointer to swap routine */ - #ifdef LIBCO_PPC_ASM - *(const void**) t = *(void**) &co_swap_asm; - #else - *(const void**) t = libco_ppc_code; - #endif - } - #endif - - return t; -} - -cothread_t co_create( unsigned int size, void (*entry_)( void ) ) -{ - uintptr_t entry = (uintptr_t) entry_; - uint32_t* t = NULL; - - /* Be sure main thread was successfully allocated */ - if ( co_active() ) - { - size += state_size + above_stack + stack_align; - t = co_create_( size, entry ); - } - - if ( t ) - { - uintptr_t sp; - 
int shift; - - /* Save current registers into new thread, so that any special ones will - have proper values when thread is begun */ - CO_SWAP_ASM( t, t ); - - #if LIBCO_PPCDESC - /* Get real address */ - entry = (uintptr_t) *(void**) entry; - #endif - - /* Put stack near end of block, and align */ - sp = (uintptr_t) t + size - above_stack; - sp -= sp % stack_align; - - /* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we - save and restore them as 64 bits, regardless of the size the ABI - uses. So, we manually write pointers at the proper size. We always - save and restore at the same address, and since PPC is big-endian, - we must put the low byte first on PPC32. */ - - /* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts - and don't have to care how many bits uintptr_t is. */ - #if LIBCO_PPC64 - shift = 16; - #else - shift = 0; - #endif - - /* Set up so entry will be called on next swap */ - t [8] = (uint32_t) (entry >> shift >> shift); - t [9] = (uint32_t) entry; - - t [10] = (uint32_t) (sp >> shift >> shift); - t [11] = (uint32_t) sp; - } - - return t; -} - -void co_delete( cothread_t t ) -{ - free(t); -} - -static void co_init_( void ) -{ -#if LIBCO_MPROTECT - /* TODO: pre- and post-pad PPC code so that this doesn't make other - data executable and writable */ - long page_size = sysconf( _SC_PAGESIZE ); - if ( page_size > 0 ) - { - uintptr_t align = page_size; - uintptr_t begin = (uintptr_t) libco_ppc_code; - uintptr_t end = begin + sizeof libco_ppc_code; - - /* Align beginning and end */ - end += align - 1; - end -= end % align; - begin -= begin % align; - - mprotect( (void*) begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC ); - } -#endif - - co_active_handle = co_create_( state_size, (uintptr_t) &co_switch ); -} - -cothread_t co_active(void) -{ - if (!co_active_handle) - co_init_(); - - return co_active_handle; -} - -void co_switch(cothread_t t) -{ - cothread_t old = co_active_handle; - co_active_handle = t; - - 
CO_SWAP_ASM( t, old ); -} diff --git a/libco/psp1.c b/libco/psp1.c deleted file mode 100644 index 90702ce..0000000 --- a/libco/psp1.c +++ /dev/null @@ -1,45 +0,0 @@ -#define LIBCO_C -#include "libco.h" - -#include -#include - -/* Since cothread_t is a void pointer it must contain an address. We can't return a reference to a local variable - * because it would go out of scope, so we create a static variable instead so we can return a reference to it. - */ -static SceUID active_thread_id = 0; - -cothread_t co_active() -{ - active_thread_id = sceKernelGetThreadId(); - return &active_thread_id; -} - -cothread_t co_create(unsigned int size, void (*entrypoint)(void)) -{ - /* Similar scenario as with active_thread_id except there will only be one active_thread_id while there could be many - * new threads each with their own handle, so we create them on the heap instead and delete them manually when they're - * no longer needed in co_delete(). - */ - cothread_t handle = malloc(sizeof(cothread_t)); - - /* SceKernelThreadEntry has a different signature than entrypoint, but in practice this seems to work */ - SceUID new_thread_id = sceKernelCreateThread("cothread", (SceKernelThreadEntry)entrypoint, 0x12, size, 0, NULL); - sceKernelStartThread(new_thread_id, 0, NULL); - - *(SceUID *)handle = new_thread_id; - return handle; -} - -void co_delete(cothread_t handle) -{ - sceKernelTerminateDeleteThread(*(SceUID *)handle); - free(handle); -} - -void co_switch(cothread_t handle) -{ - sceKernelWakeupThread(*(SceUID *)handle); - /* Sleep the currently active thread so the new thread can start */ - sceKernelSleepThread(); -} diff --git a/libco/psp2.c b/libco/psp2.c deleted file mode 100644 index 3ab1bc7..0000000 --- a/libco/psp2.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - libco.arm (2016-08-14) - author: frangarcj - license: public domain -*/ - -#define LIBCO_C -#include "libco.h" - -#include -#include -#include -#include -#include -#include -#ifdef __cplusplus -extern "C" { -#endif - 
-static inline int align(int x, int n) { - return (((x >> n) + 1) << n ); -} -#define FOUR_KB_ALIGN(x) align(x, 12) -#define MB_ALIGN(x) align(x, 20) - -static thread_local unsigned long co_active_buffer[64]; -static thread_local cothread_t co_active_handle = 0; -static void (*co_swap)(cothread_t, cothread_t) = 0; -static int block; -static uint32_t co_swap_function[] = { - 0xe8a16ff0, /* stmia r1!, {r4-r11,sp,lr} */ - 0xe8b0aff0, /* ldmia r0!, {r4-r11,sp,pc} */ - 0xe12fff1e, /* bx lr */ -}; - -void co_init() { - int ret; - void *base; - - block = sceKernelAllocMemBlockForVM("libco", MB_ALIGN(FOUR_KB_ALIGN(sizeof co_swap_function))); - if (block < 0) - { - return; - } - - // get base address - ret = sceKernelGetMemBlockBase(block, &base); - if (ret < 0) - { - return; - } - - // set domain to be writable by user - ret = sceKernelOpenVMDomain(); - if (ret < 0) - { - return; - } - - - memcpy(base,co_swap_function,sizeof co_swap_function); - - // set domain back to read-only - ret = sceKernelCloseVMDomain(); - if (ret < 0) - { - return; - } - - // flush icache - ret = sceKernelSyncVMDomain(block, base, MB_ALIGN(FOUR_KB_ALIGN(sizeof co_swap_function))); - if (ret < 0) - { - return; - } - - co_swap = (void (*)(cothread_t, cothread_t))base; - - -} - -cothread_t co_active() { - if(!co_active_handle) co_active_handle = &co_active_buffer; - return co_active_handle; -} - -cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - unsigned long* handle = 0; - if(!co_swap) { - co_init(); - } - if(!co_active_handle) co_active_handle = &co_active_buffer; - size += 256; - size &= ~15; - - if((handle = (unsigned long*)malloc(size))) { - unsigned long* p = (unsigned long*)((unsigned char*)handle + size); - handle[8] = (unsigned long)p; - handle[9] = (unsigned long)entrypoint; - } - - return handle; -} - -void co_delete(cothread_t handle) { - free(handle); -} - -void co_switch(cothread_t handle) { - cothread_t co_previous_handle = co_active_handle; - 
co_swap(co_active_handle = handle, co_previous_handle); -} - -#ifdef __cplusplus -} -#endif diff --git a/libco/scefiber.c b/libco/scefiber.c deleted file mode 100644 index a233bec..0000000 --- a/libco/scefiber.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - libco.win (2016-09-06) - authors: frangarcj - license: public domain -*/ - -#define LIBCO_C -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static thread_local cothread_t co_active_ = 0; - -typedef struct SceFiber { - char reserved[128]; -} SceFiber __attribute__( ( aligned ( 8 ) ) ) ; - -int32_t _sceFiberInitializeImpl(SceFiber* fiber, char* name, void* entry, uint32_t argOnInitialize, void* addrContext, int32_t sizeContext, void* params); - -int32_t sceFiberFinalize(SceFiber* fiber); - -int32_t sceFiberRun(SceFiber* fiber, uint32_t argOnRunTo, uint32_t* argOnRun); - -int32_t sceFiberSwitch(SceFiber* fiber, uint32_t argOnRunTo, uint32_t* argOnRun); - -int32_t sceFiberReturnToThread(uint32_t argOnReturn, uint32_t* argOnRun); - -void co_thunk(uint32_t argOnInitialize, uint32_t argOnRun) -{ - ((void (*)(void))argOnInitialize)(); -} - -cothread_t co_active(void) -{ - if(!co_active_) - { - sceSysmoduleLoadModule(SCE_SYSMODULE_FIBER); - co_active_ = (cothread_t)1; - } - return co_active_; -} - -cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) -{ - SceFiber* tailFiber = malloc(sizeof(SceFiber)); - char * m_contextBuffer = malloc(sizeof(char)*heapsize); - if(!co_active_) - { - sceSysmoduleLoadModule(SCE_SYSMODULE_FIBER); - co_active_ = (cothread_t)1; - } - - //_sceFiberInitializeImpl - int ret = _sceFiberInitializeImpl(tailFiber, "tailFiber", co_thunk, (uint32_t)coentry, (void*) m_contextBuffer, heapsize, NULL); - if(ret==0){ - return (cothread_t)tailFiber; - }else{ - return (cothread_t)ret; - } - -} - -void co_delete(cothread_t cothread) -{ - if(cothread == (cothread_t)1){ - return; - } - sceFiberFinalize((SceFiber*)cothread); -} - -void co_switch(cothread_t cothread) -{ - - 
uint32_t argOnReturn = 0; - if(cothread == (cothread_t)1){ - co_active_ = cothread; - sceFiberReturnToThread(0, NULL); - }else{ - SceFiber* theFiber = (SceFiber*)cothread; - if(co_active_ == (cothread_t)1){ - co_active_ = cothread; - sceFiberRun(theFiber, 0, &argOnReturn); - }else{ - co_active_ = cothread; - sceFiberSwitch(theFiber, 0, &argOnReturn); - } - } -} - -#ifdef __cplusplus -} -#endif diff --git a/libco/sjlj.c b/libco/sjlj.c deleted file mode 100644 index f074714..0000000 --- a/libco/sjlj.c +++ /dev/null @@ -1,115 +0,0 @@ -/* - libco.sjlj (2008-01-28) - author: Nach - license: public domain -*/ - -/* - * Note this was designed for UNIX systems. Based on ideas expressed in a paper - * by Ralf Engelschall. - * For SJLJ on other systems, one would want to rewrite springboard() and - * co_create() and hack the jmb_buf stack pointer. - */ - -#define LIBCO_C -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct -{ - sigjmp_buf context; - void (*coentry)(void); - void *stack; -} cothread_struct; - -static thread_local cothread_struct co_primary; -static thread_local cothread_struct *creating, *co_running = 0; - -static void springboard(int ignored) -{ - if(sigsetjmp(creating->context, 0)) - co_running->coentry(); -} - -cothread_t co_active(void) -{ - if (!co_running) - co_running = &co_primary; - return (cothread_t)co_running; -} - -cothread_t co_create(unsigned int size, void (*coentry)(void)) -{ - if(!co_running) - co_running = &co_primary; - - cothread_struct *thread = (cothread_struct*)malloc(sizeof(cothread_struct)); - - if(thread) - { - struct sigaction handler; - struct sigaction old_handler; - - stack_t stack; - stack_t old_stack; - - thread->coentry = thread->stack = 0; - - stack.ss_flags = 0; - stack.ss_size = size; - thread->stack = stack.ss_sp = malloc(size); - - if(stack.ss_sp && !sigaltstack(&stack, &old_stack)) - { - handler.sa_handler = springboard; - handler.sa_flags = SA_ONSTACK; - 
sigemptyset(&handler.sa_mask); - creating = thread; - - if(!sigaction(SIGUSR1, &handler, &old_handler)) - { - if(!raise(SIGUSR1)) - thread->coentry = coentry; - sigaltstack(&old_stack, 0); - sigaction(SIGUSR1, &old_handler, 0); - } - } - - if(thread->coentry != coentry) - { - co_delete(thread); - thread = 0; - } - } - - return (cothread_t)thread; -} - -void co_delete(cothread_t cothread) -{ - if(cothread) - { - if(((cothread_struct*)cothread)->stack) - free(((cothread_struct*)cothread)->stack); - free(cothread); - } -} - -void co_switch(cothread_t cothread) -{ - if(!sigsetjmp(co_running->context, 0)) - { - co_running = (cothread_struct*)cothread; - siglongjmp(co_running->context, 1); - } -} - -#ifdef __cplusplus -} -#endif diff --git a/libco/ucontext.c b/libco/ucontext.c deleted file mode 100644 index 3e21816..0000000 --- a/libco/ucontext.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - libco.ucontext (2008-01-28) - author: Nach - license: public domain -*/ - -/* - * WARNING: the overhead of POSIX ucontext is very high, - * assembly versions of libco or libco_sjlj should be much faster - * - * This library only exists for two reasons: - * 1 - as an initial test for the viability of a ucontext implementation - * 2 - to demonstrate the power and speed of libco over existing implementations, - * such as pth (which defaults to wrapping ucontext on unix targets) - * - * Use this library only as a *last resort* - */ - -#define LIBCO_C -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static thread_local ucontext_t co_primary; -static thread_local ucontext_t *co_running = 0; - -cothread_t co_active(void) -{ - if (!co_running) - co_running = &co_primary; - return (cothread_t)co_running; -} - -cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) -{ - if (!co_running) - co_running = &co_primary; - ucontext_t *thread = (ucontext_t*)malloc(sizeof(ucontext_t)); - - if(thread) - { - if((!getcontext(thread) && !(thread->uc_stack.ss_sp = 0)) && 
(thread->uc_stack.ss_sp = malloc(heapsize))) - { - thread->uc_link = co_running; - thread->uc_stack.ss_size = heapsize; - makecontext(thread, coentry, 0); - } - else - { - co_delete((cothread_t)thread); - thread = 0; - } - } - return (cothread_t)thread; -} - -void co_delete(cothread_t cothread) -{ - if (!cothread) - return; - - if(((ucontext_t*)cothread)->uc_stack.ss_sp) - free(((ucontext_t*)cothread)->uc_stack.ss_sp); - free(cothread); -} - -void co_switch(cothread_t cothread) -{ - ucontext_t *old_thread = co_running; - - co_running = (ucontext_t*)cothread; - swapcontext(old_thread, co_running); -} - -#ifdef __cplusplus -} -#endif diff --git a/libco/x86.c b/libco/x86.c deleted file mode 100644 index ae16766..0000000 --- a/libco/x86.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - libco.x86 (2009-10-12) - author: byuu - license: public domain -*/ - -#define LIBCO_C -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_MSC_VER) - #define fastcall __fastcall -#elif defined(__GNUC__) - #define fastcall __attribute__((fastcall)) -#else - #error "libco: please define fastcall macro" -#endif - -static thread_local long co_active_buffer[64]; -static thread_local cothread_t co_active_handle = 0; -static void (fastcall *co_swap)(cothread_t, cothread_t) = 0; - -//ABI: fastcall -static unsigned char co_swap_function[] = { - 0x89, 0x22, /* mov [edx],esp */ - 0x8b, 0x21, /* mov esp,[ecx] */ - 0x58, /* pop eax */ - 0x89, 0x6a, 0x04, /* mov [edx+0x04],ebp */ - 0x89, 0x72, 0x08, /* mov [edx+0x08],esi */ - 0x89, 0x7a, 0x0c, /* mov [edx+0x0c],edi */ - 0x89, 0x5a, 0x10, /* mov [edx+0x10],ebx */ - 0x8b, 0x69, 0x04, /* mov ebp,[ecx+0x04] */ - 0x8b, 0x71, 0x08, /* mov esi,[ecx+0x08] */ - 0x8b, 0x79, 0x0c, /* mov edi,[ecx+0x0c] */ - 0x8b, 0x59, 0x10, /* mov ebx,[ecx+0x10] */ - 0xff, 0xe0, /* jmp eax */ -}; - -#ifdef _WIN32 -#include - -void co_init(void) -{ - DWORD old_privileges; - VirtualProtect(co_swap_function, - sizeof co_swap_function, 
PAGE_EXECUTE_READWRITE, &old_privileges); -} -#else -#include -#include - -void co_init(void) -{ - unsigned long addr = (unsigned long)co_swap_function; - unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE)); - unsigned long size = (addr - base) + sizeof co_swap_function; - mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); -} -#endif - -static void crash(void) -{ - assert(0); /* called only if cothread_t entrypoint returns */ -} - -cothread_t co_active(void) -{ - if(!co_active_handle) - co_active_handle = &co_active_buffer; - return co_active_handle; -} - -cothread_t co_create(unsigned int size, void (*entrypoint)(void)) -{ - cothread_t handle; - if(!co_swap) - { - co_init(); - co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function; - } - - if(!co_active_handle) - co_active_handle = &co_active_buffer; - - size += 256; /* allocate additional space for storage */ - size &= ~15; /* align stack to 16-byte boundary */ - - if((handle = (cothread_t)malloc(size))) - { - long *p = (long*)((char*)handle + size); /* seek to top of stack */ - *--p = (long)crash; /* crash if entrypoint returns */ - *--p = (long)entrypoint; /* start of function */ - *(long*)handle = (long)p; /* stack pointer */ - } - - return handle; -} - -void co_delete(cothread_t handle) -{ - free(handle); -} - -void co_switch(cothread_t handle) -{ - register cothread_t co_previous_handle = co_active_handle; - co_swap(co_active_handle = handle, co_previous_handle); -} - -#ifdef __cplusplus -} -#endif diff --git a/libretro.c b/libretro.c index 397cca2..165600e 100644 --- a/libretro.c +++ b/libretro.c @@ -4,8 +4,6 @@ #include #include #include "common.h" -#include "libco.h" -#include "retro_emu_thread.h" #include "libretro.h" #include "libretro_core_options.h" #include "memmap.h" @@ -76,10 +74,6 @@ static retro_environment_t environ_cb; struct retro_perf_callback perf_cb; -#if defined(USE_LIBCO) -static cothread_t main_thread; -static cothread_t cpu_thread; -#endif int 
dynarec_enable; int use_libretro_save_method = 0; @@ -95,58 +89,6 @@ static void (*video_post_process)(void) = NULL; static bool post_process_cc = false; static bool post_process_mix = false; -void switch_to_main_thread(void) -{ -#if defined(USE_LIBCO) - co_switch(main_thread); -#else - retro_switch_thread(); -#endif -} - -static inline void switch_to_cpu_thread(void) -{ -#if defined(USE_LIBCO) - co_switch(cpu_thread); -#else - retro_switch_thread(); -#endif -} - -#if defined(USE_LIBCO) -static void cpu_thread_entry(void) -{ -#ifdef HAVE_DYNAREC - if (dynarec_enable) - execute_arm_translate(execute_cycles); -#endif - execute_arm(execute_cycles); -} -#endif - -static inline void init_context_switch(void) -{ -#if defined(USE_LIBCO) - main_thread = co_active(); - cpu_thread = co_create(0x20000, cpu_thread_entry); -#else - if (!retro_init_emu_thread(dynarec_enable, execute_cycles)) - if (log_cb) - log_cb(RETRO_LOG_ERROR, "[gpSP]: Failed to initialize emulation thread!\n"); -#endif -} - -static inline void deinit_context_switch(void) -{ -#if defined(USE_LIBCO) - co_delete(cpu_thread); -#else - retro_cancel_emu_thread(); - retro_join_emu_thread(); - retro_deinit_emu_thread(); -#endif -} - #if defined(PSP) static uint32_t next_pow2(uint32_t v) { @@ -649,12 +591,8 @@ void retro_set_controller_port_device(unsigned port, unsigned device) {} void retro_reset(void) { - deinit_context_switch(); - update_backup(); reset_gba(); - - init_context_switch(); } size_t retro_serialize_size(void) @@ -930,8 +868,6 @@ bool retro_load_game(const struct retro_game_info* info) reset_gba(); - init_context_switch(); - set_memory_descriptors(); return true; @@ -945,7 +881,6 @@ bool retro_load_game_special(unsigned game_type, void retro_unload_game(void) { - deinit_context_switch(); update_backup(); } @@ -1019,20 +954,6 @@ void retro_run(void) { bool updated = false; -#if !defined(USE_LIBCO) - if (!retro_is_emu_thread_initialized()) - { - environ_cb(RETRO_ENVIRONMENT_SHUTDOWN, NULL); - return; - 
} - if (retro_emu_thread_exited()) - { - environ_cb(RETRO_ENVIRONMENT_SHUTDOWN, NULL); - retro_join_emu_thread(); - return; - } -#endif - update_input(); input_poll_cb(); @@ -1107,7 +1028,13 @@ void retro_run(void) update_audio_latency = false; } - switch_to_cpu_thread(); + /* This runs just a frame */ + #ifdef HAVE_DYNAREC + if (dynarec_enable) + execute_arm_translate(execute_cycles); + else + #endif + execute_arm(execute_cycles); render_audio(); video_run(); diff --git a/main.c b/main.c index 0f9bb88..a2f6d8e 100644 --- a/main.c +++ b/main.c @@ -132,6 +132,7 @@ u32 update_gba(void) cpu_ticks += execute_cycles; reg[CHANGED_PC_STATUS] = 0; + reg[COMPLETED_FRAME] = 0; if(gbc_sound_update) { @@ -227,14 +228,14 @@ u32 update_gba(void) oam_update_count = 0; flush_ram_count = 0; - switch_to_main_thread(); - update_gbc_sound(cpu_ticks); gbc_sound_update = 0; process_cheats(); vcount = 0; + // We completed a frame, tell the dynarec to exit to the main thread + reg[COMPLETED_FRAME] = 1; } if(vcount == (dispstat >> 8)) @@ -267,7 +268,7 @@ u32 update_gba(void) if(timer[i].count < execute_cycles) execute_cycles = timer[i].count; } - } while(reg[CPU_HALT_STATE] != CPU_ACTIVE); + } while(reg[CPU_HALT_STATE] != CPU_ACTIVE && !reg[COMPLETED_FRAME]); return execute_cycles; } diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 420f602..c89a5b1 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -51,7 +51,6 @@ .global memory_map_write .global reg -.extern reg .extern spsr # MIPS register layout: @@ -117,7 +116,8 @@ .equ CPU_MODE, (29 * 4) .equ CPU_HALT_STATE, (30 * 4) .equ CHANGED_PC_STATUS, (31 * 4) -.equ GP_SAVE, (32 * 4) +.equ COMPLETED_FRAME, (32 * 4) +.equ GP_SAVE, (33 * 4) .equ SUPERVISOR_LR, (reg_mode + (3 * (7 * 4)) + (6 * 4)) .equ SUPERVISOR_SPSR, (spsr + (3 * 4)) @@ -206,28 +206,52 @@ .balign 64 +# This gets called every time the cycle counter runs out +# (checked at every branch/jump) mips_update_gba: sw $4, REG_PC($16) # current PC = $4 - addiu $sp, $sp, -4 # make 
room on the stack - sw $ra,($sp) # save return address + sw $ra, REG_SAVE2($16) # save return addr collapse_flags # update cpsr save_registers # save registers jal update_gba # process the next event sw $0, CHANGED_PC_STATUS($16) - lw $ra, ($sp) # restore return address - addiu $sp, $sp, 4 # fix stack + lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame + bne $1, $0, return_to_main # Return to main thread now + + addu $17, $2, $0 # $17 = new cycle count (ret value) + + lw $ra, REG_SAVE2($16) # restore return address lw $1, CHANGED_PC_STATUS($16) bne $1, $0, lookup_pc - addu $17, $2, $0 # $17 = new cycle count (delay slot) + nop restore_registers jr $ra # if not, go back to caller nop + +# Loads the main context and returns to it. +# ARM regs must be saved before branching here +return_to_main: + lw $28, GP_SAVE($16) # Restore previous state + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) + lw $s4, 16($sp) + lw $s5, 20($sp) + lw $s6, 24($sp) + lw $s7, 28($sp) + lw $fp, 32($sp) + lw $ra, 36($sp) + jr $ra # Return to main + add $sp, $sp, 48 # Restore stack pointer (delay slot) + + # Perform an indirect branch. 
# $4: GBA address to branch to @@ -2059,8 +2083,7 @@ execute_store_io_u8: region_check 4, patch_store_u8 andi $5, $5, 0xFF # make value 8bit andi $4, $4, 0x3FF # wrap around address - addiu $sp, $sp, -4 # make room on the stack for $ra - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) # preserve $ra save_registers jal write_io_register8 # write the value out @@ -2160,8 +2183,7 @@ execute_store_io_u16: region_check 4, patch_store_u16 andi $5, $5, 0xFFFF # make value 16bit andi $4, $4, 0x3FE # wrap around/align address - addiu $sp, $sp, -4 # make room on the stack for $ra - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) # preserve $ra save_registers jal write_io_register16 # write the value out @@ -2263,8 +2285,7 @@ execute_store_io_u32: region_check 4, patch_store_u32 nop andi $4, $4, 0x3FC # wrap around/align address - addiu $sp, $sp, -4 # make room on the stack for $ra - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) # preserve $ra save_registers jal write_io_register32 # write the value out @@ -2482,6 +2503,10 @@ write_io_epilogue: alert_loop: jal update_gba # process the next event nop + + lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame + bne $1, $0, return_to_main # Return to main thread now + lw $1, CPU_HALT_STATE($16) # check if CPU is sleeping bne $1, $0, alert_loop # see if it hasn't changed nop @@ -2490,21 +2515,20 @@ alert_loop: lw $4, REG_PC($16) # $4 = new PC j lookup_pc - addiu $sp, $sp, 4 # fix the stack (delay slot) + nop irq_alert: restore_registers j lookup_pc # PC has changed, get a new one - addiu $sp, $sp, 4 # fix the stack + nop no_alert: restore_registers - lw $ra, ($sp) # restore return address + lw $ra, REG_SAVE3($16) # restore return jr $ra # we can return - addiu $sp, $sp, 4 # fix the stack + nop smc_dma: - addiu $sp, $sp, 4 # fix the stack jal flush_translation_cache_ram # flush translation cache nop j lookup_pc @@ -2740,16 +2764,32 @@ ror_zero_shift: # $4: cycle counter argument execute_arm_translate: - addu $17, $4, $0 # load cycle counter 
register + add $sp, $sp, -48 # Store the main thread context + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + sw $s4, 16($sp) + sw $s5, 20($sp) + sw $s6, 24($sp) + sw $s7, 28($sp) + sw $fp, 32($sp) + sw $ra, 36($sp) + + lui $16, %hi(reg) # load reg address into base reg + addiu $16, %lo(reg) + + sw $28, GP_SAVE($16) - lui $4, %hi(arm_reg) # load arm_reg address into $4 - addiu $4, %lo(arm_reg) - - jal move_reg # update reg to point to arm_reg - addu $16, $4, $0 # copy address of arm_reg into $16 + addu $17, $4, $0 # load cycle counter register extract_flags # load flag variables + # CPU might be sleeping, do not wake it up! + lw $1, CPU_HALT_STATE($16) # check if CPU is sleeping + bne $1, $0, alert_loop # see if it hasn't changed + + lw $1, REG_CPSR($16) and $1, $1, 0x20 # see if Thumb bit is set in flags bne $1, $0, 1f @@ -2810,7 +2850,7 @@ memory_map_read: # This must be between memory_map_read and memory_map_write because it's used # to calculate their addresses elsewhere in this file. -arm_reg: +reg: .space 0x100 memory_map_write: diff --git a/retro_emu_thread.c b/retro_emu_thread.c deleted file mode 100644 index 2df7db7..0000000 --- a/retro_emu_thread.c +++ /dev/null @@ -1,175 +0,0 @@ -// This is copyrighted software. More information is at the end of this file. -#include "retro_emu_thread.h" - -#include - -static pthread_t main_thread; -static pthread_t emu_thread; -static pthread_mutex_t emu_mutex; -static pthread_mutex_t main_mutex; -static pthread_cond_t emu_cv; -static pthread_cond_t main_cv; -static bool emu_keep_waiting = true; -static bool main_keep_waiting = true; -static bool emu_has_exited = false; -static bool emu_thread_canceled = false; -static bool emu_thread_initialized = false; - -static void* retro_run_emulator(void *args) -{ - char *args_str = (char *)args; - bool dynarec = (*args_str++ == 1) ? 
true : false; - u32 cycles = strtol(args_str, NULL, 10); - - emu_has_exited = false; - emu_thread_canceled = false; - -#if defined(HAVE_DYNAREC) - if (dynarec) - execute_arm_translate(cycles); -#endif - execute_arm(cycles); - - emu_has_exited = true; - return NULL; -} - -static void retro_switch_to_emu_thread() -{ - pthread_mutex_lock(&emu_mutex); - emu_keep_waiting = false; - pthread_mutex_unlock(&emu_mutex); - pthread_mutex_lock(&main_mutex); - pthread_cond_signal(&emu_cv); - - main_keep_waiting = true; - while (main_keep_waiting) - { - pthread_cond_wait(&main_cv, &main_mutex); - } - pthread_mutex_unlock(&main_mutex); -} - -static void retro_switch_to_main_thread() -{ - pthread_mutex_lock(&main_mutex); - main_keep_waiting = false; - pthread_mutex_unlock(&main_mutex); - pthread_mutex_lock(&emu_mutex); - pthread_cond_signal(&main_cv); - - emu_keep_waiting = true; - while (emu_keep_waiting) - { - pthread_cond_wait(&emu_cv, &emu_mutex); - } - pthread_mutex_unlock(&emu_mutex); -} - -void retro_switch_thread() -{ - if (pthread_self() == main_thread) - retro_switch_to_emu_thread(); - else - retro_switch_to_main_thread(); -} - -bool retro_init_emu_thread(bool dynarec, u32 cycles) -{ - char args[256]; - args[0] = '\0'; - - if (emu_thread_initialized) - return true; - - /* Keep this very simple: - * - First character: dynarec, 0/1 - * - Remaining characters: cycles */ - snprintf(args, sizeof(args), " %u", cycles); - args[0] = dynarec ? 
1 : 0; - - main_thread = pthread_self(); - if (pthread_mutex_init(&main_mutex, NULL)) - goto main_mutex_error; - if (pthread_mutex_init(&emu_mutex, NULL)) - goto emu_mutex_error; - if (pthread_cond_init(&main_cv, NULL)) - goto main_cv_error; - if (pthread_cond_init(&emu_cv, NULL)) - goto emu_cv_error; - if (pthread_create(&emu_thread, NULL, retro_run_emulator, args)) - goto emu_thread_error; - - emu_thread_initialized = true; - return true; - -emu_thread_error: - pthread_cond_destroy(&emu_cv); -emu_cv_error: - pthread_cond_destroy(&main_cv); -main_cv_error: - pthread_mutex_destroy(&emu_mutex); -emu_mutex_error: - pthread_mutex_destroy(&main_mutex); -main_mutex_error: - return false; -} - -void retro_deinit_emu_thread() -{ - if (!emu_thread_initialized) - return; - - pthread_mutex_destroy(&main_mutex); - pthread_mutex_destroy(&emu_mutex); - pthread_cond_destroy(&main_cv); - pthread_cond_destroy(&emu_cv); - emu_thread_initialized = false; -} - -bool retro_is_emu_thread_initialized() -{ - return emu_thread_initialized; -} - -void retro_join_emu_thread() -{ - static bool is_joined = false; - if (is_joined) - return; - - pthread_join(emu_thread, NULL); - is_joined = true; -} - -void retro_cancel_emu_thread() -{ - if (emu_thread_canceled) - return; - - pthread_cancel(emu_thread); - emu_thread_canceled = true; -} - -bool retro_emu_thread_exited() -{ - return emu_has_exited; -} - -/* - -Copyright (C) 2020 Nikos Chantziaras - -This program is free software: you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, either version 2 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program. If not, see . - -*/ diff --git a/retro_emu_thread.h b/retro_emu_thread.h deleted file mode 100644 index 472ae8b..0000000 --- a/retro_emu_thread.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef EMU_THREAD_H -#define EMU_THREAD_H - -#include "common.h" - -/* gpSP doesn't have a top-level main loop that we can use, so instead we run it in its own thread - * and switch between it and the main thread. Calling this function will block the current thread - * and unblock the other. - * - * This function can be called from either the main or the emulation thread. - */ -void retro_switch_thread(void); - -/* Initialize the emulation thread and any related resources. - * - * Only call this function from the main thread. - */ -bool retro_init_emu_thread(bool dynarec, u32 cycles); - -/* Destroy the emulation thread and any related resources. Only call this after the emulation thread - * has finished (or canceled) and joined. - * - * Only call this function from the main thread. - */ -void retro_deinit_emu_thread(void); - -/* Returns true if the emulation thread was initialized successfully. - * - * This function can be called from either the main or the emulation thread. - */ -bool retro_is_emu_thread_initialized(void); - -/* Join the emulation thread. The thread must have exited naturally or been canceled. - * - * Only call this function from the main thread. - */ -void retro_join_emu_thread(void); - -/* Cancel the emulation thread. - * - * Only call this function from the main thread. - */ -void retro_cancel_emu_thread(void); - -/* Returns true if the emulation thread has exited naturally. - * - * This function can be called from either the main or the emulation thread. 
- */ -bool retro_emu_thread_exited(void); - -#endif diff --git a/x86/x86_stub.S b/x86/x86_stub.S index dd98f7a..ba997ba 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -95,6 +95,7 @@ .equ CPU_MODE, (29 * 4) .equ CPU_HALT_STATE, (30 * 4) .equ CHANGED_PC_STATUS, (31 * 4) +.equ COMPLETED_FRAME, (32 * 4) # destroys ecx and edx @@ -150,6 +151,11 @@ _x86_update_gba: call _update_gba # process the next event mov %eax, %edi # edi = new cycle count + + # did we just complete a frame? go back to main then + cmpl $0, COMPLETED_FRAME(%ebx) + jne return_to_main + # did the PC change? cmpl $1, CHANGED_PC_STATUS(%ebx) je lookup_pc @@ -197,6 +203,10 @@ write_epilogue: alert_loop: call _update_gba # process the next event + + # did we just complete a frame? go back to main then + cmpl $0, COMPLETED_FRAME(%ebx) + jne return_to_main # see if the halt status has changed mov CPU_HALT_STATE(%ebx), %edx @@ -509,24 +519,50 @@ lookup_pc_arm: # eax: cycle counter _execute_arm_translate: - movl (_reg), %ebx # load base register + # Save main context, since we need to return gracefully + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + + movl $_reg, %ebx # load base register extract_flags # load flag variables movl %eax, %edi # load edi cycle counter movl REG_PC(%ebx), %eax # load PC + # (if the CPU is halted, do not start executing but + # loop in the alert loop until it wakes up) + cmp $0, CPU_HALT_STATE(%ebx) + je 1f + call alert_loop # Need to push something to the stack + +1: testl $0x20, REG_CPSR(%ebx) - jnz 1f + jnz 2f call _block_lookup_address_arm jmp *%eax # jump to it -1: +2: call _block_lookup_address_thumb jmp *%eax +return_to_main: + add $4, %esp # remove current return addr + popl %ebp + popl %edi + popl %esi + popl %ebx + ret + +.data +.align 64 + +_reg: + .space 0x100, 0 + .comm _memory_map_read 0x8000 .comm _memory_map_write 0x8000 -.comm _reg 4 -- cgit v1.2.3 From 0522d9a4f535a61953da64518097fc7f64d7120c Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 
9 Mar 2021 19:29:18 +0100 Subject: Add workaround for Android ARM builds While we are at it, use ARM mode for better performance. --- cpu_threaded.c | 6 ++++++ jni/Android.mk | 1 + 2 files changed, 7 insertions(+) diff --git a/cpu_threaded.c b/cpu_threaded.c index df563d2..fd8d44f 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -54,7 +54,13 @@ u8 *rom_translation_ptr = rom_translation_cache; u8 *ram_translation_ptr = ram_translation_cache; u8 *bios_translation_ptr = bios_translation_cache; #elif defined(ARM_MEMORY_DYNAREC) + +#ifdef __ANDROID__ +// Workaround for 'attempt to map x bytes at offset y' +__asm__(".section .jit,\"awx\",%progbits"); +#else __asm__(".section .jit,\"awx\",%nobits"); +#endif u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE] __attribute__ ((aligned(4),section(".jit"))); diff --git a/jni/Android.mk b/jni/Android.mk index dc86e69..92f8929 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -36,4 +36,5 @@ LOCAL_SRC_FILES := $(SOURCES_C) $(SOURCES_ASM) LOCAL_CFLAGS := $(COREFLAGS) $(INCFLAGS) LOCAL_LDFLAGS := -Wl,-version-script=$(CORE_DIR)/link.T LOCAL_LDLIBS := $(CORE_LDLIBS) +LOCAL_ARM_MODE := arm include $(BUILD_SHARED_LIBRARY) -- cgit v1.2.3