diff --git a/Makefile b/Makefile
index ecefd12..91fc859 100644
--- a/Makefile
+++ b/Makefile
@@ -451,6 +451,20 @@ else ifeq ($(platform), rs90)
 	HAVE_DYNAREC := 1
 	CPU_ARCH := mips
 
+else ifeq ($(platform), trimui)
+	TARGET := $(TARGET_NAME)_libretro.so
+	CC = $(CROSS_COMPILE)gcc
+	SHARED := -shared -Wl,--version-script=link.T -mcpu=arm926ej-s -mtune=arm926ej-s
+	CFLAGS += -ffast-math -fdata-sections -ffunction-sections -flto -fno-PIC -DOVERCLOCK_60FPS
+	LDFLAGS += -flto
+	ifeq (,$(DEBUG)) # DEBUG unset or empty: strip symbols from the linked output
+		LDFLAGS += -s
+	endif
+	HAVE_DYNAREC := 1
+	CPU_ARCH_ARM_BLENDING_OPTS := 1
+	CPU_ARCH := arm
+	OPTIMIZE := -Ofast -DNDEBUG
+
 # Windows
 else
 	TARGET := $(TARGET_NAME)_libretro.dll
@@ -477,7 +491,14 @@ endif
 ifeq ($(DEBUG), 1)
 	OPTIMIZE := -O0 -g
 else
-	OPTIMIZE := -O3 -DNDEBUG
+	OPTIMIZE ?= -O3 -DNDEBUG
+endif # the ?= default above keeps an OPTIMIZE set earlier (the trimui block sets -Ofast)
+
+ifeq ($(PROFILE), GENERATE) # instrumented build: emit profile data under ./profile/gpsp
+	CFLAGS += -fprofile-generate=./profile/gpsp
+	LDFLAGS += -lgcov
+else ifeq ($(PROFILE), APPLY) # optimize using profile data read from ../profile/gpsp
+	CFLAGS += -fprofile-use -fprofile-dir=../profile/gpsp -fbranch-probabilities
 endif
 
 DEFINES := -DHAVE_STRINGS_H -DHAVE_STDINT_H -DHAVE_INTTYPES_H -D__LIBRETRO__ -DINLINE=inline -Wall
diff --git a/arm/arm_stub.S b/arm/arm_stub.S
index cb68726..6593cf2 100644
--- a/arm/arm_stub.S
+++ b/arm/arm_stub.S
@@ -79,8 +79,26 @@ _##symbol:
 
 #define IOREG_OFF 0x8D00
 
-#define extract_u16(rd, rs) \
+#if __ARM_ARCH >= 6 /* uxth is only available from ARMv6 on */
+#define extract_u16(rd, rs) ;\
   uxth rd, rs
+#else /* older cores: zero-extend the low halfword with two masks */
+#define extract_u16(rd, rs) ;\
+  bic rd, rs, #0xff000000 /* clear bits 31-24 */;\
+  bic rd, rd, #0x00ff0000 /* clear bits 23-16, leaving bits 15-0 */
+#endif
+
+#if __ARM_ARCH >= 6 /* usat is only available from ARMv6 on */
+#define sat_u4(rd, rs, shift) ;\
+  usat rd, #4, rs, shift
+#else /* older cores: clamp the shifted value to [0, 15] by hand */
+#define sat_u4(rd, rs, shift) ;\
+  mov rd, rs, shift /* rd = rs with the shift applied */;\
+  bic rd, rd, rd, asr #31 /* negative -> 0 (sign mask clears all bits) */;\
+  sub rd, rd, #15 ;\
+  and rd, rd, rd, asr #31 /* keep rd-15 only when it is negative, else 0 */;\
+  add rd, rd, #15 /* rd = min(rd, 15) */
+#endif
 
 @ Will load the register set from memory into the appropriate cached registers.
 @ See arm_emit.h for listing explanation.
@@ -538,7 +556,7 @@ return_to_main:
 #define execute_store_builder(store_type, str_op, str_op16, load_op, tnum) ;\
 ;\
 defsymbl(execute_store_u##store_type) ;\
-  usat r2, #4, r0, asr #24 /* r2 contains [0-15] */;\
+  sat_u4(r2, r0, asr #24) /* r2 contains [0-15] */;\
   add r2, r2, #((STORE_TBL_OFF + 16*4*tnum) >> 2) /* add table offset */;\
   ldr pc, [reg_base, r2, lsl #2] /* load handler addr */;\
   nop ;\
@@ -627,7 +645,7 @@ execute_store_builder(32, str, str, ldr, 2)
 @ This is a store that is executed in a strm case (so no SMC checks in-between)
 
 defsymbl(execute_store_u32_safe)
-  usat r2, #4, r0, asr #24
+  sat_u4(r2, r0, asr #24) /* r2 contains [0-15] */
   add r2, r2, #((STORE_TBL_OFF + 16*4*3) >> 2)
   ldr pc, [reg_base, r2, lsl #2]
   nop
@@ -772,9 +790,9 @@ lookup_pc_arm:
 defsymbl(execute_load_##load_type) ;\
 .if albits >= 1 ;\
   ror r1, r0, #(albits) /* move alignment bits to MSB */;\
-  usat r1, #4, r1, asr #(24-albits) /* r1 contains [0-15] */;\
+  sat_u4(r1, r1, asr #(24-albits)) /* r1 contains [0-15] */;\
 .else ;\
-  usat r1, #4, r0, asr #24 /* r1 contains [0-15] */;\
+  sat_u4(r1, r0, asr #24) /* r1 contains [0-15] */;\
 .endif ;\
   add r1, r1, #((STORE_TBL_OFF + 16*4*tnum) >> 2) /* add table offset */;\
   ldr pc, [reg_base, r1, lsl #2] /* load handler addr */;\