about summary refs log tree commit diff
path: root/plugins/gpu_unai
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/gpu_unai')
-rw-r--r-- plugins/gpu_unai/Makefile 2
-rw-r--r-- plugins/gpu_unai/gpu_inner_blend.h 123
-rw-r--r-- plugins/gpu_unai/gpu_inner_light.h 12
3 files changed, 75 insertions, 62 deletions
diff --git a/plugins/gpu_unai/Makefile b/plugins/gpu_unai/Makefile
index e7640c6..026d950 100644
--- a/plugins/gpu_unai/Makefile
+++ b/plugins/gpu_unai/Makefile
@@ -8,8 +8,6 @@ ifdef MAEMO
CFLAGS += -DMAEMO
endif
CFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -mfloat-abi=softfp
-# -fschedule-insns (from -O2+) causes bugs, probably bad asm() statements
-CFLAGS += -fno-schedule-insns -fno-schedule-insns2
TARGET = gpuPCSX4ALL.so
LDFLAGS += -shared -Wl,-soname,$(TARGET)
diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h
index 20977ea..ce439d3 100644
--- a/plugins/gpu_unai/gpu_inner_blend.h
+++ b/plugins/gpu_unai/gpu_inner_blend.h
@@ -26,10 +26,11 @@
#ifdef __arm__
#define gpuBlending00(uSrc,uDst) \
{ \
- asm ("and %[src], %[src], %[msk] " : [src] "=r" (uSrc) : "0" (uSrc), [msk] "r" (uMsk) ); \
- asm ("and %[dst], %[dst], %[msk] " : [dst] "=r" (uDst) : "0" (uDst), [msk] "r" (uMsk) ); \
- asm ("add %[src], %[dst], %[src] " : [src] "=r" (uSrc) : [dst] "r" (uDst), "0" (uSrc) ); \
- asm ("mov %[src], %[src], lsr #1 " : [src] "=r" (uSrc) : "0" (uSrc) ); \
+ asm ("and %[src], %[src], %[msk]\n" \
+ "and %[dst], %[dst], %[msk]\n" \
+ "add %[src], %[dst], %[src]\n" \
+ "mov %[src], %[src], lsr #1\n" \
+ : [src] "=&r" (uSrc), [dst] "=&r" (uDst) : "0" (uSrc), "1" (uDst), [msk] "r" (uMsk)); \
}
#else
#define gpuBlending00(uSrc,uDst) \
@@ -42,24 +43,26 @@
#ifdef __arm__
#define gpuBlending01(uSrc,uDst) \
{ \
- u16 st,dt,out; \
- asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \
- asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \
- asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \
- asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
- asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \
- asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \
- asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
- asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
- asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \
- asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \
- asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \
+ u32 st,dt,out; \
+ asm ("and %[dt], %[dst], #0x7C00\n" \
+ "and %[st], %[src], #0x7C00\n" \
+ "add %[out], %[dt], %[st] \n" \
+ "cmp %[out], #0x7C00 \n" \
+ "movhi %[out], #0x7C00 \n" \
+ "and %[dt], %[dst], #0x03E0\n" \
+ "and %[st], %[src], #0x03E0\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x03E0 \n" \
+ "movhi %[dt], #0x03E0 \n" \
+ "orr %[out], %[out], %[dt] \n" \
+ "and %[dt], %[dst], #0x001F\n" \
+ "and %[st], %[src], #0x001F\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x001F \n" \
+ "movhi %[dt], #0x001F \n" \
+ "orr %[src], %[out], %[dt] \n" \
+ : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
+ : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
}
#else
#define gpuBlending01(uSrc,uDst) \
@@ -76,20 +79,28 @@
#ifdef __arm__
#define gpuBlending02(uSrc,uDst) \
{ \
- u16 st,dt,out; \
- asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("subs %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) : "cc" ); \
- asm ("movmi %[out], #0x0000 " : [out] "=r" (out) : "0" (out) ); \
- asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \
- asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
- asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \
- asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
- asm ("mov %[uSrc], %[out]" : [uSrc] "=r" (uSrc) : [out] "r" (out) ); \
+ u32 st,dt,out; \
+ asm ("and %[dt], %[dst], #0x7C00\n" \
+ "and %[st], %[src], #0x7C00\n" \
+ "subs %[out], %[dt], %[st] \n" \
+ "movmi %[out], #0x0000 \n" \
+ "and %[dt], %[dst], #0x03E0\n" \
+ "and %[st], %[src], #0x03E0\n" \
+ "subs %[dt], %[dt], %[st] \n" \
+ "orrpl %[out], %[out], %[dt] \n" \
+ "and %[dt], %[dst], #0x001F\n" \
+ "and %[st], %[src], #0x001F\n" \
+ "subs %[dt], %[dt], %[st] \n" \
+ "orrpl %[out], %[out], %[dt] \n" \
+ "mov %[src], %[out] \n" \
+ : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
+ : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
+}
+
+int btest(int s, int d)
+{
+ gpuBlending02(s, d);
+ return s;
}
#else
#define gpuBlending02(uSrc,uDst) \
@@ -106,25 +117,27 @@
#ifdef __arm__
#define gpuBlending03(uSrc,uDst) \
{ \
- u16 st,dt,out; \
- asm ("mov %[src], %[src], lsr #2 " : [src] "=r" (uSrc) : "0" (uSrc) ); \
- asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x1C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \
- asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \
- asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \
- asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x00E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
- asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \
- asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \
- asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
- asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x0007 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
- asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \
- asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \
- asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \
+ u32 st,dt,out; \
+ asm ("mov %[src], %[src], lsr #2 \n" \
+ "and %[dt], %[dst], #0x7C00\n" \
+ "and %[st], %[src], #0x1C00\n" \
+ "add %[out], %[dt], %[st] \n" \
+ "cmp %[out], #0x7C00 \n" \
+ "movhi %[out], #0x7C00 \n" \
+ "and %[dt], %[dst], #0x03E0\n" \
+ "and %[st], %[src], #0x00E0\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x03E0 \n" \
+ "movhi %[dt], #0x03E0 \n" \
+ "orr %[out], %[out], %[dt] \n" \
+ "and %[dt], %[dst], #0x001F\n" \
+ "and %[st], %[src], #0x0007\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x001F \n" \
+ "movhi %[dt], #0x001F \n" \
+ "orr %[src], %[out], %[dt] \n" \
+ : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
+ : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
}
#else
#define gpuBlending03(uSrc,uDst) \
diff --git a/plugins/gpu_unai/gpu_inner_light.h b/plugins/gpu_unai/gpu_inner_light.h
index b9e48bc..d291418 100644
--- a/plugins/gpu_unai/gpu_inner_light.h
+++ b/plugins/gpu_unai/gpu_inner_light.h
@@ -27,11 +27,13 @@
#define gpuLightingRGB(uSrc,lCol) \
{ \
u32 cb,cg; \
- asm ("and %[cb], %[lCol], #0x7C00/32 " : [cb] "=r" (cb) : [lCol] "r" (lCol) ); \
- asm ("and %[cg], %[lCol], #0x03E0*2048 " : [cg] "=r" (cg) : [lCol] "r" (lCol) ); \
- asm ("mov %[res], %[lCol], lsr #27 " : [res] "=r" (uSrc) : [lCol] "r" (lCol) ); \
- asm ("orr %[res], %[res], %[cb], lsl #5 " : [res] "=r" (uSrc) : "0" (uSrc), [cb] "r" (cb) ); \
- asm ("orr %[res], %[res], %[cg], lsr #11 " : [res] "=r" (uSrc) : "0" (uSrc), [cg] "r" (cg) ); \
+ asm ("and %[cb], %[lCol], #0x7C00/32 \n" \
+ "and %[cg], %[lCol], #0x03E0*2048 \n" \
+ "mov %[res], %[lCol], lsr #27\n" \
+ "orr %[res], %[res], %[cb], lsl #5 \n" \
+ "orr %[res], %[res], %[cg], lsr #11\n" \
+ : [res] "=&r" (uSrc), [cb] "=&r" (cb), [cg] "=&r" (cg) \
+ : [lCol] "r" (lCol)); \
}
#else
#define gpuLightingRGB(uSrc,lCol) uSrc=((lCol<<5)&0x7C00) | ((lCol>>11)&0x3E0) | (lCol>>27)