aboutsummaryrefslogtreecommitdiff
path: root/plugins/gpu_unai
diff options
context:
space:
mode:
authornotaz2011-08-12 01:17:36 +0300
committernotaz2011-08-13 00:56:40 +0300
commit1db5bc1ed7f992d7c2686f5359e53301eba2b2ca (patch)
tree951af157c0c23d247590df443ac514181bf9a635 /plugins/gpu_unai
parent53636f1583b53672d6908eff1aeff7213ee11e34 (diff)
downloadpcsx_rearmed-1db5bc1ed7f992d7c2686f5359e53301eba2b2ca.tar.gz
pcsx_rearmed-1db5bc1ed7f992d7c2686f5359e53301eba2b2ca.tar.bz2
pcsx_rearmed-1db5bc1ed7f992d7c2686f5359e53301eba2b2ca.zip
gpu_unai: fix inline asm statements
Newer gcc versions reorder the separate asm statements and allocate registers incorrectly because the constraints were incomplete. Not that this code is any better than what gcc generates anyway, but whatever.
Diffstat (limited to 'plugins/gpu_unai')
-rw-r--r--plugins/gpu_unai/Makefile2
-rw-r--r--plugins/gpu_unai/gpu_inner_blend.h123
-rw-r--r--plugins/gpu_unai/gpu_inner_light.h12
3 files changed, 75 insertions, 62 deletions
diff --git a/plugins/gpu_unai/Makefile b/plugins/gpu_unai/Makefile
index e7640c6..026d950 100644
--- a/plugins/gpu_unai/Makefile
+++ b/plugins/gpu_unai/Makefile
@@ -8,8 +8,6 @@ ifdef MAEMO
CFLAGS += -DMAEMO
endif
CFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -mfloat-abi=softfp
-# -fschedule-insns (from -O2+) causes bugs, probably bad asm() statements
-CFLAGS += -fno-schedule-insns -fno-schedule-insns2
TARGET = gpuPCSX4ALL.so
LDFLAGS += -shared -Wl,-soname,$(TARGET)
diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h
index 20977ea..ce439d3 100644
--- a/plugins/gpu_unai/gpu_inner_blend.h
+++ b/plugins/gpu_unai/gpu_inner_blend.h
@@ -26,10 +26,11 @@
#ifdef __arm__
#define gpuBlending00(uSrc,uDst) \
{ \
- asm ("and %[src], %[src], %[msk] " : [src] "=r" (uSrc) : "0" (uSrc), [msk] "r" (uMsk) ); \
- asm ("and %[dst], %[dst], %[msk] " : [dst] "=r" (uDst) : "0" (uDst), [msk] "r" (uMsk) ); \
- asm ("add %[src], %[dst], %[src] " : [src] "=r" (uSrc) : [dst] "r" (uDst), "0" (uSrc) ); \
- asm ("mov %[src], %[src], lsr #1 " : [src] "=r" (uSrc) : "0" (uSrc) ); \
+ asm ("and %[src], %[src], %[msk]\n" \
+ "and %[dst], %[dst], %[msk]\n" \
+ "add %[src], %[dst], %[src]\n" \
+ "mov %[src], %[src], lsr #1\n" \
+ : [src] "=&r" (uSrc), [dst] "=&r" (uDst) : "0" (uSrc), "1" (uDst), [msk] "r" (uMsk)); \
}
#else
#define gpuBlending00(uSrc,uDst) \
@@ -42,24 +43,26 @@
#ifdef __arm__
#define gpuBlending01(uSrc,uDst) \
{ \
- u16 st,dt,out; \
- asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \
- asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \
- asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \
- asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
- asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \
- asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \
- asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
- asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
- asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \
- asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \
- asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \
+ u32 st,dt,out; \
+ asm ("and %[dt], %[dst], #0x7C00\n" \
+ "and %[st], %[src], #0x7C00\n" \
+ "add %[out], %[dt], %[st] \n" \
+ "cmp %[out], #0x7C00 \n" \
+ "movhi %[out], #0x7C00 \n" \
+ "and %[dt], %[dst], #0x03E0\n" \
+ "and %[st], %[src], #0x03E0\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x03E0 \n" \
+ "movhi %[dt], #0x03E0 \n" \
+ "orr %[out], %[out], %[dt] \n" \
+ "and %[dt], %[dst], #0x001F\n" \
+ "and %[st], %[src], #0x001F\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x001F \n" \
+ "movhi %[dt], #0x001F \n" \
+ "orr %[src], %[out], %[dt] \n" \
+ : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
+ : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
}
#else
#define gpuBlending01(uSrc,uDst) \
@@ -76,20 +79,28 @@
#ifdef __arm__
#define gpuBlending02(uSrc,uDst) \
{ \
- u16 st,dt,out; \
- asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("subs %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) : "cc" ); \
- asm ("movmi %[out], #0x0000 " : [out] "=r" (out) : "0" (out) ); \
- asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \
- asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
- asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \
- asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
- asm ("mov %[uSrc], %[out]" : [uSrc] "=r" (uSrc) : [out] "r" (out) ); \
+ u32 st,dt,out; \
+ asm ("and %[dt], %[dst], #0x7C00\n" \
+ "and %[st], %[src], #0x7C00\n" \
+ "subs %[out], %[dt], %[st] \n" \
+ "movmi %[out], #0x0000 \n" \
+ "and %[dt], %[dst], #0x03E0\n" \
+ "and %[st], %[src], #0x03E0\n" \
+ "subs %[dt], %[dt], %[st] \n" \
+ "orrpl %[out], %[out], %[dt] \n" \
+ "and %[dt], %[dst], #0x001F\n" \
+ "and %[st], %[src], #0x001F\n" \
+ "subs %[dt], %[dt], %[st] \n" \
+ "orrpl %[out], %[out], %[dt] \n" \
+ "mov %[src], %[out] \n" \
+ : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
+ : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
+}
+
+int btest(int s, int d)
+{
+ gpuBlending02(s, d);
+ return s;
}
#else
#define gpuBlending02(uSrc,uDst) \
@@ -106,25 +117,27 @@
#ifdef __arm__
#define gpuBlending03(uSrc,uDst) \
{ \
- u16 st,dt,out; \
- asm ("mov %[src], %[src], lsr #2 " : [src] "=r" (uSrc) : "0" (uSrc) ); \
- asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x1C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \
- asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \
- asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \
- asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x00E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
- asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \
- asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \
- asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
- asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
- asm ("and %[st], %[src], #0x0007 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
- asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
- asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \
- asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \
- asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \
+ u32 st,dt,out; \
+ asm ("mov %[src], %[src], lsr #2 \n" \
+ "and %[dt], %[dst], #0x7C00\n" \
+ "and %[st], %[src], #0x1C00\n" \
+ "add %[out], %[dt], %[st] \n" \
+ "cmp %[out], #0x7C00 \n" \
+ "movhi %[out], #0x7C00 \n" \
+ "and %[dt], %[dst], #0x03E0\n" \
+ "and %[st], %[src], #0x00E0\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x03E0 \n" \
+ "movhi %[dt], #0x03E0 \n" \
+ "orr %[out], %[out], %[dt] \n" \
+ "and %[dt], %[dst], #0x001F\n" \
+ "and %[st], %[src], #0x0007\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x001F \n" \
+ "movhi %[dt], #0x001F \n" \
+ "orr %[src], %[out], %[dt] \n" \
+ : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
+ : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
}
#else
#define gpuBlending03(uSrc,uDst) \
diff --git a/plugins/gpu_unai/gpu_inner_light.h b/plugins/gpu_unai/gpu_inner_light.h
index b9e48bc..d291418 100644
--- a/plugins/gpu_unai/gpu_inner_light.h
+++ b/plugins/gpu_unai/gpu_inner_light.h
@@ -27,11 +27,13 @@
#define gpuLightingRGB(uSrc,lCol) \
{ \
u32 cb,cg; \
- asm ("and %[cb], %[lCol], #0x7C00/32 " : [cb] "=r" (cb) : [lCol] "r" (lCol) ); \
- asm ("and %[cg], %[lCol], #0x03E0*2048 " : [cg] "=r" (cg) : [lCol] "r" (lCol) ); \
- asm ("mov %[res], %[lCol], lsr #27 " : [res] "=r" (uSrc) : [lCol] "r" (lCol) ); \
- asm ("orr %[res], %[res], %[cb], lsl #5 " : [res] "=r" (uSrc) : "0" (uSrc), [cb] "r" (cb) ); \
- asm ("orr %[res], %[res], %[cg], lsr #11 " : [res] "=r" (uSrc) : "0" (uSrc), [cg] "r" (cg) ); \
+ asm ("and %[cb], %[lCol], #0x7C00/32 \n" \
+ "and %[cg], %[lCol], #0x03E0*2048 \n" \
+ "mov %[res], %[lCol], lsr #27\n" \
+ "orr %[res], %[res], %[cb], lsl #5 \n" \
+ "orr %[res], %[res], %[cg], lsr #11\n" \
+ : [res] "=&r" (uSrc), [cb] "=&r" (cb), [cg] "=&r" (cg) \
+ : [lCol] "r" (lCol)); \
}
#else
#define gpuLightingRGB(uSrc,lCol) uSrc=((lCol<<5)&0x7C00) | ((lCol>>11)&0x3E0) | (lCol>>27)