aboutsummaryrefslogtreecommitdiff
path: root/plugins/gpu_unai
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/gpu_unai')
-rw-r--r--plugins/gpu_unai/Makefile5
-rw-r--r--plugins/gpu_unai/README_senquack.txt956
-rw-r--r--plugins/gpu_unai/gpu.cpp1061
-rw-r--r--plugins/gpu_unai/gpu.h99
-rw-r--r--plugins/gpu_unai/gpu_blit.h24
-rw-r--r--plugins/gpu_unai/gpu_command.h667
-rw-r--r--plugins/gpu_unai/gpu_fixedpoint.h107
-rw-r--r--plugins/gpu_unai/gpu_inner.h914
-rw-r--r--plugins/gpu_unai/gpu_inner_blend.h268
-rw-r--r--plugins/gpu_unai/gpu_inner_blend_arm5.h100
-rw-r--r--plugins/gpu_unai/gpu_inner_blend_arm7.h107
-rw-r--r--plugins/gpu_unai/gpu_inner_light.h293
-rw-r--r--plugins/gpu_unai/gpu_inner_quantization.h108
-rw-r--r--plugins/gpu_unai/gpu_raster_image.h98
-rw-r--r--plugins/gpu_unai/gpu_raster_line.h874
-rw-r--r--plugins/gpu_unai/gpu_raster_polygon.h1997
-rw-r--r--plugins/gpu_unai/gpu_raster_sprite.h219
-rw-r--r--plugins/gpu_unai/gpu_unai.h318
-rw-r--r--plugins/gpu_unai/gpulib_if.cpp708
19 files changed, 6158 insertions, 2765 deletions
diff --git a/plugins/gpu_unai/Makefile b/plugins/gpu_unai/Makefile
index 1075ee5..756d19a 100644
--- a/plugins/gpu_unai/Makefile
+++ b/plugins/gpu_unai/Makefile
@@ -1,6 +1,9 @@
CFLAGS += -ggdb -Wall -O3 -ffast-math
CFLAGS += -DREARMED
CFLAGS += -I../../include
+#CFLAGS += -DINLINE="static __inline__"
+#CFLAGS += -Dasm="__asm__ __volatile__"
+CFLAGS += -DUSE_GPULIB=1
include ../../config.mak
@@ -8,7 +11,7 @@ SRC_STANDALONE += gpu.cpp
SRC_GPULIB += gpulib_if.cpp
ifeq "$(ARCH)" "arm"
-SRC += gpu_arm.s
+SRC += gpu_arm.S
endif
#BIN_STANDALONE = gpuPCSX4ALL.so
diff --git a/plugins/gpu_unai/README_senquack.txt b/plugins/gpu_unai/README_senquack.txt
new file mode 100644
index 0000000..cda17fc
--- /dev/null
+++ b/plugins/gpu_unai/README_senquack.txt
@@ -0,0 +1,956 @@
+//NOTE: You can find the set of original Unai poly routines (disabled now)
+// at the bottom end of this file.
+
+//senquack - Original Unai GPU poly routines have been replaced with new
+// ones based on DrHell routines. The original routines suffered from
+// shifted rows, causing many quads to have their first triangle drawn
+// correctly, but the second triangle would randomly have pixels shifted
+// either left or right or entire rows not drawn at all. Furthermore,
+// sometimes entire triangles seemed to be either missing or only
+// partially drawn (most clearly seen in sky/road textures in NFS3,
+// clock tower in beginning of Castlevania SOTN). Pixel gaps were
+// prevalent.
+//
+// Since DrHell GPU didn't seem to exhibit these artifacts at all, I adapted
+// its routines to GPU Unai (Unai was probably already originally based on it).
+// DrHell uses 22.10 fixed point instead of Unai's 16.16, so gpu_fixedpoint.h
+// required modification as well as gpu_inner.h (where gpuPolySpanFn driver
+// functions are).
+//
+// Originally, I tried to patch up original Unai routines and got as far
+// as fixing the shifted rows, but still had the other problem of triangles rendered
+// wrong (black triangular gaps in NFS3 sky, clock tower in Castlevania SOTN).
+// I eventually gave up. Even after rewriting/adapting the routines,
+// however, I still had some random pixel dropouts, specifically in
+// NFS3 sky texture. I discovered that gpu_inner.h gpuPolySpanFn function
+// was taking optimizations to an extreme and packing u/v texture coords
+// into one 32-bit word, reducing their accuracy. Only once they were
+// handled in full-accuracy individual words was that problem fixed.
+//
+// NOTE: I also added support for doing divisions using the FPU, either
+// with normal division or multiplication-by-reciprocal.
+// To use float division, GPU_UNAI_USE_FLOATMATH should be defined.
+// To use float mult-by-reciprocal, GPU_UNAI_USE_FLOAT_DIV_MULTINV
+// can be specified (GPU_UNAI_USE_FLOATMATH must also be specified)
+// To use inaccurate fixed-point mult-by-reciprocal, define
+// GPU_UNAI_USE_INT_DIV_MULTINV. This is the default on older
+// ARM devices like Wiz/Caanoo that have neither integer division
+// in hardware nor an FPU. It results in some pixel dropouts,
+// texture glitches, but less than the original GPU UNAI code.
+//
+// If nothing is specified, integer division will be used.
+//
+// NOTE 2: Even with MIPS32R2 having FPU recip.s instruction, and it is
+// used when this platform is detected, I found it not to give any
+// noticeable speedup over normal float division (in fact seemed a tiny
+// tiny bit slower). I also found float division to not provide any
+// noticeable speedups versus integer division on MIPS32R2 platform.
+// Granted, the differences were all around .5 FPS or less.
+//
+// TODO:
+// * See if anything can be done about remaining pixel gaps in Gran
+// Turismo car models, track.
+// * Find better way of passing parameters to gpuPolySpanFn functions than
+// through original Unai method of using global variables u4,v4,du4 etc.
+// * Come up with some newer way of drawing rows of pixels than by calling
+// gpuPolySpanFn through function pointer. For every row, at least on
+// MIPS platforms, many registers are having to be pushed/popped from stack
+// on each call, which is strange since MIPS has so many registers.
+// * MIPS MXU/ASM optimized gpuPolySpanFn ?
+
+//////////////////////////////////////////////////////////////////////////
+//senquack - Disabled original Unai poly routines left here for reference:
+// ( from gpu_raster_polygon.h )
+//////////////////////////////////////////////////////////////////////////
+#define GPU_TESTRANGE3() /* Size sanity check for a triangle. Uses the invoking
+   function's locals x0..x2 and y0..y2. For every vertex whose coordinate is
+   negative, if another vertex lies more than CHKMAX_X (resp. CHKMAX_Y) away on
+   that axis, the macro executes `return`, i.e. it leaves the function that
+   invoked it without drawing anything. */ \
+{ \
+ if(x0<0) { if((x1-x0)>CHKMAX_X) return; if((x2-x0)>CHKMAX_X) return; } \
+ if(x1<0) { if((x0-x1)>CHKMAX_X) return; if((x2-x1)>CHKMAX_X) return; } \
+ if(x2<0) { if((x0-x2)>CHKMAX_X) return; if((x1-x2)>CHKMAX_X) return; } \
+ if(y0<0) { if((y1-y0)>CHKMAX_Y) return; if((y2-y0)>CHKMAX_Y) return; } \
+ if(y1<0) { if((y0-y1)>CHKMAX_Y) return; if((y2-y1)>CHKMAX_Y) return; } \
+ if(y2<0) { if((y0-y2)>CHKMAX_Y) return; if((y1-y2)>CHKMAX_Y) return; } \
+}
+
+/*----------------------------------------------------------------------
+F3
+
+Flat-shaded triangle (original Unai rasterizer, kept for reference).
+ * Vertices are read from PacketBuffer.S2[2..7]; the fill colour is
+   derived from PacketBuffer.U4[0] and stored in PixelData.
+ * The vertices are sorted by ascending y (ties: ascending x), then the
+   triangle is walked in two passes: rows y0..y1, then rows y1..y2.
+ * For every visible row the clipped span is handed to
+   gpuPolySpanDriver(first_pixel, width).
+NOTE(review): i2x / x2i / xLoDivx / fixed_HALF are defined elsewhere; they
+look like int<->fixed-point conversions, a fixed-point divide and the 0.5
+constant -- confirm against gpu_fixedpoint.h. How xLoDivx behaves for a
+zero divisor (y0==y1 or y1==y2) cannot be seen from here.
+----------------------------------------------------------------------*/
+
+void gpuDrawF3(const PP gpuPolySpanDriver)
+{
+ const int li=linesInterlace; // rows with (y & li) != 0 are skipped
+ const int pi=(progressInterlace?(linesInterlace+1):0); // pi==0 with pif==1 disables the second skip test
+ const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
+ s32 temp;
+ s32 xa, xb, xmin, xmax;
+ s32 ya, yb, ymin, ymax;
+ s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; // x3/dx3: span start edge, x4/dx4: span end edge
+ s32 y0, y1, y2;
+
+ x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]);
+ y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]);
+ x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]);
+ y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]);
+ x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]);
+ y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]);
+
+ GPU_TESTRANGE3(); // may return from this function
+
+ x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
+ y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
+
+ xmin = DrawingArea[0]; xmax = DrawingArea[2];
+ ymin = DrawingArea[1]; ymax = DrawingArea[3];
+
+ { // reject when the bounding box clipped to the drawing area is empty
+ int rx0 = Max2(xmin,Min3(x0,x1,x2));
+ int ry0 = Max2(ymin,Min3(y0,y1,y2));
+ int rx1 = Min2(xmax,Max3(x0,x1,x2));
+ int ry1 = Min2(ymax,Max3(y0,y1,y2));
+ if( rx0>=rx1 || ry0>=ry1) return;
+ }
+
+ PixelData = GPU_RGB16(PacketBuffer.U4[0]);
+
+ if (y0 >= y1) // three compare-and-swap steps: (0,1), (1,2), (0,1)
+ {
+ if( y0!=y1 || x0>x1 )
+ {
+ GPU_SWAP(x0, x1, temp);
+ GPU_SWAP(y0, y1, temp);
+ }
+ }
+ if (y1 >= y2)
+ {
+ if( y1!=y2 || x1>x2 )
+ {
+ GPU_SWAP(x1, x2, temp);
+ GPU_SWAP(y1, y2, temp);
+ }
+ }
+ if (y0 >= y1)
+ {
+ if( y0!=y1 || x0>x1 )
+ {
+ GPU_SWAP(x0, x1, temp);
+ GPU_SWAP(y0, y1, temp);
+ }
+ }
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx =(x2 - x1) * ya - (x2 - x0) * yb; // only its sign is used: picks which edge feeds x3 and which x4
+
+ for (s32 loop0 = 2; loop0; --loop0) // loop0==2: rows y0..y1, loop0==1: rows y1..y2
+ {
+ if (loop0 == 2)
+ {
+ ya = y0;
+ yb = y1;
+ x3 = i2x(x0);
+ x4 = y0!=y1 ? x3 : i2x(x1);
+ if (dx < 0)
+ {
+ dx3 = xLoDivx((x2 - x0), (y2 - y0));
+ dx4 = xLoDivx((x1 - x0), (y1 - y0));
+ }
+ else
+ {
+ dx3 = xLoDivx((x1 - x0), (y1 - y0));
+ dx4 = xLoDivx((x2 - x0), (y2 - y0));
+ }
+ }
+ else
+ {
+ ya = y1;
+ yb = y2;
+ if (dx < 0)
+ {
+ x4 = i2x(x1);
+ x3 = i2x(x0) + (dx3 * (y1 - y0)); // long edge is recomputed from vertex 0
+ dx4 = xLoDivx((x2 - x1), (y2 - y1));
+ }
+ else
+ {
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0)); // long edge is recomputed from vertex 0
+ dx3 = xLoDivx((x2 - x1), (y2 - y1));
+ }
+ }
+
+ temp = ymin - ya; // rows above the drawing area: step both edges forward
+ if (temp > 0)
+ {
+ ya = ymin;
+ x3 += dx3*temp;
+ x4 += dx4*temp;
+ }
+ if (yb > ymax) yb = ymax;
+ if (ya>=yb) continue;
+
+ x3+= fixed_HALF;
+ x4+= fixed_HALF;
+
+ u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
+
+ for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4)
+ {
+ if (ya&li) continue;
+ if ((ya&pi)==pif) continue;
+ xa = x2i(x3);
+ xb = x2i(x4);
+ if( (xa>xmax) || (xb<xmin) ) continue;
+ if(xa < xmin) xa = xmin;
+ if(xb > xmax) xb = xmax;
+ xb-=xa; // xb now holds the span width
+ if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
+ }
+ }
+}
+
+/*----------------------------------------------------------------------
+FT3
+
+Flat-shaded, textured triangle (original Unai rasterizer, kept for
+reference).
+ * Positions are read from PacketBuffer.S2[2,3], [6,7], [10,11]; texture
+   coordinates from PacketBuffer.U1[8,9], [16,17], [24,25]; the constant
+   colour from PacketBuffer.U1[0..2] (dr4/dg4/db4 are zeroed).
+ * The vertices are sorted by ascending y (ties: ascending x), then the
+   triangle is walked in two passes: rows y0..y1, then rows y1..y2.
+ * u3/v3 follow the span-start edge; per row they are copied (clipped to
+   xmin) into u4/v4 before gpuPolySpanDriver(first_pixel, width) is called.
+ * The per-pixel texture step is packed into tInc, the wrap mask into tMsk.
+NOTE(review): i2x / x2i / xLoDivx / xInv / xInvMulx / fixed_HALF are
+defined elsewhere; they look like fixed-point conversions, a divide and a
+reciprocal-then-multiply pair -- confirm against gpu_fixedpoint.h.
+----------------------------------------------------------------------*/
+
+void gpuDrawFT3(const PP gpuPolySpanDriver)
+{
+ const int li=linesInterlace; // rows with (y & li) != 0 are skipped
+ const int pi=(progressInterlace?(linesInterlace+1):0); // pi==0 with pif==1 disables the second skip test
+ const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
+ s32 temp;
+ s32 xa, xb, xmin, xmax;
+ s32 ya, yb, ymin, ymax;
+ s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; // x3/dx3: span start edge, x4/dx4: span end edge
+ s32 y0, y1, y2;
+ s32 u0, u1, u2, u3, du3=0;
+ s32 v0, v1, v2, v3, dv3=0;
+
+ x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
+ y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
+ x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
+ y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
+ x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
+ y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
+
+ GPU_TESTRANGE3(); // may return from this function
+
+ x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
+ y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
+
+ xmin = DrawingArea[0]; xmax = DrawingArea[2];
+ ymin = DrawingArea[1]; ymax = DrawingArea[3];
+
+ { // reject when the bounding box clipped to the drawing area is empty
+ int rx0 = Max2(xmin,Min3(x0,x1,x2));
+ int ry0 = Max2(ymin,Min3(y0,y1,y2));
+ int rx1 = Min2(xmax,Max3(x0,x1,x2));
+ int ry1 = Min2(ymax,Max3(y0,y1,y2));
+ if( rx0>=rx1 || ry0>=ry1) return;
+ }
+
+ u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
+ u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17];
+ u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25];
+
+ r4 = s32(PacketBuffer.U1[0]);
+ g4 = s32(PacketBuffer.U1[1]);
+ b4 = s32(PacketBuffer.U1[2]);
+ dr4 = dg4 = db4 = 0;
+
+ if (y0 >= y1) // three compare-and-swap steps: (0,1), (1,2), (0,1)
+ {
+ if( y0!=y1 || x0>x1 )
+ {
+ GPU_SWAP(x0, x1, temp);
+ GPU_SWAP(y0, y1, temp);
+ GPU_SWAP(u0, u1, temp);
+ GPU_SWAP(v0, v1, temp);
+ }
+ }
+ if (y1 >= y2)
+ {
+ if( y1!=y2 || x1>x2 )
+ {
+ GPU_SWAP(x1, x2, temp);
+ GPU_SWAP(y1, y2, temp);
+ GPU_SWAP(u1, u2, temp);
+ GPU_SWAP(v1, v2, temp);
+ }
+ }
+ if (y0 >= y1)
+ {
+ if( y0!=y1 || x0>x1 )
+ {
+ GPU_SWAP(x0, x1, temp);
+ GPU_SWAP(y0, y1, temp);
+ GPU_SWAP(u0, u1, temp);
+ GPU_SWAP(v0, v1, temp);
+ }
+ }
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx = (x2 - x1) * ya - (x2 - x0) * yb; // sign picks which edge feeds x3 and which x4
+ du4 = (u2 - u1) * ya - (u2 - u0) * yb;
+ dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
+
+ s32 iF,iS;
+ xInv( dx, iF, iS);
+ du4 = xInvMulx( du4, iF, iS); // per-pixel u step along a row
+ dv4 = xInvMulx( dv4, iF, iS); // per-pixel v step along a row
+ tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); // u step in bits 16..30, v step in bits 0..14
+ tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
+
+ for (s32 loop0 = 2; loop0; --loop0) // loop0==2: rows y0..y1, loop0==1: rows y1..y2
+ {
+ if (loop0 == 2)
+ {
+ ya = y0;
+ yb = y1;
+ u3 = i2x(u0);
+ v3 = i2x(v0);
+ x3 = i2x(x0);
+ x4 = y0!=y1 ? x3 : i2x(x1);
+ if (dx < 0)
+ {
+ xInv( (y2 - y0), iF, iS);
+ dx3 = xInvMulx( (x2 - x0), iF, iS);
+ du3 = xInvMulx( (u2 - u0), iF, iS);
+ dv3 = xInvMulx( (v2 - v0), iF, iS);
+ dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
+ }
+ else
+ {
+ xInv( (y1 - y0), iF, iS);
+ dx3 = xInvMulx( (x1 - x0), iF, iS);
+ du3 = xInvMulx( (u1 - u0), iF, iS);
+ dv3 = xInvMulx( (v1 - v0), iF, iS);
+ dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
+ }
+ }
+ else
+ {
+ ya = y1;
+ yb = y2;
+ if (dx < 0)
+ {
+ temp = y1 - y0;
+ u3 = i2x(u0) + (du3 * temp);
+ v3 = i2x(v0) + (dv3 * temp);
+ x3 = i2x(x0) + (dx3 * temp);
+ x4 = i2x(x1);
+ dx4 = xLoDivx((x2 - x1), (y2 - y1));
+ }
+ else
+ {
+ u3 = i2x(u1);
+ v3 = i2x(v1);
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0));
+ xInv( (y2 - y1), iF, iS);
+ dx3 = xInvMulx( (x2 - x1), iF, iS);
+ du3 = xInvMulx( (u2 - u1), iF, iS);
+ dv3 = xInvMulx( (v2 - v1), iF, iS);
+ }
+ }
+
+ temp = ymin - ya; // rows above the drawing area: step all edge values forward
+ if (temp > 0)
+ {
+ ya = ymin;
+ x3 += dx3*temp;
+ x4 += dx4*temp;
+ u3 += du3*temp;
+ v3 += dv3*temp;
+ }
+ if (yb > ymax) yb = ymax;
+ if (ya>=yb) continue;
+
+ x3+= fixed_HALF;
+ x4+= fixed_HALF;
+ u3+= fixed_HALF;
+ v3+= fixed_HALF; // was `v4+=`: v4 is reloaded from v3 on every row below, so the bias never reached v
+
+ u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
+
+ for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3)
+ {
+ if (ya&li) continue;
+ if ((ya&pi)==pif) continue;
+ xa = x2i(x3);
+ xb = x2i(x4);
+ if( (xa>xmax) || (xb<xmin) ) continue;
+
+ temp = xmin - xa; // pixels clipped off the left: advance u/v by the same count
+ if(temp > 0)
+ {
+ xa = xmin;
+ u4 = u3 + du4*temp;
+ v4 = v3 + dv4*temp;
+ }
+ else
+ {
+ u4 = u3;
+ v4 = v3;
+ }
+ if(xb > xmax) xb = xmax;
+ xb-=xa; // xb now holds the span width
+ if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
+ }
+ }
+}
+
+/*----------------------------------------------------------------------
+G3
+
+Gouraud-shaded triangle (original Unai rasterizer, kept for reference).
+ * Positions are read from PacketBuffer.S2[2,3], [6,7], [10,11]; vertex
+   colours from PacketBuffer.U1[0..2], [8..10], [16..18].
+ * The vertices are sorted by ascending y (ties: ascending x), then the
+   triangle is walked in two passes: rows y0..y1, then rows y1..y2.
+ * r3/g3/b3 follow the span-start edge; per row they are copied (clipped
+   to xmin) into r4/g4/b4 before gpuPolySpanDriver(first_pixel, width)
+   is called. The per-pixel colour step is packed into lInc.
+NOTE(review): i2x / x2i / xLoDivx / xInv / xInvMulx / fixed_HALF are
+defined elsewhere; they look like fixed-point conversions, a divide and a
+reciprocal-then-multiply pair -- confirm against gpu_fixedpoint.h.
+----------------------------------------------------------------------*/
+
+void gpuDrawG3(const PP gpuPolySpanDriver)
+{
+ const int li=linesInterlace; // rows with (y & li) != 0 are skipped
+ const int pi=(progressInterlace?(linesInterlace+1):0); // pi==0 with pif==1 disables the second skip test
+ const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
+ s32 temp;
+ s32 xa, xb, xmin, xmax;
+ s32 ya, yb, ymin, ymax;
+ s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; // x3/dx3: span start edge, x4/dx4: span end edge
+ s32 y0, y1, y2;
+ s32 r0, r1, r2, r3, dr3=0;
+ s32 g0, g1, g2, g3, dg3=0;
+ s32 b0, b1, b2, b3, db3=0;
+
+ x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
+ y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
+ x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
+ y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
+ x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
+ y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
+
+ GPU_TESTRANGE3(); // may return from this function
+
+ x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
+ y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
+
+ xmin = DrawingArea[0]; xmax = DrawingArea[2];
+ ymin = DrawingArea[1]; ymax = DrawingArea[3];
+
+ { // reject when the bounding box clipped to the drawing area is empty
+ int rx0 = Max2(xmin,Min3(x0,x1,x2));
+ int ry0 = Max2(ymin,Min3(y0,y1,y2));
+ int rx1 = Min2(xmax,Max3(x0,x1,x2));
+ int ry1 = Min2(ymax,Max3(y0,y1,y2));
+ if( rx0>=rx1 || ry0>=ry1) return;
+ }
+
+ r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
+ r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10];
+ r2 = PacketBuffer.U1[16]; g2 = PacketBuffer.U1[17]; b2 = PacketBuffer.U1[18];
+
+ if (y0 >= y1) // three compare-and-swap steps: (0,1), (1,2), (0,1)
+ {
+ if( y0!=y1 || x0>x1 )
+ {
+ GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
+ GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+ }
+ }
+ if (y1 >= y2)
+ {
+ if( y1!=y2 || x1>x2 )
+ {
+ GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
+ GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
+ }
+ }
+ if (y0 >= y1)
+ {
+ if( y0!=y1 || x0>x1 )
+ {
+ GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
+ GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+ }
+ }
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx = (x2 - x1) * ya - (x2 - x0) * yb; // sign picks which edge feeds x3 and which x4
+ dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
+ dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
+ db4 = (b2 - b1) * ya - (b2 - b0) * yb;
+
+ s32 iF,iS;
+ xInv( dx, iF, iS);
+ dr4 = xInvMulx( dr4, iF, iS); // per-pixel colour steps along a row
+ dg4 = xInvMulx( dg4, iF, iS);
+ db4 = xInvMulx( db4, iF, iS);
+ u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; // r step positioned at bit 21
+ u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10; // g step positioned at bit 10
+ u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; // b step positioned at bit 0
+ lInc = db + dg + dr;
+
+ for (s32 loop0 = 2; loop0; --loop0) // loop0==2: rows y0..y1, loop0==1: rows y1..y2
+ {
+ if (loop0 == 2)
+ {
+ ya = y0;
+ yb = y1;
+ r3 = i2x(r0);
+ g3 = i2x(g0);
+ b3 = i2x(b0);
+ x3 = i2x(x0);
+ x4 = y0!=y1 ? x3 : i2x(x1);
+ if (dx < 0)
+ {
+ xInv( (y2 - y0), iF, iS);
+ dx3 = xInvMulx( (x2 - x0), iF, iS);
+ dr3 = xInvMulx( (r2 - r0), iF, iS);
+ dg3 = xInvMulx( (g2 - g0), iF, iS);
+ db3 = xInvMulx( (b2 - b0), iF, iS);
+ dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
+ }
+ else
+ {
+ xInv( (y1 - y0), iF, iS);
+ dx3 = xInvMulx( (x1 - x0), iF, iS);
+ dr3 = xInvMulx( (r1 - r0), iF, iS);
+ dg3 = xInvMulx( (g1 - g0), iF, iS);
+ db3 = xInvMulx( (b1 - b0), iF, iS);
+ dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
+ }
+ }
+ else
+ {
+ ya = y1;
+ yb = y2;
+ if (dx < 0)
+ {
+ temp = y1 - y0;
+ r3 = i2x(r0) + (dr3 * temp);
+ g3 = i2x(g0) + (dg3 * temp);
+ b3 = i2x(b0) + (db3 * temp);
+ x3 = i2x(x0) + (dx3 * temp);
+ x4 = i2x(x1);
+ dx4 = xLoDivx((x2 - x1), (y2 - y1));
+ }
+ else
+ {
+ r3 = i2x(r1);
+ g3 = i2x(g1);
+ b3 = i2x(b1);
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0));
+
+ xInv( (y2 - y1), iF, iS);
+ dx3 = xInvMulx( (x2 - x1), iF, iS);
+ dr3 = xInvMulx( (r2 - r1), iF, iS);
+ dg3 = xInvMulx( (g2 - g1), iF, iS);
+ db3 = xInvMulx( (b2 - b1), iF, iS);
+ }
+ }
+
+ temp = ymin - ya; // rows above the drawing area: step all edge values forward
+ if (temp > 0)
+ {
+ ya = ymin;
+ x3 += dx3*temp; x4 += dx4*temp;
+ r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
+ }
+ if (yb > ymax) yb = ymax;
+ if (ya>=yb) continue;
+
+ x3+= fixed_HALF; x4+= fixed_HALF;
+ r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
+
+ u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
+
+ for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, r3+=dr3, g3+=dg3, b3+=db3)
+ {
+ if (ya&li) continue;
+ if ((ya&pi)==pif) continue;
+ xa = x2i(x3);
+ xb = x2i(x4);
+ if( (xa>xmax) || (xb<xmin) ) continue;
+
+ temp = xmin - xa; // pixels clipped off the left: advance the colour by the same count
+ if(temp > 0)
+ {
+ xa = xmin;
+ r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
+ }
+ else
+ {
+ r4 = r3; g4 = g3; b4 = b3;
+ }
+ if(xb > xmax) xb = xmax;
+ xb-=xa; // xb now holds the span width
+ if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
+ }
+ }
+}
+
+/*----------------------------------------------------------------------
+GT3
+
+Gouraud-shaded, textured triangle (original Unai rasterizer, kept for
+reference).
+ * Positions are read from PacketBuffer.S2[2,3], [8,9], [14,15]; vertex
+   colours from PacketBuffer.U1[0..2], [12..14], [24..26]; texture
+   coordinates from PacketBuffer.U1[8,9], [20,21], [32,33].
+ * The vertices are sorted by ascending y (ties: ascending x), then the
+   triangle is walked in two passes: rows y0..y1, then rows y1..y2.
+ * u3/v3/r3/g3/b3 follow the span-start edge; per row they are copied
+   (clipped to xmin) into u4/v4/r4/g4/b4 before
+   gpuPolySpanDriver(first_pixel, width) is called.
+ * Per-pixel steps are packed into lInc (colour) and tInc (texture);
+   tMsk holds the texture wrap mask.
+NOTE(review): i2x / x2i / xLoDivx / xInv / xInvMulx / fixed_HALF are
+defined elsewhere; they look like fixed-point conversions, a divide and a
+reciprocal-then-multiply pair -- confirm against gpu_fixedpoint.h.
+----------------------------------------------------------------------*/
+
+void gpuDrawGT3(const PP gpuPolySpanDriver)
+{
+ const int li=linesInterlace; // rows with (y & li) != 0 are skipped
+ const int pi=(progressInterlace?(linesInterlace+1):0); // pi==0 with pif==1 disables the second skip test
+ const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
+ s32 temp;
+ s32 xa, xb, xmin, xmax;
+ s32 ya, yb, ymin, ymax;
+ s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; // x3/dx3: span start edge, x4/dx4: span end edge
+ s32 y0, y1, y2;
+ s32 u0, u1, u2, u3, du3=0;
+ s32 v0, v1, v2, v3, dv3=0;
+ s32 r0, r1, r2, r3, dr3=0;
+ s32 g0, g1, g2, g3, dg3=0;
+ s32 b0, b1, b2, b3, db3=0;
+
+ x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
+ y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
+ x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] );
+ y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] );
+ x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]);
+ y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]);
+
+ GPU_TESTRANGE3(); // may return from this function
+
+ x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
+ y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
+
+ xmin = DrawingArea[0]; xmax = DrawingArea[2];
+ ymin = DrawingArea[1]; ymax = DrawingArea[3];
+
+ { // reject when the bounding box clipped to the drawing area is empty
+ int rx0 = Max2(xmin,Min3(x0,x1,x2));
+ int ry0 = Max2(ymin,Min3(y0,y1,y2));
+ int rx1 = Min2(xmax,Max3(x0,x1,x2));
+ int ry1 = Min2(ymax,Max3(y0,y1,y2));
+ if( rx0>=rx1 || ry0>=ry1) return;
+ }
+
+ r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
+ u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
+ r1 = PacketBuffer.U1[12]; g1 = PacketBuffer.U1[13]; b1 = PacketBuffer.U1[14];
+ u1 = PacketBuffer.U1[20]; v1 = PacketBuffer.U1[21];
+ r2 = PacketBuffer.U1[24]; g2 = PacketBuffer.U1[25]; b2 = PacketBuffer.U1[26];
+ u2 = PacketBuffer.U1[32]; v2 = PacketBuffer.U1[33];
+
+ if (y0 >= y1) // three compare-and-swap steps: (0,1), (1,2), (0,1)
+ {
+ if( y0!=y1 || x0>x1 )
+ {
+ GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
+ GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
+ GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+ }
+ }
+ if (y1 >= y2)
+ {
+ if( y1!=y2 || x1>x2 )
+ {
+ GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
+ GPU_SWAP(u1, u2, temp); GPU_SWAP(v1, v2, temp);
+ GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
+ }
+ }
+ if (y0 >= y1)
+ {
+ if( y0!=y1 || x0>x1 )
+ {
+ GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
+ GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
+ GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+ }
+ }
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx = (x2 - x1) * ya - (x2 - x0) * yb; // sign picks which edge feeds x3 and which x4
+ du4 = (u2 - u1) * ya - (u2 - u0) * yb;
+ dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
+ dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
+ dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
+ db4 = (b2 - b1) * ya - (b2 - b0) * yb;
+
+ s32 iF,iS;
+
+ xInv( dx, iF, iS);
+ du4 = xInvMulx( du4, iF, iS); // per-pixel texture and colour steps along a row
+ dv4 = xInvMulx( dv4, iF, iS);
+ dr4 = xInvMulx( dr4, iF, iS);
+ dg4 = xInvMulx( dg4, iF, iS);
+ db4 = xInvMulx( db4, iF, iS);
+ u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; // r step positioned at bit 21
+ u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10; // g step positioned at bit 10
+ u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; // b step positioned at bit 0
+ lInc = db + dg + dr;
+ tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); // u step in bits 16..30, v step in bits 0..14
+ tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
+
+ for (s32 loop0 = 2; loop0; --loop0) // loop0==2: rows y0..y1, loop0==1: rows y1..y2
+ {
+ if (loop0 == 2)
+ {
+ ya = y0;
+ yb = y1;
+ u3 = i2x(u0);
+ v3 = i2x(v0);
+ r3 = i2x(r0);
+ g3 = i2x(g0);
+ b3 = i2x(b0);
+ x3 = i2x(x0);
+ x4 = y0!=y1 ? x3 : i2x(x1);
+ if (dx < 0)
+ {
+ xInv( (y2 - y0), iF, iS);
+ dx3 = xInvMulx( (x2 - x0), iF, iS);
+ du3 = xInvMulx( (u2 - u0), iF, iS);
+ dv3 = xInvMulx( (v2 - v0), iF, iS);
+ dr3 = xInvMulx( (r2 - r0), iF, iS);
+ dg3 = xInvMulx( (g2 - g0), iF, iS);
+ db3 = xInvMulx( (b2 - b0), iF, iS);
+ dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
+ }
+ else
+ {
+ xInv( (y1 - y0), iF, iS);
+ dx3 = xInvMulx( (x1 - x0), iF, iS);
+ du3 = xInvMulx( (u1 - u0), iF, iS);
+ dv3 = xInvMulx( (v1 - v0), iF, iS);
+ dr3 = xInvMulx( (r1 - r0), iF, iS);
+ dg3 = xInvMulx( (g1 - g0), iF, iS);
+ db3 = xInvMulx( (b1 - b0), iF, iS);
+ dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
+ }
+ }
+ else
+ {
+ ya = y1;
+ yb = y2;
+ if (dx < 0)
+ {
+ temp = y1 - y0;
+ u3 = i2x(u0) + (du3 * temp);
+ v3 = i2x(v0) + (dv3 * temp);
+ r3 = i2x(r0) + (dr3 * temp);
+ g3 = i2x(g0) + (dg3 * temp);
+ b3 = i2x(b0) + (db3 * temp);
+ x3 = i2x(x0) + (dx3 * temp);
+ x4 = i2x(x1);
+ dx4 = xLoDivx((x2 - x1), (y2 - y1));
+ }
+ else
+ {
+ u3 = i2x(u1);
+ v3 = i2x(v1);
+ r3 = i2x(r1);
+ g3 = i2x(g1);
+ b3 = i2x(b1);
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0));
+
+ xInv( (y2 - y1), iF, iS);
+ dx3 = xInvMulx( (x2 - x1), iF, iS);
+ du3 = xInvMulx( (u2 - u1), iF, iS);
+ dv3 = xInvMulx( (v2 - v1), iF, iS);
+ dr3 = xInvMulx( (r2 - r1), iF, iS);
+ dg3 = xInvMulx( (g2 - g1), iF, iS);
+ db3 = xInvMulx( (b2 - b1), iF, iS);
+ }
+ }
+
+ temp = ymin - ya; // rows above the drawing area: step all edge values forward
+ if (temp > 0)
+ {
+ ya = ymin;
+ x3 += dx3*temp; x4 += dx4*temp;
+ u3 += du3*temp; v3 += dv3*temp;
+ r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
+ }
+ if (yb > ymax) yb = ymax;
+ if (ya>=yb) continue;
+
+ x3+= fixed_HALF; x4+= fixed_HALF;
+ u3+= fixed_HALF; v3+= fixed_HALF; // was `v4+=`: v4 is reloaded from v3 on every row below, so the bias never reached v
+ r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
+ u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
+
+ for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3, r3+=dr3, g3+=dg3, b3+=db3)
+ {
+ if (ya&li) continue;
+ if ((ya&pi)==pif) continue;
+ xa = x2i(x3);
+ xb = x2i(x4);
+ if( (xa>xmax) || (xb<xmin)) continue;
+
+ temp = xmin - xa; // pixels clipped off the left: advance u/v and colour by the same count
+ if(temp > 0)
+ {
+ xa = xmin;
+ u4 = u3 + du4*temp; v4 = v3 + dv4*temp;
+ r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
+ }
+ else
+ {
+ u4 = u3; v4 = v3;
+ r4 = r3; g4 = g3; b4 = b3;
+ }
+ if(xb > xmax) xb = xmax;
+ xb-=xa; // xb now holds the span width
+ if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
+ }
+ }
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+//senquack - Original Unai poly routines left here for reference:
+// ( from gpu_inner.h ) NOTE: this uses 16.16, not 22.10 fixed point
+//////////////////////////////////////////////////////////////////////////
+template<const int CF> /* Fills one horizontal span of `count` pixels starting at
+   pDst. Every loop is do { ... } while (--count), so count must be >= 1 (the
+   polygon routines only call the driver when the width is > 0).
+   Inputs come from file-level state rather than parameters: PixelData,
+   r4/g4/b4, u4/v4, lInc, tInc, tMsk, TBA, CBA, blit_mask.
+   NOTE(review): TM, G, L, M, B, MB, BM and BI are not defined in this excerpt;
+   presumably they are compile-time flags decoded from CF -- confirm in
+   gpu_inner.h. As used below: TM selects the texture fetch (0 = untextured),
+   G the per-pixel colour step, L lighting, M skipping destination pixels whose
+   bit 15 is set, B/BM blending via gpuBlending00..03, MB setting bit 15 on
+   written pixels, BI skipping pixels selected by blit_mask.
+   NOTE(review): (u32)pDst narrows the pointer on 64-bit targets; only bits
+   1..3 of the address are used. */
+INLINE void gpuPolySpanFn(u16 *pDst, u32 count)
+{
+ if (!TM)
+ {
+ // NO TEXTURE
+ if (!G)
+ {
+ // NO GOURAUD
+ u16 data;
+ if (L) { u32 lCol=((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); gpuLightingRGB(data,lCol); }
+ else data=PixelData;
+ if ((!M)&&(!B))
+ {
+ if (MB) { data = data | 0x8000; }
+ do { *pDst++ = data; } while (--count);
+ }
+ else if ((M)&&(!B))
+ {
+ if (MB) { data = data | 0x8000; }
+ do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
+ }
+ else
+ {
+ u16 uSrc;
+ u16 uDst;
+ u32 uMsk; if (BM==0) uMsk=0x7BDE; // not referenced by name below; presumably consumed inside the gpuBlending00 macro -- confirm
+ u32 bMsk; if (BI) bMsk=blit_mask;
+ do
+ {
+ // blit-mask
+ if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endtile; }
+ // masking
+ uDst = *pDst;
+ if(M) { if (uDst&0x8000) goto endtile; }
+ uSrc = data;
+ // blend
+ if (BM==0) gpuBlending00(uSrc, uDst);
+ if (BM==1) gpuBlending01(uSrc, uDst);
+ if (BM==2) gpuBlending02(uSrc, uDst);
+ if (BM==3) gpuBlending03(uSrc, uDst);
+ if (MB) { *pDst = uSrc | 0x8000; }
+ else { *pDst = uSrc; }
+ endtile: pDst++;
+ }
+ while (--count);
+ }
+ }
+ else
+ {
+ // GOURAUD
+ u16 uDst;
+ u16 uSrc;
+ u32 linc=lInc;
+ u32 lCol=((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); // b in bits 0..9, g in bits 10..20, r in bits 21..31
+ u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
+ u32 bMsk; if (BI) bMsk=blit_mask;
+ do
+ {
+ // blit-mask
+ if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endgou; }
+ // masking
+ if(M) { uDst = *pDst; if (uDst&0x8000) goto endgou; }
+ // blend
+ if(B)
+ {
+ // light
+ gpuLightingRGB(uSrc,lCol);
+ if(!M) { uDst = *pDst; }
+ if (BM==0) gpuBlending00(uSrc, uDst);
+ if (BM==1) gpuBlending01(uSrc, uDst);
+ if (BM==2) gpuBlending02(uSrc, uDst);
+ if (BM==3) gpuBlending03(uSrc, uDst);
+ }
+ else
+ {
+ // light
+ gpuLightingRGB(uSrc,lCol);
+ }
+ if (MB) { *pDst = uSrc | 0x8000; }
+ else { *pDst = uSrc; }
+ endgou: pDst++; lCol=(lCol+linc); // colour advances even for skipped pixels
+ }
+ while (--count);
+ }
+ }
+ else
+ {
+ // TEXTURE
+ u16 uDst;
+ u16 uSrc;
+ u32 linc; if (L&&G) linc=lInc;
+ u32 tinc=tInc;
+ u32 tmsk=tMsk;
+ u32 tCor = ((u32)( u4<<7)&0x7fff0000) | ((u32)( v4>>9)&0x00007fff); tCor&= tmsk; // u in the upper half-word, v in the lower one
+ const u16* _TBA=TBA;
+ const u16* _CBA; if (TM!=3) _CBA=CBA;
+ u32 lCol;
+ if(L && !G) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); }
+ else if(L && G) { lCol = ((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); }
+ u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
+ u32 bMsk; if (BI) bMsk=blit_mask;
+ do
+ {
+ // blit-mask
+ if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endpoly; }
+ // masking
+ if(M) { uDst = *pDst; if (uDst&0x8000) goto endpoly; }
+ // texture
+ if (TM==1) { u32 tu=(tCor>>23); u32 tv=(tCor<<4)&(0xff<<11); u8 rgb=((u8*)_TBA)[tv+(tu>>1)]; uSrc=_CBA[(rgb>>((tu&1)<<2))&0xf]; if(!uSrc) goto endpoly; } // 4-bit index (nibble chosen by tu&1) looked up in _CBA; a zero texel is skipped
+ if (TM==2) { uSrc = _CBA[(((u8*)_TBA)[(tCor>>23)+((tCor<<4)&(0xff<<11))])]; if(!uSrc) goto endpoly; } // 8-bit index looked up in _CBA; a zero texel is skipped
+ if (TM==3) { uSrc = _TBA[(tCor>>23)+((tCor<<3)&(0xff<<10))]; if(!uSrc) goto endpoly; } // 16-bit texel read directly from _TBA; a zero texel is skipped
+ // blend
+ if(B)
+ {
+ if (uSrc&0x8000) // only texels with bit 15 set are blended
+ {
+ // light
+ if(L) gpuLightingTXT(uSrc, lCol);
+ if(!M) { uDst = *pDst; }
+ if (BM==0) gpuBlending00(uSrc, uDst);
+ if (BM==1) gpuBlending01(uSrc, uDst);
+ if (BM==2) gpuBlending02(uSrc, uDst);
+ if (BM==3) gpuBlending03(uSrc, uDst);
+ }
+ else
+ {
+ // light
+ if(L) gpuLightingTXT(uSrc, lCol);
+ }
+ }
+ else
+ {
+ // light
+ if(L) { gpuLightingTXT(uSrc, lCol); } else if(!MB) { uSrc&= 0x7fff; }
+ }
+ if (MB) { *pDst = uSrc | 0x8000; }
+ else { *pDst = uSrc; }
+ endpoly: pDst++;
+ tCor=(tCor+tinc)&tmsk; // texture coordinate advances even for skipped pixels
+ if (L&&G) lCol=(lCol+linc);
+ }
+ while (--count);
+ }
+}
diff --git a/plugins/gpu_unai/gpu.cpp b/plugins/gpu_unai/gpu.cpp
index 1552bed..c3f7095 100644
--- a/plugins/gpu_unai/gpu.cpp
+++ b/plugins/gpu_unai/gpu.cpp
@@ -1,6 +1,7 @@
/***************************************************************************
* Copyright (C) 2010 PCSX4ALL Team *
* Copyright (C) 2010 Unai *
+* Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -18,103 +19,43 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
***************************************************************************/
-#include "port.h"
-#include "gpu.h"
-#include "profiler.h"
-#include "debug.h"
+#include <stddef.h>
+#include "plugins.h"
+#include "psxcommon.h"
+//#include "port.h"
+#include "gpu_unai.h"
-int skipCount = 2; /* frame skip (0,1,2,3...) */
-int skCount = 0; /* internal frame skip */
-int linesInterlace = 0; /* internal lines interlace */
-int linesInterlace_user = 0; /* Lines interlace */
+#define VIDEO_WIDTH 320
-bool isSkip = false; /* skip frame (info coming from GPU) */
-bool wasSkip = false;
-bool skipFrame = false; /* skip frame (according to frame skip) */
-bool alt_fps = false; /* Alternative FPS algorithm */
-bool show_fps = false; /* Show FPS statistics */
-
-bool isPAL = false; /* PAL video timing */
-bool progressInterlace_flag = false; /* Progressive interlace flag */
-bool progressInterlace = false; /* Progressive interlace option*/
-bool frameLimit = false; /* frames to wait */
-
-bool light = true; /* lighting */
-bool blend = true; /* blending */
-bool FrameToRead = false; /* load image in progress */
-bool FrameToWrite = false; /* store image in progress */
-bool fb_dirty = false;
-
-bool enableAbbeyHack = false; /* Abe's Odyssey hack */
-
-u8 BLEND_MODE;
-u8 TEXT_MODE;
-u8 Masking;
-
-u16 PixelMSB;
-u16 PixelData;
-
-///////////////////////////////////////////////////////////////////////////////
-// GPU Global data
-///////////////////////////////////////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////////////////////////
-// Dma Transfers info
-s32 px,py;
-s32 x_end,y_end;
-u16* pvram;
-
-u32 GP0;
-s32 PacketCount;
-s32 PacketIndex;
-
-///////////////////////////////////////////////////////////////////////////////
-// Display status
-u32 DisplayArea [6];
-
-///////////////////////////////////////////////////////////////////////////////
-// Rasterizer status
-u32 TextureWindow [4];
-u32 DrawingArea [4];
-u32 DrawingOffset [2];
+#ifdef TIME_IN_MSEC
+#define TPS 1000
+#else
+#define TPS 1000000
+#endif
-///////////////////////////////////////////////////////////////////////////////
-// Rasterizer status
+#define IS_PAL (gpu_unai.GPU_GP1&(0x08<<17))
-u16* TBA;
-u16* CBA;
+//senquack - Original 512KB of guard space seems not to be enough, as Xenogears
+// accesses outside this range and crashes in town intro fight sequence.
+// Increased to 2MB total (double PSX VRAM) and Xenogears no longer
+// crashes, but some textures are still messed up. Also note that alignment min
+// is 16 bytes, needed for pixel-skipping rendering/blitting in high horiz res.
+// Extra 4KB is for guard room at beginning.
+// TODO: Determine cause of out-of-bounds write/reads. <-- Note: this is largely
+// solved by adoption of PCSX Rearmed's 'gpulib' in gpulib_if.cpp, which
+// replaces this file (gpu.cpp)
+//u16 GPU_FrameBuffer[(FRAME_BUFFER_SIZE+512*1024)/2] __attribute__((aligned(32)));
+static u16 GPU_FrameBuffer[(FRAME_BUFFER_SIZE*2 + 4096)/2] __attribute__((aligned(32)));
///////////////////////////////////////////////////////////////////////////////
-// Inner Loops
-s32 u4, du4;
-s32 v4, dv4;
-s32 r4, dr4;
-s32 g4, dg4;
-s32 b4, db4;
-u32 lInc;
-u32 tInc, tMsk;
-
-GPUPacket PacketBuffer;
-// FRAME_BUFFER_SIZE is defined in bytes; 512K is guard memory for out of range reads
-u16 GPU_FrameBuffer[(FRAME_BUFFER_SIZE+512*1024)/2] __attribute__((aligned(2048)));
-u32 GPU_GP1;
+// GPU fixed point math
+#include "gpu_fixedpoint.h"
///////////////////////////////////////////////////////////////////////////////
-// Inner loop driver instanciation file
+// Inner loop driver instantiation file
#include "gpu_inner.h"
///////////////////////////////////////////////////////////////////////////////
-// GPU Raster Macros
-#define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3))
-
-#define GPU_EXPANDSIGN(x) (((s32)(x)<<21)>>21)
-
-#define CHKMAX_X 1024
-#define CHKMAX_Y 512
-
-#define GPU_SWAP(a,b,t) {(t)=(a);(a)=(b);(b)=(t);}
-
-///////////////////////////////////////////////////////////////////////////////
// GPU internal image drawing functions
#include "gpu_raster_image.h"
@@ -135,72 +76,88 @@ u32 GPU_GP1;
#include "gpu_command.h"
///////////////////////////////////////////////////////////////////////////////
-INLINE void gpuReset(void)
+static void gpuReset(void)
{
- GPU_GP1 = 0x14802000;
- TextureWindow[0] = 0;
- TextureWindow[1] = 0;
- TextureWindow[2] = 255;
- TextureWindow[3] = 255;
- DrawingArea[2] = 256;
- DrawingArea[3] = 240;
- DisplayArea[2] = 256;
- DisplayArea[3] = 240;
- DisplayArea[5] = 240;
+ memset((void*)&gpu_unai, 0, sizeof(gpu_unai));
+ gpu_unai.vram = (u16*)GPU_FrameBuffer + (4096/2); //4kb guard room in front
+ gpu_unai.GPU_GP1 = 0x14802000;
+ gpu_unai.DrawingArea[2] = 256;
+ gpu_unai.DrawingArea[3] = 240;
+ gpu_unai.DisplayArea[2] = 256;
+ gpu_unai.DisplayArea[3] = 240;
+ gpu_unai.DisplayArea[5] = 240;
+ gpu_unai.TextureWindow[0] = 0;
+ gpu_unai.TextureWindow[1] = 0;
+ gpu_unai.TextureWindow[2] = 255;
+ gpu_unai.TextureWindow[3] = 255;
+ //senquack - new vars must be updated whenever texture window is changed:
+ // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h)
+ const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4
+ gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1);
+ gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
+
+ // Configuration options
+ gpu_unai.config = gpu_unai_config_ext;
+ gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
+ gpu_unai.frameskip.skipCount = gpu_unai.config.frameskip_count;
+
+ SetupLightLUT();
+ SetupDitheringConstants();
}
///////////////////////////////////////////////////////////////////////////////
-bool GPU_init(void)
+long GPU_init(void)
{
gpuReset();
-
+
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
// s_invTable
- for(int i=1;i<=(1<<TABLE_BITS);++i)
+ for(unsigned int i=1;i<=(1<<TABLE_BITS);++i)
{
- double v = 1.0 / double(i);
- #ifdef GPU_TABLE_10_BITS
- v *= double(0xffffffff>>1);
- #else
- v *= double(0x80000000);
- #endif
- s_invTable[i-1]=s32(v);
+ s_invTable[i-1]=0x7fffffff/i;
}
+#endif
+
+ gpu_unai.fb_dirty = true;
+ gpu_unai.dma.last_dma = NULL;
return (0);
}
///////////////////////////////////////////////////////////////////////////////
-void GPU_shutdown(void)
+long GPU_shutdown(void)
{
+ return 0;
}
///////////////////////////////////////////////////////////////////////////////
-long GPU_freeze(unsigned int bWrite, GPUFreeze_t* p2)
+long GPU_freeze(u32 bWrite, GPUFreeze_t* p2)
{
if (!p2) return (0);
- if (p2->Version != 1) return (0);
+ if (p2->ulFreezeVersion != 1) return (0);
if (bWrite)
{
- p2->GPU_gp1 = GPU_GP1;
- memset(p2->Control, 0, sizeof(p2->Control));
+ p2->ulStatus = gpu_unai.GPU_GP1;
+ memset(p2->ulControl, 0, sizeof(p2->ulControl));
// save resolution and registers for P.E.Op.S. compatibility
- p2->Control[3] = (3 << 24) | ((GPU_GP1 >> 23) & 1);
- p2->Control[4] = (4 << 24) | ((GPU_GP1 >> 29) & 3);
- p2->Control[5] = (5 << 24) | (DisplayArea[0] | (DisplayArea[1] << 10));
- p2->Control[6] = (6 << 24) | (2560 << 12);
- p2->Control[7] = (7 << 24) | (DisplayArea[4] | (DisplayArea[5] << 10));
- p2->Control[8] = (8 << 24) | ((GPU_GP1 >> 17) & 0x3f) | ((GPU_GP1 >> 10) & 0x40);
- memcpy(p2->FrameBuffer, (u16*)GPU_FrameBuffer, FRAME_BUFFER_SIZE);
+ p2->ulControl[3] = (3 << 24) | ((gpu_unai.GPU_GP1 >> 23) & 1);
+ p2->ulControl[4] = (4 << 24) | ((gpu_unai.GPU_GP1 >> 29) & 3);
+ p2->ulControl[5] = (5 << 24) | (gpu_unai.DisplayArea[0] | (gpu_unai.DisplayArea[1] << 10));
+ p2->ulControl[6] = (6 << 24) | (2560 << 12);
+ p2->ulControl[7] = (7 << 24) | (gpu_unai.DisplayArea[4] | (gpu_unai.DisplayArea[5] << 10));
+ p2->ulControl[8] = (8 << 24) | ((gpu_unai.GPU_GP1 >> 17) & 0x3f) | ((gpu_unai.GPU_GP1 >> 10) & 0x40);
+ memcpy((void*)p2->psxVRam, (void*)gpu_unai.vram, FRAME_BUFFER_SIZE);
return (1);
}
else
{
- GPU_GP1 = p2->GPU_gp1;
- memcpy((u16*)GPU_FrameBuffer, p2->FrameBuffer, FRAME_BUFFER_SIZE);
- GPU_writeStatus((5 << 24) | p2->Control[5]);
- GPU_writeStatus((7 << 24) | p2->Control[7]);
- GPU_writeStatus((8 << 24) | p2->Control[8]);
- gpuSetTexture(GPU_GP1);
+ extern void GPU_writeStatus(u32 data);
+ gpu_unai.GPU_GP1 = p2->ulStatus;
+ memcpy((void*)gpu_unai.vram, (void*)p2->psxVRam, FRAME_BUFFER_SIZE);
+ GPU_writeStatus((5 << 24) | p2->ulControl[5]);
+ GPU_writeStatus((7 << 24) | p2->ulControl[7]);
+ GPU_writeStatus((8 << 24) | p2->ulControl[8]);
+ gpuSetTexture(gpu_unai.GPU_GP1);
return (1);
}
return (0);
@@ -233,72 +190,69 @@ u8 PacketSize[256] =
///////////////////////////////////////////////////////////////////////////////
INLINE void gpuSendPacket()
{
-#ifdef DEBUG_ANALYSIS
- dbg_anacnt_GPU_sendPacket++;
-#endif
- gpuSendPacketFunction(PacketBuffer.U4[0]>>24);
+ gpuSendPacketFunction(gpu_unai.PacketBuffer.U4[0]>>24);
}
///////////////////////////////////////////////////////////////////////////////
INLINE void gpuCheckPacket(u32 uData)
{
- if (PacketCount)
+ if (gpu_unai.PacketCount)
{
- PacketBuffer.U4[PacketIndex++] = uData;
- --PacketCount;
+ gpu_unai.PacketBuffer.U4[gpu_unai.PacketIndex++] = uData;
+ --gpu_unai.PacketCount;
}
else
{
- PacketBuffer.U4[0] = uData;
- PacketCount = PacketSize[uData >> 24];
- PacketIndex = 1;
+ gpu_unai.PacketBuffer.U4[0] = uData;
+ gpu_unai.PacketCount = PacketSize[uData >> 24];
+ gpu_unai.PacketIndex = 1;
}
- if (!PacketCount) gpuSendPacket();
+ if (!gpu_unai.PacketCount) gpuSendPacket();
}
///////////////////////////////////////////////////////////////////////////////
-void GPU_writeDataMem(u32* dmaAddress, s32 dmaCount)
+void GPU_writeDataMem(u32* dmaAddress, int dmaCount)
{
-#ifdef DEBUG_ANALYSIS
- dbg_anacnt_GPU_writeDataMem++;
-#endif
- pcsx4all_prof_pause(PCSX4ALL_PROF_CPU);
- pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_writeDataMem(%d)\n",dmaCount);
+ #endif
u32 data;
- const u16 *VIDEO_END=(GPU_FrameBuffer+(FRAME_BUFFER_SIZE/2)-1);
- GPU_GP1 &= ~0x14000000;
+ const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1;
+ gpu_unai.GPU_GP1 &= ~0x14000000;
while (dmaCount)
{
- if (FrameToWrite)
+ if (gpu_unai.dma.FrameToWrite)
{
while (dmaCount)
{
dmaCount--;
data = *dmaAddress++;
- if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
- pvram[px] = data;
- if (++px>=x_end)
+ if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024;
+ gpu_unai.dma.pvram[gpu_unai.dma.px] = data;
+ if (++gpu_unai.dma.px >= gpu_unai.dma.x_end)
{
- px = 0;
- pvram += 1024;
- if (++py>=y_end)
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.pvram += 1024;
+ if (++gpu_unai.dma.py >= gpu_unai.dma.y_end)
{
- FrameToWrite = false;
- GPU_GP1 &= ~0x08000000;
+ gpu_unai.dma.FrameToWrite = false;
+ gpu_unai.GPU_GP1 &= ~0x08000000;
+ gpu_unai.fb_dirty = true;
break;
}
}
- if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
- pvram[px] = data>>16;
- if (++px>=x_end)
+ if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024;
+ gpu_unai.dma.pvram[gpu_unai.dma.px] = data>>16;
+ if (++gpu_unai.dma.px >= gpu_unai.dma.x_end)
{
- px = 0;
- pvram += 1024;
- if (++py>=y_end)
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.pvram += 1024;
+ if (++gpu_unai.dma.py >= gpu_unai.dma.y_end)
{
- FrameToWrite = false;
- GPU_GP1 &= ~0x08000000;
+ gpu_unai.dma.FrameToWrite = false;
+ gpu_unai.GPU_GP1 &= ~0x08000000;
+ gpu_unai.fb_dirty = true;
break;
}
}
@@ -312,95 +266,100 @@ void GPU_writeDataMem(u32* dmaAddress, s32 dmaCount)
}
}
- GPU_GP1 = (GPU_GP1 | 0x14000000) & ~0x60000000;
- fb_dirty = true;
- pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
- pcsx4all_prof_resume(PCSX4ALL_PROF_CPU);
+ gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 | 0x14000000) & ~0x60000000;
}
-u32 *lUsedAddr[3];
-INLINE int CheckForEndlessLoop(u32 *laddr)
+long GPU_dmaChain(u32 *rambase, u32 start_addr)
{
- if(laddr==lUsedAddr[1]) return 1;
- if(laddr==lUsedAddr[2]) return 1;
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_dmaChain(0x%x)\n",start_addr);
+ #endif
- if(laddr<lUsedAddr[0]) lUsedAddr[1]=laddr;
- else lUsedAddr[2]=laddr;
- lUsedAddr[0]=laddr;
- return 0;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-long GPU_dmaChain(u32* baseAddr, u32 dmaVAddr)
-{
-#ifdef DEBUG_ANALYSIS
- dbg_anacnt_GPU_dmaChain++;
-#endif
- pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
- u32 data, *address, count, offset;
- unsigned int DMACommandCounter = 0;
+ u32 addr, *list;
+ u32 len, count;
long dma_words = 0;
- GPU_GP1 &= ~0x14000000;
- lUsedAddr[0]=lUsedAddr[1]=lUsedAddr[2]=(u32*)0x1fffff;
- dmaVAddr &= 0x001FFFFF;
- while (dmaVAddr != 0x1FFFFF)
+ if (gpu_unai.dma.last_dma) *gpu_unai.dma.last_dma |= 0x800000;
+
+ gpu_unai.GPU_GP1 &= ~0x14000000;
+
+ addr = start_addr & 0xffffff;
+ for (count = 0; addr != 0xffffff; count++)
{
- address = (baseAddr + (dmaVAddr >> 2));
- if(DMACommandCounter++ > 2000000) break;
- if(CheckForEndlessLoop(address)) break;
- data = *address++;
- count = (data >> 24);
- offset = data & 0x001FFFFF;
- if (dmaVAddr != offset) dmaVAddr = offset;
- else dmaVAddr = 0x1FFFFF;
-
- if(count>0) GPU_writeDataMem(address,count);
- dma_words += 1 + count;
+ list = rambase + (addr & 0x1fffff) / 4;
+ len = list[0] >> 24;
+ addr = list[0] & 0xffffff;
+
+ dma_words += 1 + len;
+
+ // add loop detection marker
+ list[0] |= 0x800000;
+
+ if (len) GPU_writeDataMem(list + 1, len);
+
+ if (addr & 0x800000)
+ {
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_dmaChain(LOOP)\n");
+ #endif
+ break;
+ }
}
- GPU_GP1 = (GPU_GP1 | 0x14000000) & ~0x60000000;
- pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
+
+ // remove loop detection markers
+ addr = start_addr & 0x1fffff;
+ while (count-- > 0)
+ {
+ list = rambase + addr / 4;
+ addr = list[0] & 0x1fffff;
+ list[0] &= ~0x800000;
+ }
+
+ if (gpu_unai.dma.last_dma) *gpu_unai.dma.last_dma &= ~0x800000;
+ gpu_unai.dma.last_dma = rambase + (start_addr & 0x1fffff) / 4;
+
+ gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 | 0x14000000) & ~0x60000000;
return dma_words;
}
///////////////////////////////////////////////////////////////////////////////
-void GPU_writeData(u32 data)
+void GPU_writeData(u32 data)
{
- const u16 *VIDEO_END=(GPU_FrameBuffer+(FRAME_BUFFER_SIZE/2)-1);
-#ifdef DEBUG_ANALYSIS
- dbg_anacnt_GPU_writeData++;
-#endif
- pcsx4all_prof_pause(PCSX4ALL_PROF_CPU);
- pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
- GPU_GP1 &= ~0x14000000;
+ const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1;
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_writeData()\n");
+ #endif
+ gpu_unai.GPU_GP1 &= ~0x14000000;
- if (FrameToWrite)
+ if (gpu_unai.dma.FrameToWrite)
{
- if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
- pvram[px]=(u16)data;
- if (++px>=x_end)
+ if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024;
+ gpu_unai.dma.pvram[gpu_unai.dma.px]=(u16)data;
+ if (++gpu_unai.dma.px >= gpu_unai.dma.x_end)
{
- px = 0;
- pvram += 1024;
- if (++py>=y_end)
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.pvram += 1024;
+ if (++gpu_unai.dma.py >= gpu_unai.dma.y_end)
{
- FrameToWrite = false;
- GPU_GP1 &= ~0x08000000;
+ gpu_unai.dma.FrameToWrite = false;
+ gpu_unai.GPU_GP1 &= ~0x08000000;
+ gpu_unai.fb_dirty = true;
}
}
- if (FrameToWrite)
+ if (gpu_unai.dma.FrameToWrite)
{
- if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
- pvram[px]=data>>16;
- if (++px>=x_end)
+ if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024;
+ gpu_unai.dma.pvram[gpu_unai.dma.px]=data>>16;
+ if (++gpu_unai.dma.px >= gpu_unai.dma.x_end)
{
- px = 0;
- pvram += 1024;
- if (++py>=y_end)
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.pvram += 1024;
+ if (++gpu_unai.dma.py >= gpu_unai.dma.y_end)
{
- FrameToWrite = false;
- GPU_GP1 &= ~0x08000000;
+ gpu_unai.dma.FrameToWrite = false;
+ gpu_unai.GPU_GP1 &= ~0x08000000;
+ gpu_unai.fb_dirty = true;
}
}
}
@@ -409,507 +368,463 @@ void GPU_writeData(u32 data)
{
gpuCheckPacket(data);
}
- GPU_GP1 |= 0x14000000;
- fb_dirty = true;
- pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
- pcsx4all_prof_resume(PCSX4ALL_PROF_CPU);
-
+ gpu_unai.GPU_GP1 |= 0x14000000;
}
///////////////////////////////////////////////////////////////////////////////
-void GPU_readDataMem(u32* dmaAddress, s32 dmaCount)
+void GPU_readDataMem(u32* dmaAddress, int dmaCount)
{
- const u16 *VIDEO_END=(GPU_FrameBuffer+(FRAME_BUFFER_SIZE/2)-1);
-#ifdef DEBUG_ANALYSIS
- dbg_anacnt_GPU_readDataMem++;
-#endif
- if(!FrameToRead) return;
+ const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1;
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_readDataMem(%d)\n",dmaCount);
+ #endif
+ if(!gpu_unai.dma.FrameToRead) return;
- pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
- GPU_GP1 &= ~0x14000000;
+ gpu_unai.GPU_GP1 &= ~0x14000000;
do
{
- if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
+ if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024;
// lower 16 bit
- u32 data = pvram[px];
+ //senquack - 64-bit fix (from notaz)
+ //u32 data = (unsigned long)gpu_unai.dma.pvram[gpu_unai.dma.px];
+ u32 data = (u32)gpu_unai.dma.pvram[gpu_unai.dma.px];
- if (++px>=x_end)
+ if (++gpu_unai.dma.px >= gpu_unai.dma.x_end)
{
- px = 0;
- pvram += 1024;
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.pvram += 1024;
}
- if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
+ if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024;
// higher 16 bit (always, even if it's an odd width)
- data |= (u32)(pvram[px])<<16;
+ //senquack - 64-bit fix (from notaz)
+ //data |= (unsigned long)(gpu_unai.dma.pvram[gpu_unai.dma.px])<<16;
+ data |= (u32)(gpu_unai.dma.pvram[gpu_unai.dma.px])<<16;
*dmaAddress++ = data;
- if (++px>=x_end)
+ if (++gpu_unai.dma.px >= gpu_unai.dma.x_end)
{
- px = 0;
- pvram += 1024;
- if (++py>=y_end)
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.pvram += 1024;
+ if (++gpu_unai.dma.py >= gpu_unai.dma.y_end)
{
- FrameToRead = false;
- GPU_GP1 &= ~0x08000000;
+ gpu_unai.dma.FrameToRead = false;
+ gpu_unai.GPU_GP1 &= ~0x08000000;
break;
}
}
} while (--dmaCount);
- GPU_GP1 = (GPU_GP1 | 0x14000000) & ~0x60000000;
- pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
+ gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 | 0x14000000) & ~0x60000000;
}
///////////////////////////////////////////////////////////////////////////////
-u32 GPU_readData(void)
+u32 GPU_readData(void)
{
- const u16 *VIDEO_END=(GPU_FrameBuffer+(FRAME_BUFFER_SIZE/2)-1);
-#ifdef DEBUG_ANALYSIS
- dbg_anacnt_GPU_readData++;
-#endif
- pcsx4all_prof_pause(PCSX4ALL_PROF_CPU);
- pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_READ);
- GPU_GP1 &= ~0x14000000;
- if (FrameToRead)
+ const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1;
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_readData()\n");
+ #endif
+ gpu_unai.GPU_GP1 &= ~0x14000000;
+ if (gpu_unai.dma.FrameToRead)
{
- if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
- GP0 = pvram[px];
- if (++px>=x_end)
+ if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024;
+ gpu_unai.GPU_GP0 = gpu_unai.dma.pvram[gpu_unai.dma.px];
+ if (++gpu_unai.dma.px >= gpu_unai.dma.x_end)
{
- px = 0;
- pvram += 1024;
- if (++py>=y_end)
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.pvram += 1024;
+ if (++gpu_unai.dma.py >= gpu_unai.dma.y_end)
{
- FrameToRead = false;
- GPU_GP1 &= ~0x08000000;
+ gpu_unai.dma.FrameToRead = false;
+ gpu_unai.GPU_GP1 &= ~0x08000000;
}
}
- if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
- GP0 |= pvram[px]<<16;
- if (++px>=x_end)
+ if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024;
+ gpu_unai.GPU_GP0 |= gpu_unai.dma.pvram[gpu_unai.dma.px]<<16;
+ if (++gpu_unai.dma.px >= gpu_unai.dma.x_end)
{
- px = 0;
- pvram +=1024;
- if (++py>=y_end)
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.pvram += 1024;
+ if (++gpu_unai.dma.py >= gpu_unai.dma.y_end)
{
- FrameToRead = false;
- GPU_GP1 &= ~0x08000000;
+ gpu_unai.dma.FrameToRead = false;
+ gpu_unai.GPU_GP1 &= ~0x08000000;
}
}
}
- GPU_GP1 |= 0x14000000;
+ gpu_unai.GPU_GP1 |= 0x14000000;
- pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_READ);
- pcsx4all_prof_resume(PCSX4ALL_PROF_CPU);
- return (GP0);
+ return (gpu_unai.GPU_GP0);
}
///////////////////////////////////////////////////////////////////////////////
-u32 GPU_readStatus(void)
+u32 GPU_readStatus(void)
{
-#ifdef DEBUG_ANALYSIS
- dbg_anacnt_GPU_readStatus++;
-#endif
- return GPU_GP1;
+ return gpu_unai.GPU_GP1;
+}
+
+INLINE void GPU_NoSkip(void)
+{
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_NoSkip()\n");
+ #endif
+ gpu_unai.frameskip.wasSkip = gpu_unai.frameskip.isSkip;
+ if (gpu_unai.frameskip.isSkip)
+ {
+ gpu_unai.frameskip.isSkip = false;
+ gpu_unai.frameskip.skipGPU = false;
+ }
+ else
+ {
+ gpu_unai.frameskip.isSkip = gpu_unai.frameskip.skipFrame;
+ gpu_unai.frameskip.skipGPU = gpu_unai.frameskip.skipFrame;
+ }
}
///////////////////////////////////////////////////////////////////////////////
void GPU_writeStatus(u32 data)
{
-#ifdef DEBUG_ANALYSIS
- dbg_anacnt_GPU_writeStatus++;
-#endif
- pcsx4all_prof_pause(PCSX4ALL_PROF_CPU);
- pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_writeStatus(%d,%d)\n",data>>24,data & 0xff);
+ #endif
switch (data >> 24) {
case 0x00:
gpuReset();
break;
case 0x01:
- GPU_GP1 &= ~0x08000000;
- PacketCount = 0; FrameToRead = FrameToWrite = false;
+ gpu_unai.GPU_GP1 &= ~0x08000000;
+ gpu_unai.PacketCount = 0;
+ gpu_unai.dma.FrameToRead = gpu_unai.dma.FrameToWrite = false;
break;
case 0x02:
- GPU_GP1 &= ~0x08000000;
- PacketCount = 0; FrameToRead = FrameToWrite = false;
+ gpu_unai.GPU_GP1 &= ~0x08000000;
+ gpu_unai.PacketCount = 0;
+ gpu_unai.dma.FrameToRead = gpu_unai.dma.FrameToWrite = false;
break;
case 0x03:
- GPU_GP1 = (GPU_GP1 & ~0x00800000) | ((data & 1) << 23);
+ gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x00800000) | ((data & 1) << 23);
break;
case 0x04:
- if (data == 0x04000000)
- PacketCount = 0;
- GPU_GP1 = (GPU_GP1 & ~0x60000000) | ((data & 3) << 29);
+ if (data == 0x04000000) gpu_unai.PacketCount = 0;
+ gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x60000000) | ((data & 3) << 29);
break;
case 0x05:
- DisplayArea[0] = (data & 0x000003FF); //(short)(data & 0x3ff);
- DisplayArea[1] = ((data & 0x0007FC00)>>10); //(data & 0x000FFC00) >> 10; //(short)((data>>10)&0x1ff);
- fb_dirty = true;
- wasSkip = isSkip;
- if (isSkip)
- isSkip = false;
- else
- isSkip = skipFrame;
+ // Start of Display Area in VRAM
+ gpu_unai.DisplayArea[0] = data & 0x3ff; // X (0..1023)
+ gpu_unai.DisplayArea[1] = (data >> 10) & 0x1ff; // Y (0..511)
+ GPU_NoSkip();
+ break;
+ case 0x06:
+ // GP1(06h) - Horizontal Display range (on Screen)
+ // 0-11 X1 (260h+0) ;12bit ;\counted in 53.222400MHz units,
+ // 12-23 X2 (260h+320*8) ;12bit ;/relative to HSYNC
+
+ // senquack - gpu_unai completely ignores GP1(0x06) command and
+ // lacks even a place in DisplayArea[] array to store the values.
+ // It seems to have been concerned only with vertical display range
+ // and centering top/bottom. I will not add support here, and
+ // focus instead on the gpulib version (gpulib_if.cpp) which uses
+ // gpulib for its PS1->host framebuffer blitting.
break;
case 0x07:
- DisplayArea[4] = data & 0x000003FF; //(short)(data & 0x3ff);
- DisplayArea[5] = (data & 0x000FFC00) >> 10; //(short)((data>>10) & 0x3ff);
- fb_dirty = true;
+ // GP1(07h) - Vertical Display range (on Screen)
+ // 0-9 Y1 (NTSC=88h-(224/2), (PAL=A3h-(264/2)) ;\scanline numbers on screen,
+ // 10-19 Y2 (NTSC=88h+(224/2), (PAL=A3h+(264/2)) ;/relative to VSYNC
+ // 20-23 Not used (zero)
+ {
+ u32 v1=data & 0x000003FF; //(short)(data & 0x3ff);
+ u32 v2=(data & 0x000FFC00) >> 10; //(short)((data>>10) & 0x3ff);
+ if ((gpu_unai.DisplayArea[4]!=v1)||(gpu_unai.DisplayArea[5]!=v2))
+ {
+ gpu_unai.DisplayArea[4] = v1;
+ gpu_unai.DisplayArea[5] = v2;
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"video_clear(CHANGE_Y)\n");
+ #endif
+ video_clear();
+ }
+ }
break;
case 0x08:
{
- GPU_GP1 = (GPU_GP1 & ~0x007F0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
- static u32 HorizontalResolution[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
- DisplayArea[2] = HorizontalResolution[(GPU_GP1 >> 16) & 7];
- static u32 VerticalResolution[4] = { 240, 480, 256, 480 };
- DisplayArea[3] = VerticalResolution[(GPU_GP1 >> 19) & 3];
- isPAL = (data & 0x08) ? true : false; // if 1 - PAL mode, else NTSC
+ static const u32 HorizontalResolution[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
+ static const u32 VerticalResolution[4] = { 240, 480, 256, 480 };
+ gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x007F0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_writeStatus(RES=%dx%d,BITS=%d,PAL=%d)\n",HorizontalResolution[(gpu_unai.GPU_GP1 >> 16) & 7],
+ VerticalResolution[(gpu_unai.GPU_GP1 >> 19) & 3],(gpu_unai.GPU_GP1&0x00200000?24:15),(IS_PAL?1:0));
+ #endif
+ // Video mode change
+ u32 new_width = HorizontalResolution[(gpu_unai.GPU_GP1 >> 16) & 7];
+ u32 new_height = VerticalResolution[(gpu_unai.GPU_GP1 >> 19) & 3];
+
+ if (gpu_unai.DisplayArea[2] != new_width || gpu_unai.DisplayArea[3] != new_height)
+ {
+ // Update width
+ gpu_unai.DisplayArea[2] = new_width;
+
+ if (PixelSkipEnabled()) {
+ // Set blit_mask for high horizontal resolutions. This allows skipping
+ // rendering pixels that would never get displayed on low-resolution
+ // platforms that use a simple pixel-dropping scaler.
+ switch (gpu_unai.DisplayArea[2])
+ {
+ case 512: gpu_unai.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS
+ case 640: gpu_unai.blit_mask = 0xaa; break; // GPU_BlitWS
+ default: gpu_unai.blit_mask = 0; break;
+ }
+ } else {
+ gpu_unai.blit_mask = 0;
+ }
+
+ // Update height
+ gpu_unai.DisplayArea[3] = new_height;
+
+ if (LineSkipEnabled()) {
+ // Set rendering line-skip (only render every other line in high-res
+ // 480 vertical mode, or, optionally, force it for all video modes)
+
+ if (gpu_unai.DisplayArea[3] == 480) {
+ if (gpu_unai.config.ilace_force) {
+ gpu_unai.ilace_mask = 3; // Only need 1/4 of lines
+ } else {
+ gpu_unai.ilace_mask = 1; // Only need 1/2 of lines
+ }
+ } else {
+ // Vertical resolution changed from 480 to a lower one
+ gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
+ }
+ } else {
+ gpu_unai.ilace_mask = 0;
+ }
+
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"video_clear(CHANGE_RES)\n");
+ #endif
+ video_clear();
+ }
+
}
- fb_dirty = true;
break;
case 0x10:
- switch (data & 0xffff) {
- case 0:
- case 1:
- case 3:
- GP0 = (DrawingArea[1] << 10) | DrawingArea[0];
- break;
- case 4:
- GP0 = ((DrawingArea[3]-1) << 10) | (DrawingArea[2]-1);
- break;
- case 6:
- case 5:
- GP0 = (DrawingOffset[1] << 11) | DrawingOffset[0];
- break;
- case 7:
- GP0 = 2;
- break;
- default:
- GP0 = 0;
+ switch (data & 0xff) {
+ case 2: gpu_unai.GPU_GP0 = gpu_unai.tex_window; break;
+ case 3: gpu_unai.GPU_GP0 = (gpu_unai.DrawingArea[1] << 10) | gpu_unai.DrawingArea[0]; break;
+ case 4: gpu_unai.GPU_GP0 = ((gpu_unai.DrawingArea[3]-1) << 10) | (gpu_unai.DrawingArea[2]-1); break;
+ case 5: case 6: gpu_unai.GPU_GP0 = (((u32)gpu_unai.DrawingOffset[1] & 0x7ff) << 11) | ((u32)gpu_unai.DrawingOffset[0] & 0x7ff); break;
+ case 7: gpu_unai.GPU_GP0 = 2; break;
+ case 8: case 15: gpu_unai.GPU_GP0 = 0xBFC03720; break;
}
break;
}
- pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
- pcsx4all_prof_resume(PCSX4ALL_PROF_CPU);
}
-#ifndef REARMED
-
// Blitting functions
#include "gpu_blit.h"
-INLINE void gpuVideoOutput(void)
+static void gpuVideoOutput(void)
{
- static s16 old_res_horz, old_res_vert, old_rgb24;
- s16 h0, x0, y0, w0, h1;
+ int h0, x0, y0, w0, h1;
- x0 = DisplayArea[0];
- y0 = DisplayArea[1];
+ x0 = gpu_unai.DisplayArea[0];
+ y0 = gpu_unai.DisplayArea[1];
- w0 = DisplayArea[2];
- h0 = DisplayArea[3]; // video mode
+ w0 = gpu_unai.DisplayArea[2];
+ h0 = gpu_unai.DisplayArea[3]; // video mode
- h1 = DisplayArea[5] - DisplayArea[4]; // display needed
+ h1 = gpu_unai.DisplayArea[5] - gpu_unai.DisplayArea[4]; // display needed
if (h0 == 480) h1 = Min2(h1*2,480);
- u16* dest_screen16 = SCREEN;
- u16* src_screen16 = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0,y0)];
- u32 isRGB24 = (GPU_GP1 & 0x00200000 ? 32 : 0);
+ bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false);
+ u16* dst16 = SCREEN;
+ u16* src16 = (u16*)gpu_unai.vram;
- /* Clear the screen if resolution changed to prevent interlacing and clipping to clash */
- if( (w0 != old_res_horz || h1 != old_res_vert || (s16)isRGB24 != old_rgb24) )
- {
- // Update old resolution
- old_res_horz = w0;
- old_res_vert = h1;
- old_rgb24 = (s16)isRGB24;
- // Finally, clear the screen for this special case
- video_clear();
- }
+ // PS1 fb read wraps around (fixes black screen in 'Tobal no. 1')
+ unsigned int src16_offs_msk = 1024*512-1;
+ unsigned int src16_offs = (x0 + y0*1024) & src16_offs_msk;
// Height centering
int sizeShift = 1;
- if(h0==256) h0 = 240; else if(h0==480) sizeShift = 2;
- if(h1>h0) { src_screen16 += ((h1-h0)>>sizeShift)*1024; h1 = h0; }
- else if(h1<h0) dest_screen16 += ((h0-h1)>>sizeShift)*VIDEO_WIDTH;
+ if (h0 == 256) {
+ h0 = 240;
+ } else if (h0 == 480) {
+ sizeShift = 2;
+ }
+ if (h1 > h0) {
+ src16_offs = (src16_offs + (((h1-h0) / 2) * 1024)) & src16_offs_msk;
+ h1 = h0;
+ } else if (h1<h0) {
+ dst16 += ((h0-h1) >> sizeShift) * VIDEO_WIDTH;
+ }
+
/* Main blitter */
int incY = (h0==480) ? 2 : 1;
h0=(h0==480 ? 2048 : 1024);
{
- const int li=linesInterlace;
- bool pi=progressInterlace;
- bool pif=progressInterlace_flag;
+ const int li=gpu_unai.ilace_mask;
+ bool pi = ProgressiveInterlaceEnabled();
+ bool pif = gpu_unai.prog_ilace_flag;
switch ( w0 )
{
case 256:
for(int y1=y0+h1; y0<y1; y0+=incY)
{
- if(( 0 == (y0&li) ) && ((!pi) || (pif=!pif))) GPU_BlitWWDWW( src_screen16, dest_screen16, isRGB24);
- dest_screen16 += VIDEO_WIDTH;
- src_screen16 += h0;
+ if (( 0 == (y0&li) ) && ((!pi) || (pif=!pif)))
+ GPU_BlitWWDWW(src16 + src16_offs, dst16, isRGB24);
+ dst16 += VIDEO_WIDTH;
+ src16_offs = (src16_offs + h0) & src16_offs_msk;
}
break;
case 368:
for(int y1=y0+h1; y0<y1; y0+=incY)
{
- if(( 0 == (y0&li) ) && ((!pi) || (pif=!pif))) GPU_BlitWWWWWWWWS( src_screen16, dest_screen16, isRGB24, 4);
- dest_screen16 += VIDEO_WIDTH;
- src_screen16 += h0;
+ if (( 0 == (y0&li) ) && ((!pi) || (pif=!pif)))
+ GPU_BlitWWWWWWWWS(src16 + src16_offs, dst16, isRGB24, 4);
+ dst16 += VIDEO_WIDTH;
+ src16_offs = (src16_offs + h0) & src16_offs_msk;
}
break;
case 320:
+ // Ensure 32-bit alignment for GPU_BlitWW() blitter:
+ src16_offs &= ~1;
for(int y1=y0+h1; y0<y1; y0+=incY)
{
- if(( 0 == (y0&li) ) && ((!pi) || (pif=!pif))) GPU_BlitWW( src_screen16, dest_screen16, isRGB24);
- dest_screen16 += VIDEO_WIDTH;
- src_screen16 += h0;
+ if (( 0 == (y0&li) ) && ((!pi) || (pif=!pif)))
+ GPU_BlitWW(src16 + src16_offs, dst16, isRGB24);
+ dst16 += VIDEO_WIDTH;
+ src16_offs = (src16_offs + h0) & src16_offs_msk;
}
break;
case 384:
for(int y1=y0+h1; y0<y1; y0+=incY)
{
- if(( 0 == (y0&li) ) && ((!pi) || (pif=!pif))) GPU_BlitWWWWWS( src_screen16, dest_screen16, isRGB24);
- dest_screen16 += VIDEO_WIDTH;
- src_screen16 += h0;
+ if (( 0 == (y0&li) ) && ((!pi) || (pif=!pif)))
+ GPU_BlitWWWWWS(src16 + src16_offs, dst16, isRGB24);
+ dst16 += VIDEO_WIDTH;
+ src16_offs = (src16_offs + h0) & src16_offs_msk;
}
break;
case 512:
for(int y1=y0+h1; y0<y1; y0+=incY)
{
- if(( 0 == (y0&li) ) && ((!pi) || (pif=!pif))) GPU_BlitWWSWWSWS( src_screen16, dest_screen16, isRGB24);
- dest_screen16 += VIDEO_WIDTH;
- src_screen16 += h0;
+ if (( 0 == (y0&li) ) && ((!pi) || (pif=!pif)))
+ GPU_BlitWWSWWSWS(src16 + src16_offs, dst16, isRGB24);
+ dst16 += VIDEO_WIDTH;
+ src16_offs = (src16_offs + h0) & src16_offs_msk;
}
break;
case 640:
for(int y1=y0+h1; y0<y1; y0+=incY)
{
- if(( 0 == (y0&li) ) && ((!pi) || (pif=!pif))) GPU_BlitWS( src_screen16, dest_screen16, isRGB24);
- dest_screen16 += VIDEO_WIDTH;
- src_screen16 += h0;
+ if (( 0 == (y0&li) ) && ((!pi) || (pif=!pif)))
+ GPU_BlitWS(src16 + src16_offs, dst16, isRGB24);
+ dst16 += VIDEO_WIDTH;
+ src16_offs = (src16_offs + h0) & src16_offs_msk;
}
break;
}
- progressInterlace_flag=!progressInterlace_flag;
+ gpu_unai.prog_ilace_flag = !gpu_unai.prog_ilace_flag;
}
video_flip();
}
-///////////////////////////////////////////////////////////////////////////////
-void GPU_updateLace(void)
-{
-#ifdef ENABLE_GPU_LOG_SUPPORT
- fprintf(stdout,"GPU_updateLace()\n");
-#endif
-#ifdef DEBUG_ANALYSIS
- dbg_anacnt_GPU_updateLace++;
-#endif
- pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_COUNTERS);
-#ifdef PROFILER_PCSX4ALL
- pcsx4all_prof_frames++;
-#endif
-#ifdef DEBUG_FRAME
- if(isdbg_frame())
- {
- static int passed=0;
- if (!passed) dbg_enable();
- else pcsx4all_exit();
- passed++;
- }
-#endif
-
- // Frame skip table
- static const unsigned char skipTable[12][12] =
- {
- { 0,0,0,0,0,0,0,0,0,0,0,0 },
- { 0,0,0,0,0,0,0,0,0,0,0,1 },
- { 0,0,0,0,0,1,0,0,0,0,0,1 },
- { 0,0,0,1,0,0,0,1,0,0,0,1 },
- { 0,0,1,0,0,1,0,0,1,0,0,1 },
- { 0,1,0,0,1,0,1,0,0,1,0,1 },
- { 0,1,0,1,0,1,0,1,0,1,0,1 },
- { 0,1,0,1,1,0,1,0,1,1,0,1 },
- { 0,1,1,0,1,1,0,1,1,0,1,1 },
- { 0,1,1,1,0,1,1,1,0,1,1,1 },
- { 0,1,1,1,1,1,0,1,1,1,1,1 },
- { 0,1,1,1,1,1,1,1,1,1,1,1 }
- };
-
- // Interlace bit toggle
- GPU_GP1 ^= 0x80000000;
-
- // Update display
- if ((!skipFrame) && (!isSkip) && (fb_dirty) && (!(((GPU_GP1&0x08000000))||((GPU_GP1&0x00800000)))))
- {
- gpuVideoOutput(); // Display updated
-
- if (DisplayArea[3] == 480)
- {
- if (linesInterlace_user) linesInterlace = 3; // 1/4 of lines
- else linesInterlace = 1; // if 480 we only need half of lines
- }
- else if (linesInterlace != linesInterlace_user)
- {
- linesInterlace = linesInterlace_user; // resolution changed from 480 to lower one
- video_clear();
- }
- }
+// Update frameskip every (1 second >> 3) ticks, i.e. 8 times per second
+#define GPU_FRAMESKIP_UPDATE 3
- // Limit FPS
- if (frameLimit)
- {
- static unsigned next=get_ticks();
- if (!skipFrame)
- {
- unsigned now=get_ticks();
- if (now<next) wait_ticks(next-now);
- }
- next+=(isPAL?(1000000/50):((unsigned)(1000000.0/59.94)));
- }
+static void GPU_frameskip (bool show)
+{
+ u32 now=get_ticks(); // current frame
- // Show FPS statistics
- if (show_fps)
+ // Update frameskip
+ if (gpu_unai.frameskip.skipCount==0) gpu_unai.frameskip.skipFrame=false; // frameskip off
+ else if (gpu_unai.frameskip.skipCount==7) { if (show) gpu_unai.frameskip.skipFrame=!gpu_unai.frameskip.skipFrame; } // frameskip medium
+ else if (gpu_unai.frameskip.skipCount==8) gpu_unai.frameskip.skipFrame=true; // frameskip maximum
+ else
{
- static u32 real_fps=0;
- static u32 prev=get_ticks();
- static char msg[32]="FPS=000/00 SPD=000%";
- u32 now=get_ticks();
- real_fps++;
- if ((now-prev)>=1000000)
+ static u32 spd=100; // speed %
+ static u32 frames=0; // frames counter
+ static u32 prev=now; // previous fps calculation
+ frames++;
+ if ((now-prev)>=(TPS>>GPU_FRAMESKIP_UPDATE))
{
- u32 expected_fps=(isPAL?50:60);
- sprintf(msg,"FPS=%3d/%2d SPD=%3d%%",((real_fps*(12-skipCount))/12),((expected_fps*(12-skipCount))/12),((real_fps*100)/expected_fps));
+ if (IS_PAL) spd=(frames<<1);
+ else spd=((frames*1001)/600);
+ spd<<=GPU_FRAMESKIP_UPDATE;
+ frames=0;
prev=now;
- real_fps=0;
}
- port_printf(5,5,msg);
- }
-
- // Update frame-skip
- if (!alt_fps)
- {
- // Video frame-skip
- skipFrame=skipTable[skipCount][skCount];
- skCount--; if (skCount<0) skCount=11;
- isSkip=skipFrame;
- }
- else
- {
- // Game frame-skip
- if (!isSkip)
+ switch(gpu_unai.frameskip.skipCount)
{
- skipFrame=skipTable[skipCount][skCount];
- skCount--; if (skCount<0) skCount=11;
- isSkip=true;
+ case 1: if (spd<50) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<50%)
+ case 2: if (spd<60) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<60%)
+ case 3: if (spd<70) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<70%)
+ case 4: if (spd<80) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<80%)
+ case 5: if (spd<90) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<90%)
}
}
- fb_dirty=false;
-
- pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_COUNTERS);
-}
-
-#else
-
-#include "../../frontend/plugin_lib.h"
-
-extern "C" {
-
-static const struct rearmed_cbs *cbs;
-static s16 old_res_horz, old_res_vert, old_rgb24;
-
-static void blit(void)
-{
- u16 *base = (u16 *)GPU_FrameBuffer;
- s16 isRGB24 = (GPU_GP1 & 0x00200000) ? 1 : 0;
- s16 h0, x0, y0, w0, h1;
-
- x0 = DisplayArea[0] & ~1; // alignment needed by blitter
- y0 = DisplayArea[1];
- base += FRAME_OFFSET(x0, y0);
-
- w0 = DisplayArea[2];
- h0 = DisplayArea[3]; // video mode
-
- h1 = DisplayArea[5] - DisplayArea[4]; // display needed
- if (h0 == 480) h1 = Min2(h1*2,480);
-
- if (h1 <= 0)
- return;
-
- if (w0 != old_res_horz || h1 != old_res_vert || isRGB24 != old_rgb24)
- {
- old_res_horz = w0;
- old_res_vert = h1;
- old_rgb24 = (s16)isRGB24;
- cbs->pl_vout_set_mode(w0, h1, w0, h1, isRGB24 ? 24 : 16);
- }
-
- cbs->pl_vout_flip(base, 1024, isRGB24, w0, h1);
}
+///////////////////////////////////////////////////////////////////////////////
void GPU_updateLace(void)
{
// Interlace bit toggle
- GPU_GP1 ^= 0x80000000;
+ gpu_unai.GPU_GP1 ^= 0x80000000;
- if (!fb_dirty || (GPU_GP1&0x08800000))
- return;
-
- if (!wasSkip) {
- blit();
- fb_dirty = false;
- skCount = 0;
- }
- else {
- skCount++;
- if (skCount >= 8)
- wasSkip = isSkip = 0;
+ // Update display?
+ if ((gpu_unai.fb_dirty) && (!gpu_unai.frameskip.wasSkip) && (!(gpu_unai.GPU_GP1&0x00800000)))
+ {
+ // Display updated
+ gpuVideoOutput();
+ GPU_frameskip(true);
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_updateLace(UPDATE)\n");
+ #endif
+ } else {
+ GPU_frameskip(false);
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"GPU_updateLace(SKIP)\n");
+ #endif
}
- skipFrame = cbs->fskip_advice || cbs->frameskip == 1;
-}
+ if ((!gpu_unai.frameskip.skipCount) && (gpu_unai.DisplayArea[3] == 480)) gpu_unai.frameskip.skipGPU=true; // Tekken 3 hack
-long GPUopen(unsigned long *, char *, char *)
-{
- cbs->pl_vout_open();
- return 0;
+ gpu_unai.fb_dirty=false;
+ gpu_unai.dma.last_dma = NULL;
}
-long GPUclose(void)
+// Allows frontend to signal plugin to redraw screen after returning to emu
+void GPU_requestScreenRedraw()
{
- cbs->pl_vout_close();
- return 0;
+ gpu_unai.fb_dirty = true;
}
-long GPUfreeze(unsigned int ulGetFreezeData, GPUFreeze_t* p2)
+void GPU_getScreenInfo(GPUScreenInfo_t *sinfo)
{
- if (ulGetFreezeData > 1)
- return 0;
-
- return GPU_freeze(ulGetFreezeData, p2);
+ bool depth24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false);
+ int16_t hres = (uint16_t)gpu_unai.DisplayArea[2];
+ int16_t vres = (uint16_t)gpu_unai.DisplayArea[3];
+ int16_t w = hres; // Original gpu_unai doesn't support width < 100%
+ int16_t h = gpu_unai.DisplayArea[5] - gpu_unai.DisplayArea[4];
+ if (vres == 480)
+ h *= 2;
+ if (h <= 0 || h > vres)
+ h = vres;
+
+ sinfo->vram = (uint8_t*)gpu_unai.vram;
+ sinfo->x = (uint16_t)gpu_unai.DisplayArea[0];
+ sinfo->y = (uint16_t)gpu_unai.DisplayArea[1];
+ sinfo->w = w;
+ sinfo->h = h;
+ sinfo->hres = hres;
+ sinfo->vres = vres;
+ sinfo->depth24 = depth24;
+ sinfo->pal = IS_PAL;
}
-
-void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_)
-{
- enableAbbeyHack = cbs_->gpu_unai.abe_hack;
- light = !cbs_->gpu_unai.no_light;
- blend = !cbs_->gpu_unai.no_blend;
- if (cbs_->pl_vout_set_raw_vram)
- cbs_->pl_vout_set_raw_vram((void *)GPU_FrameBuffer);
-
- cbs = cbs_;
- if (cbs->pl_set_gpu_caps)
- cbs->pl_set_gpu_caps(0);
-}
-
-} /* extern "C" */
-
-#endif
diff --git a/plugins/gpu_unai/gpu.h b/plugins/gpu_unai/gpu.h
index 1811630..eade2a8 100644
--- a/plugins/gpu_unai/gpu.h
+++ b/plugins/gpu_unai/gpu.h
@@ -1,6 +1,7 @@
/***************************************************************************
* Copyright (C) 2010 PCSX4ALL Team *
* Copyright (C) 2010 Unai *
+* Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -18,70 +19,52 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
***************************************************************************/
-#ifndef NEW_GPU_H
-#define NEW_GPU_H
+#ifndef GPU_UNAI_GPU_H
+#define GPU_UNAI_GPU_H
-///////////////////////////////////////////////////////////////////////////////
-// GPU global definitions
-#define FRAME_BUFFER_SIZE (1024*512*2)
-#define FRAME_WIDTH 1024
-#define FRAME_HEIGHT 512
-#define FRAME_OFFSET(x,y) (((y)<<10)+(x))
+struct gpu_unai_config_t {
+ uint8_t pixel_skip:1; // If 1, allows skipping rendering pixels that
+ // would not be visible when a high horizontal
+ // resolution PS1 video mode is set.
+ // Only applies to devices with low resolutions
+ // like 320x240. Should not be used if a
+ // down-scaling framebuffer blitter is in use.
+ // Can cause gfx artifacts if game reads VRAM
+ // to do framebuffer effects.
-#define VIDEO_WIDTH 320
+ uint8_t ilace_force:3; // Option to force skipping rendering of lines,
+ // for very slow platforms. Value will be
+ // assigned to 'ilace_mask' in gpu_unai struct.
+ // Normally 0. Value '1' will skip rendering
+ // odd lines.
-typedef char s8;
-typedef signed short s16;
-typedef signed int s32;
-typedef signed long long s64;
+ uint8_t lighting:1;
+ uint8_t fast_lighting:1;
+ uint8_t blending:1;
+ uint8_t dithering:1;
-typedef unsigned char u8;
-typedef unsigned short u16;
-typedef unsigned int u32;
-typedef unsigned long long u64;
+ //senquack Only PCSX Rearmed's version of gpu_unai had this, and I
+ // don't think it's necessary. It would require adding 'AH' flag to
+ // gpuSpriteSpanFn() increasing size of sprite span function array.
+ //uint8_t enableAbbeyHack:1; // Abe's Odyssey hack
-#include "gpu_fixedpoint.h"
-
-///////////////////////////////////////////////////////////////////////////////
-// Tweaks and Hacks
-extern int skipCount;
-extern bool enableAbbeyHack;
-extern bool show_fps;
-extern bool alt_fps;
-
-///////////////////////////////////////////////////////////////////////////////
-// interlaced rendering
-extern int linesInterlace_user;
-extern bool progressInterlace;
-
-extern bool light;
-extern bool blend;
-
-typedef struct {
- u32 Version;
- u32 GPU_gp1;
- u32 Control[256];
- unsigned char FrameBuffer[1024*512*2];
-} GPUFreeze_t;
-
-struct GPUPacket
-{
- union
- {
- u32 U4[16];
- s32 S4[16];
- u16 U2[32];
- s16 S2[32];
- u8 U1[64];
- s8 S1[64];
- };
+ ////////////////////////////////////////////////////////////////////////////
+ // Variables used only by older standalone version of gpu_unai (gpu.cpp)
+#ifndef USE_GPULIB
+ uint8_t prog_ilace:1; // Progressive interlace option (old option)
+ // This option was somewhat oddly named:
+ // When in interlaced video mode, on a low-res
+ // 320x240 device, only the even lines are
+ // rendered. This option will take that one
+ // step further and only render half the even
+ // lines one frame, and then the other half.
+ uint8_t frameskip_count:3; // Frame skip (0..7)
+#endif
};
-///////////////////////////////////////////////////////////////////////////////
-// Compile Options
+extern gpu_unai_config_t gpu_unai_config_ext;
-//#define ENABLE_GPU_NULL_SUPPORT // Enables NullGPU support
-//#define ENABLE_GPU_LOG_SUPPORT // Enables gpu logger, very slow only for windows debugging
+// TODO: clean up show_fps frontend option
+extern bool show_fps;
-///////////////////////////////////////////////////////////////////////////////
-#endif // NEW_GPU_H
+#endif // GPU_UNAI_GPU_H
diff --git a/plugins/gpu_unai/gpu_blit.h b/plugins/gpu_unai/gpu_blit.h
index 35cd056..e93f12f 100644
--- a/plugins/gpu_unai/gpu_blit.h
+++ b/plugins/gpu_unai/gpu_blit.h
@@ -32,10 +32,10 @@
///////////////////////////////////////////////////////////////////////////////
// GPU Blitting code with rescale and interlace support.
-INLINE void GPU_BlitWW(const void* src, u16* dst16, u32 isRGB24)
+INLINE void GPU_BlitWW(const void* src, u16* dst16, bool isRGB24)
{
u32 uCount;
- if(isRGB24 == 0)
+ if(!isRGB24)
{
#ifndef USE_BGR15
uCount = 20;
@@ -85,10 +85,10 @@ INLINE void GPU_BlitWW(const void* src, u16* dst16, u32 isRGB24)
}
}
-INLINE void GPU_BlitWWSWWSWS(const void* src, u16* dst16, u32 isRGB24)
+INLINE void GPU_BlitWWSWWSWS(const void* src, u16* dst16, bool isRGB24)
{
u32 uCount;
- if(isRGB24 == 0)
+ if(!isRGB24)
{
#ifndef USE_BGR15
uCount = 32;
@@ -145,10 +145,10 @@ INLINE void GPU_BlitWWSWWSWS(const void* src, u16* dst16, u32 isRGB24)
}
}
-INLINE void GPU_BlitWWWWWS(const void* src, u16* dst16, u32 isRGB24)
+INLINE void GPU_BlitWWWWWS(const void* src, u16* dst16, bool isRGB24)
{
u32 uCount;
- if(isRGB24 == 0)
+ if(!isRGB24)
{
#ifndef USE_BGR15
uCount = 32;
@@ -201,10 +201,10 @@ INLINE void GPU_BlitWWWWWS(const void* src, u16* dst16, u32 isRGB24)
}
}
-INLINE void GPU_BlitWWWWWWWWS(const void* src, u16* dst16, u32 isRGB24, u32 uClip_src)
+INLINE void GPU_BlitWWWWWWWWS(const void* src, u16* dst16, bool isRGB24, u32 uClip_src)
{
u32 uCount;
- if(isRGB24 == 0)
+ if(!isRGB24)
{
#ifndef USE_BGR15
uCount = 20;
@@ -274,10 +274,10 @@ INLINE void GPU_BlitWWWWWWWWS(const void* src, u16* dst16, u32 isRGB24, u32 uCli
}
}
-INLINE void GPU_BlitWWDWW(const void* src, u16* dst16, u32 isRGB24)
+INLINE void GPU_BlitWWDWW(const void* src, u16* dst16, bool isRGB24)
{
u32 uCount;
- if(isRGB24 == 0)
+ if(!isRGB24)
{
#ifndef USE_BGR15
uCount = 32;
@@ -331,10 +331,10 @@ INLINE void GPU_BlitWWDWW(const void* src, u16* dst16, u32 isRGB24)
}
-INLINE void GPU_BlitWS(const void* src, u16* dst16, u32 isRGB24)
+INLINE void GPU_BlitWS(const void* src, u16* dst16, bool isRGB24)
{
u32 uCount;
- if(isRGB24 == 0)
+ if(!isRGB24)
{
#ifndef USE_BGR15
uCount = 20;
diff --git a/plugins/gpu_unai/gpu_command.h b/plugins/gpu_unai/gpu_command.h
index d6e7a74..7096b75 100644
--- a/plugins/gpu_unai/gpu_command.h
+++ b/plugins/gpu_unai/gpu_command.h
@@ -1,6 +1,7 @@
/***************************************************************************
* Copyright (C) 2010 PCSX4ALL Team *
* Copyright (C) 2010 Unai *
+* Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -19,34 +20,35 @@
***************************************************************************/
///////////////////////////////////////////////////////////////////////////////
-INLINE void gpuSetTexture(u16 tpage)
+void gpuSetTexture(u16 tpage)
{
- u32 tp;
- u32 tx, ty;
- GPU_GP1 = (GPU_GP1 & ~0x1FF) | (tpage & 0x1FF);
+ u32 tmode, tx, ty;
+ gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x1FF) | (tpage & 0x1FF);
+ gpu_unai.TextureWindow[0]&= ~gpu_unai.TextureWindow[2];
+ gpu_unai.TextureWindow[1]&= ~gpu_unai.TextureWindow[3];
- TextureWindow[0]&= ~TextureWindow[2];
- TextureWindow[1]&= ~TextureWindow[3];
+ tmode = (tpage >> 7) & 3; // 16bpp, 8bpp, or 4bpp texture colors?
+ // 0: 4bpp 1: 8bpp 2/3: 16bpp
+
+ // Nocash PSX docs state setting of 3 is same as setting of 2 (16bpp):
+ // Note: DrHell assumes 3 is the same as 0. TODO: verify which is correct.
+ if (tmode == 3) tmode = 2;
- tp = (tpage >> 7) & 3;
tx = (tpage & 0x0F) << 6;
ty = (tpage & 0x10) << 4;
- if (tp == 3) tp = 2;
- tx += (TextureWindow[0] >> (2 - tp));
- ty += TextureWindow[1];
+ tx += (gpu_unai.TextureWindow[0] >> (2 - tmode));
+ ty += gpu_unai.TextureWindow[1];
- BLEND_MODE = (((tpage>>5)&0x3) ) << 3;
- TEXT_MODE = (((tpage>>7)&0x3) + 1 ) << 5; // +1 el cero no lo usamos
-
- TBA = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(tx, ty)];
-
+ gpu_unai.BLEND_MODE = ((tpage>>5) & 3) << 3;
+ gpu_unai.TEXT_MODE = (tmode + 1) << 5; // gpu_unai.TEXT_MODE should be values 1..3, so add one
+ gpu_unai.TBA = &((u16*)gpu_unai.vram)[FRAME_OFFSET(tx, ty)];
}
///////////////////////////////////////////////////////////////////////////////
INLINE void gpuSetCLUT(u16 clut)
{
- CBA = &((u16*)GPU_FrameBuffer)[(clut & 0x7FFF) << 4];
+ gpu_unai.CBA = &((u16*)gpu_unai.vram)[(clut & 0x7FFF) << 4];
}
#ifdef ENABLE_GPU_NULL_SUPPORT
@@ -61,159 +63,305 @@ INLINE void gpuSetCLUT(u16 clut)
#define DO_LOG(expr) {}
#endif
-#define Blending (((PRIM&0x2)&&(blend))?(PRIM&0x2):0)
-#define Blending_Mode (((PRIM&0x2)&&(blend))?BLEND_MODE:0)
-#define Lighting (((~PRIM)&0x1)&&(light))
+#define Blending (((PRIM&0x2) && BlendingEnabled()) ? (PRIM&0x2) : 0)
+#define Blending_Mode (((PRIM&0x2) && BlendingEnabled()) ? gpu_unai.BLEND_MODE : 0)
+#define Lighting (((~PRIM)&0x1) && LightingEnabled())
+// Dithering applies only to Gouraud-shaded polys or texture-blended polys:
+#define Dithering (((((~PRIM)&0x1) || (PRIM&0x10)) && DitheringEnabled()) ? \
+ (ForcedDitheringEnabled() ? (1<<9) : (gpu_unai.GPU_GP1 & (1 << 9))) \
+ : 0)
+
+///////////////////////////////////////////////////////////////////////////////
+//Now handled by Rearmed's gpulib and gpu_unai/gpulib_if.cpp:
+///////////////////////////////////////////////////////////////////////////////
+#ifndef USE_GPULIB
+
+// Handles GP0 draw settings commands 0xE1...0xE6: decodes cmd_word and updates the matching fields of gpu_unai.
+static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word)
+{
+ // Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6
+ u8 num = (cmd_word >> 24) & 7;
+ switch (num) { // values 0 and 7 match no case and are silently ignored
+ case 1: {
+ // GP0(E1h) - Draw Mode setting (aka "Texpage")
+ DO_LOG(("GP0(0xE1) DrawMode TexPage(0x%x)\n", cmd_word));
+ u32 cur_texpage = gpu_unai.GPU_GP1 & 0x7FF; // low 11 bits of GP1 currently in effect
+ u32 new_texpage = cmd_word & 0x7FF;
+ if (cur_texpage != new_texpage) { // only re-derive texture state when the bits actually change
+ gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x7FF) | new_texpage;
+ gpuSetTexture(gpu_unai.GPU_GP1); // refreshes BLEND_MODE / TEXT_MODE / TBA from the new bits
+ }
+ } break;
+
+ case 2: {
+ // GP0(E2h) - Texture Window setting
+ DO_LOG(("GP0(0xE2) TextureWindow(0x%x)\n", cmd_word));
+ if (cmd_word != gpu_unai.TextureWindowCur) { // unchanged setting: nothing to recompute
+ static const u8 TextureMask[32] = {
+ 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7,
+ 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7
+ };
+ gpu_unai.TextureWindowCur = cmd_word;
+ gpu_unai.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; // cmd bits 10-14, scaled by 8
+ gpu_unai.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; // cmd bits 15-19, scaled by 8
+ gpu_unai.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; // cmd bits 0-4 select a mask from the table
+ gpu_unai.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; // cmd bits 5-9 select a mask from the table
+ gpu_unai.TextureWindow[0] &= ~gpu_unai.TextureWindow[2]; // drop offset bits that overlap the mask
+ gpu_unai.TextureWindow[1] &= ~gpu_unai.TextureWindow[3]; // drop offset bits that overlap the mask
+
+ // Inner loop vars must be updated whenever texture window is changed:
+ const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4
+ gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1);
+ gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
+
+ gpuSetTexture(gpu_unai.GPU_GP1); // texture base address (TBA) depends on TextureWindow[0..1]
+ }
+ } break;
+
+ case 3: {
+ // GP0(E3h) - Set Drawing Area top left (X1,Y1)
+ DO_LOG(("GP0(0xE3) DrawingArea Pos(0x%x)\n", cmd_word));
+ gpu_unai.DrawingArea[0] = cmd_word & 0x3FF; // cmd bits 0-9
+ gpu_unai.DrawingArea[1] = (cmd_word >> 10) & 0x3FF; // cmd bits 10-19
+ } break;
+
+ case 4: {
+ // GP0(E4h) - Set Drawing Area bottom right (X2,Y2)
+ DO_LOG(("GP0(0xE4) DrawingArea Size(0x%x)\n", cmd_word));
+ gpu_unai.DrawingArea[2] = (cmd_word & 0x3FF) + 1; // cmd bits 0-9 plus one; presumably an exclusive bound - confirm against rasterizers
+ gpu_unai.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1; // cmd bits 10-19 plus one
+ } break;
+
+ case 5: {
+ // GP0(E5h) - Set Drawing Offset (X,Y)
+ DO_LOG(("GP0(0xE5) DrawingOffset(0x%x)\n", cmd_word));
+ gpu_unai.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11); // sign-extend cmd bits 0-10 (relies on arithmetic right shift of s32)
+ gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); // sign-extend cmd bits 11-21 (relies on arithmetic right shift of s32)
+ } break;
+
+ case 6: {
+ // GP0(E6h) - Mask Bit Setting
+ DO_LOG(("GP0(0xE6) SetMask(0x%x)\n", cmd_word));
+ gpu_unai.Masking = (cmd_word & 0x2) << 1; // cmd bit 1, stored at bit 2 (value 0 or 4)
+ gpu_unai.PixelMSB = (cmd_word & 0x1) << 8; // cmd bit 0, stored at bit 8 (value 0 or 0x100)
+ } break;
+ }
+}
void gpuSendPacketFunction(const int PRIM)
{
//printf("0x%x\n",PRIM);
+ //senquack - TODO: optimize this (packet pointer union as prim draw parameter
+ // introduced as optimization for gpulib command-list processing)
+ PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer };
+
switch (PRIM)
{
- case 0x02:
+ case 0x02: {
NULL_GPU();
- gpuClearImage(); // prim handles updateLace && skip
+ gpuClearImage(packet); // prim handles updateLace && skip
+ gpu_unai.fb_dirty = true;
DO_LOG(("gpuClearImage(0x%x)\n",PRIM));
- break;
+ } break;
+
case 0x20:
case 0x21:
case 0x22:
- case 0x23:
- if (!isSkip)
+ case 0x23: { // Monochrome 3-pt poly
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuDrawF3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB]);
- DO_LOG(("gpuDrawF3(0x%x)\n",PRIM));
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Blending_Mode |
+ gpu_unai.Masking | Blending | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyF(packet, driver, false);
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawPolyF(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x24:
case 0x25:
case 0x26:
- case 0x27:
- if (!isSkip)
+ case 0x27: { // Textured 3-pt poly
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (PacketBuffer.U4[4] >> 16);
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB]);
- else
- gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB]);
- DO_LOG(("gpuDrawFT3(0x%x)\n",PRIM));
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetTexture (gpu_unai.PacketBuffer.U4[4] >> 16);
+
+ u32 driver_idx =
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode | gpu_unai.TEXT_MODE |
+ gpu_unai.Masking | Blending | gpu_unai.PixelMSB;
+
+ if (!FastLightingEnabled()) {
+ driver_idx |= Lighting;
+ } else {
+ if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)))
+ driver_idx |= Lighting;
+ }
+
+ PP driver = gpuPolySpanDrivers[driver_idx];
+ gpuDrawPolyFT(packet, driver, false);
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawPolyFT(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x28:
case 0x29:
case 0x2A:
- case 0x2B:
- if (!isSkip)
+ case 0x2B: { // Monochrome 4-pt poly
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB];
- //--PacketBuffer.S2[6];
- gpuDrawF3(gpuPolySpanDriver);
- PacketBuffer.U4[1] = PacketBuffer.U4[4];
- //--PacketBuffer.S2[2];
- gpuDrawF3(gpuPolySpanDriver);
- DO_LOG(("gpuDrawF4(0x%x)\n",PRIM));
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Blending_Mode |
+ gpu_unai.Masking | Blending | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyF(packet, driver, true); // is_quad = true
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawPolyF(0x%x) (4-pt QUAD)\n",PRIM));
}
- break;
+ } break;
+
case 0x2C:
case 0x2D:
case 0x2E:
- case 0x2F:
- if (!isSkip)
+ case 0x2F: { // Textured 4-pt poly
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (PacketBuffer.U4[4] >> 16);
- PP gpuPolySpanDriver;
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB];
- else
- gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB];
- //--PacketBuffer.S2[6];
- gpuDrawFT3(gpuPolySpanDriver);
- PacketBuffer.U4[1] = PacketBuffer.U4[7];
- PacketBuffer.U4[2] = PacketBuffer.U4[8];
- //--PacketBuffer.S2[2];
- gpuDrawFT3(gpuPolySpanDriver);
- DO_LOG(("gpuDrawFT4(0x%x)\n",PRIM));
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetTexture (gpu_unai.PacketBuffer.U4[4] >> 16);
+
+ u32 driver_idx =
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode | gpu_unai.TEXT_MODE |
+ gpu_unai.Masking | Blending | gpu_unai.PixelMSB;
+
+ if (!FastLightingEnabled()) {
+ driver_idx |= Lighting;
+ } else {
+ if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)))
+ driver_idx |= Lighting;
+ }
+
+ PP driver = gpuPolySpanDrivers[driver_idx];
+ gpuDrawPolyFT(packet, driver, true); // is_quad = true
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawPolyFT(0x%x) (4-pt QUAD)\n",PRIM));
}
- break;
+ } break;
+
case 0x30:
case 0x31:
case 0x32:
- case 0x33:
- if (!isSkip)
+ case 0x33: { // Gouraud-shaded 3-pt poly
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuDrawG3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB]);
- DO_LOG(("gpuDrawG3(0x%x)\n",PRIM));
+ //NOTE: The '129' here is CF_GOURAUD | CF_LIGHT, however
+ // this is an untextured poly, so CF_LIGHT (texture blend)
+ // shouldn't apply. Until the original array of template
+ // instantiation ptrs is fixed, we're stuck with this. (TODO)
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode |
+ gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyG(packet, driver, false);
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawPolyG(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x34:
case 0x35:
case 0x36:
- case 0x37:
- if (!isSkip)
+ case 0x37: { // Gouraud-shaded, textured 3-pt poly
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (PacketBuffer.U4[5] >> 16);
- gpuDrawGT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB]);
- DO_LOG(("gpuDrawGT3(0x%x)\n",PRIM));
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode | gpu_unai.TEXT_MODE |
+ gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyGT(packet, driver, false);
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawPolyGT(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x38:
case 0x39:
case 0x3A:
- case 0x3B:
- if (!isSkip)
+ case 0x3B: { // Gouraud-shaded 4-pt poly
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB];
- //--PacketBuffer.S2[6];
- gpuDrawG3(gpuPolySpanDriver);
- PacketBuffer.U4[0] = PacketBuffer.U4[6];
- PacketBuffer.U4[1] = PacketBuffer.U4[7];
- //--PacketBuffer.S2[2];
- gpuDrawG3(gpuPolySpanDriver);
- DO_LOG(("gpuDrawG4(0x%x)\n",PRIM));
+ // See notes regarding '129' for 0x30..0x33 further above -senquack
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode |
+ gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyG(packet, driver, true); // is_quad = true
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawPolyG(0x%x) (4-pt QUAD)\n",PRIM));
}
- break;
+ } break;
+
case 0x3C:
case 0x3D:
case 0x3E:
- case 0x3F:
- if (!isSkip)
+ case 0x3F: { // Gouraud-shaded, textured 4-pt poly
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (PacketBuffer.U4[5] >> 16);
- const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB];
- //--PacketBuffer.S2[6];
- gpuDrawGT3(gpuPolySpanDriver);
- PacketBuffer.U4[0] = PacketBuffer.U4[9];
- PacketBuffer.U4[1] = PacketBuffer.U4[10];
- PacketBuffer.U4[2] = PacketBuffer.U4[11];
- //--PacketBuffer.S2[2];
- gpuDrawGT3(gpuPolySpanDriver);
- DO_LOG(("gpuDrawGT4(0x%x)\n",PRIM));
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode | gpu_unai.TEXT_MODE |
+ gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyGT(packet, driver, true); // is_quad = true
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawPolyGT(0x%x) (4-pt QUAD)\n",PRIM));
}
- break;
+ } break;
+
case 0x40:
case 0x41:
case 0x42:
- case 0x43:
- if (!isSkip)
+ case 0x43: { // Monochrome line
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
- DO_LOG(("gpuDrawLF(0x%x)\n",PRIM));
+ // Shift index right by one, as untextured prims don't use lighting
+ u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+ PSD driver = gpuPixelSpanDrivers[driver_idx];
+ gpuDrawLineF(packet, driver);
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x48:
case 0x49:
case 0x4A:
@@ -221,32 +369,44 @@ void gpuSendPacketFunction(const int PRIM)
case 0x4C:
case 0x4D:
case 0x4E:
- case 0x4F:
- if (!isSkip)
+ case 0x4F: { // Monochrome line strip
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
- DO_LOG(("gpuDrawLF(0x%x)\n",PRIM));
+ // Shift index right by one, as untextured prims don't use lighting
+ u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+ PSD driver = gpuPixelSpanDrivers[driver_idx];
+ gpuDrawLineF(packet, driver);
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM));
}
- if ((PacketBuffer.U4[3] & 0xF000F000) != 0x50005000)
+ if ((gpu_unai.PacketBuffer.U4[3] & 0xF000F000) != 0x50005000)
{
- PacketBuffer.U4[1] = PacketBuffer.U4[2];
- PacketBuffer.U4[2] = PacketBuffer.U4[3];
- PacketCount = 1;
- PacketIndex = 3;
+ gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2];
+ gpu_unai.PacketBuffer.U4[2] = gpu_unai.PacketBuffer.U4[3];
+ gpu_unai.PacketCount = 1;
+ gpu_unai.PacketIndex = 3;
}
- break;
+ } break;
+
case 0x50:
case 0x51:
case 0x52:
- case 0x53:
- if (!isSkip)
+ case 0x53: { // Gouraud-shaded line
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
- DO_LOG(("gpuDrawLG(0x%x)\n",PRIM));
+ // Shift index right by one, as untextured prims don't use lighting
+ u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+ // Index MSB selects Gouraud-shaded PixelSpanDriver:
+ driver_idx |= (1 << 5);
+ PSD driver = gpuPixelSpanDrivers[driver_idx];
+ gpuDrawLineG(packet, driver);
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x58:
case 0x59:
case 0x5A:
@@ -254,204 +414,203 @@ void gpuSendPacketFunction(const int PRIM)
case 0x5C:
case 0x5D:
case 0x5E:
- case 0x5F:
- if (!isSkip)
+ case 0x5F: { // Gouraud-shaded line strip
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
- DO_LOG(("gpuDrawLG(0x%x)\n",PRIM));
+ // Shift index right by one, as untextured prims don't use lighting
+ u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+ // Index MSB selects Gouraud-shaded PixelSpanDriver:
+ driver_idx |= (1 << 5);
+ PSD driver = gpuPixelSpanDrivers[driver_idx];
+ gpuDrawLineG(packet, driver);
+ gpu_unai.fb_dirty = true;
+ DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM));
}
- if ((PacketBuffer.U4[4] & 0xF000F000) != 0x50005000)
+ if ((gpu_unai.PacketBuffer.U4[4] & 0xF000F000) != 0x50005000)
{
- PacketBuffer.U1[3 + (2 * 4)] = PacketBuffer.U1[3 + (0 * 4)];
- PacketBuffer.U4[0] = PacketBuffer.U4[2];
- PacketBuffer.U4[1] = PacketBuffer.U4[3];
- PacketBuffer.U4[2] = PacketBuffer.U4[4];
- PacketCount = 2;
- PacketIndex = 3;
+ gpu_unai.PacketBuffer.U1[3 + (2 * 4)] = gpu_unai.PacketBuffer.U1[3 + (0 * 4)];
+ gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2];
+ gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[3];
+ gpu_unai.PacketBuffer.U4[2] = gpu_unai.PacketBuffer.U4[4];
+ gpu_unai.PacketCount = 2;
+ gpu_unai.PacketIndex = 3;
}
- break;
+ } break;
+
case 0x60:
case 0x61:
case 0x62:
- case 0x63:
- if (!isSkip)
+ case 0x63: { // Monochrome rectangle (variable size)
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
+ PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ gpuDrawT(packet, driver);
+ gpu_unai.fb_dirty = true;
DO_LOG(("gpuDrawT(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x64:
case 0x65:
case 0x66:
- case 0x67:
- if (!isSkip)
+ case 0x67: { // Textured rectangle (variable size)
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (GPU_GP1);
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]);
- else
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]);
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+
+ // This fixes Silent Hill running animation on loading screens:
+ // On PSX, color values 0x00-0x7F darken the source texture's color,
+ // 0x81-0xFF lighten it (ultimately clamped to 0x1F), and
+ // 0x80 leaves the source texture color unchanged. HOWEVER,
+ // gpu_unai uses a simple lighting LUT whereby only the upper
+ // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as
+ // 0x80.
+ //
+ // NOTE: I've changed all textured sprite draw commands here and
+ // elsewhere to use proper behavior, but left poly commands
+ // alone, I don't want to slow rendering down too much. (TODO)
+ //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+ // Strip lower 3 bits of each color and determine if lighting should be used:
+ if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ driver_idx |= Lighting;
+ PS driver = gpuSpriteSpanDrivers[driver_idx];
+ gpuDrawS(packet, driver);
+ gpu_unai.fb_dirty = true;
DO_LOG(("gpuDrawS(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x68:
case 0x69:
case 0x6A:
- case 0x6B:
- if (!isSkip)
+ case 0x6B: { // Monochrome rectangle (1x1 dot)
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- PacketBuffer.U4[2] = 0x00010001;
- gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
+ gpu_unai.PacketBuffer.U4[2] = 0x00010001;
+ PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ gpuDrawT(packet, driver);
+ gpu_unai.fb_dirty = true;
DO_LOG(("gpuDrawT(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x70:
case 0x71:
case 0x72:
- case 0x73:
- if (!isSkip)
+ case 0x73: { // Monochrome rectangle (8x8)
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- PacketBuffer.U4[2] = 0x00080008;
- gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
+ gpu_unai.PacketBuffer.U4[2] = 0x00080008;
+ PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ gpuDrawT(packet, driver);
+ gpu_unai.fb_dirty = true;
DO_LOG(("gpuDrawT(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x74:
case 0x75:
case 0x76:
- case 0x77:
- if (!isSkip)
+ case 0x77: { // Textured rectangle (8x8)
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- PacketBuffer.U4[3] = 0x00080008;
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (GPU_GP1);
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]);
- else
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]);
+ gpu_unai.PacketBuffer.U4[3] = 0x00080008;
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+
+ //senquack - Only color 808080h-878787h allows skipping lighting calculation:
+ //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+ // Strip lower 3 bits of each color and determine if lighting should be used:
+ if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ driver_idx |= Lighting;
+ PS driver = gpuSpriteSpanDrivers[driver_idx];
+ gpuDrawS(packet, driver);
+ gpu_unai.fb_dirty = true;
DO_LOG(("gpuDrawS(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x78:
case 0x79:
case 0x7A:
- case 0x7B:
- if (!isSkip)
+ case 0x7B: { // Monochrome rectangle (16x16)
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- PacketBuffer.U4[2] = 0x00100010;
- gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
+ gpu_unai.PacketBuffer.U4[2] = 0x00100010;
+ PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ gpuDrawT(packet, driver);
+ gpu_unai.fb_dirty = true;
DO_LOG(("gpuDrawT(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x7C:
case 0x7D:
-#ifdef __arm__
- if ((GPU_GP1 & 0x180) == 0 && (Masking | PixelMSB) == 0)
+ #ifdef __arm__
+ /* Notaz 4bit sprites optimization */
+ if ((!gpu_unai.frameskip.skipGPU) && (!(gpu_unai.GPU_GP1&0x180)) && (!(gpu_unai.Masking|gpu_unai.PixelMSB)))
{
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (GPU_GP1);
- gpuDrawS16();
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuDrawS16(packet);
+ gpu_unai.fb_dirty = true;
break;
}
- // fallthrough
-#endif
+ #endif
case 0x7E:
- case 0x7F:
- if (!isSkip)
+ case 0x7F: { // Textured rectangle (16x16)
+ if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- PacketBuffer.U4[3] = 0x00100010;
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (GPU_GP1);
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]);
- else
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]);
+ gpu_unai.PacketBuffer.U4[3] = 0x00100010;
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+
+ //senquack - Only color 808080h-878787h allows skipping lighting calculation:
+ //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+ // Strip lower 3 bits of each color and determine if lighting should be used:
+ if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ driver_idx |= Lighting;
+ PS driver = gpuSpriteSpanDrivers[driver_idx];
+ gpuDrawS(packet, driver);
+ gpu_unai.fb_dirty = true;
DO_LOG(("gpuDrawS(0x%x)\n",PRIM));
}
- break;
+ } break;
+
case 0x80: // vid -> vid
- gpuMoveImage(); // prim handles updateLace && skip
+ gpuMoveImage(packet); // prim handles updateLace && skip
+ if ((!gpu_unai.frameskip.skipCount) && (gpu_unai.DisplayArea[3] == 480)) // Tekken 3 hack
+ {
+ if (!gpu_unai.frameskip.skipGPU) gpu_unai.fb_dirty = true;
+ }
+ else
+ {
+ gpu_unai.fb_dirty = true;
+ }
DO_LOG(("gpuMoveImage(0x%x)\n",PRIM));
break;
case 0xA0: // sys ->vid
- gpuLoadImage(); // prim handles updateLace && skip
-#ifndef isSkip // not a define
- if (alt_fps) isSkip=false;
-#endif
+ gpuLoadImage(packet); // prim handles updateLace && skip
DO_LOG(("gpuLoadImage(0x%x)\n",PRIM));
break;
case 0xC0: // vid -> sys
- gpuStoreImage(); // prim handles updateLace && skip
+ gpuStoreImage(packet); // prim handles updateLace && skip
DO_LOG(("gpuStoreImage(0x%x)\n",PRIM));
break;
- case 0xE1:
- {
- const u32 temp = PacketBuffer.U4[0];
- GPU_GP1 = (GPU_GP1 & ~0x000007FF) | (temp & 0x000007FF);
- gpuSetTexture(temp);
- DO_LOG(("gpuSetTexture(0x%x)\n",PRIM));
- }
- break;
- case 0xE2:
- {
- static const u8 TextureMask[32] = {
- 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, //
- 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 //
- };
- const u32 temp = PacketBuffer.U4[0];
- TextureWindow[0] = ((temp >> 10) & 0x1F) << 3;
- TextureWindow[1] = ((temp >> 15) & 0x1F) << 3;
- TextureWindow[2] = TextureMask[(temp >> 0) & 0x1F];
- TextureWindow[3] = TextureMask[(temp >> 5) & 0x1F];
- gpuSetTexture(GPU_GP1);
- //isSkip = false;
- DO_LOG(("TextureWindow(0x%x)\n",PRIM));
- }
- break;
- case 0xE3:
- {
- const u32 temp = PacketBuffer.U4[0];
- DrawingArea[0] = temp & 0x3FF;
- DrawingArea[1] = (temp >> 10) & 0x3FF;
- //isSkip = false;
- DO_LOG(("DrawingArea_Pos(0x%x)\n",PRIM));
- }
- break;
- case 0xE4:
- {
- const u32 temp = PacketBuffer.U4[0];
- DrawingArea[2] = (temp & 0x3FF) + 1;
- DrawingArea[3] = ((temp >> 10) & 0x3FF) + 1;
- //isSkip = false;
- DO_LOG(("DrawingArea_Size(0x%x)\n",PRIM));
- }
- break;
- case 0xE5:
- {
- const u32 temp = PacketBuffer.U4[0];
- DrawingOffset[0] = ((s32)temp<<(32-11))>>(32-11);
- DrawingOffset[1] = ((s32)temp<<(32-22))>>(32-11);
- //isSkip = false;
- DO_LOG(("DrawingOffset(0x%x)\n",PRIM));
- }
- break;
- case 0xE6:
- {
- const u32 temp = PacketBuffer.U4[0];
- //GPU_GP1 = (GPU_GP1 & ~0x00001800) | ((temp&3) << 11);
- Masking = (temp & 0x2) << 1;
- PixelMSB =(temp & 0x1) << 8;
- DO_LOG(("SetMask(0x%x)\n",PRIM));
- }
- break;
+ case 0xE1 ... 0xE6: { // Draw settings
+ gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]);
+ } break;
}
}
+#endif //!USE_GPULIB
+///////////////////////////////////////////////////////////////////////////////
+// End of code specific to non-gpulib standalone version of gpu_unai
+///////////////////////////////////////////////////////////////////////////////
diff --git a/plugins/gpu_unai/gpu_fixedpoint.h b/plugins/gpu_unai/gpu_fixedpoint.h
index e72fda1..5df42cf 100644
--- a/plugins/gpu_unai/gpu_fixedpoint.h
+++ b/plugins/gpu_unai/gpu_fixedpoint.h
@@ -21,60 +21,73 @@
#ifndef FIXED_H
#define FIXED_H
-#include "arm_features.h"
-
typedef s32 fixed;
-#ifdef GPU_TABLE_10_BITS
-#define TABLE_BITS 10
-#else
-#define TABLE_BITS 16
-#endif
-
-#define FIXED_BITS 16
+//senquack - The gpu_drhell poly routines I adapted use 22.10 fixed point,
+// while original Unai used 16.16: (see README_senquack.txt)
+//#define FIXED_BITS 16
+#define FIXED_BITS 10
#define fixed_ZERO ((fixed)0)
#define fixed_ONE ((fixed)1<<FIXED_BITS)
#define fixed_TWO ((fixed)2<<FIXED_BITS)
#define fixed_HALF ((fixed)((1<<FIXED_BITS)>>1))
-// big precision inverse table.
-s32 s_invTable[(1<<TABLE_BITS)];
+#define fixed_LOMASK ((fixed)((1<<FIXED_BITS)-1))
+#define fixed_HIMASK ((fixed)(~fixed_LOMASK))
+
+// int<->fixed conversions:
+#define i2x(x) ((x)<<FIXED_BITS)
+#define x2i(x) ((x)>>FIXED_BITS)
+
+INLINE fixed FixedCeil(const fixed x)
+{
+ return (x + (fixed_ONE - 1)) & fixed_HIMASK;
+}
-INLINE fixed i2x(const int _x) { return ((_x)<<FIXED_BITS); }
-INLINE fixed x2i(const fixed _x) { return ((_x)>>FIXED_BITS); }
+INLINE s32 FixedCeilToInt(const fixed x)
+{
+ return (x + (fixed_ONE - 1)) >> FIXED_BITS;
+}
-/*
-INLINE u32 Log2(u32 _a)
+//senquack - float<->fixed conversions:
+#define f2x(x) ((s32)((x) * (float)(1<<FIXED_BITS)))
+#define x2f(x) ((float)(x) / (float)(1<<FIXED_BITS))
+
+//senquack - floating point reciprocal:
+//NOTE: These assume x is always != 0 !!!
+#ifdef GPU_UNAI_USE_FLOATMATH
+#if defined(_MIPS_ARCH_MIPS32R2) || (__mips == 64)
+INLINE float FloatInv(const float x)
+{
+ float res;
+ asm("recip.s %0,%1" : "=f" (res) : "f" (x));
+ return res;
+}
+#else
+INLINE float FloatInv(const float x)
{
- u32 c = 0; // result of log2(v) will go here
- if (_a & 0xFFFF0000) { _a >>= 16; c |= 16; }
- if (_a & 0xFF00) { _a >>= 8; c |= 8; }
- if (_a & 0xF0) { _a >>= 4; c |= 4; }
- if (_a & 0xC) { _a >>= 2; c |= 2; }
- if (_a & 0x2) { _a >>= 1; c |= 1; }
- return c;
+ return (1.0f / x);
}
-*/
+#endif
+#endif
-#ifdef HAVE_ARMV5
+///////////////////////////////////////////////////////////////////////////
+// --- BEGIN INVERSE APPROXIMATION SECTION ---
+///////////////////////////////////////////////////////////////////////////
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+
+// big precision inverse table.
+#define TABLE_BITS 16
+s32 s_invTable[(1<<TABLE_BITS)];
+
+//senquack - MIPS32 happens to have same instruction/format:
+#if defined(__arm__) || (__mips == 32)
INLINE u32 Log2(u32 x) { u32 res; asm("clz %0,%1" : "=r" (res) : "r" (x)); return 32-res; }
#else
INLINE u32 Log2(u32 x) { u32 i = 0; for ( ; x > 0; ++i, x >>= 1); return i - 1; }
#endif
-#ifdef GPU_TABLE_10_BITS
-INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
-{
- u32 uD = (_b<0) ? -_b : _b ;
- u32 uLog = Log2(uD);
- uLog = uLog>(TABLE_BITS-1) ? uLog-(TABLE_BITS-1) : 0;
- u32 uDen = uD>>uLog;
- iFactor_ = s_invTable[uDen];
- iFactor_ = (_b<0) ? -iFactor_ :iFactor_;
- iShift_ = 15+uLog;
-}
-#else
INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
{
u32 uD = (_b<0) ? -_b : _b;
@@ -82,10 +95,12 @@ INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
{
u32 uLog = Log2(uD);
uLog = uLog>(TABLE_BITS-1) ? uLog-(TABLE_BITS-1) : 0;
- u32 uDen = (uD>>uLog)-1;
+ u32 uDen = (uD>>uLog);
iFactor_ = s_invTable[uDen];
iFactor_ = (_b<0) ? -iFactor_ :iFactor_;
- iShift_ = 15+uLog;
+ //senquack - Adapted to 22.10 fixed point (originally 16.16):
+ //iShift_ = 15+uLog;
+ iShift_ = 21+uLog;
}
else
{
@@ -93,7 +108,6 @@ INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
iShift_ = 0;
}
}
-#endif
INLINE fixed xInvMulx (const fixed _a, const s32 _iFact, const s32 _iShift)
{
@@ -112,20 +126,9 @@ INLINE fixed xLoDivx (const fixed _a, const fixed _b)
xInv(_b, iFact, iShift);
return xInvMulx(_a, iFact, iShift);
}
-
+#endif // GPU_UNAI_USE_INT_DIV_MULTINV
///////////////////////////////////////////////////////////////////////////
-template<typename T>
-INLINE T Min2 (const T _a, const T _b) { return (_a<_b)?_a:_b; }
-
-template<typename T>
-INLINE T Min3 (const T _a, const T _b, const T _c) { return Min2(Min2(_a,_b),_c); }
-
+// --- END INVERSE APPROXIMATION SECTION ---
///////////////////////////////////////////////////////////////////////////
-template<typename T>
-INLINE T Max2 (const T _a, const T _b) { return (_a>_b)?_a:_b; }
-template<typename T>
-INLINE T Max3 (const T _a, const T _b, const T _c) { return Max2(Max2(_a,_b),_c); }
-
-///////////////////////////////////////////////////////////////////////////
#endif //FIXED_H
diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h
index 4cd7bff..723e09f 100644
--- a/plugins/gpu_unai/gpu_inner.h
+++ b/plugins/gpu_unai/gpu_inner.h
@@ -1,6 +1,7 @@
/***************************************************************************
* Copyright (C) 2010 PCSX4ALL Team *
* Copyright (C) 2010 Unai *
+* Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -19,415 +20,688 @@
***************************************************************************/
///////////////////////////////////////////////////////////////////////////////
-// Inner loop driver instanciation file
+// Inner loop driver instantiation file
///////////////////////////////////////////////////////////////////////////////
-// Option Masks
-#define L ((CF>>0)&1)
-#define B ((CF>>1)&1)
-#define M ((CF>>2)&1)
-#define BM ((CF>>3)&3)
-#define TM ((CF>>5)&3)
-#define G ((CF>>7)&1)
+// Option Masks (CF template parameter)
+#define CF_LIGHT ((CF>> 0)&1) // Lighting
+#define CF_BLEND ((CF>> 1)&1) // Blending
+#define CF_MASKCHECK ((CF>> 2)&1) // Mask bit check
+#define CF_BLENDMODE ((CF>> 3)&3) // Blend mode 0..3
+#define CF_TEXTMODE ((CF>> 5)&3) // Texture mode 1..3 (0: texturing disabled)
+#define CF_GOURAUD ((CF>> 7)&1) // Gouraud shading
+#define CF_MASKSET ((CF>> 8)&1) // Mask bit set
+#define CF_DITHER ((CF>> 9)&1) // Dithering
+#define CF_BLITMASK ((CF>>10)&1) // blit_mask check (skip rendering pixels
+ // that wouldn't end up displayed on
+ // low-res screen using simple downscaler)
-#define AH ((CF>>7)&1)
-
-#define MB ((CF>>8)&1)
+//#ifdef __arm__
+//#ifndef ENABLE_GPU_ARMV7
+/* ARMv5 */
+//#include "gpu_inner_blend_arm5.h"
+//#else
+/* ARMv7 optimized */
+//#include "gpu_inner_blend_arm7.h"
+//#endif
+//#else
+//#include "gpu_inner_blend.h"
+//#endif
+// TODO: use the arm-optimized gpu_inner_blends for arm builds
#include "gpu_inner_blend.h"
+
+#include "gpu_inner_quantization.h"
#include "gpu_inner_light.h"
+// If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16
+// This is only for debugging/verification of low-precision colors in C.
+// Low-precision Gouraud is intended for use by SIMD-optimized inner drivers
+// which get/use Gouraud colors in SIMD registers.
+//#define GPU_GOURAUD_LOW_PRECISION
+
+// How many bits of fixed-point precision GouraudColor uses
+#ifdef GPU_GOURAUD_LOW_PRECISION
+#define GPU_GOURAUD_FIXED_BITS 11
+#else
+#define GPU_GOURAUD_FIXED_BITS 16
+#endif
+
+// Used to pass Gouraud colors to gpuPixelSpanFn() (lines)
+struct GouraudColor {
+#ifdef GPU_GOURAUD_LOW_PRECISION
+ u16 r, g, b;
+ s16 r_incr, g_incr, b_incr;
+#else
+ u32 r, g, b;
+ s32 r_incr, g_incr, b_incr;
+#endif
+};
+
+static inline u16 gpuGouraudColor15bpp(u32 r, u32 g, u32 b)
+{
+ r >>= GPU_GOURAUD_FIXED_BITS;
+ g >>= GPU_GOURAUD_FIXED_BITS;
+ b >>= GPU_GOURAUD_FIXED_BITS;
+
+#ifndef GPU_GOURAUD_LOW_PRECISION
+ // High-precision Gouraud colors are 8-bit + fractional
+ r >>= 3; g >>= 3; b >>= 3;
+#endif
+
+ return r | (g << 5) | (b << 10);
+}
+
///////////////////////////////////////////////////////////////////////////////
-// GPU Pixel opperations generator
-template<const int CF>
-INLINE void gpuPixelFn(u16 *pixel,const u16 data)
+// GPU Pixel span operations generator gpuPixelSpanFn<>
+// Oct 2016: Created/adapted from old gpuPixelFn by senquack:
+// Original gpuPixelFn was used to draw lines one pixel at a time. I wrote
+// new line algorithms that draw lines using horizontal/vertical/diagonal
+// spans of pixels, necessitating new pixel-drawing function that could
+// not only render spans of pixels, but gouraud-shade them as well.
+// This speeds up line rendering and would allow tile-rendering (untextured
+// rectangles) to use the same set of functions. Since tiles are always
+// monochrome, they simply wouldn't use the extra set of 32 gouraud-shaded
+// gpuPixelSpanFn functions (TODO?).
+//
+// NOTE: While the PS1 framebuffer is 16 bit, we use 8-bit pointers here,
+// so that pDst can be incremented directly by 'incr' parameter
+// without having to shift it before use.
+template<int CF>
+static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
{
- if ((!M)&&(!B))
- {
- if(MB) { *pixel = data | 0x8000; }
- else { *pixel = data; }
+ // Blend func can save an operation if it knows uSrc MSB is
+ // unset. For untextured prims, this is always true.
+ const bool skip_uSrc_mask = true;
+
+ u16 col;
+ struct GouraudColor * gcPtr;
+ u32 r, g, b;
+ s32 r_incr, g_incr, b_incr;
+
+ if (CF_GOURAUD) {
+ gcPtr = (GouraudColor*)data;
+ r = gcPtr->r; r_incr = gcPtr->r_incr;
+ g = gcPtr->g; g_incr = gcPtr->g_incr;
+ b = gcPtr->b; b_incr = gcPtr->b_incr;
+ } else {
+ col = (u16)data;
}
- else if ((M)&&(!B))
- {
- if (!(*pixel&0x8000))
- {
- if(MB) { *pixel = data | 0x8000; }
- else { *pixel = data; }
+
+ do {
+ if (!CF_GOURAUD)
+ { // NO GOURAUD
+ if (!CF_MASKCHECK && !CF_BLEND) {
+ if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
+ else { *(u16*)pDst = col; }
+ } else if (CF_MASKCHECK && !CF_BLEND) {
+ if (!(*(u16*)pDst & 0x8000)) {
+ if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
+ else { *(u16*)pDst = col; }
+ }
+ } else {
+ u16 uDst = *(u16*)pDst;
+ if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; }
+
+ u16 uSrc = col;
+
+ if (CF_BLEND)
+ uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
+
+ if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; }
+ else { *(u16*)pDst = uSrc; }
+ }
+
+ } else
+ { // GOURAUD
+
+ if (!CF_MASKCHECK && !CF_BLEND) {
+ col = gpuGouraudColor15bpp(r, g, b);
+ if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
+ else { *(u16*)pDst = col; }
+ } else if (CF_MASKCHECK && !CF_BLEND) {
+ col = gpuGouraudColor15bpp(r, g, b);
+ if (!(*(u16*)pDst & 0x8000)) {
+ if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
+ else { *(u16*)pDst = col; }
+ }
+ } else {
+ u16 uDst = *(u16*)pDst;
+ if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; }
+ col = gpuGouraudColor15bpp(r, g, b);
+
+ u16 uSrc = col;
+
+ // Blend func can save an operation if it knows uSrc MSB is
+ // unset. For untextured prims, this is always true.
+ const bool skip_uSrc_mask = true;
+
+ if (CF_BLEND)
+ uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
+
+ if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; }
+ else { *(u16*)pDst = uSrc; }
+ }
}
+
+endpixel:
+ if (CF_GOURAUD) {
+ r += r_incr;
+ g += g_incr;
+ b += b_incr;
+ }
+ pDst += incr;
+ } while (len-- > 1);
+
+ // Note from senquack: Normally, I'd prefer to write a 'do {} while (--len)'
+ // loop, or even a for() loop, however, on MIPS platforms anything but the
+ // 'do {} while (len-- > 1)' tends to generate very suboptimal asm, with
+ // many unneeded MULs/ADDs/branches at the ends of these functions.
+ // If you change the loop structure above, be sure to compare the quality
+ // of the generated code!!
+
+ if (CF_GOURAUD) {
+ gcPtr->r = r;
+ gcPtr->g = g;
+ gcPtr->b = b;
}
- else
- {
- u16 uDst = *pixel;
- if(M) { if (uDst&0x8000) return; }
- u16 uSrc = data;
- u32 uMsk; if (BM==0) uMsk=0x7BDE;
- if (BM==0) gpuBlending00(uSrc, uDst);
- if (BM==1) gpuBlending01(uSrc, uDst);
- if (BM==2) gpuBlending02(uSrc, uDst);
- if (BM==3) gpuBlending03(uSrc, uDst);
- if(MB) { *pixel = uSrc | 0x8000; }
- else { *pixel = uSrc; }
- }
+ return pDst;
+}
+
+static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
+{
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"PixelSpanNULL()\n");
+ #endif
+ return pDst;
}
-///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
-// Pixel drawing drivers, for lines (only blending)
-typedef void (*PD)(u16 *pixel,const u16 data);
-const PD gpuPixelDrivers[32] = // We only generate pixel op for MASKING/BLEND_ENABLE/BLEND_MODE
+// PixelSpan (lines) innerloops driver
+typedef u8* (*PSD)(u8* dst, uintptr_t data, ptrdiff_t incr, size_t len);
+
+const PSD gpuPixelSpanDrivers[64] =
{
- gpuPixelFn<0x00<<1>,gpuPixelFn<0x01<<1>,gpuPixelFn<0x02<<1>,gpuPixelFn<0x03<<1>,
- NULL,gpuPixelFn<0x05<<1>,NULL,gpuPixelFn<0x07<<1>,
- NULL,gpuPixelFn<0x09<<1>,NULL,gpuPixelFn<0x0B<<1>,
- NULL,gpuPixelFn<0x0D<<1>,NULL,gpuPixelFn<0x0F<<1>,
-
- gpuPixelFn<(0x00<<1)|256>,gpuPixelFn<(0x01<<1)|256>,gpuPixelFn<(0x02<<1)|256>,gpuPixelFn<(0x03<<1)|256>,
- NULL,gpuPixelFn<(0x05<<1)|256>,NULL,gpuPixelFn<(0x07<<1)|256>,
- NULL,gpuPixelFn<(0x09<<1)|256>,NULL,gpuPixelFn<(0x0B<<1)|256>,
- NULL,gpuPixelFn<(0x0D<<1)|256>,NULL,gpuPixelFn<(0x0F<<1)|256>
+ // Array index | 'CF' template field | Field value
+ // ------------+---------------------+----------------
+ // Bit 0 | CF_BLEND | off (0), on (1)
+ // Bit 1 | CF_MASKCHECK | off (0), on (1)
+ // Bit 3:2 | CF_BLENDMODE | 0..3
+ // Bit 4 | CF_MASKSET | off (0), on (1)
+ // Bit 5 | CF_GOURAUD | off (0), on (1)
+ //
+ // NULL entries are ones for which blending is disabled and blend-mode
+ // field is non-zero, which is obviously invalid.
+
+ // Flat-shaded
+ gpuPixelSpanFn<0x00<<1>, gpuPixelSpanFn<0x01<<1>, gpuPixelSpanFn<0x02<<1>, gpuPixelSpanFn<0x03<<1>,
+ PixelSpanNULL, gpuPixelSpanFn<0x05<<1>, PixelSpanNULL, gpuPixelSpanFn<0x07<<1>,
+ PixelSpanNULL, gpuPixelSpanFn<0x09<<1>, PixelSpanNULL, gpuPixelSpanFn<0x0B<<1>,
+ PixelSpanNULL, gpuPixelSpanFn<0x0D<<1>, PixelSpanNULL, gpuPixelSpanFn<0x0F<<1>,
+
+ // Flat-shaded + PixelMSB (CF_MASKSET)
+ gpuPixelSpanFn<(0x00<<1)|0x100>, gpuPixelSpanFn<(0x01<<1)|0x100>, gpuPixelSpanFn<(0x02<<1)|0x100>, gpuPixelSpanFn<(0x03<<1)|0x100>,
+ PixelSpanNULL, gpuPixelSpanFn<(0x05<<1)|0x100>, PixelSpanNULL, gpuPixelSpanFn<(0x07<<1)|0x100>,
+ PixelSpanNULL, gpuPixelSpanFn<(0x09<<1)|0x100>, PixelSpanNULL, gpuPixelSpanFn<(0x0B<<1)|0x100>,
+ PixelSpanNULL, gpuPixelSpanFn<(0x0D<<1)|0x100>, PixelSpanNULL, gpuPixelSpanFn<(0x0F<<1)|0x100>,
+
+ // Gouraud-shaded (CF_GOURAUD)
+ gpuPixelSpanFn<(0x00<<1)|0x80>, gpuPixelSpanFn<(0x01<<1)|0x80>, gpuPixelSpanFn<(0x02<<1)|0x80>, gpuPixelSpanFn<(0x03<<1)|0x80>,
+ PixelSpanNULL, gpuPixelSpanFn<(0x05<<1)|0x80>, PixelSpanNULL, gpuPixelSpanFn<(0x07<<1)|0x80>,
+ PixelSpanNULL, gpuPixelSpanFn<(0x09<<1)|0x80>, PixelSpanNULL, gpuPixelSpanFn<(0x0B<<1)|0x80>,
+ PixelSpanNULL, gpuPixelSpanFn<(0x0D<<1)|0x80>, PixelSpanNULL, gpuPixelSpanFn<(0x0F<<1)|0x80>,
+
+ // Gouraud-shaded (CF_GOURAUD) + PixelMSB (CF_MASKSET)
+ gpuPixelSpanFn<(0x00<<1)|0x180>, gpuPixelSpanFn<(0x01<<1)|0x180>, gpuPixelSpanFn<(0x02<<1)|0x180>, gpuPixelSpanFn<(0x03<<1)|0x180>,
+ PixelSpanNULL, gpuPixelSpanFn<(0x05<<1)|0x180>, PixelSpanNULL, gpuPixelSpanFn<(0x07<<1)|0x180>,
+ PixelSpanNULL, gpuPixelSpanFn<(0x09<<1)|0x180>, PixelSpanNULL, gpuPixelSpanFn<(0x0B<<1)|0x180>,
+ PixelSpanNULL, gpuPixelSpanFn<(0x0D<<1)|0x180>, PixelSpanNULL, gpuPixelSpanFn<(0x0F<<1)|0x180>
};
///////////////////////////////////////////////////////////////////////////////
// GPU Tiles innerloops generator
-template<const int CF>
-INLINE void gpuTileSpanFn(u16 *pDst, u32 count, u16 data)
+template<int CF>
+static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data)
{
- if ((!M)&&(!B))
- {
- if (MB) { data = data | 0x8000; }
+ if (!CF_MASKCHECK && !CF_BLEND) {
+ if (CF_MASKSET) { data = data | 0x8000; }
do { *pDst++ = data; } while (--count);
- }
- else if ((M)&&(!B))
- {
- if (MB) { data = data | 0x8000; }
+ } else if (CF_MASKCHECK && !CF_BLEND) {
+ if (CF_MASKSET) { data = data | 0x8000; }
do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
- }
- else
+ } else
{
- u16 uSrc;
- u16 uDst;
- u32 uMsk; if (BM==0) uMsk=0x7BDE;
+ // Blend func can save an operation if it knows uSrc MSB is
+ // unset. For untextured prims, this is always true.
+ const bool skip_uSrc_mask = true;
+
+ u16 uSrc, uDst;
do
{
- // MASKING
- uDst = *pDst;
- if(M) { if (uDst&0x8000) goto endtile; }
+ if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
+ if (CF_MASKCHECK) { if (uDst&0x8000) goto endtile; }
+
uSrc = data;
- // BLEND
- if (BM==0) gpuBlending00(uSrc, uDst);
- if (BM==1) gpuBlending01(uSrc, uDst);
- if (BM==2) gpuBlending02(uSrc, uDst);
- if (BM==3) gpuBlending03(uSrc, uDst);
+ if (CF_BLEND)
+ uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
- if (MB) { *pDst = uSrc | 0x8000; }
- else { *pDst = uSrc; }
- endtile: pDst++;
+ if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
+ else { *pDst = uSrc; }
+
+ //senquack - Did not apply "Silent Hill" mask-bit fix to here.
+ // It is hard to tell from scarce documentation available and
+ // lack of comments in code, but I believe the tile-span
+ // functions here should not bother to preserve any source MSB,
+ // as they are not drawing from a texture.
+endtile:
+ pDst++;
}
while (--count);
}
}
+static void TileNULL(u16 *pDst, u32 count, u16 data)
+{
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"TileNULL()\n");
+ #endif
+}
+
///////////////////////////////////////////////////////////////////////////////
// Tiles innerloops driver
typedef void (*PT)(u16 *pDst, u32 count, u16 data);
-const PT gpuTileSpanDrivers[64] =
-{
- gpuTileSpanFn<0x00>,NULL,gpuTileSpanFn<0x02>,NULL, gpuTileSpanFn<0x04>,NULL,gpuTileSpanFn<0x06>,NULL, NULL,NULL,gpuTileSpanFn<0x0A>,NULL, NULL,NULL,gpuTileSpanFn<0x0E>,NULL,
- NULL,NULL,gpuTileSpanFn<0x12>,NULL, NULL,NULL,gpuTileSpanFn<0x16>,NULL, NULL,NULL,gpuTileSpanFn<0x1A>,NULL, NULL,NULL,gpuTileSpanFn<0x1E>,NULL,
- gpuTileSpanFn<0x100>,NULL,gpuTileSpanFn<0x102>,NULL, gpuTileSpanFn<0x104>,NULL,gpuTileSpanFn<0x106>,NULL, NULL,NULL,gpuTileSpanFn<0x10A>,NULL, NULL,NULL,gpuTileSpanFn<0x10E>,NULL,
- NULL,NULL,gpuTileSpanFn<0x112>,NULL, NULL,NULL,gpuTileSpanFn<0x116>,NULL, NULL,NULL,gpuTileSpanFn<0x11A>,NULL, NULL,NULL,gpuTileSpanFn<0x11E>,NULL,
+// Template instantiation helper macros
+#define TI(cf) gpuTileSpanFn<(cf)>
+#define TN TileNULL
+#define TIBLOCK(ub) \
+ TI((ub)|0x00), TI((ub)|0x02), TI((ub)|0x04), TI((ub)|0x06), \
+ TN, TI((ub)|0x0a), TN, TI((ub)|0x0e), \
+ TN, TI((ub)|0x12), TN, TI((ub)|0x16), \
+ TN, TI((ub)|0x1a), TN, TI((ub)|0x1e)
+
+const PT gpuTileSpanDrivers[32] = {
+ TIBLOCK(0<<8), TIBLOCK(1<<8)
};
+#undef TI
+#undef TN
+#undef TIBLOCK
+
+
///////////////////////////////////////////////////////////////////////////////
// GPU Sprites innerloops generator
-template<const int CF>
-INLINE void gpuSpriteSpanFn(u16 *pDst, u32 count, u32 u0, const u32 mask)
+template<int CF>
+static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0)
{
- u16 uSrc;
- u16 uDst;
- const u16* pTxt = TBA+(u0&~0x1ff); u0=u0&0x1ff;
- const u16 *_CBA; if(TM!=3) _CBA=CBA;
- u32 lCol; if(L) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); }
- u8 rgb; if (TM==1) rgb = ((u8*)pTxt)[u0>>1];
- u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
+ // Blend func can save an operation if it knows uSrc MSB is unset.
+ // Untextured prims can always skip (source color always comes with MSB=0).
+ // For textured prims, lighting funcs always return it unset. (bonus!)
+ const bool skip_uSrc_mask = (!CF_TEXTMODE) || CF_LIGHT;
+
+ u16 uSrc, uDst, srcMSB;
+ u32 u0_mask = gpu_unai.TextureWindow[2];
+
+ u8 r5, g5, b5;
+ if (CF_LIGHT) {
+ r5 = gpu_unai.r5;
+ g5 = gpu_unai.g5;
+ b5 = gpu_unai.b5;
+ }
+
+ if (CF_TEXTMODE==3) {
+ // Texture is accessed byte-wise, so adjust mask if 16bpp
+ u0_mask <<= 1;
+ }
+
+ const u16 *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
do
{
- // MASKING
- if(M) { uDst = *pDst; if (uDst&0x8000) { u0=(u0+1)&mask; goto endsprite; } }
+ if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
+ if (CF_MASKCHECK) if (uDst&0x8000) { goto endsprite; }
- // TEXTURE MAPPING
- if (TM==1) { if (!(u0&1)) rgb = ((u8*)pTxt)[u0>>1]; uSrc = _CBA[(rgb>>((u0&1)<<2))&0xf]; u0=(u0+1)&mask; }
- if (TM==2) { uSrc = _CBA[((u8*)pTxt)[u0]]; u0=(u0+1)&mask; }
- if (TM==3) { uSrc = pTxt[u0]; u0=(u0+1)&mask; }
- if(!AH) { if (!uSrc) goto endsprite; }
-
- // BLEND
- if(B)
- {
- if(uSrc&0x8000)
- {
- // LIGHTING CALCULATIONS
- if(L) { gpuLightingTXT(uSrc, lCol); }
-
- if(!M) { uDst = *pDst; }
- if (BM==0) gpuBlending00(uSrc, uDst);
- if (BM==1) gpuBlending01(uSrc, uDst);
- if (BM==2) gpuBlending02(uSrc, uDst);
- if (BM==3) gpuBlending03(uSrc, uDst);
- }
- else
- {
- // LIGHTING CALCULATIONS
- if(L) { gpuLightingTXT(uSrc, lCol); }
- }
+ if (CF_TEXTMODE==1) { // 4bpp (CLUT)
+ u8 rgb = pTxt[(u0 & u0_mask)>>1];
+ uSrc = CBA_[(rgb>>((u0&1)<<2))&0xf];
}
- else
- {
- // LIGHTING CALCULATIONS
- if(L) { gpuLightingTXT(uSrc, lCol); } else
- { if(!MB) uSrc&= 0x7fff; }
+ if (CF_TEXTMODE==2) { // 8bpp (CLUT)
+ uSrc = CBA_[pTxt[u0 & u0_mask]];
+ }
+ if (CF_TEXTMODE==3) { // 16bpp
+ uSrc = *(u16*)(&pTxt[u0 & u0_mask]);
}
- if (MB) { *pDst = uSrc | 0x8000; }
- else { *pDst = uSrc; }
+ if (!uSrc) goto endsprite;
+
+ //senquack - save source MSB, as blending or lighting macros will not
+ // (Silent Hill gray rectangles mask bit bug)
+ if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000;
- endsprite: pDst++;
+ if (CF_LIGHT)
+ uSrc = gpuLightingTXT(uSrc, r5, g5, b5);
+
+ if (CF_BLEND && srcMSB)
+ uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
+
+ if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
+ else if (CF_BLEND || CF_LIGHT) { *pDst = uSrc | srcMSB; }
+ else { *pDst = uSrc; }
+
+endsprite:
+ u0 += (CF_TEXTMODE==3) ? 2 : 1;
+ pDst++;
}
while (--count);
}
+
+static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0)
+{
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"SpriteNULL()\n");
+ #endif
+}
+
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Sprite innerloops driver
-typedef void (*PS)(u16 *pDst, u32 count, u32 u0, const u32 mask);
-const PS gpuSpriteSpanDrivers[512] =
-{
- NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
- NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
- gpuSpriteSpanFn<0x20>,gpuSpriteSpanFn<0x21>,gpuSpriteSpanFn<0x22>,gpuSpriteSpanFn<0x23>, gpuSpriteSpanFn<0x24>,gpuSpriteSpanFn<0x25>,gpuSpriteSpanFn<0x26>,gpuSpriteSpanFn<0x27>, NULL,NULL,gpuSpriteSpanFn<0x2A>,gpuSpriteSpanFn<0x2B>, NULL,NULL,gpuSpriteSpanFn<0x2E>,gpuSpriteSpanFn<0x2F>,
- NULL,NULL,gpuSpriteSpanFn<0x32>,gpuSpriteSpanFn<0x33>, NULL,NULL,gpuSpriteSpanFn<0x36>,gpuSpriteSpanFn<0x37>, NULL,NULL,gpuSpriteSpanFn<0x3A>,gpuSpriteSpanFn<0x3B>, NULL,NULL,gpuSpriteSpanFn<0x3E>,gpuSpriteSpanFn<0x3F>,
- gpuSpriteSpanFn<0x40>,gpuSpriteSpanFn<0x41>,gpuSpriteSpanFn<0x42>,gpuSpriteSpanFn<0x43>, gpuSpriteSpanFn<0x44>,gpuSpriteSpanFn<0x45>,gpuSpriteSpanFn<0x46>,gpuSpriteSpanFn<0x47>, NULL,NULL,gpuSpriteSpanFn<0x4A>,gpuSpriteSpanFn<0x4B>, NULL,NULL,gpuSpriteSpanFn<0x4E>,gpuSpriteSpanFn<0x4F>,
- NULL,NULL,gpuSpriteSpanFn<0x52>,gpuSpriteSpanFn<0x53>, NULL,NULL,gpuSpriteSpanFn<0x56>,gpuSpriteSpanFn<0x57>, NULL,NULL,gpuSpriteSpanFn<0x5A>,gpuSpriteSpanFn<0x5B>, NULL,NULL,gpuSpriteSpanFn<0x5E>,gpuSpriteSpanFn<0x5F>,
- gpuSpriteSpanFn<0x60>,gpuSpriteSpanFn<0x61>,gpuSpriteSpanFn<0x62>,gpuSpriteSpanFn<0x63>, gpuSpriteSpanFn<0x64>,gpuSpriteSpanFn<0x65>,gpuSpriteSpanFn<0x66>,gpuSpriteSpanFn<0x67>, NULL,NULL,gpuSpriteSpanFn<0x6A>,gpuSpriteSpanFn<0x6B>, NULL,NULL,gpuSpriteSpanFn<0x6E>,gpuSpriteSpanFn<0x6F>,
- NULL,NULL,gpuSpriteSpanFn<0x72>,gpuSpriteSpanFn<0x73>, NULL,NULL,gpuSpriteSpanFn<0x76>,gpuSpriteSpanFn<0x77>, NULL,NULL,gpuSpriteSpanFn<0x7A>,gpuSpriteSpanFn<0x7B>, NULL,NULL,gpuSpriteSpanFn<0x7E>,gpuSpriteSpanFn<0x7F>,
-
- NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
- NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
- gpuSpriteSpanFn<0xa0>,gpuSpriteSpanFn<0xa1>,gpuSpriteSpanFn<0xa2>,gpuSpriteSpanFn<0xa3>, gpuSpriteSpanFn<0xa4>,gpuSpriteSpanFn<0xa5>,gpuSpriteSpanFn<0xa6>,gpuSpriteSpanFn<0xa7>, NULL,NULL,gpuSpriteSpanFn<0xaA>,gpuSpriteSpanFn<0xaB>, NULL,NULL,gpuSpriteSpanFn<0xaE>,gpuSpriteSpanFn<0xaF>,
- NULL,NULL,gpuSpriteSpanFn<0xb2>,gpuSpriteSpanFn<0xb3>, NULL,NULL,gpuSpriteSpanFn<0xb6>,gpuSpriteSpanFn<0xb7>, NULL,NULL,gpuSpriteSpanFn<0xbA>,gpuSpriteSpanFn<0xbB>, NULL,NULL,gpuSpriteSpanFn<0xbE>,gpuSpriteSpanFn<0xbF>,
- gpuSpriteSpanFn<0xc0>,gpuSpriteSpanFn<0xc1>,gpuSpriteSpanFn<0xc2>,gpuSpriteSpanFn<0xc3>, gpuSpriteSpanFn<0xc4>,gpuSpriteSpanFn<0xc5>,gpuSpriteSpanFn<0xc6>,gpuSpriteSpanFn<0xc7>, NULL,NULL,gpuSpriteSpanFn<0xcA>,gpuSpriteSpanFn<0xcB>, NULL,NULL,gpuSpriteSpanFn<0xcE>,gpuSpriteSpanFn<0xcF>,
- NULL,NULL,gpuSpriteSpanFn<0xd2>,gpuSpriteSpanFn<0xd3>, NULL,NULL,gpuSpriteSpanFn<0xd6>,gpuSpriteSpanFn<0xd7>, NULL,NULL,gpuSpriteSpanFn<0xdA>,gpuSpriteSpanFn<0xdB>, NULL,NULL,gpuSpriteSpanFn<0xdE>,gpuSpriteSpanFn<0xdF>,
- gpuSpriteSpanFn<0xe0>,gpuSpriteSpanFn<0xe1>,gpuSpriteSpanFn<0xe2>,gpuSpriteSpanFn<0xe3>, gpuSpriteSpanFn<0xe4>,gpuSpriteSpanFn<0xe5>,gpuSpriteSpanFn<0xe6>,gpuSpriteSpanFn<0xe7>, NULL,NULL,gpuSpriteSpanFn<0xeA>,gpuSpriteSpanFn<0xeB>, NULL,NULL,gpuSpriteSpanFn<0xeE>,gpuSpriteSpanFn<0xeF>,
- NULL,NULL,gpuSpriteSpanFn<0xf2>,gpuSpriteSpanFn<0xf3>, NULL,NULL,gpuSpriteSpanFn<0xf6>,gpuSpriteSpanFn<0xf7>, NULL,NULL,gpuSpriteSpanFn<0xfA>,gpuSpriteSpanFn<0xfB>, NULL,NULL,gpuSpriteSpanFn<0xfE>,gpuSpriteSpanFn<0xfF>,
-
- NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
- NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
- gpuSpriteSpanFn<0x120>,gpuSpriteSpanFn<0x121>,gpuSpriteSpanFn<0x122>,gpuSpriteSpanFn<0x123>, gpuSpriteSpanFn<0x124>,gpuSpriteSpanFn<0x125>,gpuSpriteSpanFn<0x126>,gpuSpriteSpanFn<0x127>, NULL,NULL,gpuSpriteSpanFn<0x12A>,gpuSpriteSpanFn<0x12B>, NULL,NULL,gpuSpriteSpanFn<0x12E>,gpuSpriteSpanFn<0x12F>,
- NULL,NULL,gpuSpriteSpanFn<0x132>,gpuSpriteSpanFn<0x133>, NULL,NULL,gpuSpriteSpanFn<0x136>,gpuSpriteSpanFn<0x137>, NULL,NULL,gpuSpriteSpanFn<0x13A>,gpuSpriteSpanFn<0x13B>, NULL,NULL,gpuSpriteSpanFn<0x13E>,gpuSpriteSpanFn<0x13F>,
- gpuSpriteSpanFn<0x140>,gpuSpriteSpanFn<0x141>,gpuSpriteSpanFn<0x142>,gpuSpriteSpanFn<0x143>, gpuSpriteSpanFn<0x144>,gpuSpriteSpanFn<0x145>,gpuSpriteSpanFn<0x146>,gpuSpriteSpanFn<0x147>, NULL,NULL,gpuSpriteSpanFn<0x14A>,gpuSpriteSpanFn<0x14B>, NULL,NULL,gpuSpriteSpanFn<0x14E>,gpuSpriteSpanFn<0x14F>,
- NULL,NULL,gpuSpriteSpanFn<0x152>,gpuSpriteSpanFn<0x153>, NULL,NULL,gpuSpriteSpanFn<0x156>,gpuSpriteSpanFn<0x157>, NULL,NULL,gpuSpriteSpanFn<0x15A>,gpuSpriteSpanFn<0x15B>, NULL,NULL,gpuSpriteSpanFn<0x15E>,gpuSpriteSpanFn<0x15F>,
- gpuSpriteSpanFn<0x160>,gpuSpriteSpanFn<0x161>,gpuSpriteSpanFn<0x162>,gpuSpriteSpanFn<0x163>, gpuSpriteSpanFn<0x164>,gpuSpriteSpanFn<0x165>,gpuSpriteSpanFn<0x166>,gpuSpriteSpanFn<0x167>, NULL,NULL,gpuSpriteSpanFn<0x16A>,gpuSpriteSpanFn<0x16B>, NULL,NULL,gpuSpriteSpanFn<0x16E>,gpuSpriteSpanFn<0x16F>,
- NULL,NULL,gpuSpriteSpanFn<0x172>,gpuSpriteSpanFn<0x173>, NULL,NULL,gpuSpriteSpanFn<0x176>,gpuSpriteSpanFn<0x177>, NULL,NULL,gpuSpriteSpanFn<0x17A>,gpuSpriteSpanFn<0x17B>, NULL,NULL,gpuSpriteSpanFn<0x17E>,gpuSpriteSpanFn<0x17F>,
-
- NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
- NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
- gpuSpriteSpanFn<0x1a0>,gpuSpriteSpanFn<0x1a1>,gpuSpriteSpanFn<0x1a2>,gpuSpriteSpanFn<0x1a3>, gpuSpriteSpanFn<0x1a4>,gpuSpriteSpanFn<0x1a5>,gpuSpriteSpanFn<0x1a6>,gpuSpriteSpanFn<0x1a7>, NULL,NULL,gpuSpriteSpanFn<0x1aA>,gpuSpriteSpanFn<0x1aB>, NULL,NULL,gpuSpriteSpanFn<0x1aE>,gpuSpriteSpanFn<0x1aF>,
- NULL,NULL,gpuSpriteSpanFn<0x1b2>,gpuSpriteSpanFn<0x1b3>, NULL,NULL,gpuSpriteSpanFn<0x1b6>,gpuSpriteSpanFn<0x1b7>, NULL,NULL,gpuSpriteSpanFn<0x1bA>,gpuSpriteSpanFn<0x1bB>, NULL,NULL,gpuSpriteSpanFn<0x1bE>,gpuSpriteSpanFn<0x1bF>,
- gpuSpriteSpanFn<0x1c0>,gpuSpriteSpanFn<0x1c1>,gpuSpriteSpanFn<0x1c2>,gpuSpriteSpanFn<0x1c3>, gpuSpriteSpanFn<0x1c4>,gpuSpriteSpanFn<0x1c5>,gpuSpriteSpanFn<0x1c6>,gpuSpriteSpanFn<0x1c7>, NULL,NULL,gpuSpriteSpanFn<0x1cA>,gpuSpriteSpanFn<0x1cB>, NULL,NULL,gpuSpriteSpanFn<0x1cE>,gpuSpriteSpanFn<0x1cF>,
- NULL,NULL,gpuSpriteSpanFn<0x1d2>,gpuSpriteSpanFn<0x1d3>, NULL,NULL,gpuSpriteSpanFn<0x1d6>,gpuSpriteSpanFn<0x1d7>, NULL,NULL,gpuSpriteSpanFn<0x1dA>,gpuSpriteSpanFn<0x1dB>, NULL,NULL,gpuSpriteSpanFn<0x1dE>,gpuSpriteSpanFn<0x1dF>,
- gpuSpriteSpanFn<0x1e0>,gpuSpriteSpanFn<0x1e1>,gpuSpriteSpanFn<0x1e2>,gpuSpriteSpanFn<0x1e3>, gpuSpriteSpanFn<0x1e4>,gpuSpriteSpanFn<0x1e5>,gpuSpriteSpanFn<0x1e6>,gpuSpriteSpanFn<0x1e7>, NULL,NULL,gpuSpriteSpanFn<0x1eA>,gpuSpriteSpanFn<0x1eB>, NULL,NULL,gpuSpriteSpanFn<0x1eE>,gpuSpriteSpanFn<0x1eF>,
- NULL,NULL,gpuSpriteSpanFn<0x1f2>,gpuSpriteSpanFn<0x1f3>, NULL,NULL,gpuSpriteSpanFn<0x1f6>,gpuSpriteSpanFn<0x1f7>, NULL,NULL,gpuSpriteSpanFn<0x1fA>,gpuSpriteSpanFn<0x1fB>, NULL,NULL,gpuSpriteSpanFn<0x1fE>,gpuSpriteSpanFn<0x1fF>
+typedef void (*PS)(u16 *pDst, u32 count, u8* pTxt, u32 u0);
+
+// Template instantiation helper macros
+#define TI(cf) gpuSpriteSpanFn<(cf)>
+#define TN SpriteNULL
+#define TIBLOCK(ub) \
+ TN, TN, TN, TN, TN, TN, TN, TN, \
+ TN, TN, TN, TN, TN, TN, TN, TN, \
+ TN, TN, TN, TN, TN, TN, TN, TN, \
+ TN, TN, TN, TN, TN, TN, TN, TN, \
+ TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
+ TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \
+ TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \
+ TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \
+ TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \
+ TN, TN, TI((ub)|0x4a), TI((ub)|0x4b), TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \
+ TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \
+ TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \
+ TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \
+ TN, TN, TI((ub)|0x6a), TI((ub)|0x6b), TN, TN, TI((ub)|0x6e), TI((ub)|0x6f), \
+ TN, TN, TI((ub)|0x72), TI((ub)|0x73), TN, TN, TI((ub)|0x76), TI((ub)|0x77), \
+ TN, TN, TI((ub)|0x7a), TI((ub)|0x7b), TN, TN, TI((ub)|0x7e), TI((ub)|0x7f)
+
+const PS gpuSpriteSpanDrivers[256] = {
+ TIBLOCK(0<<8), TIBLOCK(1<<8)
};
+#undef TI
+#undef TN
+#undef TIBLOCK
+
///////////////////////////////////////////////////////////////////////////////
// GPU Polygon innerloops generator
-template<const int CF>
-INLINE void gpuPolySpanFn(u16 *pDst, u32 count)
+
+//senquack - Newer version with following changes:
+// * Adapted to work with new poly routines in gpu_raster_polygon.h
+// adapted from DrHell GPU. They are less glitchy and use 22.10
+// fixed-point instead of original UNAI's 16.16.
+// * Texture coordinates are no longer packed together into one
+// unsigned int. This seems to lose too much accuracy (they each
+// end up being only 8.7 fixed-point that way) and pixel-dropouts
+// were noticeable both with original code and current DrHell
+// adaptations. An example would be the sky in NFS3. Now, they are
+// stored in separate ints, using separate masks.
+// * Function is no longer INLINE, as it was always called
+// through a function pointer.
+// * Function now ensures the mask bit of source texture is preserved
+// across calls to blending functions (Silent Hill rectangles fix)
+// * November 2016: Large refactoring of blending/lighting when
+// JohnnyF added dithering. See gpu_inner_quantization.h and
+// relevant blend/light headers.
+// (see README_senquack.txt)
+template<int CF>
+static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
{
- if (!TM)
- {
- // NO TEXTURE
- if (!G)
+ // Blend func can save an operation if it knows uSrc MSB is unset.
+ // Untextured prims can always skip this (src color MSB is always 0).
+ // For textured prims, lighting funcs always return it unset. (bonus!)
+ const bool skip_uSrc_mask = (!CF_TEXTMODE) || CF_LIGHT;
+
+ u32 bMsk; if (CF_BLITMASK) bMsk = gpu_unai.blit_mask;
+
+ if (!CF_TEXTMODE)
+ {
+ if (!CF_GOURAUD)
{
- // NO GOURAUD
- u16 data;
- if (L) { u32 lCol=((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); gpuLightingRGB(data,lCol); }
- else data=PixelData;
- if ((!M)&&(!B))
- {
- if (MB) { data = data | 0x8000; }
- do { *pDst++ = data; } while (--count);
- }
- else if ((M)&&(!B))
- {
- if (MB) { data = data | 0x8000; }
- do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
- }
- else
- {
- u16 uSrc;
- u16 uDst;
- u32 uMsk; if (BM==0) uMsk=0x7BDE;
- do
- {
- // masking
- uDst = *pDst;
- if(M) { if (uDst&0x8000) goto endtile; }
- uSrc = data;
- // blend
- if (BM==0) gpuBlending00(uSrc, uDst);
- if (BM==1) gpuBlending01(uSrc, uDst);
- if (BM==2) gpuBlending02(uSrc, uDst);
- if (BM==3) gpuBlending03(uSrc, uDst);
- if (MB) { *pDst = uSrc | 0x8000; }
- else { *pDst = uSrc; }
- endtile: pDst++;
- }
- while (--count);
- }
+ // UNTEXTURED, NO GOURAUD
+ const u16 pix15 = gpu_unai.PixelData;
+ do {
+ u16 uSrc, uDst;
+
+ // NOTE: Don't enable CF_BLITMASK pixel skipping (speed hack)
+ // on untextured polys. It seems to do more harm than good: see
+ // gravestone text at end of MediEvil intro sequence. -senquack
+ //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) { goto endpolynotextnogou; } }
+
+ if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
+ if (CF_MASKCHECK) { if (uDst&0x8000) { goto endpolynotextnogou; } }
+
+ uSrc = pix15;
+
+ if (CF_BLEND)
+ uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
+
+ if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
+ else { *pDst = uSrc; }
+
+endpolynotextnogou:
+ pDst++;
+ } while(--count);
}
else
{
- // GOURAUD
- u16 uDst;
- u16 uSrc;
- u32 linc=lInc;
- u32 lCol=((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21));
- u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
- do
- {
- // masking
- if(M) { uDst = *pDst; if (uDst&0x8000) goto endgou; }
- // blend
- if(B)
- {
- // light
- gpuLightingRGB(uSrc,lCol);
- if(!M) { uDst = *pDst; }
- if (BM==0) gpuBlending00(uSrc, uDst);
- if (BM==1) gpuBlending01(uSrc, uDst);
- if (BM==2) gpuBlending02(uSrc, uDst);
- if (BM==3) gpuBlending03(uSrc, uDst);
- }
- else
- {
- // light
- gpuLightingRGB(uSrc,lCol);
+ // UNTEXTURED, GOURAUD
+ u32 l_gCol = gpu_unai.gCol;
+ u32 l_gInc = gpu_unai.gInc;
+
+ do {
+ u16 uDst, uSrc;
+
+ // See note in above loop regarding CF_BLITMASK
+ //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolynotextgou; }
+
+ if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
+ if (CF_MASKCHECK) { if (uDst&0x8000) goto endpolynotextgou; }
+
+ if (CF_DITHER) {
+ // GOURAUD, DITHER
+
+ u32 uSrc24 = gpuLightingRGB24(l_gCol);
+ if (CF_BLEND)
+ uSrc24 = gpuBlending24<CF_BLENDMODE>(uSrc24, uDst);
+ uSrc = gpuColorQuantization24<CF_DITHER>(uSrc24, pDst);
+ } else {
+ // GOURAUD, NO DITHER
+
+ uSrc = gpuLightingRGB(l_gCol);
+
+ if (CF_BLEND)
+ uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
}
- if (MB) { *pDst = uSrc | 0x8000; }
- else { *pDst = uSrc; }
- endgou: pDst++; lCol=(lCol+linc);
+
+ if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
+ else { *pDst = uSrc; }
+
+endpolynotextgou:
+ pDst++;
+ l_gCol += l_gInc;
}
while (--count);
}
}
else
{
- // TEXTURE
- u16 uDst;
- u16 uSrc;
- u32 linc; if (L&&G) linc=lInc;
- u32 tinc=tInc;
- u32 tmsk=tMsk;
- u32 tCor = ((u32)( u4<<7)&0x7fff0000) | ((u32)( v4>>9)&0x00007fff); tCor&= tmsk;
- const u16* _TBA=TBA;
- const u16* _CBA; if (TM!=3) _CBA=CBA;
- u32 lCol;
- if(L && !G) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); }
- else if(L && G) { lCol = ((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); }
- u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
+ // TEXTURED
+
+ u16 uDst, uSrc, srcMSB;
+
+ //senquack - note: original UNAI code had gpu_unai.{u4/v4} packed into
+ // one 32-bit unsigned int, but this proved to lose too much accuracy
+ // (pixel dropouts noticeable in NFS3 sky), so they are now separate vars.
+ u32 l_u_msk = gpu_unai.u_msk; u32 l_v_msk = gpu_unai.v_msk;
+ u32 l_u = gpu_unai.u & l_u_msk; u32 l_v = gpu_unai.v & l_v_msk;
+ s32 l_u_inc = gpu_unai.u_inc; s32 l_v_inc = gpu_unai.v_inc;
+
+ const u16* TBA_ = gpu_unai.TBA;
+ const u16* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
+
+ u8 r5, g5, b5;
+ u8 r8, g8, b8;
+
+ u32 l_gInc, l_gCol;
+
+ if (CF_LIGHT) {
+ if (CF_GOURAUD) {
+ l_gInc = gpu_unai.gInc;
+ l_gCol = gpu_unai.gCol;
+ } else {
+ if (CF_DITHER) {
+ r8 = gpu_unai.r8;
+ g8 = gpu_unai.g8;
+ b8 = gpu_unai.b8;
+ } else {
+ r5 = gpu_unai.r5;
+ g5 = gpu_unai.g5;
+ b5 = gpu_unai.b5;
+ }
+ }
+ }
+
do
{
- // masking
- if(M) { uDst = *pDst; if (uDst&0x8000) goto endpoly; }
- // texture
- if (TM==1) { u32 tu=(tCor>>23); u32 tv=(tCor<<4)&(0xff<<11); u8 rgb=((u8*)_TBA)[tv+(tu>>1)]; uSrc=_CBA[(rgb>>((tu&1)<<2))&0xf]; if(!uSrc) goto endpoly; }
- if (TM==2) { uSrc = _CBA[(((u8*)_TBA)[(tCor>>23)+((tCor<<4)&(0xff<<11))])]; if(!uSrc) goto endpoly; }
- if (TM==3) { uSrc = _TBA[(tCor>>23)+((tCor<<3)&(0xff<<10))]; if(!uSrc) goto endpoly; }
- // blend
- if(B)
- {
- if (uSrc&0x8000)
- {
- // light
- if(L) gpuLightingTXT(uSrc, lCol);
- if(!M) { uDst = *pDst; }
- if (BM==0) gpuBlending00(uSrc, uDst);
- if (BM==1) gpuBlending01(uSrc, uDst);
- if (BM==2) gpuBlending02(uSrc, uDst);
- if (BM==3) gpuBlending03(uSrc, uDst);
- }
- else
- {
- // light
- if(L) gpuLightingTXT(uSrc, lCol);
- }
+ if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolytext; }
+ if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
+ if (CF_MASKCHECK) if (uDst&0x8000) { goto endpolytext; }
+
+ //senquack - adapted to work with new 22.10 fixed point routines:
+ // (UNAI originally used 16.16)
+ if (CF_TEXTMODE==1) { // 4bpp (CLUT)
+ u32 tu=(l_u>>10);
+ u32 tv=(l_v<<1)&(0xff<<11);
+ u8 rgb=((u8*)TBA_)[tv+(tu>>1)];
+ uSrc=CBA_[(rgb>>((tu&1)<<2))&0xf];
+ if (!uSrc) goto endpolytext;
+ }
+ if (CF_TEXTMODE==2) { // 8bpp (CLUT)
+ uSrc = CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])];
+ if (!uSrc) goto endpolytext;
}
- else
+ if (CF_TEXTMODE==3) { // 16bpp
+ uSrc = TBA_[(l_u>>10)+((l_v)&(0xff<<10))];
+ if (!uSrc) goto endpolytext;
+ }
+
+ // Save source MSB, as blending or lighting will not (Silent Hill)
+ if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000;
+
+ // When textured, only dither when LIGHT (texture blend) is enabled
+ // LIGHT && BLEND => dither
+ // LIGHT && !BLEND => dither
+ //!LIGHT && BLEND => no dither
+ //!LIGHT && !BLEND => no dither
+
+ if (CF_DITHER && CF_LIGHT) {
+ u32 uSrc24;
+ if ( CF_GOURAUD)
+ uSrc24 = gpuLightingTXT24Gouraud(uSrc, l_gCol);
+ if (!CF_GOURAUD)
+ uSrc24 = gpuLightingTXT24(uSrc, r8, g8, b8);
+
+ if (CF_BLEND && srcMSB)
+ uSrc24 = gpuBlending24<CF_BLENDMODE>(uSrc24, uDst);
+
+ uSrc = gpuColorQuantization24<CF_DITHER>(uSrc24, pDst);
+ } else
{
- // light
- if(L) { gpuLightingTXT(uSrc, lCol); } else if(!MB) { uSrc&= 0x7fff; }
+ if (CF_LIGHT) {
+ if ( CF_GOURAUD)
+ uSrc = gpuLightingTXTGouraud(uSrc, l_gCol);
+ if (!CF_GOURAUD)
+ uSrc = gpuLightingTXT(uSrc, r5, g5, b5);
+ }
+
+ if (CF_BLEND && srcMSB)
+ uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
}
- if (MB) { *pDst = uSrc | 0x8000; }
- else { *pDst = uSrc; }
- endpoly: pDst++;
- tCor=(tCor+tinc)&tmsk;
- if (L&&G) lCol=(lCol+linc);
+
+ if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
+ else if (CF_BLEND || CF_LIGHT) { *pDst = uSrc | srcMSB; }
+ else { *pDst = uSrc; }
+endpolytext:
+ pDst++;
+ l_u = (l_u + l_u_inc) & l_u_msk;
+ l_v = (l_v + l_v_inc) & l_v_msk;
+ if (CF_LIGHT && CF_GOURAUD) l_gCol += l_gInc;
}
while (--count);
}
}
-// supposedly shouldn't be called?
-static void gpuPolySpanFn_NULL_(u16 *pDst, u32 count)
+static void PolyNULL(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
{
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"PolyNULL()\n");
+ #endif
}
///////////////////////////////////////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////////////////////////
// Polygon innerloops driver
-typedef void (*PP)(u16 *pDst, u32 count);
-const PP gpuPolySpanDrivers[512] =
-{
- gpuPolySpanFn<0x00>,gpuPolySpanFn<0x01>,gpuPolySpanFn<0x02>,gpuPolySpanFn<0x03>, gpuPolySpanFn<0x04>,gpuPolySpanFn<0x05>,gpuPolySpanFn<0x06>,gpuPolySpanFn<0x07>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x0A>,gpuPolySpanFn<0x0B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x0E>,gpuPolySpanFn<0x0F>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x12>,gpuPolySpanFn<0x13>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x16>,gpuPolySpanFn<0x17>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1A>,gpuPolySpanFn<0x1B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1E>,gpuPolySpanFn<0x1F>,
- gpuPolySpanFn<0x20>,gpuPolySpanFn<0x21>,gpuPolySpanFn<0x22>,gpuPolySpanFn<0x23>, gpuPolySpanFn<0x24>,gpuPolySpanFn<0x25>,gpuPolySpanFn<0x26>,gpuPolySpanFn<0x27>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x2A>,gpuPolySpanFn<0x2B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x2E>,gpuPolySpanFn<0x2F>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x32>,gpuPolySpanFn<0x33>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x36>,gpuPolySpanFn<0x37>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x3A>,gpuPolySpanFn<0x3B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x3E>,gpuPolySpanFn<0x3F>,
- gpuPolySpanFn<0x40>,gpuPolySpanFn<0x41>,gpuPolySpanFn<0x42>,gpuPolySpanFn<0x43>, gpuPolySpanFn<0x44>,gpuPolySpanFn<0x45>,gpuPolySpanFn<0x46>,gpuPolySpanFn<0x47>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x4A>,gpuPolySpanFn<0x4B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x4E>,gpuPolySpanFn<0x4F>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x52>,gpuPolySpanFn<0x53>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x56>,gpuPolySpanFn<0x57>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x5A>,gpuPolySpanFn<0x5B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x5E>,gpuPolySpanFn<0x5F>,
- gpuPolySpanFn<0x60>,gpuPolySpanFn<0x61>,gpuPolySpanFn<0x62>,gpuPolySpanFn<0x63>, gpuPolySpanFn<0x64>,gpuPolySpanFn<0x65>,gpuPolySpanFn<0x66>,gpuPolySpanFn<0x67>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x6A>,gpuPolySpanFn<0x6B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x6E>,gpuPolySpanFn<0x6F>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x72>,gpuPolySpanFn<0x73>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x76>,gpuPolySpanFn<0x77>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x7A>,gpuPolySpanFn<0x7B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x7E>,gpuPolySpanFn<0x7F>,
-
- gpuPolySpanFn_NULL_,gpuPolySpanFn<0x81>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x83>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x85>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x87>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x8B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x8F>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x93>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x97>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x9B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x9F>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn<0xa1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xa3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0xa5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xa7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xaB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xaF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xb3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xb7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xbB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xbF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn<0xc1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xc3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0xc5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xc7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xcB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xcF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xd3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xd7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xdB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xdF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn<0xe1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xe3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0xe5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xe7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xeB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xeF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xf3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xf7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xfB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xfF>,
-
- gpuPolySpanFn<0x100>,gpuPolySpanFn<0x101>,gpuPolySpanFn<0x102>,gpuPolySpanFn<0x103>, gpuPolySpanFn<0x104>,gpuPolySpanFn<0x105>,gpuPolySpanFn<0x106>,gpuPolySpanFn<0x107>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x10A>,gpuPolySpanFn<0x10B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x10E>,gpuPolySpanFn<0x10F>,
- gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x112>,gpuPolySpanFn<0x113>, gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x116>,gpuPolySpanFn<0x117>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x11A>,gpuPolySpanFn<0x11B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x11E>,gpuPolySpanFn<0x11F>,
- gpuPolySpanFn<0x120>,gpuPolySpanFn<0x121>,gpuPolySpanFn<0x122>,gpuPolySpanFn<0x123>, gpuPolySpanFn<0x124>,gpuPolySpanFn<0x125>,gpuPolySpanFn<0x126>,gpuPolySpanFn<0x127>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x12A>,gpuPolySpanFn<0x12B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x12E>,gpuPolySpanFn<0x12F>,
- gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x132>,gpuPolySpanFn<0x133>, gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x136>,gpuPolySpanFn<0x137>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x13A>,gpuPolySpanFn<0x13B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x13E>,gpuPolySpanFn<0x13F>,
- gpuPolySpanFn<0x140>,gpuPolySpanFn<0x141>,gpuPolySpanFn<0x142>,gpuPolySpanFn<0x143>, gpuPolySpanFn<0x144>,gpuPolySpanFn<0x145>,gpuPolySpanFn<0x146>,gpuPolySpanFn<0x147>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x14A>,gpuPolySpanFn<0x14B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x14E>,gpuPolySpanFn<0x14F>,
- gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x152>,gpuPolySpanFn<0x153>, gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x156>,gpuPolySpanFn<0x157>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x15A>,gpuPolySpanFn<0x15B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x15E>,gpuPolySpanFn<0x15F>,
- gpuPolySpanFn<0x160>,gpuPolySpanFn<0x161>,gpuPolySpanFn<0x162>,gpuPolySpanFn<0x163>, gpuPolySpanFn<0x164>,gpuPolySpanFn<0x165>,gpuPolySpanFn<0x166>,gpuPolySpanFn<0x167>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x16A>,gpuPolySpanFn<0x16B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x16E>,gpuPolySpanFn<0x16F>,
- gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x172>,gpuPolySpanFn<0x173>, gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x176>,gpuPolySpanFn<0x177>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x17A>,gpuPolySpanFn<0x17B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x17E>,gpuPolySpanFn<0x17F>,
-
- gpuPolySpanFn_NULL_,gpuPolySpanFn<0x181>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x183>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x185>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x187>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x18B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x18F>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x193>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x197>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x19B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x19F>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1a1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1a3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1a5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1a7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1aB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1aF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1b3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1b7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1bB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1bF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1c1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1c3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1c5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1c7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1cB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1cF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1d3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1d7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1dB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1dF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1e1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1e3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1e5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1e7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1eB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1eF>,
- gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1f3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1f7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1fB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1fF>
+typedef void (*PP)(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count);
+
+// Template instantiation helper macros
+#define TI(cf) gpuPolySpanFn<(cf)>
+#define TN PolyNULL
+#define TIBLOCK(ub) \
+ TI((ub)|0x00), TI((ub)|0x01), TI((ub)|0x02), TI((ub)|0x03), TI((ub)|0x04), TI((ub)|0x05), TI((ub)|0x06), TI((ub)|0x07), \
+ TN, TN, TI((ub)|0x0a), TI((ub)|0x0b), TN, TN, TI((ub)|0x0e), TI((ub)|0x0f), \
+ TN, TN, TI((ub)|0x12), TI((ub)|0x13), TN, TN, TI((ub)|0x16), TI((ub)|0x17), \
+ TN, TN, TI((ub)|0x1a), TI((ub)|0x1b), TN, TN, TI((ub)|0x1e), TI((ub)|0x1f), \
+ TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
+ TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \
+ TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \
+ TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \
+ TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \
+ TN, TN, TI((ub)|0x4a), TI((ub)|0x4b), TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \
+ TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \
+ TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \
+ TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \
+ TN, TN, TI((ub)|0x6a), TI((ub)|0x6b), TN, TN, TI((ub)|0x6e), TI((ub)|0x6f), \
+ TN, TN, TI((ub)|0x72), TI((ub)|0x73), TN, TN, TI((ub)|0x76), TI((ub)|0x77), \
+ TN, TN, TI((ub)|0x7a), TI((ub)|0x7b), TN, TN, TI((ub)|0x7e), TI((ub)|0x7f), \
+ TN, TI((ub)|0x81), TN, TI((ub)|0x83), TN, TI((ub)|0x85), TN, TI((ub)|0x87), \
+ TN, TN, TN, TI((ub)|0x8b), TN, TN, TN, TI((ub)|0x8f), \
+ TN, TN, TN, TI((ub)|0x93), TN, TN, TN, TI((ub)|0x97), \
+ TN, TN, TN, TI((ub)|0x9b), TN, TN, TN, TI((ub)|0x9f), \
+ TN, TI((ub)|0xa1), TN, TI((ub)|0xa3), TN, TI((ub)|0xa5), TN, TI((ub)|0xa7), \
+ TN, TN, TN, TI((ub)|0xab), TN, TN, TN, TI((ub)|0xaf), \
+ TN, TN, TN, TI((ub)|0xb3), TN, TN, TN, TI((ub)|0xb7), \
+ TN, TN, TN, TI((ub)|0xbb), TN, TN, TN, TI((ub)|0xbf), \
+ TN, TI((ub)|0xc1), TN, TI((ub)|0xc3), TN, TI((ub)|0xc5), TN, TI((ub)|0xc7), \
+ TN, TN, TN, TI((ub)|0xcb), TN, TN, TN, TI((ub)|0xcf), \
+ TN, TN, TN, TI((ub)|0xd3), TN, TN, TN, TI((ub)|0xd7), \
+ TN, TN, TN, TI((ub)|0xdb), TN, TN, TN, TI((ub)|0xdf), \
+ TN, TI((ub)|0xe1), TN, TI((ub)|0xe3), TN, TI((ub)|0xe5), TN, TI((ub)|0xe7), \
+ TN, TN, TN, TI((ub)|0xeb), TN, TN, TN, TI((ub)|0xef), \
+ TN, TN, TN, TI((ub)|0xf3), TN, TN, TN, TI((ub)|0xf7), \
+ TN, TN, TN, TI((ub)|0xfb), TN, TN, TN, TI((ub)|0xff)
+
+const PP gpuPolySpanDrivers[2048] = {
+ TIBLOCK(0<<8), TIBLOCK(1<<8), TIBLOCK(2<<8), TIBLOCK(3<<8),
+ TIBLOCK(4<<8), TIBLOCK(5<<8), TIBLOCK(6<<8), TIBLOCK(7<<8)
};
+
+#undef TI
+#undef TN
+#undef TIBLOCK
diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h
index ce439d3..93c268b 100644
--- a/plugins/gpu_unai/gpu_inner_blend.h
+++ b/plugins/gpu_unai/gpu_inner_blend.h
@@ -23,132 +23,166 @@
// GPU Blending operations functions
-#ifdef __arm__
-#define gpuBlending00(uSrc,uDst) \
-{ \
- asm ("and %[src], %[src], %[msk]\n" \
- "and %[dst], %[dst], %[msk]\n" \
- "add %[src], %[dst], %[src]\n" \
- "mov %[src], %[src], lsr #1\n" \
- : [src] "=&r" (uSrc), [dst] "=&r" (uDst) : "0" (uSrc), "1" (uDst), [msk] "r" (uMsk)); \
-}
-#else
-#define gpuBlending00(uSrc,uDst) \
-{ \
- uSrc = (((uDst & uMsk) + (uSrc & uMsk)) >> 1); \
-}
-#endif
+////////////////////////////////////////////////////////////////////////////////
+// Blend bgr555 color in 'uSrc' (foreground) with bgr555 color
+// in 'uDst' (background), returning resulting color.
+//
+// INPUT:
+// 'uSrc','uDst' input: -bbbbbgggggrrrrr
+// ^ bit 15
+// RETURNS:
+// u16 output: 0bbbbbgggggrrrrr
+// ^ bit 15
+//
+// Where '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+template <int BLENDMODE, bool SKIP_USRC_MSB_MASK>
+GPU_INLINE u16 gpuBlending(u16 uSrc, u16 uDst)
+{
+ // These use Blargg's bitwise modulo-clamping:
+ // http://blargg.8bitalley.com/info/rgb_mixing.html
+ // http://blargg.8bitalley.com/info/rgb_clamped_add.html
+ // http://blargg.8bitalley.com/info/rgb_clamped_sub.html
-// 1.0 x Back + 1.0 x Forward
-#ifdef __arm__
-#define gpuBlending01(uSrc,uDst) \
-{ \
- u32 st,dt,out; \
- asm ("and %[dt], %[dst], #0x7C00\n" \
- "and %[st], %[src], #0x7C00\n" \
- "add %[out], %[dt], %[st] \n" \
- "cmp %[out], #0x7C00 \n" \
- "movhi %[out], #0x7C00 \n" \
- "and %[dt], %[dst], #0x03E0\n" \
- "and %[st], %[src], #0x03E0\n" \
- "add %[dt], %[dt], %[st] \n" \
- "cmp %[dt], #0x03E0 \n" \
- "movhi %[dt], #0x03E0 \n" \
- "orr %[out], %[out], %[dt] \n" \
- "and %[dt], %[dst], #0x001F\n" \
- "and %[st], %[src], #0x001F\n" \
- "add %[dt], %[dt], %[st] \n" \
- "cmp %[dt], #0x001F \n" \
- "movhi %[dt], #0x001F \n" \
- "orr %[src], %[out], %[dt] \n" \
- : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
- : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
-}
+ u16 mix;
+
+ // 0.5 x Back + 0.5 x Forward
+ if (BLENDMODE==0) {
+#ifdef GPU_UNAI_USE_ACCURATE_BLENDING
+ // Slower, but more accurate (doesn't lose LSB data)
+ uDst &= 0x7fff;
+ if (!SKIP_USRC_MSB_MASK)
+ uSrc &= 0x7fff;
+ mix = ((uSrc + uDst) - ((uSrc ^ uDst) & 0x0421)) >> 1;
#else
-#define gpuBlending01(uSrc,uDst) \
-{ \
- u16 rr, gg, bb; \
- bb = (uDst & 0x7C00) + (uSrc & 0x7C00); if (bb > 0x7C00) bb = 0x7C00; \
- gg = (uDst & 0x03E0) + (uSrc & 0x03E0); if (gg > 0x03E0) gg = 0x03E0; bb |= gg; \
- rr = (uDst & 0x001F) + (uSrc & 0x001F); if (rr > 0x001F) rr = 0x001F; bb |= rr; \
- uSrc = bb; \
-}
+ mix = ((uDst & 0x7bde) + (uSrc & 0x7bde)) >> 1;
#endif
+ }
+
+ // 1.0 x Back + 1.0 x Forward
+ if (BLENDMODE==1) {
+ uDst &= 0x7fff;
+ if (!SKIP_USRC_MSB_MASK)
+ uSrc &= 0x7fff;
+ u32 sum = uSrc + uDst;
+ u32 low_bits = (uSrc ^ uDst) & 0x0421;
+ u32 carries = (sum - low_bits) & 0x8420;
+ u32 modulo = sum - carries;
+ u32 clamp = carries - (carries >> 5);
+ mix = modulo | clamp;
+ }
+
+ // 1.0 x Back - 1.0 x Forward
+ if (BLENDMODE==2) {
+ uDst &= 0x7fff;
+ if (!SKIP_USRC_MSB_MASK)
+ uSrc &= 0x7fff;
+ u32 diff = uDst - uSrc + 0x8420;
+ u32 low_bits = (uDst ^ uSrc) & 0x8420;
+ u32 borrows = (diff - low_bits) & 0x8420;
+ u32 modulo = diff - borrows;
+ u32 clamp = borrows - (borrows >> 5);
+ mix = modulo & clamp;
+ }
-// 1.0 x Back - 1.0 x Forward */
-#ifdef __arm__
-#define gpuBlending02(uSrc,uDst) \
-{ \
- u32 st,dt,out; \
- asm ("and %[dt], %[dst], #0x7C00\n" \
- "and %[st], %[src], #0x7C00\n" \
- "subs %[out], %[dt], %[st] \n" \
- "movmi %[out], #0x0000 \n" \
- "and %[dt], %[dst], #0x03E0\n" \
- "and %[st], %[src], #0x03E0\n" \
- "subs %[dt], %[dt], %[st] \n" \
- "orrpl %[out], %[out], %[dt] \n" \
- "and %[dt], %[dst], #0x001F\n" \
- "and %[st], %[src], #0x001F\n" \
- "subs %[dt], %[dt], %[st] \n" \
- "orrpl %[out], %[out], %[dt] \n" \
- "mov %[src], %[out] \n" \
- : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
- : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
+ // 1.0 x Back + 0.25 x Forward
+ if (BLENDMODE==3) {
+ uDst &= 0x7fff;
+ uSrc = ((uSrc >> 2) & 0x1ce7);
+ u32 sum = uSrc + uDst;
+ u32 low_bits = (uSrc ^ uDst) & 0x0421;
+ u32 carries = (sum - low_bits) & 0x8420;
+ u32 modulo = sum - carries;
+ u32 clamp = carries - (carries >> 5);
+ mix = modulo | clamp;
+ }
+
+ return mix;
}
-int btest(int s, int d)
+
+////////////////////////////////////////////////////////////////////////////////
+// Convert bgr555 color in uSrc to padded u32 5.4:5.4:5.4 bgr fixed-pt
+// color triplet suitable for use with HQ 24-bit quantization.
+//
+// INPUT:
+// 'uSrc' input: -bbbbbgggggrrrrr
+// ^ bit 15
+// RETURNS:
+// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
+// ^ bit 31
+// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+GPU_INLINE u32 gpuGetRGB24(u16 uSrc)
{
- gpuBlending02(s, d);
- return s;
-}
-#else
-#define gpuBlending02(uSrc,uDst) \
-{ \
- s32 rr, gg, bb; \
- bb = (uDst & 0x7C00) - (uSrc & 0x7C00); if (bb < 0) bb = 0; \
- gg = (uDst & 0x03E0) - (uSrc & 0x03E0); if (gg > 0) bb |= gg; \
- rr = (uDst & 0x001F) - (uSrc & 0x001F); if (rr > 0) bb |= rr; \
- uSrc = bb; \
+ return ((uSrc & 0x7C00)<<14)
+ | ((uSrc & 0x03E0)<< 9)
+ | ((uSrc & 0x001F)<< 4);
}
-#endif
-// 1.0 x Back + 0.25 x Forward */
-#ifdef __arm__
-#define gpuBlending03(uSrc,uDst) \
-{ \
- u32 st,dt,out; \
- asm ("mov %[src], %[src], lsr #2 \n" \
- "and %[dt], %[dst], #0x7C00\n" \
- "and %[st], %[src], #0x1C00\n" \
- "add %[out], %[dt], %[st] \n" \
- "cmp %[out], #0x7C00 \n" \
- "movhi %[out], #0x7C00 \n" \
- "and %[dt], %[dst], #0x03E0\n" \
- "and %[st], %[src], #0x00E0\n" \
- "add %[dt], %[dt], %[st] \n" \
- "cmp %[dt], #0x03E0 \n" \
- "movhi %[dt], #0x03E0 \n" \
- "orr %[out], %[out], %[dt] \n" \
- "and %[dt], %[dst], #0x001F\n" \
- "and %[st], %[src], #0x0007\n" \
- "add %[dt], %[dt], %[st] \n" \
- "cmp %[dt], #0x001F \n" \
- "movhi %[dt], #0x001F \n" \
- "orr %[src], %[out], %[dt] \n" \
- : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
- : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
-}
-#else
-#define gpuBlending03(uSrc,uDst) \
-{ \
- u16 rr, gg, bb; \
- uSrc >>= 2; \
- bb = (uDst & 0x7C00) + (uSrc & 0x1C00); if (bb > 0x7C00) bb = 0x7C00; \
- gg = (uDst & 0x03E0) + (uSrc & 0x00E0); if (gg > 0x03E0) gg = 0x03E0; bb |= gg; \
- rr = (uDst & 0x001F) + (uSrc & 0x0007); if (rr > 0x001F) rr = 0x001F; bb |= rr; \
- uSrc = bb; \
+
+////////////////////////////////////////////////////////////////////////////////
+// Blend padded u32 5.4:5.4:5.4 bgr fixed-pt color triplet in 'uSrc24'
+// (foreground color) with bgr555 color in 'uDst' (background color),
+// returning the resulting u32 5.4:5.4:5.4 color.
+//
+// BLENDMODE selects the operation:
+//   0: 0.5 x Back + 0.5 x Forward
+//   1: 1.0 x Back + 1.0 x Forward
+//   2: 1.0 x Back - 1.0 x Forward
+//   3: 1.0 x Back + 0.25 x Forward
+// Any other BLENDMODE yields 0.
+//
+// INPUT:
+// 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
+// ^ bit 31
+// 'uDst' input: -bbbbbgggggrrrrr
+// ^ bit 15
+// RETURNS:
+// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
+// ^ bit 31
+// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+template <int BLENDMODE>
+GPU_INLINE u32 gpuBlending24(u32 uSrc24, u16 uDst)
+{
+    // These use techniques adapted from Blargg's techniques mentioned in
+    // the gpuBlending() comments above. Not as much bitwise trickery is
+    // necessary because of the presence of 0 padding in the uSrc24 format:
+    // bits 9, 19 and 29 (0x20080200) sit directly above each 9-bit channel
+    // and catch that channel's carry or borrow.
+
+    u32 uDst24 = gpuGetRGB24(uDst);
+
+    // Initialized so that an unsupported BLENDMODE returns a defined value
+    // instead of reading an indeterminate one.
+    u32 mix = 0;
+
+    // 0.5 x Back + 0.5 x Forward
+    if (BLENDMODE==0) {
+        const u32 uMsk = 0x1FE7F9FE;
+        // Only need to mask LSBs of uSrc24, uDst24's LSBs are 0 already
+        mix = (uDst24 + (uSrc24 & uMsk)) >> 1;
+    }
+
+    // 1.0 x Back + 1.0 x Forward
+    if (BLENDMODE==1) {
+        u32 sum = uSrc24 + uDst24;
+        u32 carries = sum & 0x20080200;
+        u32 modulo = sum - carries;
+        // Each carry bit becomes nine set bits covering its channel
+        u32 clamp = carries - (carries >> 9);
+        mix = modulo | clamp;
+    }
+
+    // 1.0 x Back - 1.0 x Forward
+    if (BLENDMODE==2) {
+        // Insert ones in 0-padded borrow slot of color to be subtracted from
+        uDst24 |= 0x20080200;
+        u32 diff = uDst24 - uSrc24;
+        // A slot that is still 1 means its channel did not borrow
+        u32 borrows = diff & 0x20080200;
+        u32 clamp = borrows - (borrows >> 9);
+        mix = diff & clamp;
+    }
+
+    // 1.0 x Back + 0.25 x Forward
+    if (BLENDMODE==3) {
+        // Drop the two LSBs of every channel before shifting so they do not
+        // spill into the channel below
+        uSrc24 = (uSrc24 & 0x1FC7F1FC) >> 2;
+        u32 sum = uSrc24 + uDst24;
+        u32 carries = sum & 0x20080200;
+        u32 modulo = sum - carries;
+        u32 clamp = carries - (carries >> 9);
+        mix = modulo | clamp;
+    }
+
+    return mix;
}
-#endif
#endif //_OP_BLEND_H_
diff --git a/plugins/gpu_unai/gpu_inner_blend_arm5.h b/plugins/gpu_unai/gpu_inner_blend_arm5.h
new file mode 100644
index 0000000..0e9b74f
--- /dev/null
+++ b/plugins/gpu_unai/gpu_inner_blend_arm5.h
@@ -0,0 +1,100 @@
+/***************************************************************************
+* Copyright (C) 2010 PCSX4ALL Team *
+* Copyright (C) 2010 Unai *
+* *
+* This program is free software; you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published by *
+* the Free Software Foundation; either version 2 of the License, or *
+* (at your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with this program; if not, write to the *
+* Free Software Foundation, Inc., *
+* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
+***************************************************************************/
+
+#ifndef _OP_BLEND_H_
+#define _OP_BLEND_H_
+
+// GPU Blending operations functions
+
+// 0.5 x Back + 0.5 x Forward
+// Masks both uSrc and uDst with 'uMsk', adds them and shifts the sum right by
+// one. The result is left in uSrc; uDst is overwritten with its masked value.
+// 'uMsk' is not a macro parameter: it must be in scope where the macro expands.
+// Each instruction is emitted as its own asm statement.
+#define gpuBlending00(uSrc,uDst) \
+{ \
+ asm ("and %[src], %[src], %[msk] " : [src] "=r" (uSrc) : "0" (uSrc), [msk] "r" (uMsk) ); \
+ asm ("and %[dst], %[dst], %[msk] " : [dst] "=r" (uDst) : "0" (uDst), [msk] "r" (uMsk) ); \
+ asm ("add %[src], %[dst], %[src] " : [src] "=r" (uSrc) : [dst] "r" (uDst), "0" (uSrc) ); \
+ asm ("mov %[src], %[src], lsr #1 " : [src] "=r" (uSrc) : "0" (uSrc) ); \
+}
+
+// 1.0 x Back + 1.0 x Forward
+// For each 5-bit channel (masks 0x7C00, 0x03E0, 0x001F) the masked uDst and
+// uSrc fields are added and the sum is replaced by the channel mask when it
+// exceeds it (cmp + movhi). The three channels are ORed together into uSrc.
+// NOTE(review): every instruction is a separate asm statement, so each movhi
+// depends on condition flags left by the previous statement's cmp. Nothing in
+// the operand lists expresses that dependency; confirm the compiler in use
+// keeps the statements adjacent and the flags intact between them.
+#define gpuBlending01(uSrc,uDst) \
+{ \
+ u16 st,dt,out; \
+ asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
+ asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
+ asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \
+ asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \
+ asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \
+ asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
+ asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
+ asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
+ asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \
+ asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \
+ asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
+ asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
+ asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \
+ asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
+ asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \
+ asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \
+ asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \
+}
+
+// 1.0 x Back - 1.0 x Forward
+// For each 5-bit channel the masked uSrc field is subtracted from the masked
+// uDst field. A negative blue difference is replaced by zero (movmi); green
+// and red differences are ORed into the result only when non-negative (orrpl).
+// The combined value is written to uSrc.
+// NOTE(review): every instruction is a separate asm statement, so movmi/orrpl
+// depend on condition flags left by the previous statement's subs. Nothing in
+// the operand lists expresses that dependency; confirm the compiler in use
+// keeps the statements adjacent and the flags intact between them.
+#define gpuBlending02(uSrc,uDst) \
+{ \
+ u16 st,dt,out; \
+ asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
+ asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
+ asm ("subs %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) : "cc" ); \
+ asm ("movmi %[out], #0x0000 " : [out] "=r" (out) : "0" (out) ); \
+ asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
+ asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
+ asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \
+ asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
+ asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
+ asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \
+ asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \
+ asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
+ asm ("mov %[uSrc], %[out]" : [uSrc] "=r" (uSrc) : [out] "r" (out) ); \
+}
+
+// 1.0 x Back + 0.25 x Forward
+// uSrc is first shifted right by two (and stays shifted), then its channel
+// fields are taken with the narrowed masks 0x1C00, 0x00E0 and 0x0007. Each is
+// added to the matching uDst field and the sum is replaced by the full channel
+// mask when it exceeds it (cmp + movhi). The channels are ORed into uSrc.
+// NOTE(review): every instruction is a separate asm statement, so each movhi
+// depends on condition flags left by the previous statement's cmp. Nothing in
+// the operand lists expresses that dependency; confirm the compiler in use
+// keeps the statements adjacent and the flags intact between them.
+#define gpuBlending03(uSrc,uDst) \
+{ \
+ u16 st,dt,out; \
+ asm ("mov %[src], %[src], lsr #2 " : [src] "=r" (uSrc) : "0" (uSrc) ); \
+ asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
+ asm ("and %[st], %[src], #0x1C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
+ asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \
+ asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \
+ asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \
+ asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
+ asm ("and %[st], %[src], #0x00E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
+ asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
+ asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \
+ asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \
+ asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \
+ asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \
+ asm ("and %[st], %[src], #0x0007 " : [st] "=r" (st) : [src] "r" (uSrc) ); \
+ asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \
+ asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \
+ asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \
+ asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \
+}
+
+#endif //_OP_BLEND_H_
diff --git a/plugins/gpu_unai/gpu_inner_blend_arm7.h b/plugins/gpu_unai/gpu_inner_blend_arm7.h
new file mode 100644
index 0000000..083e62d
--- /dev/null
+++ b/plugins/gpu_unai/gpu_inner_blend_arm7.h
@@ -0,0 +1,107 @@
+/***************************************************************************
+* Copyright (C) 2010 PCSX4ALL Team *
+* Copyright (C) 2010 Unai *
+* *
+* This program is free software; you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published by *
+* the Free Software Foundation; either version 2 of the License, or *
+* (at your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with this program; if not, write to the *
+* Free Software Foundation, Inc., *
+* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
+***************************************************************************/
+
+#ifndef _OP_BLEND_H_
+#define _OP_BLEND_H_
+
+// GPU Blending operations functions
+
+// 0.5 x Back + 0.5 x Forward
+// Single asm block: masks uSrc and uDst with 'uMsk', adds them and shifts the
+// sum right by one. The result is left in uSrc; uDst is an output operand too
+// and ends up holding its masked value.
+// 'uMsk' is not a macro parameter: it must be in scope where the macro expands.
+#define gpuBlending00(uSrc,uDst) \
+{ \
+ asm ("and %[src], %[src], %[msk]\n" \
+ "and %[dst], %[dst], %[msk]\n" \
+ "add %[src], %[dst], %[src]\n" \
+ "mov %[src], %[src], lsr #1\n" \
+ : [src] "=&r" (uSrc), [dst] "=&r" (uDst) : "0" (uSrc), "1" (uDst), [msk] "r" (uMsk)); \
+}
+
+// 1.0 x Back + 1.0 x Forward
+// Single asm block. For each 5-bit channel (masks 0x7C00, 0x03E0, 0x001F) the
+// masked uDst and uSrc fields are added and the sum is replaced by the channel
+// mask when it exceeds it (cmp + movhi). The three channels are ORed together
+// into uSrc; uDst is read-only. st/dt/out are scratch registers and the
+// condition flags are declared clobbered.
+#define gpuBlending01(uSrc,uDst) \
+{ \
+ u32 st,dt,out; \
+ asm ("and %[dt], %[dst], #0x7C00\n" \
+ "and %[st], %[src], #0x7C00\n" \
+ "add %[out], %[dt], %[st] \n" \
+ "cmp %[out], #0x7C00 \n" \
+ "movhi %[out], #0x7C00 \n" \
+ "and %[dt], %[dst], #0x03E0\n" \
+ "and %[st], %[src], #0x03E0\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x03E0 \n" \
+ "movhi %[dt], #0x03E0 \n" \
+ "orr %[out], %[out], %[dt] \n" \
+ "and %[dt], %[dst], #0x001F\n" \
+ "and %[st], %[src], #0x001F\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x001F \n" \
+ "movhi %[dt], #0x001F \n" \
+ "orr %[src], %[out], %[dt] \n" \
+ : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
+ : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
+}
+
+// 1.0 x Back - 1.0 x Forward
+// Single asm block. For each 5-bit channel the masked uSrc field is subtracted
+// from the masked uDst field. A negative blue difference is replaced by zero
+// (movmi); green and red differences are ORed into the result only when
+// non-negative (orrpl). The combined value is written to uSrc; uDst is
+// read-only. st/dt/out are scratch registers and the condition flags are
+// declared clobbered.
+#define gpuBlending02(uSrc,uDst) \
+{ \
+ u32 st,dt,out; \
+ asm ("and %[dt], %[dst], #0x7C00\n" \
+ "and %[st], %[src], #0x7C00\n" \
+ "subs %[out], %[dt], %[st] \n" \
+ "movmi %[out], #0x0000 \n" \
+ "and %[dt], %[dst], #0x03E0\n" \
+ "and %[st], %[src], #0x03E0\n" \
+ "subs %[dt], %[dt], %[st] \n" \
+ "orrpl %[out], %[out], %[dt] \n" \
+ "and %[dt], %[dst], #0x001F\n" \
+ "and %[st], %[src], #0x001F\n" \
+ "subs %[dt], %[dt], %[st] \n" \
+ "orrpl %[out], %[out], %[dt] \n" \
+ "mov %[src], %[out] \n" \
+ : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
+ : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
+}
+
+// 1.0 x Back + 0.25 x Forward
+// Single asm block. uSrc is first shifted right by two, then its channel
+// fields are taken with the narrowed masks 0x1C00, 0x00E0 and 0x0007. Each is
+// added to the matching uDst field and the sum is replaced by the full channel
+// mask when it exceeds it (cmp + movhi). The channels are ORed into uSrc; uDst
+// is read-only. st/dt/out are scratch registers and the condition flags are
+// declared clobbered.
+#define gpuBlending03(uSrc,uDst) \
+{ \
+ u32 st,dt,out; \
+ asm ("mov %[src], %[src], lsr #2 \n" \
+ "and %[dt], %[dst], #0x7C00\n" \
+ "and %[st], %[src], #0x1C00\n" \
+ "add %[out], %[dt], %[st] \n" \
+ "cmp %[out], #0x7C00 \n" \
+ "movhi %[out], #0x7C00 \n" \
+ "and %[dt], %[dst], #0x03E0\n" \
+ "and %[st], %[src], #0x00E0\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x03E0 \n" \
+ "movhi %[dt], #0x03E0 \n" \
+ "orr %[out], %[out], %[dt] \n" \
+ "and %[dt], %[dst], #0x001F\n" \
+ "and %[st], %[src], #0x0007\n" \
+ "add %[dt], %[dt], %[st] \n" \
+ "cmp %[dt], #0x001F \n" \
+ "movhi %[dt], #0x001F \n" \
+ "orr %[src], %[out], %[dt] \n" \
+ : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \
+ : [dst] "r" (uDst), "0" (uSrc) : "cc"); \
+}
+
+#endif //_OP_BLEND_H_
diff --git a/plugins/gpu_unai/gpu_inner_light.h b/plugins/gpu_unai/gpu_inner_light.h
index d291418..b041dc3 100644
--- a/plugins/gpu_unai/gpu_inner_light.h
+++ b/plugins/gpu_unai/gpu_inner_light.h
@@ -1,5 +1,5 @@
/***************************************************************************
-* Copyright (C) 2010 PCSX4ALL Team *
+* Copyright (C) 2016 PCSX4ALL Team *
* Copyright (C) 2010 Unai *
* *
* This program is free software; you can redistribute it and/or modify *
@@ -23,60 +23,249 @@
// GPU color operations for lighting calculations
-#ifdef __arm__
-#define gpuLightingRGB(uSrc,lCol) \
-{ \
- u32 cb,cg; \
- asm ("and %[cb], %[lCol], #0x7C00/32 \n" \
- "and %[cg], %[lCol], #0x03E0*2048 \n" \
- "mov %[res], %[lCol], lsr #27\n" \
- "orr %[res], %[res], %[cb], lsl #5 \n" \
- "orr %[res], %[res], %[cg], lsr #11\n" \
- : [res] "=&r" (uSrc), [cb] "=&r" (cb), [cg] "=&r" (cg) \
- : [lCol] "r" (lCol)); \
+static void SetupLightLUT()
+{
+    // Fills gpu_unai.LightLUT, a 32x32 (1024-entry) table holding the product
+    // of two 5-bit values divided by 16 and clamped to 31. The lighting
+    // helpers index it with (texture component * 32) | light component.
+    // A light value of 16 leaves the incoming texture color unchanged
+    // (row/column 16 is the identity); smaller values darken, larger brighten.
+    // LightLUT[32*32] array is initialized to following values:
+    // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    // 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+    // 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5,
+    // 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+    // 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9,
+    // 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,11,11,
+    // 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9,10,10,10,11,11,12,12,13,13,
+    // 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,
+    // 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9,10,10,11,11,12,12,13,14,14,15,15,16,16,17,
+    // 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9,10,10,11,11,12,13,13,14,15,15,16,16,17,18,18,19,
+    // 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,19,20,21,
+    // 0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9,10,11,12,12,13,14,15,15,16,17,18,18,19,20,21,21,22,23,
+    // 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 8, 9,10,11,12,13,13,14,15,16,17,17,18,19,20,21,21,22,23,24,25,
+    // 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,15,16,17,18,19,20,21,21,22,23,24,25,26,27,
+    // 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,
+    // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+    // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,31,
+    // 0, 1, 2, 3, 4, 5, 6, 7, 9,10,11,12,13,14,15,16,18,19,20,21,22,23,24,25,27,28,29,30,31,31,31,31,
+    // 0, 1, 2, 3, 4, 5, 7, 8, 9,10,11,13,14,15,16,17,19,20,21,22,23,24,26,27,28,29,30,31,31,31,31,31,
+    // 0, 1, 2, 3, 5, 6, 7, 8,10,11,12,13,15,16,17,18,20,21,22,23,25,26,27,28,30,31,31,31,31,31,31,31,
+    // 0, 1, 2, 3, 5, 6, 7, 9,10,11,13,14,15,17,18,19,21,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31,
+    // 0, 1, 2, 4, 5, 6, 8, 9,11,12,13,15,16,17,19,20,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31,31,
+    // 0, 1, 2, 4, 5, 7, 8,10,11,12,14,15,17,18,20,21,23,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31,
+    // 0, 1, 3, 4, 6, 7, 9,10,12,13,15,16,18,19,21,22,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31,
+    // 0, 1, 3, 4, 6, 7, 9,10,12,14,15,17,18,20,21,23,25,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31,
+    // 0, 1, 3, 4, 6, 8, 9,11,13,14,16,17,19,21,22,24,26,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31,
+    // 0, 1, 3, 5, 6, 8,10,11,13,15,16,18,20,21,23,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31,
+    // 0, 1, 3, 5, 7, 8,10,12,14,15,17,19,21,22,24,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31,
+    // 0, 1, 3, 5, 7, 9,10,12,14,16,18,19,21,23,25,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31,
+    // 0, 1, 3, 5, 7, 9,11,13,15,16,18,20,22,24,26,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,
+    // 0, 1, 3, 5, 7, 9,11,13,15,17,19,21,23,25,27,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31
+
+    // One pass over the flat index: the upper five bits select the row, the
+    // lower five bits the column.
+    for (int idx = 0; idx < 32*32; ++idx) {
+        const int row = idx >> 5;
+        const int col = idx & 31;
+        const int scaled = (col * row) / 16;
+        gpu_unai.LightLUT[idx] = (scaled > 31) ? 31 : scaled;
+    }
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Create packed Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet
+//
+// INPUT:
+// 'r','g','b' are 8.10 fixed-pt color components (r shown here)
+// 'r' input: --------------rrrrrrrrXXXXXXXXXX
+// ^ bit 31
+// RETURNS:
+// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
+// ^ bit 31
+// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '-' don't care
+////////////////////////////////////////////////////////////////////////////////
+GPU_INLINE u32 gpuPackGouraudCol(u32 r, u32 g, u32 b)
+{
+    // Field masks are built from unsigned constants: 0x07ff<<21 does not fit
+    // in a signed int, so shifting the plain int literal would overflow.
+    const u32 b_field = (b >>  8) &  0x03ffu;         // 8.2 in bits  0..9
+    const u32 g_field = (g <<  3) & (0x07ffu << 10);  // 8.3 in bits 10..20
+    const u32 r_field = (r << 14) & (0x07ffu << 21);  // 8.3 in bits 21..31
+    return b_field | g_field | r_field;
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Create packed increment for Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet
+//
+// INPUT:
+// Sign-extended 8.10 fixed-pt r,g,b color increment values (only dr is shown)
+// 'dr' input: ssssssssssssssrrrrrrrrXXXXXXXXXX
+// ^ bit 31
+// RETURNS:
+// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
+// ^ bit 31
+// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and 's' sign bits
+//
+// NOTE: The correctness of this code/method has not been fully verified,
+// having been merely factored out from original code in
+// poly-drawing functions. Feel free to check/improve it -senquack
+////////////////////////////////////////////////////////////////////////////////
+GPU_INLINE u32 gpuPackGouraudColInc(s32 dr, s32 dg, s32 db)
+{
+    // dr/dg are converted to unsigned before the left shift: left-shifting a
+    // negative signed value is undefined, while the unsigned shift produces
+    // the same two's-complement bit pattern with defined behavior.
+    // Negative increments get one unit added in their own field.
+    u32 dr_tmp = ((u32)dr << 14) & (0xffffffffu << 21); if (dr < 0) dr_tmp += 1u << 21;
+    u32 dg_tmp = ((u32)dg <<  3) & (0xffffffffu << 10); if (dg < 0) dg_tmp += 1u << 10;
+    // db is still shifted right as a signed value, exactly as before, so the
+    // sign bits are carried down into the upper part of the word.
+    u32 db_tmp = (u32)(db >> 8);                        if (db < 0) db_tmp += 1u <<  0;
+    return db_tmp + dg_tmp + dr_tmp;
}
-#else
-#define gpuLightingRGB(uSrc,lCol) uSrc=((lCol<<5)&0x7C00) | ((lCol>>11)&0x3E0) | (lCol>>27)
-#endif
-INLINE void gpuLightingTXT(u16 &uSrc, u32 &lCol)
+
+////////////////////////////////////////////////////////////////////////////////
+// Reduce a packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet to bgr555 by
+// keeping the top five bits of each component.
+//
+// INPUT:
+// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
+// ^ bit 31
+// RETURNS:
+// u16 output: 0bbbbbgggggrrrrr
+// ^ bit 15
+// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '0' zero
+////////////////////////////////////////////////////////////////////////////////
+GPU_INLINE u16 gpuLightingRGB(u32 gCol)
+{
+    const u32 blue5  = (gCol <<  5) & 0x7C00;  // bits 5..9   -> bits 10..14
+    const u32 green5 = (gCol >> 11) & 0x03E0;  // bits 16..20 -> bits 5..9
+    const u32 red5   =  gCol >> 27;            // bits 27..31 -> bits 0..4
+    return blue5 | green5 | red5;
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Repack a Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet ('gCol') as a
+// padded u32 5.4:5.4:5.4 bgr fixed-pt triplet, the format consumed by the
+// HQ 24-bit lighting/quantization path. The top nine bits of each
+// component are kept.
+//
+// INPUT:
+// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
+// ^ bit 31
+// RETURNS:
+// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
+// ^ bit 31
+// Where 'X' are fixed-pt bits, '0' zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+GPU_INLINE u32 gpuLightingRGB24(u32 gCol)
+{
+    const u32 blue9  = (gCol << 19) & (0x1FF << 20);  // lands in bits 20..28
+    const u32 green9 = (gCol >>  2) & (0x1FF << 10);  // lands in bits 10..18
+    const u32 red9   =  gCol >> 23;                   // lands in bits 0..8
+    return blue9 | green9 | red9;
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Apply fast (low-precision) 5-bit lighting to bgr555 texture color:
+//
+// INPUT:
+// 'r5','g5','b5' are unsigned 5-bit color values; a value of 16 is the
+// neutral point that leaves that component of the texture unchanged
+// 'uSrc' input: -bbbbbgggggrrrrr
+// ^ bit 16
+// RETURNS:
+// u16 output: 0bbbbbgggggrrrrr
+// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+GPU_INLINE u16 gpuLightingTXT(u16 uSrc, u8 r5, u8 g5, u8 b5)
{
- // Pixelops Table
- static const u8 _gpuLitT[32*32] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5,
- 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
- 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9,
- 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,11,11,
- 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9,10,10,10,11,11,12,12,13,13,
- 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,
- 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9,10,10,11,11,12,12,13,14,14,15,15,16,16,17,
- 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9,10,10,11,11,12,13,13,14,15,15,16,16,17,18,18,19,
- 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,19,20,21,
- 0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9,10,11,12,12,13,14,15,15,16,17,18,18,19,20,21,21,22,23,
- 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 8, 9,10,11,12,13,13,14,15,16,17,17,18,19,20,21,21,22,23,24,25,
- 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,15,16,17,18,19,20,21,21,22,23,24,25,26,27,
- 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,31,
- 0, 1, 2, 3, 4, 5, 6, 7, 9,10,11,12,13,14,15,16,18,19,20,21,22,23,24,25,27,28,29,30,31,31,31,31,
- 0, 1, 2, 3, 4, 5, 7, 8, 9,10,11,13,14,15,16,17,19,20,21,22,23,24,26,27,28,29,30,31,31,31,31,31,
- 0, 1, 2, 3, 5, 6, 7, 8,10,11,12,13,15,16,17,18,20,21,22,23,25,26,27,28,30,31,31,31,31,31,31,31,
- 0, 1, 2, 3, 5, 6, 7, 9,10,11,13,14,15,17,18,19,21,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31,
- 0, 1, 2, 4, 5, 6, 8, 9,11,12,13,15,16,17,19,20,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31,31,
- 0, 1, 2, 4, 5, 7, 8,10,11,12,14,15,17,18,20,21,23,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31,
- 0, 1, 3, 4, 6, 7, 9,10,12,13,15,16,18,19,21,22,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31,
- 0, 1, 3, 4, 6, 7, 9,10,12,14,15,17,18,20,21,23,25,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31,
- 0, 1, 3, 4, 6, 8, 9,11,13,14,16,17,19,21,22,24,26,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31,
- 0, 1, 3, 5, 6, 8,10,11,13,15,16,18,20,21,23,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31,
- 0, 1, 3, 5, 7, 8,10,12,14,15,17,19,21,22,24,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31,
- 0, 1, 3, 5, 7, 9,10,12,14,16,18,19,21,23,25,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31,
- 0, 1, 3, 5, 7, 9,11,13,15,16,18,20,22,24,26,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,
- 0, 1, 3, 5, 7, 9,11,13,15,17,19,21,23,25,27,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31
- };
- uSrc = (_gpuLitT[((uSrc&0x7C00)>>5)|((lCol>>5)&0x1f)]<<10)|(_gpuLitT[(uSrc&0x03E0)|((lCol>>16)&0x1f)]<<5)|(_gpuLitT[((uSrc&0x001F)<<5)|(lCol>>27)]);
+ return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | b5] << 10) |
+ (gpu_unai.LightLUT[ (uSrc&0x03E0) | g5] << 5) |
+ (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | r5] );
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Light a bgr555 texture color with the fast (low-precision) 5-bit path,
+// taking the light from a packed Gouraud color. Only the top five bits of
+// each Gouraud component are used, as the light index into LightLUT.
+//
+// INPUT:
+// 'gCol' is a packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet, read here
+// as shown below; a 5-bit value of 16 is the neutral point that does not
+// modify the color of the texture
+// gCol input : rrrrrXXXXXXgggggXXXXXXbbbbbXXXXX
+// ^ bit 31
+// 'uSrc' input: -bbbbbgggggrrrrr
+// ^ bit 15
+// RETURNS:
+// u16 output: 0bbbbbgggggrrrrr
+// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+GPU_INLINE u16 gpuLightingTXTGouraud(u16 uSrc, u32 gCol)
+{
+    // Texture component goes in the upper five bits of the LUT index ...
+    const u32 texB = (uSrc & 0x7C00) >> 5;
+    const u32 texG =  uSrc & 0x03E0;
+    const u32 texR = (uSrc & 0x001F) << 5;
+
+    // ... and the light component in the lower five bits.
+    const u32 litB = (gCol >>  5) & 0x1F;
+    const u32 litG = (gCol >> 16) & 0x1F;
+    const u32 litR =  gCol >> 27;
+
+    return (gpu_unai.LightLUT[texB | litB] << 10) |
+           (gpu_unai.LightLUT[texG | litG] <<  5) |
+           (gpu_unai.LightLUT[texR | litR]      );
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Light a bgr555 texture color with the high-precision 8-bit path. The result
+// is a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet for the HQ 24-bit
+// lighting/quantization stage.
+//
+// INPUT:
+// 'r8','g8','b8' are unsigned 8-bit color component values; a value of
+// 128 is the neutral point that leaves that component of the texture
+// unchanged, larger values brighten (saturating at the channel maximum)
+//
+// uSrc input: -bbbbbgggggrrrrr
+// ^ bit 15
+// RETURNS:
+// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
+// ^ bit 31
+// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+GPU_INLINE u32 gpuLightingTXT24(u16 uSrc, u8 r8, u8 g8, u8 b8)
+{
+    // Texture fields are multiplied in place (not shifted down first), so each
+    // product carries its channel's original bit offset: 0, 5 and 10.
+    u32 red   = (u32)(uSrc & 0x001F) * r8;
+    u32 green = (u32)(uSrc & 0x03E0) * g8;
+    u32 blue  = (u32)(uSrc & 0x7C00) * b8;
+
+    // Saturate each product to the largest value that still fits in nine
+    // bits once it has been shifted down below.
+    if (red   > 0x00000FFF) red   = 0x00000FFF;
+    if (green > 0x0001FFFF) green = 0x0001FFFF;
+    if (blue  > 0x003FFFFF) blue  = 0x003FFFFF;
+
+    return  (red   >>  3)        |
+           ((green >>  8) << 10) |
+           ((blue  >> 13) << 20);
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Light the bgr555 texture color in 'uSrc' with the high-precision 8-bit
+// path, taking the light from the integer part of each component of the
+// packed Gouraud color 'gCol'. The result is a padded u32 5.4:5.4:5.4 bgr
+// fixed-pt triplet for the HQ 24-bit lighting/quantization stage.
+//
+// INPUT:
+// 'uSrc' input: -bbbbbgggggrrrrr
+// ^ bit 15
+// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
+// ^ bit 31
+// RETURNS:
+// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
+// ^ bit 31
+// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+GPU_INLINE u32 gpuLightingTXT24Gouraud(u16 uSrc, u32 gCol)
+{
+    // 8-bit integer part of each Gouraud component
+    const u32 litR = (gCol >> 24) & 0xFF;
+    const u32 litG = (gCol >> 13) & 0xFF;
+    const u32 litB = (gCol >>  2) & 0xFF;
+
+    // Texture fields are multiplied in place (not shifted down first), so each
+    // product carries its channel's original bit offset: 0, 5 and 10.
+    u32 red   = (u32)(uSrc & 0x001F) * litR;
+    u32 green = (u32)(uSrc & 0x03E0) * litG;
+    u32 blue  = (u32)(uSrc & 0x7C00) * litB;
+
+    // Saturate each product to the largest value that still fits in nine
+    // bits once it has been shifted down below.
+    if (red   > 0x00000FFF) red   = 0x00000FFF;
+    if (green > 0x0001FFFF) green = 0x0001FFFF;
+    if (blue  > 0x003FFFFF) blue  = 0x003FFFFF;
+
+    return  (red   >>  3)        |
+           ((green >>  8) << 10) |
+           ((blue  >> 13) << 20);
}
#endif //_OP_LIGHT_H_
diff --git a/plugins/gpu_unai/gpu_inner_quantization.h b/plugins/gpu_unai/gpu_inner_quantization.h
new file mode 100644
index 0000000..0e7e3e8
--- /dev/null
+++ b/plugins/gpu_unai/gpu_inner_quantization.h
@@ -0,0 +1,108 @@
+/***************************************************************************
+* Copyright (C) 2016 PCSX4ALL Team *
+* *
+* This program is free software; you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published by *
+* the Free Software Foundation; either version 2 of the License, or *
+* (at your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with this program; if not, write to the *
+* Free Software Foundation, Inc., *
+* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
+***************************************************************************/
+
+#ifndef _OP_DITHER_H_
+#define _OP_DITHER_H_
+
+static void SetupDitheringConstants()
+{
+ // Initialize Dithering Constants
+ // The screen is divided into 8x8 chunks and sub-unitary noise is applied
+ // using the following matrix. This ensures that data lost in color
+ // quantization will be added back to the image 'by chance' in predictable
+ // patterns that are naturally 'smoothed' by your sight when viewed from a
+ // certain distance.
+ //
+ // http://caca.zoy.org/study/index.html
+ //
+ // Shading colors are encoded in 5.4 fixed point, and then are quantized
+ // to 5.0; DitherMatrix constants reflect that (4 fraction bits per lane).
+
+ static const u8 DitherMatrix[] = {
+ 0, 32, 8, 40, 2, 34, 10, 42,
+ 48, 16, 56, 24, 50, 18, 58, 26,
+ 12, 44, 4, 36, 14, 46, 6, 38,
+ 60, 28, 52, 20, 62, 30, 54, 22,
+ 3, 35, 11, 43, 1, 33, 9, 41,
+ 51, 19, 59, 27, 49, 17, 57, 25,
+ 15, 47, 7, 39, 13, 45, 5, 37,
+ 63, 31, 55, 23, 61, 29, 53, 21
+ };
+
+ int i, j;
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ u16 offset = (i << 3) | j; // flat index 0..63: i in bits 3-5, j in bits 0-2
+
+ u32 component = ((DitherMatrix[offset] + 1) << 4) / 65; //[5.5] -> [5]; maps 1..64 to 0..15
+
+ // XXX - senquack - hack Dec 2016
+ // Until JohnnyF gets the time to work further on dithering,
+ // force lower bit of component to 0. This fixes grid pattern
+ // affecting quality of dithered image, as well as loss of
+ // detail in dark areas. With lower bit unset like this, existing
+ // 27-bit accuracy of dithering math is unneeded, could be 24-bit.
+ // Is 8x8 matrix overkill as a result, can we use 4x4?
+ component &= ~1; // leaves even values 0..14
+
+ gpu_unai.DitherMatrix[offset] = (component) // same offset replicated into three lanes, 10 bits apart
+ | (component << 10)
+ | (component << 20);
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Convert padded u32 5.4:5.4:5.4 bgr fixed-pt triplet to final bgr555 color,
+// applying dithering if specified by template parameter.
+//
+// INPUT:
+// 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
+// ^ bit 31
+// 'pDst' is a pointer to destination framebuffer pixel, used
+// to determine which DitherMatrix[] entry to apply.
+// RETURNS:
+// u16 output: 0bbbbbgggggrrrrr
+// ^ bit 16
+// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+template <int DITHER>
+GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const u16 *pDst)
+{
+ if (DITHER)
+ {
+ u16 fbpos = (u32)(pDst - gpu_unai.vram); // pixel index of pDst in vram, truncated to 16 bits (only bits 0-2 and 10-12 are used)
+ u16 offset = ((fbpos & (0x7 << 10)) >> 7) | (fbpos & 0x7); // 6-bit table index: fbpos bits 10-12 -> bits 3-5, fbpos bits 0-2 -> bits 0-2
+
+ //clear the padding/overflow bits (9, 19, 29-31), then add the dither offset to each lane
+ uSrc24 = (uSrc24 & 0x1FF7FDFF) + gpu_unai.DitherMatrix[offset];
+
+ if (uSrc24 & (1<< 9)) uSrc24 |= (0x1FF ); // red lane carried into bit 9: saturate it
+ if (uSrc24 & (1<<19)) uSrc24 |= (0x1FF<<10); // green lane carried into bit 19: saturate it
+ if (uSrc24 & (1<<29)) uSrc24 |= (0x1FF<<20); // blue lane carried into bit 29: saturate it
+ }
+
+ return ((uSrc24>> 4) & (0x1F )) // red: bits 4-8 -> bits 0-4
+ | ((uSrc24>> 9) & (0x1F<<5 )) // green: bits 14-18 -> bits 5-9
+ | ((uSrc24>>14) & (0x1F<<10)); // blue: bits 24-28 -> bits 10-14
+}
+
+#endif //_OP_DITHER_H_
diff --git a/plugins/gpu_unai/gpu_raster_image.h b/plugins/gpu_unai/gpu_raster_image.h
index 0c82aa9..87d2151 100644
--- a/plugins/gpu_unai/gpu_raster_image.h
+++ b/plugins/gpu_unai/gpu_raster_image.h
@@ -19,71 +19,79 @@
***************************************************************************/
///////////////////////////////////////////////////////////////////////////////
-INLINE void gpuLoadImage(void)
+#ifndef USE_GPULIB
+void gpuLoadImage(PtrUnion packet)
{
u16 x0, y0, w0, h0;
- x0 = PacketBuffer.U2[2] & 1023;
- y0 = PacketBuffer.U2[3] & 511;
- w0 = PacketBuffer.U2[4];
- h0 = PacketBuffer.U2[5];
+ x0 = packet.U2[2] & 1023;
+ y0 = packet.U2[3] & 511;
+ w0 = packet.U2[4];
+ h0 = packet.U2[5];
if ((y0 + h0) > FRAME_HEIGHT)
{
h0 = FRAME_HEIGHT - y0;
}
- FrameToWrite = ((w0)&&(h0));
+ gpu_unai.dma.FrameToWrite = ((w0)&&(h0));
- px = 0;
- py = 0;
- x_end = w0;
- y_end = h0;
- pvram = &((u16*)GPU_FrameBuffer)[x0+(y0*1024)];
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.py = 0;
+ gpu_unai.dma.x_end = w0;
+ gpu_unai.dma.y_end = h0;
+ gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)];
- GPU_GP1 |= 0x08000000;
+ gpu_unai.GPU_GP1 |= 0x08000000;
}
+#endif // !USE_GPULIB
///////////////////////////////////////////////////////////////////////////////
-INLINE void gpuStoreImage(void)
+#ifndef USE_GPULIB
+void gpuStoreImage(PtrUnion packet)
{
u16 x0, y0, w0, h0;
- x0 = PacketBuffer.U2[2] & 1023;
- y0 = PacketBuffer.U2[3] & 511;
- w0 = PacketBuffer.U2[4];
- h0 = PacketBuffer.U2[5];
+ x0 = packet.U2[2] & 1023;
+ y0 = packet.U2[3] & 511;
+ w0 = packet.U2[4];
+ h0 = packet.U2[5];
if ((y0 + h0) > FRAME_HEIGHT)
{
h0 = FRAME_HEIGHT - y0;
}
- FrameToRead = ((w0)&&(h0));
+ gpu_unai.dma.FrameToRead = ((w0)&&(h0));
- px = 0;
- py = 0;
- x_end = w0;
- y_end = h0;
- pvram = &((u16*)GPU_FrameBuffer)[x0+(y0*1024)];
+ gpu_unai.dma.px = 0;
+ gpu_unai.dma.py = 0;
+ gpu_unai.dma.x_end = w0;
+ gpu_unai.dma.y_end = h0;
+ gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)];
- GPU_GP1 |= 0x08000000;
+ gpu_unai.GPU_GP1 |= 0x08000000;
}
+#endif // !USE_GPULIB
-INLINE void gpuMoveImage(void)
+void gpuMoveImage(PtrUnion packet)
{
u32 x0, y0, x1, y1;
s32 w0, h0;
- x0 = PacketBuffer.U2[2] & 1023;
- y0 = PacketBuffer.U2[3] & 511;
- x1 = PacketBuffer.U2[4] & 1023;
- y1 = PacketBuffer.U2[5] & 511;
- w0 = PacketBuffer.U2[6];
- h0 = PacketBuffer.U2[7];
+ x0 = packet.U2[2] & 1023;
+ y0 = packet.U2[3] & 511;
+ x1 = packet.U2[4] & 1023;
+ y1 = packet.U2[5] & 511;
+ w0 = packet.U2[6];
+ h0 = packet.U2[7];
if( (x0==x1) && (y0==y1) ) return;
if ((w0<=0) || (h0<=0)) return;
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"gpuMoveImage(x0=%u,y0=%u,x1=%u,y1=%u,w0=%d,h0=%d)\n",x0,y0,x1,y1,w0,h0);
+ #endif
+
if (((y0+h0)>512)||((x0+w0)>1024)||((y1+h0)>512)||((x1+w0)>1024))
{
- u16 *psxVuw=GPU_FrameBuffer;
+ u16 *psxVuw=gpu_unai.vram;
s32 i,j;
for(j=0;j<h0;j++)
for(i=0;i<w0;i++)
@@ -93,7 +101,7 @@ INLINE void gpuMoveImage(void)
else if ((x0&1)||(x1&1))
{
u16 *lpDst, *lpSrc;
- lpDst = lpSrc = (u16*)GPU_FrameBuffer;
+ lpDst = lpSrc = (u16*)gpu_unai.vram;
lpSrc += FRAME_OFFSET(x0, y0);
lpDst += FRAME_OFFSET(x1, y1);
x1 = FRAME_WIDTH - w0;
@@ -107,7 +115,7 @@ INLINE void gpuMoveImage(void)
else
{
u32 *lpDst, *lpSrc;
- lpDst = lpSrc = (u32*)(void*)GPU_FrameBuffer;
+ lpDst = lpSrc = (u32*)(void*)gpu_unai.vram;
lpSrc += ((FRAME_OFFSET(x0, y0))>>1);
lpDst += ((FRAME_OFFSET(x1, y1))>>1);
if (w0&1)
@@ -143,13 +151,13 @@ INLINE void gpuMoveImage(void)
}
}
-INLINE void gpuClearImage(void)
+void gpuClearImage(PtrUnion packet)
{
s32 x0, y0, w0, h0;
- x0 = PacketBuffer.S2[2];
- y0 = PacketBuffer.S2[3];
- w0 = PacketBuffer.S2[4] & 0x3ff;
- h0 = PacketBuffer.S2[5] & 0x3ff;
+ x0 = packet.S2[2];
+ y0 = packet.S2[3];
+ w0 = packet.S2[4] & 0x3ff;
+ h0 = packet.S2[5] & 0x3ff;
w0 += x0;
if (x0 < 0) x0 = 0;
@@ -162,10 +170,14 @@ INLINE void gpuClearImage(void)
h0 -= y0;
if (h0 <= 0) return;
+ #ifdef ENABLE_GPU_LOG_SUPPORT
+ fprintf(stdout,"gpuClearImage(x0=%d,y0=%d,w0=%d,h0=%d)\n",x0,y0,w0,h0);
+ #endif
+
if (x0&1)
{
- u16* pixel = (u16*)GPU_FrameBuffer + FRAME_OFFSET(x0, y0);
- u16 rgb = GPU_RGB16(PacketBuffer.S4[0]);
+ u16* pixel = (u16*)gpu_unai.vram + FRAME_OFFSET(x0, y0);
+ u16 rgb = GPU_RGB16(packet.U4[0]);
y0 = FRAME_WIDTH - w0;
do {
x0=w0;
@@ -175,8 +187,8 @@ INLINE void gpuClearImage(void)
}
else
{
- u32* pixel = (u32*)(void*)GPU_FrameBuffer + ((FRAME_OFFSET(x0, y0))>>1);
- u32 rgb = GPU_RGB16(PacketBuffer.S4[0]);
+ u32* pixel = (u32*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1);
+ u32 rgb = GPU_RGB16(packet.U4[0]);
rgb |= (rgb<<16);
if (w0&1)
{
diff --git a/plugins/gpu_unai/gpu_raster_line.h b/plugins/gpu_unai/gpu_raster_line.h
index fc59b79..28ea074 100644
--- a/plugins/gpu_unai/gpu_raster_line.h
+++ b/plugins/gpu_unai/gpu_raster_line.h
@@ -1,6 +1,7 @@
/***************************************************************************
* Copyright (C) 2010 PCSX4ALL Team *
* Copyright (C) 2010 Unai *
+* Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -18,240 +19,697 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
***************************************************************************/
-#define GPU_TESTRANGE(x) { if((u32)(x+1024) > 2047) return; }
-
///////////////////////////////////////////////////////////////////////////////
// GPU internal line drawing functions
+//
+// Rewritten October 2016 by senquack:
+// Instead of one pixel at a time, lines are now drawn in runs of pixels,
+// whether vertical, horizontal, or diagonal. A new inner driver
+// 'gpuPixelSpanFn' is used, as well as an enhanced Bresenham run-slice
+// algorithm. For more information, see the following:
+//
+// Michael Abrash - Graphics Programming Black Book
+// Chapters 35 - 36 (does not implement diagonal runs)
+// http://www.drdobbs.com/parallel/graphics-programming-black-book/184404919
+// http://www.jagregory.com/abrash-black-book/
+//
+// Article by Andrew Delong (does not implement diagonal runs)
+// http://timetraces.ca/nw/drawline.htm
+//
+// 'Run-Based Multi-Point Line Drawing' by Eun Jae Lee & Larry F. Hodges
+// https://smartech.gatech.edu/bitstream/handle/1853/3632/93-22.pdf
+// Provided the idea of doing a half-octant transform allowing lines with
+// slopes between 0.5 and 2.0 (diagonal runs of pixels) to be handled
+// identically to the traditional horizontal/vertical run-slice method.
-#define GPU_DIGITS 16
-#define GPU_DIGITSC (GPU_DIGITS+3)
+// Use 16.16 fixed point precision for line math.
+// NOTE: Gouraud colors used by gpuPixelSpanFn can use a different precision.
+#define GPU_LINE_FIXED_BITS 16
-INLINE s32 GPU_DIV(s32 rs, s32 rt)
-{
- return rt ? (rs / rt) : (0);
-}
+// If defined, Gouraud lines will use fixed-point multiply-by-inverse to
+// do most divisions. With enough accuracy, this should be OK.
+#define USE_LINES_ALL_FIXED_PT_MATH
-///////////////////////////////////////////////////////////////////////////////
-void gpuDrawLF(const PD gpuPixelDriver)
+//////////////////////
+// Flat-shaded line //
+//////////////////////
+void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver)
{
- s32 temp;
- s32 xmin, xmax;
- s32 ymin, ymax;
- s32 x0, x1, dx;
- s32 y0, y1, dy;
-
- x0 = PacketBuffer.S2[2] + DrawingOffset[0]; GPU_TESTRANGE(x0);
- y0 = PacketBuffer.S2[3] + DrawingOffset[1]; GPU_TESTRANGE(y0);
- x1 = PacketBuffer.S2[4] + DrawingOffset[0]; GPU_TESTRANGE(x1);
- y1 = PacketBuffer.S2[5] + DrawingOffset[1]; GPU_TESTRANGE(y1);
-
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
- const u16 pixeldata = GPU_RGB16(PacketBuffer.U4[0]);
-
- dy = (y1 - y0);
- if (dy < 0) dy = -dy;
- dx = (x1 - x0);
- if (dx < 0) dx = -dx;
- if (dx > dy) {
- if (x0 > x1) {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
+ int x0, y0, x1, y1;
+ int dx, dy;
+
+ // All three of these variables should be signed (so multiplication works)
+ ptrdiff_t sx; // Sign of x delta, positive when x0 < x1
+ const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel
+ const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line
+
+ // Clip region: xmax/ymax seem to normally be one *past* the rightmost/
+ // bottommost pixels of the draw area. Since we render every pixel between
+ // and including both line endpoints, subtract one from xmax/ymax.
+ const int xmin = gpu_unai.DrawingArea[0];
+ const int ymin = gpu_unai.DrawingArea[1];
+ const int xmax = gpu_unai.DrawingArea[2] - 1;
+ const int ymax = gpu_unai.DrawingArea[3] - 1;
+
+ x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0];
+ y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1];
+ x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_unai.DrawingOffset[0];
+ y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_unai.DrawingOffset[1];
+
+ // Always draw top to bottom, so ensure y0 <= y1
+ if (y0 > y1) {
+ SwapValues(y0, y1);
+ SwapValues(x0, x1);
+ }
+
+ // Is line totally outside Y clipping range?
+ if (y0 > ymax || y1 < ymin) return;
+
+ dx = x1 - x0;
+ dy = y1 - y0;
+
+ // X-axis range check : max distance between any two X coords is 1023
+ // (PSX hardware will not render anything violating this rule)
+ // NOTE: We'll check y coord range further below
+ if (dx >= CHKMAX_X || dx <= -CHKMAX_X)
+ return;
+
+ // Y-axis range check and clipping
+ if (dy) {
+ // Y-axis range check : max distance between any two Y coords is 511
+ // (PSX hardware will not render anything violating this rule)
+ if (dy >= CHKMAX_Y)
+ return;
+
+ // We already know y0 < y1
+ if (y0 < ymin) {
+ x0 += GPU_FAST_DIV(((ymin - y0) * dx), dy);
+ y0 = ymin;
}
- y1 = GPU_DIV((y1 - y0) << GPU_DIGITS, dx);
- y0 <<= GPU_DIGITS;
- temp = xmin - x0;
- if (temp > 0) {
- x0 = xmin;
- y0 += (y1 * temp);
+ if (y1 > ymax) {
+ x1 += GPU_FAST_DIV(((ymax - y1) * dx), dy);
+ y1 = ymax;
}
- if (x1 > xmax) x1 = xmax;
- x1 -= x0;
- if (x1 < 0) x1 = 0;
-
- const int li=linesInterlace;
- for (; x1; x1--) {
- temp = y0 >> GPU_DIGITS;
- if( 0 == (temp&li) ) {
- if ((u32) (temp - ymin) < (u32) (ymax - ymin)) {
- gpuPixelDriver(&((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, temp)],pixeldata);
- }
+
+ // Recompute in case clipping occurred:
+ dx = x1 - x0;
+ dy = y1 - y0;
+ }
+
+ // Check X clipping range, set 'sx' x-direction variable
+ if (dx == 0) {
+ // Is vertical line totally outside X clipping range?
+ if (x0 < xmin || x0 > xmax)
+ return;
+ sx = 0;
+ } else {
+ if (dx > 0) {
+ // x0 is leftmost coordinate
+ if (x0 > xmax) return; // Both points outside X clip range
+
+ if (x0 < xmin) {
+ if (x1 < xmin) return; // Both points outside X clip range
+ y0 += GPU_FAST_DIV(((xmin - x0) * dy), dx);
+ x0 = xmin;
+ }
+
+ if (x1 > xmax) {
+ y1 += GPU_FAST_DIV(((xmax - x1) * dy), dx);
+ x1 = xmax;
+ }
+
+ sx = +1;
+ dx = x1 - x0; // Get final value, which should also be absolute value
+ } else {
+ // x1 is leftmost coordinate
+ if (x1 > xmax) return; // Both points outside X clip range
+
+ if (x1 < xmin) {
+ if (x0 < xmin) return; // Both points outside X clip range
+
+ y1 += GPU_FAST_DIV(((xmin - x1) * dy), dx);
+ x1 = xmin;
}
- x0++;
- y0 += y1;
+
+ if (x0 > xmax) {
+ y0 += GPU_FAST_DIV(((xmax - x0) * dy), dx);
+ x0 = xmax;
+ }
+
+ sx = -1;
+ dx = x0 - x1; // Get final value, which should also be absolute value
+ }
+
+ // Recompute in case clipping occurred:
+ dy = y1 - y0;
+ }
+
+ // IMPORTANT: dx,dy should now contain their absolute values
+
+ int min_length, // Minimum length of a pixel run
+ start_length, // Length of first run
+ end_length, // Length of last run
+ err_term, // Cumulative error to determine when to draw longer run
+ err_adjup, // Increment to err_term for each run drawn
+ err_adjdown; // Subtract this from err_term after drawing longer run
+
+ // Color to draw with (16 bits, highest of which is unset mask bit)
+ uintptr_t col16 = GPU_RGB16(packet.U4[0]);
+
+ // We use u8 pointers even though PS1 has u16 framebuffer.
+ // This allows pixel-drawing functions to increment dst pointer
+ // directly by the passed 'incr' value, not having to shift it first.
+ u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth;
+
+ // SPECIAL CASE: Vertical line
+ if (dx == 0) {
+ gpuPixelSpanDriver(dst, col16, dst_stride, dy+1);
+ return;
+ }
+
+ // SPECIAL CASE: Horizontal line
+ if (dy == 0) {
+ gpuPixelSpanDriver(dst, col16, sx * dst_depth, dx+1);
+ return;
+ }
+
+ // SPECIAL CASE: Diagonal line
+ if (dx == dy) {
+ gpuPixelSpanDriver(dst, col16, dst_stride + (sx * dst_depth), dy+1);
+ return;
+ }
+
+ int major, minor; // Major axis, minor axis
+ ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis
+
+ if (dx > dy) {
+ major = dx;
+ minor = dy;
+ } else {
+ major = dy;
+ minor = dx;
+ }
+
+ // Determine if diagonal or horizontal runs
+ if (major < (2 * minor)) {
+ // Diagonal runs, so perform half-octant transformation
+ minor = major - minor;
+
+ // Advance diagonally when drawing runs
+ incr_major = dst_stride + (sx * dst_depth);
+
+ // After drawing each run, correct for over-advance along minor axis
+ if (dx > dy)
+ incr_minor = -dst_stride;
+ else
+ incr_minor = -sx * dst_depth;
+ } else {
+ // Horizontal or vertical runs
+ if (dx > dy) {
+ incr_major = sx * dst_depth;
+ incr_minor = dst_stride;
+ } else {
+ incr_major = dst_stride;
+ incr_minor = sx * dst_depth;
}
- } else if (dy) {
- if (y0 > y1) {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
+ }
+
+ if (minor > 1) {
+ // Minimum number of pixels each run
+ min_length = major / minor;
+
+ // Initial error term; reflects an initial step of 0.5 along minor axis
+ err_term = (major % minor) - (minor * 2);
+
+ // Increment err_term this much each step along minor axis; when
+ // err_term crosses zero, draw longer pixel run.
+ err_adjup = (major % minor) * 2;
+ } else {
+ min_length = major;
+ err_term = 0;
+ err_adjup = 0;
+ }
+
+ // Error term adjustment when err_term turns over; used to factor
+ // out the major-axis step made at that time
+ err_adjdown = minor * 2;
+
+ // The initial and last runs are partial, because minor axis advances
+ // only 0.5 for these runs, rather than 1. Each is half a full run,
+ // plus the initial pixel.
+ start_length = end_length = (min_length / 2) + 1;
+
+ if (min_length & 1) {
+ // If there is an odd number of pixels per run, we have 1 pixel that
+ // can't be allocated to either the initial or last partial run, so
+ // we'll add 0.5 to err_term so that this pixel will be handled
+ // by the normal full-run loop
+ err_term += minor;
+ } else {
+ // If the minimum run length is even and there's no fractional advance,
+ // we have one pixel that could go to either the initial or last
+ // partial run, which we arbitrarily allocate to the last run
+ if (err_adjup == 0)
+ start_length--; // Leave out the extra pixel at the start
+ }
+
+ // First run of pixels
+ dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length);
+ dst += incr_minor;
+
+ // Middle runs of pixels
+ while (--minor > 0) {
+ int run_length = min_length;
+ err_term += err_adjup;
+
+ // If err_term passed 0, reset it and draw longer run
+ if (err_term > 0) {
+ err_term -= err_adjdown;
+ run_length++;
}
- x1 = GPU_DIV((x1 - x0) << GPU_DIGITS, dy);
- x0 <<= GPU_DIGITS;
- temp = ymin - y0;
- if (temp > 0) {
+
+ dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length);
+ dst += incr_minor;
+ }
+
+ // Final run of pixels
+ gpuPixelSpanDriver(dst, col16, incr_major, end_length);
+}
+
+/////////////////////////
+// Gouraud-shaded line //
+/////////////////////////
+void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver)
+{
+ int x0, y0, x1, y1;
+ int dx, dy, dr, dg, db;
+ u32 r0, g0, b0, r1, g1, b1;
+
+ // All three of these variables should be signed (so multiplication works)
+ ptrdiff_t sx; // Sign of x delta, positive when x0 < x1
+ const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel
+ const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line
+
+ // Clip region: xmax/ymax seem to normally be one *past* the rightmost/
+ // bottommost pixels of the draw area. We'll render every pixel between
+ // and including both line endpoints, so subtract one from xmax/ymax.
+ const int xmin = gpu_unai.DrawingArea[0];
+ const int ymin = gpu_unai.DrawingArea[1];
+ const int xmax = gpu_unai.DrawingArea[2] - 1;
+ const int ymax = gpu_unai.DrawingArea[3] - 1;
+
+ x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0];
+ y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1];
+ x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_unai.DrawingOffset[0];
+ y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_unai.DrawingOffset[1];
+
+ u32 col0 = packet.U4[0];
+ u32 col1 = packet.U4[2];
+
+ // Always draw top to bottom, so ensure y0 <= y1
+ if (y0 > y1) {
+ SwapValues(y0, y1);
+ SwapValues(x0, x1);
+ SwapValues(col0, col1);
+ }
+
+ // Is line totally outside Y clipping range?
+ if (y0 > ymax || y1 < ymin) return;
+
+ // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16
+ // (This is only beneficial if using SIMD-optimized pixel driver)
+#ifdef GPU_GOURAUD_LOW_PRECISION
+ r0 = (col0 >> 3) & 0x1f; g0 = (col0 >> 11) & 0x1f; b0 = (col0 >> 19) & 0x1f;
+ r1 = (col1 >> 3) & 0x1f; g1 = (col1 >> 11) & 0x1f; b1 = (col1 >> 19) & 0x1f;
+#else
+ r0 = col0 & 0xff; g0 = (col0 >> 8) & 0xff; b0 = (col0 >> 16) & 0xff;
+ r1 = col1 & 0xff; g1 = (col1 >> 8) & 0xff; b1 = (col1 >> 16) & 0xff;
+#endif
+
+ dx = x1 - x0;
+ dy = y1 - y0;
+ dr = r1 - r0;
+ dg = g1 - g0;
+ db = b1 - b0;
+
+ // X-axis range check : max distance between any two X coords is 1023
+ // (PSX hardware will not render anything violating this rule)
+ // NOTE: We'll check y coord range further below
+ if (dx >= CHKMAX_X || dx <= -CHKMAX_X)
+ return;
+
+ // Y-axis range check and clipping
+ if (dy) {
+ // Y-axis range check : max distance between any two Y coords is 511
+ // (PSX hardware will not render anything violating this rule)
+ if (dy >= CHKMAX_Y)
+ return;
+
+ // We already know y0 < y1
+ if (y0 < ymin) {
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ s32 factor = GPU_FAST_DIV(((ymin - y0) << GPU_LINE_FIXED_BITS), dy);
+ x0 += (dx * factor) >> GPU_LINE_FIXED_BITS;
+ r0 += (dr * factor) >> GPU_LINE_FIXED_BITS;
+ g0 += (dg * factor) >> GPU_LINE_FIXED_BITS;
+ b0 += (db * factor) >> GPU_LINE_FIXED_BITS;
+#else
+ x0 += (ymin - y0) * dx / dy;
+ r0 += (ymin - y0) * dr / dy;
+ g0 += (ymin - y0) * dg / dy;
+ b0 += (ymin - y0) * db / dy;
+#endif
y0 = ymin;
- x0 += (x1 * temp);
}
- if (y1 > ymax) y1 = ymax;
- y1 -= y0;
- if (y1 < 0) y1 = 0;
-
- const int li=linesInterlace;
- for (; y1; y1--) {
- if( 0 == (y0&li) ) {
- temp = x0 >> GPU_DIGITS;
- if ((u32) (temp - xmin) < (u32) (xmax - xmin)) {
- gpuPixelDriver(&((u16*)GPU_FrameBuffer)[FRAME_OFFSET(temp, y0)],pixeldata);
- }
- }
- y0++;
- x0 += x1;
+
+ if (y1 > ymax) {
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ s32 factor = GPU_FAST_DIV(((ymax - y1) << GPU_LINE_FIXED_BITS), dy);
+ x1 += (dx * factor) >> GPU_LINE_FIXED_BITS;
+ r1 += (dr * factor) >> GPU_LINE_FIXED_BITS;
+ g1 += (dg * factor) >> GPU_LINE_FIXED_BITS;
+ b1 += (db * factor) >> GPU_LINE_FIXED_BITS;
+#else
+ x1 += (ymax - y1) * dx / dy;
+ r1 += (ymax - y1) * dr / dy;
+ g1 += (ymax - y1) * dg / dy;
+ b1 += (ymax - y1) * db / dy;
+#endif
+ y1 = ymax;
}
-
+
+ // Recompute in case clipping occurred:
+ dx = x1 - x0;
+ dy = y1 - y0;
+ dr = r1 - r0;
+ dg = g1 - g0;
+ db = b1 - b0;
+ }
+
+ // Check X clipping range, set 'sx' x-direction variable
+ if (dx == 0) {
+ // Is vertical line totally outside X clipping range?
+ if (x0 < xmin || x0 > xmax)
+ return;
+ sx = 0;
} else {
- if( 0 == (y0&linesInterlace) ) {
- if ((u32) (x0 - xmin) < (u32) (xmax - xmin)) {
- if ((u32) (y0 - ymin) < (u32) (ymax - ymin)) {
- gpuPixelDriver(&((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, y0)],pixeldata);
- }
+ if (dx > 0) {
+ // x0 is leftmost coordinate
+ if (x0 > xmax) return; // Both points outside X clip range
+
+ if (x0 < xmin) {
+ if (x1 < xmin) return; // Both points outside X clip range
+
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ s32 factor = GPU_FAST_DIV(((xmin - x0) << GPU_LINE_FIXED_BITS), dx);
+ y0 += (dy * factor) >> GPU_LINE_FIXED_BITS;
+ r0 += (dr * factor) >> GPU_LINE_FIXED_BITS;
+ g0 += (dg * factor) >> GPU_LINE_FIXED_BITS;
+ b0 += (db * factor) >> GPU_LINE_FIXED_BITS;
+#else
+ y0 += (xmin - x0) * dy / dx;
+ r0 += (xmin - x0) * dr / dx;
+ g0 += (xmin - x0) * dg / dx;
+ b0 += (xmin - x0) * db / dx;
+#endif
+ x0 = xmin;
}
+
+ if (x1 > xmax) {
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ s32 factor = GPU_FAST_DIV(((xmax - x1) << GPU_LINE_FIXED_BITS), dx);
+ y1 += (dy * factor) >> GPU_LINE_FIXED_BITS;
+ r1 += (dr * factor) >> GPU_LINE_FIXED_BITS;
+ g1 += (dg * factor) >> GPU_LINE_FIXED_BITS;
+ b1 += (db * factor) >> GPU_LINE_FIXED_BITS;
+#else
+ y1 += (xmax - x1) * dy / dx;
+ r1 += (xmax - x1) * dr / dx;
+ g1 += (xmax - x1) * dg / dx;
+ b1 += (xmax - x1) * db / dx;
+#endif
+ x1 = xmax;
+ }
+
+ sx = +1;
+ dx = x1 - x0; // Get final value, which should also be absolute value
+ } else {
+ // x1 is leftmost coordinate
+ if (x1 > xmax) return; // Both points outside X clip range
+
+ if (x1 < xmin) {
+ if (x0 < xmin) return; // Both points outside X clip range
+
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ s32 factor = GPU_FAST_DIV(((xmin - x1) << GPU_LINE_FIXED_BITS), dx);
+ y1 += (dy * factor) >> GPU_LINE_FIXED_BITS;
+ r1 += (dr * factor) >> GPU_LINE_FIXED_BITS;
+ g1 += (dg * factor) >> GPU_LINE_FIXED_BITS;
+ b1 += (db * factor) >> GPU_LINE_FIXED_BITS;
+#else
+ y1 += (xmin - x1) * dy / dx;
+ r1 += (xmin - x1) * dr / dx;
+ g1 += (xmin - x1) * dg / dx;
+ b1 += (xmin - x1) * db / dx;
+#endif
+ x1 = xmin;
+ }
+
+ if (x0 > xmax) {
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ s32 factor = GPU_FAST_DIV(((xmax - x0) << GPU_LINE_FIXED_BITS), dx);
+ y0 += (dy * factor) >> GPU_LINE_FIXED_BITS;
+ r0 += (dr * factor) >> GPU_LINE_FIXED_BITS;
+ g0 += (dg * factor) >> GPU_LINE_FIXED_BITS;
+ b0 += (db * factor) >> GPU_LINE_FIXED_BITS;
+#else
+ y0 += (xmax - x0) * dy / dx;
+ r0 += (xmax - x0) * dr / dx;
+ g0 += (xmax - x0) * dg / dx;
+ b0 += (xmax - x0) * db / dx;
+#endif
+ x0 = xmax;
+ }
+
+ sx = -1;
+ dx = x0 - x1; // Get final value, which should also be absolute value
}
+
+ // Recompute in case clipping occurred:
+ dy = y1 - y0;
+ dr = r1 - r0;
+ dg = g1 - g0;
+ db = b1 - b0;
}
-}
-/*----------------------------------------------------------------------
-GF
-----------------------------------------------------------------------*/
+ // IMPORTANT: dx,dy should now contain their absolute values
-///////////////////////////////////////////////////////////////////////////////
-void gpuDrawLG(const PD gpuPixelDriver)
-{
- s32 temp;
- s32 xmin, xmax;
- s32 ymin, ymax;
- s32 x0, x1, dx;
- s32 y0, y1, dy;
- s32 r0, r1;
- s32 g0, g1;
- s32 b0, b1;
-
- x0 = PacketBuffer.S2[2] + DrawingOffset[0]; GPU_TESTRANGE(x0);
- y0 = PacketBuffer.S2[3] + DrawingOffset[1]; GPU_TESTRANGE(y0);
- x1 = PacketBuffer.S2[6] + DrawingOffset[0]; GPU_TESTRANGE(x1);
- y1 = PacketBuffer.S2[7] + DrawingOffset[1]; GPU_TESTRANGE(y1);
-
- r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
- r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10];
-
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
-
- dy = (y1 - y0);
- if (dy < 0)
- dy = -dy;
- dx = (x1 - x0);
- if (dx < 0)
- dx = -dx;
- if (dx > dy) {
- if (x0 > x1) {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- GPU_SWAP(r0, r1, temp);
- GPU_SWAP(g0, g1, temp);
- GPU_SWAP(b0, b1, temp);
- }
- y1 = GPU_DIV((y1 - y0) << GPU_DIGITS, dx);
- r1 = GPU_DIV((r1 - r0) << GPU_DIGITS, dx);
- g1 = GPU_DIV((g1 - g0) << GPU_DIGITS, dx);
- b1 = GPU_DIV((b1 - b0) << GPU_DIGITS, dx);
- y0 <<= GPU_DIGITS;
- r0 <<= GPU_DIGITS;
- g0 <<= GPU_DIGITS;
- b0 <<= GPU_DIGITS;
- temp = xmin - x0;
- if (temp > 0) {
- x0 = xmin;
- y0 += (y1 * temp);
- r0 += (r1 * temp);
- g0 += (g1 * temp);
- b0 += (b1 * temp);
+ int min_length, // Minimum length of a pixel run
+ start_length, // Length of first run
+ end_length, // Length of last run
+ err_term, // Cumulative error to determine when to draw longer run
+ err_adjup, // Increment to err_term for each run drawn
+ err_adjdown; // Subtract this from err_term after drawing longer run
+
+ GouraudColor gcol;
+ gcol.r = r0 << GPU_GOURAUD_FIXED_BITS;
+ gcol.g = g0 << GPU_GOURAUD_FIXED_BITS;
+ gcol.b = b0 << GPU_GOURAUD_FIXED_BITS;
+
+ // We use u8 pointers even though PS1 has u16 framebuffer.
+ // This allows pixel-drawing functions to increment dst pointer
+ // directly by the passed 'incr' value, not having to shift it first.
+ u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth;
+
+ // SPECIAL CASE: Vertical line
+ if (dx == 0) {
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ // Get dy fixed-point inverse
+ s32 inv_factor = 1 << GPU_GOURAUD_FIXED_BITS;
+ if (dy > 1) inv_factor = GPU_FAST_DIV(inv_factor, dy);
+
+ // Simultaneously divide and convert integer to Gouraud fixed point:
+ gcol.r_incr = dr * inv_factor;
+ gcol.g_incr = dg * inv_factor;
+ gcol.b_incr = db * inv_factor;
+#else
+ // First, convert to Gouraud fixed point
+ gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS;
+ gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS;
+ gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS;
+
+ if (dy > 1) {
+ if (dr) gcol.r_incr /= dy;
+ if (dg) gcol.g_incr /= dy;
+ if (db) gcol.b_incr /= dy;
}
- if (x1 > xmax) x1 = xmax;
- x1 -= x0;
- if (x1 < 0) x1 = 0;
+#endif
- const int li=linesInterlace;
- for (; x1; x1--) {
- temp = y0 >> GPU_DIGITS;
- if( 0 == (temp&li) ) {
- if ((u32) (temp - ymin) < (u32) (ymax - ymin)) {
- gpuPixelDriver (
- &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, temp)],
- (((b0>>GPU_DIGITSC)&0x1F)<<10) | (((g0>>GPU_DIGITSC)&0x1F)<< 5) | ((r0>>GPU_DIGITSC)&0x1F)
- );
- }
- }
- x0++;
- y0 += y1;
- r0 += r1;
- g0 += g1;
- b0 += b1;
- }
- } else if (dy) {
- if (y0 > y1) {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- GPU_SWAP(r0, r1, temp);
- GPU_SWAP(g0, g1, temp);
- GPU_SWAP(b0, b1, temp);
+ gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1);
+ return;
+ }
+
+ // SPECIAL CASE: Horizontal line
+ if (dy == 0) {
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ // Get dx fixed-point inverse
+ s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS);
+ if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx);
+
+ // Simultaneously divide and convert integer to Gouraud fixed point:
+ gcol.r_incr = dr * inv_factor;
+ gcol.g_incr = dg * inv_factor;
+ gcol.b_incr = db * inv_factor;
+#else
+ gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS;
+ gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS;
+ gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS;
+
+ if (dx > 1) {
+ if (dr) gcol.r_incr /= dx;
+ if (dg) gcol.g_incr /= dx;
+ if (db) gcol.b_incr /= dx;
}
- x1 = GPU_DIV((x1 - x0) << GPU_DIGITS, dy);
- r1 = GPU_DIV((r1 - r0) << GPU_DIGITS, dy);
- g1 = GPU_DIV((g1 - g0) << GPU_DIGITS, dy);
- b1 = GPU_DIV((b1 - b0) << GPU_DIGITS, dy);
- x0 <<= GPU_DIGITS;
- r0 <<= GPU_DIGITS;
- g0 <<= GPU_DIGITS;
- b0 <<= GPU_DIGITS;
- temp = ymin - y0;
- if (temp > 0) {
- y0 = ymin;
- x0 += (x1 * temp);
- r0 += (r1 * temp);
- g0 += (g1 * temp);
- b0 += (b1 * temp);
+#endif
+
+ gpuPixelSpanDriver(dst, (uintptr_t)&gcol, sx * dst_depth, dx+1);
+ return;
+ }
+
+ // SPECIAL CASE: Diagonal line
+ if (dx == dy) {
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ // Get dx fixed-point inverse
+ s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS);
+ if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx);
+
+ // Simultaneously divide and convert integer to Gouraud fixed point:
+ gcol.r_incr = dr * inv_factor;
+ gcol.g_incr = dg * inv_factor;
+ gcol.b_incr = db * inv_factor;
+#else
+ // First, convert to Gouraud fixed point
+ gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS;
+ gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS;
+ gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS;
+
+ if (dx > 1) {
+ if (dr) gcol.r_incr /= dx;
+ if (dg) gcol.g_incr /= dx;
+ if (db) gcol.b_incr /= dx;
}
- if (y1 > ymax) y1 = ymax;
- y1 -= y0;
- if (y1 < 0) y1 = 0;
-
- const int li=linesInterlace;
- for (; y1; y1--) {
- if( 0 == (y0&li) ) {
- temp = x0 >> GPU_DIGITS;
- if ((u32) (temp - xmin) < (u32) (xmax - xmin)) {
- gpuPixelDriver (
- &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(temp, y0)],
- (((b0>>GPU_DIGITSC)&0x1F)<<10) | (((g0>>GPU_DIGITSC)&0x1F)<< 5) | ((r0>>GPU_DIGITSC)&0x1F)
- );
- }
- }
- y0++;
- x0 += x1;
- r0 += r1;
- g0 += g1;
- b0 += b1;
+#endif
+
+ gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride + (sx * dst_depth), dy+1);
+ return;
+ }
+
+ int major, minor; // Absolute val of major,minor axis delta
+ ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis
+
+ if (dx > dy) {
+ major = dx;
+ minor = dy;
+ } else {
+ major = dy;
+ minor = dx;
+ }
+
+ // Determine if diagonal or horizontal runs
+ if (major < (2 * minor)) {
+ // Diagonal runs, so perform half-octant transformation
+ minor = major - minor;
+
+ // Advance diagonally when drawing runs
+ incr_major = dst_stride + (sx * dst_depth);
+
+ // After drawing each run, correct for over-advance along minor axis
+ if (dx > dy)
+ incr_minor = -dst_stride;
+ else
+ incr_minor = -sx * dst_depth;
+ } else {
+ // Horizontal or vertical runs
+ if (dx > dy) {
+ incr_major = sx * dst_depth;
+ incr_minor = dst_stride;
+ } else {
+ incr_major = dst_stride;
+ incr_minor = sx * dst_depth;
}
+ }
+
+#ifdef USE_LINES_ALL_FIXED_PT_MATH
+ s32 major_inv = GPU_FAST_DIV((1 << GPU_GOURAUD_FIXED_BITS), major);
+
+ // Simultaneously divide and convert from integer to Gouraud fixed point:
+ gcol.r_incr = dr * major_inv;
+ gcol.g_incr = dg * major_inv;
+ gcol.b_incr = db * major_inv;
+#else
+ gcol.r_incr = dr ? ((dr << GPU_GOURAUD_FIXED_BITS) / major) : 0;
+ gcol.g_incr = dg ? ((dg << GPU_GOURAUD_FIXED_BITS) / major) : 0;
+ gcol.b_incr = db ? ((db << GPU_GOURAUD_FIXED_BITS) / major) : 0;
+#endif
+
+ if (minor > 1) {
+ // Minimum number of pixels each run
+ min_length = major / minor;
+
+ // Initial error term; reflects an initial step of 0.5 along minor axis
+ err_term = (major % minor) - (minor * 2);
+
+ // Increment err_term this much each step along minor axis; when
+ // err_term crosses zero, draw longer pixel run.
+ err_adjup = (major % minor) * 2;
} else {
- if( 0 == (y0&linesInterlace) ) {
- if ((u32) (x0 - xmin) < (u32) (xmax - xmin)) {
- if ((u32) (y0 - ymin) < (u32) (ymax - ymin)) {
- gpuPixelDriver (
- &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, y0)],
- (((b0>>GPU_DIGITSC)&0x1F)<<10) | (((g0>>GPU_DIGITSC)&0x1F)<< 5) | ((r0>>GPU_DIGITSC)&0x1F)
- );
- }
- }
+ min_length = major;
+ err_term = 0;
+ err_adjup = 0;
+ }
+
+ // Error term adjustment when err_term turns over; used to factor
+ // out the major-axis step made at that time
+ err_adjdown = minor * 2;
+
+ // The initial and last runs are partial, because minor axis advances
+ // only 0.5 for these runs, rather than 1. Each is half a full run,
+ // plus the initial pixel.
+ start_length = end_length = (min_length / 2) + 1;
+
+ if (min_length & 1) {
+ // If there're an odd number of pixels per run, we have 1 pixel that
+ // can't be allocated to either the initial or last partial run, so
+ // we'll add 0.5 to err_term so that this pixel will be handled
+ // by the normal full-run loop
+ err_term += minor;
+ } else {
+ // If the minimum run length is even and there's no fractional advance,
+ // we have one pixel that could go to either the initial or last
+ // partial run, which we'll arbitrarily allocate to the last run
+ if (err_adjup == 0)
+ start_length--; // Leave out the extra pixel at the start
+ }
+
+ // First run of pixels
+ dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length);
+ dst += incr_minor;
+
+ // Middle runs of pixels
+ while (--minor > 0) {
+ int run_length = min_length;
+ err_term += err_adjup;
+
+ // If err_term passed 0, reset it and draw longer run
+ if (err_term > 0) {
+ err_term -= err_adjdown;
+ run_length++;
}
+
+ dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length);
+ dst += incr_minor;
}
+
+ // Final run of pixels
+ gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, end_length);
}
diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h
index c4b0350..f66a9e2 100644
--- a/plugins/gpu_unai/gpu_raster_polygon.h
+++ b/plugins/gpu_unai/gpu_raster_polygon.h
@@ -18,732 +18,1431 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
***************************************************************************/
-#define GPU_TESTRANGE3() \
-{ \
- if(x0<0) { if((x1-x0)>CHKMAX_X) return; if((x2-x0)>CHKMAX_X) return; } \
- if(x1<0) { if((x0-x1)>CHKMAX_X) return; if((x2-x1)>CHKMAX_X) return; } \
- if(x2<0) { if((x0-x2)>CHKMAX_X) return; if((x1-x2)>CHKMAX_X) return; } \
- if(y0<0) { if((y1-y0)>CHKMAX_Y) return; if((y2-y0)>CHKMAX_Y) return; } \
- if(y1<0) { if((y0-y1)>CHKMAX_Y) return; if((y2-y1)>CHKMAX_Y) return; } \
- if(y2<0) { if((y0-y2)>CHKMAX_Y) return; if((y1-y2)>CHKMAX_Y) return; } \
-}
+//senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
+// from DrHell routines to fix multiple issues. See README_senquack.txt
///////////////////////////////////////////////////////////////////////////////
-// GPU internal polygon drawing functions
+// Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
+///////////////////////////////////////////////////////////////////////////////
+struct PolyVertex { // One vertex of a 3- or 4-point poly, as filled by polyInitVertexBuffer()
+ s32 x, y; // Sign-extended 11-bit X,Y coords, with the drawing offset already added
+ union {
+ struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
+ u32 tex_word; // Same storage as 'tex', accessed as one 32-bit word
+ };
+ union {
+ struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
+ u32 col_word; // Same storage as 'col', accessed as one 32-bit word
+ };
+};
+
+enum PolyAttribute { // Bit flags that are OR-ed together to form a PolyType
+ POLYATTR_TEXTURE = (1 << 0), // Packet carries a texture-coord word per vertex
+ POLYATTR_GOURAUD = (1 << 1) // Packet carries a color word per vertex
+};
+
+enum PolyType { // The four PolyAttribute combinations accepted by polyInitVertexBuffer()
+ POLYTYPE_F = 0, // Neither attribute set
+ POLYTYPE_FT = (POLYATTR_TEXTURE), // Texture only
+ POLYTYPE_G = (POLYATTR_GOURAUD), // Gouraud only
+ POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD) // Texture and Gouraud
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// polyInitVertexBuffer()
+// Fills vbuf[] array with data from any type of poly draw-command packet.
///////////////////////////////////////////////////////////////////////////////
-void gpuDrawF3(const PP gpuPolySpanDriver)
+static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
{
- const int li=linesInterlace;
- s32 temp;
- s32 xa, xb, xmin, xmax;
- s32 ya, yb, ymin, ymax;
- s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
- s32 y0, y1, y2;
+ bool texturing = ptype & POLYATTR_TEXTURE;
+ bool gouraud = ptype & POLYATTR_GOURAUD;
+
+ int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words (1 for X,Y, +1 per attribute below)
+ if (texturing)
+ vert_stride++;
+ if (gouraud)
+ vert_stride++;
+
+ int num_verts = (is_quad) ? 4 : 3; // Quads fill vbuf[0..3], triangles vbuf[0..2]
+ u32 *ptr;
+
+ // X,Y coords, adjusted by draw offsets
+ s32 x_off = gpu_unai.DrawingOffset[0];
+ s32 y_off = gpu_unai.DrawingOffset[1];
+ ptr = &packet.U4[1]; // Vertex 0's X,Y word is packet word 1
+ for (int i=0; i < num_verts; ++i, ptr += vert_stride) {
+ s16* coord_ptr = (s16*)ptr; // View the word as two 16-bit halves: [0] is X, [1] is Y
+ vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off;
+ vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off;
+ }
- x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]);
- y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]);
- x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]);
- y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]);
- x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]);
- y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]);
+ // U,V texture coords (if applicable); otherwise tex_word is left unwritten
+ if (texturing) {
+ ptr = &packet.U4[2]; // Vertex 0's texture word is packet word 2
+ for (int i=0; i < num_verts; ++i, ptr += vert_stride)
+ vbuf[i].tex_word = *ptr; // Whole word is copied; tex.u/tex.v overlay it through the union
+ }
- GPU_TESTRANGE3();
+ // Colors (if applicable); otherwise col_word is left unwritten
+ if (gouraud) {
+ ptr = &packet.U4[0]; // Vertex 0's color word is packet word 0
+ for (int i=0; i < num_verts; ++i, ptr += vert_stride)
+ vbuf[i].col_word = *ptr;
+ }
+}
- x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
- y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
+///////////////////////////////////////////////////////////////////////////////
+// Helper functions to determine which vertex in a 2 or 3 vertex array
+// has the highest/lowest X/Y coordinate.
+// Note: ties are broken in opposite directions. vertIdxOfLeast..() prefers
+// the lowest index among equal coordinates, while vertIdxOfHighest..()
+// prefers the highest, so the two never return the same index.
+// This ensures that, during the vertex-ordering phase of rasterization,
+// all three vertices remain unique.
+///////////////////////////////////////////////////////////////////////////////
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
+template<typename T> // T: any type with a comparable .x member
+static inline int vertIdxOfLeastXCoord2(const T *Tptr) // Returns 0 or 1: index of the smaller .x in Tptr[0..1]
+{
+ return (Tptr[0].x <= Tptr[1].x) ? 0 : 1; // On a tie, index 0 wins
+}
- {
- int rx0 = Max2(xmin,Min3(x0,x1,x2));
- int ry0 = Max2(ymin,Min3(y0,y1,y2));
- int rx1 = Min2(xmax,Max3(x0,x1,x2));
- int ry1 = Min2(ymax,Max3(y0,y1,y2));
- if( rx0>=rx1 || ry0>=ry1) return;
- }
-
- PixelData = GPU_RGB16(PacketBuffer.U4[0]);
+template<typename T> // T: any type with a comparable .x member
+static inline int vertIdxOfLeastXCoord3(const T *Tptr) // Returns 0..2: index of the smallest .x in Tptr[0..2]
+{
+ int least_of_v0_v1 = vertIdxOfLeastXCoord2(Tptr);
+ return (Tptr[least_of_v0_v1].x <= Tptr[2].x) ? least_of_v0_v1 : 2; // On a tie, the earlier index wins
+}
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- }
- }
- if (y1 >= y2)
- {
- if( y1!=y2 || x1>x2 )
- {
- GPU_SWAP(x1, x2, temp);
- GPU_SWAP(y1, y2, temp);
- }
- }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- }
- }
+template<typename T> // T: any type with a comparable .y member
+static inline int vertIdxOfLeastYCoord2(const T *Tptr) // Returns 0 or 1: index of the smaller .y in Tptr[0..1]
+{
+ return (Tptr[0].y <= Tptr[1].y) ? 0 : 1; // On a tie, index 0 wins
+}
- ya = y2 - y0;
- yb = y2 - y1;
- dx =(x2 - x1) * ya - (x2 - x0) * yb;
+template<typename T> // T: any type with a comparable .y member
+static inline int vertIdxOfLeastYCoord3(const T *Tptr) // Returns 0..2: index of the smallest .y in Tptr[0..2]
+{
+ int least_of_v0_v1 = vertIdxOfLeastYCoord2(Tptr);
+ return (Tptr[least_of_v0_v1].y <= Tptr[2].y) ? least_of_v0_v1 : 2; // On a tie, the earlier index wins
+}
+
+template<typename T> // T: any type with a comparable .x member
+static inline int vertIdxOfHighestXCoord2(const T *Tptr) // Returns 0 or 1: index of the larger .x in Tptr[0..1]
+{
+ return (Tptr[1].x >= Tptr[0].x) ? 1 : 0; // On a tie, index 1 wins
+}
+
+template<typename T> // T: any type with a comparable .x member
+static inline int vertIdxOfHighestXCoord3(const T *Tptr) // Returns 0..2: index of the largest .x in Tptr[0..2]
+{
+ int highest_of_v0_v1 = vertIdxOfHighestXCoord2(Tptr);
+ return (Tptr[2].x >= Tptr[highest_of_v0_v1].x) ? 2 : highest_of_v0_v1; // On a tie, the later index wins
+}
+
+template<typename T> // T: any type with a comparable .y member
+static inline int vertIdxOfHighestYCoord2(const T *Tptr) // Returns 0 or 1: index of the larger .y in Tptr[0..1]
+{
+ return (Tptr[1].y >= Tptr[0].y) ? 1 : 0; // On a tie, index 1 wins
+}
+
+template<typename T> // T: any type with a comparable .y member
+static inline int vertIdxOfHighestYCoord3(const T *Tptr) // Returns 0..2: index of the largest .y in Tptr[0..2]
+{
+ int highest_of_v0_v1 = vertIdxOfHighestYCoord2(Tptr);
+ return (Tptr[2].y >= Tptr[highest_of_v0_v1].y) ? 2 : highest_of_v0_v1; // On a tie, the later index wins
+}
- for (s32 loop0 = 2; loop0; --loop0)
+///////////////////////////////////////////////////////////////////////////////
+// polyUseTriangle()
+// Determines if the specified triangle should be rendered. If so, it
+// fills the given array of vertex pointers, vert_ptrs, in order of
+// increasing Y coordinate values, as required by rasterization algorithm.
+// Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
+// or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
+// Returns true if triangle should be rendered, false if not.
+///////////////////////////////////////////////////////////////////////////////
+static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs)
+{
+ // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
+ const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
+
+ // Get indices (0..2, relative to tri_ptr) of highest/lowest X,Y coords
+ int idx_lowest_x = vertIdxOfLeastXCoord3(tri_ptr);
+ int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
+ int idx_lowest_y = vertIdxOfLeastYCoord3(tri_ptr);
+ int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
+
+ // Maximum absolute distance between any two X coordinates is 1023,
+ // and for Y coordinates is 511 (PS1 hardware limitation)
+ int lowest_x = tri_ptr[idx_lowest_x].x;
+ int highest_x = tri_ptr[idx_highest_x].x;
+ int lowest_y = tri_ptr[idx_lowest_y].y;
+ int highest_y = tri_ptr[idx_highest_y].y;
+ if ((highest_x - lowest_x) >= CHKMAX_X ||
+ (highest_y - lowest_y) >= CHKMAX_Y)
+ return false; // Too large: reject without touching vert_ptrs
+
+ // Determine if triangle is completely outside clipping range
+ int xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+ int clipped_lowest_x = Max2(xmin,lowest_x);
+ int clipped_lowest_y = Max2(ymin,lowest_y);
+ int clipped_highest_x = Min2(xmax,highest_x);
+ int clipped_highest_y = Min2(ymax,highest_y);
+ if (clipped_lowest_x >= clipped_highest_x ||
+ clipped_lowest_y >= clipped_highest_y)
+ return false; // '>=' also rejects a clipped extent of zero width or zero height
+
+ // Order vertex ptrs by increasing y value (draw routines need this).
+ // idx_lowest_y and idx_highest_y are two distinct values in 0..2, so
+ // their sum is 1, 2 or 3; XOR with 3 gives the remaining index (2, 1, 0).
+ vert_ptrs[0] = tri_ptr + idx_lowest_y;
+ vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
+ vert_ptrs[2] = tri_ptr + idx_highest_y;
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// GPU internal polygon drawing functions
+///////////////////////////////////////////////////////////////////////////////
+
+/*----------------------------------------------------------------------
+gpuDrawPolyF - Flat-shaded, untextured poly
+----------------------------------------------------------------------*/
+void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
+{
+ // Set up bgr555 color to be used across calls in inner driver
+ gpu_unai.PixelData = GPU_RGB16(packet.U4[0]);
+
+ PolyVertex vbuf[4];
+ polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
+
+ int total_passes = is_quad ? 2 : 1; // A quad is drawn as two triangles: verts 0,1,2 then verts 1,2,3
+ int cur_pass = 0;
+ do
{
- if (loop0 == 2)
- {
- ya = y0;
- yb = y1;
- x3 = i2x(x0);
- x4 = y0!=y1 ? x3 : i2x(x1);
- if (dx < 0)
- {
- dx3 = xLoDivx((x2 - x0), (y2 - y0));
- dx4 = xLoDivx((x1 - x0), (y1 - y0));
- }
- else
- {
- dx3 = xLoDivx((x1 - x0), (y1 - y0));
- dx4 = xLoDivx((x2 - x0), (y2 - y0));
+ const PolyVertex* vptrs[3];
+ if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ continue; // In a do-while, 'continue' jumps to the ++cur_pass test at the bottom
+
+ s32 xa, xb, ya, yb;
+ s32 x3, dx3, x4, dx4, dx;
+ s32 x0, x1, x2, y0, y1, y2;
+
+ x0 = vptrs[0]->x; y0 = vptrs[0]->y;
+ x1 = vptrs[1]->x; y1 = vptrs[1]->y;
+ x2 = vptrs[2]->x; y2 = vptrs[2]->y;
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx = (x2 - x1) * ya - (x2 - x0) * yb; // Only the sign is used: it picks which edge feeds x3 and which feeds x4
+
+ for (int loop0 = 2; loop0; loop0--) { // loop0==2 handles rows y0..y1, loop0==1 handles rows y1..y2
+ if (loop0 == 2) {
+ ya = y0; yb = y1;
+ x3 = x4 = i2x(x0);
+ if (dx < 0) {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+#else
+ dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+ dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+#else
+ dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+ dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+#endif
+#endif
+ } else {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+#else
+ dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+ dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+#else
+ dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+ dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+#endif
+#endif
+ }
+ } else {
+ //senquack - break out of final loop if nothing to be drawn (1st loop
+ // must always be taken to setup dx3/dx4)
+ if (y1 == y2) break;
+
+ ya = y1; yb = y2;
+
+ if (dx < 0) {
+ x3 = i2x(x0) + (dx3 * (y1 - y0)); // x3 keeps following the v0->v2 edge, so dx3 from the first pass is reused
+ x4 = i2x(x1);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ } else {
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0)); // x4 keeps following the v0->v2 edge, so dx4 from the first pass is reused
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ }
}
- }
- else
- {
- ya = y1;
- yb = y2;
- if (dx < 0)
- {
- x4 = i2x(x1);
- x3 = i2x(x0) + (dx3 * (y1 - y0));
- dx4 = xLoDivx((x2 - x1), (y2 - y1));
+
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+
+ if ((ymin - ya) > 0) { // Starts above ymin: advance both edges to row ymin
+ x3 += (dx3 * (ymin - ya));
+ x4 += (dx4 * (ymin - ya));
+ ya = ymin;
}
- else
+
+ if (yb > ymax) yb = ymax;
+
+ int loop1 = yb - ya; // Number of rows to walk; row yb itself is not drawn
+ if (loop1 <= 0)
+ continue; // Nothing to draw in this half; move on to the next loop0 pass
+
+ u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ int li=gpu_unai.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+
+ for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
+ x3 += dx3, x4 += dx4 )
{
- x3 = i2x(x1);
- x4 = i2x(x0) + (dx4 * (y1 - y0));
- dx3 = xLoDivx((x2 - x1), (y2 - y1));
+ if (ya&li) continue; // Skip rows masked out by ilace_mask
+ if ((ya&pi)==pif) continue; // With pi==0 and pif==1 (progressive interlace off) this never skips
+
+ xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
+ if ((xmin - xa) > 0) xa = xmin;
+ if (xb > xmax) xb = xmax;
+ if ((xb - xa) > 0)
+ gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); // Draws (xb - xa) pixels starting at column xa
}
}
-
- temp = ymin - ya;
- if (temp > 0)
- {
- ya = ymin;
- x3 += dx3*temp;
- x4 += dx4*temp;
- }
- if (yb > ymax) yb = ymax;
- if (ya>=yb) continue;
-
- x3+= fixed_HALF;
- x4+= fixed_HALF;
-
- u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
-
- for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4)
- {
- if (ya&li) continue;
- xa = x2i(x3);
- xb = x2i(x4);
- if( (xa>xmax) || (xb<xmin) ) continue;
- if(xa < xmin) xa = xmin;
- if(xb > xmax) xb = xmax;
- xb-=xa;
- if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
- }
- }
+ } while (++cur_pass < total_passes);
}
/*----------------------------------------------------------------------
-FT3
+gpuDrawPolyFT - Flat-shaded, textured poly
----------------------------------------------------------------------*/
-
-void gpuDrawFT3(const PP gpuPolySpanDriver)
+void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
{
- const int li=linesInterlace;
- s32 temp;
- s32 xa, xb, xmin, xmax;
- s32 ya, yb, ymin, ymax;
- s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
- s32 y0, y1, y2;
- s32 u0, u1, u2, u3, du3=0;
- s32 v0, v1, v2, v3, dv3=0;
-
- x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
- y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
- x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
- y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
- x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
- y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
-
- GPU_TESTRANGE3();
-
- x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
- y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
-
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
-
+ // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
+ gpu_unai.r8 = packet.U1[0];
+ gpu_unai.g8 = packet.U1[1];
+ gpu_unai.b8 = packet.U1[2];
+ // r5/g5/b5 used if just texture-blending is applied (15-bit light)
+ gpu_unai.r5 = packet.U1[0] >> 3;
+ gpu_unai.g5 = packet.U1[1] >> 3;
+ gpu_unai.b5 = packet.U1[2] >> 3;
+
+ PolyVertex vbuf[4];
+ polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad);
+
+ int total_passes = is_quad ? 2 : 1; // A quad is drawn as two triangles: verts 0,1,2 then verts 1,2,3
+ int cur_pass = 0;
+ do
{
- int rx0 = Max2(xmin,Min3(x0,x1,x2));
- int ry0 = Max2(ymin,Min3(y0,y1,y2));
- int rx1 = Min2(xmax,Max3(x0,x1,x2));
- int ry1 = Min2(ymax,Max3(y0,y1,y2));
- if( rx0>=rx1 || ry0>=ry1) return;
- }
-
- u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
- u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17];
- u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25];
-
- r4 = s32(PacketBuffer.U1[0]);
- g4 = s32(PacketBuffer.U1[1]);
- b4 = s32(PacketBuffer.U1[2]);
- dr4 = dg4 = db4 = 0;
+ const PolyVertex* vptrs[3];
+ if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ continue; // In a do-while, 'continue' jumps to the ++cur_pass test at the bottom
+
+ s32 xa, xb, ya, yb;
+ s32 x3, dx3, x4, dx4, dx;
+ s32 u3, du3, v3, dv3;
+ s32 x0, x1, x2, y0, y1, y2;
+ s32 u0, u1, u2, v0, v1, v2;
+ s32 du4, dv4;
+
+ x0 = vptrs[0]->x; y0 = vptrs[0]->y;
+ u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
+ x1 = vptrs[1]->x; y1 = vptrs[1]->y;
+ u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
+ x2 = vptrs[2]->x; y2 = vptrs[2]->y;
+ u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx4 = (x2 - x1) * ya - (x2 - x0) * yb; // dx4 is a temporary here; it is reassigned as an edge slope further down
+ du4 = (u2 - u1) * ya - (u2 - u0) * yb;
+ dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
+ dx = dx4; // Keep the signed value: its sign picks which edge feeds x3 and which feeds x4
+ if (dx4 < 0) { // Negate all three so the divisor is positive while du4/dx4 and dv4/dx4 are unchanged
+ dx4 = -dx4;
+ du4 = -du4;
+ dv4 = -dv4;
+ }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- GPU_SWAP(u0, u1, temp);
- GPU_SWAP(v0, v1, temp);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if (dx4 != 0) {
+ float finv = FloatInv(dx4);
+ du4 = (fixed)((du4 << FIXED_BITS) * finv);
+ dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
+ } else {
+ du4 = dv4 = 0;
}
- }
- if (y1 >= y2)
- {
- if( y1!=y2 || x1>x2 )
- {
- GPU_SWAP(x1, x2, temp);
- GPU_SWAP(y1, y2, temp);
- GPU_SWAP(u1, u2, temp);
- GPU_SWAP(v1, v2, temp);
+#else
+ if (dx4 != 0) {
+ float fdiv = dx4;
+ du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
+ dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
+ } else {
+ du4 = dv4 = 0;
}
- }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- GPU_SWAP(u0, u1, temp);
- GPU_SWAP(v0, v1, temp);
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if (dx4 != 0) {
+ int iF, iS;
+ xInv(dx4, iF, iS);
+ du4 = xInvMulx(du4, iF, iS);
+ dv4 = xInvMulx(dv4, iF, iS);
+ } else {
+ du4 = dv4 = 0;
}
- }
-
- ya = y2 - y0;
- yb = y2 - y1;
- dx = (x2 - x1) * ya - (x2 - x0) * yb;
- du4 = (u2 - u1) * ya - (u2 - u0) * yb;
- dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
+#else
+ if (dx4 != 0) {
+ du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
+ dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
+ } else {
+ du4 = dv4 = 0;
+ }
+#endif
+#endif
+ // Set u,v increments for inner driver
+ gpu_unai.u_inc = du4;
+ gpu_unai.v_inc = dv4;
+
+ //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
+ // (SAME ISSUE ELSEWHERE)
+ for (s32 loop0 = 2; loop0; loop0--) { // loop0==2 handles rows y0..y1, loop0==1 handles rows y1..y2
+ if (loop0 == 2) {
+ ya = y0; yb = y1;
+ x3 = x4 = i2x(x0);
+ u3 = i2x(u0); v3 = i2x(v0);
+ if (dx < 0) {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ float finv = FloatInv(y2 - y0);
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ float fdiv = y2 - y0;
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ int iF, iS;
+ xInv((y2 - y0), iF, iS);
+ dx3 = xInvMulx((x2 - x0), iF, iS);
+ du3 = xInvMulx((u2 - u0), iF, iS);
+ dv3 = xInvMulx((v2 - v0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
+ du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
+ dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+#endif
+#endif
+ } else {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ float finv = FloatInv(y1 - y0);
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ float fdiv = y1 - y0;
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ int iF, iS;
+ xInv((y1 - y0), iF, iS);
+ dx3 = xInvMulx((x1 - x0), iF, iS);
+ du3 = xInvMulx((u1 - u0), iF, iS);
+ dv3 = xInvMulx((v1 - v0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
+ du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
+ dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+#endif
+#endif
+ }
+ } else {
+ //senquack - break out of final loop if nothing to be drawn (1st loop
+ // must always be taken to setup dx3/dx4)
+ if (y1 == y2) break;
+
+ ya = y1; yb = y2;
+
+ if (dx < 0) {
+ x3 = i2x(x0); // x3/u3/v3 keep following the v0->v2 edge, so restart at v0 and step to row y1
+ x4 = i2x(x1);
+ u3 = i2x(u0);
+ v3 = i2x(v0);
+ if ((y1 - y0) != 0) {
+ x3 += (dx3 * (y1 - y0));
+ u3 += (du3 * (y1 - y0));
+ v3 += (dv3 * (y1 - y0));
+ }
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ } else {
+ x3 = i2x(x1); // x3/u3/v3 switch to the v1->v2 edge and restart at v1
+ x4 = i2x(x0) + (dx4 * (y1 - y0)); // x4 keeps following the v0->v2 edge, so dx4 from the first pass is reused
+ u3 = i2x(u1);
+ v3 = i2x(v1);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ float finv = FloatInv(y2 - y1);
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ float fdiv = y2 - y1;
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ int iF, iS;
+ xInv((y2 - y1), iF, iS);
+ dx3 = xInvMulx((x2 - x1), iF, iS);
+ du3 = xInvMulx((u2 - u1), iF, iS);
+ dv3 = xInvMulx((v2 - v1), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
+ du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
+ dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+#endif
+#endif
+ }
+ }
- s32 iF,iS;
- xInv( dx, iF, iS);
- du4 = xInvMulx( du4, iF, iS);
- dv4 = xInvMulx( dv4, iF, iS);
- tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
- tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
- for (s32 loop0 = 2; loop0; --loop0)
- {
- if (loop0 == 2)
- {
- ya = y0;
- yb = y1;
- u3 = i2x(u0);
- v3 = i2x(v0);
- x3 = i2x(x0);
- x4 = y0!=y1 ? x3 : i2x(x1);
- if (dx < 0)
- {
- xInv( (y2 - y0), iF, iS);
- dx3 = xInvMulx( (x2 - x0), iF, iS);
- du3 = xInvMulx( (u2 - u0), iF, iS);
- dv3 = xInvMulx( (v2 - v0), iF, iS);
- dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
- }
- else
- {
- xInv( (y1 - y0), iF, iS);
- dx3 = xInvMulx( (x1 - x0), iF, iS);
- du3 = xInvMulx( (u1 - u0), iF, iS);
- dv3 = xInvMulx( (v1 - v0), iF, iS);
- dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
+ if ((ymin - ya) > 0) { // Starts above ymin: advance both edges and u,v to row ymin
+ x3 += dx3 * (ymin - ya);
+ x4 += dx4 * (ymin - ya);
+ u3 += du3 * (ymin - ya);
+ v3 += dv3 * (ymin - ya);
+ ya = ymin;
}
- }
- else
- {
- ya = y1;
- yb = y2;
- if (dx < 0)
- {
- temp = y1 - y0;
- u3 = i2x(u0) + (du3 * temp);
- v3 = i2x(v0) + (dv3 * temp);
- x3 = i2x(x0) + (dx3 * temp);
- x4 = i2x(x1);
- dx4 = xLoDivx((x2 - x1), (y2 - y1));
- }
- else
+
+ if (yb > ymax) yb = ymax;
+
+ int loop1 = yb - ya; // Number of rows to walk; row yb itself is not drawn
+ if (loop1 <= 0)
+ continue; // Nothing to draw in this half; move on to the next loop0 pass
+
+ u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ int li=gpu_unai.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+
+ for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
+ x3 += dx3, x4 += dx4,
+ u3 += du3, v3 += dv3 )
{
- u3 = i2x(u1);
- v3 = i2x(v1);
- x3 = i2x(x1);
- x4 = i2x(x0) + (dx4 * (y1 - y0));
- xInv( (y2 - y1), iF, iS);
- dx3 = xInvMulx( (x2 - x1), iF, iS);
- du3 = xInvMulx( (u2 - u1), iF, iS);
- dv3 = xInvMulx( (v2 - v1), iF, iS);
- }
- }
+ if (ya&li) continue; // Skip rows masked out by ilace_mask
+ if ((ya&pi)==pif) continue; // With pi==0 and pif==1 (progressive interlace off) this never skips
- temp = ymin - ya;
- if (temp > 0)
- {
- ya = ymin;
- x3 += dx3*temp;
- x4 += dx4*temp;
- u3 += du3*temp;
- v3 += dv3*temp;
- }
- if (yb > ymax) yb = ymax;
- if (ya>=yb) continue;
+ u32 u4, v4;
- x3+= fixed_HALF;
- x4+= fixed_HALF;
- u3+= fixed_HALF;
- v4+= fixed_HALF;
+ xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
+ u4 = u3; v4 = v3;
- u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
+ fixed itmp = i2x(xa) - x3; // Fixed-point offset from edge position x3 to pixel column xa
+ if (itmp != 0) {
+ u4 += (du4 * itmp) >> FIXED_BITS; // Advance u,v by that sub-pixel offset along the row
+ v4 += (dv4 * itmp) >> FIXED_BITS;
+ }
- for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3)
- {
- if (ya&li) continue;
- xa = x2i(x3);
- xb = x2i(x4);
- if( (xa>xmax) || (xb<xmin) ) continue;
+ u4 += fixed_HALF;
+ v4 += fixed_HALF;
- temp = xmin - xa;
- if(temp > 0)
- {
- xa = xmin;
- u4 = u3 + du4*temp;
- v4 = v3 + dv4*temp;
- }
- else
- {
- u4 = u3;
- v4 = v3;
+ if ((xmin - xa) > 0) { // Span starts before xmin: advance u,v by the skipped pixel count
+ u4 += du4 * (xmin - xa);
+ v4 += dv4 * (xmin - xa);
+ xa = xmin;
+ }
+
+ // Set u,v coords for inner driver
+ gpu_unai.u = u4;
+ gpu_unai.v = v4;
+
+ if (xb > xmax) xb = xmax;
+ if ((xb - xa) > 0)
+ gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); // Draws (xb - xa) pixels starting at column xa
}
- if(xb > xmax) xb = xmax;
- xb-=xa;
- if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
}
- }
+ } while (++cur_pass < total_passes);
}
/*----------------------------------------------------------------------
-G3
+gpuDrawPolyG - Gouraud-shaded, untextured poly
+
+Parameters:
+  packet            - command packet, passed to polyInitVertexBuffer()
+                      (with POLYTYPE_G) to fill the local vertex buffer.
+  gpuPolySpanDriver - inner driver, called once per visible scanline span
+                      with gpu_unai, a pointer to the span's first pixel
+                      and the span length.
+  is_quad           - nonzero: two passes are made; zero: one pass. Each
+                      pass draws the triangle polyUseTriangle() selects,
+                      or is skipped when it returns false.
+
+Each pass:
+  1. Computes the per-pixel horizontal color increments dr4/dg4/db4
+     (fixed point) and hands them to the inner driver packed in
+     gpu_unai.gInc. They are zero when the cross product dx4 is zero.
+  2. Walks the triangle in two halves (y0..y1, then y1..y2). x3 is the
+     span-start edge and x4 the span-end edge; the sign of dx decides
+     which of them follows the v0->v2 edge.
+  3. Clips lines to DrawingArea[1]/[3] and spans to DrawingArea[0]/[2],
+     skips lines rejected by the interlace tests, and sets gpu_unai.gCol
+     to the packed start color of each span before calling the driver.
+
+The GPU_UNAI_USE_* blocks are compile-time alternatives for the same
+divisions (float multiply-by-inverse, float divide, integer xInv/xInvMulx,
+or GPU_FAST_DIV); every variant yields 0 for a zero divisor.
+
+NOTE(review): the code reads as if polyUseTriangle() returns the vertices
+ordered so that y0 <= y1 <= y2 (the explicit swaps were removed here) -
+confirm against its definition.
----------------------------------------------------------------------*/
-
-void gpuDrawG3(const PP gpuPolySpanDriver)
+void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
{
- const int li=linesInterlace;
- s32 temp;
- s32 xa, xb, xmin, xmax;
- s32 ya, yb, ymin, ymax;
- s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
- s32 y0, y1, y2;
- s32 r0, r1, r2, r3, dr3=0;
- s32 g0, g1, g2, g3, dg3=0;
- s32 b0, b1, b2, b3, db3=0;
-
- x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
- y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
- x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
- y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
- x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
- y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
-
- GPU_TESTRANGE3();
-
- x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
- y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
-
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
+ PolyVertex vbuf[4];
+ polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
+ int total_passes = is_quad ? 2 : 1; // one pass per triangle
+ int cur_pass = 0;
+ do
{
- int rx0 = Max2(xmin,Min3(x0,x1,x2));
- int ry0 = Max2(ymin,Min3(y0,y1,y2));
- int rx1 = Min2(xmax,Max3(x0,x1,x2));
- int ry1 = Min2(ymax,Max3(y0,y1,y2));
- if( rx0>=rx1 || ry0>=ry1) return;
- }
-
- r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
- r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10];
- r2 = PacketBuffer.U1[16]; g2 = PacketBuffer.U1[17]; b2 = PacketBuffer.U1[18];
+ const PolyVertex* vptrs[3];
+ if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ continue; // jumps to the while() test below, i.e. on to the next pass
+
+ s32 xa, xb, ya, yb;
+ s32 x3, dx3, x4, dx4, dx;
+ s32 r3, dr3, g3, dg3, b3, db3;
+ s32 x0, x1, x2, y0, y1, y2;
+ s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
+ s32 dr4, dg4, db4;
+
+ x0 = vptrs[0]->x; y0 = vptrs[0]->y;
+ r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
+ x1 = vptrs[1]->x; y1 = vptrs[1]->y;
+ r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
+ x2 = vptrs[2]->x; y2 = vptrs[2]->y;
+ r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx4 = (x2 - x1) * ya - (x2 - x0) * yb; // cross product of edges v1->v2 and v0->v2
+ dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
+ dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
+ db4 = (b2 - b1) * ya - (b2 - b0) * yb;
+ dx = dx4; // keep the signed value; dx4 is made non-negative just below
+ if (dx4 < 0) {
+ dx4 = -dx4;
+ dr4 = -dr4;
+ dg4 = -dg4;
+ db4 = -db4;
+ }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
- GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if (dx4 != 0) {
+ float finv = FloatInv(dx4);
+ dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
+ dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
+ db4 = (fixed)((db4 << FIXED_BITS) * finv);
+ } else {
+ dr4 = dg4 = db4 = 0;
}
- }
- if (y1 >= y2)
- {
- if( y1!=y2 || x1>x2 )
- {
- GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
- GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
+#else
+ if (dx4 != 0) {
+ float fdiv = dx4;
+ dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
+ dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
+ db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
+ } else {
+ dr4 = dg4 = db4 = 0;
}
- }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
- GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if (dx4 != 0) {
+ int iF, iS;
+ xInv(dx4, iF, iS);
+ dr4 = xInvMulx(dr4, iF, iS);
+ dg4 = xInvMulx(dg4, iF, iS);
+ db4 = xInvMulx(db4, iF, iS);
+ } else {
+ dr4 = dg4 = db4 = 0;
}
- }
-
- ya = y2 - y0;
- yb = y2 - y1;
- dx = (x2 - x1) * ya - (x2 - x0) * yb;
- dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
- dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
- db4 = (b2 - b1) * ya - (b2 - b0) * yb;
-
- s32 iF,iS;
- xInv( dx, iF, iS);
- dr4 = xInvMulx( dr4, iF, iS);
- dg4 = xInvMulx( dg4, iF, iS);
- db4 = xInvMulx( db4, iF, iS);
- u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21;
- u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10;
- u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0;
- lInc = db + dg + dr;
-
- for (s32 loop0 = 2; loop0; --loop0)
- {
- if (loop0 == 2)
- {
- ya = y0;
- yb = y1;
- r3 = i2x(r0);
- g3 = i2x(g0);
- b3 = i2x(b0);
- x3 = i2x(x0);
- x4 = y0!=y1 ? x3 : i2x(x1);
- if (dx < 0)
- {
- xInv( (y2 - y0), iF, iS);
- dx3 = xInvMulx( (x2 - x0), iF, iS);
- dr3 = xInvMulx( (r2 - r0), iF, iS);
- dg3 = xInvMulx( (g2 - g0), iF, iS);
- db3 = xInvMulx( (b2 - b0), iF, iS);
- dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
- }
- else
- {
- xInv( (y1 - y0), iF, iS);
- dx3 = xInvMulx( (x1 - x0), iF, iS);
- dr3 = xInvMulx( (r1 - r0), iF, iS);
- dg3 = xInvMulx( (g1 - g0), iF, iS);
- db3 = xInvMulx( (b1 - b0), iF, iS);
- dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
- }
+#else
+ if (dx4 != 0) {
+ dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
+ dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
+ db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
+ } else {
+ dr4 = dg4 = db4 = 0;
}
- else
- {
- ya = y1;
- yb = y2;
- if (dx < 0)
- {
- temp = y1 - y0;
- r3 = i2x(r0) + (dr3 * temp);
- g3 = i2x(g0) + (dg3 * temp);
- b3 = i2x(b0) + (db3 * temp);
- x3 = i2x(x0) + (dx3 * temp);
- x4 = i2x(x1);
- dx4 = xLoDivx((x2 - x1), (y2 - y1));
- }
- else
- {
- r3 = i2x(r1);
- g3 = i2x(g1);
- b3 = i2x(b1);
- x3 = i2x(x1);
- x4 = i2x(x0) + (dx4 * (y1 - y0));
-
- xInv( (y2 - y1), iF, iS);
- dx3 = xInvMulx( (x2 - x1), iF, iS);
- dr3 = xInvMulx( (r2 - r1), iF, iS);
- dg3 = xInvMulx( (g2 - g1), iF, iS);
- db3 = xInvMulx( (b2 - b1), iF, iS);
+#endif
+#endif
+ // Setup packed Gouraud increment for inner driver
+ gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
+
+ for (s32 loop0 = 2; loop0; loop0--) {
+ if (loop0 == 2) {
+ ya = y0;
+ yb = y1;
+ x3 = x4 = i2x(x0); // both edges start at vertex 0
+ r3 = i2x(r0);
+ g3 = i2x(g0);
+ b3 = i2x(b0);
+ if (dx < 0) { // x3 follows v0->v2, x4 follows v0->v1
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ float finv = FloatInv(y2 - y0);
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ float fdiv = y2 - y0;
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ int iF, iS;
+ xInv((y2 - y0), iF, iS);
+ dx3 = xInvMulx((x2 - x0), iF, iS);
+ dr3 = xInvMulx((r2 - r0), iF, iS);
+ dg3 = xInvMulx((g2 - g0), iF, iS);
+ db3 = xInvMulx((b2 - b0), iF, iS);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
+ dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
+ dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
+ db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+#endif
+#endif
+ } else { // x3 follows v0->v1, x4 follows v0->v2
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ float finv = FloatInv(y1 - y0);
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ float fdiv = y1 - y0;
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ int iF, iS;
+ xInv((y1 - y0), iF, iS);
+ dx3 = xInvMulx((x1 - x0), iF, iS);
+ dr3 = xInvMulx((r1 - r0), iF, iS);
+ dg3 = xInvMulx((g1 - g0), iF, iS);
+ db3 = xInvMulx((b1 - b0), iF, iS);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
+ dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
+ dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
+ db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+#endif
+#endif
+ }
+ } else {
+ //senquack - break out of final loop if nothing to be drawn (1st loop
+ // must always be taken to setup dx3/dx4)
+ if (y1 == y2) break;
+
+ ya = y1; yb = y2;
+
+ if (dx < 0) {
+ x3 = i2x(x0); x4 = i2x(x1);
+ r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
+
+ if ((y1 - y0) != 0) {
+ x3 += (dx3 * (y1 - y0)); // advance the v0->v2 edge (slopes from the first half) down to y1
+ r3 += (dr3 * (y1 - y0));
+ g3 += (dg3 * (y1 - y0));
+ b3 += (db3 * (y1 - y0));
+ }
+
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ } else {
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0)); // dx4 still holds the v0->v2 slope set in the first half
+
+ r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
+
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ float finv = FloatInv(y2 - y1);
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ float fdiv = y2 - y1;
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ int iF, iS;
+ xInv((y2 - y1), iF, iS);
+ dx3 = xInvMulx((x2 - x1), iF, iS);
+ dr3 = xInvMulx((r2 - r1), iF, iS);
+ dg3 = xInvMulx((g2 - g1), iF, iS);
+ db3 = xInvMulx((b2 - b1), iF, iS);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
+ dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
+ dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
+ db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#endif
+ }
}
- }
- temp = ymin - ya;
- if (temp > 0)
- {
- ya = ymin;
- x3 += dx3*temp; x4 += dx4*temp;
- r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
- }
- if (yb > ymax) yb = ymax;
- if (ya>=yb) continue;
-
- x3+= fixed_HALF; x4+= fixed_HALF;
- r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
-
- u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
-
- for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, r3+=dr3, g3+=dg3, b3+=db3)
- {
- if (ya&li) continue;
- xa = x2i(x3);
- xb = x2i(x4);
- if( (xa>xmax) || (xb<xmin) ) continue;
-
- temp = xmin - xa;
- if(temp > 0)
- {
- xa = xmin;
- r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+
+ if ((ymin - ya) > 0) { // half starts before ymin: step edges and colors forward to ymin
+ x3 += (dx3 * (ymin - ya));
+ x4 += (dx4 * (ymin - ya));
+ r3 += (dr3 * (ymin - ya));
+ g3 += (dg3 * (ymin - ya));
+ b3 += (db3 * (ymin - ya));
+ ya = ymin;
}
- else
+
+ if (yb > ymax) yb = ymax;
+
+ int loop1 = yb - ya;
+ if (loop1 <= 0)
+ continue; // no visible lines in this half
+
+ u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ int li=gpu_unai.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+
+ for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
+ x3 += dx3, x4 += dx4,
+ r3 += dr3, g3 += dg3, b3 += db3 )
{
+ if (ya&li) continue; // line excluded by ilace_mask
+ if ((ya&pi)==pif) continue;
+
+ u32 r4, g4, b4;
+
+ xa = FixedCeilToInt(x3);
+ xb = FixedCeilToInt(x4);
r4 = r3; g4 = g3; b4 = b3;
+
+ fixed itmp = i2x(xa) - x3; // fixed-point distance from edge position x3 to pixel column xa
+ if (itmp != 0) {
+ r4 += (dr4 * itmp) >> FIXED_BITS;
+ g4 += (dg4 * itmp) >> FIXED_BITS;
+ b4 += (db4 * itmp) >> FIXED_BITS;
+ }
+
+ r4 += fixed_HALF;
+ g4 += fixed_HALF;
+ b4 += fixed_HALF;
+
+ if ((xmin - xa) > 0) { // span starts before xmin: advance colors to xmin
+ r4 += (dr4 * (xmin - xa));
+ g4 += (dg4 * (xmin - xa));
+ b4 += (db4 * (xmin - xa));
+ xa = xmin;
+ }
+
+ // Setup packed Gouraud color for inner driver
+ gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
+
+ if (xb > xmax) xb = xmax;
+ if ((xb - xa) > 0)
+ gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
}
- if(xb > xmax) xb = xmax;
- xb-=xa;
- if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
}
- }
+ } while (++cur_pass < total_passes);
}
/*----------------------------------------------------------------------
-GT3
+gpuDrawPolyGT - Gouraud-shaded, textured poly
+
+Parameters:
+  packet            - command packet, passed to polyInitVertexBuffer()
+                      (with POLYTYPE_GT) to fill the local vertex buffer.
+  gpuPolySpanDriver - inner driver, called once per visible scanline span
+                      with gpu_unai, a pointer to the span's first pixel
+                      and the span length.
+  is_quad           - nonzero: two passes are made; zero: one pass. Each
+                      pass draws the triangle polyUseTriangle() selects,
+                      or is skipped when it returns false.
+
+Each pass:
+  1. Computes the per-pixel horizontal increments du4/dv4 (texture
+     coords) and dr4/dg4/db4 (color), all fixed point, and stores them
+     in gpu_unai.u_inc, gpu_unai.v_inc and (packed) gpu_unai.gInc.
+     They are zero when the cross product dx4 is zero.
+  2. Walks the triangle in two halves (y0..y1, then y1..y2). x3 is the
+     span-start edge and x4 the span-end edge; the sign of dx decides
+     which of them follows the v0->v2 edge.
+  3. Clips lines to DrawingArea[1]/[3] and spans to DrawingArea[0]/[2],
+     skips lines rejected by the interlace tests, and sets gpu_unai.u,
+     gpu_unai.v and gpu_unai.gCol to the start values of each span
+     before calling the driver.
+
+The GPU_UNAI_USE_* blocks are compile-time alternatives for the same
+divisions (float multiply-by-inverse, float divide, integer xInv/xInvMulx,
+or GPU_FAST_DIV); every variant yields 0 for a zero divisor.
+
+NOTE(review): the code reads as if polyUseTriangle() returns the vertices
+ordered so that y0 <= y1 <= y2 (the explicit swaps were removed here) -
+confirm against its definition.
----------------------------------------------------------------------*/
-
-void gpuDrawGT3(const PP gpuPolySpanDriver)
+void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
{
- const int li=linesInterlace;
- s32 temp;
- s32 xa, xb, xmin, xmax;
- s32 ya, yb, ymin, ymax;
- s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
- s32 y0, y1, y2;
- s32 u0, u1, u2, u3, du3=0;
- s32 v0, v1, v2, v3, dv3=0;
- s32 r0, r1, r2, r3, dr3=0;
- s32 g0, g1, g2, g3, dg3=0;
- s32 b0, b1, b2, b3, db3=0;
-
- x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
- y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
- x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] );
- y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] );
- x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]);
- y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]);
-
- GPU_TESTRANGE3();
-
- x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
- y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
-
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
+ PolyVertex vbuf[4];
+ polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
+ int total_passes = is_quad ? 2 : 1; // one pass per triangle
+ int cur_pass = 0;
+ do
{
- int rx0 = Max2(xmin,Min3(x0,x1,x2));
- int ry0 = Max2(ymin,Min3(y0,y1,y2));
- int rx1 = Min2(xmax,Max3(x0,x1,x2));
- int ry1 = Min2(ymax,Max3(y0,y1,y2));
- if( rx0>=rx1 || ry0>=ry1) return;
- }
-
- r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
- u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
- r1 = PacketBuffer.U1[12]; g1 = PacketBuffer.U1[13]; b1 = PacketBuffer.U1[14];
- u1 = PacketBuffer.U1[20]; v1 = PacketBuffer.U1[21];
- r2 = PacketBuffer.U1[24]; g2 = PacketBuffer.U1[25]; b2 = PacketBuffer.U1[26];
- u2 = PacketBuffer.U1[32]; v2 = PacketBuffer.U1[33];
+ const PolyVertex* vptrs[3];
+ if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ continue; // jumps to the while() test below, i.e. on to the next pass
+
+ s32 xa, xb, ya, yb;
+ s32 x3, dx3, x4, dx4, dx;
+ s32 u3, du3, v3, dv3;
+ s32 r3, dr3, g3, dg3, b3, db3;
+ s32 x0, x1, x2, y0, y1, y2;
+ s32 u0, u1, u2, v0, v1, v2;
+ s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
+ s32 du4, dv4;
+ s32 dr4, dg4, db4;
+
+ x0 = vptrs[0]->x; y0 = vptrs[0]->y;
+ u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
+ r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
+ x1 = vptrs[1]->x; y1 = vptrs[1]->y;
+ u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
+ r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
+ x2 = vptrs[2]->x; y2 = vptrs[2]->y;
+ u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
+ r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx4 = (x2 - x1) * ya - (x2 - x0) * yb; // cross product of edges v1->v2 and v0->v2
+ du4 = (u2 - u1) * ya - (u2 - u0) * yb;
+ dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
+ dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
+ dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
+ db4 = (b2 - b1) * ya - (b2 - b0) * yb;
+ dx = dx4; // keep the signed value; dx4 is made non-negative just below
+ if (dx4 < 0) {
+ dx4 = -dx4;
+ du4 = -du4;
+ dv4 = -dv4;
+ dr4 = -dr4;
+ dg4 = -dg4;
+ db4 = -db4;
+ }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
- GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
- GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if (dx4 != 0) {
+ float finv = FloatInv(dx4);
+ du4 = (fixed)((du4 << FIXED_BITS) * finv);
+ dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
+ dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
+ dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
+ db4 = (fixed)((db4 << FIXED_BITS) * finv);
+ } else {
+ du4 = dv4 = dr4 = dg4 = db4 = 0;
}
- }
- if (y1 >= y2)
- {
- if( y1!=y2 || x1>x2 )
- {
- GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
- GPU_SWAP(u1, u2, temp); GPU_SWAP(v1, v2, temp);
- GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
+#else
+ if (dx4 != 0) {
+ float fdiv = dx4;
+ du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
+ dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
+ dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
+ dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
+ db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
+ } else {
+ du4 = dv4 = dr4 = dg4 = db4 = 0;
}
- }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
- GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
- GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if (dx4 != 0) {
+ int iF, iS;
+ xInv(dx4, iF, iS);
+ du4 = xInvMulx(du4, iF, iS);
+ dv4 = xInvMulx(dv4, iF, iS);
+ dr4 = xInvMulx(dr4, iF, iS);
+ dg4 = xInvMulx(dg4, iF, iS);
+ db4 = xInvMulx(db4, iF, iS);
+ } else {
+ du4 = dv4 = dr4 = dg4 = db4 = 0;
}
- }
-
- ya = y2 - y0;
- yb = y2 - y1;
- dx = (x2 - x1) * ya - (x2 - x0) * yb;
- du4 = (u2 - u1) * ya - (u2 - u0) * yb;
- dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
- dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
- dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
- db4 = (b2 - b1) * ya - (b2 - b0) * yb;
-
- s32 iF,iS;
-
- xInv( dx, iF, iS);
- du4 = xInvMulx( du4, iF, iS);
- dv4 = xInvMulx( dv4, iF, iS);
- dr4 = xInvMulx( dr4, iF, iS);
- dg4 = xInvMulx( dg4, iF, iS);
- db4 = xInvMulx( db4, iF, iS);
- u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21;
- u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10;
- u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0;
- lInc = db + dg + dr;
- tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
- tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
-
- for (s32 loop0 = 2; loop0; --loop0)
- {
- if (loop0 == 2)
- {
- ya = y0;
- yb = y1;
- u3 = i2x(u0);
- v3 = i2x(v0);
- r3 = i2x(r0);
- g3 = i2x(g0);
- b3 = i2x(b0);
- x3 = i2x(x0);
- x4 = y0!=y1 ? x3 : i2x(x1);
- if (dx < 0)
- {
- xInv( (y2 - y0), iF, iS);
- dx3 = xInvMulx( (x2 - x0), iF, iS);
- du3 = xInvMulx( (u2 - u0), iF, iS);
- dv3 = xInvMulx( (v2 - v0), iF, iS);
- dr3 = xInvMulx( (r2 - r0), iF, iS);
- dg3 = xInvMulx( (g2 - g0), iF, iS);
- db3 = xInvMulx( (b2 - b0), iF, iS);
- dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
- }
- else
- {
- xInv( (y1 - y0), iF, iS);
- dx3 = xInvMulx( (x1 - x0), iF, iS);
- du3 = xInvMulx( (u1 - u0), iF, iS);
- dv3 = xInvMulx( (v1 - v0), iF, iS);
- dr3 = xInvMulx( (r1 - r0), iF, iS);
- dg3 = xInvMulx( (g1 - g0), iF, iS);
- db3 = xInvMulx( (b1 - b0), iF, iS);
- dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
- }
+#else
+ if (dx4 != 0) {
+ du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
+ dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
+ dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
+ dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
+ db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
+ } else {
+ du4 = dv4 = dr4 = dg4 = db4 = 0;
}
- else
- {
- ya = y1;
- yb = y2;
- if (dx < 0)
- {
- temp = y1 - y0;
- u3 = i2x(u0) + (du3 * temp);
- v3 = i2x(v0) + (dv3 * temp);
- r3 = i2x(r0) + (dr3 * temp);
- g3 = i2x(g0) + (dg3 * temp);
- b3 = i2x(b0) + (db3 * temp);
- x3 = i2x(x0) + (dx3 * temp);
- x4 = i2x(x1);
- dx4 = xLoDivx((x2 - x1), (y2 - y1));
+#endif
+#endif
+ // Set u,v increments and packed Gouraud increment for inner driver
+ gpu_unai.u_inc = du4;
+ gpu_unai.v_inc = dv4;
+ gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
+
+ for (s32 loop0 = 2; loop0; loop0--) {
+ if (loop0 == 2) {
+ ya = y0; yb = y1;
+ x3 = x4 = i2x(x0); // both edges start at vertex 0
+ u3 = i2x(u0); v3 = i2x(v0);
+ r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
+ if (dx < 0) { // x3 follows v0->v2, x4 follows v0->v1
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ float finv = FloatInv(y2 - y0);
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ float fdiv = y2 - y0;
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ int iF, iS;
+ xInv((y2 - y0), iF, iS);
+ dx3 = xInvMulx((x2 - x0), iF, iS);
+ du3 = xInvMulx((u2 - u0), iF, iS);
+ dv3 = xInvMulx((v2 - v0), iF, iS);
+ dr3 = xInvMulx((r2 - r0), iF, iS);
+ dg3 = xInvMulx((g2 - g0), iF, iS);
+ db3 = xInvMulx((b2 - b0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
+ du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
+ dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
+ dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
+ dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
+ db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+#endif
+#endif
+ } else { // x3 follows v0->v1, x4 follows v0->v2
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ float finv = FloatInv(y1 - y0);
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ float fdiv = y1 - y0;
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ int iF, iS;
+ xInv((y1 - y0), iF, iS);
+ dx3 = xInvMulx((x1 - x0), iF, iS);
+ du3 = xInvMulx((u1 - u0), iF, iS);
+ dv3 = xInvMulx((v1 - v0), iF, iS);
+ dr3 = xInvMulx((r1 - r0), iF, iS);
+ dg3 = xInvMulx((g1 - g0), iF, iS);
+ db3 = xInvMulx((b1 - b0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
+ du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
+ dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
+ dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
+ dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
+ db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+#endif
+#endif
+ }
+ } else {
+ //senquack - break out of final loop if nothing to be drawn (1st loop
+ // must always be taken to setup dx3/dx4)
+ if (y1 == y2) break;
+
+ ya = y1; yb = y2;
+
+ if (dx < 0) {
+ x3 = i2x(x0); x4 = i2x(x1);
+ u3 = i2x(u0); v3 = i2x(v0);
+ r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
+
+ if ((y1 - y0) != 0) {
+ x3 += (dx3 * (y1 - y0)); // advance the v0->v2 edge (slopes from the first half) down to y1
+ u3 += (du3 * (y1 - y0));
+ v3 += (dv3 * (y1 - y0));
+ r3 += (dr3 * (y1 - y0));
+ g3 += (dg3 * (y1 - y0));
+ b3 += (db3 * (y1 - y0));
+ }
+
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ } else {
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0)); // dx4 still holds the v0->v2 slope set in the first half
+
+ u3 = i2x(u1); v3 = i2x(v1);
+ r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ float finv = FloatInv(y2 - y1);
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ float fdiv = y2 - y1;
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ int iF, iS;
+ xInv((y2 - y1), iF, iS);
+ dx3 = xInvMulx((x2 - x1), iF, iS);
+ du3 = xInvMulx((u2 - u1), iF, iS);
+ dv3 = xInvMulx((v2 - v1), iF, iS);
+ dr3 = xInvMulx((r2 - r1), iF, iS);
+ dg3 = xInvMulx((g2 - g1), iF, iS);
+ db3 = xInvMulx((b2 - b1), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
+ du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
+ dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
+ dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
+ dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
+ db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#endif
+ }
}
- else
- {
- u3 = i2x(u1);
- v3 = i2x(v1);
- r3 = i2x(r1);
- g3 = i2x(g1);
- b3 = i2x(b1);
- x3 = i2x(x1);
- x4 = i2x(x0) + (dx4 * (y1 - y0));
-
- xInv( (y2 - y1), iF, iS);
- dx3 = xInvMulx( (x2 - x1), iF, iS);
- du3 = xInvMulx( (u2 - u1), iF, iS);
- dv3 = xInvMulx( (v2 - v1), iF, iS);
- dr3 = xInvMulx( (r2 - r1), iF, iS);
- dg3 = xInvMulx( (g2 - g1), iF, iS);
- db3 = xInvMulx( (b2 - b1), iF, iS);
- }
- }
- temp = ymin - ya;
- if (temp > 0)
- {
- ya = ymin;
- x3 += dx3*temp; x4 += dx4*temp;
- u3 += du3*temp; v3 += dv3*temp;
- r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
- }
- if (yb > ymax) yb = ymax;
- if (ya>=yb) continue;
-
- x3+= fixed_HALF; x4+= fixed_HALF;
- u3+= fixed_HALF; v4+= fixed_HALF;
- r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
- u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
-
- for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3, r3+=dr3, g3+=dg3, b3+=db3)
- {
- if (ya&li) continue;
- xa = x2i(x3);
- xb = x2i(x4);
- if( (xa>xmax) || (xb<xmin)) continue;
-
- temp = xmin - xa;
- if(temp > 0)
- {
- xa = xmin;
- u4 = u3 + du4*temp; v4 = v3 + dv4*temp;
- r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+
+ if ((ymin - ya) > 0) { // half starts before ymin: step edges, u/v and colors forward to ymin
+ x3 += (dx3 * (ymin - ya));
+ x4 += (dx4 * (ymin - ya));
+ u3 += (du3 * (ymin - ya));
+ v3 += (dv3 * (ymin - ya));
+ r3 += (dr3 * (ymin - ya));
+ g3 += (dg3 * (ymin - ya));
+ b3 += (db3 * (ymin - ya));
+ ya = ymin;
}
- else
+
+ if (yb > ymax) yb = ymax;
+
+ int loop1 = yb - ya;
+ if (loop1 <= 0)
+ continue; // no visible lines in this half
+
+ u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ int li=gpu_unai.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+
+ for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
+ x3 += dx3, x4 += dx4,
+ u3 += du3, v3 += dv3,
+ r3 += dr3, g3 += dg3, b3 += db3 )
{
+ if (ya&li) continue; // line excluded by ilace_mask
+ if ((ya&pi)==pif) continue;
+
+ u32 u4, v4;
+ u32 r4, g4, b4;
+
+ xa = FixedCeilToInt(x3);
+ xb = FixedCeilToInt(x4);
u4 = u3; v4 = v3;
r4 = r3; g4 = g3; b4 = b3;
+
+ fixed itmp = i2x(xa) - x3; // fixed-point distance from edge position x3 to pixel column xa
+ if (itmp != 0) {
+ u4 += (du4 * itmp) >> FIXED_BITS;
+ v4 += (dv4 * itmp) >> FIXED_BITS;
+ r4 += (dr4 * itmp) >> FIXED_BITS;
+ g4 += (dg4 * itmp) >> FIXED_BITS;
+ b4 += (db4 * itmp) >> FIXED_BITS;
+ }
+
+ u4 += fixed_HALF;
+ v4 += fixed_HALF;
+ r4 += fixed_HALF;
+ g4 += fixed_HALF;
+ b4 += fixed_HALF;
+
+ if ((xmin - xa) > 0) { // span starts before xmin: advance u/v and colors to xmin
+ u4 += du4 * (xmin - xa);
+ v4 += dv4 * (xmin - xa);
+ r4 += dr4 * (xmin - xa);
+ g4 += dg4 * (xmin - xa);
+ b4 += db4 * (xmin - xa);
+ xa = xmin;
+ }
+
+ // Set packed Gouraud color and u,v coords for inner driver
+ gpu_unai.u = u4;
+ gpu_unai.v = v4;
+ gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
+
+ if (xb > xmax) xb = xmax;
+ if ((xb - xa) > 0)
+ gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
}
- if(xb > xmax) xb = xmax;
- xb-=xa;
- if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
}
- }
+ } while (++cur_pass < total_passes);
}
diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h
index a700db3..0afdbf5 100644
--- a/plugins/gpu_unai/gpu_raster_sprite.h
+++ b/plugins/gpu_unai/gpu_raster_sprite.h
@@ -21,73 +21,70 @@
///////////////////////////////////////////////////////////////////////////////
// GPU internal sprite drawing functions
-///////////////////////////////////////////////////////////////////////////////
-void gpuDrawS(const PS gpuSpriteSpanDriver)
+void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver)
{
- s32 x0, x1;
- s32 y0, y1;
- s32 u0;
- s32 v0;
-
- x1 = x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]) + DrawingOffset[0];
- y1 = y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]) + DrawingOffset[1];
- x1+= PacketBuffer.S2[6];
- y1+= PacketBuffer.S2[7];
-
- {
- s32 xmin, xmax;
- s32 ymin, ymax;
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
-
- {
- int rx0 = Max2(xmin,Min2(x0,x1));
- int ry0 = Max2(ymin,Min2(y0,y1));
- int rx1 = Min2(xmax,Max2(x0,x1));
- int ry1 = Min2(ymax,Max2(y0,y1));
- if( rx0>=rx1 || ry0>=ry1) return;
- }
-
- u0 = PacketBuffer.U1[8];
- v0 = PacketBuffer.U1[9];
-
- r4 = s32(PacketBuffer.U1[0]);
- g4 = s32(PacketBuffer.U1[1]);
- b4 = s32(PacketBuffer.U1[2]);
-
- {
- s32 temp;
- temp = ymin - y0;
- if (temp > 0) { y0 = ymin; v0 += temp; }
- if (y1 > ymax) y1 = ymax;
- if (y1 <= y0) return;
-
- temp = xmin - x0;
- if (temp > 0) { x0 = xmin; u0 += temp; }
- if (x1 > xmax) x1 = xmax;
- x1 -= x0;
- if (x1 <= 0) return;
- }
- }
-
- {
- u16 *Pixel = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, y0)];
- const int li=linesInterlace;
- const u32 masku=TextureWindow[2];
- const u32 maskv=TextureWindow[3];
-
- for (;y0<y1;++y0) {
- if( 0 == (y0&li) ) gpuSpriteSpanDriver(Pixel,x1,FRAME_OFFSET(u0,v0),masku);
- Pixel += FRAME_WIDTH;
- v0 = (v0+1)&maskv;
- }
+ s32 x0, x1, y0, y1;
+ u32 u0, v0;
+
+ //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y,
+ // or sprites in 1st level of SkullMonkeys disappear when walking right.
+ // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
+ x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]);
+ y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]);
+
+ u32 w = packet.U2[6] & 0x3ff; // Max width is 1023
+ u32 h = packet.U2[7] & 0x1ff; // Max height is 511
+ x1 = x0 + w;
+ y1 = y0 + h;
+
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+
+ u0 = packet.U1[8];
+ v0 = packet.U1[9];
+
+ s32 temp;
+ temp = ymin - y0;
+ if (temp > 0) { y0 = ymin; v0 += temp; }
+ if (y1 > ymax) y1 = ymax;
+ if (y1 <= y0) return;
+
+ temp = xmin - x0;
+ if (temp > 0) { x0 = xmin; u0 += temp; }
+ if (x1 > xmax) x1 = xmax;
+ x1 -= x0;
+ if (x1 <= 0) return;
+
+ gpu_unai.r5 = packet.U1[0] >> 3;
+ gpu_unai.g5 = packet.U1[1] >> 3;
+ gpu_unai.b5 = packet.U1[2] >> 3;
+
+ u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)];
+ const int li=gpu_unai.ilace_mask;
+ const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+ unsigned int tmode = gpu_unai.TEXT_MODE >> 5;
+ const u32 v0_mask = gpu_unai.TextureWindow[3];
+ u8* pTxt_base = (u8*)gpu_unai.TBA;
+
+ // Texture is accessed byte-wise, so adjust idx if 16bpp
+ if (tmode == 3) u0 <<= 1;
+
+ for (; y0<y1; ++y0) {
+ u8* pTxt = pTxt_base + ((v0 & v0_mask) * 2048);
+ if (!(y0&li) && (y0&pi)!=pif)
+ gpuSpriteSpanDriver(Pixel, x1, pTxt, u0);
+ Pixel += FRAME_WIDTH;
+ v0++;
}
}
#ifdef __arm__
#include "gpu_arm.h"
-void gpuDrawS16(void)
+/* Notaz 4bit sprites optimization */
+void gpuDrawS16(PtrUnion packet)
{
s32 x0, y0;
s32 u0, v0;
@@ -95,19 +92,22 @@ void gpuDrawS16(void)
s32 ymin, ymax;
u32 h = 16;
- x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]) + DrawingOffset[0];
- y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]) + DrawingOffset[1];
+ //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y,
+ // or sprites in 1st level of SkullMonkeys disappear when walking right.
+ // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
+ x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]);
+ y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]);
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
- u0 = PacketBuffer.U1[8];
- v0 = PacketBuffer.U1[9];
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+ u0 = packet.U1[8];
+ v0 = packet.U1[9];
if (x0 > xmax - 16 || x0 < xmin ||
- ((u0 | v0) & 15) || !(TextureWindow[2] & TextureWindow[3] & 8)) {
+ ((u0 | v0) & 15) || !(gpu_unai.TextureWindow[2] & gpu_unai.TextureWindow[3] & 8)) {
// send corner cases to general handler
- PacketBuffer.U4[3] = 0x00100010;
- gpuDrawS(gpuSpriteSpanFn<0x20>);
+ packet.U4[3] = 0x00100010;
+ gpuDrawS(packet, gpuSpriteSpanFn<0x20>);
return;
}
@@ -121,54 +121,45 @@ void gpuDrawS16(void)
else if (ymax - y0 < 16)
h = ymax - y0;
- draw_spr16_full(&GPU_FrameBuffer[FRAME_OFFSET(x0, y0)], &TBA[FRAME_OFFSET(u0/4, v0)], CBA, h);
+ draw_spr16_full(&gpu_unai.vram[FRAME_OFFSET(x0, y0)], &gpu_unai.TBA[FRAME_OFFSET(u0/4, v0)], gpu_unai.CBA, h);
}
#endif // __arm__
-///////////////////////////////////////////////////////////////////////////////
-void gpuDrawT(const PT gpuTileSpanDriver)
+void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver)
{
- s32 x0, y0;
- s32 x1, y1;
-
- x1 = x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]) + DrawingOffset[0];
- y1 = y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]) + DrawingOffset[1];
- x1+= PacketBuffer.S2[4];
- y1+= PacketBuffer.S2[5];
-
- {
- s32 xmin, xmax;
- s32 ymin, ymax;
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
-
- {
- int rx0 = Max2(xmin,Min2(x0,x1));
- int ry0 = Max2(ymin,Min2(y0,y1));
- int rx1 = Min2(xmax,Max2(x0,x1));
- int ry1 = Min2(ymax,Max2(y0,y1));
- if(rx0>=rx1 || ry0>=ry1) return;
- }
-
- if (y0 < ymin) y0 = ymin;
- if (y1 > ymax) y1 = ymax;
- if (y1 <= y0) return;
-
- if (x0 < xmin) x0 = xmin;
- if (x1 > xmax) x1 = xmax;
- x1 -= x0;
- if (x1 <= 0) return;
- }
-
- {
- u16 *Pixel = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, y0)];
- const u16 Data = GPU_RGB16(PacketBuffer.U4[0]);
- const int li=linesInterlace;
-
- for (; y0<y1; ++y0)
- {
- if( 0 == (y0&li) ) gpuTileSpanDriver(Pixel,x1,Data);
- Pixel += FRAME_WIDTH;
- }
+ s32 x0, x1, y0, y1;
+
+ // 11-bit sign-extend the whole sum (packet X/Y + drawing offset), not just packet X/Y. This matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
+ x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]);
+ y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]);
+
+ u32 w = packet.U2[4] & 0x3ff; // Max width is 1023
+ u32 h = packet.U2[5] & 0x1ff; // Max height is 511
+ x1 = x0 + w;
+ y1 = y0 + h;
+
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+
+ if (y0 < ymin) y0 = ymin;
+ if (y1 > ymax) y1 = ymax;
+ if (y1 <= y0) return;
+
+ if (x0 < xmin) x0 = xmin;
+ if (x1 > xmax) x1 = xmax;
+ x1 -= x0;
+ if (x1 <= 0) return;
+
+ const u16 Data = GPU_RGB16(packet.U4[0]);
+ u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)];
+ const int li=gpu_unai.ilace_mask;
+ const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+
+ for (; y0<y1; ++y0) {
+ if (!(y0&li) && (y0&pi)!=pif)
+ gpuTileSpanDriver(Pixel,x1,Data);
+ Pixel += FRAME_WIDTH;
}
}
diff --git a/plugins/gpu_unai/gpu_unai.h b/plugins/gpu_unai/gpu_unai.h
new file mode 100644
index 0000000..8fb2293
--- /dev/null
+++ b/plugins/gpu_unai/gpu_unai.h
@@ -0,0 +1,318 @@
+/***************************************************************************
+* Copyright (C) 2010 PCSX4ALL Team *
+* Copyright (C) 2010 Unai *
+* Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) *
+* *
+* This program is free software; you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published by *
+* the Free Software Foundation; either version 2 of the License, or *
+* (at your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with this program; if not, write to the *
+* Free Software Foundation, Inc., *
+* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
+***************************************************************************/
+
+#ifndef GPU_UNAI_H
+#define GPU_UNAI_H
+
+#include "gpu.h"
+
+// Header shared between both standalone gpu_unai (gpu.cpp) and new
+// gpulib-compatible gpu_unai (gpulib_if.cpp)
+// -> Anything here should be for gpu_unai's private use. <-
+
+///////////////////////////////////////////////////////////////////////////////
+// Compile Options
+
+//#define ENABLE_GPU_NULL_SUPPORT // Enables NullGPU support
+//#define ENABLE_GPU_LOG_SUPPORT // Enables GPU logger (very slow; only for Windows debugging)
+//#define ENABLE_GPU_ARMV7 // Enables ARMv7 optimized assembly
+
+//Poly routine options (default is integer math and accurate division)
+//#define GPU_UNAI_USE_FLOATMATH // Use float math in poly routines
+//#define GPU_UNAI_USE_FLOAT_DIV_MULTINV // If GPU_UNAI_USE_FLOATMATH is defined,
+ // use multiply-by-inverse for division
+//#define GPU_UNAI_USE_INT_DIV_MULTINV // If GPU_UNAI_USE_FLOATMATH is *not*
+ // defined, use old inaccurate division
+
+
+#define GPU_INLINE static inline __attribute__((always_inline))
+#define INLINE static inline __attribute__((always_inline))
+
+#define u8 uint8_t
+#define s8 int8_t
+#define u16 uint16_t
+#define s16 int16_t
+#define u32 uint32_t
+#define s32 int32_t
+#define s64 int64_t
+
+union PtrUnion
+{
+ u32 *U4;
+ s32 *S4;
+ u16 *U2;
+ s16 *S2;
+ u8 *U1;
+ s8 *S1;
+ void *ptr;
+};
+
+union GPUPacket
+{
+ u32 U4[16];
+ s32 S4[16];
+ u16 U2[32];
+ s16 S2[32];
+ u8 U1[64];
+ s8 S1[64];
+};
+
+template<class T> static inline void SwapValues(T &x, T &y)
+{
+ T tmp(x); x = y; y = tmp;
+}
+
+template<typename T>
+static inline T Min2 (const T a, const T b)
+{
+ return (a<b)?a:b;
+}
+
+template<typename T>
+static inline T Min3 (const T a, const T b, const T c)
+{
+ return Min2(Min2(a,b),c);
+}
+
+template<typename T>
+static inline T Max2 (const T a, const T b)
+{
+ return (a>b)?a:b;
+}
+
+template<typename T>
+static inline T Max3 (const T a, const T b, const T c)
+{
+ return Max2(Max2(a,b),c);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GPU Raster Macros
+
+// Convert 24bpp color parameter of GPU command to 16bpp (15bpp + mask bit)
+#define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3))
+
+// Sign-extend 11-bit coordinate command param
+#define GPU_EXPANDSIGN(x) (((s32)(x)<<(32-11))>>(32-11))
+
+// Max difference between any two X or Y primitive coordinates
+#define CHKMAX_X 1024
+#define CHKMAX_Y 512
+
+#define FRAME_BUFFER_SIZE (1024*512*2)
+#define FRAME_WIDTH 1024
+#define FRAME_HEIGHT 512
+#define FRAME_OFFSET(x,y) (((y)<<10)+(x))
+#define FRAME_BYTE_STRIDE 2048
+#define FRAME_BYTES_PER_PIXEL 2
+
+static inline s32 GPU_DIV(s32 rs, s32 rt)
+{
+ return rt ? (rs / rt) : (0);
+}
+
+// 'Unsafe' version of above that doesn't check for div-by-zero
+#define GPU_FAST_DIV(rs, rt) ((signed)(rs) / (signed)(rt))
+
+struct gpu_unai_t {
+ u32 GPU_GP1;
+ GPUPacket PacketBuffer;
+ u16 *vram;
+
+ ////////////////////////////////////////////////////////////////////////////
+ // Variables used only by older standalone version of gpu_unai (gpu.cpp)
+#ifndef USE_GPULIB
+ u32 GPU_GP0;
+ u32 tex_window; // Current texture window vals (set by GP0(E2h) cmd)
+ s32 PacketCount;
+ s32 PacketIndex;
+ bool fb_dirty; // Framebuffer is dirty (according to GPU)
+
+ // Display status
+ // NOTE: Standalone older gpu_unai didn't care about horiz display range
+ u16 DisplayArea[6]; // [0] : Start of display area (in VRAM) X
+ // [1] : Start of display area (in VRAM) Y
+ // [2] : Display mode resolution HORIZONTAL
+ // [3] : Display mode resolution VERTICAL
+ // [4] : Vertical display range (on TV) START
+ // [5] : Vertical display range (on TV) END
+
+ ////////////////////////////////////////////////////////////////////////////
+ // Dma Transfers info
+ struct {
+ s32 px,py;
+ s32 x_end,y_end;
+ u16* pvram;
+ u32 *last_dma; // Last dma pointer
+ bool FrameToRead; // Load image in progress
+ bool FrameToWrite; // Store image in progress
+ } dma;
+
+ ////////////////////////////////////////////////////////////////////////////
+ // Frameskip
+ struct {
+ int skipCount; // Frame skip (0,1,2,3...)
+ bool isSkip; // Skip frame (according to GPU)
+ bool skipFrame; // Skip this frame (according to frame skip)
+ bool wasSkip; // Skip frame old value (according to GPU)
+ bool skipGPU; // Skip GPU primitives
+ } frameskip;
+#endif
+ // END of standalone gpu_unai variables
+ ////////////////////////////////////////////////////////////////////////////
+
+ u32 TextureWindowCur; // Current setting from last GP0(0xE2) cmd (raw form)
+ u8 TextureWindow[4]; // [0] : Texture window offset X
+ // [1] : Texture window offset Y
+ // [2] : Texture window mask X
+ // [3] : Texture window mask Y
+
+ u16 DrawingArea[4]; // [0] : Drawing area top left X
+ // [1] : Drawing area top left Y
+ // [2] : Drawing area bottom right X
+ // [3] : Drawing area bottom right Y
+
+ s16 DrawingOffset[2]; // [0] : Drawing offset X (signed)
+ // [1] : Drawing offset Y (signed)
+
+ u16* TBA; // Ptr to current texture in VRAM
+ u16* CBA; // Ptr to current CLUT in VRAM
+
+ ////////////////////////////////////////////////////////////////////////////
+ // Inner Loop parameters
+
+ // 22.10 Fixed-pt texture coords, mask, scanline advance
+ // NOTE: U,V are no longer packed together into one u32, this proved to be
+ // too imprecise, leading to pixel dropouts. Example: NFS3's skybox.
+ u32 u, v;
+ u32 u_msk, v_msk;
+ s32 u_inc, v_inc;
+
+ // Color for Gouraud-shaded prims
+ // Packed fixed-pt 8.3:8.3:8.2 rgb triplet
+ // layout: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
+ // ^ bit 31 ^ bit 0
+ u32 gCol;
+ u32 gInc; // Increment along scanline for gCol
+
+ // Color for flat-shaded, texture-blended prims
+ u8 r5, g5, b5; // 5-bit light for undithered prims
+ u8 r8, g8, b8; // 8-bit light for dithered prims
+
+ // Color for flat-shaded, untextured prims
+ u16 PixelData; // bgr555 color for untextured flat-shaded polys
+
+ // End of inner Loop parameters
+ ////////////////////////////////////////////////////////////////////////////
+
+
+ u8 blit_mask; // Determines what pixels to skip when rendering.
+ // Only useful on low-resolution devices using
+ // a simple pixel-dropping downscaler for PS1
+ // high-res modes. See 'pixel_skip' option.
+
+ u8 ilace_mask; // Determines what lines to skip when rendering.
+ // Normally 0 when PS1 240 vertical res is in
+ // use and ilace_force is 0. When running in
+ // PS1 480 vertical res on a low-resolution
+ // device (320x240), will usually be set to 1
+ // so odd lines are not rendered. (Unless future
+ // full-screen scaling option is in use ..TODO)
+
+ bool prog_ilace_flag; // Tracks successive frames for 'prog_ilace' option
+
+ u8 BLEND_MODE;
+ u8 TEXT_MODE;
+ u8 Masking;
+
+ u16 PixelMSB;
+
+ gpu_unai_config_t config;
+
+ u8 LightLUT[32*32]; // 5-bit lighting LUT (gpu_inner_light.h)
+ u32 DitherMatrix[64]; // Matrix of dither coefficients
+};
+
+static gpu_unai_t gpu_unai;
+
+// Global config that the frontend can alter. Values are read in GPU_init() (or renderer_init() in the gpulib build).
+// TODO: if frontend menu modifies a setting, add a function that can notify
+// GPU plugin to use new setting.
+gpu_unai_config_t gpu_unai_config_ext;
+
+///////////////////////////////////////////////////////////////////////////////
+// Internal inline funcs to get option status: (Allows flexibility)
+static inline bool LightingEnabled()
+{
+ return gpu_unai.config.lighting;
+}
+
+static inline bool FastLightingEnabled()
+{
+ return gpu_unai.config.fast_lighting;
+}
+
+static inline bool BlendingEnabled()
+{
+ return gpu_unai.config.blending;
+}
+
+static inline bool DitheringEnabled()
+{
+ return gpu_unai.config.dithering;
+}
+
+// For now, this is just for development/experimentation purposes.
+// If modified to return true, it will allow ignoring the status register
+// bit 9 setting (dither enable). It will still restrict dithering only
+// to Gouraud-shaded or texture-blended polys.
+static inline bool ForcedDitheringEnabled()
+{
+ return false;
+}
+
+static inline bool ProgressiveInterlaceEnabled()
+{
+#ifdef USE_GPULIB
+ // Using this old option greatly decreases quality of image. Disabled
+ // for now when using new gpulib, since it also adds more work in loops.
+ return false;
+#else
+ return gpu_unai.config.prog_ilace;
+#endif
+}
+
+// For now, 320x240 output resolution is assumed, using simple line-skipping
+// and pixel-skipping downscaler.
+// TODO: Flesh these out so they return useful values based on whether
+// running on higher-res device or a resampling downscaler is enabled.
+static inline bool PixelSkipEnabled()
+{
+ return gpu_unai.config.pixel_skip;
+}
+
+static inline bool LineSkipEnabled()
+{
+ return true;
+}
+
+#endif // GPU_UNAI_H
diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp
index e9a199c..8b5174e 100644
--- a/plugins/gpu_unai/gpulib_if.cpp
+++ b/plugins/gpu_unai/gpulib_if.cpp
@@ -2,6 +2,7 @@
* Copyright (C) 2010 PCSX4ALL Team *
* Copyright (C) 2010 Unai *
* Copyright (C) 2011 notaz *
+* Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -19,140 +20,81 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
***************************************************************************/
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../gpulib/gpu.h"
-#include "arm_features.h"
-
-#define u8 uint8_t
-#define s8 int8_t
-#define u16 uint16_t
-#define s16 int16_t
-#define u32 uint32_t
-#define s32 int32_t
-#define s64 int64_t
-
-#define INLINE static
-
-#define FRAME_BUFFER_SIZE (1024*512*2)
-#define FRAME_WIDTH 1024
-#define FRAME_HEIGHT 512
-#define FRAME_OFFSET(x,y) (((y)<<10)+(x))
-
-#define isSkip 0 /* skip frame (info coming from GPU) */
-#define alt_fps 0
-static int linesInterlace; /* internal lines interlace */
-static int force_interlace;
-
-static bool light = true; /* lighting */
-static bool blend = true; /* blending */
-static bool FrameToRead = false; /* load image in progress */
-static bool FrameToWrite = false; /* store image in progress */
-
-static bool enableAbbeyHack = false; /* Abe's Odyssey hack */
-
-static u8 BLEND_MODE;
-static u8 TEXT_MODE;
-static u8 Masking;
-
-static u16 PixelMSB;
-static u16 PixelData;
-
-///////////////////////////////////////////////////////////////////////////////
-// GPU Global data
-///////////////////////////////////////////////////////////////////////////////
-
-// Dma Transfers info
-static s32 px,py;
-static s32 x_end,y_end;
-static u16* pvram;
-
-static s32 PacketCount;
-static s32 PacketIndex;
-
-// Rasterizer status
-static u32 TextureWindow [4];
-static u32 DrawingArea [4];
-static u32 DrawingOffset [2];
-
-static u16* TBA;
-static u16* CBA;
-
-// Inner Loops
-static s32 u4, du4;
-static s32 v4, dv4;
-static s32 r4, dr4;
-static s32 g4, dg4;
-static s32 b4, db4;
-static u32 lInc;
-static u32 tInc, tMsk;
-
-union GPUPacket
-{
- u32 U4[16];
- s32 S4[16];
- u16 U2[32];
- s16 S2[32];
- u8 U1[64];
- s8 S1[64];
-};
-
-static GPUPacket PacketBuffer;
-static u16 *GPU_FrameBuffer;
-static u32 GPU_GP1;
-
-///////////////////////////////////////////////////////////////////////////////
-
-#include "../gpu_unai/gpu_fixedpoint.h"
-
-// Inner loop driver instanciation file
-#include "../gpu_unai/gpu_inner.h"
-
-// GPU Raster Macros
-#define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3))
+//#include "port.h"
+#include "gpu_unai.h"
-#define GPU_EXPANDSIGN(x) (((s32)(x)<<21)>>21)
+// GPU fixed point math
+#include "gpu_fixedpoint.h"
-#define CHKMAX_X 1024
-#define CHKMAX_Y 512
-
-#define GPU_SWAP(a,b,t) {(t)=(a);(a)=(b);(b)=(t);}
+// Inner loop driver instantiation file
+#include "gpu_inner.h"
// GPU internal image drawing functions
-#include "../gpu_unai/gpu_raster_image.h"
+#include "gpu_raster_image.h"
// GPU internal line drawing functions
-#include "../gpu_unai/gpu_raster_line.h"
+#include "gpu_raster_line.h"
// GPU internal polygon drawing functions
-#include "../gpu_unai/gpu_raster_polygon.h"
+#include "gpu_raster_polygon.h"
// GPU internal sprite drawing functions
-#include "../gpu_unai/gpu_raster_sprite.h"
+#include "gpu_raster_sprite.h"
// GPU command buffer execution/store
-#include "../gpu_unai/gpu_command.h"
+#include "gpu_command.h"
/////////////////////////////////////////////////////////////////////////////
int renderer_init(void)
{
- GPU_FrameBuffer = (u16 *)gpu.vram;
-
- // s_invTable
- for(int i=1;i<=(1<<TABLE_BITS);++i)
- {
- double v = 1.0 / double(i);
- #ifdef GPU_TABLE_10_BITS
- v *= double(0xffffffff>>1);
- #else
- v *= double(0x80000000);
- #endif
- s_invTable[i-1]=s32(v);
- }
-
- return 0;
+ memset((void*)&gpu_unai, 0, sizeof(gpu_unai));
+ gpu_unai.vram = (u16*)gpu.vram;
+
+ // Original standalone gpu_unai initialized TextureWindow[]. I added the
+ // same behavior here, since it seems unsafe to leave [2],[3] unset when
+ // using HLE, and Rearmed gpu_neon sets this similarly on init. -senquack
+ gpu_unai.TextureWindow[0] = 0;
+ gpu_unai.TextureWindow[1] = 0;
+ gpu_unai.TextureWindow[2] = 255;
+ gpu_unai.TextureWindow[3] = 255;
+ //senquack - new vars must be updated whenever texture window is changed:
+ // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h)
+ const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4
+ gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1);
+ gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
+
+ // Configuration options
+ gpu_unai.config = gpu_unai_config_ext;
+ //senquack - disabled, not sure this is needed and would require modifying
+ // sprite-span functions, perhaps unnecessarily. No Abe's Oddysee hack was
+ // present in latest PCSX4ALL sources we were using.
+ //gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack;
+ gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
+
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ // s_invTable
+ for(int i=1;i<=(1<<TABLE_BITS);++i)
+ {
+ double v = 1.0 / double(i);
+#ifdef GPU_TABLE_10_BITS
+ v *= double(0xffffffff>>1);
+#else
+ v *= double(0x80000000);
+#endif
+ s_invTable[i-1]=s32(v);
+ }
+#endif
+
+ SetupLightLUT();
+ SetupDitheringConstants();
+
+ return 0;
}
void renderer_finish(void)
@@ -161,6 +103,111 @@ void renderer_finish(void)
void renderer_notify_res_change(void)
{
+ if (PixelSkipEnabled()) {
+ // Set blit_mask for high horizontal resolutions. This allows skipping
+ // rendering pixels that would never get displayed on low-resolution
+ // platforms that use simple pixel-dropping scaler.
+
+ switch (gpu.screen.hres)
+ {
+ case 512: gpu_unai.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS
+ case 640: gpu_unai.blit_mask = 0xaa; break; // GPU_BlitWS
+ default: gpu_unai.blit_mask = 0; break;
+ }
+ } else {
+ gpu_unai.blit_mask = 0;
+ }
+
+ if (LineSkipEnabled()) {
+ // Set rendering line-skip (only render every other line in high-res
+ // 480 vertical mode, or, optionally, force it for all video modes)
+
+ if (gpu.screen.vres == 480) {
+ if (gpu_unai.config.ilace_force) {
+ gpu_unai.ilace_mask = 3; // Only need 1/4 of lines
+ } else {
+ gpu_unai.ilace_mask = 1; // Only need 1/2 of lines
+ }
+ } else {
+ // Vert resolution changed from 480 to lower one
+ gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
+ }
+ } else {
+ gpu_unai.ilace_mask = 0;
+ }
+
+ /*
+ printf("res change hres: %d vres: %d depth: %d ilace_mask: %d\n",
+ gpu.screen.hres, gpu.screen.vres, gpu.status.rgb24 ? 24 : 15,
+ gpu_unai.ilace_mask);
+ */
+}
+
+// Handles GP0 draw settings commands 0xE1...0xE6
+static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word)
+{
+ // Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6
+ u8 num = (cmd_word >> 24) & 7;
+ gpu.ex_regs[num] = cmd_word; // Update gpulib register
+ switch (num) {
+ case 1: {
+ // GP0(E1h) - Draw Mode setting (aka "Texpage")
+ u32 cur_texpage = gpu_unai.GPU_GP1 & 0x7FF;
+ u32 new_texpage = cmd_word & 0x7FF;
+ if (cur_texpage != new_texpage) {
+ gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x7FF) | new_texpage;
+ gpuSetTexture(gpu_unai.GPU_GP1);
+ }
+ } break;
+
+ case 2: {
+ // GP0(E2h) - Texture Window setting
+ if (cmd_word != gpu_unai.TextureWindowCur) {
+ static const u8 TextureMask[32] = {
+ 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7,
+ 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7
+ };
+ gpu_unai.TextureWindowCur = cmd_word;
+ gpu_unai.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3;
+ gpu_unai.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3;
+ gpu_unai.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F];
+ gpu_unai.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F];
+ gpu_unai.TextureWindow[0] &= ~gpu_unai.TextureWindow[2];
+ gpu_unai.TextureWindow[1] &= ~gpu_unai.TextureWindow[3];
+
+ // Inner loop vars must be updated whenever texture window is changed:
+ const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4
+ gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1);
+ gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
+
+ gpuSetTexture(gpu_unai.GPU_GP1);
+ }
+ } break;
+
+ case 3: {
+ // GP0(E3h) - Set Drawing Area top left (X1,Y1)
+ gpu_unai.DrawingArea[0] = cmd_word & 0x3FF;
+ gpu_unai.DrawingArea[1] = (cmd_word >> 10) & 0x3FF;
+ } break;
+
+ case 4: {
+ // GP0(E4h) - Set Drawing Area bottom right (X2,Y2)
+ gpu_unai.DrawingArea[2] = (cmd_word & 0x3FF) + 1;
+ gpu_unai.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1;
+ } break;
+
+ case 5: {
+ // GP0(E5h) - Set Drawing Offset (X,Y)
+ gpu_unai.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11);
+ gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11);
+ } break;
+
+ case 6: {
+ // GP0(E6h) - Mask Bit Setting
+ gpu_unai.Masking = (cmd_word & 0x2) << 1;
+ gpu_unai.PixelMSB = (cmd_word & 0x1) << 8;
+ } break;
+ }
}
extern const unsigned char cmd_lengths[256];
@@ -171,9 +218,12 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
u32 *list_start = list;
u32 *list_end = list + list_len;
- linesInterlace = force_interlace;
+ //TODO: set ilace_mask when resolution changes instead of every time,
+ // eliminate #ifdef below.
+ gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
+
#ifdef HAVE_PRE_ARMV7 /* XXX */
- linesInterlace |= gpu.status.interlace;
+ gpu_unai.ilace_mask |= gpu.status.interlace;
#endif
for (; list < list_end; list += 1 + len)
@@ -186,126 +236,175 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
}
#define PRIM cmd
- PacketBuffer.U4[0] = list[0];
+ gpu_unai.PacketBuffer.U4[0] = list[0];
for (i = 1; i <= len; i++)
- PacketBuffer.U4[i] = list[i];
+ gpu_unai.PacketBuffer.U4[i] = list[i];
+
+ PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer };
switch (cmd)
{
case 0x02:
- gpuClearImage();
+ gpuClearImage(packet);
break;
case 0x20:
case 0x21:
case 0x22:
- case 0x23:
- gpuDrawF3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB]);
- break;
+ case 0x23: { // Monochrome 3-pt poly
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Blending_Mode |
+ gpu_unai.Masking | Blending | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyF(packet, driver, false);
+ } break;
case 0x24:
case 0x25:
case 0x26:
- case 0x27:
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture(PacketBuffer.U4[4] >> 16);
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB]);
- else
- gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB]);
- break;
+ case 0x27: { // Textured 3-pt poly
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16);
+
+ u32 driver_idx =
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode | gpu_unai.TEXT_MODE |
+ gpu_unai.Masking | Blending | gpu_unai.PixelMSB;
+
+ if (!FastLightingEnabled()) {
+ driver_idx |= Lighting;
+ } else {
+ if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)))
+ driver_idx |= Lighting;
+ }
+
+ PP driver = gpuPolySpanDrivers[driver_idx];
+ gpuDrawPolyFT(packet, driver, false);
+ } break;
case 0x28:
case 0x29:
case 0x2A:
- case 0x2B: {
- const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB];
- gpuDrawF3(gpuPolySpanDriver);
- PacketBuffer.U4[1] = PacketBuffer.U4[4];
- gpuDrawF3(gpuPolySpanDriver);
- break;
- }
+ case 0x2B: { // Monochrome 4-pt poly
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Blending_Mode |
+ gpu_unai.Masking | Blending | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyF(packet, driver, true); // is_quad = true
+ } break;
case 0x2C:
case 0x2D:
case 0x2E:
- case 0x2F: {
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture(PacketBuffer.U4[4] >> 16);
- PP gpuPolySpanDriver;
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB];
- else
- gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB];
- gpuDrawFT3(gpuPolySpanDriver);
- PacketBuffer.U4[1] = PacketBuffer.U4[7];
- PacketBuffer.U4[2] = PacketBuffer.U4[8];
- gpuDrawFT3(gpuPolySpanDriver);
- break;
- }
+ case 0x2F: { // Textured 4-pt poly
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16);
+
+ u32 driver_idx =
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode | gpu_unai.TEXT_MODE |
+ gpu_unai.Masking | Blending | gpu_unai.PixelMSB;
+
+ if (!FastLightingEnabled()) {
+ driver_idx |= Lighting;
+ } else {
+ if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)))
+ driver_idx |= Lighting;
+ }
+
+ PP driver = gpuPolySpanDrivers[driver_idx];
+ gpuDrawPolyFT(packet, driver, true); // is_quad = true
+ } break;
case 0x30:
case 0x31:
case 0x32:
- case 0x33:
- gpuDrawG3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB]);
- break;
+ case 0x33: { // Gouraud-shaded 3-pt poly
+ //NOTE: The '129' here is CF_GOURAUD | CF_LIGHT, however
+ // this is an untextured poly, so CF_LIGHT (texture blend)
+ // shouldn't apply. Until the original array of template
+ // instantiation ptrs is fixed, we're stuck with this. (TODO)
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode |
+ gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyG(packet, driver, false);
+ } break;
case 0x34:
case 0x35:
case 0x36:
- case 0x37:
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (PacketBuffer.U4[5] >> 16);
- gpuDrawGT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB]);
- break;
+ case 0x37: { // Gouraud-shaded, textured 3-pt poly
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode | gpu_unai.TEXT_MODE |
+ gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyGT(packet, driver, false);
+ } break;
case 0x38:
case 0x39:
case 0x3A:
- case 0x3B: {
- const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB];
- gpuDrawG3(gpuPolySpanDriver);
- PacketBuffer.U4[0] = PacketBuffer.U4[6];
- PacketBuffer.U4[1] = PacketBuffer.U4[7];
- gpuDrawG3(gpuPolySpanDriver);
- break;
- }
+ case 0x3B: { // Gouraud-shaded 4-pt poly
+ // See notes regarding '129' for 0x30..0x33 further above -senquack
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode |
+ gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyG(packet, driver, true); // is_quad = true
+ } break;
case 0x3C:
case 0x3D:
case 0x3E:
- case 0x3F: {
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (PacketBuffer.U4[5] >> 16);
- const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB];
- gpuDrawGT3(gpuPolySpanDriver);
- PacketBuffer.U4[0] = PacketBuffer.U4[9];
- PacketBuffer.U4[1] = PacketBuffer.U4[10];
- PacketBuffer.U4[2] = PacketBuffer.U4[11];
- gpuDrawGT3(gpuPolySpanDriver);
- break;
- }
+ case 0x3F: { // Gouraud-shaded, textured 4-pt poly
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+ PP driver = gpuPolySpanDrivers[
+ (gpu_unai.blit_mask?1024:0) |
+ Dithering |
+ Blending_Mode | gpu_unai.TEXT_MODE |
+ gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
+ ];
+ gpuDrawPolyGT(packet, driver, true); // is_quad = true
+ } break;
case 0x40:
case 0x41:
case 0x42:
- case 0x43:
- gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
- break;
-
- case 0x48 ... 0x4F:
- {
+ case 0x43: { // Monochrome line
+ // Shift index right by one, as untextured prims don't use lighting
+ u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+ PSD driver = gpuPixelSpanDrivers[driver_idx];
+ gpuDrawLineF(packet, driver);
+ } break;
+
+ case 0x48 ... 0x4F: { // Monochrome line strip
u32 num_vertexes = 1;
u32 *list_position = &(list[2]);
- gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
+ // Shift index right by one, as untextured prims don't use lighting
+ u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+ PSD driver = gpuPixelSpanDrivers[driver_idx];
+ gpuDrawLineF(packet, driver);
while(1)
{
- PacketBuffer.U4[1] = PacketBuffer.U4[2];
- PacketBuffer.U4[2] = *list_position++;
- gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
+ gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2];
+ gpu_unai.PacketBuffer.U4[2] = *list_position++;
+ gpuDrawLineF(packet, driver);
num_vertexes++;
if(list_position >= list_end) {
@@ -317,30 +416,38 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
}
len += (num_vertexes - 2);
- break;
- }
+ } break;
case 0x50:
case 0x51:
case 0x52:
- case 0x53:
- gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
- break;
-
- case 0x58 ... 0x5F:
- {
+ case 0x53: { // Gouraud-shaded line
+ // Shift index right by one, as untextured prims don't use lighting
+ u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+ // Index MSB selects Gouraud-shaded PixelSpanDriver:
+ driver_idx |= (1 << 5);
+ PSD driver = gpuPixelSpanDrivers[driver_idx];
+ gpuDrawLineG(packet, driver);
+ } break;
+
+ case 0x58 ... 0x5F: { // Gouraud-shaded line strip
u32 num_vertexes = 1;
u32 *list_position = &(list[2]);
- gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
+ // Shift index right by one, as untextured prims don't use lighting
+ u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+ // Index MSB selects Gouraud-shaded PixelSpanDriver:
+ driver_idx |= (1 << 5);
+ PSD driver = gpuPixelSpanDrivers[driver_idx];
+ gpuDrawLineG(packet, driver);
while(1)
{
- PacketBuffer.U4[0] = PacketBuffer.U4[2];
- PacketBuffer.U4[1] = PacketBuffer.U4[3];
- PacketBuffer.U4[2] = *list_position++;
- PacketBuffer.U4[3] = *list_position++;
- gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
+ gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2];
+ gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[3];
+ gpu_unai.PacketBuffer.U4[2] = *list_position++;
+ gpu_unai.PacketBuffer.U4[3] = *list_position++;
+ gpuDrawLineG(packet, driver);
num_vertexes++;
if(list_position >= list_end) {
@@ -352,91 +459,116 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
}
len += (num_vertexes - 2) * 2;
- break;
- }
+ } break;
case 0x60:
case 0x61:
case 0x62:
- case 0x63:
- gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
- break;
+ case 0x63: { // Monochrome rectangle (variable size)
+ PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ gpuDrawT(packet, driver);
+ } break;
case 0x64:
case 0x65:
case 0x66:
- case 0x67:
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (GPU_GP1);
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]);
- else
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]);
- break;
+ case 0x67: { // Textured rectangle (variable size)
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+
+ //senquack - Only color 808080h-878787h allows skipping lighting calculation:
+ // This fixes Silent Hill running animation on loading screens:
+ // (On PSX, color values 0x00-0x7F darken the source texture's color,
+ // 0x81-0xFF lighten textures (ultimately clamped to 0x1F), and
+ // 0x80 leaves source texture color unchanged. HOWEVER,
+ // gpu_unai uses a simple lighting LUT whereby only the upper
+ // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as
+ // 0x80.)
+ //
+ // NOTE: I've changed all textured sprite draw commands here and
+ // elsewhere to use proper behavior, but left poly commands
+ // alone, as I don't want to slow rendering down too much. (TODO)
+ //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+ // Strip lower 3 bits of each color and determine if lighting should be used:
+ if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ driver_idx |= Lighting;
+ PS driver = gpuSpriteSpanDrivers[driver_idx];
+ gpuDrawS(packet, driver);
+ } break;
case 0x68:
case 0x69:
case 0x6A:
- case 0x6B:
- PacketBuffer.U4[2] = 0x00010001;
- gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
- break;
+ case 0x6B: { // Monochrome rectangle (1x1 dot)
+ gpu_unai.PacketBuffer.U4[2] = 0x00010001;
+ PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ gpuDrawT(packet, driver);
+ } break;
case 0x70:
case 0x71:
case 0x72:
- case 0x73:
- PacketBuffer.U4[2] = 0x00080008;
- gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
- break;
+ case 0x73: { // Monochrome rectangle (8x8)
+ gpu_unai.PacketBuffer.U4[2] = 0x00080008;
+ PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ gpuDrawT(packet, driver);
+ } break;
case 0x74:
case 0x75:
case 0x76:
- case 0x77:
- PacketBuffer.U4[3] = 0x00080008;
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (GPU_GP1);
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]);
- else
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]);
- break;
+ case 0x77: { // Textured rectangle (8x8)
+ gpu_unai.PacketBuffer.U4[3] = 0x00080008;
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+
+ //senquack - Only color 808080h-878787h allows skipping lighting calculation:
+ //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+ // Strip lower 3 bits of each color and determine if lighting should be used:
+ if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ driver_idx |= Lighting;
+ PS driver = gpuSpriteSpanDrivers[driver_idx];
+ gpuDrawS(packet, driver);
+ } break;
case 0x78:
case 0x79:
case 0x7A:
- case 0x7B:
- PacketBuffer.U4[2] = 0x00100010;
- gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
- break;
+ case 0x7B: { // Monochrome rectangle (16x16)
+ gpu_unai.PacketBuffer.U4[2] = 0x00100010;
+ PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ gpuDrawT(packet, driver);
+ } break;
case 0x7C:
case 0x7D:
#ifdef __arm__
- if ((GPU_GP1 & 0x180) == 0 && (Masking | PixelMSB) == 0)
+ if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0)
{
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (GPU_GP1);
- gpuDrawS16();
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuDrawS16(packet);
break;
}
// fallthrough
#endif
case 0x7E:
- case 0x7F:
- PacketBuffer.U4[3] = 0x00100010;
- gpuSetCLUT (PacketBuffer.U4[2] >> 16);
- gpuSetTexture (GPU_GP1);
- if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]);
- else
- gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]);
- break;
+ case 0x7F: { // Textured rectangle (16x16)
+ gpu_unai.PacketBuffer.U4[3] = 0x00100010;
+ gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+ //senquack - Only color 808080h-878787h allows skipping lighting calculation:
+ //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+ // Strip lower 3 bits of each color and determine if lighting should be used:
+ if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ driver_idx |= Lighting;
+ PS driver = gpuSpriteSpanDrivers[driver_idx];
+ gpuDrawS(packet, driver);
+ } break;
case 0x80: // vid -> vid
- gpuMoveImage(); // prim handles updateLace && skip
+ gpuMoveImage(packet);
break;
+
#ifdef TEST
case 0xA0: // sys -> vid
{
@@ -445,70 +577,25 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
u32 load_size = load_width * load_height;
len += load_size / 2;
- break;
- }
+ } break;
+
case 0xC0:
break;
#else
case 0xA0: // sys ->vid
case 0xC0: // vid -> sys
+ // Handled by gpulib
goto breakloop;
#endif
- case 0xE1: {
- const u32 temp = PacketBuffer.U4[0];
- GPU_GP1 = (GPU_GP1 & ~0x000007FF) | (temp & 0x000007FF);
- gpuSetTexture(temp);
- gpu.ex_regs[1] = temp;
- break;
- }
- case 0xE2: {
- static const u8 TextureMask[32] = {
- 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7,
- 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7
- };
- const u32 temp = PacketBuffer.U4[0];
- TextureWindow[0] = ((temp >> 10) & 0x1F) << 3;
- TextureWindow[1] = ((temp >> 15) & 0x1F) << 3;
- TextureWindow[2] = TextureMask[(temp >> 0) & 0x1F];
- TextureWindow[3] = TextureMask[(temp >> 5) & 0x1F];
- gpuSetTexture(GPU_GP1);
- gpu.ex_regs[2] = temp;
- break;
- }
- case 0xE3: {
- const u32 temp = PacketBuffer.U4[0];
- DrawingArea[0] = temp & 0x3FF;
- DrawingArea[1] = (temp >> 10) & 0x3FF;
- gpu.ex_regs[3] = temp;
- break;
- }
- case 0xE4: {
- const u32 temp = PacketBuffer.U4[0];
- DrawingArea[2] = (temp & 0x3FF) + 1;
- DrawingArea[3] = ((temp >> 10) & 0x3FF) + 1;
- gpu.ex_regs[4] = temp;
- break;
- }
- case 0xE5: {
- const u32 temp = PacketBuffer.U4[0];
- DrawingOffset[0] = ((s32)temp<<(32-11))>>(32-11);
- DrawingOffset[1] = ((s32)temp<<(32-22))>>(32-11);
- gpu.ex_regs[5] = temp;
- break;
- }
- case 0xE6: {
- const u32 temp = PacketBuffer.U4[0];
- Masking = (temp & 0x2) << 1;
- PixelMSB =(temp & 0x1) << 8;
- gpu.ex_regs[6] = temp;
- break;
- }
+ case 0xE1 ... 0xE6: { // Draw settings
+ gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]);
+ } break;
}
}
breakloop:
gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= GPU_GP1 & 0x1ff;
+ gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff;
*last_cmd = cmd;
return list - list_start;
@@ -532,20 +619,17 @@ void renderer_set_interlace(int enable, int is_odd)
{
}
-#ifndef TEST
-
#include "../../frontend/plugin_lib.h"
-
+// Handle any gpulib settings applicable to gpu_unai:
void renderer_set_config(const struct rearmed_cbs *cbs)
{
- force_interlace = cbs->gpu_unai.lineskip;
- enableAbbeyHack = cbs->gpu_unai.abe_hack;
- light = !cbs->gpu_unai.no_light;
- blend = !cbs->gpu_unai.no_blend;
-
- GPU_FrameBuffer = (u16 *)gpu.vram;
+ gpu_unai.vram = (u16*)gpu.vram;
+ gpu_unai.config.ilace_force = cbs->gpu_unai.ilace_force;
+ gpu_unai.config.pixel_skip = cbs->gpu_unai.pixel_skip;
+ gpu_unai.config.lighting = cbs->gpu_unai.lighting;
+ gpu_unai.config.fast_lighting = cbs->gpu_unai.fast_lighting;
+ gpu_unai.config.blending = cbs->gpu_unai.blending;
+ gpu_unai.config.dithering = cbs->gpu_unai.dithering;
}
-#endif
-
// vim:shiftwidth=2:expandtab