aboutsummaryrefslogtreecommitdiff
path: root/plugins/gpu_unai/gpu_raster_polygon.h
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/gpu_unai/gpu_raster_polygon.h')
-rw-r--r--plugins/gpu_unai/gpu_raster_polygon.h1997
1 files changed, 1348 insertions, 649 deletions
diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h
index c4b0350..f66a9e2 100644
--- a/plugins/gpu_unai/gpu_raster_polygon.h
+++ b/plugins/gpu_unai/gpu_raster_polygon.h
@@ -18,732 +18,1431 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
***************************************************************************/
-#define GPU_TESTRANGE3() \
-{ \
- if(x0<0) { if((x1-x0)>CHKMAX_X) return; if((x2-x0)>CHKMAX_X) return; } \
- if(x1<0) { if((x0-x1)>CHKMAX_X) return; if((x2-x1)>CHKMAX_X) return; } \
- if(x2<0) { if((x0-x2)>CHKMAX_X) return; if((x1-x2)>CHKMAX_X) return; } \
- if(y0<0) { if((y1-y0)>CHKMAX_Y) return; if((y2-y0)>CHKMAX_Y) return; } \
- if(y1<0) { if((y0-y1)>CHKMAX_Y) return; if((y2-y1)>CHKMAX_Y) return; } \
- if(y2<0) { if((y0-y2)>CHKMAX_Y) return; if((y1-y2)>CHKMAX_Y) return; } \
-}
+//senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
+// from DrHell routines to fix multiple issues. See README_senquack.txt
///////////////////////////////////////////////////////////////////////////////
-// GPU internal polygon drawing functions
+// Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
+///////////////////////////////////////////////////////////////////////////////
+struct PolyVertex {
+ s32 x, y; // Sign-extended 11-bit X,Y coords
+ union {
+ struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
+ u32 tex_word;
+ };
+ union {
+ struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
+ u32 col_word;
+ };
+};
+
+enum PolyAttribute {
+ POLYATTR_TEXTURE = (1 << 0),
+ POLYATTR_GOURAUD = (1 << 1)
+};
+
+enum PolyType {
+ POLYTYPE_F = 0,
+ POLYTYPE_FT = (POLYATTR_TEXTURE),
+ POLYTYPE_G = (POLYATTR_GOURAUD),
+ POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD)
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// polyInitVertexBuffer()
+// Fills vbuf[] array with data from any type of poly draw-command packet.
///////////////////////////////////////////////////////////////////////////////
-void gpuDrawF3(const PP gpuPolySpanDriver)
+static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
{
- const int li=linesInterlace;
- s32 temp;
- s32 xa, xb, xmin, xmax;
- s32 ya, yb, ymin, ymax;
- s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
- s32 y0, y1, y2;
+ bool texturing = ptype & POLYATTR_TEXTURE;
+ bool gouraud = ptype & POLYATTR_GOURAUD;
+
+ int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
+ if (texturing)
+ vert_stride++;
+ if (gouraud)
+ vert_stride++;
+
+ int num_verts = (is_quad) ? 4 : 3;
+ u32 *ptr;
+
+ // X,Y coords, adjusted by draw offsets
+ s32 x_off = gpu_unai.DrawingOffset[0];
+ s32 y_off = gpu_unai.DrawingOffset[1];
+ ptr = &packet.U4[1];
+ for (int i=0; i < num_verts; ++i, ptr += vert_stride) {
+ s16* coord_ptr = (s16*)ptr;
+ vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off;
+ vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off;
+ }
- x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]);
- y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]);
- x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]);
- y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]);
- x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]);
- y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]);
+ // U,V texture coords (if applicable)
+ if (texturing) {
+ ptr = &packet.U4[2];
+ for (int i=0; i < num_verts; ++i, ptr += vert_stride)
+ vbuf[i].tex_word = *ptr;
+ }
- GPU_TESTRANGE3();
+ // Colors (if applicable)
+ if (gouraud) {
+ ptr = &packet.U4[0];
+ for (int i=0; i < num_verts; ++i, ptr += vert_stride)
+ vbuf[i].col_word = *ptr;
+ }
+}
- x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
- y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
+///////////////////////////////////////////////////////////////////////////////
+// Helper functions to determine which vertex in a 2 or 3 vertex array
+// has the highest/lowest X/Y coordinate.
+// Note: the comparison logic is such that, given a set of vertices with
+// identical values for a given coordinate, a different index will be
+// returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..().
+// This ensures that, during the vertex-ordering phase of rasterization,
+// all three vertices remain unique.
+///////////////////////////////////////////////////////////////////////////////
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
+template<typename T>
+static inline int vertIdxOfLeastXCoord2(const T *Tptr)
+{
+ return (Tptr[0].x <= Tptr[1].x) ? 0 : 1;
+}
- {
- int rx0 = Max2(xmin,Min3(x0,x1,x2));
- int ry0 = Max2(ymin,Min3(y0,y1,y2));
- int rx1 = Min2(xmax,Max3(x0,x1,x2));
- int ry1 = Min2(ymax,Max3(y0,y1,y2));
- if( rx0>=rx1 || ry0>=ry1) return;
- }
-
- PixelData = GPU_RGB16(PacketBuffer.U4[0]);
+template<typename T>
+static inline int vertIdxOfLeastXCoord3(const T *Tptr)
+{
+ int least_of_v0_v1 = vertIdxOfLeastXCoord2(Tptr);
+ return (Tptr[least_of_v0_v1].x <= Tptr[2].x) ? least_of_v0_v1 : 2;
+}
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- }
- }
- if (y1 >= y2)
- {
- if( y1!=y2 || x1>x2 )
- {
- GPU_SWAP(x1, x2, temp);
- GPU_SWAP(y1, y2, temp);
- }
- }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- }
- }
+template<typename T>
+static inline int vertIdxOfLeastYCoord2(const T *Tptr)
+{
+ return (Tptr[0].y <= Tptr[1].y) ? 0 : 1;
+}
- ya = y2 - y0;
- yb = y2 - y1;
- dx =(x2 - x1) * ya - (x2 - x0) * yb;
+template<typename T>
+static inline int vertIdxOfLeastYCoord3(const T *Tptr)
+{
+ int least_of_v0_v1 = vertIdxOfLeastYCoord2(Tptr);
+ return (Tptr[least_of_v0_v1].y <= Tptr[2].y) ? least_of_v0_v1 : 2;
+}
+
+template<typename T>
+static inline int vertIdxOfHighestXCoord2(const T *Tptr)
+{
+ return (Tptr[1].x >= Tptr[0].x) ? 1 : 0;
+}
+
+template<typename T>
+static inline int vertIdxOfHighestXCoord3(const T *Tptr)
+{
+ int highest_of_v0_v1 = vertIdxOfHighestXCoord2(Tptr);
+ return (Tptr[2].x >= Tptr[highest_of_v0_v1].x) ? 2 : highest_of_v0_v1;
+}
+
+template<typename T>
+static inline int vertIdxOfHighestYCoord2(const T *Tptr)
+{
+ return (Tptr[1].y >= Tptr[0].y) ? 1 : 0;
+}
+
+template<typename T>
+static inline int vertIdxOfHighestYCoord3(const T *Tptr)
+{
+ int highest_of_v0_v1 = vertIdxOfHighestYCoord2(Tptr);
+ return (Tptr[2].y >= Tptr[highest_of_v0_v1].y) ? 2 : highest_of_v0_v1;
+}
- for (s32 loop0 = 2; loop0; --loop0)
+///////////////////////////////////////////////////////////////////////////////
+// polyUseTriangle()
+// Determines if the specified triangle should be rendered. If so, it
+// fills the given array of vertex pointers, vert_ptrs, in order of
+// increasing Y coordinate values, as required by rasterization algorithm.
+// Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
+// or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
+// Returns true if triangle should be rendered, false if not.
+///////////////////////////////////////////////////////////////////////////////
+static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs)
+{
+ // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
+ const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
+
+ // Get indices of highest/lowest X,Y coords within triangle
+ int idx_lowest_x = vertIdxOfLeastXCoord3(tri_ptr);
+ int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
+ int idx_lowest_y = vertIdxOfLeastYCoord3(tri_ptr);
+ int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
+
+ // Maximum absolute distance between any two X coordinates is 1023,
+ // and for Y coordinates is 511 (PS1 hardware limitation)
+ int lowest_x = tri_ptr[idx_lowest_x].x;
+ int highest_x = tri_ptr[idx_highest_x].x;
+ int lowest_y = tri_ptr[idx_lowest_y].y;
+ int highest_y = tri_ptr[idx_highest_y].y;
+ if ((highest_x - lowest_x) >= CHKMAX_X ||
+ (highest_y - lowest_y) >= CHKMAX_Y)
+ return false;
+
+ // Determine if triangle is completely outside clipping range
+ int xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+ int clipped_lowest_x = Max2(xmin,lowest_x);
+ int clipped_lowest_y = Max2(ymin,lowest_y);
+ int clipped_highest_x = Min2(xmax,highest_x);
+ int clipped_highest_y = Min2(ymax,highest_y);
+ if (clipped_lowest_x >= clipped_highest_x ||
+ clipped_lowest_y >= clipped_highest_y)
+ return false;
+
+ // Order vertex ptrs by increasing y value (draw routines need this).
+ // The middle index is deduced by a binary math trick that depends
+ // on index range always being between 0..2
+ vert_ptrs[0] = tri_ptr + idx_lowest_y;
+ vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
+ vert_ptrs[2] = tri_ptr + idx_highest_y;
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// GPU internal polygon drawing functions
+///////////////////////////////////////////////////////////////////////////////
+
+/*----------------------------------------------------------------------
+gpuDrawPolyF - Flat-shaded, untextured poly
+----------------------------------------------------------------------*/
+void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
+{
+ // Set up bgr555 color to be used across calls in inner driver
+ gpu_unai.PixelData = GPU_RGB16(packet.U4[0]);
+
+ PolyVertex vbuf[4];
+ polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
+
+ int total_passes = is_quad ? 2 : 1;
+ int cur_pass = 0;
+ do
{
- if (loop0 == 2)
- {
- ya = y0;
- yb = y1;
- x3 = i2x(x0);
- x4 = y0!=y1 ? x3 : i2x(x1);
- if (dx < 0)
- {
- dx3 = xLoDivx((x2 - x0), (y2 - y0));
- dx4 = xLoDivx((x1 - x0), (y1 - y0));
- }
- else
- {
- dx3 = xLoDivx((x1 - x0), (y1 - y0));
- dx4 = xLoDivx((x2 - x0), (y2 - y0));
+ const PolyVertex* vptrs[3];
+ if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ continue;
+
+ s32 xa, xb, ya, yb;
+ s32 x3, dx3, x4, dx4, dx;
+ s32 x0, x1, x2, y0, y1, y2;
+
+ x0 = vptrs[0]->x; y0 = vptrs[0]->y;
+ x1 = vptrs[1]->x; y1 = vptrs[1]->y;
+ x2 = vptrs[2]->x; y2 = vptrs[2]->y;
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx = (x2 - x1) * ya - (x2 - x0) * yb;
+
+ for (int loop0 = 2; loop0; loop0--) {
+ if (loop0 == 2) {
+ ya = y0; yb = y1;
+ x3 = x4 = i2x(x0);
+ if (dx < 0) {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+#else
+ dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+ dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+#else
+ dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+ dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+#endif
+#endif
+ } else {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+#else
+ dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+ dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+#else
+ dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+ dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+#endif
+#endif
+ }
+ } else {
+ //senquack - break out of final loop if nothing to be drawn (1st loop
+ // must always be taken to setup dx3/dx4)
+ if (y1 == y2) break;
+
+ ya = y1; yb = y2;
+
+ if (dx < 0) {
+ x3 = i2x(x0) + (dx3 * (y1 - y0));
+ x4 = i2x(x1);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ } else {
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0));
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ }
}
- }
- else
- {
- ya = y1;
- yb = y2;
- if (dx < 0)
- {
- x4 = i2x(x1);
- x3 = i2x(x0) + (dx3 * (y1 - y0));
- dx4 = xLoDivx((x2 - x1), (y2 - y1));
+
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+
+ if ((ymin - ya) > 0) {
+ x3 += (dx3 * (ymin - ya));
+ x4 += (dx4 * (ymin - ya));
+ ya = ymin;
}
- else
+
+ if (yb > ymax) yb = ymax;
+
+ int loop1 = yb - ya;
+ if (loop1 <= 0)
+ continue;
+
+ u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ int li=gpu_unai.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+
+ for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
+ x3 += dx3, x4 += dx4 )
{
- x3 = i2x(x1);
- x4 = i2x(x0) + (dx4 * (y1 - y0));
- dx3 = xLoDivx((x2 - x1), (y2 - y1));
+ if (ya&li) continue;
+ if ((ya&pi)==pif) continue;
+
+ xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
+ if ((xmin - xa) > 0) xa = xmin;
+ if (xb > xmax) xb = xmax;
+ if ((xb - xa) > 0)
+ gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
}
}
-
- temp = ymin - ya;
- if (temp > 0)
- {
- ya = ymin;
- x3 += dx3*temp;
- x4 += dx4*temp;
- }
- if (yb > ymax) yb = ymax;
- if (ya>=yb) continue;
-
- x3+= fixed_HALF;
- x4+= fixed_HALF;
-
- u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
-
- for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4)
- {
- if (ya&li) continue;
- xa = x2i(x3);
- xb = x2i(x4);
- if( (xa>xmax) || (xb<xmin) ) continue;
- if(xa < xmin) xa = xmin;
- if(xb > xmax) xb = xmax;
- xb-=xa;
- if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
- }
- }
+ } while (++cur_pass < total_passes);
}
/*----------------------------------------------------------------------
-FT3
+gpuDrawPolyFT - Flat-shaded, textured poly
----------------------------------------------------------------------*/
-
-void gpuDrawFT3(const PP gpuPolySpanDriver)
+void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
{
- const int li=linesInterlace;
- s32 temp;
- s32 xa, xb, xmin, xmax;
- s32 ya, yb, ymin, ymax;
- s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
- s32 y0, y1, y2;
- s32 u0, u1, u2, u3, du3=0;
- s32 v0, v1, v2, v3, dv3=0;
-
- x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
- y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
- x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
- y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
- x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
- y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
-
- GPU_TESTRANGE3();
-
- x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
- y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
-
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
-
+ // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
+ gpu_unai.r8 = packet.U1[0];
+ gpu_unai.g8 = packet.U1[1];
+ gpu_unai.b8 = packet.U1[2];
+ // r5/g5/b5 used if just texture-blending is applied (15-bit light)
+ gpu_unai.r5 = packet.U1[0] >> 3;
+ gpu_unai.g5 = packet.U1[1] >> 3;
+ gpu_unai.b5 = packet.U1[2] >> 3;
+
+ PolyVertex vbuf[4];
+ polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad);
+
+ int total_passes = is_quad ? 2 : 1;
+ int cur_pass = 0;
+ do
{
- int rx0 = Max2(xmin,Min3(x0,x1,x2));
- int ry0 = Max2(ymin,Min3(y0,y1,y2));
- int rx1 = Min2(xmax,Max3(x0,x1,x2));
- int ry1 = Min2(ymax,Max3(y0,y1,y2));
- if( rx0>=rx1 || ry0>=ry1) return;
- }
-
- u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
- u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17];
- u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25];
-
- r4 = s32(PacketBuffer.U1[0]);
- g4 = s32(PacketBuffer.U1[1]);
- b4 = s32(PacketBuffer.U1[2]);
- dr4 = dg4 = db4 = 0;
+ const PolyVertex* vptrs[3];
+ if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ continue;
+
+ s32 xa, xb, ya, yb;
+ s32 x3, dx3, x4, dx4, dx;
+ s32 u3, du3, v3, dv3;
+ s32 x0, x1, x2, y0, y1, y2;
+ s32 u0, u1, u2, v0, v1, v2;
+ s32 du4, dv4;
+
+ x0 = vptrs[0]->x; y0 = vptrs[0]->y;
+ u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
+ x1 = vptrs[1]->x; y1 = vptrs[1]->y;
+ u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
+ x2 = vptrs[2]->x; y2 = vptrs[2]->y;
+ u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
+ du4 = (u2 - u1) * ya - (u2 - u0) * yb;
+ dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
+ dx = dx4;
+ if (dx4 < 0) {
+ dx4 = -dx4;
+ du4 = -du4;
+ dv4 = -dv4;
+ }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- GPU_SWAP(u0, u1, temp);
- GPU_SWAP(v0, v1, temp);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if (dx4 != 0) {
+ float finv = FloatInv(dx4);
+ du4 = (fixed)((du4 << FIXED_BITS) * finv);
+ dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
+ } else {
+ du4 = dv4 = 0;
}
- }
- if (y1 >= y2)
- {
- if( y1!=y2 || x1>x2 )
- {
- GPU_SWAP(x1, x2, temp);
- GPU_SWAP(y1, y2, temp);
- GPU_SWAP(u1, u2, temp);
- GPU_SWAP(v1, v2, temp);
+#else
+ if (dx4 != 0) {
+ float fdiv = dx4;
+ du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
+ dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
+ } else {
+ du4 = dv4 = 0;
}
- }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp);
- GPU_SWAP(y0, y1, temp);
- GPU_SWAP(u0, u1, temp);
- GPU_SWAP(v0, v1, temp);
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if (dx4 != 0) {
+ int iF, iS;
+ xInv(dx4, iF, iS);
+ du4 = xInvMulx(du4, iF, iS);
+ dv4 = xInvMulx(dv4, iF, iS);
+ } else {
+ du4 = dv4 = 0;
}
- }
-
- ya = y2 - y0;
- yb = y2 - y1;
- dx = (x2 - x1) * ya - (x2 - x0) * yb;
- du4 = (u2 - u1) * ya - (u2 - u0) * yb;
- dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
+#else
+ if (dx4 != 0) {
+ du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
+ dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
+ } else {
+ du4 = dv4 = 0;
+ }
+#endif
+#endif
+ // Set u,v increments for inner driver
+ gpu_unai.u_inc = du4;
+ gpu_unai.v_inc = dv4;
+
+ //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
+ // (SAME ISSUE ELSEWHERE)
+ for (s32 loop0 = 2; loop0; loop0--) {
+ if (loop0 == 2) {
+ ya = y0; yb = y1;
+ x3 = x4 = i2x(x0);
+ u3 = i2x(u0); v3 = i2x(v0);
+ if (dx < 0) {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ float finv = FloatInv(y2 - y0);
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ float fdiv = y2 - y0;
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ int iF, iS;
+ xInv((y2 - y0), iF, iS);
+ dx3 = xInvMulx((x2 - x0), iF, iS);
+ du3 = xInvMulx((u2 - u0), iF, iS);
+ dv3 = xInvMulx((v2 - v0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
+ du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
+ dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+#endif
+#endif
+ } else {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ float finv = FloatInv(y1 - y0);
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ float fdiv = y1 - y0;
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ int iF, iS;
+ xInv((y1 - y0), iF, iS);
+ dx3 = xInvMulx((x1 - x0), iF, iS);
+ du3 = xInvMulx((u1 - u0), iF, iS);
+ dv3 = xInvMulx((v1 - v0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
+ du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
+ dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+#endif
+#endif
+ }
+ } else {
+ //senquack - break out of final loop if nothing to be drawn (1st loop
+ // must always be taken to setup dx3/dx4)
+ if (y1 == y2) break;
+
+ ya = y1; yb = y2;
+
+ if (dx < 0) {
+ x3 = i2x(x0);
+ x4 = i2x(x1);
+ u3 = i2x(u0);
+ v3 = i2x(v0);
+ if ((y1 - y0) != 0) {
+ x3 += (dx3 * (y1 - y0));
+ u3 += (du3 * (y1 - y0));
+ v3 += (dv3 * (y1 - y0));
+ }
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ } else {
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0));
+ u3 = i2x(u1);
+ v3 = i2x(v1);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ float finv = FloatInv(y2 - y1);
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ float fdiv = y2 - y1;
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ int iF, iS;
+ xInv((y2 - y1), iF, iS);
+ dx3 = xInvMulx((x2 - x1), iF, iS);
+ du3 = xInvMulx((u2 - u1), iF, iS);
+ dv3 = xInvMulx((v2 - v1), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
+ du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
+ dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
+ } else {
+ dx3 = du3 = dv3 = 0;
+ }
+#endif
+#endif
+ }
+ }
- s32 iF,iS;
- xInv( dx, iF, iS);
- du4 = xInvMulx( du4, iF, iS);
- dv4 = xInvMulx( dv4, iF, iS);
- tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
- tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
- for (s32 loop0 = 2; loop0; --loop0)
- {
- if (loop0 == 2)
- {
- ya = y0;
- yb = y1;
- u3 = i2x(u0);
- v3 = i2x(v0);
- x3 = i2x(x0);
- x4 = y0!=y1 ? x3 : i2x(x1);
- if (dx < 0)
- {
- xInv( (y2 - y0), iF, iS);
- dx3 = xInvMulx( (x2 - x0), iF, iS);
- du3 = xInvMulx( (u2 - u0), iF, iS);
- dv3 = xInvMulx( (v2 - v0), iF, iS);
- dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
- }
- else
- {
- xInv( (y1 - y0), iF, iS);
- dx3 = xInvMulx( (x1 - x0), iF, iS);
- du3 = xInvMulx( (u1 - u0), iF, iS);
- dv3 = xInvMulx( (v1 - v0), iF, iS);
- dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
+ if ((ymin - ya) > 0) {
+ x3 += dx3 * (ymin - ya);
+ x4 += dx4 * (ymin - ya);
+ u3 += du3 * (ymin - ya);
+ v3 += dv3 * (ymin - ya);
+ ya = ymin;
}
- }
- else
- {
- ya = y1;
- yb = y2;
- if (dx < 0)
- {
- temp = y1 - y0;
- u3 = i2x(u0) + (du3 * temp);
- v3 = i2x(v0) + (dv3 * temp);
- x3 = i2x(x0) + (dx3 * temp);
- x4 = i2x(x1);
- dx4 = xLoDivx((x2 - x1), (y2 - y1));
- }
- else
+
+ if (yb > ymax) yb = ymax;
+
+ int loop1 = yb - ya;
+ if (loop1 <= 0)
+ continue;
+
+ u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ int li=gpu_unai.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+
+ for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
+ x3 += dx3, x4 += dx4,
+ u3 += du3, v3 += dv3 )
{
- u3 = i2x(u1);
- v3 = i2x(v1);
- x3 = i2x(x1);
- x4 = i2x(x0) + (dx4 * (y1 - y0));
- xInv( (y2 - y1), iF, iS);
- dx3 = xInvMulx( (x2 - x1), iF, iS);
- du3 = xInvMulx( (u2 - u1), iF, iS);
- dv3 = xInvMulx( (v2 - v1), iF, iS);
- }
- }
+ if (ya&li) continue;
+ if ((ya&pi)==pif) continue;
- temp = ymin - ya;
- if (temp > 0)
- {
- ya = ymin;
- x3 += dx3*temp;
- x4 += dx4*temp;
- u3 += du3*temp;
- v3 += dv3*temp;
- }
- if (yb > ymax) yb = ymax;
- if (ya>=yb) continue;
+ u32 u4, v4;
- x3+= fixed_HALF;
- x4+= fixed_HALF;
- u3+= fixed_HALF;
- v4+= fixed_HALF;
+ xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
+ u4 = u3; v4 = v3;
- u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
+ fixed itmp = i2x(xa) - x3;
+ if (itmp != 0) {
+ u4 += (du4 * itmp) >> FIXED_BITS;
+ v4 += (dv4 * itmp) >> FIXED_BITS;
+ }
- for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3)
- {
- if (ya&li) continue;
- xa = x2i(x3);
- xb = x2i(x4);
- if( (xa>xmax) || (xb<xmin) ) continue;
+ u4 += fixed_HALF;
+ v4 += fixed_HALF;
- temp = xmin - xa;
- if(temp > 0)
- {
- xa = xmin;
- u4 = u3 + du4*temp;
- v4 = v3 + dv4*temp;
- }
- else
- {
- u4 = u3;
- v4 = v3;
+ if ((xmin - xa) > 0) {
+ u4 += du4 * (xmin - xa);
+ v4 += dv4 * (xmin - xa);
+ xa = xmin;
+ }
+
+ // Set u,v coords for inner driver
+ gpu_unai.u = u4;
+ gpu_unai.v = v4;
+
+ if (xb > xmax) xb = xmax;
+ if ((xb - xa) > 0)
+ gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
}
- if(xb > xmax) xb = xmax;
- xb-=xa;
- if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
}
- }
+ } while (++cur_pass < total_passes);
}
/*----------------------------------------------------------------------
-G3
+gpuDrawPolyG - Gouraud-shaded, untextured poly
----------------------------------------------------------------------*/
-
-void gpuDrawG3(const PP gpuPolySpanDriver)
+void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
{
- const int li=linesInterlace;
- s32 temp;
- s32 xa, xb, xmin, xmax;
- s32 ya, yb, ymin, ymax;
- s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
- s32 y0, y1, y2;
- s32 r0, r1, r2, r3, dr3=0;
- s32 g0, g1, g2, g3, dg3=0;
- s32 b0, b1, b2, b3, db3=0;
-
- x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
- y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
- x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
- y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
- x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
- y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
-
- GPU_TESTRANGE3();
-
- x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
- y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
-
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
+ PolyVertex vbuf[4];
+ polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
+ int total_passes = is_quad ? 2 : 1;
+ int cur_pass = 0;
+ do
{
- int rx0 = Max2(xmin,Min3(x0,x1,x2));
- int ry0 = Max2(ymin,Min3(y0,y1,y2));
- int rx1 = Min2(xmax,Max3(x0,x1,x2));
- int ry1 = Min2(ymax,Max3(y0,y1,y2));
- if( rx0>=rx1 || ry0>=ry1) return;
- }
-
- r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
- r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10];
- r2 = PacketBuffer.U1[16]; g2 = PacketBuffer.U1[17]; b2 = PacketBuffer.U1[18];
+ const PolyVertex* vptrs[3];
+ if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ continue;
+
+ s32 xa, xb, ya, yb;
+ s32 x3, dx3, x4, dx4, dx;
+ s32 r3, dr3, g3, dg3, b3, db3;
+ s32 x0, x1, x2, y0, y1, y2;
+ s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
+ s32 dr4, dg4, db4;
+
+ x0 = vptrs[0]->x; y0 = vptrs[0]->y;
+ r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
+ x1 = vptrs[1]->x; y1 = vptrs[1]->y;
+ r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
+ x2 = vptrs[2]->x; y2 = vptrs[2]->y;
+ r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
+ dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
+ dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
+ db4 = (b2 - b1) * ya - (b2 - b0) * yb;
+ dx = dx4;
+ if (dx4 < 0) {
+ dx4 = -dx4;
+ dr4 = -dr4;
+ dg4 = -dg4;
+ db4 = -db4;
+ }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
- GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if (dx4 != 0) {
+ float finv = FloatInv(dx4);
+ dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
+ dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
+ db4 = (fixed)((db4 << FIXED_BITS) * finv);
+ } else {
+ dr4 = dg4 = db4 = 0;
}
- }
- if (y1 >= y2)
- {
- if( y1!=y2 || x1>x2 )
- {
- GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
- GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
+#else
+ if (dx4 != 0) {
+ float fdiv = dx4;
+ dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
+ dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
+ db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
+ } else {
+ dr4 = dg4 = db4 = 0;
}
- }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
- GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if (dx4 != 0) {
+ int iF, iS;
+ xInv(dx4, iF, iS);
+ dr4 = xInvMulx(dr4, iF, iS);
+ dg4 = xInvMulx(dg4, iF, iS);
+ db4 = xInvMulx(db4, iF, iS);
+ } else {
+ dr4 = dg4 = db4 = 0;
}
- }
-
- ya = y2 - y0;
- yb = y2 - y1;
- dx = (x2 - x1) * ya - (x2 - x0) * yb;
- dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
- dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
- db4 = (b2 - b1) * ya - (b2 - b0) * yb;
-
- s32 iF,iS;
- xInv( dx, iF, iS);
- dr4 = xInvMulx( dr4, iF, iS);
- dg4 = xInvMulx( dg4, iF, iS);
- db4 = xInvMulx( db4, iF, iS);
- u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21;
- u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10;
- u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0;
- lInc = db + dg + dr;
-
- for (s32 loop0 = 2; loop0; --loop0)
- {
- if (loop0 == 2)
- {
- ya = y0;
- yb = y1;
- r3 = i2x(r0);
- g3 = i2x(g0);
- b3 = i2x(b0);
- x3 = i2x(x0);
- x4 = y0!=y1 ? x3 : i2x(x1);
- if (dx < 0)
- {
- xInv( (y2 - y0), iF, iS);
- dx3 = xInvMulx( (x2 - x0), iF, iS);
- dr3 = xInvMulx( (r2 - r0), iF, iS);
- dg3 = xInvMulx( (g2 - g0), iF, iS);
- db3 = xInvMulx( (b2 - b0), iF, iS);
- dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
- }
- else
- {
- xInv( (y1 - y0), iF, iS);
- dx3 = xInvMulx( (x1 - x0), iF, iS);
- dr3 = xInvMulx( (r1 - r0), iF, iS);
- dg3 = xInvMulx( (g1 - g0), iF, iS);
- db3 = xInvMulx( (b1 - b0), iF, iS);
- dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
- }
+#else
+ if (dx4 != 0) {
+ dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
+ dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
+ db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
+ } else {
+ dr4 = dg4 = db4 = 0;
}
- else
- {
- ya = y1;
- yb = y2;
- if (dx < 0)
- {
- temp = y1 - y0;
- r3 = i2x(r0) + (dr3 * temp);
- g3 = i2x(g0) + (dg3 * temp);
- b3 = i2x(b0) + (db3 * temp);
- x3 = i2x(x0) + (dx3 * temp);
- x4 = i2x(x1);
- dx4 = xLoDivx((x2 - x1), (y2 - y1));
- }
- else
- {
- r3 = i2x(r1);
- g3 = i2x(g1);
- b3 = i2x(b1);
- x3 = i2x(x1);
- x4 = i2x(x0) + (dx4 * (y1 - y0));
-
- xInv( (y2 - y1), iF, iS);
- dx3 = xInvMulx( (x2 - x1), iF, iS);
- dr3 = xInvMulx( (r2 - r1), iF, iS);
- dg3 = xInvMulx( (g2 - g1), iF, iS);
- db3 = xInvMulx( (b2 - b1), iF, iS);
+#endif
+#endif
+ // Setup packed Gouraud increment for inner driver
+ gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
+
+ for (s32 loop0 = 2; loop0; loop0--) {
+ if (loop0 == 2) {
+ ya = y0;
+ yb = y1;
+ x3 = x4 = i2x(x0);
+ r3 = i2x(r0);
+ g3 = i2x(g0);
+ b3 = i2x(b0);
+ if (dx < 0) {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ float finv = FloatInv(y2 - y0);
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ float fdiv = y2 - y0;
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ int iF, iS;
+ xInv((y2 - y0), iF, iS);
+ dx3 = xInvMulx((x2 - x0), iF, iS);
+ dr3 = xInvMulx((r2 - r0), iF, iS);
+ dg3 = xInvMulx((g2 - g0), iF, iS);
+ db3 = xInvMulx((b2 - b0), iF, iS);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
+ dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
+ dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
+ db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+#endif
+#endif
+ } else {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ float finv = FloatInv(y1 - y0);
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ float fdiv = y1 - y0;
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ int iF, iS;
+ xInv((y1 - y0), iF, iS);
+ dx3 = xInvMulx((x1 - x0), iF, iS);
+ dr3 = xInvMulx((r1 - r0), iF, iS);
+ dg3 = xInvMulx((g1 - g0), iF, iS);
+ db3 = xInvMulx((b1 - b0), iF, iS);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
+ dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
+ dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
+ db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+#endif
+#endif
+ }
+ } else {
+ //senquack - break out of final loop if nothing to be drawn (1st loop
+ // must always be taken to setup dx3/dx4)
+ if (y1 == y2) break;
+
+ ya = y1; yb = y2;
+
+ if (dx < 0) {
+ x3 = i2x(x0); x4 = i2x(x1);
+ r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
+
+ if ((y1 - y0) != 0) {
+ x3 += (dx3 * (y1 - y0));
+ r3 += (dr3 * (y1 - y0));
+ g3 += (dg3 * (y1 - y0));
+ b3 += (db3 * (y1 - y0));
+ }
+
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ } else {
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0));
+
+ r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
+
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ float finv = FloatInv(y2 - y1);
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ float fdiv = y2 - y1;
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ int iF, iS;
+ xInv((y2 - y1), iF, iS);
+ dx3 = xInvMulx((x2 - x1), iF, iS);
+ dr3 = xInvMulx((r2 - r1), iF, iS);
+ dg3 = xInvMulx((g2 - g1), iF, iS);
+ db3 = xInvMulx((b2 - b1), iF, iS);
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
+ dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
+ dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
+ db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
+ } else {
+ dx3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#endif
+ }
}
- }
- temp = ymin - ya;
- if (temp > 0)
- {
- ya = ymin;
- x3 += dx3*temp; x4 += dx4*temp;
- r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
- }
- if (yb > ymax) yb = ymax;
- if (ya>=yb) continue;
-
- x3+= fixed_HALF; x4+= fixed_HALF;
- r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
-
- u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
-
- for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, r3+=dr3, g3+=dg3, b3+=db3)
- {
- if (ya&li) continue;
- xa = x2i(x3);
- xb = x2i(x4);
- if( (xa>xmax) || (xb<xmin) ) continue;
-
- temp = xmin - xa;
- if(temp > 0)
- {
- xa = xmin;
- r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+
+ if ((ymin - ya) > 0) {
+ x3 += (dx3 * (ymin - ya));
+ x4 += (dx4 * (ymin - ya));
+ r3 += (dr3 * (ymin - ya));
+ g3 += (dg3 * (ymin - ya));
+ b3 += (db3 * (ymin - ya));
+ ya = ymin;
}
- else
+
+ if (yb > ymax) yb = ymax;
+
+ int loop1 = yb - ya;
+ if (loop1 <= 0)
+ continue;
+
+ u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ int li=gpu_unai.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+
+ for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
+ x3 += dx3, x4 += dx4,
+ r3 += dr3, g3 += dg3, b3 += db3 )
{
+ if (ya&li) continue;
+ if ((ya&pi)==pif) continue;
+
+ u32 r4, g4, b4;
+
+ xa = FixedCeilToInt(x3);
+ xb = FixedCeilToInt(x4);
r4 = r3; g4 = g3; b4 = b3;
+
+ fixed itmp = i2x(xa) - x3;
+ if (itmp != 0) {
+ r4 += (dr4 * itmp) >> FIXED_BITS;
+ g4 += (dg4 * itmp) >> FIXED_BITS;
+ b4 += (db4 * itmp) >> FIXED_BITS;
+ }
+
+ r4 += fixed_HALF;
+ g4 += fixed_HALF;
+ b4 += fixed_HALF;
+
+ if ((xmin - xa) > 0) {
+ r4 += (dr4 * (xmin - xa));
+ g4 += (dg4 * (xmin - xa));
+ b4 += (db4 * (xmin - xa));
+ xa = xmin;
+ }
+
+ // Setup packed Gouraud color for inner driver
+ gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
+
+ if (xb > xmax) xb = xmax;
+ if ((xb - xa) > 0)
+ gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
}
- if(xb > xmax) xb = xmax;
- xb-=xa;
- if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
}
- }
+ } while (++cur_pass < total_passes);
}
/*----------------------------------------------------------------------
-GT3
+gpuDrawPolyGT - Gouraud-shaded, textured poly
----------------------------------------------------------------------*/
-
-void gpuDrawGT3(const PP gpuPolySpanDriver)
+void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
{
- const int li=linesInterlace;
- s32 temp;
- s32 xa, xb, xmin, xmax;
- s32 ya, yb, ymin, ymax;
- s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
- s32 y0, y1, y2;
- s32 u0, u1, u2, u3, du3=0;
- s32 v0, v1, v2, v3, dv3=0;
- s32 r0, r1, r2, r3, dr3=0;
- s32 g0, g1, g2, g3, dg3=0;
- s32 b0, b1, b2, b3, db3=0;
-
- x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
- y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
- x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] );
- y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] );
- x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]);
- y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]);
-
- GPU_TESTRANGE3();
-
- x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
- y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
-
- xmin = DrawingArea[0]; xmax = DrawingArea[2];
- ymin = DrawingArea[1]; ymax = DrawingArea[3];
+ PolyVertex vbuf[4];
+ polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
+ int total_passes = is_quad ? 2 : 1;
+ int cur_pass = 0;
+ do
{
- int rx0 = Max2(xmin,Min3(x0,x1,x2));
- int ry0 = Max2(ymin,Min3(y0,y1,y2));
- int rx1 = Min2(xmax,Max3(x0,x1,x2));
- int ry1 = Min2(ymax,Max3(y0,y1,y2));
- if( rx0>=rx1 || ry0>=ry1) return;
- }
-
- r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
- u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
- r1 = PacketBuffer.U1[12]; g1 = PacketBuffer.U1[13]; b1 = PacketBuffer.U1[14];
- u1 = PacketBuffer.U1[20]; v1 = PacketBuffer.U1[21];
- r2 = PacketBuffer.U1[24]; g2 = PacketBuffer.U1[25]; b2 = PacketBuffer.U1[26];
- u2 = PacketBuffer.U1[32]; v2 = PacketBuffer.U1[33];
+ const PolyVertex* vptrs[3];
+ if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ continue;
+
+ s32 xa, xb, ya, yb;
+ s32 x3, dx3, x4, dx4, dx;
+ s32 u3, du3, v3, dv3;
+ s32 r3, dr3, g3, dg3, b3, db3;
+ s32 x0, x1, x2, y0, y1, y2;
+ s32 u0, u1, u2, v0, v1, v2;
+ s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
+ s32 du4, dv4;
+ s32 dr4, dg4, db4;
+
+ x0 = vptrs[0]->x; y0 = vptrs[0]->y;
+ u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
+ r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
+ x1 = vptrs[1]->x; y1 = vptrs[1]->y;
+ u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
+ r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
+ x2 = vptrs[2]->x; y2 = vptrs[2]->y;
+ u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
+ r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
+
+ ya = y2 - y0;
+ yb = y2 - y1;
+ dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
+ du4 = (u2 - u1) * ya - (u2 - u0) * yb;
+ dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
+ dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
+ dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
+ db4 = (b2 - b1) * ya - (b2 - b0) * yb;
+ dx = dx4;
+ if (dx4 < 0) {
+ dx4 = -dx4;
+ du4 = -du4;
+ dv4 = -dv4;
+ dr4 = -dr4;
+ dg4 = -dg4;
+ db4 = -db4;
+ }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
- GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
- GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if (dx4 != 0) {
+ float finv = FloatInv(dx4);
+ du4 = (fixed)((du4 << FIXED_BITS) * finv);
+ dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
+ dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
+ dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
+ db4 = (fixed)((db4 << FIXED_BITS) * finv);
+ } else {
+ du4 = dv4 = dr4 = dg4 = db4 = 0;
}
- }
- if (y1 >= y2)
- {
- if( y1!=y2 || x1>x2 )
- {
- GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
- GPU_SWAP(u1, u2, temp); GPU_SWAP(v1, v2, temp);
- GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
+#else
+ if (dx4 != 0) {
+ float fdiv = dx4;
+ du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
+ dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
+ dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
+ dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
+ db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
+ } else {
+ du4 = dv4 = dr4 = dg4 = db4 = 0;
}
- }
- if (y0 >= y1)
- {
- if( y0!=y1 || x0>x1 )
- {
- GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
- GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
- GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if (dx4 != 0) {
+ int iF, iS;
+ xInv(dx4, iF, iS);
+ du4 = xInvMulx(du4, iF, iS);
+ dv4 = xInvMulx(dv4, iF, iS);
+ dr4 = xInvMulx(dr4, iF, iS);
+ dg4 = xInvMulx(dg4, iF, iS);
+ db4 = xInvMulx(db4, iF, iS);
+ } else {
+ du4 = dv4 = dr4 = dg4 = db4 = 0;
}
- }
-
- ya = y2 - y0;
- yb = y2 - y1;
- dx = (x2 - x1) * ya - (x2 - x0) * yb;
- du4 = (u2 - u1) * ya - (u2 - u0) * yb;
- dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
- dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
- dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
- db4 = (b2 - b1) * ya - (b2 - b0) * yb;
-
- s32 iF,iS;
-
- xInv( dx, iF, iS);
- du4 = xInvMulx( du4, iF, iS);
- dv4 = xInvMulx( dv4, iF, iS);
- dr4 = xInvMulx( dr4, iF, iS);
- dg4 = xInvMulx( dg4, iF, iS);
- db4 = xInvMulx( db4, iF, iS);
- u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21;
- u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10;
- u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0;
- lInc = db + dg + dr;
- tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
- tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
-
- for (s32 loop0 = 2; loop0; --loop0)
- {
- if (loop0 == 2)
- {
- ya = y0;
- yb = y1;
- u3 = i2x(u0);
- v3 = i2x(v0);
- r3 = i2x(r0);
- g3 = i2x(g0);
- b3 = i2x(b0);
- x3 = i2x(x0);
- x4 = y0!=y1 ? x3 : i2x(x1);
- if (dx < 0)
- {
- xInv( (y2 - y0), iF, iS);
- dx3 = xInvMulx( (x2 - x0), iF, iS);
- du3 = xInvMulx( (u2 - u0), iF, iS);
- dv3 = xInvMulx( (v2 - v0), iF, iS);
- dr3 = xInvMulx( (r2 - r0), iF, iS);
- dg3 = xInvMulx( (g2 - g0), iF, iS);
- db3 = xInvMulx( (b2 - b0), iF, iS);
- dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
- }
- else
- {
- xInv( (y1 - y0), iF, iS);
- dx3 = xInvMulx( (x1 - x0), iF, iS);
- du3 = xInvMulx( (u1 - u0), iF, iS);
- dv3 = xInvMulx( (v1 - v0), iF, iS);
- dr3 = xInvMulx( (r1 - r0), iF, iS);
- dg3 = xInvMulx( (g1 - g0), iF, iS);
- db3 = xInvMulx( (b1 - b0), iF, iS);
- dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
- }
+#else
+ if (dx4 != 0) {
+ du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
+ dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
+ dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
+ dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
+ db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
+ } else {
+ du4 = dv4 = dr4 = dg4 = db4 = 0;
}
- else
- {
- ya = y1;
- yb = y2;
- if (dx < 0)
- {
- temp = y1 - y0;
- u3 = i2x(u0) + (du3 * temp);
- v3 = i2x(v0) + (dv3 * temp);
- r3 = i2x(r0) + (dr3 * temp);
- g3 = i2x(g0) + (dg3 * temp);
- b3 = i2x(b0) + (db3 * temp);
- x3 = i2x(x0) + (dx3 * temp);
- x4 = i2x(x1);
- dx4 = xLoDivx((x2 - x1), (y2 - y1));
+#endif
+#endif
+ // Set u,v increments and packed Gouraud increment for inner driver
+ gpu_unai.u_inc = du4;
+ gpu_unai.v_inc = dv4;
+ gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
+
+ for (s32 loop0 = 2; loop0; loop0--) {
+ if (loop0 == 2) {
+ ya = y0; yb = y1;
+ x3 = x4 = i2x(x0);
+ u3 = i2x(u0); v3 = i2x(v0);
+ r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
+ if (dx < 0) {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ float finv = FloatInv(y2 - y0);
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ float fdiv = y2 - y0;
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ int iF, iS;
+ xInv((y2 - y0), iF, iS);
+ dx3 = xInvMulx((x2 - x0), iF, iS);
+ du3 = xInvMulx((u2 - u0), iF, iS);
+ dv3 = xInvMulx((v2 - v0), iF, iS);
+ dr3 = xInvMulx((r2 - r0), iF, iS);
+ dg3 = xInvMulx((g2 - g0), iF, iS);
+ db3 = xInvMulx((b2 - b0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
+ du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
+ dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
+ dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
+ dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
+ db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+#endif
+#endif
+ } else {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ float finv = FloatInv(y1 - y0);
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ float fdiv = y1 - y0;
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ int iF, iS;
+ xInv((y1 - y0), iF, iS);
+ dx3 = xInvMulx((x1 - x0), iF, iS);
+ du3 = xInvMulx((u1 - u0), iF, iS);
+ dv3 = xInvMulx((v1 - v0), iF, iS);
+ dr3 = xInvMulx((r1 - r0), iF, iS);
+ dg3 = xInvMulx((g1 - g0), iF, iS);
+ db3 = xInvMulx((b1 - b0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
+ du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
+ dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
+ dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
+ dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
+ db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+#endif
+#endif
+ }
+ } else {
+ //senquack - break out of final loop if nothing to be drawn (1st loop
+ // must always be taken to setup dx3/dx4)
+ if (y1 == y2) break;
+
+ ya = y1; yb = y2;
+
+ if (dx < 0) {
+ x3 = i2x(x0); x4 = i2x(x1);
+ u3 = i2x(u0); v3 = i2x(v0);
+ r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
+
+ if ((y1 - y0) != 0) {
+ x3 += (dx3 * (y1 - y0));
+ u3 += (du3 * (y1 - y0));
+ v3 += (dv3 * (y1 - y0));
+ r3 += (dr3 * (y1 - y0));
+ g3 += (dg3 * (y1 - y0));
+ b3 += (db3 * (y1 - y0));
+ }
+
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ } else {
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0));
+
+ u3 = i2x(u1); v3 = i2x(v1);
+ r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ float finv = FloatInv(y2 - y1);
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ float fdiv = y2 - y1;
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ int iF, iS;
+ xInv((y2 - y1), iF, iS);
+ dx3 = xInvMulx((x2 - x1), iF, iS);
+ du3 = xInvMulx((u2 - u1), iF, iS);
+ dv3 = xInvMulx((v2 - v1), iF, iS);
+ dr3 = xInvMulx((r2 - r1), iF, iS);
+ dg3 = xInvMulx((g2 - g1), iF, iS);
+ db3 = xInvMulx((b2 - b1), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
+ du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
+ dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
+ dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
+ dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
+ db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#endif
+ }
}
- else
- {
- u3 = i2x(u1);
- v3 = i2x(v1);
- r3 = i2x(r1);
- g3 = i2x(g1);
- b3 = i2x(b1);
- x3 = i2x(x1);
- x4 = i2x(x0) + (dx4 * (y1 - y0));
-
- xInv( (y2 - y1), iF, iS);
- dx3 = xInvMulx( (x2 - x1), iF, iS);
- du3 = xInvMulx( (u2 - u1), iF, iS);
- dv3 = xInvMulx( (v2 - v1), iF, iS);
- dr3 = xInvMulx( (r2 - r1), iF, iS);
- dg3 = xInvMulx( (g2 - g1), iF, iS);
- db3 = xInvMulx( (b2 - b1), iF, iS);
- }
- }
- temp = ymin - ya;
- if (temp > 0)
- {
- ya = ymin;
- x3 += dx3*temp; x4 += dx4*temp;
- u3 += du3*temp; v3 += dv3*temp;
- r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
- }
- if (yb > ymax) yb = ymax;
- if (ya>=yb) continue;
-
- x3+= fixed_HALF; x4+= fixed_HALF;
- u3+= fixed_HALF; v4+= fixed_HALF;
- r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
- u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
-
- for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3, r3+=dr3, g3+=dg3, b3+=db3)
- {
- if (ya&li) continue;
- xa = x2i(x3);
- xb = x2i(x4);
- if( (xa>xmax) || (xb<xmin)) continue;
-
- temp = xmin - xa;
- if(temp > 0)
- {
- xa = xmin;
- u4 = u3 + du4*temp; v4 = v3 + dv4*temp;
- r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
+ s32 xmin, xmax, ymin, ymax;
+ xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
+ ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
+
+ if ((ymin - ya) > 0) {
+ x3 += (dx3 * (ymin - ya));
+ x4 += (dx4 * (ymin - ya));
+ u3 += (du3 * (ymin - ya));
+ v3 += (dv3 * (ymin - ya));
+ r3 += (dr3 * (ymin - ya));
+ g3 += (dg3 * (ymin - ya));
+ b3 += (db3 * (ymin - ya));
+ ya = ymin;
}
- else
+
+ if (yb > ymax) yb = ymax;
+
+ int loop1 = yb - ya;
+ if (loop1 <= 0)
+ continue;
+
+ u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ int li=gpu_unai.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+
+ for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
+ x3 += dx3, x4 += dx4,
+ u3 += du3, v3 += dv3,
+ r3 += dr3, g3 += dg3, b3 += db3 )
{
+ if (ya&li) continue;
+ if ((ya&pi)==pif) continue;
+
+ u32 u4, v4;
+ u32 r4, g4, b4;
+
+ xa = FixedCeilToInt(x3);
+ xb = FixedCeilToInt(x4);
u4 = u3; v4 = v3;
r4 = r3; g4 = g3; b4 = b3;
+
+ fixed itmp = i2x(xa) - x3;
+ if (itmp != 0) {
+ u4 += (du4 * itmp) >> FIXED_BITS;
+ v4 += (dv4 * itmp) >> FIXED_BITS;
+ r4 += (dr4 * itmp) >> FIXED_BITS;
+ g4 += (dg4 * itmp) >> FIXED_BITS;
+ b4 += (db4 * itmp) >> FIXED_BITS;
+ }
+
+ u4 += fixed_HALF;
+ v4 += fixed_HALF;
+ r4 += fixed_HALF;
+ g4 += fixed_HALF;
+ b4 += fixed_HALF;
+
+ if ((xmin - xa) > 0) {
+ u4 += du4 * (xmin - xa);
+ v4 += dv4 * (xmin - xa);
+ r4 += dr4 * (xmin - xa);
+ g4 += dg4 * (xmin - xa);
+ b4 += db4 * (xmin - xa);
+ xa = xmin;
+ }
+
+ // Set packed Gouraud color and u,v coords for inner driver
+ gpu_unai.u = u4;
+ gpu_unai.v = v4;
+ gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
+
+ if (xb > xmax) xb = xmax;
+ if ((xb - xa) > 0)
+ gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
}
- if(xb > xmax) xb = xmax;
- xb-=xa;
- if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
}
- }
+ } while (++cur_pass < total_passes);
}