aboutsummaryrefslogtreecommitdiff
path: root/plugins/gpu_neon/psx_gpu/psx_gpu_standard.c
diff options
context:
space:
mode:
authorExophase2011-12-20 23:07:20 +0200
committernotaz2011-12-20 23:40:58 +0200
commit75e28f62b2a50044b58075d63d207409e0148409 (patch)
tree0e7c7aa5e368649e675850aa1f45b87d73a66760 /plugins/gpu_neon/psx_gpu/psx_gpu_standard.c
parentb3db94096d7e5b4f60d610a441e370d639b3fd06 (diff)
downloadpcsx_rearmed-75e28f62b2a50044b58075d63d207409e0148409.tar.gz
pcsx_rearmed-75e28f62b2a50044b58075d63d207409e0148409.tar.bz2
pcsx_rearmed-75e28f62b2a50044b58075d63d207409e0148409.zip
add NEON GPU rasterizer
Diffstat (limited to 'plugins/gpu_neon/psx_gpu/psx_gpu_standard.c')
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu_standard.c1166
1 files changed, 1166 insertions, 0 deletions
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_standard.c b/plugins/gpu_neon/psx_gpu/psx_gpu_standard.c
new file mode 100644
index 0000000..68d29cf
--- /dev/null
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_standard.c
@@ -0,0 +1,1166 @@
+/*
+ * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <math.h>
+
+#include "common.h"
+
+typedef s32 fixed_type;
+
+#define EDGE_STEP_BITS 32
+#define FIXED_BITS 12
+
+#define fixed_center(value) \
+ ((((fixed_type)value) << FIXED_BITS) + (1 << (FIXED_BITS - 1))) \
+
+#define int_to_fixed(value) \
+ (((fixed_type)value) << FIXED_BITS) \
+
+#define fixed_to_int(value) \
+ ((value) >> FIXED_BITS) \
+
+#define fixed_mul(_a, _b) \
+ (((s64)(_a) * (_b)) >> FIXED_BITS) \
+
+#define fixed_to_double(value) \
+ ((value) / (double)(1 << FIXED_BITS)) \
+
+#define double_to_fixed(value) \
+ (fixed_type)(((value) * (double)(1 << FIXED_BITS))) \
+
+typedef struct
+{
+ fixed_type current_value;
+ fixed_type step_dx;
+ fixed_type step_dy;
+ fixed_type gradient_area_x;
+ fixed_type gradient_area_y;
+} interpolant_struct;
+
+typedef struct
+{
+ s32 base_x;
+
+ s64 left_x;
+ s64 left_dx_dy;
+
+ s64 right_x;
+ s64 right_dx_dy;
+
+ u32 triangle_area;
+ u32 triangle_winding;
+
+ interpolant_struct u;
+ interpolant_struct v;
+ interpolant_struct r;
+ interpolant_struct g;
+ interpolant_struct b;
+} _span_struct;
+
+
+u32 span_pixels = 0;
+u32 span_pixel_blocks = 0;
+u32 spans = 0;
+u32 triangles = 0;
+
+u32 texels_4bpp = 0;
+u32 texels_8bpp = 0;
+u32 texels_16bpp = 0;
+u32 untextured_pixels = 0;
+u32 blend_pixels = 0;
+u32 transparent_pixels = 0;
+
+u32 state_changes = 0;
+u32 render_buffer_flushes = 0;
+u32 trivial_rejects = 0;
+
+void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
+{
+
+}
+
+
+u32 fixed_reciprocal(u32 denominator, u32 *_shift)
+{
+ u32 shift = __builtin_clz(denominator);
+ u32 denominator_normalized = denominator << shift;
+
+ // Implement with a DP divide
+ u32 reciprocal =
+ (double)((1ULL << 62) + (denominator_normalized - 1)) /
+ (double)denominator_normalized;
+
+ *_shift = 62 - shift;
+ return reciprocal;
+}
+
+fixed_type fixed_reciprocal_multiply(s32 numerator, u32 reciprocal,
+ u32 reciprocal_sign, u32 shift)
+{
+ u32 numerator_sign = (u32)numerator >> 31;
+ u32 flip_sign = numerator_sign ^ reciprocal_sign;
+ u32 flip_sign_mask = ~(flip_sign - 1);
+ fixed_type value;
+
+ numerator = abs(numerator);
+
+ value = ((u64)numerator * reciprocal) >> shift;
+
+ value ^= flip_sign_mask;
+ value -= flip_sign_mask;
+
+ return value;
+}
+
+s32 triangle_signed_area_x2(s32 x0, s32 y0, s32 x1, s32 y1, s32 x2, s32 y2)
+{
+ return ((x1 - x0) * (y2 - y1)) - ((x2 - x1) * (y1 - y0));
+}
+
+u32 fetch_texel_4bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v)
+{
+ u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
+ u32 texel = texture_ptr_8bpp[(v * 2048) + (u / 2)];
+
+ if(u & 1)
+ texel >>= 4;
+ else
+ texel &= 0xF;
+
+ texels_4bpp++;
+
+ return psx_gpu->clut_ptr[texel];
+}
+
+u32 fetch_texel_8bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v)
+{
+ u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
+ u32 texel = texture_ptr_8bpp[(v * 2048) + u];
+
+ texels_8bpp++;
+
+ return psx_gpu->clut_ptr[texel];
+}
+
+u32 fetch_texel_16bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v)
+{
+ u16 *texture_ptr_16bpp = psx_gpu->texture_page_ptr;
+
+ texels_16bpp++;
+
+ return texture_ptr_16bpp[(v * 1024) + u];
+}
+
+u32 fetch_texel(psx_gpu_struct *psx_gpu, u32 u, u32 v)
+{
+ u &= psx_gpu->texture_mask_width;
+ v &= psx_gpu->texture_mask_height;
+
+ switch(psx_gpu->texture_mode)
+ {
+ case TEXTURE_MODE_4BPP:
+ return fetch_texel_4bpp(psx_gpu, u, v);
+
+ case TEXTURE_MODE_8BPP:
+ return fetch_texel_8bpp(psx_gpu, u, v);
+
+ case TEXTURE_MODE_16BPP:
+ return fetch_texel_16bpp(psx_gpu, u, v);
+ }
+
+ return 0;
+}
+
+void draw_pixel(psx_gpu_struct *psx_gpu, s32 r, s32 g, s32 b, u32 texel,
+ u32 x, u32 y, u32 flags)
+{
+ u32 pixel;
+
+ if(r > 31)
+ r = 31;
+
+ if(g > 31)
+ g = 31;
+
+ if(b > 31)
+ b = 31;
+
+ if(flags & RENDER_FLAGS_BLEND)
+ {
+ if(((flags & RENDER_FLAGS_TEXTURE_MAP) == 0) || (texel & 0x8000))
+ {
+ s32 fb_pixel = psx_gpu->vram[(y * 1024) + x];
+ s32 fb_r = fb_pixel & 0x1F;
+ s32 fb_g = (fb_pixel >> 5) & 0x1F;
+ s32 fb_b = (fb_pixel >> 10) & 0x1F;
+
+ blend_pixels++;
+
+ switch(psx_gpu->blend_mode)
+ {
+ case BLEND_MODE_AVERAGE:
+ r = (r + fb_r) / 2;
+ g = (g + fb_g) / 2;
+ b = (b + fb_b) / 2;
+ break;
+
+ case BLEND_MODE_ADD:
+ r += fb_r;
+ g += fb_g;
+ b += fb_b;
+
+ if(r > 31)
+ r = 31;
+
+ if(g > 31)
+ g = 31;
+
+ if(b > 31)
+ b = 31;
+
+ break;
+
+ case BLEND_MODE_SUBTRACT:
+ r = fb_r - r;
+ g = fb_g - g;
+ b = fb_b - b;
+
+ if(r < 0)
+ r = 0;
+
+ if(g < 0)
+ g = 0;
+
+ if(b < 0)
+ b = 0;
+
+ break;
+
+ case BLEND_MODE_ADD_FOURTH:
+ r = fb_r + (r / 4);
+ g = fb_g + (g / 4);
+ b = fb_b + (b / 4);
+
+ if(r > 31)
+ r = 31;
+
+ if(g > 31)
+ g = 31;
+
+ if(b > 31)
+ b = 31;
+
+ break;
+ }
+ }
+ }
+
+ pixel = r | (g << 5) | (b << 10);
+
+ if(psx_gpu->mask_apply || (texel & 0x8000))
+ pixel |= 0x8000;
+
+ psx_gpu->vram[(y * 1024) + x] = pixel;
+}
+
+s32 dither_table[4][4] =
+{
+ { -4, 0, -3, 1 },
+ { 2, -2, 3, -1 },
+ { -3, 1, -4, 0 },
+ { 3, -1, 2, -2 },
+};
+
+void render_span(psx_gpu_struct *psx_gpu, _span_struct *span, s32 y,
+ u32 flags)
+{
+ s32 left_x = span->left_x >> EDGE_STEP_BITS;
+ s32 right_x = span->right_x >> EDGE_STEP_BITS;
+ s32 current_x = left_x;
+ s32 delta_x;
+
+ fixed_type current_u = span->u.current_value;
+ fixed_type current_v = span->v.current_value;
+ fixed_type current_r = span->r.current_value;
+ fixed_type current_g = span->g.current_value;
+ fixed_type current_b = span->b.current_value;
+
+ if(y < psx_gpu->viewport_start_y)
+ return;
+
+ if(y > psx_gpu->viewport_end_y)
+ return;
+
+ if(right_x < psx_gpu->viewport_start_x)
+ return;
+
+ if(current_x > psx_gpu->viewport_end_x)
+ return;
+
+ spans++;
+
+ if(current_x < psx_gpu->viewport_start_x)
+ current_x = psx_gpu->viewport_start_x;
+
+ if(right_x > psx_gpu->viewport_end_x + 1)
+ right_x = psx_gpu->viewport_end_x + 1;
+
+ delta_x = current_x - span->base_x;
+
+ current_u += delta_x * span->u.step_dx;
+ current_v += delta_x * span->v.step_dx;
+ current_r += delta_x * span->r.step_dx;
+ current_g += delta_x * span->g.step_dx;
+ current_b += delta_x * span->b.step_dx;
+
+ span_pixels += right_x - current_x;
+ span_pixel_blocks += ((right_x / 8) - (current_x / 8)) + 1;
+
+ while(current_x < right_x)
+ {
+ s32 color_r, color_g, color_b;
+ u32 texel = 0;
+
+ if(psx_gpu->mask_evaluate &&
+ (psx_gpu->vram[(y * 1024) + current_x] & 0x8000))
+ {
+ goto skip_pixel;
+ }
+
+ if(flags & RENDER_FLAGS_SHADE)
+ {
+ color_r = fixed_to_int(current_r);
+ color_g = fixed_to_int(current_g);
+ color_b = fixed_to_int(current_b);
+ }
+ else
+ {
+ color_r = psx_gpu->primitive_color & 0xFF;
+ color_g = (psx_gpu->primitive_color >> 8) & 0xFF;
+ color_b = (psx_gpu->primitive_color >> 16) & 0xFF;
+ }
+
+ if(flags & RENDER_FLAGS_TEXTURE_MAP)
+ {
+ u32 texel_r, texel_g, texel_b;
+ u32 u = fixed_to_int(current_u);
+ u32 v = fixed_to_int(current_v);
+
+ texel = fetch_texel(psx_gpu, u, v);
+
+ if(texel == 0)
+ {
+ transparent_pixels++;
+ goto skip_pixel;
+ }
+
+ texel_r = texel & 0x1F;
+ texel_g = (texel >> 5) & 0x1F;
+ texel_b = (texel >> 10) & 0x1F;
+
+ if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0)
+ {
+ color_r *= texel_r;
+ color_g *= texel_g;
+ color_b *= texel_b;
+ }
+ else
+ {
+ color_r = texel_r << 7;
+ color_g = texel_g << 7;
+ color_b = texel_b << 7;
+ }
+
+ color_r >>= 4;
+ color_g >>= 4;
+ color_b >>= 4;
+ }
+ else
+ {
+ untextured_pixels++;
+ }
+
+ if(psx_gpu->dither_mode && ((flags & RENDER_FLAGS_SHADE) ||
+ ((flags & RENDER_FLAGS_TEXTURE_MAP) &&
+ ((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0))))
+ {
+ s32 dither_offset = dither_table[y % 4][current_x % 4];
+ color_r += dither_offset;
+ color_g += dither_offset;
+ color_b += dither_offset;
+
+ if(color_r < 0)
+ color_r = 0;
+
+ if(color_g < 0)
+ color_g = 0;
+
+ if(color_b < 0)
+ color_b = 0;
+ }
+
+ color_r >>= 3;
+ color_g >>= 3;
+ color_b >>= 3;
+
+ draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x, y, flags);
+
+ skip_pixel:
+
+ current_u += span->u.step_dx;
+ current_v += span->v.step_dx;
+ current_r += span->r.step_dx;
+ current_g += span->g.step_dx;
+ current_b += span->b.step_dx;
+
+ current_x++;
+ }
+}
+
+void increment_span(_span_struct *span)
+{
+ span->left_x += span->left_dx_dy;
+ span->right_x += span->right_dx_dy;
+
+ span->u.current_value += span->u.step_dy;
+ span->v.current_value += span->v.step_dy;
+ span->r.current_value += span->r.step_dy;
+ span->g.current_value += span->g.step_dy;
+ span->b.current_value += span->b.step_dy;
+}
+
+void decrement_span(_span_struct *span)
+{
+ span->left_x += span->left_dx_dy;
+ span->right_x += span->right_dx_dy;
+
+ span->u.current_value -= span->u.step_dy;
+ span->v.current_value -= span->v.step_dy;
+ span->r.current_value -= span->r.step_dy;
+ span->g.current_value -= span->g.step_dy;
+ span->b.current_value -= span->b.step_dy;
+}
+
+
+#define compute_gradient_area_x(interpolant) \
+{ \
+ span.interpolant.gradient_area_x = \
+ triangle_signed_area_x2(a->interpolant, a->y, b->interpolant, b->y, \
+ c->interpolant, c->y); \
+} \
+
+#define compute_gradient_area_y(interpolant) \
+{ \
+ span.interpolant.gradient_area_y = \
+ triangle_signed_area_x2(a->x, a->interpolant, b->x, b->interpolant, \
+ c->x, c->interpolant); \
+} \
+
+#define compute_all_gradient_areas() \
+ compute_gradient_area_x(u); \
+ compute_gradient_area_x(v); \
+ compute_gradient_area_x(r); \
+ compute_gradient_area_x(g); \
+ compute_gradient_area_x(b); \
+ compute_gradient_area_y(u); \
+ compute_gradient_area_y(v); \
+ compute_gradient_area_y(r); \
+ compute_gradient_area_y(g); \
+ compute_gradient_area_y(b) \
+
+#define set_interpolant_base(interpolant, base_vertex) \
+ span->interpolant.step_dx = \
+ fixed_reciprocal_multiply(span->interpolant.gradient_area_x, reciprocal, \
+ span->triangle_winding, shift); \
+ span->interpolant.step_dy = \
+ fixed_reciprocal_multiply(span->interpolant.gradient_area_y, reciprocal, \
+ span->triangle_winding, shift); \
+ span->interpolant.current_value = fixed_center(base_vertex->interpolant) \
+
+#define set_interpolant_bases(base_vertex) \
+{ \
+ u32 shift; \
+ u32 reciprocal = fixed_reciprocal(span->triangle_area, &shift); \
+ shift -= FIXED_BITS; \
+ set_interpolant_base(u, base_vertex); \
+ set_interpolant_base(v, base_vertex); \
+ set_interpolant_base(r, base_vertex); \
+ set_interpolant_base(g, base_vertex); \
+ set_interpolant_base(b, base_vertex); \
+ span->base_x = span->left_x >> EDGE_STEP_BITS; \
+} \
+
+#define compute_edge_delta(edge, start, end, height) \
+{ \
+ s32 x_start = start->x; \
+ s32 x_end = end->x; \
+ s32 width = x_end - x_start; \
+ \
+ s32 shift = __builtin_clz(height); \
+ u32 height_normalized = height << shift; \
+ u32 height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) / \
+ height_normalized; \
+ \
+ shift -= (50 - EDGE_STEP_BITS); \
+ \
+ span->edge##_x = \
+ ((((s64)x_start * height) + (height - 1)) * height_reciprocal) << shift; \
+ span->edge##_dx_dy = ((s64)width * height_reciprocal) << shift; \
+} \
+
+
+#define render_spans_up(height) \
+ do \
+ { \
+ decrement_span(span); \
+ render_span(psx_gpu, span, current_y, flags); \
+ current_y--; \
+ height--; \
+ } while(height) \
+
+#define render_spans_down(height) \
+ do \
+ { \
+ render_span(psx_gpu, span, current_y, flags); \
+ increment_span(span); \
+ current_y++; \
+ height--; \
+ } while(height) \
+
+#define render_spans_up_up(minor, major) \
+ s32 current_y = bottom->y - 1; \
+ s32 height_minor_a = bottom->y - middle->y; \
+ s32 height_minor_b = middle->y - top->y; \
+ s32 height_major = height_minor_a + height_minor_b; \
+ \
+ compute_edge_delta(major, bottom, top, height_major); \
+ compute_edge_delta(minor, bottom, middle, height_minor_a); \
+ set_interpolant_bases(bottom); \
+ \
+ render_spans_up(height_minor_a); \
+ \
+ compute_edge_delta(minor, middle, top, height_minor_b); \
+ render_spans_up(height_minor_b) \
+
+void render_spans_up_left(psx_gpu_struct *psx_gpu, _span_struct *span,
+ vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags)
+{
+ render_spans_up_up(left, right);
+}
+
+void render_spans_up_right(psx_gpu_struct *psx_gpu, _span_struct *span,
+ vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags)
+{
+ render_spans_up_up(right, left);
+}
+
+#define render_spans_down_down(minor, major) \
+ s32 current_y = top->y; \
+ s32 height_minor_a = middle->y - top->y; \
+ s32 height_minor_b = bottom->y - middle->y; \
+ s32 height_major = height_minor_a + height_minor_b; \
+ \
+ compute_edge_delta(minor, top, middle, height_minor_a); \
+ compute_edge_delta(major, top, bottom, height_major); \
+ set_interpolant_bases(top); \
+ \
+ render_spans_down(height_minor_a); \
+ \
+ compute_edge_delta(minor, middle, bottom, height_minor_b); \
+ render_spans_down(height_minor_b) \
+
+void render_spans_down_left(psx_gpu_struct *psx_gpu, _span_struct *span,
+ vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags)
+{
+ render_spans_down_down(left, right);
+}
+
+void render_spans_down_right(psx_gpu_struct *psx_gpu, _span_struct *span,
+ vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags)
+{
+ render_spans_down_down(right, left);
+}
+
+#define render_spans_up_flat(bottom_left, bottom_right, top_left, top_right) \
+ s32 current_y = bottom_left->y - 1; \
+ s32 height = bottom_left->y - top_left->y; \
+ \
+ compute_edge_delta(left, bottom_left, top_left, height); \
+ compute_edge_delta(right, bottom_right, top_right, height); \
+ set_interpolant_bases(bottom_left); \
+ render_spans_up(height) \
+
+void render_spans_up_a(psx_gpu_struct *psx_gpu, _span_struct *span,
+ vertex_struct *bottom_left, vertex_struct *bottom_right, vertex_struct *top,
+ u32 flags)
+{
+ render_spans_up_flat(bottom_left, bottom_right, top, top);
+}
+
+void render_spans_up_b(psx_gpu_struct *psx_gpu, _span_struct *span,
+ vertex_struct *bottom, vertex_struct *top_left, vertex_struct *top_right,
+ u32 flags)
+{
+ render_spans_up_flat(bottom, bottom, top_left, top_right);
+}
+
+#define render_spans_down_flat(top_left, top_right, bottom_left, bottom_right) \
+ s32 current_y = top_left->y; \
+ s32 height = bottom_left->y - top_left->y; \
+ \
+ compute_edge_delta(left, top_left, bottom_left, height); \
+ compute_edge_delta(right, top_right, bottom_right, height); \
+ set_interpolant_bases(top_left); \
+ render_spans_down(height) \
+
+void render_spans_down_a(psx_gpu_struct *psx_gpu, _span_struct *span,
+ vertex_struct *top_left, vertex_struct *top_right, vertex_struct *bottom,
+ u32 flags)
+{
+ render_spans_down_flat(top_left, top_right, bottom, bottom);
+}
+
+void render_spans_down_b(psx_gpu_struct *psx_gpu, _span_struct *span,
+ vertex_struct *top, vertex_struct *bottom_left, vertex_struct *bottom_right,
+ u32 flags)
+{
+ render_spans_down_flat(top, top, bottom_left, bottom_right);
+}
+
+void render_spans_up_down(psx_gpu_struct *psx_gpu, _span_struct *span,
+ vertex_struct *middle, vertex_struct *top, vertex_struct *bottom, u32 flags)
+{
+ s32 middle_y = middle->y;
+ s32 current_y = middle_y - 1;
+ s32 height_minor_a = middle->y - top->y;
+ s32 height_minor_b = bottom->y - middle->y;
+ s32 height_major = height_minor_a + height_minor_b;
+
+ u64 right_x_mid;
+
+ compute_edge_delta(left, middle, top, height_minor_a);
+ compute_edge_delta(right, bottom, top, height_major);
+ set_interpolant_bases(middle);
+
+ right_x_mid = span->right_x + (span->right_dx_dy * height_minor_b);
+ span->right_x = right_x_mid;
+
+ render_spans_up(height_minor_a);
+
+ compute_edge_delta(left, middle, bottom, height_minor_b);
+ set_interpolant_bases(middle);
+
+ span->right_dx_dy *= -1;
+ span->right_x = right_x_mid;
+ current_y = middle_y;
+
+ render_spans_down(height_minor_b);
+}
+
+#define vertex_swap(_a, _b) \
+{ \
+ vertex_struct *temp_vertex = _a; \
+ _a = _b; \
+ _b = temp_vertex; \
+ triangle_winding ^= 1; \
+} \
+
+
+#define triangle_y_direction_up 1
+#define triangle_y_direction_flat 2
+#define triangle_y_direction_down 0
+
+#define triangle_winding_positive 0
+#define triangle_winding_negative 1
+
+#define triangle_set_direction(direction_variable, value) \
+ u32 direction_variable = (u32)(value) >> 31; \
+ if(value == 0) \
+ direction_variable = 2 \
+
+#define triangle_case(direction_a, direction_b, direction_c, winding) \
+ case (triangle_y_direction_##direction_a | \
+ (triangle_y_direction_##direction_b << 2) | \
+ (triangle_y_direction_##direction_c << 4) | \
+ (triangle_winding_##winding << 6)) \
+
+
+void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
+ u32 flags)
+{
+ s32 triangle_area;
+ u32 triangle_winding = 0;
+ _span_struct span;
+
+ vertex_struct *a = &(vertexes[0]);
+ vertex_struct *b = &(vertexes[1]);
+ vertex_struct *c = &(vertexes[2]);
+
+ triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y);
+
+ triangles++;
+
+ if(triangle_area == 0)
+ return;
+
+ if(b->y < a->y)
+ vertex_swap(a, b);
+
+ if(c->y < b->y)
+ {
+ vertex_swap(b, c);
+
+ if(b->y < a->y)
+ vertex_swap(a, b);
+ }
+
+ if((c->y - a->y) >= 512)
+ return;
+
+ if(triangle_area < 0)
+ {
+ triangle_area = -triangle_area;
+ triangle_winding ^= 1;
+ vertex_swap(a, c);
+ }
+
+ if(b->x < a->x)
+ vertex_swap(a, b);
+
+ if(c->x < b->x)
+ {
+ vertex_swap(b, c);
+
+ if(b->x < a->x)
+ vertex_swap(a, b);
+ }
+
+ if((c->x - a->x) >= 1024)
+ return;
+
+ s32 y_delta_a = b->y - a->y;
+ s32 y_delta_b = c->y - b->y;
+ s32 y_delta_c = c->y - a->y;
+
+ triangle_set_direction(y_direction_a, y_delta_a);
+ triangle_set_direction(y_direction_b, y_delta_b);
+ triangle_set_direction(y_direction_c, y_delta_c);
+
+ compute_all_gradient_areas();
+ span.triangle_area = triangle_area;
+ span.triangle_winding = triangle_winding;
+
+ switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
+ (triangle_winding << 6))
+ {
+ triangle_case(up, up, up, negative):
+ triangle_case(up, up, flat, negative):
+ triangle_case(up, up, down, negative):
+ render_spans_up_right(psx_gpu, &span, a, b, c, flags);
+ break;
+
+ triangle_case(flat, up, up, negative):
+ triangle_case(flat, up, flat, negative):
+ triangle_case(flat, up, down, negative):
+ render_spans_up_a(psx_gpu, &span, a, b, c, flags);
+ break;
+
+ triangle_case(down, up, up, negative):
+ render_spans_up_down(psx_gpu, &span, a, c, b, flags);
+ break;
+
+ triangle_case(down, up, flat, negative):
+ render_spans_down_a(psx_gpu, &span, a, c, b, flags);
+ break;
+
+ triangle_case(down, up, down, negative):
+ render_spans_down_right(psx_gpu, &span, a, c, b, flags);
+ break;
+
+ triangle_case(down, flat, up, negative):
+ triangle_case(down, flat, flat, negative):
+ triangle_case(down, flat, down, negative):
+ render_spans_down_b(psx_gpu, &span, a, b, c, flags);
+ break;
+
+ triangle_case(down, down, up, negative):
+ triangle_case(down, down, flat, negative):
+ triangle_case(down, down, down, negative):
+ render_spans_down_left(psx_gpu, &span, a, b, c, flags);
+ break;
+
+ triangle_case(up, up, up, positive):
+ triangle_case(up, up, flat, positive):
+ triangle_case(up, up, down, positive):
+ render_spans_up_left(psx_gpu, &span, a, b, c, flags);
+ break;
+
+ triangle_case(up, flat, up, positive):
+ triangle_case(up, flat, flat, positive):
+ triangle_case(up, flat, down, positive):
+ render_spans_up_b(psx_gpu, &span, a, b, c, flags);
+ break;
+
+ triangle_case(up, down, up, positive):
+ render_spans_up_right(psx_gpu, &span, a, c, b, flags);
+ break;
+
+ triangle_case(up, down, flat, positive):
+ render_spans_up_a(psx_gpu, &span, a, c, b, flags);
+ break;
+
+ triangle_case(up, down, down, positive):
+ render_spans_up_down(psx_gpu, &span, a, b, c, flags);
+ break;
+
+ triangle_case(flat, down, up, positive):
+ triangle_case(flat, down, flat, positive):
+ triangle_case(flat, down, down, positive):
+ render_spans_down_a(psx_gpu, &span, a, b, c, flags);
+ break;
+
+ triangle_case(down, down, up, positive):
+ triangle_case(down, down, flat, positive):
+ triangle_case(down, down, down, positive):
+ render_spans_down_right(psx_gpu, &span, a, b, c, flags);
+ break;
+ }
+
+}
+
+
+void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
+ s32 width, s32 height, u32 flags)
+{
+ // TODO: Flip/mirror
+ s32 current_x, current_y;
+ u32 current_u, current_v;
+ u32 primitive_color = psx_gpu->primitive_color;
+ u32 sprite_r, sprite_g, sprite_b;
+ s32 color_r = 0;
+ s32 color_g = 0;
+ s32 color_b = 0;
+ u32 texel = 0;
+
+ sprite_r = primitive_color & 0xFF;
+ sprite_g = (primitive_color >> 8) & 0xFF;
+ sprite_b = (primitive_color >> 16) & 0xFF;
+
+ static u32 sprites = 0;
+
+ sprites++;
+
+ for(current_y = y, current_v = v;
+ current_y < y + height; current_y++, current_v++)
+ {
+ for(current_x = x, current_u = u;
+ current_x < x + width; current_x++, current_u++)
+ {
+ if((current_x >= psx_gpu->viewport_start_x) &&
+ (current_y >= psx_gpu->viewport_start_y) &&
+ (current_x <= psx_gpu->viewport_end_x) &&
+ (current_y <= psx_gpu->viewport_end_y))
+ {
+ if(psx_gpu->mask_evaluate &&
+ (psx_gpu->vram[(y * 1024) + current_x] & 0x8000))
+ {
+ continue;
+ }
+
+ if(flags & RENDER_FLAGS_TEXTURE_MAP)
+ {
+ texel = fetch_texel(psx_gpu, current_u, current_v);
+ if(texel == 0)
+ continue;
+
+ color_r = texel & 0x1F;
+ color_g = (texel >> 5) & 0x1F;
+ color_b = (texel >> 10) & 0x1F;
+
+ if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0)
+ {
+ color_r *= sprite_r;
+ color_g *= sprite_g;
+ color_b *= sprite_b;
+
+ color_r >>= 7;
+ color_g >>= 7;
+ color_b >>= 7;
+ }
+ }
+ else
+ {
+ color_r = sprite_r >> 3;
+ color_g = sprite_g >> 3;
+ color_b = sprite_b >> 3;
+ }
+
+ draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x,
+ current_y, flags);
+ }
+ }
+ }
+}
+
+
+#define draw_pixel_line(_x, _y) \
+ if((_x >= psx_gpu->viewport_start_x) && (_y >= psx_gpu->viewport_start_y) && \
+ (_x <= psx_gpu->viewport_end_x) && (_y <= psx_gpu->viewport_end_y)) \
+ { \
+ if(flags & RENDER_FLAGS_SHADE) \
+ { \
+ color_r = fixed_to_int(current_r); \
+ color_g = fixed_to_int(current_g); \
+ color_b = fixed_to_int(current_b); \
+ \
+ current_r += gradient_r; \
+ current_g += gradient_g; \
+ current_b += gradient_b; \
+ } \
+ else \
+ { \
+ color_r = primitive_color & 0xFF; \
+ color_g = (primitive_color >> 8) & 0xFF; \
+ color_b = (primitive_color >> 16) & 0xFF; \
+ } \
+ \
+ if(psx_gpu->dither_mode) \
+ { \
+ s32 dither_offset = dither_table[_y % 4][_x % 4]; \
+ \
+ color_r += dither_offset; \
+ color_g += dither_offset; \
+ color_b += dither_offset; \
+ \
+ if(color_r < 0) \
+ color_r = 0; \
+ \
+ if(color_g < 0) \
+ color_g = 0; \
+ \
+ if(color_b < 0) \
+ color_b = 0; \
+ } \
+ color_r >>= 3; \
+ color_g >>= 3; \
+ color_b >>= 3; \
+ \
+ span_pixels++; \
+ \
+ draw_pixel(psx_gpu, color_r, color_g, color_b, 0, _x, _y, flags); \
+ } \
+
+#define update_increment(value) \
+ value++ \
+
+#define update_decrement(value) \
+ value-- \
+
+#define compare_increment(a, b) \
+ (a <= b) \
+
+#define compare_decrement(a, b) \
+ (a >= b) \
+
+#define set_line_gradients(minor) \
+{ \
+ s32 gradient_divisor = delta_##minor; \
+ gradient_r = int_to_fixed(vertex_b->r - vertex_a->r) / gradient_divisor; \
+ gradient_g = int_to_fixed(vertex_b->g - vertex_a->g) / gradient_divisor; \
+ gradient_b = int_to_fixed(vertex_b->b - vertex_a->b) / gradient_divisor; \
+ current_r = fixed_center(vertex_a->r); \
+ current_g = fixed_center(vertex_a->g); \
+ current_b = fixed_center(vertex_a->b); \
+}
+
+#define draw_line_span_horizontal(direction) \
+do \
+{ \
+ error_step = delta_y * 2; \
+ error_wrap = delta_x * 2; \
+ error = delta_x; \
+ \
+ current_y = y_a; \
+ set_line_gradients(x); \
+ \
+ for(current_x = x_a; current_x <= x_b; current_x++) \
+ { \
+ draw_pixel_line(current_x, current_y); \
+ error += error_step; \
+ \
+ if(error >= error_wrap) \
+ { \
+ update_##direction(current_y); \
+ error -= error_wrap; \
+ } \
+ } \
+} while(0) \
+
+#define draw_line_span_vertical(direction) \
+do \
+{ \
+ error_step = delta_x * 2; \
+ error_wrap = delta_y * 2; \
+ error = delta_y; \
+ \
+ current_x = x_a; \
+ set_line_gradients(y); \
+ \
+ for(current_y = y_a; compare_##direction(current_y, y_b); \
+ update_##direction(current_y)) \
+ { \
+ draw_pixel_line(current_x, current_y); \
+ error += error_step; \
+ \
+ if(error > error_wrap) \
+ { \
+ current_x++; \
+ error -= error_wrap; \
+ } \
+ } \
+} while(0) \
+
+void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags)
+{
+ u32 primitive_color = psx_gpu->primitive_color;
+ s32 color_r, color_g, color_b;
+
+ fixed_type gradient_r = 0;
+ fixed_type gradient_g = 0;
+ fixed_type gradient_b = 0;
+ fixed_type current_r = 0;
+ fixed_type current_g = 0;
+ fixed_type current_b = 0;
+
+ s32 y_a, y_b;
+ s32 x_a, x_b;
+
+ s32 delta_x, delta_y;
+ u32 triangle_winding = 0;
+
+ s32 current_x;
+ s32 current_y;
+
+ u32 error_step;
+ u32 error;
+ u32 error_wrap;
+
+ vertex_struct *vertex_a = &(vertexes[0]);
+ vertex_struct *vertex_b = &(vertexes[1]);
+
+ if(vertex_a->x >= vertex_b->x)
+ {
+ vertex_swap(vertex_a, vertex_b);
+ }
+
+ x_a = vertex_a->x;
+ x_b = vertex_b->x;
+
+ y_a = vertex_a->y;
+ y_b = vertex_b->y;
+
+ delta_x = x_b - x_a;
+ delta_y = y_b - y_a;
+
+ if(delta_x >= 1024)
+ return;
+
+ flags &= ~RENDER_FLAGS_TEXTURE_MAP;
+
+ if(delta_y < 0)
+ {
+ delta_y *= -1;
+
+ if(delta_y >= 512)
+ return;
+
+ if(delta_x > delta_y)
+ draw_line_span_horizontal(decrement);
+ else
+ draw_line_span_vertical(decrement);
+ }
+ else
+ {
+ if(delta_y >= 512)
+ return;
+
+ if(delta_x > delta_y)
+ draw_line_span_horizontal(increment);
+ else
+ draw_line_span_vertical(increment);
+ }
+}
+
+
+void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
+ u32 width, u32 height)
+{
+ u32 r = color & 0xFF;
+ u32 g = (color >> 8) & 0xFF;
+ u32 b = (color >> 16) & 0xFF;
+ u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
+
+ u16 *vram_ptr = psx_gpu->vram + x + (y * 1024);
+ u32 draw_x, draw_y;
+
+ for(draw_y = 0; draw_y < height; draw_y++)
+ {
+ for(draw_x = 0; draw_x < width; draw_x++)
+ {
+ vram_ptr[draw_x] = color_16bpp;
+ }
+
+ vram_ptr += 1024;
+ }
+}
+
+void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
+ u32 width, u32 height, u32 pitch)
+{
+ u16 *vram_ptr = psx_gpu->vram + x + (y * 1024);
+ u32 draw_x, draw_y;
+
+ for(draw_y = 0; draw_y < height; draw_y++)
+ {
+ for(draw_x = 0; draw_x < width; draw_x++)
+ {
+ vram_ptr[draw_x] = source[draw_x];
+ }
+
+ source += pitch;
+ vram_ptr += 1024;
+ }
+}
+
+void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y,
+ u32 dest_x, u32 dest_y, u32 width, u32 height)
+{
+ render_block_copy(psx_gpu, psx_gpu->vram + source_x + (source_y * 1024),
+ dest_x, dest_y, width, height, 1024);
+}
+
+void initialize_psx_gpu(psx_gpu_struct *psx_gpu)
+{
+ psx_gpu->pixel_count_mode = 0;
+ psx_gpu->pixel_compare_mode = 0;
+
+ psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512);
+ psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512);
+ memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512);
+ memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512);
+ psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512);
+}