diff options
Diffstat (limited to 'plugins/gpu_neon/psx_gpu/psx_gpu_standard.c')
-rw-r--r-- | plugins/gpu_neon/psx_gpu/psx_gpu_standard.c | 1166 |
1 files changed, 1166 insertions, 0 deletions
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_standard.c b/plugins/gpu_neon/psx_gpu/psx_gpu_standard.c new file mode 100644 index 0000000..68d29cf --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_standard.c @@ -0,0 +1,1166 @@ +/* + * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <malloc.h> +#include <math.h> + +#include "common.h" + +typedef s32 fixed_type; + +#define EDGE_STEP_BITS 32 +#define FIXED_BITS 12 + +#define fixed_center(value) \ + ((((fixed_type)value) << FIXED_BITS) + (1 << (FIXED_BITS - 1))) \ + +#define int_to_fixed(value) \ + (((fixed_type)value) << FIXED_BITS) \ + +#define fixed_to_int(value) \ + ((value) >> FIXED_BITS) \ + +#define fixed_mul(_a, _b) \ + (((s64)(_a) * (_b)) >> FIXED_BITS) \ + +#define fixed_to_double(value) \ + ((value) / (double)(1 << FIXED_BITS)) \ + +#define double_to_fixed(value) \ + (fixed_type)(((value) * (double)(1 << FIXED_BITS))) \ + +typedef struct +{ + fixed_type current_value; + fixed_type step_dx; + fixed_type step_dy; + fixed_type gradient_area_x; + fixed_type gradient_area_y; +} interpolant_struct; + +typedef struct +{ + s32 base_x; + + s64 left_x; + s64 left_dx_dy; + + s64 right_x; + s64 right_dx_dy; + + u32 triangle_area; + u32 triangle_winding; + + interpolant_struct u; + interpolant_struct v; + interpolant_struct r; + interpolant_struct g; + interpolant_struct b; +} _span_struct; + + +u32 span_pixels = 0; +u32 span_pixel_blocks = 0; +u32 spans = 0; +u32 triangles = 0; + +u32 texels_4bpp = 0; +u32 texels_8bpp = 0; +u32 texels_16bpp = 0; +u32 untextured_pixels = 0; +u32 blend_pixels = 0; +u32 transparent_pixels = 0; + +u32 state_changes = 0; +u32 render_buffer_flushes = 0; +u32 trivial_rejects = 0; + +void flush_render_block_buffer(psx_gpu_struct *psx_gpu) +{ + +} + + +u32 fixed_reciprocal(u32 denominator, u32 *_shift) +{ + u32 shift = __builtin_clz(denominator); + u32 denominator_normalized = denominator << shift; + + // Implement with a DP divide + u32 reciprocal = + (double)((1ULL << 62) + (denominator_normalized - 1)) / + (double)denominator_normalized; + + *_shift = 62 - shift; + return reciprocal; +} + +fixed_type fixed_reciprocal_multiply(s32 numerator, u32 reciprocal, + u32 reciprocal_sign, u32 shift) +{ + u32 numerator_sign = (u32)numerator >> 31; + u32 flip_sign = numerator_sign ^ reciprocal_sign; + u32 flip_sign_mask = ~(flip_sign - 1); + fixed_type value; + + numerator = abs(numerator); + + value = ((u64)numerator * reciprocal) >> shift; + + value ^= flip_sign_mask; + value -= flip_sign_mask; + + return value; +} + +s32 triangle_signed_area_x2(s32 x0, s32 y0, s32 x1, s32 y1, s32 x2, s32 y2) +{ + return ((x1 - x0) * (y2 - y1)) - ((x2 - x1) * (y1 - y0)); +} + +u32 fetch_texel_4bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v) +{ + u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr; + u32 texel = texture_ptr_8bpp[(v * 2048) + (u / 2)]; + + if(u & 1) + texel >>= 4; + else + texel &= 0xF; + + texels_4bpp++; + + return psx_gpu->clut_ptr[texel]; +} + +u32 fetch_texel_8bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v) +{ + u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr; + u32 texel = texture_ptr_8bpp[(v * 2048) + u]; + + texels_8bpp++; + + return psx_gpu->clut_ptr[texel]; +} + +u32 fetch_texel_16bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v) +{ + u16 *texture_ptr_16bpp = psx_gpu->texture_page_ptr; + + texels_16bpp++; + + return texture_ptr_16bpp[(v * 1024) + u]; +} + +u32 fetch_texel(psx_gpu_struct *psx_gpu, u32 u, u32 v) +{ + u &= psx_gpu->texture_mask_width; + v &= psx_gpu->texture_mask_height; + + switch(psx_gpu->texture_mode) + { + case TEXTURE_MODE_4BPP: + return fetch_texel_4bpp(psx_gpu, u, v); + + case TEXTURE_MODE_8BPP: + return fetch_texel_8bpp(psx_gpu, u, v); + + case TEXTURE_MODE_16BPP: + return fetch_texel_16bpp(psx_gpu, u, v); + } + + return 0; +} + +void draw_pixel(psx_gpu_struct *psx_gpu, s32 r, s32 g, s32 b, u32 texel, + u32 x, u32 y, u32 flags) +{ + u32 pixel; + + if(r > 31) + r = 31; + + if(g > 31) + g = 31; + + if(b > 31) + b = 31; + + if(flags & RENDER_FLAGS_BLEND) + { + if(((flags & RENDER_FLAGS_TEXTURE_MAP) == 0) || (texel & 0x8000)) + { + s32 fb_pixel = psx_gpu->vram[(y * 1024) + x]; + s32 fb_r = fb_pixel & 0x1F; + s32 fb_g = (fb_pixel >> 5) & 0x1F; + s32 fb_b = (fb_pixel >> 10) & 0x1F; + + blend_pixels++; + + switch(psx_gpu->blend_mode) + { + case BLEND_MODE_AVERAGE: + r = (r + fb_r) / 2; + g = (g + fb_g) / 2; + b = (b + fb_b) / 2; + break; + + case BLEND_MODE_ADD: + r += fb_r; + g += fb_g; + b += fb_b; + + if(r > 31) + r = 31; + + if(g > 31) + g = 31; + + if(b > 31) + b = 31; + + break; + + case BLEND_MODE_SUBTRACT: + r = fb_r - r; + g = fb_g - g; + b = fb_b - b; + + if(r < 0) + r = 0; + + if(g < 0) + g = 0; + + if(b < 0) + b = 0; + + break; + + case BLEND_MODE_ADD_FOURTH: + r = fb_r + (r / 4); + g = fb_g + (g / 4); + b = fb_b + (b / 4); + + if(r > 31) + r = 31; + + if(g > 31) + g = 31; + + if(b > 31) + b = 31; + + break; + } + } + } + + pixel = r | (g << 5) | (b << 10); + + if(psx_gpu->mask_apply || (texel & 0x8000)) + pixel |= 0x8000; + + psx_gpu->vram[(y * 1024) + x] = pixel; +} + +s32 dither_table[4][4] = +{ + { -4, 0, -3, 1 }, + { 2, -2, 3, -1 }, + { -3, 1, -4, 0 }, + { 3, -1, 2, -2 }, +}; + +void render_span(psx_gpu_struct *psx_gpu, _span_struct *span, s32 y, + u32 flags) +{ + s32 left_x = span->left_x >> EDGE_STEP_BITS; + s32 right_x = span->right_x >> EDGE_STEP_BITS; + s32 current_x = left_x; + s32 delta_x; + + fixed_type current_u = span->u.current_value; + fixed_type current_v = span->v.current_value; + fixed_type current_r = span->r.current_value; + fixed_type current_g = span->g.current_value; + fixed_type current_b = span->b.current_value; + + if(y < psx_gpu->viewport_start_y) + return; + + if(y > psx_gpu->viewport_end_y) + return; + + if(right_x < psx_gpu->viewport_start_x) + return; + + if(current_x > psx_gpu->viewport_end_x) + return; + + spans++; + + if(current_x < psx_gpu->viewport_start_x) + current_x = psx_gpu->viewport_start_x; + + if(right_x > psx_gpu->viewport_end_x + 1) + right_x = psx_gpu->viewport_end_x + 1; + + delta_x = current_x - span->base_x; + + current_u += delta_x * span->u.step_dx; + current_v += delta_x * span->v.step_dx; + current_r += delta_x * span->r.step_dx; + current_g += delta_x * span->g.step_dx; + current_b += delta_x * span->b.step_dx; + + span_pixels += right_x - current_x; + span_pixel_blocks += ((right_x / 8) - (current_x / 8)) + 1; + + while(current_x < right_x) + { + s32 color_r, color_g, color_b; + u32 texel = 0; + + if(psx_gpu->mask_evaluate && + (psx_gpu->vram[(y * 1024) + current_x] & 0x8000)) + { + goto skip_pixel; + } + + if(flags & RENDER_FLAGS_SHADE) + { + color_r = fixed_to_int(current_r); + color_g = fixed_to_int(current_g); + color_b = fixed_to_int(current_b); + } + else + { + color_r = psx_gpu->primitive_color & 0xFF; + color_g = (psx_gpu->primitive_color >> 8) & 0xFF; + color_b = (psx_gpu->primitive_color >> 16) & 0xFF; + } + + if(flags & RENDER_FLAGS_TEXTURE_MAP) + { + u32 texel_r, texel_g, texel_b; + u32 u = fixed_to_int(current_u); + u32 v = fixed_to_int(current_v); + + texel = fetch_texel(psx_gpu, u, v); + + if(texel == 0) + { + transparent_pixels++; + goto skip_pixel; + } + + texel_r = texel & 0x1F; + texel_g = (texel >> 5) & 0x1F; + texel_b = (texel >> 10) & 0x1F; + + if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0) + { + color_r *= texel_r; + color_g *= texel_g; + color_b *= texel_b; + } + else + { + color_r = texel_r << 7; + color_g = texel_g << 7; + color_b = texel_b << 7; + } + + color_r >>= 4; + color_g >>= 4; + color_b >>= 4; + } + else + { + untextured_pixels++; + } + + if(psx_gpu->dither_mode && ((flags & RENDER_FLAGS_SHADE) || + ((flags & RENDER_FLAGS_TEXTURE_MAP) && + ((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0)))) + { + s32 dither_offset = dither_table[y % 4][current_x % 4]; + color_r += dither_offset; + color_g += dither_offset; + color_b += dither_offset; + + if(color_r < 0) + color_r = 0; + + if(color_g < 0) + color_g = 0; + + if(color_b < 0) + color_b = 0; + } + + color_r >>= 3; + color_g >>= 3; + color_b >>= 3; + + draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x, y, flags); + + skip_pixel: + + current_u += span->u.step_dx; + current_v += span->v.step_dx; + current_r += span->r.step_dx; + current_g += span->g.step_dx; + current_b += span->b.step_dx; + + current_x++; + } +} + +void increment_span(_span_struct *span) +{ + span->left_x += span->left_dx_dy; + span->right_x += span->right_dx_dy; + + span->u.current_value += span->u.step_dy; + span->v.current_value += span->v.step_dy; + span->r.current_value += span->r.step_dy; + span->g.current_value += span->g.step_dy; + span->b.current_value += span->b.step_dy; +} + +void decrement_span(_span_struct *span) +{ + span->left_x += span->left_dx_dy; + span->right_x += span->right_dx_dy; + + span->u.current_value -= span->u.step_dy; + span->v.current_value -= span->v.step_dy; + span->r.current_value -= span->r.step_dy; + span->g.current_value -= span->g.step_dy; + span->b.current_value -= span->b.step_dy; +} + + +#define compute_gradient_area_x(interpolant) \ +{ \ + span.interpolant.gradient_area_x = \ + triangle_signed_area_x2(a->interpolant, a->y, b->interpolant, b->y, \ + c->interpolant, c->y); \ +} \ + +#define compute_gradient_area_y(interpolant) \ +{ \ + span.interpolant.gradient_area_y = \ + triangle_signed_area_x2(a->x, a->interpolant, b->x, b->interpolant, \ + c->x, c->interpolant); \ +} \ + +#define compute_all_gradient_areas() \ + compute_gradient_area_x(u); \ + compute_gradient_area_x(v); \ + compute_gradient_area_x(r); \ + compute_gradient_area_x(g); \ + compute_gradient_area_x(b); \ + compute_gradient_area_y(u); \ + compute_gradient_area_y(v); \ + compute_gradient_area_y(r); \ + compute_gradient_area_y(g); \ + compute_gradient_area_y(b) \ + +#define set_interpolant_base(interpolant, base_vertex) \ + span->interpolant.step_dx = \ + fixed_reciprocal_multiply(span->interpolant.gradient_area_x, reciprocal, \ + span->triangle_winding, shift); \ + span->interpolant.step_dy = \ + fixed_reciprocal_multiply(span->interpolant.gradient_area_y, reciprocal, \ + span->triangle_winding, shift); \ + span->interpolant.current_value = fixed_center(base_vertex->interpolant) \ + +#define set_interpolant_bases(base_vertex) \ +{ \ + u32 shift; \ + u32 reciprocal = fixed_reciprocal(span->triangle_area, &shift); \ + shift -= FIXED_BITS; \ + set_interpolant_base(u, base_vertex); \ + set_interpolant_base(v, base_vertex); \ + set_interpolant_base(r, base_vertex); \ + set_interpolant_base(g, base_vertex); \ + set_interpolant_base(b, base_vertex); \ + span->base_x = span->left_x >> EDGE_STEP_BITS; \ +} \ + +#define compute_edge_delta(edge, start, end, height) \ +{ \ + s32 x_start = start->x; \ + s32 x_end = end->x; \ + s32 width = x_end - x_start; \ + \ + s32 shift = __builtin_clz(height); \ + u32 height_normalized = height << shift; \ + u32 height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) / \ + height_normalized; \ + \ + shift -= (50 - EDGE_STEP_BITS); \ + \ + span->edge##_x = \ + ((((s64)x_start * height) + (height - 1)) * height_reciprocal) << shift; \ + span->edge##_dx_dy = ((s64)width * height_reciprocal) << shift; \ +} \ + + +#define render_spans_up(height) \ + do \ + { \ + decrement_span(span); \ + render_span(psx_gpu, span, current_y, flags); \ + current_y--; \ + height--; \ + } while(height) \ + +#define render_spans_down(height) \ + do \ + { \ + render_span(psx_gpu, span, current_y, flags); \ + increment_span(span); \ + current_y++; \ + height--; \ + } while(height) \ + +#define render_spans_up_up(minor, major) \ + s32 current_y = bottom->y - 1; \ + s32 height_minor_a = bottom->y - middle->y; \ + s32 height_minor_b = middle->y - top->y; \ + s32 height_major = height_minor_a + height_minor_b; \ + \ + compute_edge_delta(major, bottom, top, height_major); \ + compute_edge_delta(minor, bottom, middle, height_minor_a); \ + set_interpolant_bases(bottom); \ + \ + render_spans_up(height_minor_a); \ + \ + compute_edge_delta(minor, middle, top, height_minor_b); \ + render_spans_up(height_minor_b) \ + +void render_spans_up_left(psx_gpu_struct *psx_gpu, _span_struct *span, + vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags) +{ + render_spans_up_up(left, right); +} + +void render_spans_up_right(psx_gpu_struct *psx_gpu, _span_struct *span, + vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags) +{ + render_spans_up_up(right, left); +} + +#define render_spans_down_down(minor, major) \ + s32 current_y = top->y; \ + s32 height_minor_a = middle->y - top->y; \ + s32 height_minor_b = bottom->y - middle->y; \ + s32 height_major = height_minor_a + height_minor_b; \ + \ + compute_edge_delta(minor, top, middle, height_minor_a); \ + compute_edge_delta(major, top, bottom, height_major); \ + set_interpolant_bases(top); \ + \ + render_spans_down(height_minor_a); \ + \ + compute_edge_delta(minor, middle, bottom, height_minor_b); \ + render_spans_down(height_minor_b) \ + +void render_spans_down_left(psx_gpu_struct *psx_gpu, _span_struct *span, + vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags) +{ + render_spans_down_down(left, right); +} + +void render_spans_down_right(psx_gpu_struct *psx_gpu, _span_struct *span, + vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags) +{ + render_spans_down_down(right, left); +} + +#define render_spans_up_flat(bottom_left, bottom_right, top_left, top_right) \ + s32 current_y = bottom_left->y - 1; \ + s32 height = bottom_left->y - top_left->y; \ + \ + compute_edge_delta(left, bottom_left, top_left, height); \ + compute_edge_delta(right, bottom_right, top_right, height); \ + set_interpolant_bases(bottom_left); \ + render_spans_up(height) \ + +void render_spans_up_a(psx_gpu_struct *psx_gpu, _span_struct *span, + vertex_struct *bottom_left, vertex_struct *bottom_right, vertex_struct *top, + u32 flags) +{ + render_spans_up_flat(bottom_left, bottom_right, top, top); +} + +void render_spans_up_b(psx_gpu_struct *psx_gpu, _span_struct *span, + vertex_struct *bottom, vertex_struct *top_left, vertex_struct *top_right, + u32 flags) +{ + render_spans_up_flat(bottom, bottom, top_left, top_right); +} + +#define render_spans_down_flat(top_left, top_right, bottom_left, bottom_right) \ + s32 current_y = top_left->y; \ + s32 height = bottom_left->y - top_left->y; \ + \ + compute_edge_delta(left, top_left, bottom_left, height); \ + compute_edge_delta(right, top_right, bottom_right, height); \ + set_interpolant_bases(top_left); \ + render_spans_down(height) \ + +void render_spans_down_a(psx_gpu_struct *psx_gpu, _span_struct *span, + vertex_struct *top_left, vertex_struct *top_right, vertex_struct *bottom, + u32 flags) +{ + render_spans_down_flat(top_left, top_right, bottom, bottom); +} + +void render_spans_down_b(psx_gpu_struct *psx_gpu, _span_struct *span, + vertex_struct *top, vertex_struct *bottom_left, vertex_struct *bottom_right, + u32 flags) +{ + render_spans_down_flat(top, top, bottom_left, bottom_right); +} + +void render_spans_up_down(psx_gpu_struct *psx_gpu, _span_struct *span, + vertex_struct *middle, vertex_struct *top, vertex_struct *bottom, u32 flags) +{ + s32 middle_y = middle->y; + s32 current_y = middle_y - 1; + s32 height_minor_a = middle->y - top->y; + s32 height_minor_b = bottom->y - middle->y; + s32 height_major = height_minor_a + height_minor_b; + + u64 right_x_mid; + + compute_edge_delta(left, middle, top, height_minor_a); + compute_edge_delta(right, bottom, top, height_major); + set_interpolant_bases(middle); + + right_x_mid = span->right_x + (span->right_dx_dy * height_minor_b); + span->right_x = right_x_mid; + + render_spans_up(height_minor_a); + + compute_edge_delta(left, middle, bottom, height_minor_b); + set_interpolant_bases(middle); + + span->right_dx_dy *= -1; + span->right_x = right_x_mid; + current_y = middle_y; + + render_spans_down(height_minor_b); +} + +#define vertex_swap(_a, _b) \ +{ \ + vertex_struct *temp_vertex = _a; \ + _a = _b; \ + _b = temp_vertex; \ + triangle_winding ^= 1; \ +} \ + + +#define triangle_y_direction_up 1 +#define triangle_y_direction_flat 2 +#define triangle_y_direction_down 0 + +#define triangle_winding_positive 0 +#define triangle_winding_negative 1 + +#define triangle_set_direction(direction_variable, value) \ + u32 direction_variable = (u32)(value) >> 31; \ + if(value == 0) \ + direction_variable = 2 \ + +#define triangle_case(direction_a, direction_b, direction_c, winding) \ + case (triangle_y_direction_##direction_a | \ + (triangle_y_direction_##direction_b << 2) | \ + (triangle_y_direction_##direction_c << 4) | \ + (triangle_winding_##winding << 6)) \ + + +void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + u32 flags) +{ + s32 triangle_area; + u32 triangle_winding = 0; + _span_struct span; + + vertex_struct *a = &(vertexes[0]); + vertex_struct *b = &(vertexes[1]); + vertex_struct *c = &(vertexes[2]); + + triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y); + + triangles++; + + if(triangle_area == 0) + return; + + if(b->y < a->y) + vertex_swap(a, b); + + if(c->y < b->y) + { + vertex_swap(b, c); + + if(b->y < a->y) + vertex_swap(a, b); + } + + if((c->y - a->y) >= 512) + return; + + if(triangle_area < 0) + { + triangle_area = -triangle_area; + triangle_winding ^= 1; + vertex_swap(a, c); + } + + if(b->x < a->x) + vertex_swap(a, b); + + if(c->x < b->x) + { + vertex_swap(b, c); + + if(b->x < a->x) + vertex_swap(a, b); + } + + if((c->x - a->x) >= 1024) + return; + + s32 y_delta_a = b->y - a->y; + s32 y_delta_b = c->y - b->y; + s32 y_delta_c = c->y - a->y; + + triangle_set_direction(y_direction_a, y_delta_a); + triangle_set_direction(y_direction_b, y_delta_b); + triangle_set_direction(y_direction_c, y_delta_c); + + compute_all_gradient_areas(); + span.triangle_area = triangle_area; + span.triangle_winding = triangle_winding; + + switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) | + (triangle_winding << 6)) + { + triangle_case(up, up, up, negative): + triangle_case(up, up, flat, negative): + triangle_case(up, up, down, negative): + render_spans_up_right(psx_gpu, &span, a, b, c, flags); + break; + + triangle_case(flat, up, up, negative): + triangle_case(flat, up, flat, negative): + triangle_case(flat, up, down, negative): + render_spans_up_a(psx_gpu, &span, a, b, c, flags); + break; + + triangle_case(down, up, up, negative): + render_spans_up_down(psx_gpu, &span, a, c, b, flags); + break; + + triangle_case(down, up, flat, negative): + render_spans_down_a(psx_gpu, &span, a, c, b, flags); + break; + + triangle_case(down, up, down, negative): + render_spans_down_right(psx_gpu, &span, a, c, b, flags); + break; + + triangle_case(down, flat, up, negative): + triangle_case(down, flat, flat, negative): + triangle_case(down, flat, down, negative): + render_spans_down_b(psx_gpu, &span, a, b, c, flags); + break; + + triangle_case(down, down, up, negative): + triangle_case(down, down, flat, negative): + triangle_case(down, down, down, negative): + render_spans_down_left(psx_gpu, &span, a, b, c, flags); + break; + + triangle_case(up, up, up, positive): + triangle_case(up, up, flat, positive): + triangle_case(up, up, down, positive): + render_spans_up_left(psx_gpu, &span, a, b, c, flags); + break; + + triangle_case(up, flat, up, positive): + triangle_case(up, flat, flat, positive): + triangle_case(up, flat, down, positive): + render_spans_up_b(psx_gpu, &span, a, b, c, flags); + break; + + triangle_case(up, down, up, positive): + render_spans_up_right(psx_gpu, &span, a, c, b, flags); + break; + + triangle_case(up, down, flat, positive): + render_spans_up_a(psx_gpu, &span, a, c, b, flags); + break; + + triangle_case(up, down, down, positive): + render_spans_up_down(psx_gpu, &span, a, b, c, flags); + break; + + triangle_case(flat, down, up, positive): + triangle_case(flat, down, flat, positive): + triangle_case(flat, down, down, positive): + render_spans_down_a(psx_gpu, &span, a, b, c, flags); + break; + + triangle_case(down, down, up, positive): + triangle_case(down, down, flat, positive): + triangle_case(down, down, down, positive): + render_spans_down_right(psx_gpu, &span, a, b, c, flags); + break; + } + +} + + +void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, + s32 width, s32 height, u32 flags) +{ + // TODO: Flip/mirror + s32 current_x, current_y; + u32 current_u, current_v; + u32 primitive_color = psx_gpu->primitive_color; + u32 sprite_r, sprite_g, sprite_b; + s32 color_r = 0; + s32 color_g = 0; + s32 color_b = 0; + u32 texel = 0; + + sprite_r = primitive_color & 0xFF; + sprite_g = (primitive_color >> 8) & 0xFF; + sprite_b = (primitive_color >> 16) & 0xFF; + + static u32 sprites = 0; + + sprites++; + + for(current_y = y, current_v = v; + current_y < y + height; current_y++, current_v++) + { + for(current_x = x, current_u = u; + current_x < x + width; current_x++, current_u++) + { + if((current_x >= psx_gpu->viewport_start_x) && + (current_y >= psx_gpu->viewport_start_y) && + (current_x <= psx_gpu->viewport_end_x) && + (current_y <= psx_gpu->viewport_end_y)) + { + if(psx_gpu->mask_evaluate && + (psx_gpu->vram[(y * 1024) + current_x] & 0x8000)) + { + continue; + } + + if(flags & RENDER_FLAGS_TEXTURE_MAP) + { + texel = fetch_texel(psx_gpu, current_u, current_v); + if(texel == 0) + continue; + + color_r = texel & 0x1F; + color_g = (texel >> 5) & 0x1F; + color_b = (texel >> 10) & 0x1F; + + if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0) + { + color_r *= sprite_r; + color_g *= sprite_g; + color_b *= sprite_b; + + color_r >>= 7; + color_g >>= 7; + color_b >>= 7; + } + } + else + { + color_r = sprite_r >> 3; + color_g = sprite_g >> 3; + color_b = sprite_b >> 3; + } + + draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x, + current_y, flags); + } + } + } +} + + +#define draw_pixel_line(_x, _y) \ + if((_x >= psx_gpu->viewport_start_x) && (_y >= psx_gpu->viewport_start_y) && \ + (_x <= psx_gpu->viewport_end_x) && (_y <= psx_gpu->viewport_end_y)) \ + { \ + if(flags & RENDER_FLAGS_SHADE) \ + { \ + color_r = fixed_to_int(current_r); \ + color_g = fixed_to_int(current_g); \ + color_b = fixed_to_int(current_b); \ + \ + current_r += gradient_r; \ + current_g += gradient_g; \ + current_b += gradient_b; \ + } \ + else \ + { \ + color_r = primitive_color & 0xFF; \ + color_g = (primitive_color >> 8) & 0xFF; \ + color_b = (primitive_color >> 16) & 0xFF; \ + } \ + \ + if(psx_gpu->dither_mode) \ + { \ + s32 dither_offset = dither_table[_y % 4][_x % 4]; \ + \ + color_r += dither_offset; \ + color_g += dither_offset; \ + color_b += dither_offset; \ + \ + if(color_r < 0) \ + color_r = 0; \ + \ + if(color_g < 0) \ + color_g = 0; \ + \ + if(color_b < 0) \ + color_b = 0; \ + } \ + color_r >>= 3; \ + color_g >>= 3; \ + color_b >>= 3; \ + \ + span_pixels++; \ + \ + draw_pixel(psx_gpu, color_r, color_g, color_b, 0, _x, _y, flags); \ + } \ + +#define update_increment(value) \ + value++ \ + +#define update_decrement(value) \ + value-- \ + +#define compare_increment(a, b) \ + (a <= b) \ + +#define compare_decrement(a, b) \ + (a >= b) \ + +#define set_line_gradients(minor) \ +{ \ + s32 gradient_divisor = delta_##minor; \ + gradient_r = int_to_fixed(vertex_b->r - vertex_a->r) / gradient_divisor; \ + gradient_g = int_to_fixed(vertex_b->g - vertex_a->g) / gradient_divisor; \ + gradient_b = int_to_fixed(vertex_b->b - vertex_a->b) / gradient_divisor; \ + current_r = fixed_center(vertex_a->r); \ + current_g = fixed_center(vertex_a->g); \ + current_b = fixed_center(vertex_a->b); \ +} + +#define draw_line_span_horizontal(direction) \ +do \ +{ \ + error_step = delta_y * 2; \ + error_wrap = delta_x * 2; \ + error = delta_x; \ + \ + current_y = y_a; \ + set_line_gradients(x); \ + \ + for(current_x = x_a; current_x <= x_b; current_x++) \ + { \ + draw_pixel_line(current_x, current_y); \ + error += error_step; \ + \ + if(error >= error_wrap) \ + { \ + update_##direction(current_y); \ + error -= error_wrap; \ + } \ + } \ +} while(0) \ + +#define draw_line_span_vertical(direction) \ +do \ +{ \ + error_step = delta_x * 2; \ + error_wrap = delta_y * 2; \ + error = delta_y; \ + \ + current_x = x_a; \ + set_line_gradients(y); \ + \ + for(current_y = y_a; compare_##direction(current_y, y_b); \ + update_##direction(current_y)) \ + { \ + draw_pixel_line(current_x, current_y); \ + error += error_step; \ + \ + if(error > error_wrap) \ + { \ + current_x++; \ + error -= error_wrap; \ + } \ + } \ +} while(0) \ + +void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags) +{ + u32 primitive_color = psx_gpu->primitive_color; + s32 color_r, color_g, color_b; + + fixed_type gradient_r = 0; + fixed_type gradient_g = 0; + fixed_type gradient_b = 0; + fixed_type current_r = 0; + fixed_type current_g = 0; + fixed_type current_b = 0; + + s32 y_a, y_b; + s32 x_a, x_b; + + s32 delta_x, delta_y; + u32 triangle_winding = 0; + + s32 current_x; + s32 current_y; + + u32 error_step; + u32 error; + u32 error_wrap; + + vertex_struct *vertex_a = &(vertexes[0]); + vertex_struct *vertex_b = &(vertexes[1]); + + if(vertex_a->x >= vertex_b->x) + { + vertex_swap(vertex_a, vertex_b); + } + + x_a = vertex_a->x; + x_b = vertex_b->x; + + y_a = vertex_a->y; + y_b = vertex_b->y; + + delta_x = x_b - x_a; + delta_y = y_b - y_a; + + if(delta_x >= 1024) + return; + + flags &= ~RENDER_FLAGS_TEXTURE_MAP; + + if(delta_y < 0) + { + delta_y *= -1; + + if(delta_y >= 512) + return; + + if(delta_x > delta_y) + draw_line_span_horizontal(decrement); + else + draw_line_span_vertical(decrement); + } + else + { + if(delta_y >= 512) + return; + + if(delta_x > delta_y) + draw_line_span_horizontal(increment); + else + draw_line_span_vertical(increment); + } +} + + +void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, + u32 width, u32 height) +{ + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10); + + u16 *vram_ptr = psx_gpu->vram + x + (y * 1024); + u32 draw_x, draw_y; + + for(draw_y = 0; draw_y < height; draw_y++) + { + for(draw_x = 0; draw_x < width; draw_x++) + { + vram_ptr[draw_x] = color_16bpp; + } + + vram_ptr += 1024; + } +} + +void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, + u32 width, u32 height, u32 pitch) +{ + u16 *vram_ptr = psx_gpu->vram + x + (y * 1024); + u32 draw_x, draw_y; + + for(draw_y = 0; draw_y < height; draw_y++) + { + for(draw_x = 0; draw_x < width; draw_x++) + { + vram_ptr[draw_x] = source[draw_x]; + } + + source += pitch; + vram_ptr += 1024; + } +} + +void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y, + u32 dest_x, u32 dest_y, u32 width, u32 height) +{ + render_block_copy(psx_gpu, psx_gpu->vram + source_x + (source_y * 1024), + dest_x, dest_y, width, height, 1024); +} + +void initialize_psx_gpu(psx_gpu_struct *psx_gpu) +{ + psx_gpu->pixel_count_mode = 0; + psx_gpu->pixel_compare_mode = 0; + + psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512); + psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512); + memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512); + memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512); + psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512); +} |