about | summary | refs | log | tree | commit | diff
path: root/plugins
diff options
context:
space:
mode:
author: notaz, 2012-02-24 01:07:33 +0200
committer: notaz, 2012-02-27 00:27:46 +0200
commit: b243416b907e6ce366b051e77ed8a434f7668d5d (patch)
tree: 0d1bce54ae1c19934c83f9546257de94723bd76e /plugins
parent: f7be0e2b7adc4f6c4057f44f789bd7405844701c (diff)
download: pcsx_rearmed-b243416b907e6ce366b051e77ed8a434f7668d5d.tar.gz
pcsx_rearmed-b243416b907e6ce366b051e77ed8a434f7668d5d.tar.bz2
pcsx_rearmed-b243416b907e6ce366b051e77ed8a434f7668d5d.zip
gpulib: eliminate list scan-ahead
Diffstat (limited to 'plugins')
-rw-r--r-- plugins/dfxvideo/gpulib_if.c 39
-rw-r--r-- plugins/gpu-gles/gpulib_if.c 38
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu.h 2
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_main.c 4
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c 40
-rw-r--r-- plugins/gpu_neon/psx_gpu_if.c 17
-rw-r--r-- plugins/gpu_unai/gpulib_if.cpp 40
-rw-r--r-- plugins/gpulib/gpu.c 120
-rw-r--r-- plugins/gpulib/gpu.h 2
9 files changed, 198 insertions, 104 deletions
diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c
index 82bc38d..12aa0a3 100644
--- a/plugins/dfxvideo/gpulib_if.c
+++ b/plugins/dfxvideo/gpulib_if.c
@@ -296,16 +296,27 @@ int renderer_init(void)
extern const unsigned char cmd_lengths[256];
-void do_cmd_list(unsigned int *list, int list_len)
+int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
{
- unsigned int cmd, len;
-
+ unsigned int cmd = 0, len;
+ unsigned int *list_start = list;
unsigned int *list_end = list + list_len;
for (; list < list_end; list += 1 + len)
{
cmd = *list >> 24;
len = cmd_lengths[cmd];
+ if (list + 1 + len > list_end) {
+ cmd = -1;
+ break;
+ }
+
+#ifndef TEST
+ if (cmd == 0xa0 || cmd == 0xc0)
+ break; // image i/o, forward to upper layer
+ else if ((cmd & 0xf8) == 0xe0)
+ gpu.ex_regs[cmd & 7] = list[0];
+#endif
primTableJ[cmd]((void *)list);
@@ -313,8 +324,8 @@ void do_cmd_list(unsigned int *list, int list_len)
{
case 0x48 ... 0x4F:
{
- u32 num_vertexes = 1;
- u32 *list_position = &(list[2]);
+ u32 num_vertexes = 2;
+ u32 *list_position = &(list[3]);
while(1)
{
@@ -325,16 +336,14 @@ void do_cmd_list(unsigned int *list, int list_len)
num_vertexes++;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2);
-
+ len += (num_vertexes - 2);
break;
}
case 0x58 ... 0x5F:
{
- u32 num_vertexes = 1;
- u32 *list_position = &(list[2]);
+ u32 num_vertexes = 2;
+ u32 *list_position = &(list[4]);
while(1)
{
@@ -345,9 +354,7 @@ void do_cmd_list(unsigned int *list, int list_len)
num_vertexes++;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2) * 2;
-
+ len += (num_vertexes - 2) * 2;
break;
}
@@ -365,6 +372,12 @@ void do_cmd_list(unsigned int *list, int list_len)
#endif
}
}
+
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
+
+ *last_cmd = cmd;
+ return list - list_start;
}
void renderer_sync_ecmds(uint32_t *ecmds)
diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c
index ce32aad..c669b63 100644
--- a/plugins/gpu-gles/gpulib_if.c
+++ b/plugins/gpu-gles/gpulib_if.c
@@ -502,16 +502,28 @@ int renderer_init(void)
extern const unsigned char cmd_lengths[256];
-void do_cmd_list(unsigned int *list, int list_len)
+// XXX: mostly dupe code from soft peops
+int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
{
unsigned int cmd, len;
-
+ unsigned int *list_start = list;
unsigned int *list_end = list + list_len;
for (; list < list_end; list += 1 + len)
{
cmd = *list >> 24;
len = cmd_lengths[cmd];
+ if (list + 1 + len > list_end) {
+ cmd = -1;
+ break;
+ }
+
+#ifndef TEST
+ if (cmd == 0xa0 || cmd == 0xc0)
+ break; // image i/o, forward to upper layer
+ else if ((cmd & 0xf8) == 0xe0)
+ gpu.ex_regs[cmd & 7] = list[0];
+#endif
primTableJ[cmd]((void *)list);
@@ -519,8 +531,8 @@ void do_cmd_list(unsigned int *list, int list_len)
{
case 0x48 ... 0x4F:
{
- uint32_t num_vertexes = 1;
- uint32_t *list_position = &(list[2]);
+ uint32_t num_vertexes = 2;
+ uint32_t *list_position = &(list[3]);
while(1)
{
@@ -531,16 +543,14 @@ void do_cmd_list(unsigned int *list, int list_len)
num_vertexes++;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2);
-
+ len += (num_vertexes - 2);
break;
}
case 0x58 ... 0x5F:
{
- uint32_t num_vertexes = 1;
- uint32_t *list_position = &(list[2]);
+ uint32_t num_vertexes = 2;
+ uint32_t *list_position = &(list[4]);
while(1)
{
@@ -551,9 +561,7 @@ void do_cmd_list(unsigned int *list, int list_len)
num_vertexes++;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2) * 2;
-
+ len += (num_vertexes - 2) * 2;
break;
}
@@ -571,6 +579,12 @@ void do_cmd_list(unsigned int *list, int list_len)
#endif
}
}
+
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
+
+ *last_cmd = cmd;
+ return list - list_start;
}
void renderer_sync_ecmds(uint32_t *ecmds)
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h
index 4605c39..53a8717 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.h
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h
@@ -231,7 +231,7 @@ u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2);
void flush_render_block_buffer(psx_gpu_struct *psx_gpu);
void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram);
-void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size);
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command);
void triangle_benchmark(psx_gpu_struct *psx_gpu);
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
index 6c17b0a..8ca3ad0 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
@@ -192,7 +192,7 @@ int main(int argc, char *argv[])
init_counter();
#endif
- gpu_parse(psx_gpu, list, size);
+ gpu_parse(psx_gpu, list, size, NULL);
flush_render_block_buffer(psx_gpu);
clear_stats();
@@ -201,7 +201,7 @@ int main(int argc, char *argv[])
u32 cycles = get_counter();
#endif
- gpu_parse(psx_gpu, list, size);
+ gpu_parse(psx_gpu, list, size, NULL);
flush_render_block_buffer(psx_gpu);
#ifdef NEON_BUILD
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
index ac35631..3fc040d 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
@@ -199,12 +199,17 @@ void set_triangle_color(psx_gpu_struct *psx_gpu, u32 triangle_color)
get_vertex_data_xy(vertex_number, offset16); \
set_vertex_color_constant(vertex_number, color) \
+#ifndef SET_Ex
+#define SET_Ex(r, v)
+#endif
+
vertex_struct vertexes[4] __attribute__((aligned(32)));
-void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
{
- u32 current_command, command_length;
-
+ u32 current_command = 0, command_length;
+
+ u32 *list_start = list;
u32 *list_end = list + (size / 4);
for(; list < list_end; list += 1 + command_length)
@@ -212,6 +217,10 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
s16 *list_s16 = (void *)list;
current_command = *list >> 24;
command_length = command_lengths[current_command];
+ if (list + 1 + command_length > list_end) {
+ current_command = (u32)-1;
+ break;
+ }
switch(current_command)
{
@@ -590,7 +599,12 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF,
list_s16[4] & 0x3FF, list_s16[5] & 0x1FF, list_s16[6], list_s16[7]);
break;
-
+
+#ifdef PCSX
+ case 0xA0: // sys -> vid
+ case 0xC0: // vid -> sys
+ goto breakloop;
+#else
case 0xA0: // sys -> vid
{
u32 load_x = list_s16[2] & 0x3FF;
@@ -608,10 +622,11 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
load_width, load_height, load_width);
break;
}
-
+
case 0xC0: // vid -> sys
break;
-
+#endif
+
case 0xE1:
set_texture(psx_gpu, list[0] & 0x1FF);
@@ -621,6 +636,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1;
+ SET_Ex(1, list[0]);
break;
case 0xE2:
@@ -653,6 +669,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
update_texture_ptr(psx_gpu);
}
+ SET_Ex(2, list[0]);
break;
}
@@ -666,6 +683,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
+ SET_Ex(3, list[0]);
break;
case 0xE4:
@@ -678,6 +696,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
+ SET_Ex(4, list[0]);
break;
case 0xE5:
@@ -687,6 +706,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
psx_gpu->offset_x = offset_x >> 21;
psx_gpu->offset_y = offset_y >> 21;
+ SET_Ex(5, list[0]);
break;
}
@@ -706,6 +726,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
psx_gpu->mask_msb = mask_msb;
}
+ SET_Ex(6, list[0]);
break;
}
@@ -713,5 +734,12 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
break;
}
}
+
+#ifdef PCSX
+breakloop:
+#endif
+ if (last_command != NULL)
+ *last_command = current_command;
+ return list - list_start;
}
diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c
index 3ff6e48..ff31c27 100644
--- a/plugins/gpu_neon/psx_gpu_if.c
+++ b/plugins/gpu_neon/psx_gpu_if.c
@@ -13,26 +13,37 @@
extern const unsigned char cmd_lengths[256];
#define command_lengths cmd_lengths
+static unsigned int *ex_regs;
+
+#define PCSX
+#define SET_Ex(r, v) \
+ ex_regs[r] = v
+
#include "psx_gpu/psx_gpu.c"
#include "psx_gpu/psx_gpu_parse.c"
#include "../gpulib/gpu.h"
static psx_gpu_struct egpu __attribute__((aligned(256)));
-void do_cmd_list(uint32_t *list, int count)
+int do_cmd_list(uint32_t *list, int count, int *last_cmd)
{
- gpu_parse(&egpu, list, count * 4);
+ int ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
+
+ ex_regs[1] &= ~0x1ff;
+ ex_regs[1] |= egpu.texture_settings & 0x1ff;
+ return ret;
}
int renderer_init(void)
{
initialize_psx_gpu(&egpu, gpu.vram);
+ ex_regs = gpu.ex_regs;
return 0;
}
void renderer_sync_ecmds(uint32_t *ecmds)
{
- gpu_parse(&egpu, ecmds + 1, 6 * 4);
+ gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL);
}
void renderer_update_caches(int x, int y, int w, int h)
diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp
index 22fe31e..4994ef4 100644
--- a/plugins/gpu_unai/gpulib_if.cpp
+++ b/plugins/gpu_unai/gpulib_if.cpp
@@ -162,9 +162,10 @@ int renderer_init(void)
extern const unsigned char cmd_lengths[256];
-void do_cmd_list(unsigned int *list, int list_len)
+int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
{
- unsigned int cmd, len;
+ unsigned int cmd = 0, len;
+ unsigned int *list_start = list;
unsigned int *list_end = list + list_len;
linesInterlace = force_interlace;
@@ -176,6 +177,17 @@ void do_cmd_list(unsigned int *list, int list_len)
{
cmd = *list >> 24;
len = cmd_lengths[cmd];
+ if (list + 1 + len > list_end) {
+ cmd = -1;
+ break;
+ }
+
+#ifndef TEST
+ if (cmd == 0xa0 || cmd == 0xc0)
+ break; // image i/o, forward to upper layer
+ else if ((cmd & 0xf8) == 0xe0)
+ gpu.ex_regs[cmd & 7] = list[0];
+#endif
switch(cmd)
{
@@ -190,19 +202,16 @@ void do_cmd_list(unsigned int *list, int list_len)
while(1)
{
- if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
- break;
-
PacketBuffer.U4[1] = PacketBuffer.U4[2];
PacketBuffer.U4[2] = *list_position++;
gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
num_vertexes++;
+ if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
+ break;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2);
-
+ len += (num_vertexes - 2);
break;
}
@@ -217,9 +226,6 @@ void do_cmd_list(unsigned int *list, int list_len)
while(1)
{
- if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
- break;
-
PacketBuffer.U4[0] = PacketBuffer.U4[2];
PacketBuffer.U4[1] = PacketBuffer.U4[3];
PacketBuffer.U4[2] = *list_position++;
@@ -227,11 +233,11 @@ void do_cmd_list(unsigned int *list, int list_len)
gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
num_vertexes++;
+ if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
+ break;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2) * 2;
-
+ len += (num_vertexes - 2) * 2;
break;
}
@@ -252,6 +258,12 @@ void do_cmd_list(unsigned int *list, int list_len)
break;
}
}
+
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= GPU_GP1 & 0x1ff;
+
+ *last_cmd = cmd;
+ return list - list_start;
}
void renderer_sync_ecmds(uint32_t *ecmds)
diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c
index 99b8eda..df0099c 100644
--- a/plugins/gpulib/gpu.c
+++ b/plugins/gpulib/gpu.c
@@ -94,7 +94,7 @@ static noinline void decide_frameskip(void)
gpu.frameskip.active = 0;
}
-static noinline void decide_frameskip_allow(uint32_t cmd_e3)
+static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
// no frameskip if it decides to draw to display area,
// but not for interlace since it'll most likely always do that
@@ -103,6 +103,7 @@ static noinline void decide_frameskip_allow(uint32_t cmd_e3)
gpu.frameskip.allow = gpu.status.interlace ||
(uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
(uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
+ return gpu.frameskip.allow;
}
static noinline void get_gpu_info(uint32_t data)
@@ -327,70 +328,84 @@ static void finish_vram_transfer(int is_read)
gpu.dma_start.w, gpu.dma_start.h);
}
+static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
+{
+ int cmd = 0, pos = 0, len, dummy;
+ int skip = 1;
+
+ while (pos < count && skip) {
+ uint32_t *list = data + pos;
+ cmd = list[0] >> 24;
+ len = 1 + cmd_lengths[cmd];
+
+ if (cmd == 0x02) {
+ if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
+ // clearing something large, don't skip
+ do_cmd_list(data + pos, 3, &dummy);
+ }
+ else if ((cmd & 0xf4) == 0x24) {
+ // flat textured prim
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= list[4] & 0x1ff;
+ }
+ else if ((cmd & 0xf4) == 0x34) {
+ // shaded textured prim
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= list[5] & 0x1ff;
+ }
+ else if (cmd == 0xe3)
+ skip = decide_frameskip_allow(list[0]);
+
+ if ((cmd & 0xf8) == 0xe0)
+ gpu.ex_regs[cmd & 7] = list[0];
+
+ if (pos + len > count) {
+ cmd = -1;
+ break; // incomplete cmd
+ }
+ if (cmd == 0xa0 || cmd == 0xc0)
+ break; // image i/o
+ pos += len;
+ }
+
+ renderer_sync_ecmds(gpu.ex_regs);
+ *last_cmd = cmd;
+ return pos;
+}
+
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
- int len, cmd, start, pos;
+ int cmd, pos;
+ uint32_t old_e3 = gpu.ex_regs[3];
int vram_dirty = 0;
// process buffer
- for (start = pos = 0; pos < count; )
+ for (pos = 0; pos < count; )
{
- cmd = -1;
- len = 0;
-
- if (gpu.dma.h) {
+ if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
+ vram_dirty = 1;
pos += do_vram_io(data + pos, count - pos, 0);
if (pos == count)
break;
- start = pos;
- }
-
- // do look-ahead pass to detect SR changes and VRAM i/o
- while (pos < count) {
- uint32_t *list = data + pos;
- cmd = list[0] >> 24;
- len = 1 + cmd_lengths[cmd];
-
- //printf(" %3d: %02x %d\n", pos, cmd, len);
- if ((cmd & 0xf4) == 0x24) {
- // flat textured prim
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= list[4] & 0x1ff;
- }
- else if ((cmd & 0xf4) == 0x34) {
- // shaded textured prim
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= list[5] & 0x1ff;
- }
- else if (cmd == 0xe3)
- decide_frameskip_allow(list[0]);
-
- if (2 <= cmd && cmd < 0xc0)
- vram_dirty = 1;
- else if ((cmd & 0xf8) == 0xe0)
- gpu.ex_regs[cmd & 7] = list[0];
-
- if (pos + len > count) {
- cmd = -1;
- break; // incomplete cmd
- }
- if (cmd == 0xa0 || cmd == 0xc0)
- break; // image i/o
- pos += len;
- }
-
- if (pos - start > 0) {
- if (!gpu.frameskip.active || !gpu.frameskip.allow)
- do_cmd_list(data + start, pos - start);
- start = pos;
}
+ cmd = data[pos] >> 24;
if (cmd == 0xa0 || cmd == 0xc0) {
// consume vram write/read cmd
start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
- pos += len;
+ pos += 3;
+ continue;
}
- else if (cmd == -1)
+
+ if (gpu.frameskip.active && gpu.frameskip.allow)
+ pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
+ else {
+ pos += do_cmd_list(data + pos, count - pos, &cmd);
+ vram_dirty = 1;
+ }
+
+ if (cmd == -1)
+ // incomplete cmd
break;
}
@@ -398,10 +413,11 @@ static noinline int do_cmd_buffer(uint32_t *data, int count)
gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
- if (gpu.frameskip.active)
- renderer_sync_ecmds(gpu.ex_regs);
gpu.state.fb_dirty |= vram_dirty;
+ if (old_e3 != gpu.ex_regs[3])
+ decide_frameskip_allow(gpu.ex_regs[3]);
+
return count - pos;
}
diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h
index d9ad416..11bfe46 100644
--- a/plugins/gpulib/gpu.h
+++ b/plugins/gpulib/gpu.h
@@ -91,7 +91,7 @@ extern struct psx_gpu gpu;
extern const unsigned char cmd_lengths[256];
-void do_cmd_list(uint32_t *list, int count);
+int do_cmd_list(uint32_t *list, int count, int *last_cmd);
struct rearmed_cbs;