diff options
author | twinaphex | 2012-11-28 06:35:30 +0100 |
---|---|---|
committer | twinaphex | 2012-11-28 06:35:30 +0100 |
commit | b194a2ecd43debbb3724e332b660b993725acd52 (patch) | |
tree | a41deacfa7a6841e97d67fce60bb7f39b54b12be /plugins/gpulib | |
parent | b6514bd168106e978a0af0f5043d5da6fd35800a (diff) | |
parent | 06d2e1a58b6eb7fb449f79f7e807343bd8d39b59 (diff) | |
download | pcsx_rearmed-b194a2ecd43debbb3724e332b660b993725acd52.tar.gz pcsx_rearmed-b194a2ecd43debbb3724e332b660b993725acd52.tar.bz2 pcsx_rearmed-b194a2ecd43debbb3724e332b660b993725acd52.zip |
Merge git://github.com/notaz/pcsx_rearmed
Diffstat (limited to 'plugins/gpulib')
-rw-r--r-- | plugins/gpulib/cspace.c | 101 | ||||
-rw-r--r-- | plugins/gpulib/cspace.h | 4 | ||||
-rw-r--r-- | plugins/gpulib/gpu.c | 49 | ||||
-rw-r--r-- | plugins/gpulib/gpu.h | 11 | ||||
-rw-r--r-- | plugins/gpulib/gpulib.mak | 10 | ||||
-rw-r--r-- | plugins/gpulib/vout_pl.c | 98 |
6 files changed, 205 insertions, 68 deletions
diff --git a/plugins/gpulib/cspace.c b/plugins/gpulib/cspace.c index 1d5718c..8e3bee9 100644 --- a/plugins/gpulib/cspace.c +++ b/plugins/gpulib/cspace.c @@ -1,3 +1,13 @@ +/* + * (C) Gražvydas "notaz" Ignotas, 2011,2012 + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + #include "cspace.h" /* @@ -5,6 +15,8 @@ * in favor of NEON version or platform-specific conversion */ +#ifndef __ARM_NEON__ + void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) { const unsigned int *src = src_; @@ -42,3 +54,92 @@ void bgr888_to_rgb565(void *dst_, const void *src_, int bytes) void rgb888_to_rgb565(void *dst, const void *src, int bytes) {} void bgr888_to_rgb888(void *dst, const void *src, int bytes) {} +#endif // __ARM_NEON__ + +/* YUV stuff */ +static int yuv_ry[32], yuv_gy[32], yuv_by[32]; +static unsigned char yuv_u[32 * 2], yuv_v[32 * 2]; + +void bgr_to_uyvy_init(void) +{ + int i, v; + + /* init yuv converter: + y0 = (int)((0.299f * r0) + (0.587f * g0) + (0.114f * b0)); + y1 = (int)((0.299f * r1) + (0.587f * g1) + (0.114f * b1)); + u = (int)(8 * 0.565f * (b0 - y0)) + 128; + v = (int)(8 * 0.713f * (r0 - y0)) + 128; + */ + for (i = 0; i < 32; i++) { + yuv_ry[i] = (int)(0.299f * i * 65536.0f + 0.5f); + yuv_gy[i] = (int)(0.587f * i * 65536.0f + 0.5f); + yuv_by[i] = (int)(0.114f * i * 65536.0f + 0.5f); + } + for (i = -32; i < 32; i++) { + v = (int)(8 * 0.565f * i) + 128; + if (v < 0) + v = 0; + if (v > 255) + v = 255; + yuv_u[i + 32] = v; + v = (int)(8 * 0.713f * i) + 128; + if (v < 0) + v = 0; + if (v > 255) + v = 255; + yuv_v[i + 32] = v; + } +} + +void bgr555_to_uyvy(void *d, const void *s, int pixels) +{ + unsigned int *dst = d; + const unsigned short *src = s; + const unsigned char *yu = yuv_u + 32; + const unsigned char *yv = yuv_v + 32; + int r0, g0, b0, r1, g1, b1; + int y0, y1, u, v; + + for 
(; pixels > 0; src += 2, dst++, pixels -= 2) + { + b0 = (src[0] >> 10) & 0x1f; + g0 = (src[0] >> 5) & 0x1f; + r0 = src[0] & 0x1f; + b1 = (src[1] >> 10) & 0x1f; + g1 = (src[1] >> 5) & 0x1f; + r1 = src[1] & 0x1f; + y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16; + y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16; + u = yu[b0 - y0]; + v = yv[r0 - y0]; + // valid Y range seems to be 16..235 + y0 = 16 + 219 * y0 / 31; + y1 = 16 + 219 * y1 / 31; + + *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u; + } +} + +void bgr888_to_uyvy(void *d, const void *s, int pixels) +{ + unsigned int *dst = d; + const unsigned char *src8 = s; + const unsigned char *yu = yuv_u + 32; + const unsigned char *yv = yuv_v + 32; + int r0, g0, b0, r1, g1, b1; + int y0, y1, u, v; + + for (; pixels > 0; src8 += 3*2, dst++, pixels -= 2) + { + r0 = src8[0], g0 = src8[1], b0 = src8[2]; + r1 = src8[3], g1 = src8[4], b1 = src8[5]; + y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16; + y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16; + u = yu[(b0 - y0) / 8]; + v = yv[(r0 - y0) / 8]; + y0 = 16 + 219 * y0 / 255; + y1 = 16 + 219 * y1 / 255; + + *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u; + } +} diff --git a/plugins/gpulib/cspace.h b/plugins/gpulib/cspace.h index 8c9bcfa..95eae85 100644 --- a/plugins/gpulib/cspace.h +++ b/plugins/gpulib/cspace.h @@ -8,6 +8,10 @@ void bgr888_to_rgb888(void *dst, const void *src, int bytes); void bgr888_to_rgb565(void *dst, const void *src, int bytes); void rgb888_to_rgb565(void *dst, const void *src, int bytes); +void bgr_to_uyvy_init(void); +void bgr555_to_uyvy(void *d, const void *s, int pixels); +void bgr888_to_uyvy(void *d, const void *s, int pixels); + #ifdef __cplusplus } #endif diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 46e92d1..b300c88 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -24,7 +24,7 @@ //#define log_anomaly gpu_log #define log_anomaly(...) 
-struct psx_gpu gpu __attribute__((aligned(2048))); +struct psx_gpu gpu; static noinline int do_cmd_buffer(uint32_t *data, int count); static void finish_vram_transfer(int is_read); @@ -133,6 +133,22 @@ static noinline void get_gpu_info(uint32_t data) } } +// double, for overdraw guard +#define VRAM_SIZE (1024 * 512 * 2 * 2) + +static int map_vram(void) +{ + gpu.vram = gpu.mmap(VRAM_SIZE); + if (gpu.vram != NULL) { + gpu.vram += 4096 / 2; + return 0; + } + else { + fprintf(stderr, "could not map vram, expect crashes\n"); + return -1; + } +} + long GPUinit(void) { int ret; @@ -145,12 +161,26 @@ long GPUinit(void) gpu.cmd_len = 0; do_reset(); + if (gpu.mmap != NULL) { + if (map_vram() != 0) + ret = -1; + } return ret; } long GPUshutdown(void) { - return vout_finish(); + long ret; + + renderer_finish(); + ret = vout_finish(); + if (gpu.vram != NULL) { + gpu.vram -= 4096 / 2; + gpu.munmap(gpu.vram, VRAM_SIZE); + } + gpu.vram = NULL; + + return ret; } void GPUwriteStatus(uint32_t data) @@ -182,7 +212,7 @@ void GPUwriteStatus(uint32_t data) break; case 0x05: gpu.screen.x = data & 0x3ff; - gpu.screen.y = (data >> 10) & 0x3ff; + gpu.screen.y = (data >> 10) & 0x1ff; if (gpu.frameskip.set) { decide_frameskip_allow(gpu.ex_regs[3]); if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) { @@ -207,6 +237,7 @@ void GPUwriteStatus(uint32_t data) gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3]; update_width(); update_height(); + renderer_notify_res_change(); break; default: if ((cmd & 0xf0) == 0x10) @@ -582,13 +613,13 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze) case 1: // save if (gpu.cmd_len > 0) flush_cmd_buffer(); - memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram)); + memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2); memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs)); memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs)); freeze->ulStatus = gpu.status.reg; break; case 0: // load - memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram)); + 
memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2); memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs)); memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs)); gpu.status.reg = freeze->ulStatus; @@ -669,6 +700,14 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) gpu.state.hcnt = cbs->gpu_hcnt; gpu.state.frame_count = cbs->gpu_frame_count; gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace; + gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable; + + gpu.mmap = cbs->mmap; + gpu.munmap = cbs->munmap; + + // delayed vram mmap + if (gpu.vram == NULL) + map_vram(); if (cbs->pl_vout_set_raw_vram) cbs->pl_vout_set_raw_vram(gpu.vram); diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 1cbe38c..d11f991 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -17,10 +17,9 @@ extern "C" { #define CMD_BUFFER_LEN 1024 struct psx_gpu { - uint16_t vram[1024 * 512]; - uint16_t guard[1024 * 512]; // overdraw guard uint32_t cmd_buffer[CMD_BUFFER_LEN]; uint32_t regs[16]; + uint16_t *vram; union { uint32_t reg; struct { @@ -67,6 +66,8 @@ struct psx_gpu { uint32_t old_interlace:1; uint32_t allow_interlace:2; uint32_t blanked:1; + uint32_t enhancement_enable:1; + uint32_t enhancement_active:1; uint32_t *frame_count; uint32_t *hcnt; /* hsync count */ struct { @@ -87,6 +88,10 @@ struct psx_gpu { uint32_t last_flip_frame; uint32_t pending_fill[3]; } frameskip; + uint16_t *(*get_enhancement_bufer) + (int *x, int *y, int *w, int *h, int *vram_h); + void *(*mmap)(unsigned int size); + void (*munmap)(void *ptr, unsigned int size); }; extern struct psx_gpu gpu; @@ -98,11 +103,13 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd); struct rearmed_cbs; int renderer_init(void); +void renderer_finish(void); void renderer_sync_ecmds(uint32_t * ecmds); void renderer_update_caches(int x, int y, int w, int h); void renderer_flush_queues(void); void renderer_set_interlace(int enable, int is_odd); void renderer_set_config(const struct 
rearmed_cbs *config); +void renderer_notify_res_change(void); int vout_init(void); int vout_finish(void); diff --git a/plugins/gpulib/gpulib.mak b/plugins/gpulib/gpulib.mak index 349a0c8..ad6a8ad 100644 --- a/plugins/gpulib/gpulib.mak +++ b/plugins/gpulib/gpulib.mak @@ -2,7 +2,7 @@ # always adding gpulib to deps in case cspace is needed # users must include ../../config.mak -LDFLAGS += -shared +LDFLAGS += -shared -Wl,--no-undefined CFLAGS += $(PLUGIN_CFLAGS) ifeq "$(ARCH)" "arm" EXT = @@ -17,10 +17,10 @@ endif GPULIB_A = ../gpulib/gpulib$(EXT).a ifdef BIN_STANDLALONE -TARGETS += $(BIN_STANDLALONE)$(EXT) +TARGETS += $(BIN_STANDLALONE) endif ifdef BIN_GPULIB -TARGETS += $(BIN_GPULIB)$(EXT) +TARGETS += $(BIN_GPULIB) endif CC_STANDLALONE = $(CC) CC_GPULIB = $(CC) @@ -34,7 +34,7 @@ ifdef BIN_STANDLALONE ifneq ($(findstring .cpp,$(SRC_STANDALONE)),) CC_STANDLALONE = $(CXX) endif -$(BIN_STANDLALONE)$(EXT): $(SRC) $(SRC_STANDALONE) $(GPULIB_A) +$(BIN_STANDLALONE): $(SRC) $(SRC_STANDALONE) $(GPULIB_A) $(CC_STANDLALONE) -o $@ $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) $(LDLIBS_STANDALONE) ln -fs $(PLUGINDIR)/$@ ../ endif @@ -43,7 +43,7 @@ ifdef BIN_GPULIB ifneq ($(findstring .cpp,$(SRC_GPULIB)),) CC_GPULIB = $(CXX) endif -$(BIN_GPULIB)$(EXT): $(SRC) $(SRC_GPULIB) $(GPULIB_A) +$(BIN_GPULIB): $(SRC) $(SRC_GPULIB) $(GPULIB_A) $(CC_GPULIB) -o $@ $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) $(LDLIBS_GPULIB) ln -fs $(PLUGINDIR)/$@ ../ endif diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 0bd1ecf..49f53d6 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -15,7 +15,6 @@ #include "../../frontend/plugin_lib.h" static const struct rearmed_cbs *cbs; -static void *screen_buf; int vout_init(void) { @@ -27,90 +26,77 @@ int vout_finish(void) return 0; } -static void check_mode_change(void) +static void check_mode_change(int force) { static uint32_t old_status; static int old_h; + int w = gpu.screen.hres; + int h = gpu.screen.h; + int w_out = w; + int h_out = h; + + 
gpu.state.enhancement_active = + gpu.get_enhancement_bufer != NULL && gpu.state.enhancement_enable + && w <= 512 && h <= 256 && !gpu.status.rgb24; + + if (gpu.state.enhancement_active) { + w_out *= 2; + h_out *= 2; + } // width|rgb24 change? - if ((gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || gpu.screen.h != old_h) + if (force || (gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || h != old_h) { old_status = gpu.status.reg; - old_h = gpu.screen.h; - screen_buf = cbs->pl_vout_set_mode(gpu.screen.hres, gpu.screen.h, + old_h = h; + + cbs->pl_vout_set_mode(w_out, h_out, w, h, (gpu.status.rgb24 && !cbs->only_16bpp) ? 24 : 16); } } -static void blit(void) +void vout_update(void) { int x = gpu.screen.x & ~1; // alignment needed by blitter int y = gpu.screen.y; int w = gpu.screen.w; int h = gpu.screen.h; uint16_t *vram = gpu.vram; - int stride = gpu.screen.hres; - int fb_offs, doffs; - uint8_t *dest; + int vram_h = 512; - dest = (uint8_t *)screen_buf; - if (dest == NULL) + if (w == 0 || h == 0) return; - fb_offs = y * 1024 + x; + check_mode_change(0); + if (gpu.state.enhancement_active) + vram = gpu.get_enhancement_bufer(&x, &y, &w, &h, &vram_h); - // only do centering, at least for now - doffs = (stride - w) / 2 & ~1; - - if (gpu.status.rgb24) - { - if (cbs->only_16bpp) { - dest += doffs * 2; - for (; h-- > 0; dest += stride * 2, fb_offs += 1024) - { - fb_offs &= 1024*512-1; - bgr888_to_rgb565(dest, vram + fb_offs, w * 3); - } - } - else { - dest += (doffs / 8) * 24; - for (; h-- > 0; dest += stride * 3, fb_offs += 1024) - { - fb_offs &= 1024*512-1; - bgr888_to_rgb888(dest, vram + fb_offs, w * 3); - } - } - } - else - { - dest += doffs * 2; - for (; h-- > 0; dest += stride * 2, fb_offs += 1024) - { - fb_offs &= 1024*512-1; - bgr555_to_rgb565(dest, vram + fb_offs, w * 2); + if (y + h > vram_h) { + if (y + h - vram_h > h / 2) { + // wrap + h -= vram_h - y; + y = 0; } + else + // clip + h = vram_h - y; } - screen_buf = cbs->pl_vout_flip(); -} + vram += y * 1024 + x; 
-void vout_update(void) -{ - check_mode_change(); - if (cbs->pl_vout_raw_flip) - cbs->pl_vout_raw_flip(gpu.screen.x, gpu.screen.y); - else - blit(); + cbs->pl_vout_flip(vram, 1024, gpu.status.rgb24, w, h); } void vout_blank(void) { - check_mode_change(); - if (cbs->pl_vout_raw_flip == NULL) { - int bytespp = gpu.status.rgb24 ? 3 : 2; - memset(screen_buf, 0, gpu.screen.hres * gpu.screen.h * bytespp); - screen_buf = cbs->pl_vout_flip(); + int w = gpu.screen.hres; + int h = gpu.screen.h; + if (gpu.state.enhancement_active) { + w *= 2; + h *= 2; } + check_mode_change(0); + cbs->pl_vout_flip(NULL, 1024, gpu.status.rgb24, w, h); } long GPUopen(void **unused) @@ -119,7 +105,7 @@ long GPUopen(void **unused) gpu.frameskip.frame_ready = 1; cbs->pl_vout_open(); - screen_buf = cbs->pl_vout_flip(); + check_mode_change(1); return 0; } |