From 8ad26356f5e92bd396e58290217da55858345a4e Mon Sep 17 00:00:00 2001 From: neonloop Date: Sat, 7 Aug 2021 20:28:34 +0000 Subject: Adds generic nearest and smooth scalers Smooth is slower (10-15%) than a scaler built for a specific resolution. Works well for downscaling and for odd screen ratios until a custom scaler is built. Replaces the snes smooth scaler (outperforms it) and the gba smooth scaler (looks better) --- scale.c | 924 +++++++++++++++------------------------------------------------- 1 file changed, 219 insertions(+), 705 deletions(-) (limited to 'scale.c') diff --git a/scale.c b/scale.c index f9270ff..233b1ac 100644 --- a/scale.c +++ b/scale.c @@ -1,12 +1,10 @@ +#include +#include #include #include "core.h" #include "options.h" #include "scale.h" -#define prefetch(a,b) __builtin_prefetch(a,b) -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) - typedef void (*scaler_t)(unsigned w, unsigned h, size_t pitch, const void *src, void *dst); struct dimensions { @@ -15,10 +13,65 @@ struct dimensions { size_t pitch; }; +struct blend_args { + int w_ratio_in; + int w_ratio_out; + uint16_t w_bp[2]; + int h_ratio_in; + int h_ratio_out; + uint16_t h_bp[2]; + uint16_t *blend_line; +} blend_args; + static scaler_t scaler; static unsigned dst_w, dst_h, dst_offs; struct dimensions prev; +#if __ARM_ARCH >= 5 +static inline uint32_t average32(uint32_t c1, uint32_t c2) { + uint32_t ret, lowbits = 0x08210821; + asm ("eor %0, %2, %3\r\n" + "and %0, %0, %1\r\n" + "add %0, %3, %0\r\n" + "adds %0, %0, %2\r\n" + "lsr %0, %0, #1\r\n" + "orrcs %0, %0, #0x80000000\r\n" + : "=&r" (ret) : "r" (lowbits), "r" (c1), "r" (c2) : "cc" ); + return ret; +} + +static inline uint32_t average16(uint32_t c1, uint32_t c2) { + uint32_t ret, lowbits = 0x0821; + asm ("eor %0, %2, %3\r\n" + "and %0, %0, %1\r\n" + "add %0, %3, %0\r\n" + "add %0, %0, %2\r\n" + "lsr %0, %0, #1\r\n" + : "=&r" (ret) : "r" (lowbits), "r" (c1), "r" (c2) : ); + return ret; +} + +#define AVERAGE16_NOCHK(c1, c2) (average16((c1), (c2))) +#define AVERAGE32_NOCHK(c1, c2) (average32((c1), (c2))) + +#else + +#define AVERAGE16_NOCHK(c1, c2) (((c1) + (c2) + (((c1) ^ (c2)) & 0x0821))>>1) //More accurate +#define AVERAGE32_NOCHK(c1, c2) ((((c1) + (c2) + (((c1) ^ (c2)) & 0x08210821))>>1) | (((c1) + (c2) < (c1)) << 31)) + +#endif + + +#define AVERAGE16(c1, c2) ((c1) == (c2) ? (c1) : AVERAGE16_NOCHK((c1), (c2))) +#define AVERAGE16_1_3(c1, c2) ((c1) == (c2) ? (c1) : (AVERAGE16_NOCHK(AVERAGE16_NOCHK((c1), (c2)), (c2)))) + +#define AVERAGE32(c1, c2) ((c1) == (c2) ? (c1) : AVERAGE32_NOCHK((c1), (c2))) +#define AVERAGE32_1_3(c1, c2) ((c1) == (c2) ? (c1) : (AVERAGE32_NOCHK(AVERAGE32_NOCHK((c1), (c2)), (c2)))) + +static inline int gcd(int a, int b) { + return b ? gcd(b, a % b) : a; +} + static void scale_memcpy(unsigned w, unsigned h, size_t pitch, const void *src, void *dst) { dst += dst_offs; memcpy(dst, src, h * pitch); @@ -27,62 +80,157 @@ static void scale_memcpy(unsigned w, unsigned h, size_t pitch, const void *src, static void scale_1x(unsigned w, unsigned h, size_t pitch, const void *src, void *dst) { dst += dst_offs; - for (int y = 0; y < h; y++) { + for (unsigned y = 0; y < h; y++) { memcpy(dst + y * SCREEN_PITCH, src + y * pitch, pitch); } } -static void scale_nearest(unsigned w, unsigned h, size_t pitch, const void *src_bytes, void *dst_bytes) { - const uint16_t *src = (const uint16_t *)src_bytes; - uint16_t *dst = (uint16_t *)dst_bytes; - int dy = -(dst_h / 2); - unsigned lines = 0; +static void scale_nearest(unsigned w, unsigned h, size_t pitch, const void *src, void *dst) { + int dy = -dst_h; + unsigned lines = h; + bool copy = false; - dst += dst_offs / sizeof(dst[0]); + dst += dst_offs; - while (lines < h) { - int dx = -(dst_w / 2); - const uint16_t *psrc = src; - uint16_t *pdst = dst; - bool copy = false; + while (lines) { + int dx = -dst_w; + const uint16_t *psrc16 = src; + uint16_t *pdst16 = dst; if (copy) { copy = false; - memcpy(dst, dst - SCREEN_PITCH / sizeof(dst[0]), SCREEN_PITCH); - } else if (dy <= 0) { - while (psrc - src < w) { - if (dx <= 0) { - *pdst++ = *psrc; + memcpy(dst, dst - SCREEN_PITCH, SCREEN_PITCH); + dst += SCREEN_PITCH; + dy += h; + } else if (dy < 0) { + int col = w; + while(col--) { + while (dx < 0) { + *pdst16++ = *psrc16; dx += w; } - if (dx > 0) { - psrc++; - dx -= dst_w; - } + dx -= dst_w; + psrc16++; } - } - if (dy <= 0) { + dst += SCREEN_PITCH; dy += h; - dst += SCREEN_PITCH / sizeof(dst[0]); } - if (dy > 0) { + if (dy >= 0) { dy -= dst_h; - src += pitch / sizeof(src[0]); - lines++; + src += pitch; + lines--; } else { copy = true; } } } +/* Generic blend based on % of dest pixel in next src pixel, using + * rough quintiles: aaaa, aaab, aabb, abbb, bbbb. Quintile breakpoints + * can be adjusted for sharper or smoother blending. Default 0-20%, + * 20%-50% (round down), 50%(down)-50%(up), 50%(round up)-80%, + * 80%-100%. This matches existing scalers */ +static void scale_blend(unsigned w, unsigned h, size_t pitch, const void *src, void *dst) { + int dy = 0; + int lines = h; + + int rat_w = blend_args.w_ratio_in; + int rat_dst_w = blend_args.w_ratio_out; + uint16_t *bw = blend_args.w_bp; + + int rat_h = blend_args.h_ratio_in; + int rat_dst_h = blend_args.h_ratio_out; + uint16_t *bh = blend_args.h_bp; + + dst += dst_offs; + + while (lines--) { + while (dy < rat_dst_h) { + uint16_t *dst16 = (uint16_t *)dst; + uint16_t *pblend = (uint16_t *)blend_args.blend_line; + int col = w; + int dx = 0; + + uint16_t *pnext = (uint16_t *)(src + pitch); + + if (dy > rat_dst_h - bh[0]) { + pblend = pnext; + } else if (dy <= bh[0]) { + /* Drops const, won't get touched later though */ + pblend = (uint16_t *)src; + } else { + const uint32_t *src32 = (const uint32_t *)src; + const uint32_t *pnext32 = (const uint32_t *)pnext; + uint32_t *pblend32 = (uint32_t *)pblend; + int count = w / 2; + + if (dy <= bh[1]) { + const uint32_t *tmp = pnext32; + pnext32 = src32; + src32 = tmp; + } + + if (dy > rat_dst_h - bh[1] || dy <= bh[1]) { + while(count--) { + *pblend32++ = AVERAGE32_1_3(*src32, *pnext32); + src32++; + pnext32++; + } + } else { + while(count--) { + *pblend32++ = AVERAGE32(*src32, *pnext32); + src32++; + pnext32++; + } + } + } + + while (col--) { + uint16_t a, b, out; + + a = *pblend; + b = *(pblend+1); + + while (dx < rat_dst_w) { + if (a == b) { + out = a; + } else if (dx > rat_dst_w - bw[0]) { // top quintile, bbbb + out = b; + } else if (dx <= bw[0]) { // last quintile, aaaa + out = a; + } else { + if (dx > rat_dst_w - bw[1]) { // 2nd quintile, abbb + a = AVERAGE16_NOCHK(a,b); + } else if (dx <= bw[1]) { // 4th quintile, aaab + b = AVERAGE16_NOCHK(a,b); + } + + out = AVERAGE16_NOCHK(a, b); // also 3rd quintile, aabb + } + *dst16++ = out; + dx += rat_w; + } + + dx -= rat_dst_w; + pblend++; + } + + dy += rat_h; + dst += SCREEN_PITCH; + } + + dy -= rat_dst_h; + src += pitch; + } +} + /* drowsnug's nofilter upscaler, edited by eggs for smoothness */ -#define AVERAGE16(c1, c2) (((c1) + (c2) + (((c1) ^ (c2)) & 0x0821))>>1) //More accurate static void scale_sharp_240x160_320xXXX(unsigned _w, unsigned _h, size_t _pitch, const void *src_bytes, void *dst_bytes) { - int Eh = 0; + unsigned Eh = 0; int dh = 0; int width = 240; int vf = 0; @@ -91,7 +239,7 @@ static void scale_sharp_240x160_320xXXX(unsigned _w, unsigned _h, size_t _pitch, dst += dst_offs / sizeof(uint16_t); - int x, y; + unsigned x, y; for (y = 0; y < dst_h; y++) { int source = dh * width; @@ -110,8 +258,8 @@ static void scale_sharp_240x160_320xXXX(unsigned _w, unsigned _h, size_t _pitch, } *dst++ = a; - *dst++ = AVERAGE16(AVERAGE16(a,b),b); - *dst++ = AVERAGE16(b,AVERAGE16(b,c)); + *dst++ = AVERAGE16_1_3(a,b); + *dst++ = AVERAGE16_1_3(c,b); *dst++ = c; source+=3; @@ -129,7 +277,7 @@ static void scale_sharp_240x160_320xXXX(unsigned _w, unsigned _h, size_t _pitch, static void scale_sharp_256xXXX_320xXXX(unsigned w, unsigned h, size_t pitch, const void *src_bytes, void *dst_bytes) { - int Eh = 0; + unsigned Eh = 0; int dh = 0; int vf = 0; const uint16_t *src = (const uint16_t *)src_bytes; @@ -138,7 +286,7 @@ static void scale_sharp_256xXXX_320xXXX(unsigned w, unsigned h, size_t pitch, co dst += dst_offs / sizeof(uint16_t); - int x, y; + unsigned x, y; for (y = 0; y < dst_h; y++) { int source = dh * pxpitch; @@ -159,9 +307,9 @@ static void scale_sharp_256xXXX_320xXXX(unsigned w, unsigned h, size_t pitch, co } *dst++ = a; - *dst++ = AVERAGE16(AVERAGE16(a,b),b); + *dst++ = AVERAGE16_1_3(a,b); *dst++ = AVERAGE16(b,c); - *dst++ = AVERAGE16(c,AVERAGE16(c,d)); + *dst++ = AVERAGE16_1_3(d,c); *dst++ = d; source+=4; @@ -177,666 +325,20 @@ static void scale_sharp_256xXXX_320xXXX(unsigned w, unsigned h, size_t pitch, co } } -/* gpsp-bittboy bilinear scalers https://github.com/retrofirmware/gpsp-bittboy */ - -#define Average(A, B) ((((A) & 0xF7DE) >> 1) + (((B) & 0xF7DE) >> 1) + ((A) & (B) & 0x0821)) - -/* Calculates the average of two pairs of RGB565 pixels. The result is, in - * the lower bits, the average of both lower pixels, and in the upper bits, - * the average of both upper pixels. */ -#define Average32(A, B) ((((A) & 0xF7DEF7DE) >> 1) + (((B) & 0xF7DEF7DE) >> 1) + ((A) & (B) & 0x08210821)) - -/* Raises a pixel from the lower half to the upper half of a pair. */ -#define Raise(N) ((N) << 16) - -/* Extracts the upper pixel of a pair into the lower pixel of a pair. */ -#define Hi(N) ((N) >> 16) - -/* Extracts the lower pixel of a pair. */ -#define Lo(N) ((N) & 0xFFFF) - -/* Calculates the average of two RGB565 pixels. The source of the pixels is - * the lower 16 bits of both parameters. The result is in the lower 16 bits. - * The average is weighted so that the first pixel contributes 3/4 of its - * color and the second pixel contributes 1/4. */ -#define AverageQuarters3_1(A, B) ( (((A) & 0xF7DE) >> 1) + (((A) & 0xE79C) >> 2) + (((B) & 0xE79C) >> 2) + ((( (( ((A) & 0x1863) + ((A) & 0x0821) ) << 1) + ((B) & 0x1863) ) >> 2) & 0x1863) ) - -static inline void scale_bilinearish_240x160_320x240(unsigned src_x, unsigned src_y, size_t src_pitch, const void *src, void *dst) -{ - /* Before: - * a b c d e f - * g h i j k l - * - * After (multiple letters = average): - * a ab bc c d de ef f - * ag abgh bchi ci dj dejk efkl fl - * g gh hi i j jk kl l - */ - uint16_t *to = (uint16_t *)dst; - const uint16_t *from = (const uint16_t *)src; - const uint32_t dst_pitch = SCREEN_PITCH; - - const uint32_t dst_x = src_x * 4 / 3; - const uint32_t src_skip = src_pitch - src_x * sizeof(uint16_t), - dst_skip = dst_pitch - dst_x * sizeof(uint16_t); - - uint32_t x, y; - - for (y = 0; y < src_y; y += 2) { - for (x = 0; x < src_x / 6; x++) { - // -- Row 1 -- - // Read RGB565 elements in the source grid. - // The notation is high_low (little-endian). - uint32_t b_a = (*(uint32_t*) (from )), - d_c = (*(uint32_t*) (from + 2)), - f_e = (*(uint32_t*) (from + 4)); - - // Generate ab_a from b_a. - *(uint32_t*) (to) = likely(Hi(b_a) == Lo(b_a)) - ? b_a - : Lo(b_a) /* 'a' verbatim to low pixel */ | - Raise(Average(Hi(b_a), Lo(b_a))) /* ba to high pixel */; - - // Generate c_bc from b_a and d_c. - *(uint32_t*) (to + 2) = likely(Hi(b_a) == Lo(d_c)) - ? Lo(d_c) | Raise(Lo(d_c)) - : Raise(Lo(d_c)) /* 'c' verbatim to high pixel */ | - Average(Lo(d_c), Hi(b_a)) /* bc to low pixel */; - - // Generate de_d from d_c and f_e. - *(uint32_t*) (to + 4) = likely(Hi(d_c) == Lo(f_e)) - ? Lo(f_e) | Raise(Lo(f_e)) - : Hi(d_c) /* 'd' verbatim to low pixel */ | - Raise(Average(Lo(f_e), Hi(d_c))) /* de to high pixel */; - - // Generate f_ef from f_e. - *(uint32_t*) (to + 6) = likely(Hi(f_e) == Lo(f_e)) - ? f_e - : Raise(Hi(f_e)) /* 'f' verbatim to high pixel */ | - Average(Hi(f_e), Lo(f_e)) /* ef to low pixel */; - - if (likely(y + 1 < src_y)) // Is there a source row 2? - { - // -- Row 2 -- - uint32_t h_g = (*(uint32_t*) ((uint8_t*) from + src_pitch )), - j_i = (*(uint32_t*) ((uint8_t*) from + src_pitch + 4)), - l_k = (*(uint32_t*) ((uint8_t*) from + src_pitch + 8)); - - // Generate abgh_ag from b_a and h_g. - uint32_t bh_ag = Average32(b_a, h_g); - *(uint32_t*) ((uint8_t*) to + dst_pitch) = likely(Hi(bh_ag) == Lo(bh_ag)) - ? bh_ag - : Lo(bh_ag) /* ag verbatim to low pixel */ | - Raise(Average(Hi(bh_ag), Lo(bh_ag))) /* abgh to high pixel */; - - // Generate ci_bchi from b_a, d_c, h_g and j_i. - uint32_t ci_bh = - Hi(bh_ag) /* bh verbatim to low pixel */ | - Raise(Average(Lo(d_c), Lo(j_i))) /* ci to high pixel */; - *(uint32_t*) ((uint8_t*) to + dst_pitch + 4) = likely(Hi(ci_bh) == Lo(ci_bh)) - ? ci_bh - : Raise(Hi(ci_bh)) /* ci verbatim to high pixel */ | - Average(Hi(ci_bh), Lo(ci_bh)) /* bchi to low pixel */; - - // Generate fl_efkl from f_e and l_k. - uint32_t fl_ek = Average32(f_e, l_k); - *(uint32_t*) ((uint8_t*) to + dst_pitch + 12) = likely(Hi(fl_ek) == Lo(fl_ek)) - ? fl_ek - : Raise(Hi(fl_ek)) /* fl verbatim to high pixel */ | - Average(Hi(fl_ek), Lo(fl_ek)) /* efkl to low pixel */; - - // Generate dejk_dj from d_c, f_e, j_i and l_k. - uint32_t ek_dj = - Raise(Lo(fl_ek)) /* ek verbatim to high pixel */ | - Average(Hi(d_c), Hi(j_i)) /* dj to low pixel */; - *(uint32_t*) ((uint8_t*) to + dst_pitch + 8) = likely(Hi(ek_dj) == Lo(ek_dj)) - ? ek_dj - : Lo(ek_dj) /* dj verbatim to low pixel */ | - Raise(Average(Hi(ek_dj), Lo(ek_dj))) /* dejk to high pixel */; - - // -- Row 3 -- - // Generate gh_g from h_g. - *(uint32_t*) ((uint8_t*) to + dst_pitch * 2) = likely(Hi(h_g) == Lo(h_g)) - ? h_g - : Lo(h_g) /* 'g' verbatim to low pixel */ | - Raise(Average(Hi(h_g), Lo(h_g))) /* gh to high pixel */; - - // Generate i_hi from g_h and j_i. - *(uint32_t*) ((uint8_t*) to + dst_pitch * 2 + 4) = likely(Hi(h_g) == Lo(j_i)) - ? Lo(j_i) | Raise(Lo(j_i)) - : Raise(Lo(j_i)) /* 'i' verbatim to high pixel */ | - Average(Lo(j_i), Hi(h_g)) /* hi to low pixel */; - - // Generate jk_j from j_i and l_k. - *(uint32_t*) ((uint8_t*) to + dst_pitch * 2 + 8) = likely(Hi(j_i) == Lo(l_k)) - ? Lo(l_k) | Raise(Lo(l_k)) - : Hi(j_i) /* 'j' verbatim to low pixel */ | - Raise(Average(Hi(j_i), Lo(l_k))) /* jk to high pixel */; - - // Generate l_kl from l_k. - *(uint32_t*) ((uint8_t*) to + dst_pitch * 2 + 12) = likely(Hi(l_k) == Lo(l_k)) - ? l_k - : Raise(Hi(l_k)) /* 'l' verbatim to high pixel */ | - Average(Hi(l_k), Lo(l_k)) /* kl to low pixel */; - } - - from += 6; - to += 8; - } - - // Skip past the waste at the end of the first line, if any, - // then past 1 whole lines of source and 2 of destination. - from = (uint16_t*) ((uint8_t*) from + src_skip + src_pitch); - to = (uint16_t*) ((uint8_t*) to + dst_skip + 2 * dst_pitch); - } -} - -static inline void scale_bilinearish_240x160_320x213(unsigned src_x, unsigned src_y, size_t src_pitch, const void *src, void *dst) -{ - /* Before: - * a b c d e f - * g h i j k l - * m n o p q r - * - * After (multiple letters = average): - * a ab bc c d de ef f - * ag abgh bchi ci dj dejk efkl fl - * gm ghmn hino io jp jkpq klqr lr - * m mn no o p pq qr r - */ - - uint16_t *to = (uint16_t *)dst + dst_offs / sizeof(uint16_t); - const uint16_t *from = (const uint16_t *)src; - const uint32_t dst_pitch = SCREEN_PITCH; - - const uint32_t dst_x = src_x * 4 / 3; - const uint32_t src_skip = src_pitch - src_x * sizeof(uint16_t), - dst_skip = dst_pitch - dst_x * sizeof(uint16_t); - - uint32_t x, y; - - for (y = 0; y < src_y; y += 3) { - for (x = 0; x < src_x / 6; x++) { - // -- Row 1 -- - // Read RGB565 elements in the source grid. - // The notation is high_low (little-endian). - uint32_t b_a = (*(uint32_t*) (from )), - d_c = (*(uint32_t*) (from + 2)), - f_e = (*(uint32_t*) (from + 4)); - - // Generate ab_a from b_a. - *(uint32_t*) (to) = likely(Hi(b_a) == Lo(b_a)) - ? b_a - : Lo(b_a) /* 'a' verbatim to low pixel */ | - Raise(Average(Hi(b_a), Lo(b_a))) /* ba to high pixel */; - - // Generate c_bc from b_a and d_c. - *(uint32_t*) (to + 2) = likely(Hi(b_a) == Lo(d_c)) - ? Lo(d_c) | Raise(Lo(d_c)) - : Raise(Lo(d_c)) /* 'c' verbatim to high pixel */ | - Average(Lo(d_c), Hi(b_a)) /* bc to low pixel */; - - // Generate de_d from d_c and f_e. - *(uint32_t*) (to + 4) = likely(Hi(d_c) == Lo(f_e)) - ? Lo(f_e) | Raise(Lo(f_e)) - : Hi(d_c) /* 'd' verbatim to low pixel */ | - Raise(Average(Lo(f_e), Hi(d_c))) /* de to high pixel */; - - // Generate f_ef from f_e. - *(uint32_t*) (to + 6) = likely(Hi(f_e) == Lo(f_e)) - ? f_e - : Raise(Hi(f_e)) /* 'f' verbatim to high pixel */ | - Average(Hi(f_e), Lo(f_e)) /* ef to low pixel */; - - if (likely(y + 1 < src_y)) // Is there a source row 2? - { - // -- Row 2 -- - uint32_t h_g = (*(uint32_t*) ((uint8_t*) from + src_pitch )), - j_i = (*(uint32_t*) ((uint8_t*) from + src_pitch + 4)), - l_k = (*(uint32_t*) ((uint8_t*) from + src_pitch + 8)); - - // Generate abgh_ag from b_a and h_g. - uint32_t bh_ag = Average32(b_a, h_g); - *(uint32_t*) ((uint8_t*) to + dst_pitch) = likely(Hi(bh_ag) == Lo(bh_ag)) - ? bh_ag - : Lo(bh_ag) /* ag verbatim to low pixel */ | - Raise(Average(Hi(bh_ag), Lo(bh_ag))) /* abgh to high pixel */; - - // Generate ci_bchi from b_a, d_c, h_g and j_i. - uint32_t ci_bh = - Hi(bh_ag) /* bh verbatim to low pixel */ | - Raise(Average(Lo(d_c), Lo(j_i))) /* ci to high pixel */; - *(uint32_t*) ((uint8_t*) to + dst_pitch + 4) = likely(Hi(ci_bh) == Lo(ci_bh)) - ? ci_bh - : Raise(Hi(ci_bh)) /* ci verbatim to high pixel */ | - Average(Hi(ci_bh), Lo(ci_bh)) /* bchi to low pixel */; - - // Generate fl_efkl from f_e and l_k. - uint32_t fl_ek = Average32(f_e, l_k); - *(uint32_t*) ((uint8_t*) to + dst_pitch + 12) = likely(Hi(fl_ek) == Lo(fl_ek)) - ? fl_ek - : Raise(Hi(fl_ek)) /* fl verbatim to high pixel */ | - Average(Hi(fl_ek), Lo(fl_ek)) /* efkl to low pixel */; - - // Generate dejk_dj from d_c, f_e, j_i and l_k. - uint32_t ek_dj = - Raise(Lo(fl_ek)) /* ek verbatim to high pixel */ | - Average(Hi(d_c), Hi(j_i)) /* dj to low pixel */; - *(uint32_t*) ((uint8_t*) to + dst_pitch + 8) = likely(Hi(ek_dj) == Lo(ek_dj)) - ? ek_dj - : Lo(ek_dj) /* dj verbatim to low pixel */ | - Raise(Average(Hi(ek_dj), Lo(ek_dj))) /* dejk to high pixel */; - - if (likely(y + 2 < src_y)) // Is there a source row 3? - { - // -- Row 3 -- - uint32_t n_m = (*(uint32_t*) ((uint8_t*) from + src_pitch * 2 )), - p_o = (*(uint32_t*) ((uint8_t*) from + src_pitch * 2 + 4)), - r_q = (*(uint32_t*) ((uint8_t*) from + src_pitch * 2 + 8)); - - // Generate ghmn_gm from h_g and n_m. - uint32_t hn_gm = Average32(h_g, n_m); - *(uint32_t*) ((uint8_t*) to + dst_pitch * 2) = likely(Hi(hn_gm) == Lo(hn_gm)) - ? hn_gm - : Lo(hn_gm) /* gm verbatim to low pixel */ | - Raise(Average(Hi(hn_gm), Lo(hn_gm))) /* ghmn to high pixel */; - - // Generate io_hino from h_g, j_i, n_m and p_o. - uint32_t io_hn = - Hi(hn_gm) /* hn verbatim to low pixel */ | - Raise(Average(Lo(j_i), Lo(p_o))) /* io to high pixel */; - *(uint32_t*) ((uint8_t*) to + dst_pitch * 2 + 4) = likely(Hi(io_hn) == Lo(io_hn)) - ? io_hn - : Raise(Hi(io_hn)) /* io verbatim to high pixel */ | - Average(Hi(io_hn), Lo(io_hn)) /* hino to low pixel */; - - // Generate lr_klqr from l_k and r_q. - uint32_t lr_kq = Average32(l_k, r_q); - *(uint32_t*) ((uint8_t*) to + dst_pitch * 2 + 12) = likely(Hi(lr_kq) == Lo(lr_kq)) - ? lr_kq - : Raise(Hi(lr_kq)) /* lr verbatim to high pixel */ | - Average(Hi(lr_kq), Lo(lr_kq)) /* klqr to low pixel */; - - // Generate jkpq_jp from j_i, l_k, p_o and r_q. - uint32_t kq_jp = - Raise(Lo(lr_kq)) /* kq verbatim to high pixel */ | - Average(Hi(j_i), Hi(p_o)) /* jp to low pixel */; - *(uint32_t*) ((uint8_t*) to + dst_pitch * 2 + 8) = likely(Hi(kq_jp) == Lo(kq_jp)) - ? kq_jp - : Lo(kq_jp) /* jp verbatim to low pixel */ | - Raise(Average(Hi(kq_jp), Lo(kq_jp))) /* jkpq to high pixel */; - - // -- Row 4 -- - // Generate mn_m from n_m. - *(uint32_t*) ((uint8_t*) to + dst_pitch * 3) = likely(Hi(n_m) == Lo(n_m)) - ? n_m - : Lo(n_m) /* 'm' verbatim to low pixel */ | - Raise(Average(Hi(n_m), Lo(n_m))) /* mn to high pixel */; - - // Generate o_no from n_m and p_o. - *(uint32_t*) ((uint8_t*) to + dst_pitch * 3 + 4) = likely(Hi(n_m) == Lo(p_o)) - ? Lo(p_o) | Raise(Lo(p_o)) - : Raise(Lo(p_o)) /* 'o' verbatim to high pixel */ | - Average(Lo(p_o), Hi(n_m)) /* no to low pixel */; - - // Generate pq_p from p_o and r_q. - *(uint32_t*) ((uint8_t*) to + dst_pitch * 3 + 8) = likely(Hi(p_o) == Lo(r_q)) - ? Lo(r_q) | Raise(Lo(r_q)) - : Hi(p_o) /* 'p' verbatim to low pixel */ | - Raise(Average(Hi(p_o), Lo(r_q))) /* pq to high pixel */; - - // Generate r_qr from r_q. - *(uint32_t*) ((uint8_t*) to + dst_pitch * 3 + 12) = likely(Hi(r_q) == Lo(r_q)) - ? r_q - : Raise(Hi(r_q)) /* 'r' verbatim to high pixel */ | - Average(Hi(r_q), Lo(r_q)) /* qr to low pixel */; - } - } - - from += 6; - to += 8; - } - - // Skip past the waste at the end of the first line, if any, - // then past 2 whole lines of source and 3 of destination. - from = (uint16_t*) ((uint8_t*) from + src_skip + 2 * src_pitch); - to = (uint16_t*) ((uint8_t*) to + dst_skip + 3 * dst_pitch); - } -} - -/* - * Approximately bilinear scaler, 256x224 to 320x240 - * - * Copyright (C) 2014 hi-ban, Nebuleon - * - * This function and all auxiliary functions are free software; you can - * redistribute them and/or modify them under the terms of the GNU Lesser - * General Public License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * These functions are distributed in the hope that they will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -// Support math -#define Half(A) (((A) >> 1) & 0x7BEF) -#define Quarter(A) (((A) >> 2) & 0x39E7) -// Error correction expressions to piece back the lower bits together -#define RestHalf(A) ((A) & 0x0821) -#define RestQuarter(A) ((A) & 0x1863) - -// Error correction expressions for quarters of pixels -#define Corr1_3(A, B) Quarter(RestQuarter(A) + (RestHalf(B) << 1) + RestQuarter(B)) -#define Corr3_1(A, B) Quarter((RestHalf(A) << 1) + RestQuarter(A) + RestQuarter(B)) - -// Error correction expressions for halves -#define Corr1_1(A, B) ((A) & (B) & 0x0821) - -// Quarters -#define Weight1_3(A, B) (Quarter(A) + Half(B) + Quarter(B) + Corr1_3(A, B)) -#define Weight3_1(A, B) (Half(A) + Quarter(A) + Quarter(B) + Corr3_1(A, B)) - -// Halves -#define Weight1_1(A, B) (Half(A) + Half(B) + Corr1_1(A, B)) - -/* Upscales a 256x224 image to 320x240 using an approximate bilinear - * resampling algorithm that only uses integer math. - * - * Input: - * src: A packed 256x224 pixel image. The pixel format of this image is - * RGB 565. - * width: The width of the source image. Should always be 256. - * Output: - * dst: A packed 320x240 pixel image. The pixel format of this image is - * RGB 565. - */ -void scale_bilinearish_256x224_320x240(unsigned src_x, unsigned src_y, size_t src_pitch, const void *src, void *dst) -{ - uint16_t* Src16 = (uint16_t*) src; - uint16_t* Dst16 = (uint16_t*) dst; - unsigned pxpitch = src_pitch / 2; - // There are 64 blocks of 4 pixels horizontally, and 14 of 16 vertically. - // Each block of 4x16 becomes 5x17. - uint32_t BlockX, BlockY; - uint16_t* BlockSrc; - uint16_t* BlockDst; - for (BlockY = 0; BlockY < 14; BlockY++) - { - BlockSrc = Src16 + BlockY * pxpitch * 16; - BlockDst = Dst16 + BlockY * 320 * 17; - for (BlockX = 0; BlockX < 64; BlockX++) - { - /* Horizontally: - * Before(4): - * (a)(b)(c)(d) - * After(5): - * (a)(abbb)(bc)(cccd)(d) - * - * Vertically: - * Before(16): After(17): - * (a) (a) - * (b) (b) - * (c) (c) - * (d) (cddd) - * (e) (deee) - * (f) (efff) - * (g) (fggg) - * (h) (ghhh) - * (i) (hi) - * (j) (iiij) - * (k) (jjjk) - * (l) (kkkl) - * (m) (lllm) - * (n) (mmmn) - * (o) (n) - * (p) (o) - * (p) - */ - - // -- Row 1 -- - uint16_t _1 = *(BlockSrc ); - *(BlockDst ) = _1; - uint16_t _2 = *(BlockSrc + 1); - *(BlockDst + 1) = Weight1_3( _1, _2); - uint16_t _3 = *(BlockSrc + 2); - *(BlockDst + 2) = Weight1_1( _2, _3); - uint16_t _4 = *(BlockSrc + 3); - *(BlockDst + 3) = Weight3_1( _3, _4); - *(BlockDst + 4) = _4; - - // -- Row 2 -- - uint16_t _5 = *(BlockSrc + pxpitch * 1 ); - *(BlockDst + 320 * 1 ) = _5; - uint16_t _6 = *(BlockSrc + pxpitch * 1 + 1); - *(BlockDst + 320 * 1 + 1) = Weight1_3( _5, _6); - uint16_t _7 = *(BlockSrc + pxpitch * 1 + 2); - *(BlockDst + 320 * 1 + 2) = Weight1_1( _6, _7); - uint16_t _8 = *(BlockSrc + pxpitch * 1 + 3); - *(BlockDst + 320 * 1 + 3) = Weight3_1( _7, _8); - *(BlockDst + 320 * 1 + 4) = _8; - - // -- Row 3 -- - uint16_t _9 = *(BlockSrc + pxpitch * 2 ); - *(BlockDst + 320 * 2 ) = _9; - uint16_t _10 = *(BlockSrc + pxpitch * 2 + 1); - *(BlockDst + 320 * 2 + 1) = Weight1_3( _9, _10); - uint16_t _11 = *(BlockSrc + pxpitch * 2 + 2); - *(BlockDst + 320 * 2 + 2) = Weight1_1(_10, _11); - uint16_t _12 = *(BlockSrc + pxpitch * 2 + 3); - *(BlockDst + 320 * 2 + 3) = Weight3_1(_11, _12); - *(BlockDst + 320 * 2 + 4) = _12; - - // -- Row 4 -- - uint16_t _13 = *(BlockSrc + pxpitch * 3 ); - *(BlockDst + 320 * 3 ) = Weight1_3( _9, _13); - uint16_t _14 = *(BlockSrc + pxpitch * 3 + 1); - *(BlockDst + 320 * 3 + 1) = Weight1_3(Weight1_3( _9, _10), Weight1_3(_13, _14)); - uint16_t _15 = *(BlockSrc + pxpitch * 3 + 2); - *(BlockDst + 320 * 3 + 2) = Weight1_3(Weight1_1(_10, _11), Weight1_1(_14, _15)); - uint16_t _16 = *(BlockSrc + pxpitch * 3 + 3); - *(BlockDst + 320 * 3 + 3) = Weight1_3(Weight3_1(_11, _12), Weight3_1(_15, _16)); - *(BlockDst + 320 * 3 + 4) = Weight1_3(_12, _16); - - // -- Row 5 -- - uint16_t _17 = *(BlockSrc + pxpitch * 4 ); - *(BlockDst + 320 * 4 ) = Weight1_3(_13, _17); - uint16_t _18 = *(BlockSrc + pxpitch * 4 + 1); - *(BlockDst + 320 * 4 + 1) = Weight1_3(Weight1_3(_13, _14), Weight1_3(_17, _18)); - uint16_t _19 = *(BlockSrc + pxpitch * 4 + 2); - *(BlockDst + 320 * 4 + 2) = Weight1_3(Weight1_1(_14, _15), Weight1_1(_18, _19)); - uint16_t _20 = *(BlockSrc + pxpitch * 4 + 3); - *(BlockDst + 320 * 4 + 3) = Weight1_3(Weight3_1(_15, _16), Weight3_1(_19, _20)); - *(BlockDst + 320 * 4 + 4) = Weight1_3(_16, _20); - - // -- Row 6 -- - uint16_t _21 = *(BlockSrc + pxpitch * 5 ); - *(BlockDst + 320 * 5 ) = Weight1_3(_17, _21); - uint16_t _22 = *(BlockSrc + pxpitch * 5 + 1); - *(BlockDst + 320 * 5 + 1) = Weight1_3(Weight1_3(_17, _18), Weight1_3(_21, _22)); - uint16_t _23 = *(BlockSrc + pxpitch * 5 + 2); - *(BlockDst + 320 * 5 + 2) = Weight1_3(Weight1_1(_18, _19), Weight1_1(_22, _23)); - uint16_t _24 = *(BlockSrc + pxpitch * 5 + 3); - *(BlockDst + 320 * 5 + 3) = Weight1_3(Weight3_1(_19, _20), Weight3_1(_23, _24)); - *(BlockDst + 320 * 5 + 4) = Weight1_3(_20, _24); - - // -- Row 7 -- - uint16_t _25 = *(BlockSrc + pxpitch * 6 ); - *(BlockDst + 320 * 6 ) = Weight1_3(_21, _25); - uint16_t _26 = *(BlockSrc + pxpitch * 6 + 1); - *(BlockDst + 320 * 6 + 1) = Weight1_3(Weight1_3(_21, _22), Weight1_3(_25, _26)); - uint16_t _27 = *(BlockSrc + pxpitch * 6 + 2); - *(BlockDst + 320 * 6 + 2) = Weight1_3(Weight1_1(_22, _23), Weight1_1(_26, _27)); - uint16_t _28 = *(BlockSrc + pxpitch * 6 + 3); - *(BlockDst + 320 * 6 + 3) = Weight1_3(Weight3_1(_23, _24), Weight3_1(_27, _28)); - *(BlockDst + 320 * 6 + 4) = Weight1_3(_24, _28); - - // -- Row 8 -- - uint16_t _29 = *(BlockSrc + pxpitch * 7 ); - *(BlockDst + 320 * 7 ) = Weight1_3(_25, _29); - uint16_t _30 = *(BlockSrc + pxpitch * 7 + 1); - *(BlockDst + 320 * 7 + 1) = Weight1_3(Weight1_3(_25, _26), Weight1_3(_29, _30)); - uint16_t _31 = *(BlockSrc + pxpitch * 7 + 2); - *(BlockDst + 320 * 7 + 2) = Weight1_3(Weight1_1(_26, _27), Weight1_1(_30, _31)); - uint16_t _32 = *(BlockSrc + pxpitch * 7 + 3); - *(BlockDst + 320 * 7 + 3) = Weight1_3(Weight3_1(_27, _28), Weight3_1(_31, _32)); - *(BlockDst + 320 * 7 + 4) = Weight1_3(_28, _32); - - // -- Row 9 -- - uint16_t _33 = *(BlockSrc + pxpitch * 8 ); - *(BlockDst + 320 * 8 ) = Weight1_1(_29, _33); - uint16_t _34 = *(BlockSrc + pxpitch * 8 + 1); - *(BlockDst + 320 * 8 + 1) = Weight1_1(Weight1_3(_29, _30), Weight1_3(_33, _34)); - uint16_t _35 = *(BlockSrc + pxpitch * 8 + 2); - *(BlockDst + 320 * 8 + 2) = Weight1_1(Weight1_1(_30, _31), Weight1_1(_34, _35)); - uint16_t _36 = *(BlockSrc + pxpitch * 8 + 3); - *(BlockDst + 320 * 8 + 3) = Weight1_1(Weight3_1(_31, _32), Weight3_1(_35, _36)); - *(BlockDst + 320 * 8 + 4) = Weight1_1(_32, _36); - - // -- Row 10 -- - uint16_t _37 = *(BlockSrc + pxpitch * 9 ); - *(BlockDst + 320 * 9 ) = Weight3_1(_33, _37); - uint16_t _38 = *(BlockSrc + pxpitch * 9 + 1); - *(BlockDst + 320 * 9 + 1) = Weight3_1(Weight1_3(_33, _34), Weight1_3(_37, _38)); - uint16_t _39 = *(BlockSrc + pxpitch * 9 + 2); - *(BlockDst + 320 * 9 + 2) = Weight3_1(Weight1_1(_34, _35), Weight1_1(_38, _39)); - uint16_t _40 = *(BlockSrc + pxpitch * 9 + 3); - *(BlockDst + 320 * 9 + 3) = Weight3_1(Weight3_1(_35, _36), Weight3_1(_39, _40)); - *(BlockDst + 320 * 9 + 4) = Weight3_1(_36, _40); - - // -- Row 11 -- - uint16_t _41 = *(BlockSrc + pxpitch * 10 ); - *(BlockDst + 320 * 10 ) = Weight3_1(_37, _41); - uint16_t _42 = *(BlockSrc + pxpitch * 10 + 1); - *(BlockDst + 320 * 10 + 1) = Weight3_1(Weight1_3(_37, _38), Weight1_3(_41, _42)); - uint16_t _43 = *(BlockSrc + pxpitch * 10 + 2); - *(BlockDst + 320 * 10 + 2) = Weight3_1(Weight1_1(_38, _39), Weight1_1(_42, _43)); - uint16_t _44 = *(BlockSrc + pxpitch * 10 + 3); - *(BlockDst + 320 * 10 + 3) = Weight3_1(Weight3_1(_39, _40), Weight3_1(_43, _44)); - *(BlockDst + 320 * 10 + 4) = Weight3_1(_40, _44); - - // -- Row 12 -- - uint16_t _45 = *(BlockSrc + pxpitch * 11 ); - *(BlockDst + 320 * 11 ) = Weight3_1(_41, _45); - uint16_t _46 = *(BlockSrc + pxpitch * 11 + 1); - *(BlockDst + 320 * 11 + 1) = Weight3_1(Weight1_3(_41, _42), Weight1_3(_45, _46)); - uint16_t _47 = *(BlockSrc + pxpitch * 11 + 2); - *(BlockDst + 320 * 11 + 2) = Weight3_1(Weight1_1(_42, _43), Weight1_1(_46, _47)); - uint16_t _48 = *(BlockSrc + pxpitch * 11 + 3); - *(BlockDst + 320 * 11 + 3) = Weight3_1(Weight3_1(_43, _44), Weight3_1(_47, _48)); - *(BlockDst + 320 * 11 + 4) = Weight3_1(_44, _48); - - // -- Row 13 -- - uint16_t _49 = *(BlockSrc + pxpitch * 12 ); - *(BlockDst + 320 * 12 ) = Weight3_1(_45, _49); - uint16_t _50 = *(BlockSrc + pxpitch * 12 + 1); - *(BlockDst + 320 * 12 + 1) = Weight3_1(Weight1_3(_45, _46), Weight1_3(_49, _50)); - uint16_t _51 = *(BlockSrc + pxpitch * 12 + 2); - *(BlockDst + 320 * 12 + 2) = Weight3_1(Weight1_1(_46, _47), Weight1_1(_50, _51)); - uint16_t _52 = *(BlockSrc + pxpitch * 12 + 3); - *(BlockDst + 320 * 12 + 3) = Weight3_1(Weight3_1(_47, _48), Weight3_1(_51, _52)); - *(BlockDst + 320 * 12 + 4) = Weight3_1(_48, _52); - - // -- Row 14 -- - uint16_t _53 = *(BlockSrc + pxpitch * 13 ); - *(BlockDst + 320 * 13 ) = Weight3_1(_49, _53); - uint16_t _54 = *(BlockSrc + pxpitch * 13 + 1); - *(BlockDst + 320 * 13 + 1) = Weight3_1(Weight1_3(_49, _50), Weight1_3(_53, _54)); - uint16_t _55 = *(BlockSrc + pxpitch * 13 + 2); - *(BlockDst + 320 * 13 + 2) = Weight3_1(Weight1_1(_50, _51), Weight1_1(_54, _55)); - uint16_t _56 = *(BlockSrc + pxpitch * 13 + 3); - *(BlockDst + 320 * 13 + 3) = Weight3_1(Weight3_1(_51, _52), Weight3_1(_55, _56)); - *(BlockDst + 320 * 13 + 4) = Weight3_1(_52, _56); - - // -- Row 15 -- - *(BlockDst + 320 * 14 ) = _53; - *(BlockDst + 320 * 14 + 1) = Weight1_3(_53, _54); - *(BlockDst + 320 * 14 + 2) = Weight1_1(_54, _55); - *(BlockDst + 320 * 14 + 3) = Weight3_1(_55, _56); - *(BlockDst + 320 * 14 + 4) = _56; - - // -- Row 16 -- - uint16_t _57 = *(BlockSrc + pxpitch * 14 ); - *(BlockDst + 320 * 15 ) = _57; - uint16_t _58 = *(BlockSrc + pxpitch * 14 + 1); - *(BlockDst + 320 * 15 + 1) = Weight1_3(_57, _58); - uint16_t _59 = *(BlockSrc + pxpitch * 14 + 2); - *(BlockDst + 320 * 15 + 2) = Weight1_1(_58, _59); - uint16_t _60 = *(BlockSrc + pxpitch * 14 + 3); - *(BlockDst + 320 * 15 + 3) = Weight3_1(_59, _60); - *(BlockDst + 320 * 15 + 4) = _60; - - // -- Row 17 -- - uint16_t _61 = *(BlockSrc + pxpitch * 15 ); - *(BlockDst + 320 * 16 ) = _61; - uint16_t _62 = *(BlockSrc + pxpitch * 15 + 1); - *(BlockDst + 320 * 16 + 1) = Weight1_3(_61, _62); - uint16_t _63 = *(BlockSrc + pxpitch * 15 + 2); - *(BlockDst + 320 * 16 + 2) = Weight1_1(_62, _63); - uint16_t _64 = *(BlockSrc + pxpitch * 15 + 3); - *(BlockDst + 320 * 16 + 3) = Weight3_1(_63, _64); - *(BlockDst + 320 * 16 + 4) = _64; - - BlockSrc += 4; - BlockDst += 5; - } - } -} - -void scale_bilinearish_256x240_320x240(unsigned src_x, unsigned src_y, size_t src_pitch, const void *src, void *dst) -{ - uint16_t* Src16 = (uint16_t*) src; - uint16_t* Dst16 = (uint16_t*) dst; - unsigned pxpitch = src_pitch / 2; - // There are 64 blocks of 4 pixels horizontally, and 239 of 1 vertically. - // Each block of 4x1 becomes 5x1. - uint32_t BlockX, BlockY; - uint16_t* BlockSrc; - uint16_t* BlockDst; - for (BlockY = 0; BlockY < 239; BlockY++) - { - BlockSrc = Src16 + BlockY * pxpitch * 1; - BlockDst = Dst16 + BlockY * 320 * 1; - for (BlockX = 0; BlockX < 64; BlockX++) - { - /* Horizontally: - * Before(4): - * (a)(b)(c)(d) - * After(5): - * (a)(abbb)(bc)(cccd)(d) - */ - - // -- Row 1 -- - uint16_t _1 = *(BlockSrc ); - *(BlockDst ) = _1; - uint16_t _2 = *(BlockSrc + 1); - *(BlockDst + 1) = Weight1_3( _1, _2); - uint16_t _3 = *(BlockSrc + 2); - *(BlockDst + 2) = Weight1_1( _2, _3); - uint16_t _4 = *(BlockSrc + 3); - *(BlockDst + 3) = Weight3_1( _3, _4); - *(BlockDst + 4) = _4; - - BlockSrc += 4; - BlockDst += 5; - } - } -} - static void scale_select_scaler(unsigned w, unsigned h, size_t pitch) { scaler = NULL; + if (blend_args.blend_line != NULL) { + free(blend_args.blend_line); + blend_args.blend_line = NULL; + } + if (scale_size == SCALE_SIZE_FULL) { dst_w = SCREEN_WIDTH; dst_h = SCREEN_HEIGHT; dst_offs = 0; } else if (scale_size == SCALE_SIZE_ASPECT || - (scale_size == SCALE_SIZE_NONE && w > SCREEN_WIDTH || h > SCREEN_HEIGHT)) { + (scale_size == SCALE_SIZE_NONE && (w > SCREEN_WIDTH || h > SCREEN_HEIGHT))) { dst_w = SCREEN_WIDTH; dst_h = SCREEN_WIDTH / aspect_ratio + 0.5; dst_offs = ((SCREEN_HEIGHT-dst_h)/2) * SCREEN_PITCH; @@ -864,25 +366,13 @@ static void scale_select_scaler(unsigned w, unsigned h, size_t pitch) { if (scale_filter == SCALE_FILTER_SHARP) { scaler = scale_sharp_240x160_320xXXX; return; - } else if (scale_filter == SCALE_FILTER_SMOOTH) { - if (scale_size == SCALE_SIZE_ASPECT) { - scaler = scale_bilinearish_240x160_320x213; - } else { - scaler = scale_bilinearish_240x160_320x240; - } - return; } } if (!scaler && aspect_ratio == 4.0f / 3.0f && w == 256) { if (scale_filter == SCALE_FILTER_SHARP) { scaler = scale_sharp_256xXXX_320xXXX; - } else if (scale_filter == SCALE_FILTER_SMOOTH) { - if (h == 240) { - scaler = scale_bilinearish_256x240_320x240; - } else if (h == 224) { - scaler = scale_bilinearish_256x224_320x240; - } + return; } } @@ -891,6 +381,30 @@ static void scale_select_scaler(unsigned w, unsigned h, size_t pitch) { return; } + if (!scaler && (scale_filter == SCALE_FILTER_SHARP || scale_filter == SCALE_FILTER_SMOOTH)) { + int gcd_w, div_w, gcd_h, div_h; + blend_args.blend_line = calloc(w, sizeof(uint16_t)); + + gcd_w = gcd(w, dst_w); + blend_args.w_ratio_in = w / gcd_w; + blend_args.w_ratio_out = dst_w / gcd_w; + + div_w = round(blend_args.w_ratio_out / 5.0); + blend_args.w_bp[0] = div_w; + blend_args.w_bp[1] = blend_args.w_ratio_out >> 1; + + gcd_h = gcd(h, dst_h); + blend_args.h_ratio_in = h / gcd_h; + blend_args.h_ratio_out = dst_h / gcd_h; + + div_h = round(blend_args.h_ratio_out / 5.0); + blend_args.h_bp[0] = div_h; + blend_args.h_bp[1] = blend_args.h_ratio_out >> 1; + + scaler = scale_blend; + return; + } + if (!scaler) { scaler = scale_1x; } -- cgit v1.2.3