From e7a57de38ba56f2708403f28ea78ed56c9061ac6 Mon Sep 17 00:00:00 2001 From: Bastien Bouclet Date: Sun, 6 Feb 2011 13:43:40 +0000 Subject: GRAPHICS: Improved JPEG decoder performance Replaced the 2D IDCT by two 1D IDCT (rows, then columns). JPEG images now decode about twice as fast as they used to. svn-id: r55794 --- graphics/jpeg.cpp | 82 +++++++++++++++++++++++++++++++------------------------ graphics/jpeg.h | 4 +-- 2 files changed, 48 insertions(+), 38 deletions(-) (limited to 'graphics') diff --git a/graphics/jpeg.cpp b/graphics/jpeg.cpp index 2b202f32b5..aa4b876680 100644 --- a/graphics/jpeg.cpp +++ b/graphics/jpeg.cpp @@ -33,10 +33,6 @@ namespace Graphics { -#ifndef M_SQRT2 -#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ -#endif /* M_SQRT2 */ - // Order used to traverse the quantization tables static const uint8 _zigZagOrder[64] = { 0, 1, 8, 16, 9, 2, 3, 10, @@ -49,15 +45,18 @@ static const uint8 _zigZagOrder[64] = { 53, 60, 61, 54, 47, 55, 62, 63 }; -static const double _cosine32[32] = { - 1.000000000000000, 0.980785280403230, 0.923879532511287, 0.831469612302545, - 0.707106781186548, 0.555570233019602, 0.382683432365090, 0.195090322016128, - 0.000000000000000, -0.195090322016128, -0.382683432365090, -0.555570233019602, - -0.707106781186547, -0.831469612302545, -0.923879532511287, -0.980785280403230, - -1.000000000000000, -0.980785280403230, -0.923879532511287, -0.831469612302545, - -0.707106781186548, -0.555570233019602, -0.382683432365090, -0.195090322016129, - -0.000000000000000, 0.195090322016128, 0.382683432365090, 0.555570233019602, - 0.707106781186547, 0.831469612302545, 0.923879532511287, 0.980785280403230 +// IDCT table built with : +// _idct8x8[x][y] = cos(((2 * x + 1) * y) * (PI / 16.0)) * 0.5; +// _idct8x8[x][y] /= sqrt(2.0) if y == 0 +static const double _idct8x8[8][8] = { + { 0.353553390593274, 0.490392640201615, 0.461939766255643, 0.415734806151273, 0.353553390593274, 0.277785116509801, 0.191341716182545, 0.097545161008064 }, + { 0.353553390593274, 0.415734806151273, 0.191341716182545, -0.097545161008064, -0.353553390593274, -0.490392640201615, -0.461939766255643, -0.277785116509801 }, + { 0.353553390593274, 0.277785116509801, -0.191341716182545, -0.490392640201615, -0.353553390593274, 0.097545161008064, 0.461939766255643, 0.415734806151273 }, + { 0.353553390593274, 0.097545161008064, -0.461939766255643, -0.277785116509801, 0.353553390593274, 0.415734806151273, -0.191341716182545, -0.490392640201615 }, + { 0.353553390593274, -0.097545161008064, -0.461939766255643, 0.277785116509801, 0.353553390593274, -0.415734806151273, -0.191341716182545, 0.490392640201615 }, + { 0.353553390593274, -0.277785116509801, -0.191341716182545, 0.490392640201615, -0.353553390593273, -0.097545161008064, 0.461939766255643, -0.415734806151273 }, + { 0.353553390593274, -0.415734806151273, 0.191341716182545, 0.097545161008064, -0.353553390593274, 0.490392640201615, -0.461939766255643, 0.277785116509801 }, + { 0.353553390593274, -0.490392640201615, 0.461939766255643, -0.415734806151273, 0.353553390593273, -0.277785116509801, 0.191341716182545, -0.097545161008064 } }; JPEG::JPEG() : @@ -507,18 +506,39 @@ bool JPEG::readMCU(uint16 xMCU, uint16 yMCU) { return ok; } -float JPEG::idct(int x, int y, int weight, int fx, int fy) { - byte vx_in = ((int32)((2 * x) + 1) * fx) % 32; - byte vy_in = ((int32)((2 * y) + 1) * fy) % 32; - float ret = (float)weight * _cosine32[vx_in] * _cosine32[vy_in]; - - if (fx == 0) - ret /= (float)M_SQRT2; +void JPEG::idct8x8(float result[64], const int16 dct[64]) { + float tmp[64]; + + // Apply 1D IDCT to rows + for (int y = 0; y < 8; y++) { + for (int x = 0; x < 8; x++) { + tmp[y + x * 8] = dct[0] * _idct8x8[x][0] + + dct[1] * _idct8x8[x][1] + + dct[2] * _idct8x8[x][2] + + dct[3] * _idct8x8[x][3] + + dct[4] * _idct8x8[x][4] + + dct[5] * _idct8x8[x][5] + + dct[6] * _idct8x8[x][6] + + dct[7] * _idct8x8[x][7]; + } - if (fy == 0) - ret /= (float)M_SQRT2; + dct += 8; + } - return ret; + // Apply 1D IDCT to columns + for (int x = 0; x < 8; x++) { + const float *u = tmp + x * 8; + for (int y = 0; y < 8; y++) { + result[y * 8 + x] = u[0] * _idct8x8[y][0] + + u[1] * _idct8x8[y][1] + + u[2] * _idct8x8[y][2] + + u[3] * _idct8x8[y][3] + + u[4] * _idct8x8[y][4] + + u[5] * _idct8x8[y][5] + + u[6] * _idct8x8[y][6] + + u[7] * _idct8x8[y][7]; + } + } } bool JPEG::readDataUnit(uint16 x, uint16 y) { @@ -546,23 +566,13 @@ bool JPEG::readDataUnit(uint16 x, uint16 y) { DCT[_zigZagOrder[i]] = val; } - // Shortcut the IDCT for DC component + // Apply the IDCT float result[64]; - for (uint8 i = 0; i < 64; i++) - result[i] = DCT[0] / 2; - - // Apply the IDCT (PAG31) - for (int i = 1; i < 64; i++) { - if (DCT[i]) - for (int _y = 0; _y < 8; _y++) - for (int _x = 0; _x < 8; _x++) - result[_y * 8 + _x] += idct(_x, _y, DCT[i], i % 8, i / 8); - } + idct8x8(result, DCT); // Level shift to make the values unsigned - // Divide by 4 is final part of IDCT for (int i = 0; i < 64; i++) { - result[i] = result[i] / 4 + 128; + result[i] = result[i] + 128; if (result[i] < 0) result[i] = 0; diff --git a/graphics/jpeg.h b/graphics/jpeg.h index c8e0cd5f9d..3e26e9add2 100644 --- a/graphics/jpeg.h +++ b/graphics/jpeg.h @@ -118,8 +118,8 @@ private: uint8 _bitsData; uint8 _bitsNumber; - // Discrete Cosine Transformation - float idct(int x, int y, int weight, int fx, int fy); + // Inverse Discrete Cosine Transformation + void idct8x8(float dst[64], const int16 src[64]); }; } // End of Graphics namespace -- cgit v1.2.3