[mad-dev] short imdct
Rafael Ávila de Espíndola
rafael.espindola@ic.unicamp.br
Sat, 28 Feb 2004 16:45:18 -0300
--Boundary-00=_O/OQA81J7Mejltb
Content-Type: Text/Plain;
charset="iso-8859-1"
Content-Transfer-Encoding: quoted-printable
Content-Description: clearsigned data
Content-Disposition: inline
=2D----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Is there any reason for not using the Szu-Wei Lee's algorithm for the short=
=20
IMDCT (size 12)?
Annexed is a patch witch implements it.
Rafael =C1vila de Esp=EDndola
=2D----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.4 (GNU/Linux)
iD8DBQFAQO/RLlrfGJ8JUHwRAogGAJ9c8dZFc+vHM5SJ94FJGgJ8g2IJWQCfbyr5
qczH/qDxC7XBLkbkXn6mGcQ=3D
=3DgjVQ
=2D----END PGP SIGNATURE-----
--Boundary-00=_O/OQA81J7Mejltb
Content-Type: text/x-diff;
charset="us-ascii";
name="imdct12.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename="imdct12.patch"
--- libmad-0.15.1b/layer3.c 2004-01-23 07:41:32.000000000 -0200
+++ libmad-0.15.1b_new/layer3.c 2004-02-28 17:35:53.000000000 -0300
@@ -1577,7 +1577,7 @@
# else
# if 1
static
-void fastsdct(mad_fixed_t const x[9], mad_fixed_t y[18])
+void sdct9(mad_fixed_t const x[9], mad_fixed_t y[18])
{
mad_fixed_t a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12;
mad_fixed_t a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25;
@@ -1644,59 +1644,75 @@
y[16] = a22 + m7;
}
+static
+void sdct3(mad_fixed_t const x[3], mad_fixed_t y[6]){
+ mad_fixed_t a0,a1;
+ static const mad_fixed_t d0=MAD_F(0x1bb67ae8); /*2*cos(M_PI/6)*/;
+ a0=x[0]+x[2];
+ a1=x[0]-x[2];
+ y[0]=x[1]+a0;
+ y[2]=mad_f_mul(d0,a1);
+ y[4]=a0-2*x[1];
+}
+
static inline
-void sdctII(mad_fixed_t const x[18], mad_fixed_t X[18])
+void sdctII(int N, mad_fixed_t const x[18], mad_fixed_t X[18])
{
- mad_fixed_t tmp[9];
+ mad_fixed_t tmp[N/2];
int i;
- /* scale[i] = 2 * cos(PI * (2 * i + 1) / (2 * 18)) */
- static mad_fixed_t const scale[9] = {
+ /* scale[i] = 2 * cos(PI * (2 * i + 1) / (2 * N)) */
+ static mad_fixed_t const scale36[9] = {
MAD_F(0x1fe0d3b4), MAD_F(0x1ee8dd47), MAD_F(0x1d007930),
MAD_F(0x1a367e59), MAD_F(0x16a09e66), MAD_F(0x125abcf8),
MAD_F(0x0d8616bc), MAD_F(0x08483ee1), MAD_F(0x02c9fad7)
};
+ static mad_fixed_t const scale12[3] = {
+ MAD_F(0x1ee8dd47), MAD_F(0x16a09e66), MAD_F(0x08483ee1)
+ };
/* divide the 18-point SDCT-II into two 9-point SDCT-IIs */
/* even input butterfly */
- for (i = 0; i < 9; i += 3) {
- tmp[i + 0] = x[i + 0] + x[18 - (i + 0) - 1];
- tmp[i + 1] = x[i + 1] + x[18 - (i + 1) - 1];
- tmp[i + 2] = x[i + 2] + x[18 - (i + 2) - 1];
+ for (i = 0; i < N/2; i ++) {
+ tmp[i + 0] = x[i + 0] + x[N - (i + 0) - 1];
}
- fastsdct(tmp, &X[0]);
+ if(N==18)
+ sdct9(tmp, &X[0]);
+ else
+ sdct3(tmp, &X[0]);
/* odd input butterfly and scaling */
- for (i = 0; i < 9; i += 3) {
- tmp[i + 0] = mad_f_mul(x[i + 0] - x[18 - (i + 0) - 1], scale[i + 0]);
- tmp[i + 1] = mad_f_mul(x[i + 1] - x[18 - (i + 1) - 1], scale[i + 1]);
- tmp[i + 2] = mad_f_mul(x[i + 2] - x[18 - (i + 2) - 1], scale[i + 2]);
+ for (i = 0; i < N/2; i ++) {
+ if(N==18)
+ tmp[i + 0] = mad_f_mul(x[i + 0] - x[N - (i + 0) - 1], scale36[i + 0]);
+ else
+ tmp[i + 0] = mad_f_mul(x[i + 0] - x[N - (i + 0) - 1], scale12[i + 0]);
}
- fastsdct(tmp, &X[1]);
+ if(N==18)
+ sdct9(tmp, &X[1]);
+ else
+ sdct3(tmp, &X[1]);
/* output accumulation */
- for (i = 3; i < 18; i += 8) {
+ for (i = 3; i < N; i += 2) {
X[i + 0] -= X[(i + 0) - 2];
- X[i + 2] -= X[(i + 2) - 2];
- X[i + 4] -= X[(i + 4) - 2];
- X[i + 6] -= X[(i + 6) - 2];
}
}
static inline
-void dctIV(mad_fixed_t const y[18], mad_fixed_t X[18])
+void dctIV(int N, mad_fixed_t const y[18], mad_fixed_t X[18])
{
- mad_fixed_t tmp[18];
+ mad_fixed_t tmp[N];
int i;
- /* scale[i] = 2 * cos(PI * (2 * i + 1) / (4 * 18)) */
- static mad_fixed_t const scale[18] = {
+ /* scale[i] = 2 * cos(PI * (2 * i + 1) / (4 * N)) */
+ static mad_fixed_t const scale36[18] = {
MAD_F(0x1ff833fa), MAD_F(0x1fb9ea93), MAD_F(0x1f3dd120),
MAD_F(0x1e84d969), MAD_F(0x1d906bcf), MAD_F(0x1c62648b),
MAD_F(0x1afd100f), MAD_F(0x1963268b), MAD_F(0x1797c6a4),
@@ -1704,61 +1720,56 @@
MAD_F(0x0ec6a507), MAD_F(0x0c3ef153), MAD_F(0x099f61c5),
MAD_F(0x06ed12c5), MAD_F(0x042d4544), MAD_F(0x0165547c)
};
+ static mad_fixed_t const scale12[6]={
+ MAD_F(0x1fb9ea93), MAD_F(0x1d906bcf), MAD_F(0x1963268b),
+ MAD_F(0x137af940), MAD_F(0x0c3ef153), MAD_F(0x042d4544)
+ };
/* scaling */
- for (i = 0; i < 18; i += 3) {
- tmp[i + 0] = mad_f_mul(y[i + 0], scale[i + 0]);
- tmp[i + 1] = mad_f_mul(y[i + 1], scale[i + 1]);
- tmp[i + 2] = mad_f_mul(y[i + 2], scale[i + 2]);
+ for (i = 0; i < N; i ++) {
+ if(N==18)
+ tmp[i + 0] = mad_f_mul(y[i + 0], scale36[i + 0]);
+ else
+ tmp[i + 0] = mad_f_mul(y[i + 0], scale12[i + 0]);
}
/* SDCT-II */
- sdctII(tmp, X);
+ sdctII(N, tmp, X);
/* scale reduction and output accumulation */
X[0] /= 2;
- for (i = 1; i < 17; i += 4) {
+ for (i = 1; i < N; i ++) {
X[i + 0] = X[i + 0] / 2 - X[(i + 0) - 1];
- X[i + 1] = X[i + 1] / 2 - X[(i + 1) - 1];
- X[i + 2] = X[i + 2] / 2 - X[(i + 2) - 1];
- X[i + 3] = X[i + 3] / 2 - X[(i + 3) - 1];
}
- X[17] = X[17] / 2 - X[16];
}
/*
- * NAME: imdct36
- * DESCRIPTION: perform X[18]->x[36] IMDCT using Szu-Wei Lee's fast algorithm
+ * NAME: imdct
+ * DESCRIPTION: perform X[N/2]->x[N] IMDCT using Szu-Wei Lee's fast algorithm
*/
-static inline
-void imdct36(mad_fixed_t const x[18], mad_fixed_t y[36])
+static
+void imdct(int N, mad_fixed_t const x[18], mad_fixed_t y[36])
{
- mad_fixed_t tmp[18];
+ mad_fixed_t tmp[N/2];
int i;
/* DCT-IV */
- dctIV(x, tmp);
+ dctIV(N/2, x, tmp);
- /* convert 18-point DCT-IV to 36-point IMDCT */
+ /* convert N/2-point DCT-IV to N-point IMDCT */
- for (i = 0; i < 9; i += 3) {
- y[i + 0] = tmp[9 + (i + 0)];
- y[i + 1] = tmp[9 + (i + 1)];
- y[i + 2] = tmp[9 + (i + 2)];
- }
- for (i = 9; i < 27; i += 3) {
- y[i + 0] = -tmp[36 - (9 + (i + 0)) - 1];
- y[i + 1] = -tmp[36 - (9 + (i + 1)) - 1];
- y[i + 2] = -tmp[36 - (9 + (i + 2)) - 1];
- }
- for (i = 27; i < 36; i += 3) {
- y[i + 0] = -tmp[(i + 0) - 27];
- y[i + 1] = -tmp[(i + 1) - 27];
- y[i + 2] = -tmp[(i + 2) - 27];
+ for (i = 0; i < N/4; i ++) {
+ y[i + 0] = tmp[N/4 + (i + 0)];
+ }
+ for (i = N/4; i < 3*N/4; i ++) {
+ y[i + 0] = -tmp[N - (N/4 + (i + 0)) - 1];
+ }
+ for (i = 3*N/4; i < N; i ++) {
+ y[i + 0] = -tmp[(i + 0) - 3*N/4];
}
}
# else
@@ -2066,7 +2077,7 @@
/* IMDCT */
- imdct36(X, z);
+ imdct(36, X, z);
/* windowing */
@@ -2159,36 +2170,7 @@
yptr = &y[0];
for (w = 0; w < 3; ++w) {
- register mad_fixed_t const (*s)[6];
-
- s = imdct_s;
-
- for (i = 0; i < 3; ++i) {
- MAD_F_ML0(hi, lo, X[0], (*s)[0]);
- MAD_F_MLA(hi, lo, X[1], (*s)[1]);
- MAD_F_MLA(hi, lo, X[2], (*s)[2]);
- MAD_F_MLA(hi, lo, X[3], (*s)[3]);
- MAD_F_MLA(hi, lo, X[4], (*s)[4]);
- MAD_F_MLA(hi, lo, X[5], (*s)[5]);
-
- yptr[i + 0] = MAD_F_MLZ(hi, lo);
- yptr[5 - i] = -yptr[i + 0];
-
- ++s;
-
- MAD_F_ML0(hi, lo, X[0], (*s)[0]);
- MAD_F_MLA(hi, lo, X[1], (*s)[1]);
- MAD_F_MLA(hi, lo, X[2], (*s)[2]);
- MAD_F_MLA(hi, lo, X[3], (*s)[3]);
- MAD_F_MLA(hi, lo, X[4], (*s)[4]);
- MAD_F_MLA(hi, lo, X[5], (*s)[5]);
-
- yptr[ i + 6] = MAD_F_MLZ(hi, lo);
- yptr[11 - i] = yptr[i + 6];
-
- ++s;
- }
-
+ imdct(12, X, yptr);
yptr += 12;
X += 6;
}
--Boundary-00=_O/OQA81J7Mejltb--