/* inv_mdct.cpp

	Inverse Discrete Cosine Transform for hybrid
   synthesis in MPEG Audio Layer III. Based on the public_c source,
   but almost completely redone by Jeff Tsay and Mikko Tommila.

   Last modified : 04/14/97 */

#ifdef  __WIN32__
#define STRICT
#include <windows.h>
#endif   // __WIN32__

#include <math.h>

#include "all.h"
#include "inv_mdct.h"

#define PI12      0.261799387f
#define PI36      0.087266462f
#define COSPI3    0.500000000f
#define COSPI6    0.866025403f
#define DCTODD1   0.984807753f
#define DCTODD2  -0.342020143f
#define DCTODD3  -0.642787609f
#define DCTEVEN1  0.939692620f
#define DCTEVEN2 -0.173648177f
#define DCTEVEN3 -0.766044443f

/*
This uses Byeong Gi Lee's Fast Cosine Transform algorithm to
decompose the 36 point and 12 point IDCT's into 9 point and 3
point IDCT's, respectively. Then the 9 point IDCT is computed
by a modified version of Mikko Tommila's IDCT algorithm, based on
the WFTA. See his comments before the first 9 point IDCT. The 3
point IDCT is already efficient to implement. -- Jeff Tsay. */

void inv_mdct(real *in, real *out, int32 block_type)
{
/*------------------------------------------------------------------*/
/*                                                                  */
/*    Function: Calculation of the inverse MDCT                     */
/*    In the case of short blocks the 3 output vectors are already  */
/*    overlapped and added in this modul.                           */
/*                                                                  */
/*    New layer3                                                    */
/*                                                                  */
/*------------------------------------------------------------------*/

	 real    tmp[18];
	 real    *win_bt;
	 static  BOOL MDCT_init = FALSE;
	 static  real win[4][36];

	 if(!MDCT_init){

	 // type 0
		for(int32 i=0;i<36;i++)
			win[0][i] = (real) sin(PI36 *(i+0.5));

	 // type 1
		for(int32 i=0;i<18;i++)
			win[1][i] = (real) sin(PI36 *(i+0.5));
		for(int32 i=18;i<24;i++)
			win[1][i] = 1.0f;
		for(int32 i=24;i<30;i++)
			win[1][i] = (real) sin(PI12 *(i+0.5-18));
		for(int32 i=30;i<36;i++)
			win[1][i] = 0.0f;

	 /* type 2 (not needed anymore)
		for(i=0;i<12;i++)
			win[2][i] = (real) sin( PI12*(i+0.5) ) ;
		for(i=12;i<36;i++)
			win[2][i] = 0.0f; */

	 // type 3
		for(int32 i=0;i<6;i++)
			win[3][i] = 0.0f;
		for(int32 i=6;i<12;i++)
			win[3][i] = (real) sin(PI12 * (i+ 0.5 - 6.0));
		for(int32 i=12;i<18;i++)
			win[3][i] = 1.0f;
		for(int32 i=18;i<36;i++)
			win[3][i] = (real) sin(PI36 * (i + 0.5));

		MDCT_init = TRUE;
	 }

	 if(block_type == 2){

	   for(int32 p=0;p<36;p+=9) {
	   	out[p]   = out[p+1] = out[p+2] = out[p+3] =
	      out[p+4] = out[p+5] = out[p+6] = out[p+7] =
	      out[p+8] = 0.0f;
	   }

    	int32 six_i = 0;

	   for(int32 i=0;i<3;i++)
   	{

      	// 12 point IMDCT

         // Begin 12 point IDCT

   		// Input aliasing for 12 pt IDCT
	   	in[15+i] += in[12+i]; in[12+i] += in[9+i]; in[9+i]  +=  in[6+i];
   		in[6+i]  += in[3+i];  in[3+i]  += in[0+i];

	   	// Input aliasing on odd indices (for 6 point IDCT)
   		in[15+i] += in[9+i];  in[9+i]  += in[3+i];

	   	// 3 point IDCT on even indices

         {
	  	   real 	pp1, pp2, sum;
			pp2 = in[12+i] * 0.500000000f;
	   	pp1 = in[ 6+i] * 0.866025403f;
	   	sum = in[0+i] + pp2;
	   	tmp[1] = in[0+i] - in[12+i];
	   	tmp[0] = sum + pp1;
	   	tmp[2] = sum - pp1;

      	// End 3 point IDCT on even indices

	   	// 3 point IDCT on odd indices (for 6 point IDCT)

			pp2 = in[15+i] * 0.500000000f;
   		pp1 = in[ 9+i] * 0.866025403f;
	   	sum = in[ 3+i] + pp2;
	   	tmp[4] = in[3+i] - in[15+i];
	   	tmp[5] = sum + pp1;
	   	tmp[3] = sum - pp1;
         }

   	   // End 3 point IDCT on odd indices

   		// Twiddle factors on odd indices (for 6 point IDCT)

   		tmp[3] *= 1.931851653f;
   		tmp[4] *= 0.707106781f;
   		tmp[5] *= 0.517638090f;

	   	// Output butterflies on 2 3 point IDCT's (for 6 point IDCT)

         {
   		real save = tmp[0];
   		tmp[0] += tmp[5];
   		tmp[5] = save - tmp[5];
	   	save = tmp[1];
	   	tmp[1] += tmp[4];
	   	tmp[4] = save - tmp[4];
	   	save = tmp[2];
	   	tmp[2] += tmp[3];
	   	tmp[3] = save - tmp[3];
         }

   		// End 6 point IDCT

	   	// Twiddle factors on indices (for 12 point IDCT)

	   	tmp[0]  *=  0.504314480f;
	   	tmp[1]  *=  0.541196100f;
	   	tmp[2]  *=  0.630236207f;
	   	tmp[3]  *=  0.821339815f;
	   	tmp[4]  *=  1.306562965f;
	   	tmp[5]  *=  3.830648788f;

      	// End 12 point IDCT

	   	// Shift to 12 point modified IDCT, multiply by window type 2
	   	tmp[8]  = -tmp[0] * 0.793353340f;
	   	tmp[9]  = -tmp[0] * 0.608761429f;
	   	tmp[7]  = -tmp[1] * 0.923879532f;
	   	tmp[10] = -tmp[1] * 0.382683432f;
	   	tmp[6]  = -tmp[2] * 0.991444861f;
	   	tmp[11] = -tmp[2] * 0.130526192f;

	   	tmp[0]  =  tmp[3];
	   	tmp[1]  =  tmp[4] * 0.382683432f;
	   	tmp[2]  =  tmp[5] * 0.608761429f;

	   	tmp[3]  = -tmp[5] * 0.793353340f;
   		tmp[4]  = -tmp[4] * 0.923879532f;
	   	tmp[5]  = -tmp[0] * 0.991444861f;

	   	tmp[0] *= 0.130526192f;

   		out[six_i + 6]  += tmp[0];
			out[six_i + 7]  += tmp[1];
	   	out[six_i + 8]  += tmp[2];
			out[six_i + 9]  += tmp[3];
   		out[six_i + 10] += tmp[4];
			out[six_i + 11] += tmp[5];
	   	out[six_i + 12] += tmp[6];
			out[six_i + 13] += tmp[7];
	   	out[six_i + 14] += tmp[8];
			out[six_i + 15] += tmp[9];
	   	out[six_i + 16] += tmp[10];
			out[six_i + 17] += tmp[11];

   		six_i += 6;
   	}

	 } else {

      // 36 point IDCT

      // input aliasing for 36 point IDCT
      in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14]; in[14]+=in[13];
	   in[13]+=in[12]; in[12]+=in[11]; in[11]+=in[10]; in[10]+=in[9];
	   in[9] +=in[8];  in[8] +=in[7];  in[7] +=in[6];  in[6] +=in[5];
	   in[5] +=in[4];  in[4] +=in[3];  in[3] +=in[2];  in[2] +=in[1];
   	in[1] +=in[0];

	   // 18 point IDCT for odd indices

   	// input aliasing for 18 point IDCT
	   in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
   	in[9] +=in[7];  in[7] +=in[5];  in[5] +=in[3];  in[3] +=in[1];

	   // 9 point IDCT on even indices

		// original:

/*   for(i=0; i<9; i++) {
   	sum = 0.0;

      for(j=0;j<18;j+=2)
      	sum += in[j] * cos(PI36 * (2*i + 1) * j);

      tmp[i] = sum;
     } */

// 9 Point Inverse Discrete Cosine Transform
//
// This piece of code is Copyright 1997 Mikko Tommila and is freely usable
// by anybody. The algorithm itself is of course in the public domain.
//
// Again derived heuristically from the 9-point WFTA.
//
// The algorithm is optimized (?) for speed, not for small rounding errors or
// good readability.
//
// 36 additions, 11 multiplications
//
// Again this is very likely sub-optimal.
//
// The code is optimized to use a minimum number of temporary variables,
// so it should compile quite well even on 8-register Intel x86 processors.
// This makes the code quite obfuscated and very difficult to understand.
//
// References:
// [1] S. Winograd: "On Computing the Discrete Fourier Transform",
//     Mathematics of Computation, Volume 32, Number 141, January 1978,
//     Pages 175-199

// Some modifications for maplay by Jeff Tsay
      {
      real t0, t1, t2, t3, t4, t5, t6, t7;

      t1 = COSPI3 * in[12];
      t2 = COSPI3 * (in[8] + in[16] - in[4]);

	 	t3 = in[0] + t1;
	 	t4 = in[0] - t1 - t1;
    	t5 = t4 - t2;

    	t0 = DCTEVEN1 * (in[4] + in[8]);
    	t1 = DCTEVEN2 * (in[8] - in[16]);

    	tmp[4] = t4 + t2 + t2;
    	t2 = DCTEVEN3 * (in[4] + in[16]);

    	t6 = t3 - t0 - t2;
    	t0 += t3 + t1;
    	t3 += t2 - t1;

    	t2 = DCTODD1 * (in[2]  + in[10]);
    	t4 = DCTODD2 * (in[10] - in[14]);
    	t7 = COSPI6 * in[6];

    	t1 = t2 + t4 + t7;
    	tmp[0] = t0 + t1;
    	tmp[8] = t0 - t1;
    	t1 = DCTODD3 * (in[2] + in[14]);
    	t2 += t1 - t7;

    	tmp[3] = t3 + t2;
    	t0 = COSPI6 * (in[10] + in[14] - in[2]);
    	tmp[5] = t3 - t2;

    	t4 -= t1 + t7;

    	tmp[1] = t5 - t0;
    	tmp[7] = t5 + t0;
    	tmp[2] = t6 + t4;
    	tmp[6] = t6 - t4;
    	}

   	// End 9 point IDCT on even indices

   // 9 point IDCT on odd indices

	// original:
/*   for(i=0; i<9; i++) {
   	sum = 0.0;

      for(j=0;j<18;j+=2)
      	sum += in[j+1] * cos(PI36 * (2*i + 1) * j);

      tmp[17-i] = sum;
     } */

   	// This includes multiplication by the twiddle factors
      // at the end -- Jeff.
      {
    	real t0, t1, t2, t3, t4, t5, t6, t7;

    	t1 = COSPI3 * in[13];
    	t2 = COSPI3 * (in[9] + in[17] - in[5]);

	 	t3 = in[1] + t1;
	 	t4 = in[1] - t1 - t1;
    	t5 = t4 - t2;

    	t0 = DCTEVEN1 * (in[5] + in[9]);
    	t1 = DCTEVEN2 * (in[9] - in[17]);

    	tmp[13] = (t4 + t2 + t2) * 0.707106781f;
    	t2 = DCTEVEN3 * (in[5] + in[17]);

    	t6 = t3 - t0 - t2;
    	t0 += t3 + t1;
    	t3 += t2 - t1;

    	t2 = DCTODD1 * (in[3]  + in[11]);
    	t4 = DCTODD2 * (in[11] - in[15]);
    	t7 = COSPI6  * in[7];

    	t1 = t2 + t4 + t7;
    	tmp[17] = (t0 + t1) * 0.501909918f;
    	tmp[9]  = (t0 - t1) * 5.736856623f;
    	t1 = DCTODD3 * (in[3] + in[15]);
    	t2 += t1 - t7;

    	tmp[14] = (t3 + t2) * 0.610387294f;
    	t0 = COSPI6 * (in[11] + in[15] - in[3]);
    	tmp[12] = (t3 - t2) * 0.871723397f;

    	t4 -= t1 + t7;

    	tmp[16] = (t5 - t0) * 0.517638090f;
    	tmp[10] = (t5 + t0) * 1.931851653f;
    	tmp[15] = (t6 + t4) * 0.551688959f;
    	tmp[11] = (t6 - t4) * 1.183100792f;
    	}

	   // End 9 point IDCT on odd indices

	   // Butterflies on 9 point IDCT's
   	for (int32 i=0;i<9;i++) {
   		real save = tmp[i];
      	tmp[i] += tmp[17-i];
      	tmp[17-i] = save - tmp[17-i];
   	}
	   // end 18 point IDCT

   	// twiddle factors for 36 point IDCT

	   tmp[0] *=  -0.500476342f;
   	tmp[1] *=  -0.504314480f;
	   tmp[2] *=  -0.512139757f;
	   tmp[3] *=  -0.524264562f;
   	tmp[4] *=  -0.541196100f;
	   tmp[5] *=  -0.563690973f;
		tmp[6] *=  -0.592844523f;
   	tmp[7] *=  -0.630236207f;
	   tmp[8] *=  -0.678170852f;
	   tmp[9] *=  -0.740093616f;
   	tmp[10]*=  -0.821339815f;
	   tmp[11]*=  -0.930579498f;
   	tmp[12]*=  -1.082840285f;
	   tmp[13]*=  -1.306562965f;
   	tmp[14]*=  -1.662754762f;
	   tmp[15]*=  -2.310113158f;
	   tmp[16]*=  -3.830648788f;
	   tmp[17]*= -11.46279281f;

   	// end 36 point IDCT

	   // shift to modified IDCT
   	win_bt = win[block_type];

	   out[0] =-tmp[9]  * win_bt[0];
   	out[1] =-tmp[10] * win_bt[1];
	   out[2] =-tmp[11] * win_bt[2];
	   out[3] =-tmp[12] * win_bt[3];
	   out[4] =-tmp[13] * win_bt[4];
	   out[5] =-tmp[14] * win_bt[5];
	   out[6] =-tmp[15] * win_bt[6];
	   out[7] =-tmp[16] * win_bt[7];
	   out[8] =-tmp[17] * win_bt[8];

	   out[9] = tmp[17] * win_bt[9];
   	out[10]= tmp[16] * win_bt[10];
	   out[11]= tmp[15] * win_bt[11];
	   out[12]= tmp[14] * win_bt[12];
	   out[13]= tmp[13] * win_bt[13];
	   out[14]= tmp[12] * win_bt[14];
   	out[15]= tmp[11] * win_bt[15];
	   out[16]= tmp[10] * win_bt[16];
	   out[17]= tmp[9]  * win_bt[17];
	   out[18]= tmp[8]  * win_bt[18];
   	out[19]= tmp[7]  * win_bt[19];
	   out[20]= tmp[6]  * win_bt[20];
   	out[21]= tmp[5]  * win_bt[21];
	   out[22]= tmp[4]  * win_bt[22];
	   out[23]= tmp[3]  * win_bt[23];
 	   out[24]= tmp[2]  * win_bt[24];
   	out[25]= tmp[1]  * win_bt[25];
	   out[26]= tmp[0]  * win_bt[26];

   	out[27]= tmp[0]  * win_bt[27];
	   out[28]= tmp[1]  * win_bt[28];
	   out[29]= tmp[2]  * win_bt[29];
	   out[30]= tmp[3]  * win_bt[30];
	   out[31]= tmp[4]  * win_bt[31];
   	out[32]= tmp[5]  * win_bt[32];
	   out[33]= tmp[6]  * win_bt[33];
	   out[34]= tmp[7]  * win_bt[34];
	   out[35]= tmp[8]  * win_bt[35];
   }
}
