/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*************************** floating point DCT/iDCT *************************/

/* Description of the AA&N algorithm:                                        */
/*  The DCT is performed in two steps: first, the 1-D step of the AA&N DCT   */
/*  is performed on each row and each column, resulting in a 2-D 'pre'DCT.   */
/*  Then each coefficient of the block is multiplied by a postscale factor   */
/*  to obtain the final result.                                              */
/*  The 1-D step of an AA&N DCT consists of a butterfly algorithm, similar   */
/*  to that of the FFT, but a bit more complex.                              */
/*  Here is a scheme of the algorithm:                                       */
/*                                                                           */
/* c0 \------/(+) c00 \--/---(+) c10 ----\/--(+) c20 ...                     */
/* c1 \\----//(+) c01 \\//---(+) c11 ----/\--(-) c21 ...                     */
/* c2 \\\--///(+) c02 //\\---(-) c12 -/------(+) c22 ...                     */
/* c3 \\\\////(+) c03 /--\---(-) c13 /---------------...                     */
/* c4 ////\\\\(-) c04 -/(*-1)(-) c14 -/(*-A5)(+) ca0 ...                     */
/* c5 ///--\\\(-) c05 //-----(+) c15 /---------------...                     */
/* c6 //----\\(-) c06 //-----(+) c16 ----------------...                     */
/* c7 /------\(-) c07 /------------------------------...                     */
/*                                                                           */
/* ...--------------------------------------------------------------- c0     */
/* ...--------------------------------------------------------------- c4     */
/* ...-(*A1)--- c32 --\/--(+) -------- c42 -------------------------- c2     */
/* ...----------------/\--(-) -(*-1)-- c43 -------------------------- c6     */
/* ...(-c14*A2) c34 -----------------------\--/(+) -------- c54 ----- c5     */
/* ...-(*A3)--- c35 --\--/(+) -------- c45 -\/--------\/(+) c55 ----- c1     */
/* ...(*A4+ca0) c36 ---\/-------------------/\--------/\(-) c56 ----- c7     */
/* ...-----------------/\-(-) -(*-1)-- c47 /--\(-) -(*-1)-- c57 ----- c3     */
/*                                                                           */
/* with                                                                      */
/* a \(*d)/(+) b            meaning         b = (a*d) + c                    */
/* c /----\(-) d                            d = (a*d) - c                    */


/* AA&N coefficients (unsuffixed literals, so each constant has type double) */
/* DCT1 = cos(4pi/16) */
#define  DCT1 0.707106781
/* DCT2 = cos(2pi/16)-cos(6pi/16) */
#define  DCT2 0.541196100
/* DCT3 = cos(2pi/16)+cos(6pi/16) */
#define  DCT3 1.306562965
/* DCT4 = cos(6pi/16) */
#define  DCT4 0.382683432

/*  dct_aan_row                                                              */
/*                                                                           */
/*  Description:                                                             */
/*    Perform the 1-D butterfly step of the AA&N DCT, in place, on one row   */
/*    of a block; the result is left unscaled (no postscale is applied).     */
/*                                                                           */
/*  Arguments:                                                               */
/*    dct_t *block: an 8x8 block stored as 64 consecutive values, row-major  */
/*    int row: which row (0..7) to perform the 1-D DCT on                    */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline dct_aan_row(dct_t * block, int row)
{
   /* first element of the selected row; neighbours are 1 apart */
   dct_t *p = block + 8*row;

   /* fetch all eight samples before anything is written back */
   const dct_t x0 = p[0];
   const dct_t x1 = p[1];
   const dct_t x2 = p[2];
   const dct_t x3 = p[3];
   const dct_t x4 = p[4];
   const dct_t x5 = p[5];
   const dct_t x6 = p[6];
   const dct_t x7 = p[7];

   /* stage 1: pair sample k with sample 7-k (sum and difference) */
   const dct_t s07 = x0+x7;
   const dct_t d07 = x0-x7;
   const dct_t s16 = x1+x6;
   const dct_t d16 = x1-x6;
   const dct_t s25 = x2+x5;
   const dct_t d25 = x2-x5;
   const dct_t s34 = x3+x4;
   const dct_t d34 = x3-x4;

   /* even half: uses the sums only, yields outputs 0, 2, 4 and 6 */
   const dct_t e0 = s07+s34;
   const dct_t e3 = s07-s34;
   const dct_t e1 = s16+s25;
   const dct_t e2 = s16-s25;
   const dct_t e23 = e2+e3;
   const dct_t r2 = e23*DCT1;

   const dct_t y0 = e0+e1;
   const dct_t y4 = e0-e1;
   const dct_t y2 = r2+e3;
   const dct_t y6 = e3-r2;

   /* odd half: uses the differences only, yields outputs 1, 3, 5 and 7 */
   const dct_t o6 = d16+d07;
   const dct_t o5 = d25+d16;
   const dct_t o4 = -(d34+d25);         /* note the sign flip on this term */

   /* product shared by the two terms below (saves one multiply) */
   const dct_t rot = (o4+o6)*DCT4;
   const dct_t m4 = -(o4*DCT2+rot);
   const dct_t m6 = o6*DCT3-rot;
   const dct_t m5 = o5*DCT1;

   const dct_t a5 = d07+m5;
   const dct_t a7 = d07-m5;

   const dct_t y5 = m4+a7;
   const dct_t y3 = a7-m4;
   const dct_t y1 = a5+m6;
   const dct_t y7 = a5-m6;

   /* write the coefficients back over the input row */
   p[0] = y0;
   p[1] = y1;
   p[2] = y2;
   p[3] = y3;
   p[4] = y4;
   p[5] = y5;
   p[6] = y6;
   p[7] = y7;
}

/*  dct_aan_col                                                              */
/*                                                                           */
/*  Description:                                                             */
/*    Perform the 1-D butterfly step of the AA&N DCT, in place, on one       */
/*    column of a block; the result is left unscaled (no postscale applied). */
/*                                                                           */
/*  Arguments:                                                               */
/*    dct_t *block: an 8x8 block stored as 64 consecutive values, row-major  */
/*    int col: which column (0..7) to perform the 1-D DCT on                 */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline dct_aan_col(dct_t * block, int col)
{
   /* top element of the selected column; neighbours are 8 apart */
   dct_t *p = block + col;

   /* fetch all eight samples before anything is written back */
   const dct_t x0 = p[0*8];
   const dct_t x1 = p[1*8];
   const dct_t x2 = p[2*8];
   const dct_t x3 = p[3*8];
   const dct_t x4 = p[4*8];
   const dct_t x5 = p[5*8];
   const dct_t x6 = p[6*8];
   const dct_t x7 = p[7*8];

   /* stage 1: pair sample k with sample 7-k (sum and difference) */
   const dct_t s07 = x0+x7;
   const dct_t d07 = x0-x7;
   const dct_t s16 = x1+x6;
   const dct_t d16 = x1-x6;
   const dct_t s25 = x2+x5;
   const dct_t d25 = x2-x5;
   const dct_t s34 = x3+x4;
   const dct_t d34 = x3-x4;

   /* even half: uses the sums only, yields outputs 0, 2, 4 and 6 */
   const dct_t e0 = s07+s34;
   const dct_t e3 = s07-s34;
   const dct_t e1 = s16+s25;
   const dct_t e2 = s16-s25;
   const dct_t e23 = e2+e3;
   const dct_t r2 = e23*DCT1;

   const dct_t y0 = e0+e1;
   const dct_t y4 = e0-e1;
   const dct_t y2 = r2+e3;
   const dct_t y6 = e3-r2;

   /* odd half: uses the differences only, yields outputs 1, 3, 5 and 7 */
   const dct_t o6 = d16+d07;
   const dct_t o5 = d25+d16;
   const dct_t o4 = -(d34+d25);         /* note the sign flip on this term */

   /* product shared by the two terms below (saves one multiply) */
   const dct_t rot = (o4+o6)*DCT4;
   const dct_t m4 = -(o4*DCT2+rot);
   const dct_t m6 = o6*DCT3-rot;
   const dct_t m5 = o5*DCT1;

   const dct_t a5 = d07+m5;
   const dct_t a7 = d07-m5;

   const dct_t y5 = m4+a7;
   const dct_t y3 = a7-m4;
   const dct_t y1 = a5+m6;
   const dct_t y7 = a5-m6;

   /* write the coefficients back over the input column */
   p[0*8] = y0;
   p[1*8] = y1;
   p[2*8] = y2;
   p[3*8] = y3;
   p[4*8] = y4;
   p[5*8] = y5;
   p[6*8] = y6;
   p[7*8] = y7;
}

/*  dct                                                                      */
/*                                                                           */
/*  Description:                                                             */
/*    Perform the first step of a 2-D AA&N DCT on an 8x8 block, in place:    */
/*    the 1-D step is run on all eight rows, then on all eight columns.      */
/*    Postscaling of the coefficients is not done here.                      */
/*                                                                           */
/*  Arguments:                                                               */
/*    dct_t *block: an 8x8 block of 64 values, transformed in place          */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline dct(dct_t *block)
{
   int row;
   int col;

   /* horizontal pass: every row must be done before any column starts */
   for(row = 0; row != 8; ++row) {
     dct_aan_row(block, row);
   }

   /* vertical pass over the row-transformed data */
   for(col = 0; col != 8; ++col) {
     dct_aan_col(block, col);
   }
}
