/*
 * Copyright (c) 1997 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to use, copy, modify, and distribute the Software without
 * restriction, provided the Software, including any modified copies made
 * under this license, is not distributed for a fee, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE MASSACHUSETTS INSTITUTE OF TECHNOLOGY BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 * Except as contained in this notice, the name of the Massachusetts
 * Institute of Technology shall not be used in advertising or otherwise
 * to promote the sale, use or other dealings in this Software without
 * prior written authorization from the Massachusetts Institute of
 * Technology.
 *  
 */

#include <stdio.h>
#include <stdlib.h>

#include <math.h>

#include "bench_utils.h"
#include "rbench_ffts.h"

/***************************************************************************/
/* Prototypes for included FFTs: */

#include <rfftw.h>

/* Fortran routine benchmarked as "NR (F)" by do_nrf_2d_rfft.  It is called
   there as (data, extra, &n1, &n2, &n3, &isign), where extra points just
   past the N reals of data and isign is +1 (forward) or -1 (inverse). */
void FORTRANIZE(nrfrlft3,NRFRLFT3)(FFTW_REAL*, FFTW_REAL*,
				   int*, int*, int*, int*);

/* Routines benchmarked as "Green" by do_green_2d_rfft.  The integer
   arguments are passed the base-2 logarithms of the array dimensions;
   a nonzero return from green_fft2dInit is treated as "size not
   supported". */
int green_fft2dInit(int, int);
void green_fft2dFree(void);
void green_rfft2d(FFTW_REAL *data, int M2, int M);
void green_rifft2d(FFTW_REAL *data, int M2, int M);

/* Routines benchmarked as "Ooura (C)" and "Ooura (F)" by do_ooura_2d_rfft.
   The C version takes an array of row pointers; both take an integer
   work array (whose first element is zeroed before the first call) and
   a real work array. */
void rdft2d(int, int, int, FFTW_REAL **, int *, FFTW_REAL *);
void FORTRANIZE(oouraf2rdft2d,OOURAF2RDFT2D) (int *, int *, int *, int *,
					      FFTW_REAL *, int *, FFTW_REAL*);

/***************************************************************************/

/* Defined elsewhere; do_mfft_2d_rfft calls it as check_prime_factors(N,5)
   and treats a nonzero result as "N has no prime factor larger than 5"
   (NOTE(review): exact contract not visible in this file -- confirm). */
extern int check_prime_factors(int n, int maxprime);

/*
 * Benchmark FFTW's multi-dimensional real transform (rfftwnd), in place
 * on arr.
 *
 *   rank, n           number of dimensions and their sizes
 *   N                 total size, handed to DO_RBENCHMARK_ND; plans are
 *                     only created/destroyed when N != 0 and
 *                     fft_data_cur is set
 *   arr               data array, used as both input and output
 *   compute_accuracy  forwarded to DO_RBENCHMARK_ND
 *
 * The remaining parameters (n_rev, is_power_of_two, work, size_arr,
 * size_work, allowed_factors) are not referenced directly by this
 * function's own statements.
 *
 * Without RFFTW_V2 the benchmark is skipped unless the last dimension is
 * even.  If a plan cannot be created the program logs an error and exits.
 */
void do_fftwnd_rfft(int rank, int *n, int *n_rev, int N, short is_power_of_two,
                    FFTW_REAL *arr, FFTW_REAL *work,
                    int size_arr, int size_work,
                    short compute_accuracy, factor_type allowed_factors)
{
     rfftwnd_plan plan = NULL, iplan = NULL;

     FFT_NAME("FFTW");

#ifndef RFFTW_V2
     if (n[rank-1] % 2 == 0)
#endif
     {
          if (N != 0 && fft_data_cur) {
#ifdef RFFTW_V2
               plan = rfftwnd_create_plan_specific(rank, n, FFTW_FORWARD,
                                                   FFTW_MEASURE|FFTW_IN_PLACE,
                                                   arr, 1, arr, 1);
               iplan = rfftwnd_create_plan_specific(rank, n, FFTW_BACKWARD,
                                                    FFTW_MEASURE|FFTW_IN_PLACE,
                                                    arr, 1, arr, 1);
#else
               plan = rfftwnd_create_plan(rank, n, FFTW_FORWARD,
                                          FFTW_MEASURE|FFTW_IN_PLACE,
                                          REAL_TO_COMPLEX);
               /* The inverse plan takes the same (rank, n, dir, flags,
                  type) arguments as the forward plan and must also be
                  in-place, since both transforms operate on arr. */
               iplan = rfftwnd_create_plan(rank, n, FFTW_BACKWARD,
                                           FFTW_MEASURE|FFTW_IN_PLACE,
                                           COMPLEX_TO_REAL);
#endif
               if (!plan || !iplan) {
                    log_printf("\n\nError creating FFTW plan!\n");
                    printf("\n\nError creating FFTW plan!\n");
                    exit(EXIT_FAILURE);
               }
          }

#ifdef RFFTW_V2
          DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                           rfftwnd_real_to_complex(plan, 1, arr,1,0,
                                                   (FFTW_COMPLEX*)arr,1,0),
                           rfftwnd_complex_to_real(iplan, 1,
                                                   (FFTW_COMPLEX*)arr,1,0,
                                                   arr,1,0),
                           1.0/N,
                           compute_accuracy);
#else
          DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                           rfftwnd(plan, 1, (FFTW_COMPLEX*)arr,1,0,
                                   (FFTW_COMPLEX*)arr,1,0),
                           rfftwnd(iplan, 1, (FFTW_COMPLEX*)arr,1,0,
                                   (FFTW_COMPLEX*)arr,1,0),
                           1.0/N,
                           compute_accuracy);
#endif

          /* Plans exist only under the same condition used to create them. */
          if (N != 0 && fft_data_cur) {
               rfftwnd_destroy_plan(plan);
               rfftwnd_destroy_plan(iplan);
          }
     }
#ifndef RFFTW_V2
     else
          skip_benchmark("RFFTW requires even array size");
#endif
}

/*
 * Benchmark the Fortran "NR (F)" real 2d transform (nrfrlft3).
 *
 * Compiled to an empty function unless both HAVE_F77 and HAVE_NRF are
 * defined.  Does nothing unless rank == 2 and FFTW_REAL has the size of
 * a double.  The routine is handed arr plus a second pointer just past
 * the first N reals of arr, with the two dimensions swapped and a third
 * dimension of 1.  Results are scaled by 2.0/N.
 */
void do_nrf_2d_rfft(int rank, int *n, int *n_rev, int N,
                    short is_power_of_two,
                    FFTW_REAL *arr, FFTW_REAL *work,
                    int size_arr, int size_work,
                    short compute_accuracy, factor_type allowed_factors)
{
#if defined(HAVE_F77) && defined(HAVE_NRF)
     if (rank != 2 || sizeof(FFTW_REAL) != sizeof(double))
          return;

     FFT_REQUIRE_POWER_OF_TWO;
     FFT_NAME("NR (F)");
     if (!FFT_OK) return;
     {
          /* Fortran takes everything by reference, so each scalar
             argument needs an addressable local. */
          int nr_sign_fwd = +1;
          int nr_sign_inv = -1;
          int nr_dim1 = n[1];
          int nr_dim2 = n[0];
          int nr_dim3 = 1;
          FFTW_REAL *nr_extra = arr + N;

          DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                           FORTRANIZE(nrfrlft3,NRFRLFT3)(arr, nr_extra,
                                                         &nr_dim1, &nr_dim2,
                                                         &nr_dim3,
                                                         &nr_sign_fwd),
                           FORTRANIZE(nrfrlft3,NRFRLFT3)(arr, nr_extra,
                                                         &nr_dim1, &nr_dim2,
                                                         &nr_dim3,
                                                         &nr_sign_inv),
                           2.0/N,
                           compute_accuracy);
     }
#endif
}

/*
 * Benchmark the "Green" real 2d transform.
 *
 * Does nothing unless rank == 2 and pointers are the same size as int
 * (green is buggy with 64 bit pointers).  When N != 0 the base-2
 * logarithm of each dimension is computed by repeated halving and passed
 * to green_fft2dInit; a nonzero result from that call skips the
 * benchmark.  Otherwise the forward/inverse pair is benchmarked with a
 * scale factor of 1.0 and, when N != 0, green_fft2dFree is called
 * afterwards.
 */
void do_green_2d_rfft(int rank, int *n, int *n_rev, int N,
                     short is_power_of_two,
                     FFTW_REAL *arr, FFTW_REAL *work,
                     int size_arr, int size_work,
                     short compute_accuracy, factor_type allowed_factors)
{
     /* green is buggy with 64 bit pointers */
     if (rank != 2 || sizeof (void *) != sizeof(int))
          return;

     FFT_REQUIRE_POWER_OF_TWO;
     FFT_NAME("Green");
     if (!FFT_OK) return;

     {
          int green_log2[2] = {0,0};
          int green_status = 0;

          if (N != 0) {
               int axis, remaining;

               /* Count how many times each dimension can be halved. */
               for (axis = 0; axis < 2; ++axis)
                    for (remaining = n[axis]; remaining > 1; remaining /= 2)
                         green_log2[axis] += 1;

               green_status = green_fft2dInit(green_log2[0], green_log2[1]);
          }

          if (green_status != 0)
               skip_benchmark("Green can't handle this size.");
          else {
               DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                               green_rfft2d(arr, green_log2[0],green_log2[1]),
                               green_rifft2d(arr, green_log2[0],green_log2[1]),
                               1.0,
                               compute_accuracy);
               if (N != 0)
                    green_fft2dFree();
          }
     }
}

/*
 * Benchmark the Fortran MFFT real 2d transform (mfftr2fft).
 *
 * Compiled to an empty function unless both HAVE_MFFT and HAVE_F77 are
 * defined, and does nothing unless FFTW_REAL, float and int all have the
 * same size and rank == 2.
 *
 * When N != 0, three integer work arrays are allocated and initialized
 * by a call with sign 0, provided the last dimension is even and
 * check_prime_factors(N,5) accepts N; otherwise the benchmark is skipped
 * with an explanatory message.  The error code returned by the
 * initialization call also decides whether the benchmark runs.  The work
 * arrays are released before returning.  Results are scaled by 1.0/N.
 *
 * Exits the program if the work arrays cannot be allocated.
 */
void do_mfft_2d_rfft(int rank, int *n, int *n_rev, int N,
                    short is_power_of_two,
                    FFTW_REAL *arr, FFTW_REAL *work,
                    int size_arr, int size_work,
                    short compute_accuracy, factor_type allowed_factors)
{
#ifdef HAVE_MFFT
#ifdef HAVE_F77
     /* Auggh!  MFFT, because of a totally non-portable hack that
        it uses, requires that the integer and floating point types
        be the same size!  Damn these Fortran programmers! */
     if (sizeof(FFTW_REAL) == sizeof(float) &&
         sizeof(FFTW_REAL) == sizeof(int) /* <-- obscenity */) {

          int np = 0, is0 = 0, is1 = -1, is2 = +1, ld;
          int *wl = 0, *wm = 0, *iwork = 0;
          /* ierr starts at 0 so that np never receives an indeterminate
             value if the Fortran routine leaves it untouched. */
          int iord = 1, ierr = 0;

          /* Check the rank before touching n[1]. */
          if (rank != 2) return;
          ld = n[1] + 2;

          FFT_NAME("MFFT");
          if (!FFT_OK) return;
          if (N != 0) {
               /* insure that array size is power of 2, 3, & 5 only: */
               if (n[1] % 2 == 0 && check_prime_factors(N,5)) {
                    wl = (int*) fftw_malloc(sizeof(int)*(6*n_rev[0]+14));
                    wm = (int*) fftw_malloc(sizeof(int)*(4*n_rev[1]+14));
                    iwork = (int*) fftw_malloc(sizeof(int)*maxn(2,n_rev));
                    if (wl == 0 || wm == 0 || iwork == 0) {
                         printf("\nERROR!  Out of memory for MFFT!\n");
                         exit(1);
                    }

                    /* Initialize work arrays: */
                    FORTRANIZE(mfftr2fft,MFFTR2FFT)
                         (arr,&ld,&n_rev[0],&n_rev[1],
                          wl,wm,&is0,&iord,iwork,&ierr);

                    np = ierr;
               }
               else
                    np = 1;
          }
          if (np == 0) {
               DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                               FORTRANIZE(mfftr2fft,MFFTR2FFT)
                                (arr,&ld,&n_rev[0],&n_rev[1],
                                 wl,wm,&is1,&iord,iwork,&ierr),
                               FORTRANIZE(mfftr2fft,MFFTR2FFT)
                                (arr,&ld,&n_rev[0],&n_rev[1],
                                 wl,wm,&is2,&iord,iwork,&ierr),
                               1.0/N,
                               compute_accuracy);
          }
          else if (n[1] % 2 != 0)
               skip_benchmark("requires that last dimension be even");
          else
               skip_benchmark("only handles N = 2^m 3^n 5^q");

          /* The work arrays are only allocated when N != 0; release
             whichever ones exist (the pointers are null otherwise). */
          if (wl) fftw_free(wl);
          if (wm) fftw_free(wm);
          if (iwork) fftw_free(iwork);
     }
#endif
#endif
}

/*
 * Benchmark Ooura's real 2d transform: the C routine rdft2d and, when
 * HAVE_F77 is defined and FFTW_REAL has the size of a double, the
 * Fortran routine oouraf2rdft2d as well.
 *
 * Does nothing unless rank == 2.  When N != 0 an integer work array
 * (ip, with ip[0] zeroed before the first call) and an array of row
 * pointers into arr are allocated, and one transform is run up front
 * before timing; `work` is passed through as the real work array.
 * Both variants are scaled by 2.0/N.
 *
 * Exits the program if either allocation fails.
 */
void do_ooura_2d_rfft(int rank, int *n, int *n_rev, int N,
                      short is_power_of_two,
                      FFTW_REAL *arr, FFTW_REAL *work,
                      int size_arr, int size_work,
                      short compute_accuracy, factor_type allowed_factors)
{
     if (rank != 2) return;
     FFT_REQUIRE_POWER_OF_TWO;
     FFT_NAME("Ooura (C)");
     if (FFT_OK) {
          int *ip = 0, len_ip, is1=-1, is2=+1;
          FFTW_REAL **arr2 = 0;

          if (N != 0) {
               /* ip is sized from the larger of n[0] and n[1]/2. */
               int nn = n[1]/2;

               if (n[0] > nn) nn = n[0];
               len_ip = sqrt(nn) + 3;
               ip = (int*) fftw_malloc(sizeof(int)*len_ip);
               if (!ip) {
                    printf("\nERROR!  Out of memory!\n");
                    exit(1);
               }
               ip[0] = 0;

               arr2 = (FFTW_REAL **) fftw_malloc(sizeof(FFTW_REAL*) * n[0]);
               /* Check this allocation too before writing through it. */
               if (!arr2) {
                    printf("\nERROR!  Out of memory!\n");
                    exit(1);
               }
               for (nn = 0; nn < n[0]; ++nn)
                    arr2[nn] = &arr[nn*n[1]];

               /* Untimed first call (ip[0] == 0 on entry). */
               rdft2d(n[0], n[1], +1, arr2, ip, work);
          }
          DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                           rdft2d(n[0], n[1], +1, arr2, ip, work),
                           rdft2d(n[0], n[1], -1, arr2, ip, work),
                           2.0/N,
                           compute_accuracy);

          if (arr2)
               fftw_free(arr2);

#ifdef HAVE_F77
          if (sizeof(FFTW_REAL) == sizeof(double)) {
               FFT_REQUIRE_POWER_OF_TWO;
               FFT_NAME("Ooura (F)");
               if (FFT_OK && N != 0) {
                    /* Zero ip[0] again before the Fortran routine's
                       untimed first call. */
                    ip[0] = 0;
                    FORTRANIZE(oouraf2rdft2d,OOURAF2RDFT2D)(&n[1], &n[1],
                                                            &n[0],
                                                            &is2, arr,
                                                            ip, work);
               }
               if (FFT_OK)
                    DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                                     FORTRANIZE(oouraf2rdft2d,
                                                OOURAF2RDFT2D)(&n[1], &n[1],
                                                               &n[0],
                                                               &is2, arr,
                                                               ip, work),
                                     FORTRANIZE(oouraf2rdft2d,
                                                OOURAF2RDFT2D)(&n[1], &n[1],
                                                               &n[0],
                                                               &is1, arr,
                                                               ip, work),
                                     2.0/N,
                                     compute_accuracy);
          }
#endif
          /* Free ip whenever it was allocated.  FFT_OK may have been
             re-evaluated for "Ooura (F)" above, so it cannot be used to
             decide whether the allocation happened. */
          if (ip)
               fftw_free(ip);
     }
}

/*
 * Benchmark the SCSL real 2d transforms, in place on arr.
 *
 * Compiled to an empty function unless HAVE_LIBSCS is defined, and does
 * nothing unless rank == 2.  The single-precision pair (scfft2d/csfft2d)
 * is used when FFTW_REAL has the size of a float, the double-precision
 * pair (dzfft2d/zdfft2d) otherwise.  `work` supplies both the routine's
 * work area (its start) and its table (starting N reals in).  When
 * N != 0 and fft_data_cur is set, the forward routine is first called
 * once with a sign of 0.  The benchmark is skipped for 0 < N <= 2 (and
 * for negative N); results are scaled by 1.0/N.
 */
void do_scsl_2d_rfft(int rank, int *n, int *n_rev, int N, 
                  short is_power_of_two,
                  FFTW_REAL *arr, FFTW_REAL *work,
                  int size_arr, int size_work,
                  short compute_accuracy, factor_type allowed_factors)
{
#ifdef HAVE_LIBSCS
     /* Common signature of the four SCSL entry points used below. */
     typedef void scsl_fft_proc(int *isign, int *n1, int *n2,
                                FFTW_REAL *scale,
                                FFTW_REAL *x, int *ldx,
                                FFTW_REAL *y, int *ldy,
                                FFTW_REAL *table, FFTW_REAL *work,
                                int *isys);
     typedef scsl_fft_proc *scsl_fft_proc_ptr;
     extern scsl_fft_proc
          FORTRANIZE(scfft2d,SCFFT2D), FORTRANIZE(dzfft2d,DZFFT2D),
          FORTRANIZE(csfft2d,CSFFT2D), FORTRANIZE(zdfft2d,ZDFFT2D);
     scsl_fft_proc_ptr scsl_fft, scsl_ifft;
     int scsl_sign_fwd = -1, scsl_sign_inv = +1, scsl_sign_init = 0;
     int scsl_n1, scsl_n2, scsl_ld_real, scsl_ld_cplx, scsl_isys = 0;
     FFTW_REAL scsl_scale = 1.0;
     FFTW_REAL *scsl_table, *scsl_work;

     if (rank != 2) return;

     FFT_NAME("SCSL");

     /* Dimensions in reversed order; the real leading dimension is
        twice the complex one so the transform can run in place. */
     scsl_n1 = n_rev[0];
     scsl_n2 = n_rev[1];
     scsl_ld_cplx = scsl_n1/2 + 1;
     scsl_ld_real = 2*scsl_ld_cplx;

     scsl_work = (FFTW_REAL*)work;
     scsl_table = scsl_work + N;

     if (sizeof(FFTW_REAL) != sizeof(float)) {
          scsl_fft = FORTRANIZE(dzfft2d,DZFFT2D);
          scsl_ifft = FORTRANIZE(zdfft2d,ZDFFT2D);
     }
     else {
          scsl_fft = FORTRANIZE(scfft2d,SCFFT2D);
          scsl_ifft = FORTRANIZE(csfft2d,CSFFT2D);
     }

     /* One untimed call with a zero sign before benchmarking. */
     if (N != 0 && fft_data_cur)
          scsl_fft(&scsl_sign_init, &scsl_n1,&scsl_n2, &scsl_scale,
                   arr,&scsl_ld_real,arr,&scsl_ld_cplx,
                   scsl_table, scsl_work, &scsl_isys);

     if (N != 0 && N <= 2)
          skip_benchmark("can't handle n <= 2");
     else {
          DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                          scsl_fft(&scsl_sign_fwd, &scsl_n1,&scsl_n2,
                                   &scsl_scale,
                                   arr,&scsl_ld_real,arr,&scsl_ld_cplx,
                                   scsl_table, scsl_work, &scsl_isys),
                          scsl_ifft(&scsl_sign_inv, &scsl_n1,&scsl_n2,
                                    &scsl_scale,
                                    arr,&scsl_ld_cplx,arr,&scsl_ld_real,
                                    scsl_table, scsl_work, &scsl_isys),
                          1.0/N,
                          compute_accuracy);
     }
#endif
}

/*
 * Benchmark the SGI complib.sgimath real 2d transforms, in place on arr.
 *
 * Compiled to an empty function unless HAVE_LIBCOMPLIB_SGIMATH is
 * defined, and does nothing unless rank == 2.  The double-precision
 * routines are used when FFTW_REAL has the size of a double, the
 * single-precision ones otherwise.  When N != 0 the corresponding
 * initialization routine is called with `work` as its save area before
 * benchmarking.  The leading dimension passed is 2*(n[1]/2 + 1), and
 * results are scaled by 1.0/N.
 */
void do_sgimath_2d_rfft(int rank, int *n, int *n_rev, int N, 
                       short is_power_of_two,
                       FFTW_REAL *arr, FFTW_REAL *work,
                       int size_arr, int size_work,
                       short compute_accuracy, factor_type allowed_factors)
{
#ifdef HAVE_LIBCOMPLIB_SGIMATH
     int lda;
     if (rank != 2) return;

     FFT_NAME("SGIMATH");

     lda = 2*(n[1]/2 + 1);

     /* The local prototypes below name exactly the routines that are
        called, so none of them is implicitly declared.
        NOTE(review): signatures follow the declarations previously in
        this function -- confirm against the vendor header. */
     if (sizeof(double) == sizeof(FFTW_REAL)) {
          FFTW_REAL *dzfft2dui(int n1, int n2, FFTW_REAL *save);
          int dzfft2du(int sign, int n1, int n2,
                       FFTW_REAL *array, int la1,
                       FFTW_REAL *save);
          int zdfft2du(int sign, int n1, int n2,
                       FFTW_REAL *array, int la1,
                       FFTW_REAL *save);
          if (N != 0) dzfft2dui(n[1], n[0], work);
          DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                           dzfft2du(-1, n[1], n[0],
                                    arr, lda, work),
                           zdfft2du(+1, n[1], n[0],
                                    arr, lda, work),
                           1.0/N,
                           compute_accuracy);
     } else {
          FFTW_REAL *scfft2dui(int n1, int n2, FFTW_REAL *save);
          int scfft2du(int sign, int n1, int n2,
                       FFTW_REAL *array, int la1,
                       FFTW_REAL *save);
          int csfft2du(int sign, int n1, int n2,
                       FFTW_REAL *array, int la1,
                       FFTW_REAL *save);
          if (N != 0) scfft2dui(n[1], n[0], work);
          DO_RBENCHMARK_ND(rank, n, N, arr, arr,
                           scfft2du(-1, n[1], n[0],
                                    arr, lda, work),
                           csfft2du(+1, n[1], n[0],
                                    arr, lda, work),
                           1.0/N,
                           compute_accuracy);
     }
#endif
}

