/* gpfft3.f -- translated by f2c (version 19941113).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

/* f2c-style scalar type names used by the translated routines below. */
typedef int integer;
#include <fftw.h>
/* `real` is whatever floating-point type FFTW_REAL names (presumably
   supplied by fftw.h -- confirm against the FFTW version in use). */
typedef FFTW_REAL real;
/* Not referenced by the code visible in this file section. */
typedef double doublereal;


/* Table of constant values */

/* The constant 2 as a named object (f2c convention); gpf3d() passes it
   by value to gpfa() as one of the two stride arguments. */
static integer c__2 = 2;

/* Smaller of a and b.  Function-like macro: the selected argument is
   evaluated twice, so do not pass expressions with side effects. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))


/* GPF3D The basic 3D complex FFT. */


/* Arguments */

/* COMPLEX C(ID,NN(2),NN(3)) */
/* ID first dimension of data array (stored in Fortran order). */
/* NN(3) Dimensions of 3D complex data. */
/* IS Forward (+1)/Reverse (-1) */

/* Modified by Steven G. Johnson, 1997, to check for error conditions. 
   (returns 1 on error, 0 otherwise.)
   Also modified to handle arrays with dimensions up to 256. */

/*
 * gpf3d -- in-place 3D complex FFT, performed as three passes of multiple
 * 1D transforms (one pass per dimension), each pass delegated to gpfa().
 *
 * c   Data, declared on the Fortran side as COMPLEX C(ID,NN(2),NN(3)).
 *     Accessed here as a flat array of reals in which every complex
 *     element occupies two consecutive entries (presumably real part
 *     followed by imaginary part -- confirm against gpfa()).
 * id  Leading (first) dimension of the data array; may exceed nn[0].
 * nn  The three transform lengths.  Each must be <= 256 and must be
 *     accepted by setgpfa().
 * is  Transform direction, forward (+1) / reverse (-1).  gpfa() is
 *     called with -is.
 *
 * Returns 0 on success and 1 if a length exceeds 256 or setgpfa() reports
 * failure for it.  On error the data array has not been touched.
 *
 * The trig tables are cached in static storage between calls and rebuilt
 * only for a dimension whose length changed.  A length is recorded as
 * cached only after its table has been built successfully, so a call that
 * failed cannot make a later call with the same lengths skip validation.
 */
int gpf3d(real *c, integer id, integer *nn, integer is)
{
    enum {
        GPF3D_NDIM = 3,
        GPF3D_MAX_N = 256,      /* largest supported transform length */
        GPF3D_TRIG_LEN = 512    /* reals reserved per dimension in trigs[] */
    };

    /* Lengths for which each section of trigs[] is currently valid
       (0 = nothing set up yet). */
    static integer onn[GPF3D_NDIM] = { 0, 0, 0 };
    static integer nthreads = 1;
    static real trigs[GPF3D_NDIM * GPF3D_TRIG_LEN];

    extern /* Subroutine */ int gpfa(real *, real *, real *, integer,
            integer, integer, integer, integer);
    extern /* Subroutine */ int setgpfa(real *, integer);

    const integer line_stride = 2 * id; /* reals between consecutive x-lines */
    const integer isign = -is;
    integer d, i, iofset, lot, nlines, count;

    /* (Re)build the trig table of every dimension whose length changed. */
    for (d = 0; d < GPF3D_NDIM; ++d) {
        if (onn[d] == nn[d]) {
            continue;
        }
        if (nn[d] > GPF3D_MAX_N) {
            /* Table untouched: the previously cached length stays valid. */
            return 1;
        }
        if (setgpfa(&trigs[d * GPF3D_TRIG_LEN], nn[d]) != 0) {
            /* The table may have been partly overwritten; forget it. */
            onn[d] = 0;
            return 1;
        }
        onn[d] = nn[d];
    }

/*  The following three loops are prime candidates for parallelism */
/*  as they call independent multi-1D ffts.  Just set the variable */
/*  "nthreads" to the number of processors to use. */
/*  NOTE(review): the value returned by gpfa() is ignored, as it always */
/*  was here; its meaning is not visible from this file. */

    /* Pass 1: transforms of length nn[0] along the first dimension.  The
       nn[1]*nn[2] lines are split into nthreads batches of at most "lot". */
    nlines = nn[1] * nn[2];
    lot = (nlines + nthreads - 1) / nthreads;
    for (i = 0; i < nthreads; ++i) {
        iofset = line_stride * lot * i;
        count = MIN(lot, nlines - i * lot);
        gpfa(&c[iofset], &c[iofset + 1], trigs, c__2, line_stride,
                nn[0], count, isign);
    }

    /* Pass 2: transforms of length nn[1] along the second dimension, one
       gpfa() call per plane of the third dimension. */
    for (i = 0; i < nn[2]; ++i) {
        iofset = line_stride * nn[1] * i;
        gpfa(&c[iofset], &c[iofset + 1], &trigs[GPF3D_TRIG_LEN],
                line_stride, c__2, nn[1], nn[0], isign);
    }

    /* Pass 3: transforms of length nn[2] along the third dimension.  The
       id*nn[1] lines are again split into nthreads batches. */
    nlines = id * nn[1];
    lot = (nlines + nthreads - 1) / nthreads;
    for (i = 0; i < nthreads; ++i) {
        iofset = 2 * lot * i;
        count = MIN(lot, nlines - i * lot);
        gpfa(&c[iofset], &c[iofset + 1], &trigs[2 * GPF3D_TRIG_LEN],
                line_stride * nn[1], c__2, nn[2], count, isign);
    }
    return 0;
} /* gpf3d_ */

/* Subroutine */ int expand(real *c, integer *nl, integer *nmn, integer *is)
{
/*  Re-pad a complex array in place between a leading dimension of *nl */
/*  (typically 2**n) and a leading dimension of *nl + 1. */
/*  The padded layout avoids memory-bank conflicts and so greatly */
/*  improves performance on (vector) machines with interleaved memory. */
/*  */
/*    *is > 0 : stretch, row r moves from offset 2*r*nl to 2*r*(nl+1) */
/*    else    : shrink,  row r moves from offset 2*r*(nl+1) to 2*r*nl */
/*  */
/*  There are *nmn rows of 2*nl reals each; row 0 never moves.  The */
/*  stretched form needs 2*(nl+1)*nmn reals, so the caller must have */
/*  dimensioned c large enough to take it. */
/*  */
/*  Source and destination overlap, so the order of the moves is */
/*  critical: stretching works from the top of the array downwards, */
/*  shrinking from the bottom upwards.  For the same reason this cannot */
/*  be done in parallel, and vectorising is only safe if a stride of -1 */
/*  really does move the top element first. */
    const integer width = *nl;
    const integer rows = *nmn;
    const integer row_len = 2 * width;  /* reals per (unpadded) row */
    integer row, k, packed, padded;

    if (*is > 0) {
        /* Stretch: last row first, last element of each row first. */
        for (row = rows - 1; row >= 1; --row) {
            packed = 2 * row * width;
            padded = packed + 2 * row;
            for (k = row_len - 1; k >= 0; --k) {
                c[padded + k] = c[packed + k];
            }
        }
        return 0;
    }

    /* Shrink: first moved row first, first element of each row first. */
    for (row = 1; row < rows; ++row) {
        packed = 2 * row * width;
        padded = packed + 2 * row;
        for (k = 0; k < row_len; ++k) {
            c[packed + k] = c[padded + k];
        }
    }
    return 0;
} /* expand_ */

