/// \ingroup newmat ///@{ /// \file newfft.cpp /// Fast Fourier transform using Sande and Gentleman method. // This is originally by Sande and Gentleman in 1967! I have translated from // Fortran into C and a little bit of C++. // It takes about twice as long as fftw // (http://theory.lcs.mit.edu/~fftw/homepage.html) // but is much shorter than fftw and so despite its age // might represent a reasonable // compromise between speed and complexity. // If you really need the speed get fftw. // THIS SUBROUTINE WAS WRITTEN BY G.SANDE OF PRINCETON UNIVERSITY AND // W.M.GENTLMAN OF THE BELL TELEPHONE LAB. IT WAS BROUGHT TO LONDON // BY DR. M.D. GODFREY AT THE IMPERIAL COLLEGE AND WAS ADAPTED FOR // BURROUGHS 6700 BY D. R. BRILLINGER AND J. PEMBERTON // IT REPRESENTS THE STATE OF THE ART OF COMPUTING COMPLETE FINITE // DISCRETE FOURIER TRANSFORMS AS OF NOV.1967. // OTHER PROGRAMS REQUIRED. // ONLY THOSE SUBROUTINES INCLUDED HERE. // USAGE. // CALL AR1DFT(N,X,Y) // WHERE N IS THE NUMBER OF POINTS IN THE SEQUENCE . // X - IS A ONE-DIMENSIONAL ARRAY CONTAINING THE REAL // PART OF THE SEQUENCE. // Y - IS A ONE-DIMENSIONAL ARRAY CONTAINING THE // IMAGINARY PART OF THE SEQUENCE. // THE TRANSFORM IS RETURNED IN X AND Y. // METHOD // FOR A GENERAL DISCUSSION OF THESE TRANSFORMS AND OF // THE FAST METHOD FOR COMPUTING THEM, SEE GENTLEMAN AND SANDE, // @FAST FOURIER TRANSFORMS - FOR FUN AND PROFIT,@ 1966 FALL JOINT // COMPUTER CONFERENCE. // THIS PROGRAM COMPUTES THIS FOR A COMPLEX SEQUENCE Z(T) OF LENGTH // N WHOSE ELEMENTS ARE STORED AT(X(I) , Y(I)) AND RETURNS THE // TRANSFORM COEFFICIENTS AT (X(I), Y(I)). // DESCRIPTION // AR1DFT IS A HIGHLY MODULAR ROUTINE CAPABLE OF COMPUTING IN PLACE // THE COMPLETE FINITE DISCRETE FOURIER TRANSFORM OF A ONE- // DIMENSIONAL SEQUENCE OF RATHER GENERAL LENGTH N. // THE MAIN ROUTINE , AR1DFT ITSELF, FACTORS N. IT THEN CALLS ON // ON GR 1D FT TO COMPUTE THE ACTUAL TRANSFORMS, USING THESE FACTORS. // THIS GR 1D FT DOES, CALLING AT EACH STAGE ON THE APPROPRIATE KERN // EL R2FTK, R4FTK, R8FTK, R16FTK, R3FTK, R5FTK, OR RPFTK TO PERFORM // THE COMPUTATIONS FOR THIS PASS OVER THE SEQUENCE, DEPENDING ON // WHETHER THE CORRESPONDING FACTOR IS 2, 4, 8, 16, 3, 5, OR SOME // MORE GENERAL PRIME P. WHEN GR1DFT IS FINISHED THE TRANSFORM IS // COMPUTED, HOWEVER, THE RESULTS ARE STORED IN "DIGITS REVERSED" // ORDER. AR1DFT THEREFORE, CALLS UPON GR 1S FS TO SORT THEM OUT. // TO RETURN TO THE FACTORIZATION, SINGLETON HAS POINTED OUT THAT // THE TRANSFORMS ARE MORE EFFICIENT IF THE SAMPLE SIZE N, IS OF THE // FORM B*A**2 AND B CONSISTS OF A SINGLE FACTOR. IN SUCH A CASE // IF WE PROCESS THE FACTORS IN THE ORDER ABA THEN // THE REORDERING CAN BE DONE AS FAST IN PLACE, AS WITH SCRATCH // STORAGE. BUT AS B BECOMES MORE COMPLICATED, THE COST OF THE DIGIT // REVERSING DUE TO B PART BECOMES VERY EXPENSIVE IF WE TRY TO DO IT // IN PLACE. IN SUCH A CASE IT MIGHT BE BETTER TO USE EXTRA STORAGE // A ROUTINE TO DO THIS IS, HOWEVER, NOT INCLUDED HERE. // ANOTHER FEATURE INFLUENCING THE FACTORIZATION IS THAT FOR ANY FIXED // FACTOR N WE CAN PREPARE A SPECIAL KERNEL WHICH WILL COMPUTE // THAT STAGE OF THE TRANSFORM MORE EFFICIENTLY THAN WOULD A KERNEL // FOR GENERAL FACTORS, ESPECIALLY IF THE GENERAL KERNEL HAD TO BE // APPLIED SEVERAL TIMES. FOR EXAMPLE, FACTORS OF 4 ARE MORE // EFFICIENT THAN FACTORS OF 2, FACTORS OF 8 MORE EFFICIENT THAN 4,ETC // ON THE OTHER HAND DIMINISHING RETURNS RAPIDLY SET IN, ESPECIALLY // SINCE THE LENGTH OF THE KERNEL FOR A SPECIAL CASE IS ROUGHLY // PROPORTIONAL TO THE FACTOR IT DEALS WITH. HENCE THESE PROBABLY ARE // ALL THE KERNELS WE WISH TO HAVE. // RESTRICTIONS. // AN UNFORTUNATE FEATURE OF THE SORTING PROBLEM IS THAT THE MOST // EFFICIENT WAY TO DO IT IS WITH NESTED DO LOOPS, ONE FOR EACH // FACTOR. THIS PUTS A RESTRICTION ON N AS TO HOW MANY FACTORS IT // CAN HAVE. CURRENTLY THE LIMIT IS 16, BUT THE LIMIT CAN BE READILY // RAISED IF NECESSARY. // A SECOND RESTRICTION OF THE PROGRAM IS THAT LOCAL STORAGE OF THE // THE ORDER P**2 IS REQUIRED BY THE GENERAL KERNEL RPFTK, SO SOME // LIMIT MUST BE SET ON P. CURRENTLY THIS IS 19, BUT IT CAN BE INCRE // INCREASED BY TRIVIAL CHANGES. // OTHER COMMENTS. //(1) THE ROUTINE IS ADAPTED TO CHECK WHETHER A GIVEN N WILL MEET THE // ABOVE FACTORING REQUIREMENTS AN, IF NOT, TO RETURN THE NEXT HIGHER // NUMBER, NX, SAY, WHICH WILL MEET THESE REQUIREMENTS. // THIS CAN BE ACCHIEVED BY A STATEMENT OF THE FORM // CALL FACTR(N,X,Y). // IF A DIFFERENT N, SAY NX, IS RETURNED THEN THE TRANSFORMS COULD BE // OBTAINED BY EXTENDING THE SIZE OF THE X-ARRAY AND Y-ARRAY TO NX, // AND SETTING X(I) = Y(I) = 0., FOR I = N+1, NX. //(2) IF THE SEQUENCE Z IS ONLY A REAL SEQUENCE, THEN THE IMAGINARY PART // Y(I)=0., THIS WILL RETURN THE COSINE TRANSFORM OF THE REAL SEQUENCE // IN X, AND THE SINE TRANSFORM IN Y. #define WANT_STREAM #define WANT_MATH #include "newmatap.h" #ifdef use_namespace namespace NEWMAT { #endif #ifdef DO_REPORT #define REPORT { static ExeCounter ExeCount(__LINE__,20); ++ExeCount; } #else #define REPORT {} #endif inline Real square(Real x) { return x*x; } inline int square(int x) { return x*x; } static void GR_1D_FS (int PTS, int N_SYM, int N_UN_SYM, const SimpleIntArray& SYM, int P_SYM, const SimpleIntArray& UN_SYM, Real* X, Real* Y); static void GR_1D_FT (int N, int N_FACTOR, const SimpleIntArray& FACTOR, Real* X, Real* Y); static void R_P_FTK (int N, int M, int P, Real* X, Real* Y); static void R_2_FTK (int N, int M, Real* X0, Real* Y0, Real* X1, Real* Y1); static void R_3_FTK (int N, int M, Real* X0, Real* Y0, Real* X1, Real* Y1, Real* X2, Real* Y2); static void R_4_FTK (int N, int M, Real* X0, Real* Y0, Real* X1, Real* Y1, Real* X2, Real* Y2, Real* X3, Real* Y3); static void R_5_FTK (int N, int M, Real* X0, Real* Y0, Real* X1, Real* Y1, Real* X2, Real* Y2, Real* X3, Real* Y3, Real* X4, Real* Y4); static void R_8_FTK (int N, int M, Real* X0, Real* Y0, Real* X1, Real* Y1, Real* X2, Real* Y2, Real* X3, Real* Y3, Real* X4, Real* Y4, Real* X5, Real* Y5, Real* X6, Real* Y6, Real* X7, Real* Y7); static void R_16_FTK (int N, int M, Real* X0, Real* Y0, Real* X1, Real* Y1, Real* X2, Real* Y2, Real* X3, Real* Y3, Real* X4, Real* Y4, Real* X5, Real* Y5, Real* X6, Real* Y6, Real* X7, Real* Y7, Real* X8, Real* Y8, Real* X9, Real* Y9, Real* X10, Real* Y10, Real* X11, Real* Y11, Real* X12, Real* Y12, Real* X13, Real* Y13, Real* X14, Real* Y14, Real* X15, Real* Y15); static int BitReverse(int x, int prod, int n, const SimpleIntArray& f); bool FFT_Controller::ar_1d_ft (int PTS, Real* X, Real *Y) { // ARBITRARY RADIX ONE DIMENSIONAL FOURIER TRANSFORM REPORT int F,J,N,NF,P,PMAX,P_SYM,P_TWO,Q,R,TWO_GRP; // NP is maximum number of squared factors allows PTS up to 2**32 at least // NQ is number of not-squared factors - increase if we increase PMAX const int NP = 16, NQ = 10; SimpleIntArray PP(NP), QQ(NQ); TWO_GRP=16; PMAX=19; // PMAX is the maximum factor size // TWO_GRP is the maximum power of 2 handled as a single factor // Doesn't take advantage of combining powers of 2 when calculating // number of factors if (PTS<=1) return true; N=PTS; P_SYM=1; F=2; P=0; Q=0; // P counts the number of squared factors // Q counts the number of the rest // R = 0 for no non-squared factors; 1 otherwise // FACTOR holds all the factors - non-squared ones in the middle // - length is 2*P+Q // SYM also holds all the factors but with the non-squared ones // multiplied together - length is 2*P+R // PP holds the values of the squared factors - length is P // QQ holds the values of the rest - length is Q // P_SYM holds the product of the squared factors // find the factors - load into PP and QQ while (N > 1) { bool fail = true; for (J=F; J<=PMAX; J++) if (N % J == 0) { fail = false; F=J; break; } if (fail || P >= NP || Q >= NQ) return false; // can't factor N /= F; if (N % F != 0) QQ[Q++] = F; else { N /= F; PP[P++] = F; P_SYM *= F; } } R = (Q == 0) ? 0 : 1; // R = 0 if no not-squared factors, 1 otherwise NF = 2*P + Q; SimpleIntArray FACTOR(NF + 1), SYM(2*P + R); FACTOR[NF] = 0; // we need this in the "combine powers of 2" // load into SYM and FACTOR for (J=0; J0) { REPORT for (J=0; J 0) { REPORT SimpleIntArray U(N_SYM); for(MultiRadixCounter MRC(N_SYM, SYM, U); !MRC.Finish(); ++MRC) { if (MRC.Swap()) { int P = MRC.Reverse(); int JJ = MRC.Counter(); Real T; T=X[JJ]; X[JJ]=X[P]; X[P]=T; T=Y[JJ]; Y[JJ]=Y[P]; Y[P]=T; } } } int J,JL,K,L,M,MS; // UN_SYM contains the non-squared factors // I have replaced the Sande-Gentleman code as it runs into // integer overflow problems // My code (and theirs) would be improved by using a bit array // as suggested by Van Loan if (N_UN_SYM==0) { REPORT return; } P_UN_SYM=PTS/square(P_SYM); JL=(P_UN_SYM-3)*P_SYM; MS=P_UN_SYM*P_SYM; for (J = P_SYM; J<=JL; J+=P_SYM) { K=J; do K = P_SYM * BitReverse(K / P_SYM, P_UN_SYM, N_UN_SYM, UN_SYM); while (K 1) { bool fail = true; for (int J = F; J <= PMAX; J++) if (N % J == 0) { fail = false; F=J; break; } if (fail || P >= NP || Q >= NQ) { REPORT return false; } N /= F; if (N % F != 0) Q++; else { N /= F; P++; } } return true; // can factorise } bool FFT_Controller::OnlyOldFFT; // static variable // **************************** multi radix counter ********************** MultiRadixCounter::MultiRadixCounter(int nx, const SimpleIntArray& rx, SimpleIntArray& vx) : Radix(rx), Value(vx), n(nx), reverse(0), product(1), counter(0), finish(false) { REPORT for (int k = 0; k < n; k++) { Value[k] = 0; product *= Radix[k]; } } void MultiRadixCounter::operator++() { REPORT counter++; int p = product; for (int k = 0; k < n; k++) { Value[k]++; int p1 = p / Radix[k]; reverse += p1; if (Value[k] == Radix[k]) { REPORT Value[k] = 0; reverse -= p; p = p1; } else { REPORT return; } } finish = true; } static int BitReverse(int x, int prod, int n, const SimpleIntArray& f) { // x = c[0]+f[0]*(c[1]+f[1]*(c[2]+... // return c[n-1]+f[n-1]*(c[n-2]+f[n-2]*(c[n-3]+... // prod is the product of the f[i] // n is the number of f[i] (don't assume f has the correct length) REPORT const int* d = f.Data() + n; int sum = 0; int q = 1; while (n--) { prod /= *(--d); int c = x / prod; x-= c * prod; sum += q * c; q *= *d; } return sum; } #ifdef use_namespace } #endif ///@}