1/*
2 * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
3 * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
4 *
5 * SPDX-License-Identifier: BSD-3-Clause
6 * See COPYING file for more information.
7 */
8
9
10#include "_kiss_fft_guts.h"
11/* The guts header contains all the multiplication and addition macros that are defined for
12 fixed or floating point complex numbers. It also delares the kf_ internal functions.
13 */
14
15static void kf_bfly2(
16 kiss_fft_cpx * Fout,
17 const size_t fstride,
18 const kiss_fft_cfg st,
19 int m
20 )
21{
22 kiss_fft_cpx * Fout2;
23 kiss_fft_cpx * tw1 = st->twiddles;
24 kiss_fft_cpx t;
25 Fout2 = Fout + m;
26 do{
27 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
28
29 C_MUL (t, *Fout2 , *tw1);
30 tw1 += fstride;
31 C_SUB( *Fout2 , *Fout , t );
32 C_ADDTO( *Fout , t );
33 ++Fout2;
34 ++Fout;
35 }while (--m);
36}
37
38static void kf_bfly4(
39 kiss_fft_cpx * Fout,
40 const size_t fstride,
41 const kiss_fft_cfg st,
42 const size_t m
43 )
44{
45 kiss_fft_cpx *tw1,*tw2,*tw3;
46 kiss_fft_cpx scratch[6];
47 size_t k=m;
48 const size_t m2=2*m;
49 const size_t m3=3*m;
50
51
52 tw3 = tw2 = tw1 = st->twiddles;
53
54 do {
55 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
56
57 C_MUL(scratch[0],Fout[m] , *tw1 );
58 C_MUL(scratch[1],Fout[m2] , *tw2 );
59 C_MUL(scratch[2],Fout[m3] , *tw3 );
60
61 C_SUB( scratch[5] , *Fout, scratch[1] );
62 C_ADDTO(*Fout, scratch[1]);
63 C_ADD( scratch[3] , scratch[0] , scratch[2] );
64 C_SUB( scratch[4] , scratch[0] , scratch[2] );
65 C_SUB( Fout[m2], *Fout, scratch[3] );
66 tw1 += fstride;
67 tw2 += fstride*2;
68 tw3 += fstride*3;
69 C_ADDTO( *Fout , scratch[3] );
70
71 if(st->inverse) {
72 Fout[m].r = scratch[5].r - scratch[4].i;
73 Fout[m].i = scratch[5].i + scratch[4].r;
74 Fout[m3].r = scratch[5].r + scratch[4].i;
75 Fout[m3].i = scratch[5].i - scratch[4].r;
76 }else{
77 Fout[m].r = scratch[5].r + scratch[4].i;
78 Fout[m].i = scratch[5].i - scratch[4].r;
79 Fout[m3].r = scratch[5].r - scratch[4].i;
80 Fout[m3].i = scratch[5].i + scratch[4].r;
81 }
82 ++Fout;
83 }while(--k);
84}
85
86static void kf_bfly3(
87 kiss_fft_cpx * Fout,
88 const size_t fstride,
89 const kiss_fft_cfg st,
90 size_t m
91 )
92{
93 size_t k=m;
94 const size_t m2 = 2*m;
95 kiss_fft_cpx *tw1,*tw2;
96 kiss_fft_cpx scratch[5];
97 kiss_fft_cpx epi3;
98 epi3 = st->twiddles[fstride*m];
99
100 tw1=tw2=st->twiddles;
101
102 do{
103 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
104
105 C_MUL(scratch[1],Fout[m] , *tw1);
106 C_MUL(scratch[2],Fout[m2] , *tw2);
107
108 C_ADD(scratch[3],scratch[1],scratch[2]);
109 C_SUB(scratch[0],scratch[1],scratch[2]);
110 tw1 += fstride;
111 tw2 += fstride*2;
112
113 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
114 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
115
116 C_MULBYSCALAR( scratch[0] , epi3.i );
117
118 C_ADDTO(*Fout,scratch[3]);
119
120 Fout[m2].r = Fout[m].r + scratch[0].i;
121 Fout[m2].i = Fout[m].i - scratch[0].r;
122
123 Fout[m].r -= scratch[0].i;
124 Fout[m].i += scratch[0].r;
125
126 ++Fout;
127 }while(--k);
128}
129
130static void kf_bfly5(
131 kiss_fft_cpx * Fout,
132 const size_t fstride,
133 const kiss_fft_cfg st,
134 int m
135 )
136{
137 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
138 int u;
139 kiss_fft_cpx scratch[13];
140 kiss_fft_cpx * twiddles = st->twiddles;
141 kiss_fft_cpx *tw;
142 kiss_fft_cpx ya,yb;
143 ya = twiddles[fstride*m];
144 yb = twiddles[fstride*2*m];
145
146 Fout0=Fout;
147 Fout1=Fout0+m;
148 Fout2=Fout0+2*m;
149 Fout3=Fout0+3*m;
150 Fout4=Fout0+4*m;
151
152 tw=st->twiddles;
153 for ( u=0; u<m; ++u ) {
154 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
155 scratch[0] = *Fout0;
156
157 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
158 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
159 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
160 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
161
162 C_ADD( scratch[7],scratch[1],scratch[4]);
163 C_SUB( scratch[10],scratch[1],scratch[4]);
164 C_ADD( scratch[8],scratch[2],scratch[3]);
165 C_SUB( scratch[9],scratch[2],scratch[3]);
166
167 Fout0->r += scratch[7].r + scratch[8].r;
168 Fout0->i += scratch[7].i + scratch[8].i;
169
170 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
171 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
172
173 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
174 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
175
176 C_SUB(*Fout1,scratch[5],scratch[6]);
177 C_ADD(*Fout4,scratch[5],scratch[6]);
178
179 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
180 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
181 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
182 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
183
184 C_ADD(*Fout2,scratch[11],scratch[12]);
185 C_SUB(*Fout3,scratch[11],scratch[12]);
186
187 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
188 }
189}
190
191/* perform the butterfly for one stage of a mixed radix FFT */
192static void kf_bfly_generic(
193 kiss_fft_cpx * Fout,
194 const size_t fstride,
195 const kiss_fft_cfg st,
196 int m,
197 int p
198 )
199{
200 int u,k,q1,q;
201 kiss_fft_cpx * twiddles = st->twiddles;
202 kiss_fft_cpx t;
203 int Norig = st->nfft;
204
205 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
206 if (scratch == NULL){
207 KISS_FFT_ERROR("Memory allocation failed.");
208 return;
209 }
210
211 for ( u=0; u<m; ++u ) {
212 k=u;
213 for ( q1=0 ; q1<p ; ++q1 ) {
214 scratch[q1] = Fout[ k ];
215 C_FIXDIV(scratch[q1],p);
216 k += m;
217 }
218
219 k=u;
220 for ( q1=0 ; q1<p ; ++q1 ) {
221 int twidx=0;
222 Fout[ k ] = scratch[0];
223 for (q=1;q<p;++q ) {
224 twidx += fstride * k;
225 if (twidx>=Norig) twidx-=Norig;
226 C_MUL(t,scratch[q] , twiddles[twidx] );
227 C_ADDTO( Fout[ k ] ,t);
228 }
229 k += m;
230 }
231 }
232 KISS_FFT_TMP_FREE(scratch);
233}
234
235static
236void kf_work(
237 kiss_fft_cpx * Fout,
238 const kiss_fft_cpx * f,
239 const size_t fstride,
240 int in_stride,
241 int * factors,
242 const kiss_fft_cfg st
243 )
244{
245 kiss_fft_cpx * Fout_beg=Fout;
246 const int p=*factors++; /* the radix */
247 const int m=*factors++; /* stage's fft length/p */
248 const kiss_fft_cpx * Fout_end = Fout + p*m;
249
250#ifdef _OPENMP
251 // use openmp extensions at the
252 // top-level (not recursive)
253 if (fstride==1 && p<=5 && m!=1)
254 {
255 int k;
256
257 // execute the p different work units in different threads
258# pragma omp parallel for
259 for (k=0;k<p;++k)
260 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
261 // all threads have joined by this point
262
263 switch (p) {
264 case 2: kf_bfly2(Fout,fstride,st,m); break;
265 case 3: kf_bfly3(Fout,fstride,st,m); break;
266 case 4: kf_bfly4(Fout,fstride,st,m); break;
267 case 5: kf_bfly5(Fout,fstride,st,m); break;
268 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
269 }
270 return;
271 }
272#endif
273
274 if (m==1) {
275 do{
276 *Fout = *f;
277 f += fstride*in_stride;
278 }while(++Fout != Fout_end );
279 }else{
280 do{
281 // recursive call:
282 // DFT of size m*p performed by doing
283 // p instances of smaller DFTs of size m,
284 // each one takes a decimated version of the input
285 kf_work( Fout , f, fstride*p, in_stride, factors,st);
286 f += fstride*in_stride;
287 }while( (Fout += m) != Fout_end );
288 }
289
290 Fout=Fout_beg;
291
292 // recombine the p smaller DFTs
293 switch (p) {
294 case 2: kf_bfly2(Fout,fstride,st,m); break;
295 case 3: kf_bfly3(Fout,fstride,st,m); break;
296 case 4: kf_bfly4(Fout,fstride,st,m); break;
297 case 5: kf_bfly5(Fout,fstride,st,m); break;
298 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
299 }
300}
301
302/* facbuf is populated by p1,m1,p2,m2, ...
303 where
304 p[i] * m[i] = m[i-1]
305 m0 = n */
306static
307void kf_factor(int n,int * facbuf)
308{
309 int p=4;
310 double floor_sqrt;
311 floor_sqrt = floor( sqrt((double)n) );
312
313 /*factor out powers of 4, powers of 2, then any remaining primes */
314 do {
315 while (n % p) {
316 switch (p) {
317 case 4: p = 2; break;
318 case 2: p = 3; break;
319 default: p += 2; break;
320 }
321 if (p > floor_sqrt)
322 p = n; /* no more factors, skip to end */
323 }
324 n /= p;
325 *facbuf++ = p;
326 *facbuf++ = n;
327 } while (n > 1);
328}
329
330/*
331 *
332 * User-callable function to allocate all necessary storage space for the fft.
333 *
334 * The return value is a contiguous block of memory, allocated with malloc. As such,
335 * It can be freed with free(), rather than a kiss_fft-specific function.
336 * */
337kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
338{
339 KISS_FFT_ALIGN_CHECK(mem)
340
341 kiss_fft_cfg st=NULL;
342 size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state)
343 + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/
344
345 if ( lenmem==NULL ) {
346 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
347 }else{
348 if (mem != NULL && *lenmem >= memneeded)
349 st = (kiss_fft_cfg)mem;
350 *lenmem = memneeded;
351 }
352 if (st) {
353 int i;
354 st->nfft=nfft;
355 st->inverse = inverse_fft;
356
357 for (i=0;i<nfft;++i) {
358 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
359 double phase = -2*pi*i / nfft;
360 if (st->inverse)
361 phase *= -1;
362 kf_cexp(st->twiddles+i, phase );
363 }
364
365 kf_factor(nfft,st->factors);
366 }
367 return st;
368}
369
370
371void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
372{
373 if (fin == fout) {
374 //NOTE: this is not really an in-place FFT algorithm.
375 //It just performs an out-of-place FFT into a temp buffer
376 if (fout == NULL){
377 KISS_FFT_ERROR("fout buffer NULL.");
378 return;
379 }
380
381 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
382 if (tmpbuf == NULL){
383 KISS_FFT_ERROR("Memory allocation error.");
384 return;
385 }
386
387
388
389 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
390 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
391 KISS_FFT_TMP_FREE(tmpbuf);
392 }else{
393 kf_work( fout, fin, 1,in_stride, st->factors,st );
394 }
395}
396
397void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
398{
399 kiss_fft_stride(cfg,fin,fout,1);
400}
401
402
403void kiss_fft_cleanup(void)
404{
405 // nothing needed any more
406}
407
408int kiss_fft_next_fast_size(int n)
409{
410 while(1) {
411 int m=n;
412 while ( (m%2) == 0 ) m/=2;
413 while ( (m%3) == 0 ) m/=3;
414 while ( (m%5) == 0 ) m/=5;
415 if (m<=1)
416 break; /* n is completely factorable by twos, threes, and fives */
417 n++;
418 }
419 return n;
420}
421