1/*
2Fast Fourier/Cosine/Sine Transform
3 dimension :two
4 data length :power of 2
5 decimation :frequency
6 radix :split-radix, row-column
7 data :inplace
8 table :use
9functions
10 cdft2d: Complex Discrete Fourier Transform
11 rdft2d: Real Discrete Fourier Transform
12 ddct2d: Discrete Cosine Transform
13 ddst2d: Discrete Sine Transform
14function prototypes
15 void cdft2d(int, int, int, double **, double *, int *, double *);
16 void rdft2d(int, int, int, double **, double *, int *, double *);
17 void rdft2dsort(int, int, int, double **);
18 void ddct2d(int, int, int, double **, double *, int *, double *);
19 void ddst2d(int, int, int, double **, double *, int *, double *);
20necessary package
21 fftsg.c : 1D-FFT package
22macro definitions
23 USE_FFT2D_PTHREADS : default=not defined
24 FFT2D_MAX_THREADS : must be 2^N, default=4
25 FFT2D_THREADS_BEGIN_N : default=65536
26 USE_FFT2D_WINTHREADS : default=not defined
27 FFT2D_MAX_THREADS : must be 2^N, default=4
28 FFT2D_THREADS_BEGIN_N : default=131072
29
30
31-------- Complex DFT (Discrete Fourier Transform) --------
32 [definition]
33 <case1>
34 X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] *
35 exp(2*pi*i*j1*k1/n1) *
36 exp(2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2
37 <case2>
38 X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] *
39 exp(-2*pi*i*j1*k1/n1) *
40 exp(-2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2
41 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
42 [usage]
43 <case1>
44 ip[0] = 0; // first time only
45 cdft2d(n1, 2*n2, 1, a, t, ip, w);
46 <case2>
47 ip[0] = 0; // first time only
48 cdft2d(n1, 2*n2, -1, a, t, ip, w);
49 [parameters]
50 n1 :data length (int)
51 n1 >= 1, n1 = power of 2
52 2*n2 :data length (int)
53 n2 >= 1, n2 = power of 2
54 a[0...n1-1][0...2*n2-1]
55 :input/output data (double **)
56 input data
57 a[j1][2*j2] = Re(x[j1][j2]),
58 a[j1][2*j2+1] = Im(x[j1][j2]),
59 0<=j1<n1, 0<=j2<n2
60 output data
61 a[k1][2*k2] = Re(X[k1][k2]),
62 a[k1][2*k2+1] = Im(X[k1][k2]),
63 0<=k1<n1, 0<=k2<n2
64 t[0...*]
65 :work area (double *)
66 length of t >= 8*n1, if single thread,
67 length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads,
68 t is dynamically allocated, if t == NULL.
69 ip[0...*]
70 :work area for bit reversal (int *)
71 length of ip >= 2+sqrt(n)
72 (n = max(n1, n2))
73 ip[0],ip[1] are pointers of the cos/sin table.
74 w[0...*]
75 :cos/sin table (double *)
76 length of w >= max(n1/2, n2/2)
77 w[],ip[] are initialized if ip[0] == 0.
78 [remark]
79 Inverse of
80 cdft2d(n1, 2*n2, -1, a, t, ip, w);
81 is
82 cdft2d(n1, 2*n2, 1, a, t, ip, w);
83 for (j1 = 0; j1 <= n1 - 1; j1++) {
84 for (j2 = 0; j2 <= 2 * n2 - 1; j2++) {
85 a[j1][j2] *= 1.0 / n1 / n2;
86 }
87 }
88 .
89
90
91-------- Real DFT / Inverse of Real DFT --------
92 [definition]
93 <case1> RDFT
94 R[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
95 cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2),
96 0<=k1<n1, 0<=k2<n2
97 I[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
98 sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2),
99 0<=k1<n1, 0<=k2<n2
100 <case2> IRDFT (excluding scale)
101 a[k1][k2] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1
102 (R[j1][j2] *
103 cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) +
104 I[j1][j2] *
105 sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)),
106 0<=k1<n1, 0<=k2<n2
107 (notes: R[n1-k1][n2-k2] = R[k1][k2],
108 I[n1-k1][n2-k2] = -I[k1][k2],
109 R[n1-k1][0] = R[k1][0],
110 I[n1-k1][0] = -I[k1][0],
111 R[0][n2-k2] = R[0][k2],
112 I[0][n2-k2] = -I[0][k2],
113 0<k1<n1, 0<k2<n2)
114 [usage]
115 <case1>
116 ip[0] = 0; // first time only
117 rdft2d(n1, n2, 1, a, t, ip, w);
118 <case2>
119 ip[0] = 0; // first time only
120 rdft2d(n1, n2, -1, a, t, ip, w);
121 [parameters]
122 n1 :data length (int)
123 n1 >= 2, n1 = power of 2
124 n2 :data length (int)
125 n2 >= 2, n2 = power of 2
126 a[0...n1-1][0...n2-1]
127 :input/output data (double **)
128 <case1>
129 output data
130 a[k1][2*k2] = R[k1][k2] = R[n1-k1][n2-k2],
131 a[k1][2*k2+1] = I[k1][k2] = -I[n1-k1][n2-k2],
132 0<k1<n1, 0<k2<n2/2,
133 a[0][2*k2] = R[0][k2] = R[0][n2-k2],
134 a[0][2*k2+1] = I[0][k2] = -I[0][n2-k2],
135 0<k2<n2/2,
136 a[k1][0] = R[k1][0] = R[n1-k1][0],
137 a[k1][1] = I[k1][0] = -I[n1-k1][0],
138 a[n1-k1][1] = R[k1][n2/2] = R[n1-k1][n2/2],
139 a[n1-k1][0] = -I[k1][n2/2] = I[n1-k1][n2/2],
140 0<k1<n1/2,
141 a[0][0] = R[0][0],
142 a[0][1] = R[0][n2/2],
143 a[n1/2][0] = R[n1/2][0],
144 a[n1/2][1] = R[n1/2][n2/2]
145 <case2>
146 input data
147 a[j1][2*j2] = R[j1][j2] = R[n1-j1][n2-j2],
148 a[j1][2*j2+1] = I[j1][j2] = -I[n1-j1][n2-j2],
149 0<j1<n1, 0<j2<n2/2,
150 a[0][2*j2] = R[0][j2] = R[0][n2-j2],
151 a[0][2*j2+1] = I[0][j2] = -I[0][n2-j2],
152 0<j2<n2/2,
153 a[j1][0] = R[j1][0] = R[n1-j1][0],
154 a[j1][1] = I[j1][0] = -I[n1-j1][0],
155 a[n1-j1][1] = R[j1][n2/2] = R[n1-j1][n2/2],
156 a[n1-j1][0] = -I[j1][n2/2] = I[n1-j1][n2/2],
157 0<j1<n1/2,
158 a[0][0] = R[0][0],
159 a[0][1] = R[0][n2/2],
160 a[n1/2][0] = R[n1/2][0],
161 a[n1/2][1] = R[n1/2][n2/2]
162 ---- output ordering ----
163 rdft2d(n1, n2, 1, a, t, ip, w);
164 rdft2dsort(n1, n2, 1, a);
165 // stored data is a[0...n1-1][0...n2+1]:
166 // a[k1][2*k2] = R[k1][k2],
167 // a[k1][2*k2+1] = I[k1][k2],
168 // 0<=k1<n1, 0<=k2<=n2/2.
169 // the stored data is larger than the input data!
170 ---- input ordering ----
171 rdft2dsort(n1, n2, -1, a);
172 rdft2d(n1, n2, -1, a, t, ip, w);
173 t[0...*]
174 :work area (double *)
175 length of t >= 8*n1, if single thread,
176 length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads,
177 t is dynamically allocated, if t == NULL.
178 ip[0...*]
179 :work area for bit reversal (int *)
180 length of ip >= 2+sqrt(n)
181 (n = max(n1, n2/2))
182 ip[0],ip[1] are pointers of the cos/sin table.
183 w[0...*]
184 :cos/sin table (double *)
185 length of w >= max(n1/2, n2/4) + n2/4
186 w[],ip[] are initialized if ip[0] == 0.
187 [remark]
188 Inverse of
189 rdft2d(n1, n2, 1, a, t, ip, w);
190 is
191 rdft2d(n1, n2, -1, a, t, ip, w);
192 for (j1 = 0; j1 <= n1 - 1; j1++) {
193 for (j2 = 0; j2 <= n2 - 1; j2++) {
194 a[j1][j2] *= 2.0 / n1 / n2;
195 }
196 }
197 .
198
199
200-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
201 [definition]
202 <case1> IDCT (excluding scale)
203 C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
204 cos(pi*j1*(k1+1/2)/n1) *
205 cos(pi*j2*(k2+1/2)/n2),
206 0<=k1<n1, 0<=k2<n2
207 <case2> DCT
208 C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
209 cos(pi*(j1+1/2)*k1/n1) *
210 cos(pi*(j2+1/2)*k2/n2),
211 0<=k1<n1, 0<=k2<n2
212 [usage]
213 <case1>
214 ip[0] = 0; // first time only
215 ddct2d(n1, n2, 1, a, t, ip, w);
216 <case2>
217 ip[0] = 0; // first time only
218 ddct2d(n1, n2, -1, a, t, ip, w);
219 [parameters]
220 n1 :data length (int)
221 n1 >= 2, n1 = power of 2
222 n2 :data length (int)
223 n2 >= 2, n2 = power of 2
224 a[0...n1-1][0...n2-1]
225 :input/output data (double **)
226 output data
227 a[k1][k2] = C[k1][k2], 0<=k1<n1, 0<=k2<n2
228 t[0...*]
229 :work area (double *)
230 length of t >= 4*n1, if single thread,
231 length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads,
232 t is dynamically allocated, if t == NULL.
233 ip[0...*]
234 :work area for bit reversal (int *)
235 length of ip >= 2+sqrt(n)
236 (n = max(n1/2, n2/2))
237 ip[0],ip[1] are pointers of the cos/sin table.
238 w[0...*]
239 :cos/sin table (double *)
240 length of w >= max(n1*3/2, n2*3/2)
241 w[],ip[] are initialized if ip[0] == 0.
242 [remark]
243 Inverse of
244 ddct2d(n1, n2, -1, a, t, ip, w);
245 is
246 for (j1 = 0; j1 <= n1 - 1; j1++) {
247 a[j1][0] *= 0.5;
248 }
249 for (j2 = 0; j2 <= n2 - 1; j2++) {
250 a[0][j2] *= 0.5;
251 }
252 ddct2d(n1, n2, 1, a, t, ip, w);
253 for (j1 = 0; j1 <= n1 - 1; j1++) {
254 for (j2 = 0; j2 <= n2 - 1; j2++) {
255 a[j1][j2] *= 4.0 / n1 / n2;
256 }
257 }
258 .
259
260
261-------- DST (Discrete Sine Transform) / Inverse of DST --------
262 [definition]
263 <case1> IDST (excluding scale)
264 S[k1][k2] = sum_j1=1^n1 sum_j2=1^n2 A[j1][j2] *
265 sin(pi*j1*(k1+1/2)/n1) *
266 sin(pi*j2*(k2+1/2)/n2),
267 0<=k1<n1, 0<=k2<n2
268 <case2> DST
269 S[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
270 sin(pi*(j1+1/2)*k1/n1) *
271 sin(pi*(j2+1/2)*k2/n2),
272 0<k1<=n1, 0<k2<=n2
273 [usage]
274 <case1>
275 ip[0] = 0; // first time only
276 ddst2d(n1, n2, 1, a, t, ip, w);
277 <case2>
278 ip[0] = 0; // first time only
279 ddst2d(n1, n2, -1, a, t, ip, w);
280 [parameters]
281 n1 :data length (int)
282 n1 >= 2, n1 = power of 2
283 n2 :data length (int)
284 n2 >= 2, n2 = power of 2
285 a[0...n1-1][0...n2-1]
286 :input/output data (double **)
287 <case1>
288 input data
289 a[j1][j2] = A[j1][j2], 0<j1<n1, 0<j2<n2,
290 a[j1][0] = A[j1][n2], 0<j1<n1,
291 a[0][j2] = A[n1][j2], 0<j2<n2,
292 a[0][0] = A[n1][n2]
293 (i.e. A[j1][j2] = a[j1 % n1][j2 % n2])
294 output data
295 a[k1][k2] = S[k1][k2], 0<=k1<n1, 0<=k2<n2
296 <case2>
297 output data
298 a[k1][k2] = S[k1][k2], 0<k1<n1, 0<k2<n2,
299 a[k1][0] = S[k1][n2], 0<k1<n1,
300 a[0][k2] = S[n1][k2], 0<k2<n2,
301 a[0][0] = S[n1][n2]
302 (i.e. S[k1][k2] = a[k1 % n1][k2 % n2])
303 t[0...*]
304 :work area (double *)
305 length of t >= 4*n1, if single thread,
306 length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads,
307 t is dynamically allocated, if t == NULL.
308 ip[0...*]
309 :work area for bit reversal (int *)
310 length of ip >= 2+sqrt(n)
311 (n = max(n1/2, n2/2))
312 ip[0],ip[1] are pointers of the cos/sin table.
313 w[0...*]
314 :cos/sin table (double *)
315 length of w >= max(n1*3/2, n2*3/2)
316 w[],ip[] are initialized if ip[0] == 0.
317 [remark]
318 Inverse of
319 ddst2d(n1, n2, -1, a, t, ip, w);
320 is
321 for (j1 = 0; j1 <= n1 - 1; j1++) {
322 a[j1][0] *= 0.5;
323 }
324 for (j2 = 0; j2 <= n2 - 1; j2++) {
325 a[0][j2] *= 0.5;
326 }
327 ddst2d(n1, n2, 1, a, t, ip, w);
328 for (j1 = 0; j1 <= n1 - 1; j1++) {
329 for (j2 = 0; j2 <= n2 - 1; j2++) {
330 a[j1][j2] *= 4.0 / n1 / n2;
331 }
332 }
333 .
334*/
335
336
337#include <stdio.h>
338#include <stdlib.h>
339#define fft2d_alloc_error_check(p) { \
340 if ((p) == NULL) { \
341 fprintf(stderr, "fft2d memory allocation error\n"); \
342 exit(1); \
343 } \
344}
345
346
347#ifdef USE_FFT2D_PTHREADS
348#define USE_FFT2D_THREADS
349#ifndef FFT2D_MAX_THREADS
350#define FFT2D_MAX_THREADS 4
351#endif
352#ifndef FFT2D_THREADS_BEGIN_N
353#define FFT2D_THREADS_BEGIN_N 65536
354#endif
355#include <pthread.h>
356#define fft2d_thread_t pthread_t
357#define fft2d_thread_create(thp,func,argp) { \
358 if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \
359 fprintf(stderr, "fft2d thread error\n"); \
360 exit(1); \
361 } \
362}
363#define fft2d_thread_wait(th) { \
364 if (pthread_join(th, NULL) != 0) { \
365 fprintf(stderr, "fft2d thread error\n"); \
366 exit(1); \
367 } \
368}
369#endif /* USE_FFT2D_PTHREADS */
370
371
372#ifdef USE_FFT2D_WINTHREADS
373#define USE_FFT2D_THREADS
374#ifndef FFT2D_MAX_THREADS
375#define FFT2D_MAX_THREADS 4
376#endif
377#ifndef FFT2D_THREADS_BEGIN_N
378#define FFT2D_THREADS_BEGIN_N 131072
379#endif
380#include <windows.h>
381#define fft2d_thread_t HANDLE
382#define fft2d_thread_create(thp,func,argp) { \
383 DWORD thid; \
384 *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \
385 if (*(thp) == 0) { \
386 fprintf(stderr, "fft2d thread error\n"); \
387 exit(1); \
388 } \
389}
390#define fft2d_thread_wait(th) { \
391 WaitForSingleObject(th, INFINITE); \
392 CloseHandle(th); \
393}
394#endif /* USE_FFT2D_WINTHREADS */
395
396
397void cdft2d(int n1, int n2, int isgn, double **a, double *t,
398 int *ip, double *w)
399{
400 void makewt(int nw, int *ip, double *w);
401 void cdft(int n, int isgn, double *a, int *ip, double *w);
402 void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t,
403 int *ip, double *w);
404#ifdef USE_FFT2D_THREADS
405 void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a,
406 int *ip, double *w);
407 void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t,
408 int *ip, double *w);
409#endif /* USE_FFT2D_THREADS */
410 int n, itnull, nthread, nt, i;
411
412 n = n1 << 1;
413 if (n < n2) {
414 n = n2;
415 }
416 if (n > (ip[0] << 2)) {
417 makewt(n >> 2, ip, w);
418 }
419 itnull = 0;
420 if (t == NULL) {
421 itnull = 1;
422 nthread = 1;
423#ifdef USE_FFT2D_THREADS
424 nthread = FFT2D_MAX_THREADS;
425#endif /* USE_FFT2D_THREADS */
426 nt = 8 * nthread * n1;
427 if (n2 == 4 * nthread) {
428 nt >>= 1;
429 } else if (n2 < 4 * nthread) {
430 nt >>= 2;
431 }
432 t = (double *) malloc(sizeof(double) * nt);
433 fft2d_alloc_error_check(t);
434 }
435#ifdef USE_FFT2D_THREADS
436 if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) {
437 xdft2d0_subth(n1, n2, 0, isgn, a, ip, w);
438 cdft2d_subth(n1, n2, isgn, a, t, ip, w);
439 } else
440#endif /* USE_FFT2D_THREADS */
441 {
442 for (i = 0; i < n1; i++) {
443 cdft(n2, isgn, a[i], ip, w);
444 }
445 cdft2d_sub(n1, n2, isgn, a, t, ip, w);
446 }
447 if (itnull != 0) {
448 free(t);
449 }
450}
451
452
453void rdft2d(int n1, int n2, int isgn, double **a, double *t,
454 int *ip, double *w)
455{
456 void makewt(int nw, int *ip, double *w);
457 void makect(int nc, int *ip, double *c);
458 void rdft(int n, int isgn, double *a, int *ip, double *w);
459 void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t,
460 int *ip, double *w);
461 void rdft2d_sub(int n1, int n2, int isgn, double **a);
462#ifdef USE_FFT2D_THREADS
463 void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a,
464 int *ip, double *w);
465 void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t,
466 int *ip, double *w);
467#endif /* USE_FFT2D_THREADS */
468 int n, nw, nc, itnull, nthread, nt, i;
469
470 n = n1 << 1;
471 if (n < n2) {
472 n = n2;
473 }
474 nw = ip[0];
475 if (n > (nw << 2)) {
476 nw = n >> 2;
477 makewt(nw, ip, w);
478 }
479 nc = ip[1];
480 if (n2 > (nc << 2)) {
481 nc = n2 >> 2;
482 makect(nc, ip, w + nw);
483 }
484 itnull = 0;
485 if (t == NULL) {
486 itnull = 1;
487 nthread = 1;
488#ifdef USE_FFT2D_THREADS
489 nthread = FFT2D_MAX_THREADS;
490#endif /* USE_FFT2D_THREADS */
491 nt = 8 * nthread * n1;
492 if (n2 == 4 * nthread) {
493 nt >>= 1;
494 } else if (n2 < 4 * nthread) {
495 nt >>= 2;
496 }
497 t = (double *) malloc(sizeof(double) * nt);
498 fft2d_alloc_error_check(t);
499 }
500#ifdef USE_FFT2D_THREADS
501 if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) {
502 if (isgn < 0) {
503 rdft2d_sub(n1, n2, isgn, a);
504 cdft2d_subth(n1, n2, isgn, a, t, ip, w);
505 }
506 xdft2d0_subth(n1, n2, 1, isgn, a, ip, w);
507 if (isgn >= 0) {
508 cdft2d_subth(n1, n2, isgn, a, t, ip, w);
509 rdft2d_sub(n1, n2, isgn, a);
510 }
511 } else
512#endif /* USE_FFT2D_THREADS */
513 {
514 if (isgn < 0) {
515 rdft2d_sub(n1, n2, isgn, a);
516 cdft2d_sub(n1, n2, isgn, a, t, ip, w);
517 }
518 for (i = 0; i < n1; i++) {
519 rdft(n2, isgn, a[i], ip, w);
520 }
521 if (isgn >= 0) {
522 cdft2d_sub(n1, n2, isgn, a, t, ip, w);
523 rdft2d_sub(n1, n2, isgn, a);
524 }
525 }
526 if (itnull != 0) {
527 free(t);
528 }
529}
530
531
532void rdft2dsort(int n1, int n2, int isgn, double **a)
533{
534 int n1h, i;
535 double x, y;
536
537 n1h = n1 >> 1;
538 if (isgn < 0) {
539 for (i = n1h + 1; i < n1; i++) {
540 a[i][0] = a[i][n2 + 1];
541 a[i][1] = a[i][n2];
542 }
543 a[0][1] = a[0][n2];
544 a[n1h][1] = a[n1h][n2];
545 } else {
546 for (i = n1h + 1; i < n1; i++) {
547 y = a[i][0];
548 x = a[i][1];
549 a[i][n2] = x;
550 a[i][n2 + 1] = y;
551 a[n1 - i][n2] = x;
552 a[n1 - i][n2 + 1] = -y;
553 a[i][0] = a[n1 - i][0];
554 a[i][1] = -a[n1 - i][1];
555 }
556 a[0][n2] = a[0][1];
557 a[0][n2 + 1] = 0;
558 a[0][1] = 0;
559 a[n1h][n2] = a[n1h][1];
560 a[n1h][n2 + 1] = 0;
561 a[n1h][1] = 0;
562 }
563}
564
565
566void ddct2d(int n1, int n2, int isgn, double **a, double *t,
567 int *ip, double *w)
568{
569 void makewt(int nw, int *ip, double *w);
570 void makect(int nc, int *ip, double *c);
571 void ddct(int n, int isgn, double *a, int *ip, double *w);
572 void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a,
573 double *t, int *ip, double *w);
574#ifdef USE_FFT2D_THREADS
575 void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a,
576 int *ip, double *w);
577 void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a,
578 double *t, int *ip, double *w);
579#endif /* USE_FFT2D_THREADS */
580 int n, nw, nc, itnull, nthread, nt, i;
581
582 n = n1;
583 if (n < n2) {
584 n = n2;
585 }
586 nw = ip[0];
587 if (n > (nw << 2)) {
588 nw = n >> 2;
589 makewt(nw, ip, w);
590 }
591 nc = ip[1];
592 if (n > nc) {
593 nc = n;
594 makect(nc, ip, w + nw);
595 }
596 itnull = 0;
597 if (t == NULL) {
598 itnull = 1;
599 nthread = 1;
600#ifdef USE_FFT2D_THREADS
601 nthread = FFT2D_MAX_THREADS;
602#endif /* USE_FFT2D_THREADS */
603 nt = 4 * nthread * n1;
604 if (n2 == 2 * nthread) {
605 nt >>= 1;
606 } else if (n2 < 2 * nthread) {
607 nt >>= 2;
608 }
609 t = (double *) malloc(sizeof(double) * nt);
610 fft2d_alloc_error_check(t);
611 }
612#ifdef USE_FFT2D_THREADS
613 if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) {
614 ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w);
615 ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w);
616 } else
617#endif /* USE_FFT2D_THREADS */
618 {
619 for (i = 0; i < n1; i++) {
620 ddct(n2, isgn, a[i], ip, w);
621 }
622 ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w);
623 }
624 if (itnull != 0) {
625 free(t);
626 }
627}
628
629
630void ddst2d(int n1, int n2, int isgn, double **a, double *t,
631 int *ip, double *w)
632{
633 void makewt(int nw, int *ip, double *w);
634 void makect(int nc, int *ip, double *c);
635 void ddst(int n, int isgn, double *a, int *ip, double *w);
636 void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a,
637 double *t, int *ip, double *w);
638#ifdef USE_FFT2D_THREADS
639 void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a,
640 int *ip, double *w);
641 void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a,
642 double *t, int *ip, double *w);
643#endif /* USE_FFT2D_THREADS */
644 int n, nw, nc, itnull, nthread, nt, i;
645
646 n = n1;
647 if (n < n2) {
648 n = n2;
649 }
650 nw = ip[0];
651 if (n > (nw << 2)) {
652 nw = n >> 2;
653 makewt(nw, ip, w);
654 }
655 nc = ip[1];
656 if (n > nc) {
657 nc = n;
658 makect(nc, ip, w + nw);
659 }
660 itnull = 0;
661 if (t == NULL) {
662 itnull = 1;
663 nthread = 1;
664#ifdef USE_FFT2D_THREADS
665 nthread = FFT2D_MAX_THREADS;
666#endif /* USE_FFT2D_THREADS */
667 nt = 4 * nthread * n1;
668 if (n2 == 2 * nthread) {
669 nt >>= 1;
670 } else if (n2 < 2 * nthread) {
671 nt >>= 2;
672 }
673 t = (double *) malloc(sizeof(double) * nt);
674 fft2d_alloc_error_check(t);
675 }
676#ifdef USE_FFT2D_THREADS
677 if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) {
678 ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w);
679 ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w);
680 } else
681#endif /* USE_FFT2D_THREADS */
682 {
683 for (i = 0; i < n1; i++) {
684 ddst(n2, isgn, a[i], ip, w);
685 }
686 ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w);
687 }
688 if (itnull != 0) {
689 free(t);
690 }
691}
692
693
694/* -------- child routines -------- */
695
696
697void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t,
698 int *ip, double *w)
699{
700 void cdft(int n, int isgn, double *a, int *ip, double *w);
701 int i, j;
702
703 if (n2 > 4) {
704 for (j = 0; j < n2; j += 8) {
705 for (i = 0; i < n1; i++) {
706 t[2 * i] = a[i][j];
707 t[2 * i + 1] = a[i][j + 1];
708 t[2 * n1 + 2 * i] = a[i][j + 2];
709 t[2 * n1 + 2 * i + 1] = a[i][j + 3];
710 t[4 * n1 + 2 * i] = a[i][j + 4];
711 t[4 * n1 + 2 * i + 1] = a[i][j + 5];
712 t[6 * n1 + 2 * i] = a[i][j + 6];
713 t[6 * n1 + 2 * i + 1] = a[i][j + 7];
714 }
715 cdft(2 * n1, isgn, t, ip, w);
716 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
717 cdft(2 * n1, isgn, &t[4 * n1], ip, w);
718 cdft(2 * n1, isgn, &t[6 * n1], ip, w);
719 for (i = 0; i < n1; i++) {
720 a[i][j] = t[2 * i];
721 a[i][j + 1] = t[2 * i + 1];
722 a[i][j + 2] = t[2 * n1 + 2 * i];
723 a[i][j + 3] = t[2 * n1 + 2 * i + 1];
724 a[i][j + 4] = t[4 * n1 + 2 * i];
725 a[i][j + 5] = t[4 * n1 + 2 * i + 1];
726 a[i][j + 6] = t[6 * n1 + 2 * i];
727 a[i][j + 7] = t[6 * n1 + 2 * i + 1];
728 }
729 }
730 } else if (n2 == 4) {
731 for (i = 0; i < n1; i++) {
732 t[2 * i] = a[i][0];
733 t[2 * i + 1] = a[i][1];
734 t[2 * n1 + 2 * i] = a[i][2];
735 t[2 * n1 + 2 * i + 1] = a[i][3];
736 }
737 cdft(2 * n1, isgn, t, ip, w);
738 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
739 for (i = 0; i < n1; i++) {
740 a[i][0] = t[2 * i];
741 a[i][1] = t[2 * i + 1];
742 a[i][2] = t[2 * n1 + 2 * i];
743 a[i][3] = t[2 * n1 + 2 * i + 1];
744 }
745 } else if (n2 == 2) {
746 for (i = 0; i < n1; i++) {
747 t[2 * i] = a[i][0];
748 t[2 * i + 1] = a[i][1];
749 }
750 cdft(2 * n1, isgn, t, ip, w);
751 for (i = 0; i < n1; i++) {
752 a[i][0] = t[2 * i];
753 a[i][1] = t[2 * i + 1];
754 }
755 }
756}
757
758
759void rdft2d_sub(int n1, int n2, int isgn, double **a)
760{
761 int n1h, i, j;
762 double xi;
763
764 n1h = n1 >> 1;
765 if (isgn < 0) {
766 for (i = 1; i < n1h; i++) {
767 j = n1 - i;
768 xi = a[i][0] - a[j][0];
769 a[i][0] += a[j][0];
770 a[j][0] = xi;
771 xi = a[j][1] - a[i][1];
772 a[i][1] += a[j][1];
773 a[j][1] = xi;
774 }
775 } else {
776 for (i = 1; i < n1h; i++) {
777 j = n1 - i;
778 a[j][0] = 0.5 * (a[i][0] - a[j][0]);
779 a[i][0] -= a[j][0];
780 a[j][1] = 0.5 * (a[i][1] + a[j][1]);
781 a[i][1] -= a[j][1];
782 }
783 }
784}
785
786
787void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a,
788 double *t, int *ip, double *w)
789{
790 void ddct(int n, int isgn, double *a, int *ip, double *w);
791 void ddst(int n, int isgn, double *a, int *ip, double *w);
792 int i, j;
793
794 if (n2 > 2) {
795 for (j = 0; j < n2; j += 4) {
796 for (i = 0; i < n1; i++) {
797 t[i] = a[i][j];
798 t[n1 + i] = a[i][j + 1];
799 t[2 * n1 + i] = a[i][j + 2];
800 t[3 * n1 + i] = a[i][j + 3];
801 }
802 if (ics == 0) {
803 ddct(n1, isgn, t, ip, w);
804 ddct(n1, isgn, &t[n1], ip, w);
805 ddct(n1, isgn, &t[2 * n1], ip, w);
806 ddct(n1, isgn, &t[3 * n1], ip, w);
807 } else {
808 ddst(n1, isgn, t, ip, w);
809 ddst(n1, isgn, &t[n1], ip, w);
810 ddst(n1, isgn, &t[2 * n1], ip, w);
811 ddst(n1, isgn, &t[3 * n1], ip, w);
812 }
813 for (i = 0; i < n1; i++) {
814 a[i][j] = t[i];
815 a[i][j + 1] = t[n1 + i];
816 a[i][j + 2] = t[2 * n1 + i];
817 a[i][j + 3] = t[3 * n1 + i];
818 }
819 }
820 } else if (n2 == 2) {
821 for (i = 0; i < n1; i++) {
822 t[i] = a[i][0];
823 t[n1 + i] = a[i][1];
824 }
825 if (ics == 0) {
826 ddct(n1, isgn, t, ip, w);
827 ddct(n1, isgn, &t[n1], ip, w);
828 } else {
829 ddst(n1, isgn, t, ip, w);
830 ddst(n1, isgn, &t[n1], ip, w);
831 }
832 for (i = 0; i < n1; i++) {
833 a[i][0] = t[i];
834 a[i][1] = t[n1 + i];
835 }
836 }
837}
838
839
840#ifdef USE_FFT2D_THREADS
841struct fft2d_arg_st {
842 int nthread;
843 int n0;
844 int n1;
845 int n2;
846 int ic;
847 int isgn;
848 double **a;
849 double *t;
850 int *ip;
851 double *w;
852};
853typedef struct fft2d_arg_st fft2d_arg_t;
854
855
856void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a,
857 int *ip, double *w)
858{
859 void *xdft2d0_th(void *p);
860 fft2d_thread_t th[FFT2D_MAX_THREADS];
861 fft2d_arg_t ag[FFT2D_MAX_THREADS];
862 int nthread, i;
863
864 nthread = FFT2D_MAX_THREADS;
865 if (nthread > n1) {
866 nthread = n1;
867 }
868 for (i = 0; i < nthread; i++) {
869 ag[i].nthread = nthread;
870 ag[i].n0 = i;
871 ag[i].n1 = n1;
872 ag[i].n2 = n2;
873 ag[i].ic = icr;
874 ag[i].isgn = isgn;
875 ag[i].a = a;
876 ag[i].ip = ip;
877 ag[i].w = w;
878 fft2d_thread_create(&th[i], xdft2d0_th, &ag[i]);
879 }
880 for (i = 0; i < nthread; i++) {
881 fft2d_thread_wait(th[i]);
882 }
883}
884
885
886void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t,
887 int *ip, double *w)
888{
889 void *cdft2d_th(void *p);
890 fft2d_thread_t th[FFT2D_MAX_THREADS];
891 fft2d_arg_t ag[FFT2D_MAX_THREADS];
892 int nthread, nt, i;
893
894 nthread = FFT2D_MAX_THREADS;
895 nt = 8 * n1;
896 if (n2 == 4 * FFT2D_MAX_THREADS) {
897 nt >>= 1;
898 } else if (n2 < 4 * FFT2D_MAX_THREADS) {
899 nthread = n2 >> 1;
900 nt >>= 2;
901 }
902 for (i = 0; i < nthread; i++) {
903 ag[i].nthread = nthread;
904 ag[i].n0 = i;
905 ag[i].n1 = n1;
906 ag[i].n2 = n2;
907 ag[i].isgn = isgn;
908 ag[i].a = a;
909 ag[i].t = &t[nt * i];
910 ag[i].ip = ip;
911 ag[i].w = w;
912 fft2d_thread_create(&th[i], cdft2d_th, &ag[i]);
913 }
914 for (i = 0; i < nthread; i++) {
915 fft2d_thread_wait(th[i]);
916 }
917}
918
919
920void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a,
921 int *ip, double *w)
922{
923 void *ddxt2d0_th(void *p);
924 fft2d_thread_t th[FFT2D_MAX_THREADS];
925 fft2d_arg_t ag[FFT2D_MAX_THREADS];
926 int nthread, i;
927
928 nthread = FFT2D_MAX_THREADS;
929 if (nthread > n1) {
930 nthread = n1;
931 }
932 for (i = 0; i < nthread; i++) {
933 ag[i].nthread = nthread;
934 ag[i].n0 = i;
935 ag[i].n1 = n1;
936 ag[i].n2 = n2;
937 ag[i].ic = ics;
938 ag[i].isgn = isgn;
939 ag[i].a = a;
940 ag[i].ip = ip;
941 ag[i].w = w;
942 fft2d_thread_create(&th[i], ddxt2d0_th, &ag[i]);
943 }
944 for (i = 0; i < nthread; i++) {
945 fft2d_thread_wait(th[i]);
946 }
947}
948
949
950void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a,
951 double *t, int *ip, double *w)
952{
953 void *ddxt2d_th(void *p);
954 fft2d_thread_t th[FFT2D_MAX_THREADS];
955 fft2d_arg_t ag[FFT2D_MAX_THREADS];
956 int nthread, nt, i;
957
958 nthread = FFT2D_MAX_THREADS;
959 nt = 4 * n1;
960 if (n2 == 2 * FFT2D_MAX_THREADS) {
961 nt >>= 1;
962 } else if (n2 < 2 * FFT2D_MAX_THREADS) {
963 nthread = n2;
964 nt >>= 2;
965 }
966 for (i = 0; i < nthread; i++) {
967 ag[i].nthread = nthread;
968 ag[i].n0 = i;
969 ag[i].n1 = n1;
970 ag[i].n2 = n2;
971 ag[i].ic = ics;
972 ag[i].isgn = isgn;
973 ag[i].a = a;
974 ag[i].t = &t[nt * i];
975 ag[i].ip = ip;
976 ag[i].w = w;
977 fft2d_thread_create(&th[i], ddxt2d_th, &ag[i]);
978 }
979 for (i = 0; i < nthread; i++) {
980 fft2d_thread_wait(th[i]);
981 }
982}
983
984
985void *xdft2d0_th(void *p)
986{
987 void cdft(int n, int isgn, double *a, int *ip, double *w);
988 void rdft(int n, int isgn, double *a, int *ip, double *w);
989 int nthread, n0, n1, n2, icr, isgn, *ip, i;
990 double **a, *w;
991
992 nthread = ((fft2d_arg_t *) p)->nthread;
993 n0 = ((fft2d_arg_t *) p)->n0;
994 n1 = ((fft2d_arg_t *) p)->n1;
995 n2 = ((fft2d_arg_t *) p)->n2;
996 icr = ((fft2d_arg_t *) p)->ic;
997 isgn = ((fft2d_arg_t *) p)->isgn;
998 a = ((fft2d_arg_t *) p)->a;
999 ip = ((fft2d_arg_t *) p)->ip;
1000 w = ((fft2d_arg_t *) p)->w;
1001 if (icr == 0) {
1002 for (i = n0; i < n1; i += nthread) {
1003 cdft(n2, isgn, a[i], ip, w);
1004 }
1005 } else {
1006 for (i = n0; i < n1; i += nthread) {
1007 rdft(n2, isgn, a[i], ip, w);
1008 }
1009 }
1010 return (void *) 0;
1011}
1012
1013
1014void *cdft2d_th(void *p)
1015{
1016 void cdft(int n, int isgn, double *a, int *ip, double *w);
1017 int nthread, n0, n1, n2, isgn, *ip, i, j;
1018 double **a, *t, *w;
1019
1020 nthread = ((fft2d_arg_t *) p)->nthread;
1021 n0 = ((fft2d_arg_t *) p)->n0;
1022 n1 = ((fft2d_arg_t *) p)->n1;
1023 n2 = ((fft2d_arg_t *) p)->n2;
1024 isgn = ((fft2d_arg_t *) p)->isgn;
1025 a = ((fft2d_arg_t *) p)->a;
1026 t = ((fft2d_arg_t *) p)->t;
1027 ip = ((fft2d_arg_t *) p)->ip;
1028 w = ((fft2d_arg_t *) p)->w;
1029 if (n2 > 4 * nthread) {
1030 for (j = 8 * n0; j < n2; j += 8 * nthread) {
1031 for (i = 0; i < n1; i++) {
1032 t[2 * i] = a[i][j];
1033 t[2 * i + 1] = a[i][j + 1];
1034 t[2 * n1 + 2 * i] = a[i][j + 2];
1035 t[2 * n1 + 2 * i + 1] = a[i][j + 3];
1036 t[4 * n1 + 2 * i] = a[i][j + 4];
1037 t[4 * n1 + 2 * i + 1] = a[i][j + 5];
1038 t[6 * n1 + 2 * i] = a[i][j + 6];
1039 t[6 * n1 + 2 * i + 1] = a[i][j + 7];
1040 }
1041 cdft(2 * n1, isgn, t, ip, w);
1042 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1043 cdft(2 * n1, isgn, &t[4 * n1], ip, w);
1044 cdft(2 * n1, isgn, &t[6 * n1], ip, w);
1045 for (i = 0; i < n1; i++) {
1046 a[i][j] = t[2 * i];
1047 a[i][j + 1] = t[2 * i + 1];
1048 a[i][j + 2] = t[2 * n1 + 2 * i];
1049 a[i][j + 3] = t[2 * n1 + 2 * i + 1];
1050 a[i][j + 4] = t[4 * n1 + 2 * i];
1051 a[i][j + 5] = t[4 * n1 + 2 * i + 1];
1052 a[i][j + 6] = t[6 * n1 + 2 * i];
1053 a[i][j + 7] = t[6 * n1 + 2 * i + 1];
1054 }
1055 }
1056 } else if (n2 == 4 * nthread) {
1057 for (i = 0; i < n1; i++) {
1058 t[2 * i] = a[i][4 * n0];
1059 t[2 * i + 1] = a[i][4 * n0 + 1];
1060 t[2 * n1 + 2 * i] = a[i][4 * n0 + 2];
1061 t[2 * n1 + 2 * i + 1] = a[i][4 * n0 + 3];
1062 }
1063 cdft(2 * n1, isgn, t, ip, w);
1064 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1065 for (i = 0; i < n1; i++) {
1066 a[i][4 * n0] = t[2 * i];
1067 a[i][4 * n0 + 1] = t[2 * i + 1];
1068 a[i][4 * n0 + 2] = t[2 * n1 + 2 * i];
1069 a[i][4 * n0 + 3] = t[2 * n1 + 2 * i + 1];
1070 }
1071 } else if (n2 == 2 * nthread) {
1072 for (i = 0; i < n1; i++) {
1073 t[2 * i] = a[i][2 * n0];
1074 t[2 * i + 1] = a[i][2 * n0 + 1];
1075 }
1076 cdft(2 * n1, isgn, t, ip, w);
1077 for (i = 0; i < n1; i++) {
1078 a[i][2 * n0] = t[2 * i];
1079 a[i][2 * n0 + 1] = t[2 * i + 1];
1080 }
1081 }
1082 return (void *) 0;
1083}
1084
1085
1086void *ddxt2d0_th(void *p)
1087{
1088 void ddct(int n, int isgn, double *a, int *ip, double *w);
1089 void ddst(int n, int isgn, double *a, int *ip, double *w);
1090 int nthread, n0, n1, n2, ics, isgn, *ip, i;
1091 double **a, *w;
1092
1093 nthread = ((fft2d_arg_t *) p)->nthread;
1094 n0 = ((fft2d_arg_t *) p)->n0;
1095 n1 = ((fft2d_arg_t *) p)->n1;
1096 n2 = ((fft2d_arg_t *) p)->n2;
1097 ics = ((fft2d_arg_t *) p)->ic;
1098 isgn = ((fft2d_arg_t *) p)->isgn;
1099 a = ((fft2d_arg_t *) p)->a;
1100 ip = ((fft2d_arg_t *) p)->ip;
1101 w = ((fft2d_arg_t *) p)->w;
1102 if (ics == 0) {
1103 for (i = n0; i < n1; i += nthread) {
1104 ddct(n2, isgn, a[i], ip, w);
1105 }
1106 } else {
1107 for (i = n0; i < n1; i += nthread) {
1108 ddst(n2, isgn, a[i], ip, w);
1109 }
1110 }
1111 return (void *) 0;
1112}
1113
1114
1115void *ddxt2d_th(void *p)
1116{
1117 void ddct(int n, int isgn, double *a, int *ip, double *w);
1118 void ddst(int n, int isgn, double *a, int *ip, double *w);
1119 int nthread, n0, n1, n2, ics, isgn, *ip, i, j;
1120 double **a, *t, *w;
1121
1122 nthread = ((fft2d_arg_t *) p)->nthread;
1123 n0 = ((fft2d_arg_t *) p)->n0;
1124 n1 = ((fft2d_arg_t *) p)->n1;
1125 n2 = ((fft2d_arg_t *) p)->n2;
1126 ics = ((fft2d_arg_t *) p)->ic;
1127 isgn = ((fft2d_arg_t *) p)->isgn;
1128 a = ((fft2d_arg_t *) p)->a;
1129 t = ((fft2d_arg_t *) p)->t;
1130 ip = ((fft2d_arg_t *) p)->ip;
1131 w = ((fft2d_arg_t *) p)->w;
1132 if (n2 > 2 * nthread) {
1133 for (j = 4 * n0; j < n2; j += 4 * nthread) {
1134 for (i = 0; i < n1; i++) {
1135 t[i] = a[i][j];
1136 t[n1 + i] = a[i][j + 1];
1137 t[2 * n1 + i] = a[i][j + 2];
1138 t[3 * n1 + i] = a[i][j + 3];
1139 }
1140 if (ics == 0) {
1141 ddct(n1, isgn, t, ip, w);
1142 ddct(n1, isgn, &t[n1], ip, w);
1143 ddct(n1, isgn, &t[2 * n1], ip, w);
1144 ddct(n1, isgn, &t[3 * n1], ip, w);
1145 } else {
1146 ddst(n1, isgn, t, ip, w);
1147 ddst(n1, isgn, &t[n1], ip, w);
1148 ddst(n1, isgn, &t[2 * n1], ip, w);
1149 ddst(n1, isgn, &t[3 * n1], ip, w);
1150 }
1151 for (i = 0; i < n1; i++) {
1152 a[i][j] = t[i];
1153 a[i][j + 1] = t[n1 + i];
1154 a[i][j + 2] = t[2 * n1 + i];
1155 a[i][j + 3] = t[3 * n1 + i];
1156 }
1157 }
1158 } else if (n2 == 2 * nthread) {
1159 for (i = 0; i < n1; i++) {
1160 t[i] = a[i][2 * n0];
1161 t[n1 + i] = a[i][2 * n0 + 1];
1162 }
1163 if (ics == 0) {
1164 ddct(n1, isgn, t, ip, w);
1165 ddct(n1, isgn, &t[n1], ip, w);
1166 } else {
1167 ddst(n1, isgn, t, ip, w);
1168 ddst(n1, isgn, &t[n1], ip, w);
1169 }
1170 for (i = 0; i < n1; i++) {
1171 a[i][2 * n0] = t[i];
1172 a[i][2 * n0 + 1] = t[n1 + i];
1173 }
1174 } else if (n2 == nthread) {
1175 for (i = 0; i < n1; i++) {
1176 t[i] = a[i][n0];
1177 }
1178 if (ics == 0) {
1179 ddct(n1, isgn, t, ip, w);
1180 } else {
1181 ddst(n1, isgn, t, ip, w);
1182 }
1183 for (i = 0; i < n1; i++) {
1184 a[i][n0] = t[i];
1185 }
1186 }
1187 return (void *) 0;
1188}
1189#endif /* USE_FFT2D_THREADS */
1190
1191