1 | /* |
2 | Fast Fourier/Cosine/Sine Transform |
3 | dimension :two |
4 | data length :power of 2 |
5 | decimation :frequency |
6 | radix :split-radix, row-column |
7 | data :inplace |
8 | table :use |
9 | functions |
10 | cdft2d: Complex Discrete Fourier Transform |
11 | rdft2d: Real Discrete Fourier Transform |
12 | ddct2d: Discrete Cosine Transform |
13 | ddst2d: Discrete Sine Transform |
14 | function prototypes |
15 | void cdft2d(int, int, int, double **, double *, int *, double *); |
16 | void rdft2d(int, int, int, double **, double *, int *, double *); |
17 | void rdft2dsort(int, int, int, double **); |
18 | void ddct2d(int, int, int, double **, double *, int *, double *); |
19 | void ddst2d(int, int, int, double **, double *, int *, double *); |
20 | necessary package |
21 | fftsg.c : 1D-FFT package |
22 | macro definitions |
23 | USE_FFT2D_PTHREADS : default=not defined |
24 | FFT2D_MAX_THREADS : must be 2^N, default=4 |
25 | FFT2D_THREADS_BEGIN_N : default=65536 |
26 | USE_FFT2D_WINTHREADS : default=not defined |
27 | FFT2D_MAX_THREADS : must be 2^N, default=4 |
28 | FFT2D_THREADS_BEGIN_N : default=131072 |
29 | |
30 | |
31 | -------- Complex DFT (Discrete Fourier Transform) -------- |
32 | [definition] |
33 | <case1> |
34 | X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * |
35 | exp(2*pi*i*j1*k1/n1) * |
36 | exp(2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 |
37 | <case2> |
38 | X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * |
39 | exp(-2*pi*i*j1*k1/n1) * |
40 | exp(-2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 |
41 | (notes: sum_j=0^n-1 is a summation from j=0 to n-1) |
42 | [usage] |
43 | <case1> |
44 | ip[0] = 0; // first time only |
45 | cdft2d(n1, 2*n2, 1, a, t, ip, w); |
46 | <case2> |
47 | ip[0] = 0; // first time only |
48 | cdft2d(n1, 2*n2, -1, a, t, ip, w); |
49 | [parameters] |
50 | n1 :data length (int) |
51 | n1 >= 1, n1 = power of 2 |
52 | 2*n2 :data length (int) |
53 | n2 >= 1, n2 = power of 2 |
54 | a[0...n1-1][0...2*n2-1] |
55 | :input/output data (double **) |
56 | input data |
57 | a[j1][2*j2] = Re(x[j1][j2]), |
58 | a[j1][2*j2+1] = Im(x[j1][j2]), |
59 | 0<=j1<n1, 0<=j2<n2 |
60 | output data |
61 | a[k1][2*k2] = Re(X[k1][k2]), |
62 | a[k1][2*k2+1] = Im(X[k1][k2]), |
63 | 0<=k1<n1, 0<=k2<n2 |
64 | t[0...*] |
65 | :work area (double *) |
66 | length of t >= 8*n1, if single thread, |
67 | length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads, |
68 | t is dynamically allocated, if t == NULL. |
69 | ip[0...*] |
70 | :work area for bit reversal (int *) |
71 | length of ip >= 2+sqrt(n) |
72 | (n = max(n1, n2)) |
73 | ip[0],ip[1] are pointers of the cos/sin table. |
74 | w[0...*] |
75 | :cos/sin table (double *) |
76 | length of w >= max(n1/2, n2/2) |
77 | w[],ip[] are initialized if ip[0] == 0. |
78 | [remark] |
79 | Inverse of |
80 | cdft2d(n1, 2*n2, -1, a, t, ip, w); |
81 | is |
82 | cdft2d(n1, 2*n2, 1, a, t, ip, w); |
83 | for (j1 = 0; j1 <= n1 - 1; j1++) { |
84 | for (j2 = 0; j2 <= 2 * n2 - 1; j2++) { |
85 | a[j1][j2] *= 1.0 / n1 / n2; |
86 | } |
87 | } |
88 | . |
89 | |
90 | |
91 | -------- Real DFT / Inverse of Real DFT -------- |
92 | [definition] |
93 | <case1> RDFT |
94 | R[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * |
95 | cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), |
96 | 0<=k1<n1, 0<=k2<n2 |
97 | I[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * |
98 | sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), |
99 | 0<=k1<n1, 0<=k2<n2 |
100 | <case2> IRDFT (excluding scale) |
101 | a[k1][k2] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 |
102 | (R[j1][j2] * |
103 | cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + |
104 | I[j1][j2] * |
105 | sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), |
106 | 0<=k1<n1, 0<=k2<n2 |
107 | (notes: R[n1-k1][n2-k2] = R[k1][k2], |
108 | I[n1-k1][n2-k2] = -I[k1][k2], |
109 | R[n1-k1][0] = R[k1][0], |
110 | I[n1-k1][0] = -I[k1][0], |
111 | R[0][n2-k2] = R[0][k2], |
112 | I[0][n2-k2] = -I[0][k2], |
113 | 0<k1<n1, 0<k2<n2) |
114 | [usage] |
115 | <case1> |
116 | ip[0] = 0; // first time only |
117 | rdft2d(n1, n2, 1, a, t, ip, w); |
118 | <case2> |
119 | ip[0] = 0; // first time only |
120 | rdft2d(n1, n2, -1, a, t, ip, w); |
121 | [parameters] |
122 | n1 :data length (int) |
123 | n1 >= 2, n1 = power of 2 |
124 | n2 :data length (int) |
125 | n2 >= 2, n2 = power of 2 |
126 | a[0...n1-1][0...n2-1] |
127 | :input/output data (double **) |
128 | <case1> |
129 | output data |
130 | a[k1][2*k2] = R[k1][k2] = R[n1-k1][n2-k2], |
131 | a[k1][2*k2+1] = I[k1][k2] = -I[n1-k1][n2-k2], |
132 | 0<k1<n1, 0<k2<n2/2, |
133 | a[0][2*k2] = R[0][k2] = R[0][n2-k2], |
134 | a[0][2*k2+1] = I[0][k2] = -I[0][n2-k2], |
135 | 0<k2<n2/2, |
136 | a[k1][0] = R[k1][0] = R[n1-k1][0], |
137 | a[k1][1] = I[k1][0] = -I[n1-k1][0], |
138 | a[n1-k1][1] = R[k1][n2/2] = R[n1-k1][n2/2], |
139 | a[n1-k1][0] = -I[k1][n2/2] = I[n1-k1][n2/2], |
140 | 0<k1<n1/2, |
141 | a[0][0] = R[0][0], |
142 | a[0][1] = R[0][n2/2], |
143 | a[n1/2][0] = R[n1/2][0], |
144 | a[n1/2][1] = R[n1/2][n2/2] |
145 | <case2> |
146 | input data |
147 | a[j1][2*j2] = R[j1][j2] = R[n1-j1][n2-j2], |
148 | a[j1][2*j2+1] = I[j1][j2] = -I[n1-j1][n2-j2], |
149 | 0<j1<n1, 0<j2<n2/2, |
150 | a[0][2*j2] = R[0][j2] = R[0][n2-j2], |
151 | a[0][2*j2+1] = I[0][j2] = -I[0][n2-j2], |
152 | 0<j2<n2/2, |
153 | a[j1][0] = R[j1][0] = R[n1-j1][0], |
154 | a[j1][1] = I[j1][0] = -I[n1-j1][0], |
155 | a[n1-j1][1] = R[j1][n2/2] = R[n1-j1][n2/2], |
156 | a[n1-j1][0] = -I[j1][n2/2] = I[n1-j1][n2/2], |
157 | 0<j1<n1/2, |
158 | a[0][0] = R[0][0], |
159 | a[0][1] = R[0][n2/2], |
160 | a[n1/2][0] = R[n1/2][0], |
161 | a[n1/2][1] = R[n1/2][n2/2] |
162 | ---- output ordering ---- |
163 | rdft2d(n1, n2, 1, a, t, ip, w); |
164 | rdft2dsort(n1, n2, 1, a); |
165 | // stored data is a[0...n1-1][0...n2+1]: |
166 | // a[k1][2*k2] = R[k1][k2], |
167 | // a[k1][2*k2+1] = I[k1][k2], |
168 | // 0<=k1<n1, 0<=k2<=n2/2. |
169 | // the stored data is larger than the input data! |
170 | ---- input ordering ---- |
171 | rdft2dsort(n1, n2, -1, a); |
172 | rdft2d(n1, n2, -1, a, t, ip, w); |
173 | t[0...*] |
174 | :work area (double *) |
175 | length of t >= 8*n1, if single thread, |
176 | length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads, |
177 | t is dynamically allocated, if t == NULL. |
178 | ip[0...*] |
179 | :work area for bit reversal (int *) |
180 | length of ip >= 2+sqrt(n) |
181 | (n = max(n1, n2/2)) |
182 | ip[0],ip[1] are pointers of the cos/sin table. |
183 | w[0...*] |
184 | :cos/sin table (double *) |
185 | length of w >= max(n1/2, n2/4) + n2/4 |
186 | w[],ip[] are initialized if ip[0] == 0. |
187 | [remark] |
188 | Inverse of |
189 | rdft2d(n1, n2, 1, a, t, ip, w); |
190 | is |
191 | rdft2d(n1, n2, -1, a, t, ip, w); |
192 | for (j1 = 0; j1 <= n1 - 1; j1++) { |
193 | for (j2 = 0; j2 <= n2 - 1; j2++) { |
194 | a[j1][j2] *= 2.0 / n1 / n2; |
195 | } |
196 | } |
197 | . |
198 | |
199 | |
200 | -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- |
201 | [definition] |
202 | <case1> IDCT (excluding scale) |
203 | C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * |
204 | cos(pi*j1*(k1+1/2)/n1) * |
205 | cos(pi*j2*(k2+1/2)/n2), |
206 | 0<=k1<n1, 0<=k2<n2 |
207 | <case2> DCT |
208 | C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * |
209 | cos(pi*(j1+1/2)*k1/n1) * |
210 | cos(pi*(j2+1/2)*k2/n2), |
211 | 0<=k1<n1, 0<=k2<n2 |
212 | [usage] |
213 | <case1> |
214 | ip[0] = 0; // first time only |
215 | ddct2d(n1, n2, 1, a, t, ip, w); |
216 | <case2> |
217 | ip[0] = 0; // first time only |
218 | ddct2d(n1, n2, -1, a, t, ip, w); |
219 | [parameters] |
220 | n1 :data length (int) |
221 | n1 >= 2, n1 = power of 2 |
222 | n2 :data length (int) |
223 | n2 >= 2, n2 = power of 2 |
224 | a[0...n1-1][0...n2-1] |
225 | :input/output data (double **) |
226 | output data |
227 | a[k1][k2] = C[k1][k2], 0<=k1<n1, 0<=k2<n2 |
228 | t[0...*] |
229 | :work area (double *) |
230 | length of t >= 4*n1, if single thread, |
231 | length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads, |
232 | t is dynamically allocated, if t == NULL. |
233 | ip[0...*] |
234 | :work area for bit reversal (int *) |
235 | length of ip >= 2+sqrt(n) |
236 | (n = max(n1/2, n2/2)) |
237 | ip[0],ip[1] are pointers of the cos/sin table. |
238 | w[0...*] |
239 | :cos/sin table (double *) |
240 | length of w >= max(n1*3/2, n2*3/2) |
241 | w[],ip[] are initialized if ip[0] == 0. |
242 | [remark] |
243 | Inverse of |
244 | ddct2d(n1, n2, -1, a, t, ip, w); |
245 | is |
246 | for (j1 = 0; j1 <= n1 - 1; j1++) { |
247 | a[j1][0] *= 0.5; |
248 | } |
249 | for (j2 = 0; j2 <= n2 - 1; j2++) { |
250 | a[0][j2] *= 0.5; |
251 | } |
252 | ddct2d(n1, n2, 1, a, t, ip, w); |
253 | for (j1 = 0; j1 <= n1 - 1; j1++) { |
254 | for (j2 = 0; j2 <= n2 - 1; j2++) { |
255 | a[j1][j2] *= 4.0 / n1 / n2; |
256 | } |
257 | } |
258 | . |
259 | |
260 | |
261 | -------- DST (Discrete Sine Transform) / Inverse of DST -------- |
262 | [definition] |
263 | <case1> IDST (excluding scale) |
264 | S[k1][k2] = sum_j1=1^n1 sum_j2=1^n2 A[j1][j2] * |
265 | sin(pi*j1*(k1+1/2)/n1) * |
266 | sin(pi*j2*(k2+1/2)/n2), |
267 | 0<=k1<n1, 0<=k2<n2 |
268 | <case2> DST |
269 | S[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * |
270 | sin(pi*(j1+1/2)*k1/n1) * |
271 | sin(pi*(j2+1/2)*k2/n2), |
272 | 0<k1<=n1, 0<k2<=n2 |
273 | [usage] |
274 | <case1> |
275 | ip[0] = 0; // first time only |
276 | ddst2d(n1, n2, 1, a, t, ip, w); |
277 | <case2> |
278 | ip[0] = 0; // first time only |
279 | ddst2d(n1, n2, -1, a, t, ip, w); |
280 | [parameters] |
281 | n1 :data length (int) |
282 | n1 >= 2, n1 = power of 2 |
283 | n2 :data length (int) |
284 | n2 >= 2, n2 = power of 2 |
285 | a[0...n1-1][0...n2-1] |
286 | :input/output data (double **) |
287 | <case1> |
288 | input data |
289 | a[j1][j2] = A[j1][j2], 0<j1<n1, 0<j2<n2, |
290 | a[j1][0] = A[j1][n2], 0<j1<n1, |
291 | a[0][j2] = A[n1][j2], 0<j2<n2, |
292 | a[0][0] = A[n1][n2] |
293 | (i.e. A[j1][j2] = a[j1 % n1][j2 % n2]) |
294 | output data |
295 | a[k1][k2] = S[k1][k2], 0<=k1<n1, 0<=k2<n2 |
296 | <case2> |
297 | output data |
298 | a[k1][k2] = S[k1][k2], 0<k1<n1, 0<k2<n2, |
299 | a[k1][0] = S[k1][n2], 0<k1<n1, |
300 | a[0][k2] = S[n1][k2], 0<k2<n2, |
301 | a[0][0] = S[n1][n2] |
302 | (i.e. S[k1][k2] = a[k1 % n1][k2 % n2]) |
303 | t[0...*] |
304 | :work area (double *) |
305 | length of t >= 4*n1, if single thread, |
306 | length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads, |
307 | t is dynamically allocated, if t == NULL. |
308 | ip[0...*] |
309 | :work area for bit reversal (int *) |
310 | length of ip >= 2+sqrt(n) |
311 | (n = max(n1/2, n2/2)) |
312 | ip[0],ip[1] are pointers of the cos/sin table. |
313 | w[0...*] |
314 | :cos/sin table (double *) |
315 | length of w >= max(n1*3/2, n2*3/2) |
316 | w[],ip[] are initialized if ip[0] == 0. |
317 | [remark] |
318 | Inverse of |
319 | ddst2d(n1, n2, -1, a, t, ip, w); |
320 | is |
321 | for (j1 = 0; j1 <= n1 - 1; j1++) { |
322 | a[j1][0] *= 0.5; |
323 | } |
324 | for (j2 = 0; j2 <= n2 - 1; j2++) { |
325 | a[0][j2] *= 0.5; |
326 | } |
327 | ddst2d(n1, n2, 1, a, t, ip, w); |
328 | for (j1 = 0; j1 <= n1 - 1; j1++) { |
329 | for (j2 = 0; j2 <= n2 - 1; j2++) { |
330 | a[j1][j2] *= 4.0 / n1 / n2; |
331 | } |
332 | } |
333 | . |
334 | */ |
335 | |
336 | |
337 | #include <stdio.h> |
338 | #include <stdlib.h> |
339 | #define fft2d_alloc_error_check(p) { \ |
340 | if ((p) == NULL) { \ |
341 | fprintf(stderr, "fft2d memory allocation error\n"); \ |
342 | exit(1); \ |
343 | } \ |
344 | } |
345 | |
346 | |
347 | #ifdef USE_FFT2D_PTHREADS |
348 | #define USE_FFT2D_THREADS |
349 | #ifndef FFT2D_MAX_THREADS |
350 | #define FFT2D_MAX_THREADS 4 |
351 | #endif |
352 | #ifndef FFT2D_THREADS_BEGIN_N |
353 | #define FFT2D_THREADS_BEGIN_N 65536 |
354 | #endif |
355 | #include <pthread.h> |
356 | #define fft2d_thread_t pthread_t |
357 | #define fft2d_thread_create(thp,func,argp) { \ |
358 | if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \ |
359 | fprintf(stderr, "fft2d thread error\n"); \ |
360 | exit(1); \ |
361 | } \ |
362 | } |
363 | #define fft2d_thread_wait(th) { \ |
364 | if (pthread_join(th, NULL) != 0) { \ |
365 | fprintf(stderr, "fft2d thread error\n"); \ |
366 | exit(1); \ |
367 | } \ |
368 | } |
369 | #endif /* USE_FFT2D_PTHREADS */ |
370 | |
371 | |
372 | #ifdef USE_FFT2D_WINTHREADS |
373 | #define USE_FFT2D_THREADS |
374 | #ifndef FFT2D_MAX_THREADS |
375 | #define FFT2D_MAX_THREADS 4 |
376 | #endif |
377 | #ifndef FFT2D_THREADS_BEGIN_N |
378 | #define FFT2D_THREADS_BEGIN_N 131072 |
379 | #endif |
380 | #include <windows.h> |
381 | #define fft2d_thread_t HANDLE |
382 | #define fft2d_thread_create(thp,func,argp) { \ |
383 | DWORD thid; \ |
384 | *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \ |
385 | if (*(thp) == 0) { \ |
386 | fprintf(stderr, "fft2d thread error\n"); \ |
387 | exit(1); \ |
388 | } \ |
389 | } |
390 | #define fft2d_thread_wait(th) { \ |
391 | WaitForSingleObject(th, INFINITE); \ |
392 | CloseHandle(th); \ |
393 | } |
394 | #endif /* USE_FFT2D_WINTHREADS */ |
395 | |
396 | |
397 | void cdft2d(int n1, int n2, int isgn, double **a, double *t, |
398 | int *ip, double *w) |
399 | { |
400 | void makewt(int nw, int *ip, double *w); |
401 | void cdft(int n, int isgn, double *a, int *ip, double *w); |
402 | void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, |
403 | int *ip, double *w); |
404 | #ifdef USE_FFT2D_THREADS |
405 | void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, |
406 | int *ip, double *w); |
407 | void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, |
408 | int *ip, double *w); |
409 | #endif /* USE_FFT2D_THREADS */ |
410 | int n, itnull, nthread, nt, i; |
411 | |
412 | n = n1 << 1; |
413 | if (n < n2) { |
414 | n = n2; |
415 | } |
416 | if (n > (ip[0] << 2)) { |
417 | makewt(n >> 2, ip, w); |
418 | } |
419 | itnull = 0; |
420 | if (t == NULL) { |
421 | itnull = 1; |
422 | nthread = 1; |
423 | #ifdef USE_FFT2D_THREADS |
424 | nthread = FFT2D_MAX_THREADS; |
425 | #endif /* USE_FFT2D_THREADS */ |
426 | nt = 8 * nthread * n1; |
427 | if (n2 == 4 * nthread) { |
428 | nt >>= 1; |
429 | } else if (n2 < 4 * nthread) { |
430 | nt >>= 2; |
431 | } |
432 | t = (double *) malloc(sizeof(double) * nt); |
433 | fft2d_alloc_error_check(t); |
434 | } |
435 | #ifdef USE_FFT2D_THREADS |
436 | if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { |
437 | xdft2d0_subth(n1, n2, 0, isgn, a, ip, w); |
438 | cdft2d_subth(n1, n2, isgn, a, t, ip, w); |
439 | } else |
440 | #endif /* USE_FFT2D_THREADS */ |
441 | { |
442 | for (i = 0; i < n1; i++) { |
443 | cdft(n2, isgn, a[i], ip, w); |
444 | } |
445 | cdft2d_sub(n1, n2, isgn, a, t, ip, w); |
446 | } |
447 | if (itnull != 0) { |
448 | free(t); |
449 | } |
450 | } |
451 | |
452 | |
453 | void rdft2d(int n1, int n2, int isgn, double **a, double *t, |
454 | int *ip, double *w) |
455 | { |
456 | void makewt(int nw, int *ip, double *w); |
457 | void makect(int nc, int *ip, double *c); |
458 | void rdft(int n, int isgn, double *a, int *ip, double *w); |
459 | void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, |
460 | int *ip, double *w); |
461 | void rdft2d_sub(int n1, int n2, int isgn, double **a); |
462 | #ifdef USE_FFT2D_THREADS |
463 | void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, |
464 | int *ip, double *w); |
465 | void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, |
466 | int *ip, double *w); |
467 | #endif /* USE_FFT2D_THREADS */ |
468 | int n, nw, nc, itnull, nthread, nt, i; |
469 | |
470 | n = n1 << 1; |
471 | if (n < n2) { |
472 | n = n2; |
473 | } |
474 | nw = ip[0]; |
475 | if (n > (nw << 2)) { |
476 | nw = n >> 2; |
477 | makewt(nw, ip, w); |
478 | } |
479 | nc = ip[1]; |
480 | if (n2 > (nc << 2)) { |
481 | nc = n2 >> 2; |
482 | makect(nc, ip, w + nw); |
483 | } |
484 | itnull = 0; |
485 | if (t == NULL) { |
486 | itnull = 1; |
487 | nthread = 1; |
488 | #ifdef USE_FFT2D_THREADS |
489 | nthread = FFT2D_MAX_THREADS; |
490 | #endif /* USE_FFT2D_THREADS */ |
491 | nt = 8 * nthread * n1; |
492 | if (n2 == 4 * nthread) { |
493 | nt >>= 1; |
494 | } else if (n2 < 4 * nthread) { |
495 | nt >>= 2; |
496 | } |
497 | t = (double *) malloc(sizeof(double) * nt); |
498 | fft2d_alloc_error_check(t); |
499 | } |
500 | #ifdef USE_FFT2D_THREADS |
501 | if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { |
502 | if (isgn < 0) { |
503 | rdft2d_sub(n1, n2, isgn, a); |
504 | cdft2d_subth(n1, n2, isgn, a, t, ip, w); |
505 | } |
506 | xdft2d0_subth(n1, n2, 1, isgn, a, ip, w); |
507 | if (isgn >= 0) { |
508 | cdft2d_subth(n1, n2, isgn, a, t, ip, w); |
509 | rdft2d_sub(n1, n2, isgn, a); |
510 | } |
511 | } else |
512 | #endif /* USE_FFT2D_THREADS */ |
513 | { |
514 | if (isgn < 0) { |
515 | rdft2d_sub(n1, n2, isgn, a); |
516 | cdft2d_sub(n1, n2, isgn, a, t, ip, w); |
517 | } |
518 | for (i = 0; i < n1; i++) { |
519 | rdft(n2, isgn, a[i], ip, w); |
520 | } |
521 | if (isgn >= 0) { |
522 | cdft2d_sub(n1, n2, isgn, a, t, ip, w); |
523 | rdft2d_sub(n1, n2, isgn, a); |
524 | } |
525 | } |
526 | if (itnull != 0) { |
527 | free(t); |
528 | } |
529 | } |
530 | |
531 | |
532 | void rdft2dsort(int n1, int n2, int isgn, double **a) |
533 | { |
534 | int n1h, i; |
535 | double x, y; |
536 | |
537 | n1h = n1 >> 1; |
538 | if (isgn < 0) { |
539 | for (i = n1h + 1; i < n1; i++) { |
540 | a[i][0] = a[i][n2 + 1]; |
541 | a[i][1] = a[i][n2]; |
542 | } |
543 | a[0][1] = a[0][n2]; |
544 | a[n1h][1] = a[n1h][n2]; |
545 | } else { |
546 | for (i = n1h + 1; i < n1; i++) { |
547 | y = a[i][0]; |
548 | x = a[i][1]; |
549 | a[i][n2] = x; |
550 | a[i][n2 + 1] = y; |
551 | a[n1 - i][n2] = x; |
552 | a[n1 - i][n2 + 1] = -y; |
553 | a[i][0] = a[n1 - i][0]; |
554 | a[i][1] = -a[n1 - i][1]; |
555 | } |
556 | a[0][n2] = a[0][1]; |
557 | a[0][n2 + 1] = 0; |
558 | a[0][1] = 0; |
559 | a[n1h][n2] = a[n1h][1]; |
560 | a[n1h][n2 + 1] = 0; |
561 | a[n1h][1] = 0; |
562 | } |
563 | } |
564 | |
565 | |
566 | void ddct2d(int n1, int n2, int isgn, double **a, double *t, |
567 | int *ip, double *w) |
568 | { |
569 | void makewt(int nw, int *ip, double *w); |
570 | void makect(int nc, int *ip, double *c); |
571 | void ddct(int n, int isgn, double *a, int *ip, double *w); |
572 | void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, |
573 | double *t, int *ip, double *w); |
574 | #ifdef USE_FFT2D_THREADS |
575 | void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, |
576 | int *ip, double *w); |
577 | void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, |
578 | double *t, int *ip, double *w); |
579 | #endif /* USE_FFT2D_THREADS */ |
580 | int n, nw, nc, itnull, nthread, nt, i; |
581 | |
582 | n = n1; |
583 | if (n < n2) { |
584 | n = n2; |
585 | } |
586 | nw = ip[0]; |
587 | if (n > (nw << 2)) { |
588 | nw = n >> 2; |
589 | makewt(nw, ip, w); |
590 | } |
591 | nc = ip[1]; |
592 | if (n > nc) { |
593 | nc = n; |
594 | makect(nc, ip, w + nw); |
595 | } |
596 | itnull = 0; |
597 | if (t == NULL) { |
598 | itnull = 1; |
599 | nthread = 1; |
600 | #ifdef USE_FFT2D_THREADS |
601 | nthread = FFT2D_MAX_THREADS; |
602 | #endif /* USE_FFT2D_THREADS */ |
603 | nt = 4 * nthread * n1; |
604 | if (n2 == 2 * nthread) { |
605 | nt >>= 1; |
606 | } else if (n2 < 2 * nthread) { |
607 | nt >>= 2; |
608 | } |
609 | t = (double *) malloc(sizeof(double) * nt); |
610 | fft2d_alloc_error_check(t); |
611 | } |
612 | #ifdef USE_FFT2D_THREADS |
613 | if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { |
614 | ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w); |
615 | ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w); |
616 | } else |
617 | #endif /* USE_FFT2D_THREADS */ |
618 | { |
619 | for (i = 0; i < n1; i++) { |
620 | ddct(n2, isgn, a[i], ip, w); |
621 | } |
622 | ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w); |
623 | } |
624 | if (itnull != 0) { |
625 | free(t); |
626 | } |
627 | } |
628 | |
629 | |
630 | void ddst2d(int n1, int n2, int isgn, double **a, double *t, |
631 | int *ip, double *w) |
632 | { |
633 | void makewt(int nw, int *ip, double *w); |
634 | void makect(int nc, int *ip, double *c); |
635 | void ddst(int n, int isgn, double *a, int *ip, double *w); |
636 | void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, |
637 | double *t, int *ip, double *w); |
638 | #ifdef USE_FFT2D_THREADS |
639 | void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, |
640 | int *ip, double *w); |
641 | void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, |
642 | double *t, int *ip, double *w); |
643 | #endif /* USE_FFT2D_THREADS */ |
644 | int n, nw, nc, itnull, nthread, nt, i; |
645 | |
646 | n = n1; |
647 | if (n < n2) { |
648 | n = n2; |
649 | } |
650 | nw = ip[0]; |
651 | if (n > (nw << 2)) { |
652 | nw = n >> 2; |
653 | makewt(nw, ip, w); |
654 | } |
655 | nc = ip[1]; |
656 | if (n > nc) { |
657 | nc = n; |
658 | makect(nc, ip, w + nw); |
659 | } |
660 | itnull = 0; |
661 | if (t == NULL) { |
662 | itnull = 1; |
663 | nthread = 1; |
664 | #ifdef USE_FFT2D_THREADS |
665 | nthread = FFT2D_MAX_THREADS; |
666 | #endif /* USE_FFT2D_THREADS */ |
667 | nt = 4 * nthread * n1; |
668 | if (n2 == 2 * nthread) { |
669 | nt >>= 1; |
670 | } else if (n2 < 2 * nthread) { |
671 | nt >>= 2; |
672 | } |
673 | t = (double *) malloc(sizeof(double) * nt); |
674 | fft2d_alloc_error_check(t); |
675 | } |
676 | #ifdef USE_FFT2D_THREADS |
677 | if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { |
678 | ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w); |
679 | ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w); |
680 | } else |
681 | #endif /* USE_FFT2D_THREADS */ |
682 | { |
683 | for (i = 0; i < n1; i++) { |
684 | ddst(n2, isgn, a[i], ip, w); |
685 | } |
686 | ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w); |
687 | } |
688 | if (itnull != 0) { |
689 | free(t); |
690 | } |
691 | } |
692 | |
693 | |
694 | /* -------- child routines -------- */ |
695 | |
696 | |
697 | void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, |
698 | int *ip, double *w) |
699 | { |
700 | void cdft(int n, int isgn, double *a, int *ip, double *w); |
701 | int i, j; |
702 | |
703 | if (n2 > 4) { |
704 | for (j = 0; j < n2; j += 8) { |
705 | for (i = 0; i < n1; i++) { |
706 | t[2 * i] = a[i][j]; |
707 | t[2 * i + 1] = a[i][j + 1]; |
708 | t[2 * n1 + 2 * i] = a[i][j + 2]; |
709 | t[2 * n1 + 2 * i + 1] = a[i][j + 3]; |
710 | t[4 * n1 + 2 * i] = a[i][j + 4]; |
711 | t[4 * n1 + 2 * i + 1] = a[i][j + 5]; |
712 | t[6 * n1 + 2 * i] = a[i][j + 6]; |
713 | t[6 * n1 + 2 * i + 1] = a[i][j + 7]; |
714 | } |
715 | cdft(2 * n1, isgn, t, ip, w); |
716 | cdft(2 * n1, isgn, &t[2 * n1], ip, w); |
717 | cdft(2 * n1, isgn, &t[4 * n1], ip, w); |
718 | cdft(2 * n1, isgn, &t[6 * n1], ip, w); |
719 | for (i = 0; i < n1; i++) { |
720 | a[i][j] = t[2 * i]; |
721 | a[i][j + 1] = t[2 * i + 1]; |
722 | a[i][j + 2] = t[2 * n1 + 2 * i]; |
723 | a[i][j + 3] = t[2 * n1 + 2 * i + 1]; |
724 | a[i][j + 4] = t[4 * n1 + 2 * i]; |
725 | a[i][j + 5] = t[4 * n1 + 2 * i + 1]; |
726 | a[i][j + 6] = t[6 * n1 + 2 * i]; |
727 | a[i][j + 7] = t[6 * n1 + 2 * i + 1]; |
728 | } |
729 | } |
730 | } else if (n2 == 4) { |
731 | for (i = 0; i < n1; i++) { |
732 | t[2 * i] = a[i][0]; |
733 | t[2 * i + 1] = a[i][1]; |
734 | t[2 * n1 + 2 * i] = a[i][2]; |
735 | t[2 * n1 + 2 * i + 1] = a[i][3]; |
736 | } |
737 | cdft(2 * n1, isgn, t, ip, w); |
738 | cdft(2 * n1, isgn, &t[2 * n1], ip, w); |
739 | for (i = 0; i < n1; i++) { |
740 | a[i][0] = t[2 * i]; |
741 | a[i][1] = t[2 * i + 1]; |
742 | a[i][2] = t[2 * n1 + 2 * i]; |
743 | a[i][3] = t[2 * n1 + 2 * i + 1]; |
744 | } |
745 | } else if (n2 == 2) { |
746 | for (i = 0; i < n1; i++) { |
747 | t[2 * i] = a[i][0]; |
748 | t[2 * i + 1] = a[i][1]; |
749 | } |
750 | cdft(2 * n1, isgn, t, ip, w); |
751 | for (i = 0; i < n1; i++) { |
752 | a[i][0] = t[2 * i]; |
753 | a[i][1] = t[2 * i + 1]; |
754 | } |
755 | } |
756 | } |
757 | |
758 | |
759 | void rdft2d_sub(int n1, int n2, int isgn, double **a) |
760 | { |
761 | int n1h, i, j; |
762 | double xi; |
763 | |
764 | n1h = n1 >> 1; |
765 | if (isgn < 0) { |
766 | for (i = 1; i < n1h; i++) { |
767 | j = n1 - i; |
768 | xi = a[i][0] - a[j][0]; |
769 | a[i][0] += a[j][0]; |
770 | a[j][0] = xi; |
771 | xi = a[j][1] - a[i][1]; |
772 | a[i][1] += a[j][1]; |
773 | a[j][1] = xi; |
774 | } |
775 | } else { |
776 | for (i = 1; i < n1h; i++) { |
777 | j = n1 - i; |
778 | a[j][0] = 0.5 * (a[i][0] - a[j][0]); |
779 | a[i][0] -= a[j][0]; |
780 | a[j][1] = 0.5 * (a[i][1] + a[j][1]); |
781 | a[i][1] -= a[j][1]; |
782 | } |
783 | } |
784 | } |
785 | |
786 | |
787 | void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, |
788 | double *t, int *ip, double *w) |
789 | { |
790 | void ddct(int n, int isgn, double *a, int *ip, double *w); |
791 | void ddst(int n, int isgn, double *a, int *ip, double *w); |
792 | int i, j; |
793 | |
794 | if (n2 > 2) { |
795 | for (j = 0; j < n2; j += 4) { |
796 | for (i = 0; i < n1; i++) { |
797 | t[i] = a[i][j]; |
798 | t[n1 + i] = a[i][j + 1]; |
799 | t[2 * n1 + i] = a[i][j + 2]; |
800 | t[3 * n1 + i] = a[i][j + 3]; |
801 | } |
802 | if (ics == 0) { |
803 | ddct(n1, isgn, t, ip, w); |
804 | ddct(n1, isgn, &t[n1], ip, w); |
805 | ddct(n1, isgn, &t[2 * n1], ip, w); |
806 | ddct(n1, isgn, &t[3 * n1], ip, w); |
807 | } else { |
808 | ddst(n1, isgn, t, ip, w); |
809 | ddst(n1, isgn, &t[n1], ip, w); |
810 | ddst(n1, isgn, &t[2 * n1], ip, w); |
811 | ddst(n1, isgn, &t[3 * n1], ip, w); |
812 | } |
813 | for (i = 0; i < n1; i++) { |
814 | a[i][j] = t[i]; |
815 | a[i][j + 1] = t[n1 + i]; |
816 | a[i][j + 2] = t[2 * n1 + i]; |
817 | a[i][j + 3] = t[3 * n1 + i]; |
818 | } |
819 | } |
820 | } else if (n2 == 2) { |
821 | for (i = 0; i < n1; i++) { |
822 | t[i] = a[i][0]; |
823 | t[n1 + i] = a[i][1]; |
824 | } |
825 | if (ics == 0) { |
826 | ddct(n1, isgn, t, ip, w); |
827 | ddct(n1, isgn, &t[n1], ip, w); |
828 | } else { |
829 | ddst(n1, isgn, t, ip, w); |
830 | ddst(n1, isgn, &t[n1], ip, w); |
831 | } |
832 | for (i = 0; i < n1; i++) { |
833 | a[i][0] = t[i]; |
834 | a[i][1] = t[n1 + i]; |
835 | } |
836 | } |
837 | } |
838 | |
839 | |
840 | #ifdef USE_FFT2D_THREADS |
841 | struct fft2d_arg_st { |
842 | int nthread; |
843 | int n0; |
844 | int n1; |
845 | int n2; |
846 | int ic; |
847 | int isgn; |
848 | double **a; |
849 | double *t; |
850 | int *ip; |
851 | double *w; |
852 | }; |
853 | typedef struct fft2d_arg_st fft2d_arg_t; |
854 | |
855 | |
856 | void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, |
857 | int *ip, double *w) |
858 | { |
859 | void *xdft2d0_th(void *p); |
860 | fft2d_thread_t th[FFT2D_MAX_THREADS]; |
861 | fft2d_arg_t ag[FFT2D_MAX_THREADS]; |
862 | int nthread, i; |
863 | |
864 | nthread = FFT2D_MAX_THREADS; |
865 | if (nthread > n1) { |
866 | nthread = n1; |
867 | } |
868 | for (i = 0; i < nthread; i++) { |
869 | ag[i].nthread = nthread; |
870 | ag[i].n0 = i; |
871 | ag[i].n1 = n1; |
872 | ag[i].n2 = n2; |
873 | ag[i].ic = icr; |
874 | ag[i].isgn = isgn; |
875 | ag[i].a = a; |
876 | ag[i].ip = ip; |
877 | ag[i].w = w; |
878 | fft2d_thread_create(&th[i], xdft2d0_th, &ag[i]); |
879 | } |
880 | for (i = 0; i < nthread; i++) { |
881 | fft2d_thread_wait(th[i]); |
882 | } |
883 | } |
884 | |
885 | |
886 | void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, |
887 | int *ip, double *w) |
888 | { |
889 | void *cdft2d_th(void *p); |
890 | fft2d_thread_t th[FFT2D_MAX_THREADS]; |
891 | fft2d_arg_t ag[FFT2D_MAX_THREADS]; |
892 | int nthread, nt, i; |
893 | |
894 | nthread = FFT2D_MAX_THREADS; |
895 | nt = 8 * n1; |
896 | if (n2 == 4 * FFT2D_MAX_THREADS) { |
897 | nt >>= 1; |
898 | } else if (n2 < 4 * FFT2D_MAX_THREADS) { |
899 | nthread = n2 >> 1; |
900 | nt >>= 2; |
901 | } |
902 | for (i = 0; i < nthread; i++) { |
903 | ag[i].nthread = nthread; |
904 | ag[i].n0 = i; |
905 | ag[i].n1 = n1; |
906 | ag[i].n2 = n2; |
907 | ag[i].isgn = isgn; |
908 | ag[i].a = a; |
909 | ag[i].t = &t[nt * i]; |
910 | ag[i].ip = ip; |
911 | ag[i].w = w; |
912 | fft2d_thread_create(&th[i], cdft2d_th, &ag[i]); |
913 | } |
914 | for (i = 0; i < nthread; i++) { |
915 | fft2d_thread_wait(th[i]); |
916 | } |
917 | } |
918 | |
919 | |
920 | void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, |
921 | int *ip, double *w) |
922 | { |
923 | void *ddxt2d0_th(void *p); |
924 | fft2d_thread_t th[FFT2D_MAX_THREADS]; |
925 | fft2d_arg_t ag[FFT2D_MAX_THREADS]; |
926 | int nthread, i; |
927 | |
928 | nthread = FFT2D_MAX_THREADS; |
929 | if (nthread > n1) { |
930 | nthread = n1; |
931 | } |
932 | for (i = 0; i < nthread; i++) { |
933 | ag[i].nthread = nthread; |
934 | ag[i].n0 = i; |
935 | ag[i].n1 = n1; |
936 | ag[i].n2 = n2; |
937 | ag[i].ic = ics; |
938 | ag[i].isgn = isgn; |
939 | ag[i].a = a; |
940 | ag[i].ip = ip; |
941 | ag[i].w = w; |
942 | fft2d_thread_create(&th[i], ddxt2d0_th, &ag[i]); |
943 | } |
944 | for (i = 0; i < nthread; i++) { |
945 | fft2d_thread_wait(th[i]); |
946 | } |
947 | } |
948 | |
949 | |
950 | void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, |
951 | double *t, int *ip, double *w) |
952 | { |
953 | void *ddxt2d_th(void *p); |
954 | fft2d_thread_t th[FFT2D_MAX_THREADS]; |
955 | fft2d_arg_t ag[FFT2D_MAX_THREADS]; |
956 | int nthread, nt, i; |
957 | |
958 | nthread = FFT2D_MAX_THREADS; |
959 | nt = 4 * n1; |
960 | if (n2 == 2 * FFT2D_MAX_THREADS) { |
961 | nt >>= 1; |
962 | } else if (n2 < 2 * FFT2D_MAX_THREADS) { |
963 | nthread = n2; |
964 | nt >>= 2; |
965 | } |
966 | for (i = 0; i < nthread; i++) { |
967 | ag[i].nthread = nthread; |
968 | ag[i].n0 = i; |
969 | ag[i].n1 = n1; |
970 | ag[i].n2 = n2; |
971 | ag[i].ic = ics; |
972 | ag[i].isgn = isgn; |
973 | ag[i].a = a; |
974 | ag[i].t = &t[nt * i]; |
975 | ag[i].ip = ip; |
976 | ag[i].w = w; |
977 | fft2d_thread_create(&th[i], ddxt2d_th, &ag[i]); |
978 | } |
979 | for (i = 0; i < nthread; i++) { |
980 | fft2d_thread_wait(th[i]); |
981 | } |
982 | } |
983 | |
984 | |
985 | void *xdft2d0_th(void *p) |
986 | { |
987 | void cdft(int n, int isgn, double *a, int *ip, double *w); |
988 | void rdft(int n, int isgn, double *a, int *ip, double *w); |
989 | int nthread, n0, n1, n2, icr, isgn, *ip, i; |
990 | double **a, *w; |
991 | |
992 | nthread = ((fft2d_arg_t *) p)->nthread; |
993 | n0 = ((fft2d_arg_t *) p)->n0; |
994 | n1 = ((fft2d_arg_t *) p)->n1; |
995 | n2 = ((fft2d_arg_t *) p)->n2; |
996 | icr = ((fft2d_arg_t *) p)->ic; |
997 | isgn = ((fft2d_arg_t *) p)->isgn; |
998 | a = ((fft2d_arg_t *) p)->a; |
999 | ip = ((fft2d_arg_t *) p)->ip; |
1000 | w = ((fft2d_arg_t *) p)->w; |
1001 | if (icr == 0) { |
1002 | for (i = n0; i < n1; i += nthread) { |
1003 | cdft(n2, isgn, a[i], ip, w); |
1004 | } |
1005 | } else { |
1006 | for (i = n0; i < n1; i += nthread) { |
1007 | rdft(n2, isgn, a[i], ip, w); |
1008 | } |
1009 | } |
1010 | return (void *) 0; |
1011 | } |
1012 | |
1013 | |
1014 | void *cdft2d_th(void *p) |
1015 | { |
1016 | void cdft(int n, int isgn, double *a, int *ip, double *w); |
1017 | int nthread, n0, n1, n2, isgn, *ip, i, j; |
1018 | double **a, *t, *w; |
1019 | |
1020 | nthread = ((fft2d_arg_t *) p)->nthread; |
1021 | n0 = ((fft2d_arg_t *) p)->n0; |
1022 | n1 = ((fft2d_arg_t *) p)->n1; |
1023 | n2 = ((fft2d_arg_t *) p)->n2; |
1024 | isgn = ((fft2d_arg_t *) p)->isgn; |
1025 | a = ((fft2d_arg_t *) p)->a; |
1026 | t = ((fft2d_arg_t *) p)->t; |
1027 | ip = ((fft2d_arg_t *) p)->ip; |
1028 | w = ((fft2d_arg_t *) p)->w; |
1029 | if (n2 > 4 * nthread) { |
1030 | for (j = 8 * n0; j < n2; j += 8 * nthread) { |
1031 | for (i = 0; i < n1; i++) { |
1032 | t[2 * i] = a[i][j]; |
1033 | t[2 * i + 1] = a[i][j + 1]; |
1034 | t[2 * n1 + 2 * i] = a[i][j + 2]; |
1035 | t[2 * n1 + 2 * i + 1] = a[i][j + 3]; |
1036 | t[4 * n1 + 2 * i] = a[i][j + 4]; |
1037 | t[4 * n1 + 2 * i + 1] = a[i][j + 5]; |
1038 | t[6 * n1 + 2 * i] = a[i][j + 6]; |
1039 | t[6 * n1 + 2 * i + 1] = a[i][j + 7]; |
1040 | } |
1041 | cdft(2 * n1, isgn, t, ip, w); |
1042 | cdft(2 * n1, isgn, &t[2 * n1], ip, w); |
1043 | cdft(2 * n1, isgn, &t[4 * n1], ip, w); |
1044 | cdft(2 * n1, isgn, &t[6 * n1], ip, w); |
1045 | for (i = 0; i < n1; i++) { |
1046 | a[i][j] = t[2 * i]; |
1047 | a[i][j + 1] = t[2 * i + 1]; |
1048 | a[i][j + 2] = t[2 * n1 + 2 * i]; |
1049 | a[i][j + 3] = t[2 * n1 + 2 * i + 1]; |
1050 | a[i][j + 4] = t[4 * n1 + 2 * i]; |
1051 | a[i][j + 5] = t[4 * n1 + 2 * i + 1]; |
1052 | a[i][j + 6] = t[6 * n1 + 2 * i]; |
1053 | a[i][j + 7] = t[6 * n1 + 2 * i + 1]; |
1054 | } |
1055 | } |
1056 | } else if (n2 == 4 * nthread) { |
1057 | for (i = 0; i < n1; i++) { |
1058 | t[2 * i] = a[i][4 * n0]; |
1059 | t[2 * i + 1] = a[i][4 * n0 + 1]; |
1060 | t[2 * n1 + 2 * i] = a[i][4 * n0 + 2]; |
1061 | t[2 * n1 + 2 * i + 1] = a[i][4 * n0 + 3]; |
1062 | } |
1063 | cdft(2 * n1, isgn, t, ip, w); |
1064 | cdft(2 * n1, isgn, &t[2 * n1], ip, w); |
1065 | for (i = 0; i < n1; i++) { |
1066 | a[i][4 * n0] = t[2 * i]; |
1067 | a[i][4 * n0 + 1] = t[2 * i + 1]; |
1068 | a[i][4 * n0 + 2] = t[2 * n1 + 2 * i]; |
1069 | a[i][4 * n0 + 3] = t[2 * n1 + 2 * i + 1]; |
1070 | } |
1071 | } else if (n2 == 2 * nthread) { |
1072 | for (i = 0; i < n1; i++) { |
1073 | t[2 * i] = a[i][2 * n0]; |
1074 | t[2 * i + 1] = a[i][2 * n0 + 1]; |
1075 | } |
1076 | cdft(2 * n1, isgn, t, ip, w); |
1077 | for (i = 0; i < n1; i++) { |
1078 | a[i][2 * n0] = t[2 * i]; |
1079 | a[i][2 * n0 + 1] = t[2 * i + 1]; |
1080 | } |
1081 | } |
1082 | return (void *) 0; |
1083 | } |
1084 | |
1085 | |
1086 | void *ddxt2d0_th(void *p) |
1087 | { |
1088 | void ddct(int n, int isgn, double *a, int *ip, double *w); |
1089 | void ddst(int n, int isgn, double *a, int *ip, double *w); |
1090 | int nthread, n0, n1, n2, ics, isgn, *ip, i; |
1091 | double **a, *w; |
1092 | |
1093 | nthread = ((fft2d_arg_t *) p)->nthread; |
1094 | n0 = ((fft2d_arg_t *) p)->n0; |
1095 | n1 = ((fft2d_arg_t *) p)->n1; |
1096 | n2 = ((fft2d_arg_t *) p)->n2; |
1097 | ics = ((fft2d_arg_t *) p)->ic; |
1098 | isgn = ((fft2d_arg_t *) p)->isgn; |
1099 | a = ((fft2d_arg_t *) p)->a; |
1100 | ip = ((fft2d_arg_t *) p)->ip; |
1101 | w = ((fft2d_arg_t *) p)->w; |
1102 | if (ics == 0) { |
1103 | for (i = n0; i < n1; i += nthread) { |
1104 | ddct(n2, isgn, a[i], ip, w); |
1105 | } |
1106 | } else { |
1107 | for (i = n0; i < n1; i += nthread) { |
1108 | ddst(n2, isgn, a[i], ip, w); |
1109 | } |
1110 | } |
1111 | return (void *) 0; |
1112 | } |
1113 | |
1114 | |
1115 | void *ddxt2d_th(void *p) |
1116 | { |
1117 | void ddct(int n, int isgn, double *a, int *ip, double *w); |
1118 | void ddst(int n, int isgn, double *a, int *ip, double *w); |
1119 | int nthread, n0, n1, n2, ics, isgn, *ip, i, j; |
1120 | double **a, *t, *w; |
1121 | |
1122 | nthread = ((fft2d_arg_t *) p)->nthread; |
1123 | n0 = ((fft2d_arg_t *) p)->n0; |
1124 | n1 = ((fft2d_arg_t *) p)->n1; |
1125 | n2 = ((fft2d_arg_t *) p)->n2; |
1126 | ics = ((fft2d_arg_t *) p)->ic; |
1127 | isgn = ((fft2d_arg_t *) p)->isgn; |
1128 | a = ((fft2d_arg_t *) p)->a; |
1129 | t = ((fft2d_arg_t *) p)->t; |
1130 | ip = ((fft2d_arg_t *) p)->ip; |
1131 | w = ((fft2d_arg_t *) p)->w; |
1132 | if (n2 > 2 * nthread) { |
1133 | for (j = 4 * n0; j < n2; j += 4 * nthread) { |
1134 | for (i = 0; i < n1; i++) { |
1135 | t[i] = a[i][j]; |
1136 | t[n1 + i] = a[i][j + 1]; |
1137 | t[2 * n1 + i] = a[i][j + 2]; |
1138 | t[3 * n1 + i] = a[i][j + 3]; |
1139 | } |
1140 | if (ics == 0) { |
1141 | ddct(n1, isgn, t, ip, w); |
1142 | ddct(n1, isgn, &t[n1], ip, w); |
1143 | ddct(n1, isgn, &t[2 * n1], ip, w); |
1144 | ddct(n1, isgn, &t[3 * n1], ip, w); |
1145 | } else { |
1146 | ddst(n1, isgn, t, ip, w); |
1147 | ddst(n1, isgn, &t[n1], ip, w); |
1148 | ddst(n1, isgn, &t[2 * n1], ip, w); |
1149 | ddst(n1, isgn, &t[3 * n1], ip, w); |
1150 | } |
1151 | for (i = 0; i < n1; i++) { |
1152 | a[i][j] = t[i]; |
1153 | a[i][j + 1] = t[n1 + i]; |
1154 | a[i][j + 2] = t[2 * n1 + i]; |
1155 | a[i][j + 3] = t[3 * n1 + i]; |
1156 | } |
1157 | } |
1158 | } else if (n2 == 2 * nthread) { |
1159 | for (i = 0; i < n1; i++) { |
1160 | t[i] = a[i][2 * n0]; |
1161 | t[n1 + i] = a[i][2 * n0 + 1]; |
1162 | } |
1163 | if (ics == 0) { |
1164 | ddct(n1, isgn, t, ip, w); |
1165 | ddct(n1, isgn, &t[n1], ip, w); |
1166 | } else { |
1167 | ddst(n1, isgn, t, ip, w); |
1168 | ddst(n1, isgn, &t[n1], ip, w); |
1169 | } |
1170 | for (i = 0; i < n1; i++) { |
1171 | a[i][2 * n0] = t[i]; |
1172 | a[i][2 * n0 + 1] = t[n1 + i]; |
1173 | } |
1174 | } else if (n2 == nthread) { |
1175 | for (i = 0; i < n1; i++) { |
1176 | t[i] = a[i][n0]; |
1177 | } |
1178 | if (ics == 0) { |
1179 | ddct(n1, isgn, t, ip, w); |
1180 | } else { |
1181 | ddst(n1, isgn, t, ip, w); |
1182 | } |
1183 | for (i = 0; i < n1; i++) { |
1184 | a[i][n0] = t[i]; |
1185 | } |
1186 | } |
1187 | return (void *) 0; |
1188 | } |
1189 | #endif /* USE_FFT2D_THREADS */ |
1190 | |
1191 | |