1 | /* |
2 | Fast Fourier/Cosine/Sine Transform |
3 | dimension :one |
4 | data length :power of 2 |
5 | decimation :frequency |
6 | radix :split-radix |
7 | data :inplace |
8 | table :use |
9 | functions |
10 | cdft: Complex Discrete Fourier Transform |
11 | rdft: Real Discrete Fourier Transform |
12 | ddct: Discrete Cosine Transform |
13 | ddst: Discrete Sine Transform |
14 | dfct: Cosine Transform of RDFT (Real Symmetric DFT) |
15 | dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) |
16 | function prototypes |
17 | void cdft(int, int, double *, int *, double *); |
18 | void rdft(int, int, double *, int *, double *); |
19 | void ddct(int, int, double *, int *, double *); |
20 | void ddst(int, int, double *, int *, double *); |
21 | void dfct(int, double *, double *, int *, double *); |
22 | void dfst(int, double *, double *, int *, double *); |
23 | macro definitions |
24 | USE_CDFT_PTHREADS : default=not defined |
25 | CDFT_THREADS_BEGIN_N : must be >= 512, default=8192 |
26 | CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536 |
27 | USE_CDFT_WINTHREADS : default=not defined |
28 | CDFT_THREADS_BEGIN_N : must be >= 512, default=32768 |
29 | CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288 |
30 | |
31 | |
32 | -------- Complex DFT (Discrete Fourier Transform) -------- |
33 | [definition] |
34 | <case1> |
35 | X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n |
36 | <case2> |
37 | X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n |
38 | (notes: sum_j=0^n-1 is a summation from j=0 to n-1) |
39 | [usage] |
40 | <case1> |
41 | ip[0] = 0; // first time only |
42 | cdft(2*n, 1, a, ip, w); |
43 | <case2> |
44 | ip[0] = 0; // first time only |
45 | cdft(2*n, -1, a, ip, w); |
46 | [parameters] |
47 | 2*n :data length (int) |
48 | n >= 1, n = power of 2 |
49 | a[0...2*n-1] :input/output data (double *) |
50 | input data |
51 | a[2*j] = Re(x[j]), |
52 | a[2*j+1] = Im(x[j]), 0<=j<n |
53 | output data |
54 | a[2*k] = Re(X[k]), |
55 | a[2*k+1] = Im(X[k]), 0<=k<n |
56 | ip[0...*] :work area for bit reversal (int *) |
57 | length of ip >= 2+sqrt(n) |
58 | strictly, |
59 | length of ip >= |
60 | 2+(1<<(int)(log(n+0.5)/log(2))/2). |
61 | ip[0],ip[1] are pointers of the cos/sin table. |
62 | w[0...n/2-1] :cos/sin table (double *) |
63 | w[],ip[] are initialized if ip[0] == 0. |
64 | [remark] |
65 | Inverse of |
66 | cdft(2*n, -1, a, ip, w); |
67 | is |
68 | cdft(2*n, 1, a, ip, w); |
69 | for (j = 0; j <= 2 * n - 1; j++) { |
70 | a[j] *= 1.0 / n; |
71 | } |
72 | . |
73 | |
74 | |
75 | -------- Real DFT / Inverse of Real DFT -------- |
76 | [definition] |
77 | <case1> RDFT |
78 | R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 |
79 | I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2 |
80 | <case2> IRDFT (excluding scale) |
81 | a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + |
82 | sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + |
83 | sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n |
84 | [usage] |
85 | <case1> |
86 | ip[0] = 0; // first time only |
87 | rdft(n, 1, a, ip, w); |
88 | <case2> |
89 | ip[0] = 0; // first time only |
90 | rdft(n, -1, a, ip, w); |
91 | [parameters] |
92 | n :data length (int) |
93 | n >= 2, n = power of 2 |
94 | a[0...n-1] :input/output data (double *) |
95 | <case1> |
96 | output data |
97 | a[2*k] = R[k], 0<=k<n/2 |
98 | a[2*k+1] = I[k], 0<k<n/2 |
99 | a[1] = R[n/2] |
100 | <case2> |
101 | input data |
102 | a[2*j] = R[j], 0<=j<n/2 |
103 | a[2*j+1] = I[j], 0<j<n/2 |
104 | a[1] = R[n/2] |
105 | ip[0...*] :work area for bit reversal (int *) |
106 | length of ip >= 2+sqrt(n/2) |
107 | strictly, |
108 | length of ip >= |
109 | 2+(1<<(int)(log(n/2+0.5)/log(2))/2). |
110 | ip[0],ip[1] are pointers of the cos/sin table. |
111 | w[0...n/2-1] :cos/sin table (double *) |
112 | w[],ip[] are initialized if ip[0] == 0. |
113 | [remark] |
114 | Inverse of |
115 | rdft(n, 1, a, ip, w); |
116 | is |
117 | rdft(n, -1, a, ip, w); |
118 | for (j = 0; j <= n - 1; j++) { |
119 | a[j] *= 2.0 / n; |
120 | } |
121 | . |
122 | |
123 | |
124 | -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- |
125 | [definition] |
126 | <case1> IDCT (excluding scale) |
127 | C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n |
128 | <case2> DCT |
129 | C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n |
130 | [usage] |
131 | <case1> |
132 | ip[0] = 0; // first time only |
133 | ddct(n, 1, a, ip, w); |
134 | <case2> |
135 | ip[0] = 0; // first time only |
136 | ddct(n, -1, a, ip, w); |
137 | [parameters] |
138 | n :data length (int) |
139 | n >= 2, n = power of 2 |
140 | a[0...n-1] :input/output data (double *) |
141 | output data |
142 | a[k] = C[k], 0<=k<n |
143 | ip[0...*] :work area for bit reversal (int *) |
144 | length of ip >= 2+sqrt(n/2) |
145 | strictly, |
146 | length of ip >= |
147 | 2+(1<<(int)(log(n/2+0.5)/log(2))/2). |
148 | ip[0],ip[1] are pointers of the cos/sin table. |
149 | w[0...n*5/4-1] :cos/sin table (double *) |
150 | w[],ip[] are initialized if ip[0] == 0. |
151 | [remark] |
152 | Inverse of |
153 | ddct(n, -1, a, ip, w); |
154 | is |
155 | a[0] *= 0.5; |
156 | ddct(n, 1, a, ip, w); |
157 | for (j = 0; j <= n - 1; j++) { |
158 | a[j] *= 2.0 / n; |
159 | } |
160 | . |
161 | |
162 | |
163 | -------- DST (Discrete Sine Transform) / Inverse of DST -------- |
164 | [definition] |
165 | <case1> IDST (excluding scale) |
166 | S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n |
167 | <case2> DST |
168 | S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n |
169 | [usage] |
170 | <case1> |
171 | ip[0] = 0; // first time only |
172 | ddst(n, 1, a, ip, w); |
173 | <case2> |
174 | ip[0] = 0; // first time only |
175 | ddst(n, -1, a, ip, w); |
176 | [parameters] |
177 | n :data length (int) |
178 | n >= 2, n = power of 2 |
179 | a[0...n-1] :input/output data (double *) |
180 | <case1> |
181 | input data |
182 | a[j] = A[j], 0<j<n |
183 | a[0] = A[n] |
184 | output data |
185 | a[k] = S[k], 0<=k<n |
186 | <case2> |
187 | output data |
188 | a[k] = S[k], 0<k<n |
189 | a[0] = S[n] |
190 | ip[0...*] :work area for bit reversal (int *) |
191 | length of ip >= 2+sqrt(n/2) |
192 | strictly, |
193 | length of ip >= |
194 | 2+(1<<(int)(log(n/2+0.5)/log(2))/2). |
195 | ip[0],ip[1] are pointers of the cos/sin table. |
196 | w[0...n*5/4-1] :cos/sin table (double *) |
197 | w[],ip[] are initialized if ip[0] == 0. |
198 | [remark] |
199 | Inverse of |
200 | ddst(n, -1, a, ip, w); |
201 | is |
202 | a[0] *= 0.5; |
203 | ddst(n, 1, a, ip, w); |
204 | for (j = 0; j <= n - 1; j++) { |
205 | a[j] *= 2.0 / n; |
206 | } |
207 | . |
208 | |
209 | |
210 | -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- |
211 | [definition] |
212 | C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n |
213 | [usage] |
214 | ip[0] = 0; // first time only |
215 | dfct(n, a, t, ip, w); |
216 | [parameters] |
217 | n :data length - 1 (int) |
218 | n >= 2, n = power of 2 |
219 | a[0...n] :input/output data (double *) |
220 | output data |
221 | a[k] = C[k], 0<=k<=n |
222 | t[0...n/2] :work area (double *) |
223 | ip[0...*] :work area for bit reversal (int *) |
224 | length of ip >= 2+sqrt(n/4) |
225 | strictly, |
226 | length of ip >= |
227 | 2+(1<<(int)(log(n/4+0.5)/log(2))/2). |
228 | ip[0],ip[1] are pointers of the cos/sin table. |
229 | w[0...n*5/8-1] :cos/sin table (double *) |
230 | w[],ip[] are initialized if ip[0] == 0. |
231 | [remark] |
232 | Inverse of |
233 | a[0] *= 0.5; |
234 | a[n] *= 0.5; |
235 | dfct(n, a, t, ip, w); |
236 | is |
237 | a[0] *= 0.5; |
238 | a[n] *= 0.5; |
239 | dfct(n, a, t, ip, w); |
240 | for (j = 0; j <= n; j++) { |
241 | a[j] *= 2.0 / n; |
242 | } |
243 | . |
244 | |
245 | |
246 | -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- |
247 | [definition] |
248 | S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n |
249 | [usage] |
250 | ip[0] = 0; // first time only |
251 | dfst(n, a, t, ip, w); |
252 | [parameters] |
253 | n :data length + 1 (int) |
254 | n >= 2, n = power of 2 |
255 | a[0...n-1] :input/output data (double *) |
256 | output data |
257 | a[k] = S[k], 0<k<n |
258 | (a[0] is used for work area) |
259 | t[0...n/2-1] :work area (double *) |
260 | ip[0...*] :work area for bit reversal (int *) |
261 | length of ip >= 2+sqrt(n/4) |
262 | strictly, |
263 | length of ip >= |
264 | 2+(1<<(int)(log(n/4+0.5)/log(2))/2). |
265 | ip[0],ip[1] are pointers of the cos/sin table. |
266 | w[0...n*5/8-1] :cos/sin table (double *) |
267 | w[],ip[] are initialized if ip[0] == 0. |
268 | [remark] |
269 | Inverse of |
270 | dfst(n, a, t, ip, w); |
271 | is |
272 | dfst(n, a, t, ip, w); |
273 | for (j = 1; j <= n - 1; j++) { |
274 | a[j] *= 2.0 / n; |
275 | } |
276 | . |
277 | |
278 | |
279 | Appendix : |
280 | The cos/sin table is recalculated when the larger table required. |
281 | w[] and ip[] are compatible with all routines. |
282 | */ |
283 | |
284 | |
285 | void cdft(int n, int isgn, double *a, int *ip, double *w) |
286 | { |
287 | void makewt(int nw, int *ip, double *w); |
288 | void cftfsub(int n, double *a, int *ip, int nw, double *w); |
289 | void cftbsub(int n, double *a, int *ip, int nw, double *w); |
290 | int nw; |
291 | |
292 | nw = ip[0]; |
293 | if (n > (nw << 2)) { |
294 | nw = n >> 2; |
295 | makewt(nw, ip, w); |
296 | } |
297 | if (isgn >= 0) { |
298 | cftfsub(n, a, ip, nw, w); |
299 | } else { |
300 | cftbsub(n, a, ip, nw, w); |
301 | } |
302 | } |
303 | |
304 | |
305 | void rdft(int n, int isgn, double *a, int *ip, double *w) |
306 | { |
307 | void makewt(int nw, int *ip, double *w); |
308 | void makect(int nc, int *ip, double *c); |
309 | void cftfsub(int n, double *a, int *ip, int nw, double *w); |
310 | void cftbsub(int n, double *a, int *ip, int nw, double *w); |
311 | void rftfsub(int n, double *a, int nc, double *c); |
312 | void rftbsub(int n, double *a, int nc, double *c); |
313 | int nw, nc; |
314 | double xi; |
315 | |
316 | nw = ip[0]; |
317 | if (n > (nw << 2)) { |
318 | nw = n >> 2; |
319 | makewt(nw, ip, w); |
320 | } |
321 | nc = ip[1]; |
322 | if (n > (nc << 2)) { |
323 | nc = n >> 2; |
324 | makect(nc, ip, w + nw); |
325 | } |
326 | if (isgn >= 0) { |
327 | if (n > 4) { |
328 | cftfsub(n, a, ip, nw, w); |
329 | rftfsub(n, a, nc, w + nw); |
330 | } else if (n == 4) { |
331 | cftfsub(n, a, ip, nw, w); |
332 | } |
333 | xi = a[0] - a[1]; |
334 | a[0] += a[1]; |
335 | a[1] = xi; |
336 | } else { |
337 | a[1] = 0.5 * (a[0] - a[1]); |
338 | a[0] -= a[1]; |
339 | if (n > 4) { |
340 | rftbsub(n, a, nc, w + nw); |
341 | cftbsub(n, a, ip, nw, w); |
342 | } else if (n == 4) { |
343 | cftbsub(n, a, ip, nw, w); |
344 | } |
345 | } |
346 | } |
347 | |
348 | |
349 | void ddct(int n, int isgn, double *a, int *ip, double *w) |
350 | { |
351 | void makewt(int nw, int *ip, double *w); |
352 | void makect(int nc, int *ip, double *c); |
353 | void cftfsub(int n, double *a, int *ip, int nw, double *w); |
354 | void cftbsub(int n, double *a, int *ip, int nw, double *w); |
355 | void rftfsub(int n, double *a, int nc, double *c); |
356 | void rftbsub(int n, double *a, int nc, double *c); |
357 | void dctsub(int n, double *a, int nc, double *c); |
358 | int j, nw, nc; |
359 | double xr; |
360 | |
361 | nw = ip[0]; |
362 | if (n > (nw << 2)) { |
363 | nw = n >> 2; |
364 | makewt(nw, ip, w); |
365 | } |
366 | nc = ip[1]; |
367 | if (n > nc) { |
368 | nc = n; |
369 | makect(nc, ip, w + nw); |
370 | } |
371 | if (isgn < 0) { |
372 | xr = a[n - 1]; |
373 | for (j = n - 2; j >= 2; j -= 2) { |
374 | a[j + 1] = a[j] - a[j - 1]; |
375 | a[j] += a[j - 1]; |
376 | } |
377 | a[1] = a[0] - xr; |
378 | a[0] += xr; |
379 | if (n > 4) { |
380 | rftbsub(n, a, nc, w + nw); |
381 | cftbsub(n, a, ip, nw, w); |
382 | } else if (n == 4) { |
383 | cftbsub(n, a, ip, nw, w); |
384 | } |
385 | } |
386 | dctsub(n, a, nc, w + nw); |
387 | if (isgn >= 0) { |
388 | if (n > 4) { |
389 | cftfsub(n, a, ip, nw, w); |
390 | rftfsub(n, a, nc, w + nw); |
391 | } else if (n == 4) { |
392 | cftfsub(n, a, ip, nw, w); |
393 | } |
394 | xr = a[0] - a[1]; |
395 | a[0] += a[1]; |
396 | for (j = 2; j < n; j += 2) { |
397 | a[j - 1] = a[j] - a[j + 1]; |
398 | a[j] += a[j + 1]; |
399 | } |
400 | a[n - 1] = xr; |
401 | } |
402 | } |
403 | |
404 | |
405 | void ddst(int n, int isgn, double *a, int *ip, double *w) |
406 | { |
407 | void makewt(int nw, int *ip, double *w); |
408 | void makect(int nc, int *ip, double *c); |
409 | void cftfsub(int n, double *a, int *ip, int nw, double *w); |
410 | void cftbsub(int n, double *a, int *ip, int nw, double *w); |
411 | void rftfsub(int n, double *a, int nc, double *c); |
412 | void rftbsub(int n, double *a, int nc, double *c); |
413 | void dstsub(int n, double *a, int nc, double *c); |
414 | int j, nw, nc; |
415 | double xr; |
416 | |
417 | nw = ip[0]; |
418 | if (n > (nw << 2)) { |
419 | nw = n >> 2; |
420 | makewt(nw, ip, w); |
421 | } |
422 | nc = ip[1]; |
423 | if (n > nc) { |
424 | nc = n; |
425 | makect(nc, ip, w + nw); |
426 | } |
427 | if (isgn < 0) { |
428 | xr = a[n - 1]; |
429 | for (j = n - 2; j >= 2; j -= 2) { |
430 | a[j + 1] = -a[j] - a[j - 1]; |
431 | a[j] -= a[j - 1]; |
432 | } |
433 | a[1] = a[0] + xr; |
434 | a[0] -= xr; |
435 | if (n > 4) { |
436 | rftbsub(n, a, nc, w + nw); |
437 | cftbsub(n, a, ip, nw, w); |
438 | } else if (n == 4) { |
439 | cftbsub(n, a, ip, nw, w); |
440 | } |
441 | } |
442 | dstsub(n, a, nc, w + nw); |
443 | if (isgn >= 0) { |
444 | if (n > 4) { |
445 | cftfsub(n, a, ip, nw, w); |
446 | rftfsub(n, a, nc, w + nw); |
447 | } else if (n == 4) { |
448 | cftfsub(n, a, ip, nw, w); |
449 | } |
450 | xr = a[0] - a[1]; |
451 | a[0] += a[1]; |
452 | for (j = 2; j < n; j += 2) { |
453 | a[j - 1] = -a[j] - a[j + 1]; |
454 | a[j] -= a[j + 1]; |
455 | } |
456 | a[n - 1] = -xr; |
457 | } |
458 | } |
459 | |
460 | |
461 | void dfct(int n, double *a, double *t, int *ip, double *w) |
462 | { |
463 | void makewt(int nw, int *ip, double *w); |
464 | void makect(int nc, int *ip, double *c); |
465 | void cftfsub(int n, double *a, int *ip, int nw, double *w); |
466 | void rftfsub(int n, double *a, int nc, double *c); |
467 | void dctsub(int n, double *a, int nc, double *c); |
468 | int j, k, l, m, mh, nw, nc; |
469 | double xr, xi, yr, yi; |
470 | |
471 | nw = ip[0]; |
472 | if (n > (nw << 3)) { |
473 | nw = n >> 3; |
474 | makewt(nw, ip, w); |
475 | } |
476 | nc = ip[1]; |
477 | if (n > (nc << 1)) { |
478 | nc = n >> 1; |
479 | makect(nc, ip, w + nw); |
480 | } |
481 | m = n >> 1; |
482 | yi = a[m]; |
483 | xi = a[0] + a[n]; |
484 | a[0] -= a[n]; |
485 | t[0] = xi - yi; |
486 | t[m] = xi + yi; |
487 | if (n > 2) { |
488 | mh = m >> 1; |
489 | for (j = 1; j < mh; j++) { |
490 | k = m - j; |
491 | xr = a[j] - a[n - j]; |
492 | xi = a[j] + a[n - j]; |
493 | yr = a[k] - a[n - k]; |
494 | yi = a[k] + a[n - k]; |
495 | a[j] = xr; |
496 | a[k] = yr; |
497 | t[j] = xi - yi; |
498 | t[k] = xi + yi; |
499 | } |
500 | t[mh] = a[mh] + a[n - mh]; |
501 | a[mh] -= a[n - mh]; |
502 | dctsub(m, a, nc, w + nw); |
503 | if (m > 4) { |
504 | cftfsub(m, a, ip, nw, w); |
505 | rftfsub(m, a, nc, w + nw); |
506 | } else if (m == 4) { |
507 | cftfsub(m, a, ip, nw, w); |
508 | } |
509 | a[n - 1] = a[0] - a[1]; |
510 | a[1] = a[0] + a[1]; |
511 | for (j = m - 2; j >= 2; j -= 2) { |
512 | a[2 * j + 1] = a[j] + a[j + 1]; |
513 | a[2 * j - 1] = a[j] - a[j + 1]; |
514 | } |
515 | l = 2; |
516 | m = mh; |
517 | while (m >= 2) { |
518 | dctsub(m, t, nc, w + nw); |
519 | if (m > 4) { |
520 | cftfsub(m, t, ip, nw, w); |
521 | rftfsub(m, t, nc, w + nw); |
522 | } else if (m == 4) { |
523 | cftfsub(m, t, ip, nw, w); |
524 | } |
525 | a[n - l] = t[0] - t[1]; |
526 | a[l] = t[0] + t[1]; |
527 | k = 0; |
528 | for (j = 2; j < m; j += 2) { |
529 | k += l << 2; |
530 | a[k - l] = t[j] - t[j + 1]; |
531 | a[k + l] = t[j] + t[j + 1]; |
532 | } |
533 | l <<= 1; |
534 | mh = m >> 1; |
535 | for (j = 0; j < mh; j++) { |
536 | k = m - j; |
537 | t[j] = t[m + k] - t[m + j]; |
538 | t[k] = t[m + k] + t[m + j]; |
539 | } |
540 | t[mh] = t[m + mh]; |
541 | m = mh; |
542 | } |
543 | a[l] = t[0]; |
544 | a[n] = t[2] - t[1]; |
545 | a[0] = t[2] + t[1]; |
546 | } else { |
547 | a[1] = a[0]; |
548 | a[2] = t[0]; |
549 | a[0] = t[1]; |
550 | } |
551 | } |
552 | |
553 | |
554 | void dfst(int n, double *a, double *t, int *ip, double *w) |
555 | { |
556 | void makewt(int nw, int *ip, double *w); |
557 | void makect(int nc, int *ip, double *c); |
558 | void cftfsub(int n, double *a, int *ip, int nw, double *w); |
559 | void rftfsub(int n, double *a, int nc, double *c); |
560 | void dstsub(int n, double *a, int nc, double *c); |
561 | int j, k, l, m, mh, nw, nc; |
562 | double xr, xi, yr, yi; |
563 | |
564 | nw = ip[0]; |
565 | if (n > (nw << 3)) { |
566 | nw = n >> 3; |
567 | makewt(nw, ip, w); |
568 | } |
569 | nc = ip[1]; |
570 | if (n > (nc << 1)) { |
571 | nc = n >> 1; |
572 | makect(nc, ip, w + nw); |
573 | } |
574 | if (n > 2) { |
575 | m = n >> 1; |
576 | mh = m >> 1; |
577 | for (j = 1; j < mh; j++) { |
578 | k = m - j; |
579 | xr = a[j] + a[n - j]; |
580 | xi = a[j] - a[n - j]; |
581 | yr = a[k] + a[n - k]; |
582 | yi = a[k] - a[n - k]; |
583 | a[j] = xr; |
584 | a[k] = yr; |
585 | t[j] = xi + yi; |
586 | t[k] = xi - yi; |
587 | } |
588 | t[0] = a[mh] - a[n - mh]; |
589 | a[mh] += a[n - mh]; |
590 | a[0] = a[m]; |
591 | dstsub(m, a, nc, w + nw); |
592 | if (m > 4) { |
593 | cftfsub(m, a, ip, nw, w); |
594 | rftfsub(m, a, nc, w + nw); |
595 | } else if (m == 4) { |
596 | cftfsub(m, a, ip, nw, w); |
597 | } |
598 | a[n - 1] = a[1] - a[0]; |
599 | a[1] = a[0] + a[1]; |
600 | for (j = m - 2; j >= 2; j -= 2) { |
601 | a[2 * j + 1] = a[j] - a[j + 1]; |
602 | a[2 * j - 1] = -a[j] - a[j + 1]; |
603 | } |
604 | l = 2; |
605 | m = mh; |
606 | while (m >= 2) { |
607 | dstsub(m, t, nc, w + nw); |
608 | if (m > 4) { |
609 | cftfsub(m, t, ip, nw, w); |
610 | rftfsub(m, t, nc, w + nw); |
611 | } else if (m == 4) { |
612 | cftfsub(m, t, ip, nw, w); |
613 | } |
614 | a[n - l] = t[1] - t[0]; |
615 | a[l] = t[0] + t[1]; |
616 | k = 0; |
617 | for (j = 2; j < m; j += 2) { |
618 | k += l << 2; |
619 | a[k - l] = -t[j] - t[j + 1]; |
620 | a[k + l] = t[j] - t[j + 1]; |
621 | } |
622 | l <<= 1; |
623 | mh = m >> 1; |
624 | for (j = 1; j < mh; j++) { |
625 | k = m - j; |
626 | t[j] = t[m + k] + t[m + j]; |
627 | t[k] = t[m + k] - t[m + j]; |
628 | } |
629 | t[0] = t[m + mh]; |
630 | m = mh; |
631 | } |
632 | a[l] = t[0]; |
633 | } |
634 | a[0] = 0; |
635 | } |
636 | |
637 | |
638 | /* -------- initializing routines -------- */ |
639 | |
640 | |
641 | #include <math.h> |
642 | |
643 | void makewt(int nw, int *ip, double *w) |
644 | { |
645 | void makeipt(int nw, int *ip); |
646 | int j, nwh, nw0, nw1; |
647 | double delta, wn4r, wk1r, wk1i, wk3r, wk3i; |
648 | |
649 | ip[0] = nw; |
650 | ip[1] = 1; |
651 | if (nw > 2) { |
652 | nwh = nw >> 1; |
653 | delta = atan(1.0) / nwh; |
654 | wn4r = cos(delta * nwh); |
655 | w[0] = 1; |
656 | w[1] = wn4r; |
657 | if (nwh == 4) { |
658 | w[2] = cos(delta * 2); |
659 | w[3] = sin(delta * 2); |
660 | } else if (nwh > 4) { |
661 | makeipt(nw, ip); |
662 | w[2] = 0.5 / cos(delta * 2); |
663 | w[3] = 0.5 / cos(delta * 6); |
664 | for (j = 4; j < nwh; j += 4) { |
665 | w[j] = cos(delta * j); |
666 | w[j + 1] = sin(delta * j); |
667 | w[j + 2] = cos(3 * delta * j); |
668 | w[j + 3] = -sin(3 * delta * j); |
669 | } |
670 | } |
671 | nw0 = 0; |
672 | while (nwh > 2) { |
673 | nw1 = nw0 + nwh; |
674 | nwh >>= 1; |
675 | w[nw1] = 1; |
676 | w[nw1 + 1] = wn4r; |
677 | if (nwh == 4) { |
678 | wk1r = w[nw0 + 4]; |
679 | wk1i = w[nw0 + 5]; |
680 | w[nw1 + 2] = wk1r; |
681 | w[nw1 + 3] = wk1i; |
682 | } else if (nwh > 4) { |
683 | wk1r = w[nw0 + 4]; |
684 | wk3r = w[nw0 + 6]; |
685 | w[nw1 + 2] = 0.5 / wk1r; |
686 | w[nw1 + 3] = 0.5 / wk3r; |
687 | for (j = 4; j < nwh; j += 4) { |
688 | wk1r = w[nw0 + 2 * j]; |
689 | wk1i = w[nw0 + 2 * j + 1]; |
690 | wk3r = w[nw0 + 2 * j + 2]; |
691 | wk3i = w[nw0 + 2 * j + 3]; |
692 | w[nw1 + j] = wk1r; |
693 | w[nw1 + j + 1] = wk1i; |
694 | w[nw1 + j + 2] = wk3r; |
695 | w[nw1 + j + 3] = wk3i; |
696 | } |
697 | } |
698 | nw0 = nw1; |
699 | } |
700 | } |
701 | } |
702 | |
703 | |
704 | void makeipt(int nw, int *ip) |
705 | { |
706 | int j, l, m, m2, p, q; |
707 | |
708 | ip[2] = 0; |
709 | ip[3] = 16; |
710 | m = 2; |
711 | for (l = nw; l > 32; l >>= 2) { |
712 | m2 = m << 1; |
713 | q = m2 << 3; |
714 | for (j = m; j < m2; j++) { |
715 | p = ip[j] << 2; |
716 | ip[m + j] = p; |
717 | ip[m2 + j] = p + q; |
718 | } |
719 | m = m2; |
720 | } |
721 | } |
722 | |
723 | |
724 | void makect(int nc, int *ip, double *c) |
725 | { |
726 | int j, nch; |
727 | double delta; |
728 | |
729 | ip[1] = nc; |
730 | if (nc > 1) { |
731 | nch = nc >> 1; |
732 | delta = atan(1.0) / nch; |
733 | c[0] = cos(delta * nch); |
734 | c[nch] = 0.5 * c[0]; |
735 | for (j = 1; j < nch; j++) { |
736 | c[j] = 0.5 * cos(delta * j); |
737 | c[nc - j] = 0.5 * sin(delta * j); |
738 | } |
739 | } |
740 | } |
741 | |
742 | |
743 | /* -------- child routines -------- */ |
744 | |
745 | |
746 | #ifdef USE_CDFT_PTHREADS |
747 | #define USE_CDFT_THREADS |
748 | #ifndef CDFT_THREADS_BEGIN_N |
749 | #define CDFT_THREADS_BEGIN_N 8192 |
750 | #endif |
751 | #ifndef CDFT_4THREADS_BEGIN_N |
752 | #define CDFT_4THREADS_BEGIN_N 65536 |
753 | #endif |
754 | #include <pthread.h> |
755 | #include <stdio.h> |
756 | #include <stdlib.h> |
757 | #define cdft_thread_t pthread_t |
758 | #define cdft_thread_create(thp,func,argp) { \ |
759 | if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \ |
760 | fprintf(stderr, "cdft thread error\n"); \ |
761 | exit(1); \ |
762 | } \ |
763 | } |
764 | #define cdft_thread_wait(th) { \ |
765 | if (pthread_join(th, NULL) != 0) { \ |
766 | fprintf(stderr, "cdft thread error\n"); \ |
767 | exit(1); \ |
768 | } \ |
769 | } |
770 | #endif /* USE_CDFT_PTHREADS */ |
771 | |
772 | |
773 | #ifdef USE_CDFT_WINTHREADS |
774 | #define USE_CDFT_THREADS |
775 | #ifndef CDFT_THREADS_BEGIN_N |
776 | #define CDFT_THREADS_BEGIN_N 32768 |
777 | #endif |
778 | #ifndef CDFT_4THREADS_BEGIN_N |
779 | #define CDFT_4THREADS_BEGIN_N 524288 |
780 | #endif |
781 | #include <windows.h> |
782 | #include <stdio.h> |
783 | #include <stdlib.h> |
784 | #define cdft_thread_t HANDLE |
785 | #define cdft_thread_create(thp,func,argp) { \ |
786 | DWORD thid; \ |
787 | *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \ |
788 | if (*(thp) == 0) { \ |
789 | fprintf(stderr, "cdft thread error\n"); \ |
790 | exit(1); \ |
791 | } \ |
792 | } |
793 | #define cdft_thread_wait(th) { \ |
794 | WaitForSingleObject(th, INFINITE); \ |
795 | CloseHandle(th); \ |
796 | } |
797 | #endif /* USE_CDFT_WINTHREADS */ |
798 | |
799 | |
800 | void cftfsub(int n, double *a, int *ip, int nw, double *w) |
801 | { |
802 | void bitrv2(int n, int *ip, double *a); |
803 | void bitrv216(double *a); |
804 | void bitrv208(double *a); |
805 | void cftf1st(int n, double *a, double *w); |
806 | void cftrec4(int n, double *a, int nw, double *w); |
807 | void cftleaf(int n, int isplt, double *a, int nw, double *w); |
808 | void cftfx41(int n, double *a, int nw, double *w); |
809 | void cftf161(double *a, double *w); |
810 | void cftf081(double *a, double *w); |
811 | void cftf040(double *a); |
812 | void cftx020(double *a); |
813 | #ifdef USE_CDFT_THREADS |
814 | void cftrec4_th(int n, double *a, int nw, double *w); |
815 | #endif /* USE_CDFT_THREADS */ |
816 | |
817 | if (n > 8) { |
818 | if (n > 32) { |
819 | cftf1st(n, a, &w[nw - (n >> 2)]); |
820 | #ifdef USE_CDFT_THREADS |
821 | if (n > CDFT_THREADS_BEGIN_N) { |
822 | cftrec4_th(n, a, nw, w); |
823 | } else |
824 | #endif /* USE_CDFT_THREADS */ |
825 | if (n > 512) { |
826 | cftrec4(n, a, nw, w); |
827 | } else if (n > 128) { |
828 | cftleaf(n, 1, a, nw, w); |
829 | } else { |
830 | cftfx41(n, a, nw, w); |
831 | } |
832 | bitrv2(n, ip, a); |
833 | } else if (n == 32) { |
834 | cftf161(a, &w[nw - 8]); |
835 | bitrv216(a); |
836 | } else { |
837 | cftf081(a, w); |
838 | bitrv208(a); |
839 | } |
840 | } else if (n == 8) { |
841 | cftf040(a); |
842 | } else if (n == 4) { |
843 | cftx020(a); |
844 | } |
845 | } |
846 | |
847 | |
848 | void cftbsub(int n, double *a, int *ip, int nw, double *w) |
849 | { |
850 | void bitrv2conj(int n, int *ip, double *a); |
851 | void bitrv216neg(double *a); |
852 | void bitrv208neg(double *a); |
853 | void cftb1st(int n, double *a, double *w); |
854 | void cftrec4(int n, double *a, int nw, double *w); |
855 | void cftleaf(int n, int isplt, double *a, int nw, double *w); |
856 | void cftfx41(int n, double *a, int nw, double *w); |
857 | void cftf161(double *a, double *w); |
858 | void cftf081(double *a, double *w); |
859 | void cftb040(double *a); |
860 | void cftx020(double *a); |
861 | #ifdef USE_CDFT_THREADS |
862 | void cftrec4_th(int n, double *a, int nw, double *w); |
863 | #endif /* USE_CDFT_THREADS */ |
864 | |
865 | if (n > 8) { |
866 | if (n > 32) { |
867 | cftb1st(n, a, &w[nw - (n >> 2)]); |
868 | #ifdef USE_CDFT_THREADS |
869 | if (n > CDFT_THREADS_BEGIN_N) { |
870 | cftrec4_th(n, a, nw, w); |
871 | } else |
872 | #endif /* USE_CDFT_THREADS */ |
873 | if (n > 512) { |
874 | cftrec4(n, a, nw, w); |
875 | } else if (n > 128) { |
876 | cftleaf(n, 1, a, nw, w); |
877 | } else { |
878 | cftfx41(n, a, nw, w); |
879 | } |
880 | bitrv2conj(n, ip, a); |
881 | } else if (n == 32) { |
882 | cftf161(a, &w[nw - 8]); |
883 | bitrv216neg(a); |
884 | } else { |
885 | cftf081(a, w); |
886 | bitrv208neg(a); |
887 | } |
888 | } else if (n == 8) { |
889 | cftb040(a); |
890 | } else if (n == 4) { |
891 | cftx020(a); |
892 | } |
893 | } |
894 | |
895 | |
896 | void bitrv2(int n, int *ip, double *a) |
897 | { |
898 | int j, j1, k, k1, l, m, nh, nm; |
899 | double xr, xi, yr, yi; |
900 | |
901 | m = 1; |
902 | for (l = n >> 2; l > 8; l >>= 2) { |
903 | m <<= 1; |
904 | } |
905 | nh = n >> 1; |
906 | nm = 4 * m; |
907 | if (l == 8) { |
908 | for (k = 0; k < m; k++) { |
909 | for (j = 0; j < k; j++) { |
910 | j1 = 4 * j + 2 * ip[m + k]; |
911 | k1 = 4 * k + 2 * ip[m + j]; |
912 | xr = a[j1]; |
913 | xi = a[j1 + 1]; |
914 | yr = a[k1]; |
915 | yi = a[k1 + 1]; |
916 | a[j1] = yr; |
917 | a[j1 + 1] = yi; |
918 | a[k1] = xr; |
919 | a[k1 + 1] = xi; |
920 | j1 += nm; |
921 | k1 += 2 * nm; |
922 | xr = a[j1]; |
923 | xi = a[j1 + 1]; |
924 | yr = a[k1]; |
925 | yi = a[k1 + 1]; |
926 | a[j1] = yr; |
927 | a[j1 + 1] = yi; |
928 | a[k1] = xr; |
929 | a[k1 + 1] = xi; |
930 | j1 += nm; |
931 | k1 -= nm; |
932 | xr = a[j1]; |
933 | xi = a[j1 + 1]; |
934 | yr = a[k1]; |
935 | yi = a[k1 + 1]; |
936 | a[j1] = yr; |
937 | a[j1 + 1] = yi; |
938 | a[k1] = xr; |
939 | a[k1 + 1] = xi; |
940 | j1 += nm; |
941 | k1 += 2 * nm; |
942 | xr = a[j1]; |
943 | xi = a[j1 + 1]; |
944 | yr = a[k1]; |
945 | yi = a[k1 + 1]; |
946 | a[j1] = yr; |
947 | a[j1 + 1] = yi; |
948 | a[k1] = xr; |
949 | a[k1 + 1] = xi; |
950 | j1 += nh; |
951 | k1 += 2; |
952 | xr = a[j1]; |
953 | xi = a[j1 + 1]; |
954 | yr = a[k1]; |
955 | yi = a[k1 + 1]; |
956 | a[j1] = yr; |
957 | a[j1 + 1] = yi; |
958 | a[k1] = xr; |
959 | a[k1 + 1] = xi; |
960 | j1 -= nm; |
961 | k1 -= 2 * nm; |
962 | xr = a[j1]; |
963 | xi = a[j1 + 1]; |
964 | yr = a[k1]; |
965 | yi = a[k1 + 1]; |
966 | a[j1] = yr; |
967 | a[j1 + 1] = yi; |
968 | a[k1] = xr; |
969 | a[k1 + 1] = xi; |
970 | j1 -= nm; |
971 | k1 += nm; |
972 | xr = a[j1]; |
973 | xi = a[j1 + 1]; |
974 | yr = a[k1]; |
975 | yi = a[k1 + 1]; |
976 | a[j1] = yr; |
977 | a[j1 + 1] = yi; |
978 | a[k1] = xr; |
979 | a[k1 + 1] = xi; |
980 | j1 -= nm; |
981 | k1 -= 2 * nm; |
982 | xr = a[j1]; |
983 | xi = a[j1 + 1]; |
984 | yr = a[k1]; |
985 | yi = a[k1 + 1]; |
986 | a[j1] = yr; |
987 | a[j1 + 1] = yi; |
988 | a[k1] = xr; |
989 | a[k1 + 1] = xi; |
990 | j1 += 2; |
991 | k1 += nh; |
992 | xr = a[j1]; |
993 | xi = a[j1 + 1]; |
994 | yr = a[k1]; |
995 | yi = a[k1 + 1]; |
996 | a[j1] = yr; |
997 | a[j1 + 1] = yi; |
998 | a[k1] = xr; |
999 | a[k1 + 1] = xi; |
1000 | j1 += nm; |
1001 | k1 += 2 * nm; |
1002 | xr = a[j1]; |
1003 | xi = a[j1 + 1]; |
1004 | yr = a[k1]; |
1005 | yi = a[k1 + 1]; |
1006 | a[j1] = yr; |
1007 | a[j1 + 1] = yi; |
1008 | a[k1] = xr; |
1009 | a[k1 + 1] = xi; |
1010 | j1 += nm; |
1011 | k1 -= nm; |
1012 | xr = a[j1]; |
1013 | xi = a[j1 + 1]; |
1014 | yr = a[k1]; |
1015 | yi = a[k1 + 1]; |
1016 | a[j1] = yr; |
1017 | a[j1 + 1] = yi; |
1018 | a[k1] = xr; |
1019 | a[k1 + 1] = xi; |
1020 | j1 += nm; |
1021 | k1 += 2 * nm; |
1022 | xr = a[j1]; |
1023 | xi = a[j1 + 1]; |
1024 | yr = a[k1]; |
1025 | yi = a[k1 + 1]; |
1026 | a[j1] = yr; |
1027 | a[j1 + 1] = yi; |
1028 | a[k1] = xr; |
1029 | a[k1 + 1] = xi; |
1030 | j1 -= nh; |
1031 | k1 -= 2; |
1032 | xr = a[j1]; |
1033 | xi = a[j1 + 1]; |
1034 | yr = a[k1]; |
1035 | yi = a[k1 + 1]; |
1036 | a[j1] = yr; |
1037 | a[j1 + 1] = yi; |
1038 | a[k1] = xr; |
1039 | a[k1 + 1] = xi; |
1040 | j1 -= nm; |
1041 | k1 -= 2 * nm; |
1042 | xr = a[j1]; |
1043 | xi = a[j1 + 1]; |
1044 | yr = a[k1]; |
1045 | yi = a[k1 + 1]; |
1046 | a[j1] = yr; |
1047 | a[j1 + 1] = yi; |
1048 | a[k1] = xr; |
1049 | a[k1 + 1] = xi; |
1050 | j1 -= nm; |
1051 | k1 += nm; |
1052 | xr = a[j1]; |
1053 | xi = a[j1 + 1]; |
1054 | yr = a[k1]; |
1055 | yi = a[k1 + 1]; |
1056 | a[j1] = yr; |
1057 | a[j1 + 1] = yi; |
1058 | a[k1] = xr; |
1059 | a[k1 + 1] = xi; |
1060 | j1 -= nm; |
1061 | k1 -= 2 * nm; |
1062 | xr = a[j1]; |
1063 | xi = a[j1 + 1]; |
1064 | yr = a[k1]; |
1065 | yi = a[k1 + 1]; |
1066 | a[j1] = yr; |
1067 | a[j1 + 1] = yi; |
1068 | a[k1] = xr; |
1069 | a[k1 + 1] = xi; |
1070 | } |
1071 | k1 = 4 * k + 2 * ip[m + k]; |
1072 | j1 = k1 + 2; |
1073 | k1 += nh; |
1074 | xr = a[j1]; |
1075 | xi = a[j1 + 1]; |
1076 | yr = a[k1]; |
1077 | yi = a[k1 + 1]; |
1078 | a[j1] = yr; |
1079 | a[j1 + 1] = yi; |
1080 | a[k1] = xr; |
1081 | a[k1 + 1] = xi; |
1082 | j1 += nm; |
1083 | k1 += 2 * nm; |
1084 | xr = a[j1]; |
1085 | xi = a[j1 + 1]; |
1086 | yr = a[k1]; |
1087 | yi = a[k1 + 1]; |
1088 | a[j1] = yr; |
1089 | a[j1 + 1] = yi; |
1090 | a[k1] = xr; |
1091 | a[k1 + 1] = xi; |
1092 | j1 += nm; |
1093 | k1 -= nm; |
1094 | xr = a[j1]; |
1095 | xi = a[j1 + 1]; |
1096 | yr = a[k1]; |
1097 | yi = a[k1 + 1]; |
1098 | a[j1] = yr; |
1099 | a[j1 + 1] = yi; |
1100 | a[k1] = xr; |
1101 | a[k1 + 1] = xi; |
1102 | j1 -= 2; |
1103 | k1 -= nh; |
1104 | xr = a[j1]; |
1105 | xi = a[j1 + 1]; |
1106 | yr = a[k1]; |
1107 | yi = a[k1 + 1]; |
1108 | a[j1] = yr; |
1109 | a[j1 + 1] = yi; |
1110 | a[k1] = xr; |
1111 | a[k1 + 1] = xi; |
1112 | j1 += nh + 2; |
1113 | k1 += nh + 2; |
1114 | xr = a[j1]; |
1115 | xi = a[j1 + 1]; |
1116 | yr = a[k1]; |
1117 | yi = a[k1 + 1]; |
1118 | a[j1] = yr; |
1119 | a[j1 + 1] = yi; |
1120 | a[k1] = xr; |
1121 | a[k1 + 1] = xi; |
1122 | j1 -= nh - nm; |
1123 | k1 += 2 * nm - 2; |
1124 | xr = a[j1]; |
1125 | xi = a[j1 + 1]; |
1126 | yr = a[k1]; |
1127 | yi = a[k1 + 1]; |
1128 | a[j1] = yr; |
1129 | a[j1 + 1] = yi; |
1130 | a[k1] = xr; |
1131 | a[k1 + 1] = xi; |
1132 | } |
1133 | } else { |
1134 | for (k = 0; k < m; k++) { |
1135 | for (j = 0; j < k; j++) { |
1136 | j1 = 4 * j + ip[m + k]; |
1137 | k1 = 4 * k + ip[m + j]; |
1138 | xr = a[j1]; |
1139 | xi = a[j1 + 1]; |
1140 | yr = a[k1]; |
1141 | yi = a[k1 + 1]; |
1142 | a[j1] = yr; |
1143 | a[j1 + 1] = yi; |
1144 | a[k1] = xr; |
1145 | a[k1 + 1] = xi; |
1146 | j1 += nm; |
1147 | k1 += nm; |
1148 | xr = a[j1]; |
1149 | xi = a[j1 + 1]; |
1150 | yr = a[k1]; |
1151 | yi = a[k1 + 1]; |
1152 | a[j1] = yr; |
1153 | a[j1 + 1] = yi; |
1154 | a[k1] = xr; |
1155 | a[k1 + 1] = xi; |
1156 | j1 += nh; |
1157 | k1 += 2; |
1158 | xr = a[j1]; |
1159 | xi = a[j1 + 1]; |
1160 | yr = a[k1]; |
1161 | yi = a[k1 + 1]; |
1162 | a[j1] = yr; |
1163 | a[j1 + 1] = yi; |
1164 | a[k1] = xr; |
1165 | a[k1 + 1] = xi; |
1166 | j1 -= nm; |
1167 | k1 -= nm; |
1168 | xr = a[j1]; |
1169 | xi = a[j1 + 1]; |
1170 | yr = a[k1]; |
1171 | yi = a[k1 + 1]; |
1172 | a[j1] = yr; |
1173 | a[j1 + 1] = yi; |
1174 | a[k1] = xr; |
1175 | a[k1 + 1] = xi; |
1176 | j1 += 2; |
1177 | k1 += nh; |
1178 | xr = a[j1]; |
1179 | xi = a[j1 + 1]; |
1180 | yr = a[k1]; |
1181 | yi = a[k1 + 1]; |
1182 | a[j1] = yr; |
1183 | a[j1 + 1] = yi; |
1184 | a[k1] = xr; |
1185 | a[k1 + 1] = xi; |
1186 | j1 += nm; |
1187 | k1 += nm; |
1188 | xr = a[j1]; |
1189 | xi = a[j1 + 1]; |
1190 | yr = a[k1]; |
1191 | yi = a[k1 + 1]; |
1192 | a[j1] = yr; |
1193 | a[j1 + 1] = yi; |
1194 | a[k1] = xr; |
1195 | a[k1 + 1] = xi; |
1196 | j1 -= nh; |
1197 | k1 -= 2; |
1198 | xr = a[j1]; |
1199 | xi = a[j1 + 1]; |
1200 | yr = a[k1]; |
1201 | yi = a[k1 + 1]; |
1202 | a[j1] = yr; |
1203 | a[j1 + 1] = yi; |
1204 | a[k1] = xr; |
1205 | a[k1 + 1] = xi; |
1206 | j1 -= nm; |
1207 | k1 -= nm; |
1208 | xr = a[j1]; |
1209 | xi = a[j1 + 1]; |
1210 | yr = a[k1]; |
1211 | yi = a[k1 + 1]; |
1212 | a[j1] = yr; |
1213 | a[j1 + 1] = yi; |
1214 | a[k1] = xr; |
1215 | a[k1 + 1] = xi; |
1216 | } |
1217 | k1 = 4 * k + ip[m + k]; |
1218 | j1 = k1 + 2; |
1219 | k1 += nh; |
1220 | xr = a[j1]; |
1221 | xi = a[j1 + 1]; |
1222 | yr = a[k1]; |
1223 | yi = a[k1 + 1]; |
1224 | a[j1] = yr; |
1225 | a[j1 + 1] = yi; |
1226 | a[k1] = xr; |
1227 | a[k1 + 1] = xi; |
1228 | j1 += nm; |
1229 | k1 += nm; |
1230 | xr = a[j1]; |
1231 | xi = a[j1 + 1]; |
1232 | yr = a[k1]; |
1233 | yi = a[k1 + 1]; |
1234 | a[j1] = yr; |
1235 | a[j1 + 1] = yi; |
1236 | a[k1] = xr; |
1237 | a[k1 + 1] = xi; |
1238 | } |
1239 | } |
1240 | } |
1241 | |
1242 | |
1243 | void bitrv2conj(int n, int *ip, double *a) |
1244 | { |
1245 | int j, j1, k, k1, l, m, nh, nm; |
1246 | double xr, xi, yr, yi; |
1247 | |
1248 | m = 1; |
1249 | for (l = n >> 2; l > 8; l >>= 2) { |
1250 | m <<= 1; |
1251 | } |
1252 | nh = n >> 1; |
1253 | nm = 4 * m; |
1254 | if (l == 8) { |
1255 | for (k = 0; k < m; k++) { |
1256 | for (j = 0; j < k; j++) { |
1257 | j1 = 4 * j + 2 * ip[m + k]; |
1258 | k1 = 4 * k + 2 * ip[m + j]; |
1259 | xr = a[j1]; |
1260 | xi = -a[j1 + 1]; |
1261 | yr = a[k1]; |
1262 | yi = -a[k1 + 1]; |
1263 | a[j1] = yr; |
1264 | a[j1 + 1] = yi; |
1265 | a[k1] = xr; |
1266 | a[k1 + 1] = xi; |
1267 | j1 += nm; |
1268 | k1 += 2 * nm; |
1269 | xr = a[j1]; |
1270 | xi = -a[j1 + 1]; |
1271 | yr = a[k1]; |
1272 | yi = -a[k1 + 1]; |
1273 | a[j1] = yr; |
1274 | a[j1 + 1] = yi; |
1275 | a[k1] = xr; |
1276 | a[k1 + 1] = xi; |
1277 | j1 += nm; |
1278 | k1 -= nm; |
1279 | xr = a[j1]; |
1280 | xi = -a[j1 + 1]; |
1281 | yr = a[k1]; |
1282 | yi = -a[k1 + 1]; |
1283 | a[j1] = yr; |
1284 | a[j1 + 1] = yi; |
1285 | a[k1] = xr; |
1286 | a[k1 + 1] = xi; |
1287 | j1 += nm; |
1288 | k1 += 2 * nm; |
1289 | xr = a[j1]; |
1290 | xi = -a[j1 + 1]; |
1291 | yr = a[k1]; |
1292 | yi = -a[k1 + 1]; |
1293 | a[j1] = yr; |
1294 | a[j1 + 1] = yi; |
1295 | a[k1] = xr; |
1296 | a[k1 + 1] = xi; |
1297 | j1 += nh; |
1298 | k1 += 2; |
1299 | xr = a[j1]; |
1300 | xi = -a[j1 + 1]; |
1301 | yr = a[k1]; |
1302 | yi = -a[k1 + 1]; |
1303 | a[j1] = yr; |
1304 | a[j1 + 1] = yi; |
1305 | a[k1] = xr; |
1306 | a[k1 + 1] = xi; |
1307 | j1 -= nm; |
1308 | k1 -= 2 * nm; |
1309 | xr = a[j1]; |
1310 | xi = -a[j1 + 1]; |
1311 | yr = a[k1]; |
1312 | yi = -a[k1 + 1]; |
1313 | a[j1] = yr; |
1314 | a[j1 + 1] = yi; |
1315 | a[k1] = xr; |
1316 | a[k1 + 1] = xi; |
1317 | j1 -= nm; |
1318 | k1 += nm; |
1319 | xr = a[j1]; |
1320 | xi = -a[j1 + 1]; |
1321 | yr = a[k1]; |
1322 | yi = -a[k1 + 1]; |
1323 | a[j1] = yr; |
1324 | a[j1 + 1] = yi; |
1325 | a[k1] = xr; |
1326 | a[k1 + 1] = xi; |
1327 | j1 -= nm; |
1328 | k1 -= 2 * nm; |
1329 | xr = a[j1]; |
1330 | xi = -a[j1 + 1]; |
1331 | yr = a[k1]; |
1332 | yi = -a[k1 + 1]; |
1333 | a[j1] = yr; |
1334 | a[j1 + 1] = yi; |
1335 | a[k1] = xr; |
1336 | a[k1 + 1] = xi; |
1337 | j1 += 2; |
1338 | k1 += nh; |
1339 | xr = a[j1]; |
1340 | xi = -a[j1 + 1]; |
1341 | yr = a[k1]; |
1342 | yi = -a[k1 + 1]; |
1343 | a[j1] = yr; |
1344 | a[j1 + 1] = yi; |
1345 | a[k1] = xr; |
1346 | a[k1 + 1] = xi; |
1347 | j1 += nm; |
1348 | k1 += 2 * nm; |
1349 | xr = a[j1]; |
1350 | xi = -a[j1 + 1]; |
1351 | yr = a[k1]; |
1352 | yi = -a[k1 + 1]; |
1353 | a[j1] = yr; |
1354 | a[j1 + 1] = yi; |
1355 | a[k1] = xr; |
1356 | a[k1 + 1] = xi; |
1357 | j1 += nm; |
1358 | k1 -= nm; |
1359 | xr = a[j1]; |
1360 | xi = -a[j1 + 1]; |
1361 | yr = a[k1]; |
1362 | yi = -a[k1 + 1]; |
1363 | a[j1] = yr; |
1364 | a[j1 + 1] = yi; |
1365 | a[k1] = xr; |
1366 | a[k1 + 1] = xi; |
1367 | j1 += nm; |
1368 | k1 += 2 * nm; |
1369 | xr = a[j1]; |
1370 | xi = -a[j1 + 1]; |
1371 | yr = a[k1]; |
1372 | yi = -a[k1 + 1]; |
1373 | a[j1] = yr; |
1374 | a[j1 + 1] = yi; |
1375 | a[k1] = xr; |
1376 | a[k1 + 1] = xi; |
1377 | j1 -= nh; |
1378 | k1 -= 2; |
1379 | xr = a[j1]; |
1380 | xi = -a[j1 + 1]; |
1381 | yr = a[k1]; |
1382 | yi = -a[k1 + 1]; |
1383 | a[j1] = yr; |
1384 | a[j1 + 1] = yi; |
1385 | a[k1] = xr; |
1386 | a[k1 + 1] = xi; |
1387 | j1 -= nm; |
1388 | k1 -= 2 * nm; |
1389 | xr = a[j1]; |
1390 | xi = -a[j1 + 1]; |
1391 | yr = a[k1]; |
1392 | yi = -a[k1 + 1]; |
1393 | a[j1] = yr; |
1394 | a[j1 + 1] = yi; |
1395 | a[k1] = xr; |
1396 | a[k1 + 1] = xi; |
1397 | j1 -= nm; |
1398 | k1 += nm; |
1399 | xr = a[j1]; |
1400 | xi = -a[j1 + 1]; |
1401 | yr = a[k1]; |
1402 | yi = -a[k1 + 1]; |
1403 | a[j1] = yr; |
1404 | a[j1 + 1] = yi; |
1405 | a[k1] = xr; |
1406 | a[k1 + 1] = xi; |
1407 | j1 -= nm; |
1408 | k1 -= 2 * nm; |
1409 | xr = a[j1]; |
1410 | xi = -a[j1 + 1]; |
1411 | yr = a[k1]; |
1412 | yi = -a[k1 + 1]; |
1413 | a[j1] = yr; |
1414 | a[j1 + 1] = yi; |
1415 | a[k1] = xr; |
1416 | a[k1 + 1] = xi; |
1417 | } |
1418 | k1 = 4 * k + 2 * ip[m + k]; |
1419 | j1 = k1 + 2; |
1420 | k1 += nh; |
1421 | a[j1 - 1] = -a[j1 - 1]; |
1422 | xr = a[j1]; |
1423 | xi = -a[j1 + 1]; |
1424 | yr = a[k1]; |
1425 | yi = -a[k1 + 1]; |
1426 | a[j1] = yr; |
1427 | a[j1 + 1] = yi; |
1428 | a[k1] = xr; |
1429 | a[k1 + 1] = xi; |
1430 | a[k1 + 3] = -a[k1 + 3]; |
1431 | j1 += nm; |
1432 | k1 += 2 * nm; |
1433 | xr = a[j1]; |
1434 | xi = -a[j1 + 1]; |
1435 | yr = a[k1]; |
1436 | yi = -a[k1 + 1]; |
1437 | a[j1] = yr; |
1438 | a[j1 + 1] = yi; |
1439 | a[k1] = xr; |
1440 | a[k1 + 1] = xi; |
1441 | j1 += nm; |
1442 | k1 -= nm; |
1443 | xr = a[j1]; |
1444 | xi = -a[j1 + 1]; |
1445 | yr = a[k1]; |
1446 | yi = -a[k1 + 1]; |
1447 | a[j1] = yr; |
1448 | a[j1 + 1] = yi; |
1449 | a[k1] = xr; |
1450 | a[k1 + 1] = xi; |
1451 | j1 -= 2; |
1452 | k1 -= nh; |
1453 | xr = a[j1]; |
1454 | xi = -a[j1 + 1]; |
1455 | yr = a[k1]; |
1456 | yi = -a[k1 + 1]; |
1457 | a[j1] = yr; |
1458 | a[j1 + 1] = yi; |
1459 | a[k1] = xr; |
1460 | a[k1 + 1] = xi; |
1461 | j1 += nh + 2; |
1462 | k1 += nh + 2; |
1463 | xr = a[j1]; |
1464 | xi = -a[j1 + 1]; |
1465 | yr = a[k1]; |
1466 | yi = -a[k1 + 1]; |
1467 | a[j1] = yr; |
1468 | a[j1 + 1] = yi; |
1469 | a[k1] = xr; |
1470 | a[k1 + 1] = xi; |
1471 | j1 -= nh - nm; |
1472 | k1 += 2 * nm - 2; |
1473 | a[j1 - 1] = -a[j1 - 1]; |
1474 | xr = a[j1]; |
1475 | xi = -a[j1 + 1]; |
1476 | yr = a[k1]; |
1477 | yi = -a[k1 + 1]; |
1478 | a[j1] = yr; |
1479 | a[j1 + 1] = yi; |
1480 | a[k1] = xr; |
1481 | a[k1 + 1] = xi; |
1482 | a[k1 + 3] = -a[k1 + 3]; |
1483 | } |
1484 | } else { |
1485 | for (k = 0; k < m; k++) { |
1486 | for (j = 0; j < k; j++) { |
1487 | j1 = 4 * j + ip[m + k]; |
1488 | k1 = 4 * k + ip[m + j]; |
1489 | xr = a[j1]; |
1490 | xi = -a[j1 + 1]; |
1491 | yr = a[k1]; |
1492 | yi = -a[k1 + 1]; |
1493 | a[j1] = yr; |
1494 | a[j1 + 1] = yi; |
1495 | a[k1] = xr; |
1496 | a[k1 + 1] = xi; |
1497 | j1 += nm; |
1498 | k1 += nm; |
1499 | xr = a[j1]; |
1500 | xi = -a[j1 + 1]; |
1501 | yr = a[k1]; |
1502 | yi = -a[k1 + 1]; |
1503 | a[j1] = yr; |
1504 | a[j1 + 1] = yi; |
1505 | a[k1] = xr; |
1506 | a[k1 + 1] = xi; |
1507 | j1 += nh; |
1508 | k1 += 2; |
1509 | xr = a[j1]; |
1510 | xi = -a[j1 + 1]; |
1511 | yr = a[k1]; |
1512 | yi = -a[k1 + 1]; |
1513 | a[j1] = yr; |
1514 | a[j1 + 1] = yi; |
1515 | a[k1] = xr; |
1516 | a[k1 + 1] = xi; |
1517 | j1 -= nm; |
1518 | k1 -= nm; |
1519 | xr = a[j1]; |
1520 | xi = -a[j1 + 1]; |
1521 | yr = a[k1]; |
1522 | yi = -a[k1 + 1]; |
1523 | a[j1] = yr; |
1524 | a[j1 + 1] = yi; |
1525 | a[k1] = xr; |
1526 | a[k1 + 1] = xi; |
1527 | j1 += 2; |
1528 | k1 += nh; |
1529 | xr = a[j1]; |
1530 | xi = -a[j1 + 1]; |
1531 | yr = a[k1]; |
1532 | yi = -a[k1 + 1]; |
1533 | a[j1] = yr; |
1534 | a[j1 + 1] = yi; |
1535 | a[k1] = xr; |
1536 | a[k1 + 1] = xi; |
1537 | j1 += nm; |
1538 | k1 += nm; |
1539 | xr = a[j1]; |
1540 | xi = -a[j1 + 1]; |
1541 | yr = a[k1]; |
1542 | yi = -a[k1 + 1]; |
1543 | a[j1] = yr; |
1544 | a[j1 + 1] = yi; |
1545 | a[k1] = xr; |
1546 | a[k1 + 1] = xi; |
1547 | j1 -= nh; |
1548 | k1 -= 2; |
1549 | xr = a[j1]; |
1550 | xi = -a[j1 + 1]; |
1551 | yr = a[k1]; |
1552 | yi = -a[k1 + 1]; |
1553 | a[j1] = yr; |
1554 | a[j1 + 1] = yi; |
1555 | a[k1] = xr; |
1556 | a[k1 + 1] = xi; |
1557 | j1 -= nm; |
1558 | k1 -= nm; |
1559 | xr = a[j1]; |
1560 | xi = -a[j1 + 1]; |
1561 | yr = a[k1]; |
1562 | yi = -a[k1 + 1]; |
1563 | a[j1] = yr; |
1564 | a[j1 + 1] = yi; |
1565 | a[k1] = xr; |
1566 | a[k1 + 1] = xi; |
1567 | } |
1568 | k1 = 4 * k + ip[m + k]; |
1569 | j1 = k1 + 2; |
1570 | k1 += nh; |
1571 | a[j1 - 1] = -a[j1 - 1]; |
1572 | xr = a[j1]; |
1573 | xi = -a[j1 + 1]; |
1574 | yr = a[k1]; |
1575 | yi = -a[k1 + 1]; |
1576 | a[j1] = yr; |
1577 | a[j1 + 1] = yi; |
1578 | a[k1] = xr; |
1579 | a[k1 + 1] = xi; |
1580 | a[k1 + 3] = -a[k1 + 3]; |
1581 | j1 += nm; |
1582 | k1 += nm; |
1583 | a[j1 - 1] = -a[j1 - 1]; |
1584 | xr = a[j1]; |
1585 | xi = -a[j1 + 1]; |
1586 | yr = a[k1]; |
1587 | yi = -a[k1 + 1]; |
1588 | a[j1] = yr; |
1589 | a[j1 + 1] = yi; |
1590 | a[k1] = xr; |
1591 | a[k1 + 1] = xi; |
1592 | a[k1 + 3] = -a[k1 + 3]; |
1593 | } |
1594 | } |
1595 | } |
1596 | |
1597 | |
1598 | void bitrv216(double *a) |
1599 | { |
1600 | double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, |
1601 | x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, |
1602 | x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i; |
1603 | |
1604 | x1r = a[2]; |
1605 | x1i = a[3]; |
1606 | x2r = a[4]; |
1607 | x2i = a[5]; |
1608 | x3r = a[6]; |
1609 | x3i = a[7]; |
1610 | x4r = a[8]; |
1611 | x4i = a[9]; |
1612 | x5r = a[10]; |
1613 | x5i = a[11]; |
1614 | x7r = a[14]; |
1615 | x7i = a[15]; |
1616 | x8r = a[16]; |
1617 | x8i = a[17]; |
1618 | x10r = a[20]; |
1619 | x10i = a[21]; |
1620 | x11r = a[22]; |
1621 | x11i = a[23]; |
1622 | x12r = a[24]; |
1623 | x12i = a[25]; |
1624 | x13r = a[26]; |
1625 | x13i = a[27]; |
1626 | x14r = a[28]; |
1627 | x14i = a[29]; |
1628 | a[2] = x8r; |
1629 | a[3] = x8i; |
1630 | a[4] = x4r; |
1631 | a[5] = x4i; |
1632 | a[6] = x12r; |
1633 | a[7] = x12i; |
1634 | a[8] = x2r; |
1635 | a[9] = x2i; |
1636 | a[10] = x10r; |
1637 | a[11] = x10i; |
1638 | a[14] = x14r; |
1639 | a[15] = x14i; |
1640 | a[16] = x1r; |
1641 | a[17] = x1i; |
1642 | a[20] = x5r; |
1643 | a[21] = x5i; |
1644 | a[22] = x13r; |
1645 | a[23] = x13i; |
1646 | a[24] = x3r; |
1647 | a[25] = x3i; |
1648 | a[26] = x11r; |
1649 | a[27] = x11i; |
1650 | a[28] = x7r; |
1651 | a[29] = x7i; |
1652 | } |
1653 | |
1654 | |
1655 | void bitrv216neg(double *a) |
1656 | { |
1657 | double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, |
1658 | x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, |
1659 | x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, |
1660 | x13r, x13i, x14r, x14i, x15r, x15i; |
1661 | |
1662 | x1r = a[2]; |
1663 | x1i = a[3]; |
1664 | x2r = a[4]; |
1665 | x2i = a[5]; |
1666 | x3r = a[6]; |
1667 | x3i = a[7]; |
1668 | x4r = a[8]; |
1669 | x4i = a[9]; |
1670 | x5r = a[10]; |
1671 | x5i = a[11]; |
1672 | x6r = a[12]; |
1673 | x6i = a[13]; |
1674 | x7r = a[14]; |
1675 | x7i = a[15]; |
1676 | x8r = a[16]; |
1677 | x8i = a[17]; |
1678 | x9r = a[18]; |
1679 | x9i = a[19]; |
1680 | x10r = a[20]; |
1681 | x10i = a[21]; |
1682 | x11r = a[22]; |
1683 | x11i = a[23]; |
1684 | x12r = a[24]; |
1685 | x12i = a[25]; |
1686 | x13r = a[26]; |
1687 | x13i = a[27]; |
1688 | x14r = a[28]; |
1689 | x14i = a[29]; |
1690 | x15r = a[30]; |
1691 | x15i = a[31]; |
1692 | a[2] = x15r; |
1693 | a[3] = x15i; |
1694 | a[4] = x7r; |
1695 | a[5] = x7i; |
1696 | a[6] = x11r; |
1697 | a[7] = x11i; |
1698 | a[8] = x3r; |
1699 | a[9] = x3i; |
1700 | a[10] = x13r; |
1701 | a[11] = x13i; |
1702 | a[12] = x5r; |
1703 | a[13] = x5i; |
1704 | a[14] = x9r; |
1705 | a[15] = x9i; |
1706 | a[16] = x1r; |
1707 | a[17] = x1i; |
1708 | a[18] = x14r; |
1709 | a[19] = x14i; |
1710 | a[20] = x6r; |
1711 | a[21] = x6i; |
1712 | a[22] = x10r; |
1713 | a[23] = x10i; |
1714 | a[24] = x2r; |
1715 | a[25] = x2i; |
1716 | a[26] = x12r; |
1717 | a[27] = x12i; |
1718 | a[28] = x4r; |
1719 | a[29] = x4i; |
1720 | a[30] = x8r; |
1721 | a[31] = x8i; |
1722 | } |
1723 | |
1724 | |
1725 | void bitrv208(double *a) |
1726 | { |
1727 | double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i; |
1728 | |
1729 | x1r = a[2]; |
1730 | x1i = a[3]; |
1731 | x3r = a[6]; |
1732 | x3i = a[7]; |
1733 | x4r = a[8]; |
1734 | x4i = a[9]; |
1735 | x6r = a[12]; |
1736 | x6i = a[13]; |
1737 | a[2] = x4r; |
1738 | a[3] = x4i; |
1739 | a[6] = x6r; |
1740 | a[7] = x6i; |
1741 | a[8] = x1r; |
1742 | a[9] = x1i; |
1743 | a[12] = x3r; |
1744 | a[13] = x3i; |
1745 | } |
1746 | |
1747 | |
1748 | void bitrv208neg(double *a) |
1749 | { |
1750 | double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, |
1751 | x5r, x5i, x6r, x6i, x7r, x7i; |
1752 | |
1753 | x1r = a[2]; |
1754 | x1i = a[3]; |
1755 | x2r = a[4]; |
1756 | x2i = a[5]; |
1757 | x3r = a[6]; |
1758 | x3i = a[7]; |
1759 | x4r = a[8]; |
1760 | x4i = a[9]; |
1761 | x5r = a[10]; |
1762 | x5i = a[11]; |
1763 | x6r = a[12]; |
1764 | x6i = a[13]; |
1765 | x7r = a[14]; |
1766 | x7i = a[15]; |
1767 | a[2] = x7r; |
1768 | a[3] = x7i; |
1769 | a[4] = x3r; |
1770 | a[5] = x3i; |
1771 | a[6] = x5r; |
1772 | a[7] = x5i; |
1773 | a[8] = x1r; |
1774 | a[9] = x1i; |
1775 | a[10] = x6r; |
1776 | a[11] = x6i; |
1777 | a[12] = x2r; |
1778 | a[13] = x2i; |
1779 | a[14] = x4r; |
1780 | a[15] = x4i; |
1781 | } |
1782 | |
1783 | |
1784 | void cftf1st(int n, double *a, double *w) |
1785 | { |
1786 | int j, j0, j1, j2, j3, k, m, mh; |
1787 | double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, |
1788 | wd1r, wd1i, wd3r, wd3i; |
1789 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, |
1790 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; |
1791 | |
1792 | mh = n >> 3; |
1793 | m = 2 * mh; |
1794 | j1 = m; |
1795 | j2 = j1 + m; |
1796 | j3 = j2 + m; |
1797 | x0r = a[0] + a[j2]; |
1798 | x0i = a[1] + a[j2 + 1]; |
1799 | x1r = a[0] - a[j2]; |
1800 | x1i = a[1] - a[j2 + 1]; |
1801 | x2r = a[j1] + a[j3]; |
1802 | x2i = a[j1 + 1] + a[j3 + 1]; |
1803 | x3r = a[j1] - a[j3]; |
1804 | x3i = a[j1 + 1] - a[j3 + 1]; |
1805 | a[0] = x0r + x2r; |
1806 | a[1] = x0i + x2i; |
1807 | a[j1] = x0r - x2r; |
1808 | a[j1 + 1] = x0i - x2i; |
1809 | a[j2] = x1r - x3i; |
1810 | a[j2 + 1] = x1i + x3r; |
1811 | a[j3] = x1r + x3i; |
1812 | a[j3 + 1] = x1i - x3r; |
1813 | wn4r = w[1]; |
1814 | csc1 = w[2]; |
1815 | csc3 = w[3]; |
1816 | wd1r = 1; |
1817 | wd1i = 0; |
1818 | wd3r = 1; |
1819 | wd3i = 0; |
1820 | k = 0; |
1821 | for (j = 2; j < mh - 2; j += 4) { |
1822 | k += 4; |
1823 | wk1r = csc1 * (wd1r + w[k]); |
1824 | wk1i = csc1 * (wd1i + w[k + 1]); |
1825 | wk3r = csc3 * (wd3r + w[k + 2]); |
1826 | wk3i = csc3 * (wd3i + w[k + 3]); |
1827 | wd1r = w[k]; |
1828 | wd1i = w[k + 1]; |
1829 | wd3r = w[k + 2]; |
1830 | wd3i = w[k + 3]; |
1831 | j1 = j + m; |
1832 | j2 = j1 + m; |
1833 | j3 = j2 + m; |
1834 | x0r = a[j] + a[j2]; |
1835 | x0i = a[j + 1] + a[j2 + 1]; |
1836 | x1r = a[j] - a[j2]; |
1837 | x1i = a[j + 1] - a[j2 + 1]; |
1838 | y0r = a[j + 2] + a[j2 + 2]; |
1839 | y0i = a[j + 3] + a[j2 + 3]; |
1840 | y1r = a[j + 2] - a[j2 + 2]; |
1841 | y1i = a[j + 3] - a[j2 + 3]; |
1842 | x2r = a[j1] + a[j3]; |
1843 | x2i = a[j1 + 1] + a[j3 + 1]; |
1844 | x3r = a[j1] - a[j3]; |
1845 | x3i = a[j1 + 1] - a[j3 + 1]; |
1846 | y2r = a[j1 + 2] + a[j3 + 2]; |
1847 | y2i = a[j1 + 3] + a[j3 + 3]; |
1848 | y3r = a[j1 + 2] - a[j3 + 2]; |
1849 | y3i = a[j1 + 3] - a[j3 + 3]; |
1850 | a[j] = x0r + x2r; |
1851 | a[j + 1] = x0i + x2i; |
1852 | a[j + 2] = y0r + y2r; |
1853 | a[j + 3] = y0i + y2i; |
1854 | a[j1] = x0r - x2r; |
1855 | a[j1 + 1] = x0i - x2i; |
1856 | a[j1 + 2] = y0r - y2r; |
1857 | a[j1 + 3] = y0i - y2i; |
1858 | x0r = x1r - x3i; |
1859 | x0i = x1i + x3r; |
1860 | a[j2] = wk1r * x0r - wk1i * x0i; |
1861 | a[j2 + 1] = wk1r * x0i + wk1i * x0r; |
1862 | x0r = y1r - y3i; |
1863 | x0i = y1i + y3r; |
1864 | a[j2 + 2] = wd1r * x0r - wd1i * x0i; |
1865 | a[j2 + 3] = wd1r * x0i + wd1i * x0r; |
1866 | x0r = x1r + x3i; |
1867 | x0i = x1i - x3r; |
1868 | a[j3] = wk3r * x0r + wk3i * x0i; |
1869 | a[j3 + 1] = wk3r * x0i - wk3i * x0r; |
1870 | x0r = y1r + y3i; |
1871 | x0i = y1i - y3r; |
1872 | a[j3 + 2] = wd3r * x0r + wd3i * x0i; |
1873 | a[j3 + 3] = wd3r * x0i - wd3i * x0r; |
1874 | j0 = m - j; |
1875 | j1 = j0 + m; |
1876 | j2 = j1 + m; |
1877 | j3 = j2 + m; |
1878 | x0r = a[j0] + a[j2]; |
1879 | x0i = a[j0 + 1] + a[j2 + 1]; |
1880 | x1r = a[j0] - a[j2]; |
1881 | x1i = a[j0 + 1] - a[j2 + 1]; |
1882 | y0r = a[j0 - 2] + a[j2 - 2]; |
1883 | y0i = a[j0 - 1] + a[j2 - 1]; |
1884 | y1r = a[j0 - 2] - a[j2 - 2]; |
1885 | y1i = a[j0 - 1] - a[j2 - 1]; |
1886 | x2r = a[j1] + a[j3]; |
1887 | x2i = a[j1 + 1] + a[j3 + 1]; |
1888 | x3r = a[j1] - a[j3]; |
1889 | x3i = a[j1 + 1] - a[j3 + 1]; |
1890 | y2r = a[j1 - 2] + a[j3 - 2]; |
1891 | y2i = a[j1 - 1] + a[j3 - 1]; |
1892 | y3r = a[j1 - 2] - a[j3 - 2]; |
1893 | y3i = a[j1 - 1] - a[j3 - 1]; |
1894 | a[j0] = x0r + x2r; |
1895 | a[j0 + 1] = x0i + x2i; |
1896 | a[j0 - 2] = y0r + y2r; |
1897 | a[j0 - 1] = y0i + y2i; |
1898 | a[j1] = x0r - x2r; |
1899 | a[j1 + 1] = x0i - x2i; |
1900 | a[j1 - 2] = y0r - y2r; |
1901 | a[j1 - 1] = y0i - y2i; |
1902 | x0r = x1r - x3i; |
1903 | x0i = x1i + x3r; |
1904 | a[j2] = wk1i * x0r - wk1r * x0i; |
1905 | a[j2 + 1] = wk1i * x0i + wk1r * x0r; |
1906 | x0r = y1r - y3i; |
1907 | x0i = y1i + y3r; |
1908 | a[j2 - 2] = wd1i * x0r - wd1r * x0i; |
1909 | a[j2 - 1] = wd1i * x0i + wd1r * x0r; |
1910 | x0r = x1r + x3i; |
1911 | x0i = x1i - x3r; |
1912 | a[j3] = wk3i * x0r + wk3r * x0i; |
1913 | a[j3 + 1] = wk3i * x0i - wk3r * x0r; |
1914 | x0r = y1r + y3i; |
1915 | x0i = y1i - y3r; |
1916 | a[j3 - 2] = wd3i * x0r + wd3r * x0i; |
1917 | a[j3 - 1] = wd3i * x0i - wd3r * x0r; |
1918 | } |
1919 | wk1r = csc1 * (wd1r + wn4r); |
1920 | wk1i = csc1 * (wd1i + wn4r); |
1921 | wk3r = csc3 * (wd3r - wn4r); |
1922 | wk3i = csc3 * (wd3i - wn4r); |
1923 | j0 = mh; |
1924 | j1 = j0 + m; |
1925 | j2 = j1 + m; |
1926 | j3 = j2 + m; |
1927 | x0r = a[j0 - 2] + a[j2 - 2]; |
1928 | x0i = a[j0 - 1] + a[j2 - 1]; |
1929 | x1r = a[j0 - 2] - a[j2 - 2]; |
1930 | x1i = a[j0 - 1] - a[j2 - 1]; |
1931 | x2r = a[j1 - 2] + a[j3 - 2]; |
1932 | x2i = a[j1 - 1] + a[j3 - 1]; |
1933 | x3r = a[j1 - 2] - a[j3 - 2]; |
1934 | x3i = a[j1 - 1] - a[j3 - 1]; |
1935 | a[j0 - 2] = x0r + x2r; |
1936 | a[j0 - 1] = x0i + x2i; |
1937 | a[j1 - 2] = x0r - x2r; |
1938 | a[j1 - 1] = x0i - x2i; |
1939 | x0r = x1r - x3i; |
1940 | x0i = x1i + x3r; |
1941 | a[j2 - 2] = wk1r * x0r - wk1i * x0i; |
1942 | a[j2 - 1] = wk1r * x0i + wk1i * x0r; |
1943 | x0r = x1r + x3i; |
1944 | x0i = x1i - x3r; |
1945 | a[j3 - 2] = wk3r * x0r + wk3i * x0i; |
1946 | a[j3 - 1] = wk3r * x0i - wk3i * x0r; |
1947 | x0r = a[j0] + a[j2]; |
1948 | x0i = a[j0 + 1] + a[j2 + 1]; |
1949 | x1r = a[j0] - a[j2]; |
1950 | x1i = a[j0 + 1] - a[j2 + 1]; |
1951 | x2r = a[j1] + a[j3]; |
1952 | x2i = a[j1 + 1] + a[j3 + 1]; |
1953 | x3r = a[j1] - a[j3]; |
1954 | x3i = a[j1 + 1] - a[j3 + 1]; |
1955 | a[j0] = x0r + x2r; |
1956 | a[j0 + 1] = x0i + x2i; |
1957 | a[j1] = x0r - x2r; |
1958 | a[j1 + 1] = x0i - x2i; |
1959 | x0r = x1r - x3i; |
1960 | x0i = x1i + x3r; |
1961 | a[j2] = wn4r * (x0r - x0i); |
1962 | a[j2 + 1] = wn4r * (x0i + x0r); |
1963 | x0r = x1r + x3i; |
1964 | x0i = x1i - x3r; |
1965 | a[j3] = -wn4r * (x0r + x0i); |
1966 | a[j3 + 1] = -wn4r * (x0i - x0r); |
1967 | x0r = a[j0 + 2] + a[j2 + 2]; |
1968 | x0i = a[j0 + 3] + a[j2 + 3]; |
1969 | x1r = a[j0 + 2] - a[j2 + 2]; |
1970 | x1i = a[j0 + 3] - a[j2 + 3]; |
1971 | x2r = a[j1 + 2] + a[j3 + 2]; |
1972 | x2i = a[j1 + 3] + a[j3 + 3]; |
1973 | x3r = a[j1 + 2] - a[j3 + 2]; |
1974 | x3i = a[j1 + 3] - a[j3 + 3]; |
1975 | a[j0 + 2] = x0r + x2r; |
1976 | a[j0 + 3] = x0i + x2i; |
1977 | a[j1 + 2] = x0r - x2r; |
1978 | a[j1 + 3] = x0i - x2i; |
1979 | x0r = x1r - x3i; |
1980 | x0i = x1i + x3r; |
1981 | a[j2 + 2] = wk1i * x0r - wk1r * x0i; |
1982 | a[j2 + 3] = wk1i * x0i + wk1r * x0r; |
1983 | x0r = x1r + x3i; |
1984 | x0i = x1i - x3r; |
1985 | a[j3 + 2] = wk3i * x0r + wk3r * x0i; |
1986 | a[j3 + 3] = wk3i * x0i - wk3r * x0r; |
1987 | } |
1988 | |
1989 | |
1990 | void cftb1st(int n, double *a, double *w) |
1991 | { |
1992 | int j, j0, j1, j2, j3, k, m, mh; |
1993 | double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, |
1994 | wd1r, wd1i, wd3r, wd3i; |
1995 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, |
1996 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; |
1997 | |
1998 | mh = n >> 3; |
1999 | m = 2 * mh; |
2000 | j1 = m; |
2001 | j2 = j1 + m; |
2002 | j3 = j2 + m; |
2003 | x0r = a[0] + a[j2]; |
2004 | x0i = -a[1] - a[j2 + 1]; |
2005 | x1r = a[0] - a[j2]; |
2006 | x1i = -a[1] + a[j2 + 1]; |
2007 | x2r = a[j1] + a[j3]; |
2008 | x2i = a[j1 + 1] + a[j3 + 1]; |
2009 | x3r = a[j1] - a[j3]; |
2010 | x3i = a[j1 + 1] - a[j3 + 1]; |
2011 | a[0] = x0r + x2r; |
2012 | a[1] = x0i - x2i; |
2013 | a[j1] = x0r - x2r; |
2014 | a[j1 + 1] = x0i + x2i; |
2015 | a[j2] = x1r + x3i; |
2016 | a[j2 + 1] = x1i + x3r; |
2017 | a[j3] = x1r - x3i; |
2018 | a[j3 + 1] = x1i - x3r; |
2019 | wn4r = w[1]; |
2020 | csc1 = w[2]; |
2021 | csc3 = w[3]; |
2022 | wd1r = 1; |
2023 | wd1i = 0; |
2024 | wd3r = 1; |
2025 | wd3i = 0; |
2026 | k = 0; |
2027 | for (j = 2; j < mh - 2; j += 4) { |
2028 | k += 4; |
2029 | wk1r = csc1 * (wd1r + w[k]); |
2030 | wk1i = csc1 * (wd1i + w[k + 1]); |
2031 | wk3r = csc3 * (wd3r + w[k + 2]); |
2032 | wk3i = csc3 * (wd3i + w[k + 3]); |
2033 | wd1r = w[k]; |
2034 | wd1i = w[k + 1]; |
2035 | wd3r = w[k + 2]; |
2036 | wd3i = w[k + 3]; |
2037 | j1 = j + m; |
2038 | j2 = j1 + m; |
2039 | j3 = j2 + m; |
2040 | x0r = a[j] + a[j2]; |
2041 | x0i = -a[j + 1] - a[j2 + 1]; |
2042 | x1r = a[j] - a[j2]; |
2043 | x1i = -a[j + 1] + a[j2 + 1]; |
2044 | y0r = a[j + 2] + a[j2 + 2]; |
2045 | y0i = -a[j + 3] - a[j2 + 3]; |
2046 | y1r = a[j + 2] - a[j2 + 2]; |
2047 | y1i = -a[j + 3] + a[j2 + 3]; |
2048 | x2r = a[j1] + a[j3]; |
2049 | x2i = a[j1 + 1] + a[j3 + 1]; |
2050 | x3r = a[j1] - a[j3]; |
2051 | x3i = a[j1 + 1] - a[j3 + 1]; |
2052 | y2r = a[j1 + 2] + a[j3 + 2]; |
2053 | y2i = a[j1 + 3] + a[j3 + 3]; |
2054 | y3r = a[j1 + 2] - a[j3 + 2]; |
2055 | y3i = a[j1 + 3] - a[j3 + 3]; |
2056 | a[j] = x0r + x2r; |
2057 | a[j + 1] = x0i - x2i; |
2058 | a[j + 2] = y0r + y2r; |
2059 | a[j + 3] = y0i - y2i; |
2060 | a[j1] = x0r - x2r; |
2061 | a[j1 + 1] = x0i + x2i; |
2062 | a[j1 + 2] = y0r - y2r; |
2063 | a[j1 + 3] = y0i + y2i; |
2064 | x0r = x1r + x3i; |
2065 | x0i = x1i + x3r; |
2066 | a[j2] = wk1r * x0r - wk1i * x0i; |
2067 | a[j2 + 1] = wk1r * x0i + wk1i * x0r; |
2068 | x0r = y1r + y3i; |
2069 | x0i = y1i + y3r; |
2070 | a[j2 + 2] = wd1r * x0r - wd1i * x0i; |
2071 | a[j2 + 3] = wd1r * x0i + wd1i * x0r; |
2072 | x0r = x1r - x3i; |
2073 | x0i = x1i - x3r; |
2074 | a[j3] = wk3r * x0r + wk3i * x0i; |
2075 | a[j3 + 1] = wk3r * x0i - wk3i * x0r; |
2076 | x0r = y1r - y3i; |
2077 | x0i = y1i - y3r; |
2078 | a[j3 + 2] = wd3r * x0r + wd3i * x0i; |
2079 | a[j3 + 3] = wd3r * x0i - wd3i * x0r; |
2080 | j0 = m - j; |
2081 | j1 = j0 + m; |
2082 | j2 = j1 + m; |
2083 | j3 = j2 + m; |
2084 | x0r = a[j0] + a[j2]; |
2085 | x0i = -a[j0 + 1] - a[j2 + 1]; |
2086 | x1r = a[j0] - a[j2]; |
2087 | x1i = -a[j0 + 1] + a[j2 + 1]; |
2088 | y0r = a[j0 - 2] + a[j2 - 2]; |
2089 | y0i = -a[j0 - 1] - a[j2 - 1]; |
2090 | y1r = a[j0 - 2] - a[j2 - 2]; |
2091 | y1i = -a[j0 - 1] + a[j2 - 1]; |
2092 | x2r = a[j1] + a[j3]; |
2093 | x2i = a[j1 + 1] + a[j3 + 1]; |
2094 | x3r = a[j1] - a[j3]; |
2095 | x3i = a[j1 + 1] - a[j3 + 1]; |
2096 | y2r = a[j1 - 2] + a[j3 - 2]; |
2097 | y2i = a[j1 - 1] + a[j3 - 1]; |
2098 | y3r = a[j1 - 2] - a[j3 - 2]; |
2099 | y3i = a[j1 - 1] - a[j3 - 1]; |
2100 | a[j0] = x0r + x2r; |
2101 | a[j0 + 1] = x0i - x2i; |
2102 | a[j0 - 2] = y0r + y2r; |
2103 | a[j0 - 1] = y0i - y2i; |
2104 | a[j1] = x0r - x2r; |
2105 | a[j1 + 1] = x0i + x2i; |
2106 | a[j1 - 2] = y0r - y2r; |
2107 | a[j1 - 1] = y0i + y2i; |
2108 | x0r = x1r + x3i; |
2109 | x0i = x1i + x3r; |
2110 | a[j2] = wk1i * x0r - wk1r * x0i; |
2111 | a[j2 + 1] = wk1i * x0i + wk1r * x0r; |
2112 | x0r = y1r + y3i; |
2113 | x0i = y1i + y3r; |
2114 | a[j2 - 2] = wd1i * x0r - wd1r * x0i; |
2115 | a[j2 - 1] = wd1i * x0i + wd1r * x0r; |
2116 | x0r = x1r - x3i; |
2117 | x0i = x1i - x3r; |
2118 | a[j3] = wk3i * x0r + wk3r * x0i; |
2119 | a[j3 + 1] = wk3i * x0i - wk3r * x0r; |
2120 | x0r = y1r - y3i; |
2121 | x0i = y1i - y3r; |
2122 | a[j3 - 2] = wd3i * x0r + wd3r * x0i; |
2123 | a[j3 - 1] = wd3i * x0i - wd3r * x0r; |
2124 | } |
2125 | wk1r = csc1 * (wd1r + wn4r); |
2126 | wk1i = csc1 * (wd1i + wn4r); |
2127 | wk3r = csc3 * (wd3r - wn4r); |
2128 | wk3i = csc3 * (wd3i - wn4r); |
2129 | j0 = mh; |
2130 | j1 = j0 + m; |
2131 | j2 = j1 + m; |
2132 | j3 = j2 + m; |
2133 | x0r = a[j0 - 2] + a[j2 - 2]; |
2134 | x0i = -a[j0 - 1] - a[j2 - 1]; |
2135 | x1r = a[j0 - 2] - a[j2 - 2]; |
2136 | x1i = -a[j0 - 1] + a[j2 - 1]; |
2137 | x2r = a[j1 - 2] + a[j3 - 2]; |
2138 | x2i = a[j1 - 1] + a[j3 - 1]; |
2139 | x3r = a[j1 - 2] - a[j3 - 2]; |
2140 | x3i = a[j1 - 1] - a[j3 - 1]; |
2141 | a[j0 - 2] = x0r + x2r; |
2142 | a[j0 - 1] = x0i - x2i; |
2143 | a[j1 - 2] = x0r - x2r; |
2144 | a[j1 - 1] = x0i + x2i; |
2145 | x0r = x1r + x3i; |
2146 | x0i = x1i + x3r; |
2147 | a[j2 - 2] = wk1r * x0r - wk1i * x0i; |
2148 | a[j2 - 1] = wk1r * x0i + wk1i * x0r; |
2149 | x0r = x1r - x3i; |
2150 | x0i = x1i - x3r; |
2151 | a[j3 - 2] = wk3r * x0r + wk3i * x0i; |
2152 | a[j3 - 1] = wk3r * x0i - wk3i * x0r; |
2153 | x0r = a[j0] + a[j2]; |
2154 | x0i = -a[j0 + 1] - a[j2 + 1]; |
2155 | x1r = a[j0] - a[j2]; |
2156 | x1i = -a[j0 + 1] + a[j2 + 1]; |
2157 | x2r = a[j1] + a[j3]; |
2158 | x2i = a[j1 + 1] + a[j3 + 1]; |
2159 | x3r = a[j1] - a[j3]; |
2160 | x3i = a[j1 + 1] - a[j3 + 1]; |
2161 | a[j0] = x0r + x2r; |
2162 | a[j0 + 1] = x0i - x2i; |
2163 | a[j1] = x0r - x2r; |
2164 | a[j1 + 1] = x0i + x2i; |
2165 | x0r = x1r + x3i; |
2166 | x0i = x1i + x3r; |
2167 | a[j2] = wn4r * (x0r - x0i); |
2168 | a[j2 + 1] = wn4r * (x0i + x0r); |
2169 | x0r = x1r - x3i; |
2170 | x0i = x1i - x3r; |
2171 | a[j3] = -wn4r * (x0r + x0i); |
2172 | a[j3 + 1] = -wn4r * (x0i - x0r); |
2173 | x0r = a[j0 + 2] + a[j2 + 2]; |
2174 | x0i = -a[j0 + 3] - a[j2 + 3]; |
2175 | x1r = a[j0 + 2] - a[j2 + 2]; |
2176 | x1i = -a[j0 + 3] + a[j2 + 3]; |
2177 | x2r = a[j1 + 2] + a[j3 + 2]; |
2178 | x2i = a[j1 + 3] + a[j3 + 3]; |
2179 | x3r = a[j1 + 2] - a[j3 + 2]; |
2180 | x3i = a[j1 + 3] - a[j3 + 3]; |
2181 | a[j0 + 2] = x0r + x2r; |
2182 | a[j0 + 3] = x0i - x2i; |
2183 | a[j1 + 2] = x0r - x2r; |
2184 | a[j1 + 3] = x0i + x2i; |
2185 | x0r = x1r + x3i; |
2186 | x0i = x1i + x3r; |
2187 | a[j2 + 2] = wk1i * x0r - wk1r * x0i; |
2188 | a[j2 + 3] = wk1i * x0i + wk1r * x0r; |
2189 | x0r = x1r - x3i; |
2190 | x0i = x1i - x3r; |
2191 | a[j3 + 2] = wk3i * x0r + wk3r * x0i; |
2192 | a[j3 + 3] = wk3i * x0i - wk3r * x0r; |
2193 | } |
2194 | |
2195 | |
2196 | #ifdef USE_CDFT_THREADS |
2197 | struct cdft_arg_st { |
2198 | int n0; |
2199 | int n; |
2200 | double *a; |
2201 | int nw; |
2202 | double *w; |
2203 | }; |
2204 | typedef struct cdft_arg_st cdft_arg_t; |
2205 | |
2206 | |
2207 | void cftrec4_th(int n, double *a, int nw, double *w) |
2208 | { |
2209 | void *cftrec1_th(void *p); |
2210 | void *cftrec2_th(void *p); |
2211 | int i, idiv4, m, nthread; |
2212 | cdft_thread_t th[4]; |
2213 | cdft_arg_t ag[4]; |
2214 | |
2215 | nthread = 2; |
2216 | idiv4 = 0; |
2217 | m = n >> 1; |
2218 | if (n > CDFT_4THREADS_BEGIN_N) { |
2219 | nthread = 4; |
2220 | idiv4 = 1; |
2221 | m >>= 1; |
2222 | } |
2223 | for (i = 0; i < nthread; i++) { |
2224 | ag[i].n0 = n; |
2225 | ag[i].n = m; |
2226 | ag[i].a = &a[i * m]; |
2227 | ag[i].nw = nw; |
2228 | ag[i].w = w; |
2229 | if (i != idiv4) { |
2230 | cdft_thread_create(&th[i], cftrec1_th, &ag[i]); |
2231 | } else { |
2232 | cdft_thread_create(&th[i], cftrec2_th, &ag[i]); |
2233 | } |
2234 | } |
2235 | for (i = 0; i < nthread; i++) { |
2236 | cdft_thread_wait(th[i]); |
2237 | } |
2238 | } |
2239 | |
2240 | |
2241 | void *cftrec1_th(void *p) |
2242 | { |
2243 | int cfttree(int n, int j, int k, double *a, int nw, double *w); |
2244 | void cftleaf(int n, int isplt, double *a, int nw, double *w); |
2245 | void cftmdl1(int n, double *a, double *w); |
2246 | int isplt, j, k, m, n, n0, nw; |
2247 | double *a, *w; |
2248 | |
2249 | n0 = ((cdft_arg_t *) p)->n0; |
2250 | n = ((cdft_arg_t *) p)->n; |
2251 | a = ((cdft_arg_t *) p)->a; |
2252 | nw = ((cdft_arg_t *) p)->nw; |
2253 | w = ((cdft_arg_t *) p)->w; |
2254 | m = n0; |
2255 | while (m > 512) { |
2256 | m >>= 2; |
2257 | cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); |
2258 | } |
2259 | cftleaf(m, 1, &a[n - m], nw, w); |
2260 | k = 0; |
2261 | for (j = n - m; j > 0; j -= m) { |
2262 | k++; |
2263 | isplt = cfttree(m, j, k, a, nw, w); |
2264 | cftleaf(m, isplt, &a[j - m], nw, w); |
2265 | } |
2266 | return (void *) 0; |
2267 | } |
2268 | |
2269 | |
2270 | void *cftrec2_th(void *p) |
2271 | { |
2272 | int cfttree(int n, int j, int k, double *a, int nw, double *w); |
2273 | void cftleaf(int n, int isplt, double *a, int nw, double *w); |
2274 | void cftmdl2(int n, double *a, double *w); |
2275 | int isplt, j, k, m, n, n0, nw; |
2276 | double *a, *w; |
2277 | |
2278 | n0 = ((cdft_arg_t *) p)->n0; |
2279 | n = ((cdft_arg_t *) p)->n; |
2280 | a = ((cdft_arg_t *) p)->a; |
2281 | nw = ((cdft_arg_t *) p)->nw; |
2282 | w = ((cdft_arg_t *) p)->w; |
2283 | k = 1; |
2284 | m = n0; |
2285 | while (m > 512) { |
2286 | m >>= 2; |
2287 | k <<= 2; |
2288 | cftmdl2(m, &a[n - m], &w[nw - m]); |
2289 | } |
2290 | cftleaf(m, 0, &a[n - m], nw, w); |
2291 | k >>= 1; |
2292 | for (j = n - m; j > 0; j -= m) { |
2293 | k++; |
2294 | isplt = cfttree(m, j, k, a, nw, w); |
2295 | cftleaf(m, isplt, &a[j - m], nw, w); |
2296 | } |
2297 | return (void *) 0; |
2298 | } |
2299 | #endif /* USE_CDFT_THREADS */ |
2300 | |
2301 | |
2302 | void cftrec4(int n, double *a, int nw, double *w) |
2303 | { |
2304 | int cfttree(int n, int j, int k, double *a, int nw, double *w); |
2305 | void cftleaf(int n, int isplt, double *a, int nw, double *w); |
2306 | void cftmdl1(int n, double *a, double *w); |
2307 | int isplt, j, k, m; |
2308 | |
2309 | m = n; |
2310 | while (m > 512) { |
2311 | m >>= 2; |
2312 | cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); |
2313 | } |
2314 | cftleaf(m, 1, &a[n - m], nw, w); |
2315 | k = 0; |
2316 | for (j = n - m; j > 0; j -= m) { |
2317 | k++; |
2318 | isplt = cfttree(m, j, k, a, nw, w); |
2319 | cftleaf(m, isplt, &a[j - m], nw, w); |
2320 | } |
2321 | } |
2322 | |
2323 | |
2324 | int cfttree(int n, int j, int k, double *a, int nw, double *w) |
2325 | { |
2326 | void cftmdl1(int n, double *a, double *w); |
2327 | void cftmdl2(int n, double *a, double *w); |
2328 | int i, isplt, m; |
2329 | |
2330 | if ((k & 3) != 0) { |
2331 | isplt = k & 1; |
2332 | if (isplt != 0) { |
2333 | cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]); |
2334 | } else { |
2335 | cftmdl2(n, &a[j - n], &w[nw - n]); |
2336 | } |
2337 | } else { |
2338 | m = n; |
2339 | for (i = k; (i & 3) == 0; i >>= 2) { |
2340 | m <<= 2; |
2341 | } |
2342 | isplt = i & 1; |
2343 | if (isplt != 0) { |
2344 | while (m > 128) { |
2345 | cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]); |
2346 | m >>= 2; |
2347 | } |
2348 | } else { |
2349 | while (m > 128) { |
2350 | cftmdl2(m, &a[j - m], &w[nw - m]); |
2351 | m >>= 2; |
2352 | } |
2353 | } |
2354 | } |
2355 | return isplt; |
2356 | } |
2357 | |
2358 | |
2359 | void cftleaf(int n, int isplt, double *a, int nw, double *w) |
2360 | { |
2361 | void cftmdl1(int n, double *a, double *w); |
2362 | void cftmdl2(int n, double *a, double *w); |
2363 | void cftf161(double *a, double *w); |
2364 | void cftf162(double *a, double *w); |
2365 | void cftf081(double *a, double *w); |
2366 | void cftf082(double *a, double *w); |
2367 | |
2368 | if (n == 512) { |
2369 | cftmdl1(128, a, &w[nw - 64]); |
2370 | cftf161(a, &w[nw - 8]); |
2371 | cftf162(&a[32], &w[nw - 32]); |
2372 | cftf161(&a[64], &w[nw - 8]); |
2373 | cftf161(&a[96], &w[nw - 8]); |
2374 | cftmdl2(128, &a[128], &w[nw - 128]); |
2375 | cftf161(&a[128], &w[nw - 8]); |
2376 | cftf162(&a[160], &w[nw - 32]); |
2377 | cftf161(&a[192], &w[nw - 8]); |
2378 | cftf162(&a[224], &w[nw - 32]); |
2379 | cftmdl1(128, &a[256], &w[nw - 64]); |
2380 | cftf161(&a[256], &w[nw - 8]); |
2381 | cftf162(&a[288], &w[nw - 32]); |
2382 | cftf161(&a[320], &w[nw - 8]); |
2383 | cftf161(&a[352], &w[nw - 8]); |
2384 | if (isplt != 0) { |
2385 | cftmdl1(128, &a[384], &w[nw - 64]); |
2386 | cftf161(&a[480], &w[nw - 8]); |
2387 | } else { |
2388 | cftmdl2(128, &a[384], &w[nw - 128]); |
2389 | cftf162(&a[480], &w[nw - 32]); |
2390 | } |
2391 | cftf161(&a[384], &w[nw - 8]); |
2392 | cftf162(&a[416], &w[nw - 32]); |
2393 | cftf161(&a[448], &w[nw - 8]); |
2394 | } else { |
2395 | cftmdl1(64, a, &w[nw - 32]); |
2396 | cftf081(a, &w[nw - 8]); |
2397 | cftf082(&a[16], &w[nw - 8]); |
2398 | cftf081(&a[32], &w[nw - 8]); |
2399 | cftf081(&a[48], &w[nw - 8]); |
2400 | cftmdl2(64, &a[64], &w[nw - 64]); |
2401 | cftf081(&a[64], &w[nw - 8]); |
2402 | cftf082(&a[80], &w[nw - 8]); |
2403 | cftf081(&a[96], &w[nw - 8]); |
2404 | cftf082(&a[112], &w[nw - 8]); |
2405 | cftmdl1(64, &a[128], &w[nw - 32]); |
2406 | cftf081(&a[128], &w[nw - 8]); |
2407 | cftf082(&a[144], &w[nw - 8]); |
2408 | cftf081(&a[160], &w[nw - 8]); |
2409 | cftf081(&a[176], &w[nw - 8]); |
2410 | if (isplt != 0) { |
2411 | cftmdl1(64, &a[192], &w[nw - 32]); |
2412 | cftf081(&a[240], &w[nw - 8]); |
2413 | } else { |
2414 | cftmdl2(64, &a[192], &w[nw - 64]); |
2415 | cftf082(&a[240], &w[nw - 8]); |
2416 | } |
2417 | cftf081(&a[192], &w[nw - 8]); |
2418 | cftf082(&a[208], &w[nw - 8]); |
2419 | cftf081(&a[224], &w[nw - 8]); |
2420 | } |
2421 | } |
2422 | |
2423 | |
2424 | void cftmdl1(int n, double *a, double *w) |
2425 | { |
2426 | int j, j0, j1, j2, j3, k, m, mh; |
2427 | double wn4r, wk1r, wk1i, wk3r, wk3i; |
2428 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
2429 | |
2430 | mh = n >> 3; |
2431 | m = 2 * mh; |
2432 | j1 = m; |
2433 | j2 = j1 + m; |
2434 | j3 = j2 + m; |
2435 | x0r = a[0] + a[j2]; |
2436 | x0i = a[1] + a[j2 + 1]; |
2437 | x1r = a[0] - a[j2]; |
2438 | x1i = a[1] - a[j2 + 1]; |
2439 | x2r = a[j1] + a[j3]; |
2440 | x2i = a[j1 + 1] + a[j3 + 1]; |
2441 | x3r = a[j1] - a[j3]; |
2442 | x3i = a[j1 + 1] - a[j3 + 1]; |
2443 | a[0] = x0r + x2r; |
2444 | a[1] = x0i + x2i; |
2445 | a[j1] = x0r - x2r; |
2446 | a[j1 + 1] = x0i - x2i; |
2447 | a[j2] = x1r - x3i; |
2448 | a[j2 + 1] = x1i + x3r; |
2449 | a[j3] = x1r + x3i; |
2450 | a[j3 + 1] = x1i - x3r; |
2451 | wn4r = w[1]; |
2452 | k = 0; |
2453 | for (j = 2; j < mh; j += 2) { |
2454 | k += 4; |
2455 | wk1r = w[k]; |
2456 | wk1i = w[k + 1]; |
2457 | wk3r = w[k + 2]; |
2458 | wk3i = w[k + 3]; |
2459 | j1 = j + m; |
2460 | j2 = j1 + m; |
2461 | j3 = j2 + m; |
2462 | x0r = a[j] + a[j2]; |
2463 | x0i = a[j + 1] + a[j2 + 1]; |
2464 | x1r = a[j] - a[j2]; |
2465 | x1i = a[j + 1] - a[j2 + 1]; |
2466 | x2r = a[j1] + a[j3]; |
2467 | x2i = a[j1 + 1] + a[j3 + 1]; |
2468 | x3r = a[j1] - a[j3]; |
2469 | x3i = a[j1 + 1] - a[j3 + 1]; |
2470 | a[j] = x0r + x2r; |
2471 | a[j + 1] = x0i + x2i; |
2472 | a[j1] = x0r - x2r; |
2473 | a[j1 + 1] = x0i - x2i; |
2474 | x0r = x1r - x3i; |
2475 | x0i = x1i + x3r; |
2476 | a[j2] = wk1r * x0r - wk1i * x0i; |
2477 | a[j2 + 1] = wk1r * x0i + wk1i * x0r; |
2478 | x0r = x1r + x3i; |
2479 | x0i = x1i - x3r; |
2480 | a[j3] = wk3r * x0r + wk3i * x0i; |
2481 | a[j3 + 1] = wk3r * x0i - wk3i * x0r; |
2482 | j0 = m - j; |
2483 | j1 = j0 + m; |
2484 | j2 = j1 + m; |
2485 | j3 = j2 + m; |
2486 | x0r = a[j0] + a[j2]; |
2487 | x0i = a[j0 + 1] + a[j2 + 1]; |
2488 | x1r = a[j0] - a[j2]; |
2489 | x1i = a[j0 + 1] - a[j2 + 1]; |
2490 | x2r = a[j1] + a[j3]; |
2491 | x2i = a[j1 + 1] + a[j3 + 1]; |
2492 | x3r = a[j1] - a[j3]; |
2493 | x3i = a[j1 + 1] - a[j3 + 1]; |
2494 | a[j0] = x0r + x2r; |
2495 | a[j0 + 1] = x0i + x2i; |
2496 | a[j1] = x0r - x2r; |
2497 | a[j1 + 1] = x0i - x2i; |
2498 | x0r = x1r - x3i; |
2499 | x0i = x1i + x3r; |
2500 | a[j2] = wk1i * x0r - wk1r * x0i; |
2501 | a[j2 + 1] = wk1i * x0i + wk1r * x0r; |
2502 | x0r = x1r + x3i; |
2503 | x0i = x1i - x3r; |
2504 | a[j3] = wk3i * x0r + wk3r * x0i; |
2505 | a[j3 + 1] = wk3i * x0i - wk3r * x0r; |
2506 | } |
2507 | j0 = mh; |
2508 | j1 = j0 + m; |
2509 | j2 = j1 + m; |
2510 | j3 = j2 + m; |
2511 | x0r = a[j0] + a[j2]; |
2512 | x0i = a[j0 + 1] + a[j2 + 1]; |
2513 | x1r = a[j0] - a[j2]; |
2514 | x1i = a[j0 + 1] - a[j2 + 1]; |
2515 | x2r = a[j1] + a[j3]; |
2516 | x2i = a[j1 + 1] + a[j3 + 1]; |
2517 | x3r = a[j1] - a[j3]; |
2518 | x3i = a[j1 + 1] - a[j3 + 1]; |
2519 | a[j0] = x0r + x2r; |
2520 | a[j0 + 1] = x0i + x2i; |
2521 | a[j1] = x0r - x2r; |
2522 | a[j1 + 1] = x0i - x2i; |
2523 | x0r = x1r - x3i; |
2524 | x0i = x1i + x3r; |
2525 | a[j2] = wn4r * (x0r - x0i); |
2526 | a[j2 + 1] = wn4r * (x0i + x0r); |
2527 | x0r = x1r + x3i; |
2528 | x0i = x1i - x3r; |
2529 | a[j3] = -wn4r * (x0r + x0i); |
2530 | a[j3 + 1] = -wn4r * (x0i - x0r); |
2531 | } |
2532 | |
2533 | |
2534 | void cftmdl2(int n, double *a, double *w) |
2535 | { |
2536 | int j, j0, j1, j2, j3, k, kr, m, mh; |
2537 | double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; |
2538 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i; |
2539 | |
2540 | mh = n >> 3; |
2541 | m = 2 * mh; |
2542 | wn4r = w[1]; |
2543 | j1 = m; |
2544 | j2 = j1 + m; |
2545 | j3 = j2 + m; |
2546 | x0r = a[0] - a[j2 + 1]; |
2547 | x0i = a[1] + a[j2]; |
2548 | x1r = a[0] + a[j2 + 1]; |
2549 | x1i = a[1] - a[j2]; |
2550 | x2r = a[j1] - a[j3 + 1]; |
2551 | x2i = a[j1 + 1] + a[j3]; |
2552 | x3r = a[j1] + a[j3 + 1]; |
2553 | x3i = a[j1 + 1] - a[j3]; |
2554 | y0r = wn4r * (x2r - x2i); |
2555 | y0i = wn4r * (x2i + x2r); |
2556 | a[0] = x0r + y0r; |
2557 | a[1] = x0i + y0i; |
2558 | a[j1] = x0r - y0r; |
2559 | a[j1 + 1] = x0i - y0i; |
2560 | y0r = wn4r * (x3r - x3i); |
2561 | y0i = wn4r * (x3i + x3r); |
2562 | a[j2] = x1r - y0i; |
2563 | a[j2 + 1] = x1i + y0r; |
2564 | a[j3] = x1r + y0i; |
2565 | a[j3 + 1] = x1i - y0r; |
2566 | k = 0; |
2567 | kr = 2 * m; |
2568 | for (j = 2; j < mh; j += 2) { |
2569 | k += 4; |
2570 | wk1r = w[k]; |
2571 | wk1i = w[k + 1]; |
2572 | wk3r = w[k + 2]; |
2573 | wk3i = w[k + 3]; |
2574 | kr -= 4; |
2575 | wd1i = w[kr]; |
2576 | wd1r = w[kr + 1]; |
2577 | wd3i = w[kr + 2]; |
2578 | wd3r = w[kr + 3]; |
2579 | j1 = j + m; |
2580 | j2 = j1 + m; |
2581 | j3 = j2 + m; |
2582 | x0r = a[j] - a[j2 + 1]; |
2583 | x0i = a[j + 1] + a[j2]; |
2584 | x1r = a[j] + a[j2 + 1]; |
2585 | x1i = a[j + 1] - a[j2]; |
2586 | x2r = a[j1] - a[j3 + 1]; |
2587 | x2i = a[j1 + 1] + a[j3]; |
2588 | x3r = a[j1] + a[j3 + 1]; |
2589 | x3i = a[j1 + 1] - a[j3]; |
2590 | y0r = wk1r * x0r - wk1i * x0i; |
2591 | y0i = wk1r * x0i + wk1i * x0r; |
2592 | y2r = wd1r * x2r - wd1i * x2i; |
2593 | y2i = wd1r * x2i + wd1i * x2r; |
2594 | a[j] = y0r + y2r; |
2595 | a[j + 1] = y0i + y2i; |
2596 | a[j1] = y0r - y2r; |
2597 | a[j1 + 1] = y0i - y2i; |
2598 | y0r = wk3r * x1r + wk3i * x1i; |
2599 | y0i = wk3r * x1i - wk3i * x1r; |
2600 | y2r = wd3r * x3r + wd3i * x3i; |
2601 | y2i = wd3r * x3i - wd3i * x3r; |
2602 | a[j2] = y0r + y2r; |
2603 | a[j2 + 1] = y0i + y2i; |
2604 | a[j3] = y0r - y2r; |
2605 | a[j3 + 1] = y0i - y2i; |
2606 | j0 = m - j; |
2607 | j1 = j0 + m; |
2608 | j2 = j1 + m; |
2609 | j3 = j2 + m; |
2610 | x0r = a[j0] - a[j2 + 1]; |
2611 | x0i = a[j0 + 1] + a[j2]; |
2612 | x1r = a[j0] + a[j2 + 1]; |
2613 | x1i = a[j0 + 1] - a[j2]; |
2614 | x2r = a[j1] - a[j3 + 1]; |
2615 | x2i = a[j1 + 1] + a[j3]; |
2616 | x3r = a[j1] + a[j3 + 1]; |
2617 | x3i = a[j1 + 1] - a[j3]; |
2618 | y0r = wd1i * x0r - wd1r * x0i; |
2619 | y0i = wd1i * x0i + wd1r * x0r; |
2620 | y2r = wk1i * x2r - wk1r * x2i; |
2621 | y2i = wk1i * x2i + wk1r * x2r; |
2622 | a[j0] = y0r + y2r; |
2623 | a[j0 + 1] = y0i + y2i; |
2624 | a[j1] = y0r - y2r; |
2625 | a[j1 + 1] = y0i - y2i; |
2626 | y0r = wd3i * x1r + wd3r * x1i; |
2627 | y0i = wd3i * x1i - wd3r * x1r; |
2628 | y2r = wk3i * x3r + wk3r * x3i; |
2629 | y2i = wk3i * x3i - wk3r * x3r; |
2630 | a[j2] = y0r + y2r; |
2631 | a[j2 + 1] = y0i + y2i; |
2632 | a[j3] = y0r - y2r; |
2633 | a[j3 + 1] = y0i - y2i; |
2634 | } |
2635 | wk1r = w[m]; |
2636 | wk1i = w[m + 1]; |
2637 | j0 = mh; |
2638 | j1 = j0 + m; |
2639 | j2 = j1 + m; |
2640 | j3 = j2 + m; |
2641 | x0r = a[j0] - a[j2 + 1]; |
2642 | x0i = a[j0 + 1] + a[j2]; |
2643 | x1r = a[j0] + a[j2 + 1]; |
2644 | x1i = a[j0 + 1] - a[j2]; |
2645 | x2r = a[j1] - a[j3 + 1]; |
2646 | x2i = a[j1 + 1] + a[j3]; |
2647 | x3r = a[j1] + a[j3 + 1]; |
2648 | x3i = a[j1 + 1] - a[j3]; |
2649 | y0r = wk1r * x0r - wk1i * x0i; |
2650 | y0i = wk1r * x0i + wk1i * x0r; |
2651 | y2r = wk1i * x2r - wk1r * x2i; |
2652 | y2i = wk1i * x2i + wk1r * x2r; |
2653 | a[j0] = y0r + y2r; |
2654 | a[j0 + 1] = y0i + y2i; |
2655 | a[j1] = y0r - y2r; |
2656 | a[j1 + 1] = y0i - y2i; |
2657 | y0r = wk1i * x1r - wk1r * x1i; |
2658 | y0i = wk1i * x1i + wk1r * x1r; |
2659 | y2r = wk1r * x3r - wk1i * x3i; |
2660 | y2i = wk1r * x3i + wk1i * x3r; |
2661 | a[j2] = y0r - y2r; |
2662 | a[j2 + 1] = y0i - y2i; |
2663 | a[j3] = y0r + y2r; |
2664 | a[j3 + 1] = y0i + y2i; |
2665 | } |
2666 | |
2667 | |
2668 | void cftfx41(int n, double *a, int nw, double *w) |
2669 | { |
2670 | void cftf161(double *a, double *w); |
2671 | void cftf162(double *a, double *w); |
2672 | void cftf081(double *a, double *w); |
2673 | void cftf082(double *a, double *w); |
2674 | |
2675 | if (n == 128) { |
2676 | cftf161(a, &w[nw - 8]); |
2677 | cftf162(&a[32], &w[nw - 32]); |
2678 | cftf161(&a[64], &w[nw - 8]); |
2679 | cftf161(&a[96], &w[nw - 8]); |
2680 | } else { |
2681 | cftf081(a, &w[nw - 8]); |
2682 | cftf082(&a[16], &w[nw - 8]); |
2683 | cftf081(&a[32], &w[nw - 8]); |
2684 | cftf081(&a[48], &w[nw - 8]); |
2685 | } |
2686 | } |
2687 | |
2688 | |
2689 | void cftf161(double *a, double *w) |
2690 | { |
2691 | double wn4r, wk1r, wk1i, |
2692 | x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, |
2693 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, |
2694 | y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, |
2695 | y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, |
2696 | y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; |
2697 | |
2698 | wn4r = w[1]; |
2699 | wk1r = w[2]; |
2700 | wk1i = w[3]; |
2701 | x0r = a[0] + a[16]; |
2702 | x0i = a[1] + a[17]; |
2703 | x1r = a[0] - a[16]; |
2704 | x1i = a[1] - a[17]; |
2705 | x2r = a[8] + a[24]; |
2706 | x2i = a[9] + a[25]; |
2707 | x3r = a[8] - a[24]; |
2708 | x3i = a[9] - a[25]; |
2709 | y0r = x0r + x2r; |
2710 | y0i = x0i + x2i; |
2711 | y4r = x0r - x2r; |
2712 | y4i = x0i - x2i; |
2713 | y8r = x1r - x3i; |
2714 | y8i = x1i + x3r; |
2715 | y12r = x1r + x3i; |
2716 | y12i = x1i - x3r; |
2717 | x0r = a[2] + a[18]; |
2718 | x0i = a[3] + a[19]; |
2719 | x1r = a[2] - a[18]; |
2720 | x1i = a[3] - a[19]; |
2721 | x2r = a[10] + a[26]; |
2722 | x2i = a[11] + a[27]; |
2723 | x3r = a[10] - a[26]; |
2724 | x3i = a[11] - a[27]; |
2725 | y1r = x0r + x2r; |
2726 | y1i = x0i + x2i; |
2727 | y5r = x0r - x2r; |
2728 | y5i = x0i - x2i; |
2729 | x0r = x1r - x3i; |
2730 | x0i = x1i + x3r; |
2731 | y9r = wk1r * x0r - wk1i * x0i; |
2732 | y9i = wk1r * x0i + wk1i * x0r; |
2733 | x0r = x1r + x3i; |
2734 | x0i = x1i - x3r; |
2735 | y13r = wk1i * x0r - wk1r * x0i; |
2736 | y13i = wk1i * x0i + wk1r * x0r; |
2737 | x0r = a[4] + a[20]; |
2738 | x0i = a[5] + a[21]; |
2739 | x1r = a[4] - a[20]; |
2740 | x1i = a[5] - a[21]; |
2741 | x2r = a[12] + a[28]; |
2742 | x2i = a[13] + a[29]; |
2743 | x3r = a[12] - a[28]; |
2744 | x3i = a[13] - a[29]; |
2745 | y2r = x0r + x2r; |
2746 | y2i = x0i + x2i; |
2747 | y6r = x0r - x2r; |
2748 | y6i = x0i - x2i; |
2749 | x0r = x1r - x3i; |
2750 | x0i = x1i + x3r; |
2751 | y10r = wn4r * (x0r - x0i); |
2752 | y10i = wn4r * (x0i + x0r); |
2753 | x0r = x1r + x3i; |
2754 | x0i = x1i - x3r; |
2755 | y14r = wn4r * (x0r + x0i); |
2756 | y14i = wn4r * (x0i - x0r); |
2757 | x0r = a[6] + a[22]; |
2758 | x0i = a[7] + a[23]; |
2759 | x1r = a[6] - a[22]; |
2760 | x1i = a[7] - a[23]; |
2761 | x2r = a[14] + a[30]; |
2762 | x2i = a[15] + a[31]; |
2763 | x3r = a[14] - a[30]; |
2764 | x3i = a[15] - a[31]; |
2765 | y3r = x0r + x2r; |
2766 | y3i = x0i + x2i; |
2767 | y7r = x0r - x2r; |
2768 | y7i = x0i - x2i; |
2769 | x0r = x1r - x3i; |
2770 | x0i = x1i + x3r; |
2771 | y11r = wk1i * x0r - wk1r * x0i; |
2772 | y11i = wk1i * x0i + wk1r * x0r; |
2773 | x0r = x1r + x3i; |
2774 | x0i = x1i - x3r; |
2775 | y15r = wk1r * x0r - wk1i * x0i; |
2776 | y15i = wk1r * x0i + wk1i * x0r; |
2777 | x0r = y12r - y14r; |
2778 | x0i = y12i - y14i; |
2779 | x1r = y12r + y14r; |
2780 | x1i = y12i + y14i; |
2781 | x2r = y13r - y15r; |
2782 | x2i = y13i - y15i; |
2783 | x3r = y13r + y15r; |
2784 | x3i = y13i + y15i; |
2785 | a[24] = x0r + x2r; |
2786 | a[25] = x0i + x2i; |
2787 | a[26] = x0r - x2r; |
2788 | a[27] = x0i - x2i; |
2789 | a[28] = x1r - x3i; |
2790 | a[29] = x1i + x3r; |
2791 | a[30] = x1r + x3i; |
2792 | a[31] = x1i - x3r; |
2793 | x0r = y8r + y10r; |
2794 | x0i = y8i + y10i; |
2795 | x1r = y8r - y10r; |
2796 | x1i = y8i - y10i; |
2797 | x2r = y9r + y11r; |
2798 | x2i = y9i + y11i; |
2799 | x3r = y9r - y11r; |
2800 | x3i = y9i - y11i; |
2801 | a[16] = x0r + x2r; |
2802 | a[17] = x0i + x2i; |
2803 | a[18] = x0r - x2r; |
2804 | a[19] = x0i - x2i; |
2805 | a[20] = x1r - x3i; |
2806 | a[21] = x1i + x3r; |
2807 | a[22] = x1r + x3i; |
2808 | a[23] = x1i - x3r; |
2809 | x0r = y5r - y7i; |
2810 | x0i = y5i + y7r; |
2811 | x2r = wn4r * (x0r - x0i); |
2812 | x2i = wn4r * (x0i + x0r); |
2813 | x0r = y5r + y7i; |
2814 | x0i = y5i - y7r; |
2815 | x3r = wn4r * (x0r - x0i); |
2816 | x3i = wn4r * (x0i + x0r); |
2817 | x0r = y4r - y6i; |
2818 | x0i = y4i + y6r; |
2819 | x1r = y4r + y6i; |
2820 | x1i = y4i - y6r; |
2821 | a[8] = x0r + x2r; |
2822 | a[9] = x0i + x2i; |
2823 | a[10] = x0r - x2r; |
2824 | a[11] = x0i - x2i; |
2825 | a[12] = x1r - x3i; |
2826 | a[13] = x1i + x3r; |
2827 | a[14] = x1r + x3i; |
2828 | a[15] = x1i - x3r; |
2829 | x0r = y0r + y2r; |
2830 | x0i = y0i + y2i; |
2831 | x1r = y0r - y2r; |
2832 | x1i = y0i - y2i; |
2833 | x2r = y1r + y3r; |
2834 | x2i = y1i + y3i; |
2835 | x3r = y1r - y3r; |
2836 | x3i = y1i - y3i; |
2837 | a[0] = x0r + x2r; |
2838 | a[1] = x0i + x2i; |
2839 | a[2] = x0r - x2r; |
2840 | a[3] = x0i - x2i; |
2841 | a[4] = x1r - x3i; |
2842 | a[5] = x1i + x3r; |
2843 | a[6] = x1r + x3i; |
2844 | a[7] = x1i - x3r; |
2845 | } |
2846 | |
2847 | |
2848 | void cftf162(double *a, double *w) |
2849 | { |
2850 | double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, |
2851 | x0r, x0i, x1r, x1i, x2r, x2i, |
2852 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, |
2853 | y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, |
2854 | y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, |
2855 | y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; |
2856 | |
2857 | wn4r = w[1]; |
2858 | wk1r = w[4]; |
2859 | wk1i = w[5]; |
2860 | wk3r = w[6]; |
2861 | wk3i = -w[7]; |
2862 | wk2r = w[8]; |
2863 | wk2i = w[9]; |
2864 | x1r = a[0] - a[17]; |
2865 | x1i = a[1] + a[16]; |
2866 | x0r = a[8] - a[25]; |
2867 | x0i = a[9] + a[24]; |
2868 | x2r = wn4r * (x0r - x0i); |
2869 | x2i = wn4r * (x0i + x0r); |
2870 | y0r = x1r + x2r; |
2871 | y0i = x1i + x2i; |
2872 | y4r = x1r - x2r; |
2873 | y4i = x1i - x2i; |
2874 | x1r = a[0] + a[17]; |
2875 | x1i = a[1] - a[16]; |
2876 | x0r = a[8] + a[25]; |
2877 | x0i = a[9] - a[24]; |
2878 | x2r = wn4r * (x0r - x0i); |
2879 | x2i = wn4r * (x0i + x0r); |
2880 | y8r = x1r - x2i; |
2881 | y8i = x1i + x2r; |
2882 | y12r = x1r + x2i; |
2883 | y12i = x1i - x2r; |
2884 | x0r = a[2] - a[19]; |
2885 | x0i = a[3] + a[18]; |
2886 | x1r = wk1r * x0r - wk1i * x0i; |
2887 | x1i = wk1r * x0i + wk1i * x0r; |
2888 | x0r = a[10] - a[27]; |
2889 | x0i = a[11] + a[26]; |
2890 | x2r = wk3i * x0r - wk3r * x0i; |
2891 | x2i = wk3i * x0i + wk3r * x0r; |
2892 | y1r = x1r + x2r; |
2893 | y1i = x1i + x2i; |
2894 | y5r = x1r - x2r; |
2895 | y5i = x1i - x2i; |
2896 | x0r = a[2] + a[19]; |
2897 | x0i = a[3] - a[18]; |
2898 | x1r = wk3r * x0r - wk3i * x0i; |
2899 | x1i = wk3r * x0i + wk3i * x0r; |
2900 | x0r = a[10] + a[27]; |
2901 | x0i = a[11] - a[26]; |
2902 | x2r = wk1r * x0r + wk1i * x0i; |
2903 | x2i = wk1r * x0i - wk1i * x0r; |
2904 | y9r = x1r - x2r; |
2905 | y9i = x1i - x2i; |
2906 | y13r = x1r + x2r; |
2907 | y13i = x1i + x2i; |
2908 | x0r = a[4] - a[21]; |
2909 | x0i = a[5] + a[20]; |
2910 | x1r = wk2r * x0r - wk2i * x0i; |
2911 | x1i = wk2r * x0i + wk2i * x0r; |
2912 | x0r = a[12] - a[29]; |
2913 | x0i = a[13] + a[28]; |
2914 | x2r = wk2i * x0r - wk2r * x0i; |
2915 | x2i = wk2i * x0i + wk2r * x0r; |
2916 | y2r = x1r + x2r; |
2917 | y2i = x1i + x2i; |
2918 | y6r = x1r - x2r; |
2919 | y6i = x1i - x2i; |
2920 | x0r = a[4] + a[21]; |
2921 | x0i = a[5] - a[20]; |
2922 | x1r = wk2i * x0r - wk2r * x0i; |
2923 | x1i = wk2i * x0i + wk2r * x0r; |
2924 | x0r = a[12] + a[29]; |
2925 | x0i = a[13] - a[28]; |
2926 | x2r = wk2r * x0r - wk2i * x0i; |
2927 | x2i = wk2r * x0i + wk2i * x0r; |
2928 | y10r = x1r - x2r; |
2929 | y10i = x1i - x2i; |
2930 | y14r = x1r + x2r; |
2931 | y14i = x1i + x2i; |
2932 | x0r = a[6] - a[23]; |
2933 | x0i = a[7] + a[22]; |
2934 | x1r = wk3r * x0r - wk3i * x0i; |
2935 | x1i = wk3r * x0i + wk3i * x0r; |
2936 | x0r = a[14] - a[31]; |
2937 | x0i = a[15] + a[30]; |
2938 | x2r = wk1i * x0r - wk1r * x0i; |
2939 | x2i = wk1i * x0i + wk1r * x0r; |
2940 | y3r = x1r + x2r; |
2941 | y3i = x1i + x2i; |
2942 | y7r = x1r - x2r; |
2943 | y7i = x1i - x2i; |
2944 | x0r = a[6] + a[23]; |
2945 | x0i = a[7] - a[22]; |
2946 | x1r = wk1i * x0r + wk1r * x0i; |
2947 | x1i = wk1i * x0i - wk1r * x0r; |
2948 | x0r = a[14] + a[31]; |
2949 | x0i = a[15] - a[30]; |
2950 | x2r = wk3i * x0r - wk3r * x0i; |
2951 | x2i = wk3i * x0i + wk3r * x0r; |
2952 | y11r = x1r + x2r; |
2953 | y11i = x1i + x2i; |
2954 | y15r = x1r - x2r; |
2955 | y15i = x1i - x2i; |
2956 | x1r = y0r + y2r; |
2957 | x1i = y0i + y2i; |
2958 | x2r = y1r + y3r; |
2959 | x2i = y1i + y3i; |
2960 | a[0] = x1r + x2r; |
2961 | a[1] = x1i + x2i; |
2962 | a[2] = x1r - x2r; |
2963 | a[3] = x1i - x2i; |
2964 | x1r = y0r - y2r; |
2965 | x1i = y0i - y2i; |
2966 | x2r = y1r - y3r; |
2967 | x2i = y1i - y3i; |
2968 | a[4] = x1r - x2i; |
2969 | a[5] = x1i + x2r; |
2970 | a[6] = x1r + x2i; |
2971 | a[7] = x1i - x2r; |
2972 | x1r = y4r - y6i; |
2973 | x1i = y4i + y6r; |
2974 | x0r = y5r - y7i; |
2975 | x0i = y5i + y7r; |
2976 | x2r = wn4r * (x0r - x0i); |
2977 | x2i = wn4r * (x0i + x0r); |
2978 | a[8] = x1r + x2r; |
2979 | a[9] = x1i + x2i; |
2980 | a[10] = x1r - x2r; |
2981 | a[11] = x1i - x2i; |
2982 | x1r = y4r + y6i; |
2983 | x1i = y4i - y6r; |
2984 | x0r = y5r + y7i; |
2985 | x0i = y5i - y7r; |
2986 | x2r = wn4r * (x0r - x0i); |
2987 | x2i = wn4r * (x0i + x0r); |
2988 | a[12] = x1r - x2i; |
2989 | a[13] = x1i + x2r; |
2990 | a[14] = x1r + x2i; |
2991 | a[15] = x1i - x2r; |
2992 | x1r = y8r + y10r; |
2993 | x1i = y8i + y10i; |
2994 | x2r = y9r - y11r; |
2995 | x2i = y9i - y11i; |
2996 | a[16] = x1r + x2r; |
2997 | a[17] = x1i + x2i; |
2998 | a[18] = x1r - x2r; |
2999 | a[19] = x1i - x2i; |
3000 | x1r = y8r - y10r; |
3001 | x1i = y8i - y10i; |
3002 | x2r = y9r + y11r; |
3003 | x2i = y9i + y11i; |
3004 | a[20] = x1r - x2i; |
3005 | a[21] = x1i + x2r; |
3006 | a[22] = x1r + x2i; |
3007 | a[23] = x1i - x2r; |
3008 | x1r = y12r - y14i; |
3009 | x1i = y12i + y14r; |
3010 | x0r = y13r + y15i; |
3011 | x0i = y13i - y15r; |
3012 | x2r = wn4r * (x0r - x0i); |
3013 | x2i = wn4r * (x0i + x0r); |
3014 | a[24] = x1r + x2r; |
3015 | a[25] = x1i + x2i; |
3016 | a[26] = x1r - x2r; |
3017 | a[27] = x1i - x2i; |
3018 | x1r = y12r + y14i; |
3019 | x1i = y12i - y14r; |
3020 | x0r = y13r - y15i; |
3021 | x0i = y13i + y15r; |
3022 | x2r = wn4r * (x0r - x0i); |
3023 | x2i = wn4r * (x0i + x0r); |
3024 | a[28] = x1r - x2i; |
3025 | a[29] = x1i + x2r; |
3026 | a[30] = x1r + x2i; |
3027 | a[31] = x1i - x2r; |
3028 | } |
3029 | |
3030 | |
3031 | void cftf081(double *a, double *w) |
3032 | { |
3033 | double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, |
3034 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, |
3035 | y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; |
3036 | |
3037 | wn4r = w[1]; |
3038 | x0r = a[0] + a[8]; |
3039 | x0i = a[1] + a[9]; |
3040 | x1r = a[0] - a[8]; |
3041 | x1i = a[1] - a[9]; |
3042 | x2r = a[4] + a[12]; |
3043 | x2i = a[5] + a[13]; |
3044 | x3r = a[4] - a[12]; |
3045 | x3i = a[5] - a[13]; |
3046 | y0r = x0r + x2r; |
3047 | y0i = x0i + x2i; |
3048 | y2r = x0r - x2r; |
3049 | y2i = x0i - x2i; |
3050 | y1r = x1r - x3i; |
3051 | y1i = x1i + x3r; |
3052 | y3r = x1r + x3i; |
3053 | y3i = x1i - x3r; |
3054 | x0r = a[2] + a[10]; |
3055 | x0i = a[3] + a[11]; |
3056 | x1r = a[2] - a[10]; |
3057 | x1i = a[3] - a[11]; |
3058 | x2r = a[6] + a[14]; |
3059 | x2i = a[7] + a[15]; |
3060 | x3r = a[6] - a[14]; |
3061 | x3i = a[7] - a[15]; |
3062 | y4r = x0r + x2r; |
3063 | y4i = x0i + x2i; |
3064 | y6r = x0r - x2r; |
3065 | y6i = x0i - x2i; |
3066 | x0r = x1r - x3i; |
3067 | x0i = x1i + x3r; |
3068 | x2r = x1r + x3i; |
3069 | x2i = x1i - x3r; |
3070 | y5r = wn4r * (x0r - x0i); |
3071 | y5i = wn4r * (x0r + x0i); |
3072 | y7r = wn4r * (x2r - x2i); |
3073 | y7i = wn4r * (x2r + x2i); |
3074 | a[8] = y1r + y5r; |
3075 | a[9] = y1i + y5i; |
3076 | a[10] = y1r - y5r; |
3077 | a[11] = y1i - y5i; |
3078 | a[12] = y3r - y7i; |
3079 | a[13] = y3i + y7r; |
3080 | a[14] = y3r + y7i; |
3081 | a[15] = y3i - y7r; |
3082 | a[0] = y0r + y4r; |
3083 | a[1] = y0i + y4i; |
3084 | a[2] = y0r - y4r; |
3085 | a[3] = y0i - y4i; |
3086 | a[4] = y2r - y6i; |
3087 | a[5] = y2i + y6r; |
3088 | a[6] = y2r + y6i; |
3089 | a[7] = y2i - y6r; |
3090 | } |
3091 | |
3092 | |
3093 | void cftf082(double *a, double *w) |
3094 | { |
3095 | double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, |
3096 | y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, |
3097 | y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; |
3098 | |
3099 | wn4r = w[1]; |
3100 | wk1r = w[2]; |
3101 | wk1i = w[3]; |
3102 | y0r = a[0] - a[9]; |
3103 | y0i = a[1] + a[8]; |
3104 | y1r = a[0] + a[9]; |
3105 | y1i = a[1] - a[8]; |
3106 | x0r = a[4] - a[13]; |
3107 | x0i = a[5] + a[12]; |
3108 | y2r = wn4r * (x0r - x0i); |
3109 | y2i = wn4r * (x0i + x0r); |
3110 | x0r = a[4] + a[13]; |
3111 | x0i = a[5] - a[12]; |
3112 | y3r = wn4r * (x0r - x0i); |
3113 | y3i = wn4r * (x0i + x0r); |
3114 | x0r = a[2] - a[11]; |
3115 | x0i = a[3] + a[10]; |
3116 | y4r = wk1r * x0r - wk1i * x0i; |
3117 | y4i = wk1r * x0i + wk1i * x0r; |
3118 | x0r = a[2] + a[11]; |
3119 | x0i = a[3] - a[10]; |
3120 | y5r = wk1i * x0r - wk1r * x0i; |
3121 | y5i = wk1i * x0i + wk1r * x0r; |
3122 | x0r = a[6] - a[15]; |
3123 | x0i = a[7] + a[14]; |
3124 | y6r = wk1i * x0r - wk1r * x0i; |
3125 | y6i = wk1i * x0i + wk1r * x0r; |
3126 | x0r = a[6] + a[15]; |
3127 | x0i = a[7] - a[14]; |
3128 | y7r = wk1r * x0r - wk1i * x0i; |
3129 | y7i = wk1r * x0i + wk1i * x0r; |
3130 | x0r = y0r + y2r; |
3131 | x0i = y0i + y2i; |
3132 | x1r = y4r + y6r; |
3133 | x1i = y4i + y6i; |
3134 | a[0] = x0r + x1r; |
3135 | a[1] = x0i + x1i; |
3136 | a[2] = x0r - x1r; |
3137 | a[3] = x0i - x1i; |
3138 | x0r = y0r - y2r; |
3139 | x0i = y0i - y2i; |
3140 | x1r = y4r - y6r; |
3141 | x1i = y4i - y6i; |
3142 | a[4] = x0r - x1i; |
3143 | a[5] = x0i + x1r; |
3144 | a[6] = x0r + x1i; |
3145 | a[7] = x0i - x1r; |
3146 | x0r = y1r - y3i; |
3147 | x0i = y1i + y3r; |
3148 | x1r = y5r - y7r; |
3149 | x1i = y5i - y7i; |
3150 | a[8] = x0r + x1r; |
3151 | a[9] = x0i + x1i; |
3152 | a[10] = x0r - x1r; |
3153 | a[11] = x0i - x1i; |
3154 | x0r = y1r + y3i; |
3155 | x0i = y1i - y3r; |
3156 | x1r = y5r + y7r; |
3157 | x1i = y5i + y7i; |
3158 | a[12] = x0r - x1i; |
3159 | a[13] = x0i + x1r; |
3160 | a[14] = x0r + x1i; |
3161 | a[15] = x0i - x1r; |
3162 | } |
3163 | |
3164 | |
3165 | void cftf040(double *a) |
3166 | { |
3167 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
3168 | |
3169 | x0r = a[0] + a[4]; |
3170 | x0i = a[1] + a[5]; |
3171 | x1r = a[0] - a[4]; |
3172 | x1i = a[1] - a[5]; |
3173 | x2r = a[2] + a[6]; |
3174 | x2i = a[3] + a[7]; |
3175 | x3r = a[2] - a[6]; |
3176 | x3i = a[3] - a[7]; |
3177 | a[0] = x0r + x2r; |
3178 | a[1] = x0i + x2i; |
3179 | a[2] = x1r - x3i; |
3180 | a[3] = x1i + x3r; |
3181 | a[4] = x0r - x2r; |
3182 | a[5] = x0i - x2i; |
3183 | a[6] = x1r + x3i; |
3184 | a[7] = x1i - x3r; |
3185 | } |
3186 | |
3187 | |
3188 | void cftb040(double *a) |
3189 | { |
3190 | double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
3191 | |
3192 | x0r = a[0] + a[4]; |
3193 | x0i = a[1] + a[5]; |
3194 | x1r = a[0] - a[4]; |
3195 | x1i = a[1] - a[5]; |
3196 | x2r = a[2] + a[6]; |
3197 | x2i = a[3] + a[7]; |
3198 | x3r = a[2] - a[6]; |
3199 | x3i = a[3] - a[7]; |
3200 | a[0] = x0r + x2r; |
3201 | a[1] = x0i + x2i; |
3202 | a[2] = x1r + x3i; |
3203 | a[3] = x1i - x3r; |
3204 | a[4] = x0r - x2r; |
3205 | a[5] = x0i - x2i; |
3206 | a[6] = x1r - x3i; |
3207 | a[7] = x1i + x3r; |
3208 | } |
3209 | |
3210 | |
3211 | void cftx020(double *a) |
3212 | { |
3213 | double x0r, x0i; |
3214 | |
3215 | x0r = a[0] - a[2]; |
3216 | x0i = a[1] - a[3]; |
3217 | a[0] += a[2]; |
3218 | a[1] += a[3]; |
3219 | a[2] = x0r; |
3220 | a[3] = x0i; |
3221 | } |
3222 | |
3223 | |
3224 | void rftfsub(int n, double *a, int nc, double *c) |
3225 | { |
3226 | int j, k, kk, ks, m; |
3227 | double wkr, wki, xr, xi, yr, yi; |
3228 | |
3229 | m = n >> 1; |
3230 | ks = 2 * nc / m; |
3231 | kk = 0; |
3232 | for (j = 2; j < m; j += 2) { |
3233 | k = n - j; |
3234 | kk += ks; |
3235 | wkr = 0.5 - c[nc - kk]; |
3236 | wki = c[kk]; |
3237 | xr = a[j] - a[k]; |
3238 | xi = a[j + 1] + a[k + 1]; |
3239 | yr = wkr * xr - wki * xi; |
3240 | yi = wkr * xi + wki * xr; |
3241 | a[j] -= yr; |
3242 | a[j + 1] -= yi; |
3243 | a[k] += yr; |
3244 | a[k + 1] -= yi; |
3245 | } |
3246 | } |
3247 | |
3248 | |
3249 | void rftbsub(int n, double *a, int nc, double *c) |
3250 | { |
3251 | int j, k, kk, ks, m; |
3252 | double wkr, wki, xr, xi, yr, yi; |
3253 | |
3254 | m = n >> 1; |
3255 | ks = 2 * nc / m; |
3256 | kk = 0; |
3257 | for (j = 2; j < m; j += 2) { |
3258 | k = n - j; |
3259 | kk += ks; |
3260 | wkr = 0.5 - c[nc - kk]; |
3261 | wki = c[kk]; |
3262 | xr = a[j] - a[k]; |
3263 | xi = a[j + 1] + a[k + 1]; |
3264 | yr = wkr * xr + wki * xi; |
3265 | yi = wkr * xi - wki * xr; |
3266 | a[j] -= yr; |
3267 | a[j + 1] -= yi; |
3268 | a[k] += yr; |
3269 | a[k + 1] -= yi; |
3270 | } |
3271 | } |
3272 | |
3273 | |
3274 | void dctsub(int n, double *a, int nc, double *c) |
3275 | { |
3276 | int j, k, kk, ks, m; |
3277 | double wkr, wki, xr; |
3278 | |
3279 | m = n >> 1; |
3280 | ks = nc / n; |
3281 | kk = 0; |
3282 | for (j = 1; j < m; j++) { |
3283 | k = n - j; |
3284 | kk += ks; |
3285 | wkr = c[kk] - c[nc - kk]; |
3286 | wki = c[kk] + c[nc - kk]; |
3287 | xr = wki * a[j] - wkr * a[k]; |
3288 | a[j] = wkr * a[j] + wki * a[k]; |
3289 | a[k] = xr; |
3290 | } |
3291 | a[m] *= c[0]; |
3292 | } |
3293 | |
3294 | |
3295 | void dstsub(int n, double *a, int nc, double *c) |
3296 | { |
3297 | int j, k, kk, ks, m; |
3298 | double wkr, wki, xr; |
3299 | |
3300 | m = n >> 1; |
3301 | ks = nc / n; |
3302 | kk = 0; |
3303 | for (j = 1; j < m; j++) { |
3304 | k = n - j; |
3305 | kk += ks; |
3306 | wkr = c[kk] - c[nc - kk]; |
3307 | wki = c[kk] + c[nc - kk]; |
3308 | xr = wki * a[k] - wkr * a[j]; |
3309 | a[k] = wkr * a[k] + wki * a[j]; |
3310 | a[j] = xr; |
3311 | } |
3312 | a[m] *= c[0]; |
3313 | } |
3314 | |
3315 | |