1 | /* |
2 | * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. |
3 | * This file is part of KISS FFT - https://github.com/mborgerding/kissfft |
4 | * |
5 | * SPDX-License-Identifier: BSD-3-Clause |
6 | * See COPYING file for more information. |
7 | */ |
8 | |
9 | #include "kiss_fftnd.h" |
10 | #include "_kiss_fft_guts.h" |
11 | |
12 | struct kiss_fftnd_state{ |
13 | int dimprod; /* dimsum would be mighty tasty right now */ |
14 | int ndims; |
15 | int *dims; |
16 | kiss_fft_cfg *states; /* cfg states for each dimension */ |
17 | kiss_fft_cpx * tmpbuf; /*buffer capable of hold the entire input */ |
18 | }; |
19 | |
20 | kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem) |
21 | { |
22 | KISS_FFT_ALIGN_CHECK(mem) |
23 | |
24 | kiss_fftnd_cfg st = NULL; |
25 | int i; |
26 | int dimprod=1; |
27 | size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state)); |
28 | char * ptr = NULL; |
29 | |
30 | for (i=0;i<ndims;++i) { |
31 | size_t sublen=0; |
32 | kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen); |
33 | memneeded += sublen; /* st->states[i] */ |
34 | dimprod *= dims[i]; |
35 | } |
36 | memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims);/* st->dims */ |
37 | memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims);/* st->states */ |
38 | memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod); /* st->tmpbuf */ |
39 | |
40 | if (lenmem == NULL) {/* allocate for the caller*/ |
41 | ptr = (char *) malloc (memneeded); |
42 | } else { /* initialize supplied buffer if big enough */ |
43 | if (*lenmem >= memneeded) |
44 | ptr = (char *) mem; |
45 | *lenmem = memneeded; /*tell caller how big struct is (or would be) */ |
46 | } |
47 | if (!ptr) |
48 | return NULL; /*malloc failed or buffer too small */ |
49 | |
50 | st = (kiss_fftnd_cfg) ptr; |
51 | st->dimprod = dimprod; |
52 | st->ndims = ndims; |
53 | ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state)); |
54 | |
55 | st->states = (kiss_fft_cfg *)ptr; |
56 | ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims); |
57 | |
58 | st->dims = (int*)ptr; |
59 | ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims); |
60 | |
61 | st->tmpbuf = (kiss_fft_cpx*)ptr; |
62 | ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod); |
63 | |
64 | for (i=0;i<ndims;++i) { |
65 | size_t len; |
66 | st->dims[i] = dims[i]; |
67 | kiss_fft_alloc (st->dims[i], inverse_fft, NULL, &len); |
68 | st->states[i] = kiss_fft_alloc (st->dims[i], inverse_fft, ptr,&len); |
69 | ptr += len; |
70 | } |
71 | /* |
72 | Hi there! |
73 | |
74 | If you're looking at this particular code, it probably means you've got a brain-dead bounds checker |
75 | that thinks the above code overwrites the end of the array. |
76 | |
77 | It doesn't. |
78 | |
79 | -- Mark |
80 | |
81 | P.S. |
82 | The below code might give you some warm fuzzies and help convince you. |
83 | */ |
84 | if ( ptr - (char*)st != (int)memneeded ) { |
85 | fprintf(stderr, |
86 | "################################################################################\n" |
87 | "Internal error! Memory allocation miscalculation\n" |
88 | "################################################################################\n" |
89 | ); |
90 | } |
91 | return st; |
92 | } |
93 | |
94 | /* |
95 | This works by tackling one dimension at a time. |
96 | |
97 | In effect, |
98 | Each stage starts out by reshaping the matrix into a DixSi 2d matrix. |
99 | A Di-sized fft is taken of each column, transposing the matrix as it goes. |
100 | |
101 | Here's a 3-d example: |
102 | Take a 2x3x4 matrix, laid out in memory as a contiguous buffer |
103 | [ [ [ a b c d ] [ e f g h ] [ i j k l ] ] |
104 | [ [ m n o p ] [ q r s t ] [ u v w x ] ] ] |
105 | |
106 | Stage 0 ( D=2): treat the buffer as a 2x12 matrix |
107 | [ [a b ... k l] |
108 | [m n ... w x] ] |
109 | |
110 | FFT each column with size 2. |
111 | Transpose the matrix at the same time using kiss_fft_stride. |
112 | |
113 | [ [ a+m a-m ] |
114 | [ b+n b-n] |
115 | ... |
116 | [ k+w k-w ] |
117 | [ l+x l-x ] ] |
118 | |
119 | Note fft([x y]) == [x+y x-y] |
120 | |
121 | Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix, |
122 | [ [ a+m a-m b+n b-n c+o c-o d+p d-p ] |
123 | [ e+q e-q f+r f-r g+s g-s h+t h-t ] |
124 | [ i+u i-u j+v j-v k+w k-w l+x l-x ] ] |
125 | |
126 | And perform FFTs (size=3) on each of the columns as above, transposing |
127 | the matrix as it goes. The output of stage 1 is |
128 | (Legend: ap = [ a+m e+q i+u ] |
129 | am = [ a-m e-q i-u ] ) |
130 | |
131 | [ [ sum(ap) fft(ap)[0] fft(ap)[1] ] |
132 | [ sum(am) fft(am)[0] fft(am)[1] ] |
133 | [ sum(bp) fft(bp)[0] fft(bp)[1] ] |
134 | [ sum(bm) fft(bm)[0] fft(bm)[1] ] |
135 | [ sum(cp) fft(cp)[0] fft(cp)[1] ] |
136 | [ sum(cm) fft(cm)[0] fft(cm)[1] ] |
137 | [ sum(dp) fft(dp)[0] fft(dp)[1] ] |
138 | [ sum(dm) fft(dm)[0] fft(dm)[1] ] ] |
139 | |
140 | Stage 2 ( D=4) treats this buffer as a 4*6 matrix, |
141 | [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ] |
142 | [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ] |
143 | [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ] |
144 | [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ] ] |
145 | |
146 | Then FFTs each column, transposing as it goes. |
147 | |
148 | The resulting matrix is the 3d FFT of the 2x3x4 input matrix. |
149 | |
150 | Note as a sanity check that the first element of the final |
151 | stage's output (DC term) is |
152 | sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] ) |
153 | , i.e. the summation of all 24 input elements. |
154 | |
155 | */ |
156 | void kiss_fftnd(kiss_fftnd_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) |
157 | { |
158 | int i,k; |
159 | const kiss_fft_cpx * bufin=fin; |
160 | kiss_fft_cpx * bufout; |
161 | |
162 | /*arrange it so the last bufout == fout*/ |
163 | if ( st->ndims & 1 ) { |
164 | bufout = fout; |
165 | if (fin==fout) { |
166 | memcpy( st->tmpbuf, fin, sizeof(kiss_fft_cpx) * st->dimprod ); |
167 | bufin = st->tmpbuf; |
168 | } |
169 | }else |
170 | bufout = st->tmpbuf; |
171 | |
172 | for ( k=0; k < st->ndims; ++k) { |
173 | int curdim = st->dims[k]; |
174 | int stride = st->dimprod / curdim; |
175 | |
176 | for ( i=0 ; i<stride ; ++i ) |
177 | kiss_fft_stride( st->states[k], bufin+i , bufout+i*curdim, stride ); |
178 | |
179 | /*toggle back and forth between the two buffers*/ |
180 | if (bufout == st->tmpbuf){ |
181 | bufout = fout; |
182 | bufin = st->tmpbuf; |
183 | }else{ |
184 | bufout = st->tmpbuf; |
185 | bufin = fout; |
186 | } |
187 | } |
188 | } |
189 | |