1 | /* |
2 | * Copyright (c) 2008-2020 Stefan Krah. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * |
8 | * 1. Redistributions of source code must retain the above copyright |
9 | * notice, this list of conditions and the following disclaimer. |
10 | * |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 | * SUCH DAMAGE. |
26 | */ |
27 | |
28 | |
29 | #include "mpdecimal.h" |
30 | |
31 | #include <assert.h> |
32 | |
33 | #include "bits.h" |
34 | #include "constants.h" |
35 | #include "difradix2.h" |
36 | #include "numbertheory.h" |
37 | #include "umodarith.h" |
38 | |
39 | |
40 | /* Bignum: The actual transform routine (decimation in frequency). */ |
41 | |
42 | |
43 | /* |
44 | * Generate index pairs (x, bitreverse(x)) and carry out the permutation. |
45 | * n must be a power of two. |
46 | * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational", |
47 | * Chapter 1.14.4. [http://www.jjj.de/fxt/] |
48 | */ |
49 | static inline void |
50 | bitreverse_permute(mpd_uint_t a[], mpd_size_t n) |
51 | { |
52 | mpd_size_t x = 0; |
53 | mpd_size_t r = 0; |
54 | mpd_uint_t t; |
55 | |
56 | do { /* Invariant: r = bitreverse(x) */ |
57 | if (r > x) { |
58 | t = a[x]; |
59 | a[x] = a[r]; |
60 | a[r] = t; |
61 | } |
62 | /* Flip trailing consecutive 1 bits and the first zero bit |
63 | * that absorbs a possible carry. */ |
64 | x += 1; |
65 | /* Mirror the operation on r: Flip n_trailing_zeros(x)+1 |
66 | high bits of r. */ |
67 | r ^= (n - (n >> (mpd_bsf(x)+1))); |
68 | /* The loop invariant is preserved. */ |
69 | } while (x < n); |
70 | } |
71 | |
72 | |
73 | /* Fast Number Theoretic Transform, decimation in frequency. */ |
74 | void |
75 | fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams) |
76 | { |
77 | mpd_uint_t *wtable = tparams->wtable; |
78 | mpd_uint_t umod; |
79 | #ifdef PPRO |
80 | double dmod; |
81 | uint32_t dinvmod[3]; |
82 | #endif |
83 | mpd_uint_t u0, u1, v0, v1; |
84 | mpd_uint_t w, w0, w1, wstep; |
85 | mpd_size_t m, mhalf; |
86 | mpd_size_t j, r; |
87 | |
88 | |
89 | assert(ispower2(n)); |
90 | assert(n >= 4); |
91 | |
92 | SETMODULUS(tparams->modnum); |
93 | |
94 | /* m == n */ |
95 | mhalf = n / 2; |
96 | for (j = 0; j < mhalf; j += 2) { |
97 | |
98 | w0 = wtable[j]; |
99 | w1 = wtable[j+1]; |
100 | |
101 | u0 = a[j]; |
102 | v0 = a[j+mhalf]; |
103 | |
104 | u1 = a[j+1]; |
105 | v1 = a[j+1+mhalf]; |
106 | |
107 | a[j] = addmod(u0, v0, umod); |
108 | v0 = submod(u0, v0, umod); |
109 | |
110 | a[j+1] = addmod(u1, v1, umod); |
111 | v1 = submod(u1, v1, umod); |
112 | |
113 | MULMOD2(&v0, w0, &v1, w1); |
114 | |
115 | a[j+mhalf] = v0; |
116 | a[j+1+mhalf] = v1; |
117 | |
118 | } |
119 | |
120 | wstep = 2; |
121 | for (m = n/2; m >= 2; m>>=1, wstep<<=1) { |
122 | |
123 | mhalf = m / 2; |
124 | |
125 | /* j == 0 */ |
126 | for (r = 0; r < n; r += 2*m) { |
127 | |
128 | u0 = a[r]; |
129 | v0 = a[r+mhalf]; |
130 | |
131 | u1 = a[m+r]; |
132 | v1 = a[m+r+mhalf]; |
133 | |
134 | a[r] = addmod(u0, v0, umod); |
135 | v0 = submod(u0, v0, umod); |
136 | |
137 | a[m+r] = addmod(u1, v1, umod); |
138 | v1 = submod(u1, v1, umod); |
139 | |
140 | a[r+mhalf] = v0; |
141 | a[m+r+mhalf] = v1; |
142 | } |
143 | |
144 | for (j = 1; j < mhalf; j++) { |
145 | |
146 | w = wtable[j*wstep]; |
147 | |
148 | for (r = 0; r < n; r += 2*m) { |
149 | |
150 | u0 = a[r+j]; |
151 | v0 = a[r+j+mhalf]; |
152 | |
153 | u1 = a[m+r+j]; |
154 | v1 = a[m+r+j+mhalf]; |
155 | |
156 | a[r+j] = addmod(u0, v0, umod); |
157 | v0 = submod(u0, v0, umod); |
158 | |
159 | a[m+r+j] = addmod(u1, v1, umod); |
160 | v1 = submod(u1, v1, umod); |
161 | |
162 | MULMOD2C(&v0, &v1, w); |
163 | |
164 | a[r+j+mhalf] = v0; |
165 | a[m+r+j+mhalf] = v1; |
166 | } |
167 | |
168 | } |
169 | |
170 | } |
171 | |
172 | bitreverse_permute(a, n); |
173 | } |
174 | |