1 | /* |
2 | * Copyright (c) 2008-2020 Stefan Krah. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * |
8 | * 1. Redistributions of source code must retain the above copyright |
9 | * notice, this list of conditions and the following disclaimer. |
10 | * |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 | * SUCH DAMAGE. |
26 | */ |
27 | |
28 | |
29 | #include "mpdecimal.h" |
30 | |
31 | #include <assert.h> |
32 | #include <stdio.h> |
33 | |
34 | #include "bits.h" |
35 | #include "constants.h" |
36 | #include "difradix2.h" |
37 | #include "numbertheory.h" |
38 | #include "sixstep.h" |
39 | #include "transpose.h" |
40 | #include "umodarith.h" |
41 | |
42 | |
43 | /* Bignum: Cache efficient Matrix Fourier Transform for arrays of the |
44 | form 2**n (See literature/six-step.txt). */ |
45 | |
46 | |
47 | /* forward transform with sign = -1 */ |
48 | int |
49 | six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum) |
50 | { |
51 | struct fnt_params *tparams; |
52 | mpd_size_t log2n, C, R; |
53 | mpd_uint_t kernel; |
54 | mpd_uint_t umod; |
55 | #ifdef PPRO |
56 | double dmod; |
57 | uint32_t dinvmod[3]; |
58 | #endif |
59 | mpd_uint_t *x, w0, w1, wstep; |
60 | mpd_size_t i, k; |
61 | |
62 | |
63 | assert(ispower2(n)); |
64 | assert(n >= 16); |
65 | assert(n <= MPD_MAXTRANSFORM_2N); |
66 | |
67 | log2n = mpd_bsr(n); |
68 | C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */ |
69 | R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */ |
70 | |
71 | |
72 | /* Transpose the matrix. */ |
73 | if (!transpose_pow2(a, R, C)) { |
74 | return 0; |
75 | } |
76 | |
77 | /* Length R transform on the rows. */ |
78 | if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) { |
79 | return 0; |
80 | } |
81 | for (x = a; x < a+n; x += R) { |
82 | fnt_dif2(x, R, tparams); |
83 | } |
84 | |
85 | /* Transpose the matrix. */ |
86 | if (!transpose_pow2(a, C, R)) { |
87 | mpd_free(tparams); |
88 | return 0; |
89 | } |
90 | |
91 | /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */ |
92 | SETMODULUS(modnum); |
93 | kernel = _mpd_getkernel(n, -1, modnum); |
94 | for (i = 1; i < R; i++) { |
95 | w0 = 1; /* r**(i*0): initial value for k=0 */ |
96 | w1 = POWMOD(kernel, i); /* r**(i*1): initial value for k=1 */ |
97 | wstep = MULMOD(w1, w1); /* r**(2*i) */ |
98 | for (k = 0; k < C; k += 2) { |
99 | mpd_uint_t x0 = a[i*C+k]; |
100 | mpd_uint_t x1 = a[i*C+k+1]; |
101 | MULMOD2(&x0, w0, &x1, w1); |
102 | MULMOD2C(&w0, &w1, wstep); /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */ |
103 | a[i*C+k] = x0; |
104 | a[i*C+k+1] = x1; |
105 | } |
106 | } |
107 | |
108 | /* Length C transform on the rows. */ |
109 | if (C != R) { |
110 | mpd_free(tparams); |
111 | if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) { |
112 | return 0; |
113 | } |
114 | } |
115 | for (x = a; x < a+n; x += C) { |
116 | fnt_dif2(x, C, tparams); |
117 | } |
118 | mpd_free(tparams); |
119 | |
120 | #if 0 |
121 | /* An unordered transform is sufficient for convolution. */ |
122 | /* Transpose the matrix. */ |
123 | if (!transpose_pow2(a, R, C)) { |
124 | return 0; |
125 | } |
126 | #endif |
127 | |
128 | return 1; |
129 | } |
130 | |
131 | |
132 | /* reverse transform, sign = 1 */ |
133 | int |
134 | inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum) |
135 | { |
136 | struct fnt_params *tparams; |
137 | mpd_size_t log2n, C, R; |
138 | mpd_uint_t kernel; |
139 | mpd_uint_t umod; |
140 | #ifdef PPRO |
141 | double dmod; |
142 | uint32_t dinvmod[3]; |
143 | #endif |
144 | mpd_uint_t *x, w0, w1, wstep; |
145 | mpd_size_t i, k; |
146 | |
147 | |
148 | assert(ispower2(n)); |
149 | assert(n >= 16); |
150 | assert(n <= MPD_MAXTRANSFORM_2N); |
151 | |
152 | log2n = mpd_bsr(n); |
153 | C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */ |
154 | R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */ |
155 | |
156 | |
157 | #if 0 |
158 | /* An unordered transform is sufficient for convolution. */ |
159 | /* Transpose the matrix, producing an R*C matrix. */ |
160 | if (!transpose_pow2(a, C, R)) { |
161 | return 0; |
162 | } |
163 | #endif |
164 | |
165 | /* Length C transform on the rows. */ |
166 | if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) { |
167 | return 0; |
168 | } |
169 | for (x = a; x < a+n; x += C) { |
170 | fnt_dif2(x, C, tparams); |
171 | } |
172 | |
173 | /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */ |
174 | SETMODULUS(modnum); |
175 | kernel = _mpd_getkernel(n, 1, modnum); |
176 | for (i = 1; i < R; i++) { |
177 | w0 = 1; |
178 | w1 = POWMOD(kernel, i); |
179 | wstep = MULMOD(w1, w1); |
180 | for (k = 0; k < C; k += 2) { |
181 | mpd_uint_t x0 = a[i*C+k]; |
182 | mpd_uint_t x1 = a[i*C+k+1]; |
183 | MULMOD2(&x0, w0, &x1, w1); |
184 | MULMOD2C(&w0, &w1, wstep); |
185 | a[i*C+k] = x0; |
186 | a[i*C+k+1] = x1; |
187 | } |
188 | } |
189 | |
190 | /* Transpose the matrix. */ |
191 | if (!transpose_pow2(a, R, C)) { |
192 | mpd_free(tparams); |
193 | return 0; |
194 | } |
195 | |
196 | /* Length R transform on the rows. */ |
197 | if (R != C) { |
198 | mpd_free(tparams); |
199 | if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) { |
200 | return 0; |
201 | } |
202 | } |
203 | for (x = a; x < a+n; x += R) { |
204 | fnt_dif2(x, R, tparams); |
205 | } |
206 | mpd_free(tparams); |
207 | |
208 | /* Transpose the matrix. */ |
209 | if (!transpose_pow2(a, C, R)) { |
210 | return 0; |
211 | } |
212 | |
213 | return 1; |
214 | } |
215 | |