1 | /* |
2 | * Copyright (c) 2008-2020 Stefan Krah. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * |
8 | * 1. Redistributions of source code must retain the above copyright |
9 | * notice, this list of conditions and the following disclaimer. |
10 | * |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 | * SUCH DAMAGE. |
26 | */ |
27 | |
28 | |
29 | #include "mpdecimal.h" |
30 | #include "bits.h" |
31 | #include "constants.h" |
32 | #include "convolute.h" |
33 | #include "fnt.h" |
34 | #include "fourstep.h" |
35 | #include "numbertheory.h" |
36 | #include "sixstep.h" |
37 | #include "umodarith.h" |
38 | |
39 | |
40 | /* Bignum: Fast convolution using the Number Theoretic Transform. Used for |
41 | the multiplication of very large coefficients. */ |
42 | |
43 | |
44 | /* Convolute the data in c1 and c2. Result is in c1. */ |
45 | int |
46 | fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum) |
47 | { |
48 | int (*fnt)(mpd_uint_t *, mpd_size_t, int); |
49 | int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); |
50 | #ifdef PPRO |
51 | double dmod; |
52 | uint32_t dinvmod[3]; |
53 | #endif |
54 | mpd_uint_t n_inv, umod; |
55 | mpd_size_t i; |
56 | |
57 | |
58 | SETMODULUS(modnum); |
59 | n_inv = POWMOD(n, (umod-2)); |
60 | |
61 | if (ispower2(n)) { |
62 | if (n > SIX_STEP_THRESHOLD) { |
63 | fnt = six_step_fnt; |
64 | inv_fnt = inv_six_step_fnt; |
65 | } |
66 | else { |
67 | fnt = std_fnt; |
68 | inv_fnt = std_inv_fnt; |
69 | } |
70 | } |
71 | else { |
72 | fnt = four_step_fnt; |
73 | inv_fnt = inv_four_step_fnt; |
74 | } |
75 | |
76 | if (!fnt(c1, n, modnum)) { |
77 | return 0; |
78 | } |
79 | if (!fnt(c2, n, modnum)) { |
80 | return 0; |
81 | } |
82 | for (i = 0; i < n-1; i += 2) { |
83 | mpd_uint_t x0 = c1[i]; |
84 | mpd_uint_t y0 = c2[i]; |
85 | mpd_uint_t x1 = c1[i+1]; |
86 | mpd_uint_t y1 = c2[i+1]; |
87 | MULMOD2(&x0, y0, &x1, y1); |
88 | c1[i] = x0; |
89 | c1[i+1] = x1; |
90 | } |
91 | |
92 | if (!inv_fnt(c1, n, modnum)) { |
93 | return 0; |
94 | } |
95 | for (i = 0; i < n-3; i += 4) { |
96 | mpd_uint_t x0 = c1[i]; |
97 | mpd_uint_t x1 = c1[i+1]; |
98 | mpd_uint_t x2 = c1[i+2]; |
99 | mpd_uint_t x3 = c1[i+3]; |
100 | MULMOD2C(&x0, &x1, n_inv); |
101 | MULMOD2C(&x2, &x3, n_inv); |
102 | c1[i] = x0; |
103 | c1[i+1] = x1; |
104 | c1[i+2] = x2; |
105 | c1[i+3] = x3; |
106 | } |
107 | |
108 | return 1; |
109 | } |
110 | |
111 | /* Autoconvolute the data in c1. Result is in c1. */ |
112 | int |
113 | fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum) |
114 | { |
115 | int (*fnt)(mpd_uint_t *, mpd_size_t, int); |
116 | int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); |
117 | #ifdef PPRO |
118 | double dmod; |
119 | uint32_t dinvmod[3]; |
120 | #endif |
121 | mpd_uint_t n_inv, umod; |
122 | mpd_size_t i; |
123 | |
124 | |
125 | SETMODULUS(modnum); |
126 | n_inv = POWMOD(n, (umod-2)); |
127 | |
128 | if (ispower2(n)) { |
129 | if (n > SIX_STEP_THRESHOLD) { |
130 | fnt = six_step_fnt; |
131 | inv_fnt = inv_six_step_fnt; |
132 | } |
133 | else { |
134 | fnt = std_fnt; |
135 | inv_fnt = std_inv_fnt; |
136 | } |
137 | } |
138 | else { |
139 | fnt = four_step_fnt; |
140 | inv_fnt = inv_four_step_fnt; |
141 | } |
142 | |
143 | if (!fnt(c1, n, modnum)) { |
144 | return 0; |
145 | } |
146 | for (i = 0; i < n-1; i += 2) { |
147 | mpd_uint_t x0 = c1[i]; |
148 | mpd_uint_t x1 = c1[i+1]; |
149 | MULMOD2(&x0, x0, &x1, x1); |
150 | c1[i] = x0; |
151 | c1[i+1] = x1; |
152 | } |
153 | |
154 | if (!inv_fnt(c1, n, modnum)) { |
155 | return 0; |
156 | } |
157 | for (i = 0; i < n-3; i += 4) { |
158 | mpd_uint_t x0 = c1[i]; |
159 | mpd_uint_t x1 = c1[i+1]; |
160 | mpd_uint_t x2 = c1[i+2]; |
161 | mpd_uint_t x3 = c1[i+3]; |
162 | MULMOD2C(&x0, &x1, n_inv); |
163 | MULMOD2C(&x2, &x3, n_inv); |
164 | c1[i] = x0; |
165 | c1[i+1] = x1; |
166 | c1[i+2] = x2; |
167 | c1[i+3] = x3; |
168 | } |
169 | |
170 | return 1; |
171 | } |
172 | |