convolute.c source code [python/Modules/_decimal/libmpdec/convolute.c]

1	/*
2	* Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	*
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	*
11	* 2. Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	*
15	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25	* SUCH DAMAGE.
26	*/
27
28
29	#include "mpdecimal.h"
30	#include "bits.h"
31	#include "constants.h"
32	#include "convolute.h"
33	#include "fnt.h"
34	#include "fourstep.h"
35	#include "numbertheory.h"
36	#include "sixstep.h"
37	#include "umodarith.h"
38
39
40	/ Bignum: Fast convolution using the Number Theoretic Transform. Used for*
41	the multiplication of very large coefficients. /*
42
43
44	/ Convolute the data in c1 and c2. Result is in c1. /
45	int
46	fnt_convolute(mpd_uint_t c1, mpd_uint_t c2, mpd_size_t n, int modnum)
47	{
48	int (fnt)(mpd_uint_t , mpd_size_t, int);
49	int (inv_fnt)(mpd_uint_t , mpd_size_t, int);
50	#ifdef PPRO
51	double dmod;
52	uint32_t dinvmod[`3`];
53	#endif
54	mpd_uint_t n_inv, umod;
55	mpd_size_t i;
56
57
58	SETMODULUS(modnum);
59	n_inv = POWMOD(n, (umod-`2`));
60
61	if (ispower2(n)) {
62	if (n > SIX_STEP_THRESHOLD) {
63	fnt = six_step_fnt;
64	inv_fnt = inv_six_step_fnt;
65	}
66	else {
67	fnt = std_fnt;
68	inv_fnt = std_inv_fnt;
69	}
70	}
71	else {
72	fnt = four_step_fnt;
73	inv_fnt = inv_four_step_fnt;
74	}
75
76	if (!fnt(c1, n, modnum)) {
77	return `0`;
78	}
79	if (!fnt(c2, n, modnum)) {
80	return `0`;
81	}
82	for (i = `0`; i < n-`1`; i += `2`) {
83	mpd_uint_t x0 = c1[i];
84	mpd_uint_t y0 = c2[i];
85	mpd_uint_t x1 = c1[i+`1`];
86	mpd_uint_t y1 = c2[i+`1`];
87	MULMOD2(&x0, y0, &x1, y1);
88	c1[i] = x0;
89	c1[i+`1`] = x1;
90	}
91
92	if (!inv_fnt(c1, n, modnum)) {
93	return `0`;
94	}
95	for (i = `0`; i < n-`3`; i += `4`) {
96	mpd_uint_t x0 = c1[i];
97	mpd_uint_t x1 = c1[i+`1`];
98	mpd_uint_t x2 = c1[i+`2`];
99	mpd_uint_t x3 = c1[i+`3`];
100	MULMOD2C(&x0, &x1, n_inv);
101	MULMOD2C(&x2, &x3, n_inv);
102	c1[i] = x0;
103	c1[i+`1`] = x1;
104	c1[i+`2`] = x2;
105	c1[i+`3`] = x3;
106	}
107
108	return `1`;
109	}
110
111	/ Autoconvolute the data in c1. Result is in c1. /
112	int
113	fnt_autoconvolute(mpd_uint_t c1, mpd_size_t n, int* modnum)
114	{
115	int (fnt)(mpd_uint_t , mpd_size_t, int);
116	int (inv_fnt)(mpd_uint_t , mpd_size_t, int);
117	#ifdef PPRO
118	double dmod;
119	uint32_t dinvmod[`3`];
120	#endif
121	mpd_uint_t n_inv, umod;
122	mpd_size_t i;
123
124
125	SETMODULUS(modnum);
126	n_inv = POWMOD(n, (umod-`2`));
127
128	if (ispower2(n)) {
129	if (n > SIX_STEP_THRESHOLD) {
130	fnt = six_step_fnt;
131	inv_fnt = inv_six_step_fnt;
132	}
133	else {
134	fnt = std_fnt;
135	inv_fnt = std_inv_fnt;
136	}
137	}
138	else {
139	fnt = four_step_fnt;
140	inv_fnt = inv_four_step_fnt;
141	}
142
143	if (!fnt(c1, n, modnum)) {
144	return `0`;
145	}
146	for (i = `0`; i < n-`1`; i += `2`) {
147	mpd_uint_t x0 = c1[i];
148	mpd_uint_t x1 = c1[i+`1`];
149	MULMOD2(&x0, x0, &x1, x1);
150	c1[i] = x0;
151	c1[i+`1`] = x1;
152	}
153
154	if (!inv_fnt(c1, n, modnum)) {
155	return `0`;
156	}
157	for (i = `0`; i < n-`3`; i += `4`) {
158	mpd_uint_t x0 = c1[i];
159	mpd_uint_t x1 = c1[i+`1`];
160	mpd_uint_t x2 = c1[i+`2`];
161	mpd_uint_t x3 = c1[i+`3`];
162	MULMOD2C(&x0, &x1, n_inv);
163	MULMOD2C(&x2, &x3, n_inv);
164	c1[i] = x0;
165	c1[i+`1`] = x1;
166	c1[i+`2`] = x2;
167	c1[i+`3`] = x3;
168	}
169
170	return `1`;
171	}
172

Browse the source code of python/Modules/_decimal/libmpdec/convolute.c