crt.c source code [python/Modules/_decimal/libmpdec/crt.c]

1	/*
2	* Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	*
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	*
11	* 2. Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	*
15	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25	* SUCH DAMAGE.
26	*/
27
28
29	#include "mpdecimal.h"
30
31	#include <assert.h>
32
33	#include "constants.h"
34	#include "crt.h"
35	#include "numbertheory.h"
36	#include "typearith.h"
37	#include "umodarith.h"
38
39
/* Bignum: Chinese Remainder Theorem, extends the maximum transform length. */
41
42
43	/ Multiply P1P2 by v, store result in w. /
44	static inline void
45	_crt_mulP1P2_3(mpd_uint_t w[`3`], mpd_uint_t v)
46	{
47	mpd_uint_t hi1, hi2, lo;
48
49	_mpd_mul_words(&hi1, &lo, LH_P1P2, v);
50	w[`0`] = lo;
51
52	_mpd_mul_words(&hi2, &lo, UH_P1P2, v);
53	lo = hi1 + lo;
54	if (lo < hi1) hi2++;
55
56	w[`1`] = lo;
57	w[`2`] = hi2;
58	}
59
60	/ Add 3 words from v to w. The result is known to fit in w. /
61	static inline void
62	_crt_add3(mpd_uint_t w[`3`], mpd_uint_t v[`3`])
63	{
64	mpd_uint_t carry;
65
66	w[`0`] = w[`0`] + v[`0`];
67	carry = (w[`0`] < v[`0`]);
68
69	w[`1`] = w[`1`] + v[`1`];
70	if (w[`1`] < v[`1`]) w[`2`]++;
71
72	w[`1`] = w[`1`] + carry;
73	if (w[`1`] < carry) w[`2`]++;
74
75	w[`2`] += v[`2`];
76	}
77
78	/ Divide 3 words in u by v, store result in w, return remainder. /
79	static inline mpd_uint_t
80	_crt_div3(mpd_uint_t w, const* mpd_uint_t *u, mpd_uint_t v)
81	{
82	mpd_uint_t r1 = u[`2`];
83	mpd_uint_t r2;
84
85	if (r1 < v) {
86	w[`2`] = `0`;
87	}
88	else {
89	_mpd_div_word(&w[`2`], &r1, u[`2`], v); / GCOV_NOT_REACHED /
90	}
91
92	_mpd_div_words(&w[`1`], &r2, r1, u[`1`], v);
93	_mpd_div_words(&w[`0`], &r1, r2, u[`0`], v);
94
95	return r1;
96	}
97
98
99	/*
100	* Chinese Remainder Theorem:
101	* Algorithm from Joerg Arndt, "Matters Computational",
102	* Chapter 37.4.1 [http://www.jjj.de/fxt/]
103	*
104	* See also Knuth, TAOCP, Volume 2, 4.3.2, exercise 7.
105	*/
106
107	/*
108	* CRT with carry: x1, x2, x3 contain numbers modulo p1, p2, p3. For each
109	* triple of members of the arrays, find the unique z modulo p1p2p3, with
110	* zmax = p1p2p3 - 1.
111	*
112	* In each iteration of the loop, split z into result[i] = z % MPD_RADIX
113	* and carry = z / MPD_RADIX. Let N be the size of carry[] and cmax the
114	* maximum carry.
115	*
116	* Limits for the 32-bit build:
117	*
118	* N = 2**96
119	* cmax = 7711435591312380274
120	*
121	* Limits for the 64 bit build:
122	*
123	* N = 2**192
124	* cmax = 627710135393475385904124401220046371710
125	*
126	* The following statements hold for both versions:
127	*
128	* 1) cmax + zmax < N, so the addition does not overflow.
129	*
130	* 2) (cmax + zmax) / MPD_RADIX == cmax.
131	*
132	* 3) If c <= cmax, then c_next = (c + zmax) / MPD_RADIX <= cmax.
133	*/
134	void
135	crt3(mpd_uint_t x1, mpd_uint_t x2, mpd_uint_t *x3, mpd_size_t rsize)
136	{
137	mpd_uint_t p1 = mpd_moduli[P1];
138	mpd_uint_t umod;
139	#ifdef PPRO
140	double dmod;
141	uint32_t dinvmod[`3`];
142	#endif
143	mpd_uint_t a1, a2, a3;
144	mpd_uint_t s;
145	mpd_uint_t z[`3`], t[`3`];
146	mpd_uint_t carry[`3`] = {`0`,`0`,`0`};
147	mpd_uint_t hi, lo;
148	mpd_size_t i;
149
150	for (i = `0`; i < rsize; i++) {
151
152	a1 = x1[i];
153	a2 = x2[i];
154	a3 = x3[i];
155
156	SETMODULUS(P2);
157	s = ext_submod(a2, a1, umod);
158	s = MULMOD(s, INV_P1_MOD_P2);
159
160	_mpd_mul_words(&hi, &lo, s, p1);
161	lo = lo + a1;
162	if (lo < a1) hi++;
163
164	SETMODULUS(P3);
165	s = dw_submod(a3, hi, lo, umod);
166	s = MULMOD(s, INV_P1P2_MOD_P3);
167
168	z[`0`] = lo;
169	z[`1`] = hi;
170	z[`2`] = `0`;
171
172	_crt_mulP1P2_3(t, s);
173	_crt_add3(z, t);
174	_crt_add3(carry, z);
175
176	x1[i] = _crt_div3(carry, carry, MPD_RADIX);
177	}
178
179	assert(carry[`0`] == `0` && carry[`1`] == `0` && carry[`2`] == `0`);
180	}
181

Browse the source code of python/Modules/_decimal/libmpdec/crt.c