difradix2.c source code [python/Modules/_decimal/libmpdec/difradix2.c]

1	/*
2	* Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	*
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	*
11	* 2. Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	*
15	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25	* SUCH DAMAGE.
26	*/
27
28
29	#include "mpdecimal.h"
30
31	#include <assert.h>
32
33	#include "bits.h"
34	#include "constants.h"
35	#include "difradix2.h"
36	#include "numbertheory.h"
37	#include "umodarith.h"
38
39
40	/ Bignum: The actual transform routine (decimation in frequency). /
41
42
43	/*
44	* Generate index pairs (x, bitreverse(x)) and carry out the permutation.
45	* n must be a power of two.
46	* Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational",
47	* Chapter 1.14.4. [http://www.jjj.de/fxt/]
48	*/
49	static inline void
50	bitreverse_permute(mpd_uint_t a[], mpd_size_t n)
51	{
52	mpd_size_t x = `0`;
53	mpd_size_t r = `0`;
54	mpd_uint_t t;
55
56	do { / Invariant: r = bitreverse(x) /
57	if (r > x) {
58	t = a[x];
59	a[x] = a[r];
60	a[r] = t;
61	}
62	/ Flip trailing consecutive 1 bits and the first zero bit*
63	* that absorbs a possible carry. */
64	x += `1`;
65	/ Mirror the operation on r: Flip n_trailing_zeros(x)+1*
66	high bits of r. /*
67	r ^= (n - (n >> (mpd_bsf(x)+`1`)));
68	/ The loop invariant is preserved. /
69	} while (x < n);
70	}
71
72
73	/ Fast Number Theoretic Transform, decimation in frequency. /
74	void
75	fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)
76	{
77	mpd_uint_t *wtable = tparams->wtable;
78	mpd_uint_t umod;
79	#ifdef PPRO
80	double dmod;
81	uint32_t dinvmod[`3`];
82	#endif
83	mpd_uint_t u0, u1, v0, v1;
84	mpd_uint_t w, w0, w1, wstep;
85	mpd_size_t m, mhalf;
86	mpd_size_t j, r;
87
88
89	assert(ispower2(n));
90	assert(n >= `4`);
91
92	SETMODULUS(tparams->modnum);
93
94	/ m == n /
95	mhalf = n / `2`;
96	for (j = `0`; j < mhalf; j += `2`) {
97
98	w0 = wtable[j];
99	w1 = wtable[j+`1`];
100
101	u0 = a[j];
102	v0 = a[j+mhalf];
103
104	u1 = a[j+`1`];
105	v1 = a[j+`1`+mhalf];
106
107	a[j] = addmod(u0, v0, umod);
108	v0 = submod(u0, v0, umod);
109
110	a[j+`1`] = addmod(u1, v1, umod);
111	v1 = submod(u1, v1, umod);
112
113	MULMOD2(&v0, w0, &v1, w1);
114
115	a[j+mhalf] = v0;
116	a[j+`1`+mhalf] = v1;
117
118	}
119
120	wstep = `2`;
121	for (m = n/`2`; m >= `2`; m>>=`1`, wstep<<=`1`) {
122
123	mhalf = m / `2`;
124
125	/ j == 0 /
126	for (r = `0`; r < n; r += `2`*m) {
127
128	u0 = a[r];
129	v0 = a[r+mhalf];
130
131	u1 = a[m+r];
132	v1 = a[m+r+mhalf];
133
134	a[r] = addmod(u0, v0, umod);
135	v0 = submod(u0, v0, umod);
136
137	a[m+r] = addmod(u1, v1, umod);
138	v1 = submod(u1, v1, umod);
139
140	a[r+mhalf] = v0;
141	a[m+r+mhalf] = v1;
142	}
143
144	for (j = `1`; j < mhalf; j++) {
145
146	w = wtable[j*wstep];
147
148	for (r = `0`; r < n; r += `2`*m) {
149
150	u0 = a[r+j];
151	v0 = a[r+j+mhalf];
152
153	u1 = a[m+r+j];
154	v1 = a[m+r+j+mhalf];
155
156	a[r+j] = addmod(u0, v0, umod);
157	v0 = submod(u0, v0, umod);
158
159	a[m+r+j] = addmod(u1, v1, umod);
160	v1 = submod(u1, v1, umod);
161
162	MULMOD2C(&v0, &v1, w);
163
164	a[r+j+mhalf] = v0;
165	a[m+r+j+mhalf] = v1;
166	}
167
168	}
169
170	}
171
172	bitreverse_permute(a, n);
173	}
174

Browse the source code of python/Modules/_decimal/libmpdec/difradix2.c