sixstep.c source code [python/Modules/_decimal/libmpdec/sixstep.c]

1	/*
2	* Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	*
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	*
11	* 2. Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	*
15	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25	* SUCH DAMAGE.
26	*/
27
28
29	#include "mpdecimal.h"
30
31	#include <assert.h>
32	#include <stdio.h>
33
34	#include "bits.h"
35	#include "constants.h"
36	#include "difradix2.h"
37	#include "numbertheory.h"
38	#include "sixstep.h"
39	#include "transpose.h"
40	#include "umodarith.h"
41
42
43	/ Bignum: Cache efficient Matrix Fourier Transform for arrays of the*
44	form 2n (See literature/six-step.txt). /*
45
46
47	/ forward transform with sign = -1 /
48	int
49	six_step_fnt(mpd_uint_t a, mpd_size_t n, int* modnum)
50	{
51	struct fnt_params *tparams;
52	mpd_size_t log2n, C, R;
53	mpd_uint_t kernel;
54	mpd_uint_t umod;
55	#ifdef PPRO
56	double dmod;
57	uint32_t dinvmod[`3`];
58	#endif
59	mpd_uint_t *x, w0, w1, wstep;
60	mpd_size_t i, k;
61
62
63	assert(ispower2(n));
64	assert(n >= `16`);
65	assert(n <= MPD_MAXTRANSFORM_2N);
66
67	log2n = mpd_bsr(n);
68	C = ((mpd_size_t)`1`) << (log2n / `2`); / number of columns /
69	R = ((mpd_size_t)`1`) << (log2n - (log2n / `2`)); / number of rows /
70
71
72	/ Transpose the matrix. /
73	if (!transpose_pow2(a, R, C)) {
74	return `0`;
75	}
76
77	/ Length R transform on the rows. /
78	if ((tparams = _mpd_init_fnt_params(R, -`1`, modnum)) == NULL) {
79	return `0`;
80	}
81	for (x = a; x < a+n; x += R) {
82	fnt_dif2(x, R, tparams);
83	}
84
85	/ Transpose the matrix. /
86	if (!transpose_pow2(a, C, R)) {
87	mpd_free(tparams);
88	return `0`;
89	}
90
91	/ Multiply each matrix element (addressed by iC+k) by r(ik). /
92	SETMODULUS(modnum);
93	kernel = _mpd_getkernel(n, -`1`, modnum);
94	for (i = `1`; i < R; i++) {
95	w0 = `1`; / r*(i0): initial value for k=0 /
96	w1 = POWMOD(kernel, i); / r*(i1): initial value for k=1 /
97	wstep = MULMOD(w1, w1); / r*(2i) /
98	for (k = `0`; k < C; k += `2`) {
99	mpd_uint_t x0 = a[i*C+k];
100	mpd_uint_t x1 = a[i*C+k+`1`];
101	MULMOD2(&x0, w0, &x1, w1);
102	MULMOD2C(&w0, &w1, wstep); / r*(i(k+2)) = r*(ik) * r*(2i) /
103	a[i*C+k] = x0;
104	a[i*C+k+`1`] = x1;
105	}
106	}
107
108	/ Length C transform on the rows. /
109	if (C != R) {
110	mpd_free(tparams);
111	if ((tparams = _mpd_init_fnt_params(C, -`1`, modnum)) == NULL) {
112	return `0`;
113	}
114	}
115	for (x = a; x < a+n; x += C) {
116	fnt_dif2(x, C, tparams);
117	}
118	mpd_free(tparams);
119
120	#if 0
121	/ An unordered transform is sufficient for convolution. /
122	/ Transpose the matrix. /
123	if (!transpose_pow2(a, R, C)) {
124	return `0`;
125	}
126	#endif
127
128	return `1`;
129	}
130
131
132	/ reverse transform, sign = 1 /
133	int
134	inv_six_step_fnt(mpd_uint_t a, mpd_size_t n, int* modnum)
135	{
136	struct fnt_params *tparams;
137	mpd_size_t log2n, C, R;
138	mpd_uint_t kernel;
139	mpd_uint_t umod;
140	#ifdef PPRO
141	double dmod;
142	uint32_t dinvmod[`3`];
143	#endif
144	mpd_uint_t *x, w0, w1, wstep;
145	mpd_size_t i, k;
146
147
148	assert(ispower2(n));
149	assert(n >= `16`);
150	assert(n <= MPD_MAXTRANSFORM_2N);
151
152	log2n = mpd_bsr(n);
153	C = ((mpd_size_t)`1`) << (log2n / `2`); / number of columns /
154	R = ((mpd_size_t)`1`) << (log2n - (log2n / `2`)); / number of rows /
155
156
157	#if 0
158	/ An unordered transform is sufficient for convolution. /
159	/ Transpose the matrix, producing an RC matrix. /*
160	if (!transpose_pow2(a, C, R)) {
161	return `0`;
162	}
163	#endif
164
165	/ Length C transform on the rows. /
166	if ((tparams = _mpd_init_fnt_params(C, `1`, modnum)) == NULL) {
167	return `0`;
168	}
169	for (x = a; x < a+n; x += C) {
170	fnt_dif2(x, C, tparams);
171	}
172
173	/ Multiply each matrix element (addressed by iC+k) by r(ik). /
174	SETMODULUS(modnum);
175	kernel = _mpd_getkernel(n, `1`, modnum);
176	for (i = `1`; i < R; i++) {
177	w0 = `1`;
178	w1 = POWMOD(kernel, i);
179	wstep = MULMOD(w1, w1);
180	for (k = `0`; k < C; k += `2`) {
181	mpd_uint_t x0 = a[i*C+k];
182	mpd_uint_t x1 = a[i*C+k+`1`];
183	MULMOD2(&x0, w0, &x1, w1);
184	MULMOD2C(&w0, &w1, wstep);
185	a[i*C+k] = x0;
186	a[i*C+k+`1`] = x1;
187	}
188	}
189
190	/ Transpose the matrix. /
191	if (!transpose_pow2(a, R, C)) {
192	mpd_free(tparams);
193	return `0`;
194	}
195
196	/ Length R transform on the rows. /
197	if (R != C) {
198	mpd_free(tparams);
199	if ((tparams = _mpd_init_fnt_params(R, `1`, modnum)) == NULL) {
200	return `0`;
201	}
202	}
203	for (x = a; x < a+n; x += R) {
204	fnt_dif2(x, R, tparams);
205	}
206	mpd_free(tparams);
207
208	/ Transpose the matrix. /
209	if (!transpose_pow2(a, C, R)) {
210	return `0`;
211	}
212
213	return `1`;
214	}
215

Browse the source code of python/Modules/_decimal/libmpdec/sixstep.c