transpose.c source code [python/Modules/_decimal/libmpdec/transpose.c]

1	/*
2	* Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	*
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	*
11	* 2. Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	*
15	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25	* SUCH DAMAGE.
26	*/
27
28
29	#include "mpdecimal.h"
30
31	#include <assert.h>
32	#include <limits.h>
33	#include <stdio.h>
34	#include <stdlib.h>
35	#include <string.h>
36
37	#include "bits.h"
38	#include "constants.h"
39	#include "transpose.h"
40	#include "typearith.h"
41
42
43	#define BUFSIZE 4096
44	#define SIDE 128
45
46
47	/ Bignum: The transpose functions are used for very large transforms*
48	in sixstep.c and fourstep.c. /*
49
50
51	/ Definition of the matrix transpose /
52	void
53	std_trans(mpd_uint_t dest[], mpd_uint_t src[], mpd_size_t rows, mpd_size_t cols)
54	{
55	mpd_size_t idest, isrc;
56	mpd_size_t r, c;
57
58	for (r = `0`; r < rows; r++) {
59	isrc = r * cols;
60	idest = r;
61	for (c = `0`; c < cols; c++) {
62	dest[idest] = src[isrc];
63	isrc += `1`;
64	idest += rows;
65	}
66	}
67	}
68
69	/*
70	* Swap half-rows of 2^n * (2*2^n) matrix.
71	* FORWARD_CYCLE: even/odd permutation of the halfrows.
72	* BACKWARD_CYCLE: reverse the even/odd permutation.
73	*/
74	static int
75	swap_halfrows_pow2(mpd_uint_t matrix, mpd_size_t rows, mpd_size_t cols, int* dir)
76	{
77	mpd_uint_t buf1[BUFSIZE];
78	mpd_uint_t buf2[BUFSIZE];
79	mpd_uint_t readbuf, writebuf, *hp;
80	mpd_size_t *done, dbits;
81	mpd_size_t b = BUFSIZE, stride;
82	mpd_size_t hn, hmax; / halfrow number /
83	mpd_size_t m, r=`0`;
84	mpd_size_t offset;
85	mpd_size_t next;
86
87
88	assert(cols == mul_size_t(`2`, rows));
89
90	if (dir == FORWARD_CYCLE) {
91	r = rows;
92	}
93	else if (dir == BACKWARD_CYCLE) {
94	r = `2`;
95	}
96	else {
97	abort(); / GCOV_NOT_REACHED /
98	}
99
100	m = cols - `1`;
101	hmax = rows; / cycles start at odd halfrows /
102	dbits = `8` * sizeof *done;
103	if ((done = mpd_calloc(hmax/(sizeof done) + `1`, sizeof* *done)) == NULL) {
104	return `0`;
105	}
106
107	for (hn = `1`; hn <= hmax; hn += `2`) {
108
109	if (done[hn/dbits] & mpd_bits[hn%dbits]) {
110	continue;
111	}
112
113	readbuf = buf1; writebuf = buf2;
114
115	for (offset = `0`; offset < cols/`2`; offset += b) {
116
117	stride = (offset + b < cols/`2`) ? b : cols/`2`-offset;
118
119	hp = matrix + hn*cols/`2`;
120	memcpy(readbuf, hp+offset, stride(sizeof* *readbuf));
121	pointerswap(&readbuf, &writebuf);
122
123	next = mulmod_size_t(hn, r, m);
124	hp = matrix + next*cols/`2`;
125
126	while (next != hn) {
127
128	memcpy(readbuf, hp+offset, stride(sizeof* *readbuf));
129	memcpy(hp+offset, writebuf, stride(sizeof* *writebuf));
130	pointerswap(&readbuf, &writebuf);
131
132	done[next/dbits] \|= mpd_bits[next%dbits];
133
134	next = mulmod_size_t(next, r, m);
135	hp = matrix + next*cols/`2`;
136
137	}
138
139	memcpy(hp+offset, writebuf, stride(sizeof* *writebuf));
140
141	done[hn/dbits] \|= mpd_bits[hn%dbits];
142	}
143	}
144
145	mpd_free(done);
146	return `1`;
147	}
148
149	/ In-place transpose of a square matrix /
150	static inline void
151	squaretrans(mpd_uint_t *buf, mpd_size_t cols)
152	{
153	mpd_uint_t tmp;
154	mpd_size_t idest, isrc;
155	mpd_size_t r, c;
156
157	for (r = `0`; r < cols; r++) {
158	c = r+`1`;
159	isrc = r*cols + c;
160	idest = c*cols + r;
161	for (c = r+`1`; c < cols; c++) {
162	tmp = buf[isrc];
163	buf[isrc] = buf[idest];
164	buf[idest] = tmp;
165	isrc += `1`;
166	idest += cols;
167	}
168	}
169	}
170
171	/*
172	* Transpose 2^n * 2^n matrix. For cache efficiency, the matrix is split into
173	* square blocks with side length 'SIDE'. First, the blocks are transposed,
174	* then a square transposition is done on each individual block.
175	*/
176	static void
177	squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
178	{
179	mpd_uint_t buf1[SIDE*SIDE];
180	mpd_uint_t buf2[SIDE*SIDE];
181	mpd_uint_t to, from;
182	mpd_size_t b = size;
183	mpd_size_t r, c;
184	mpd_size_t i;
185
186	while (b > SIDE) b >>= `1`;
187
188	for (r = `0`; r < size; r += b) {
189
190	for (c = r; c < size; c += b) {
191
192	from = matrix + r*size + c;
193	to = buf1;
194	for (i = `0`; i < b; i++) {
195	memcpy(to, from, b(sizeof* *to));
196	from += size;
197	to += b;
198	}
199	squaretrans(buf1, b);
200
201	if (r == c) {
202	to = matrix + r*size + c;
203	from = buf1;
204	for (i = `0`; i < b; i++) {
205	memcpy(to, from, b(sizeof* *to));
206	from += b;
207	to += size;
208	}
209	continue;
210	}
211	else {
212	from = matrix + c*size + r;
213	to = buf2;
214	for (i = `0`; i < b; i++) {
215	memcpy(to, from, b(sizeof* *to));
216	from += size;
217	to += b;
218	}
219	squaretrans(buf2, b);
220
221	to = matrix + c*size + r;
222	from = buf1;
223	for (i = `0`; i < b; i++) {
224	memcpy(to, from, b(sizeof* *to));
225	from += b;
226	to += size;
227	}
228
229	to = matrix + r*size + c;
230	from = buf2;
231	for (i = `0`; i < b; i++) {
232	memcpy(to, from, b(sizeof* *to));
233	from += b;
234	to += size;
235	}
236	}
237	}
238	}
239
240	}
241
242	/*
243	* In-place transposition of a 2^n x 2^n or a 2^n x (2*2^n)
244	* or a (2*2^n) x 2^n matrix.
245	*/
246	int
247	transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols)
248	{
249	mpd_size_t size = mul_size_t(rows, cols);
250
251	assert(ispower2(rows));
252	assert(ispower2(cols));
253
254	if (cols == rows) {
255	squaretrans_pow2(matrix, rows);
256	}
257	else if (cols == mul_size_t(`2`, rows)) {
258	if (!swap_halfrows_pow2(matrix, rows, cols, FORWARD_CYCLE)) {
259	return `0`;
260	}
261	squaretrans_pow2(matrix, rows);
262	squaretrans_pow2(matrix+(size/`2`), rows);
263	}
264	else if (rows == mul_size_t(`2`, cols)) {
265	squaretrans_pow2(matrix, cols);
266	squaretrans_pow2(matrix+(size/`2`), cols);
267	if (!swap_halfrows_pow2(matrix, cols, rows, BACKWARD_CYCLE)) {
268	return `0`;
269	}
270	}
271	else {
272	abort(); / GCOV_NOT_REACHED /
273	}
274
275	return `1`;
276	}
277

Browse the source code of python/Modules/_decimal/libmpdec/transpose.c