1/*
2 * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28
29#ifndef LIBMPDEC_TYPEARITH_H_
30#define LIBMPDEC_TYPEARITH_H_
31
32
33#include "mpdecimal.h"
34
35#include <assert.h>
36
37
38/*****************************************************************************/
39/* Low level native arithmetic on basic types */
40/*****************************************************************************/
41
42
43/** ------------------------------------------------------------
44 ** Double width multiplication and division
45 ** ------------------------------------------------------------
46 */
47
48#if defined(CONFIG_64)
49#if defined(ANSI)
50#if defined(HAVE_UINT128_T)
51static inline void
52_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
53{
54 __uint128_t hl;
55
56 hl = (__uint128_t)a * b;
57
58 *hi = hl >> 64;
59 *lo = (mpd_uint_t)hl;
60}
61
62static inline void
63_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
64 mpd_uint_t d)
65{
66 __uint128_t hl;
67
68 hl = ((__uint128_t)hi<<64) + lo;
69 *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
70 *r = (mpd_uint_t)(hl - (__uint128_t)(*q) * d);
71}
72#else
73static inline void
74_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
75{
76 uint32_t w[4], carry;
77 uint32_t ah, al, bh, bl;
78 uint64_t hl;
79
80 ah = (uint32_t)(a>>32); al = (uint32_t)a;
81 bh = (uint32_t)(b>>32); bl = (uint32_t)b;
82
83 hl = (uint64_t)al * bl;
84 w[0] = (uint32_t)hl;
85 carry = (uint32_t)(hl>>32);
86
87 hl = (uint64_t)ah * bl + carry;
88 w[1] = (uint32_t)hl;
89 w[2] = (uint32_t)(hl>>32);
90
91 hl = (uint64_t)al * bh + w[1];
92 w[1] = (uint32_t)hl;
93 carry = (uint32_t)(hl>>32);
94
95 hl = ((uint64_t)ah * bh + w[2]) + carry;
96 w[2] = (uint32_t)hl;
97 w[3] = (uint32_t)(hl>>32);
98
99 *hi = ((uint64_t)w[3]<<32) + w[2];
100 *lo = ((uint64_t)w[1]<<32) + w[0];
101}
102
103/*
104 * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt
105 * http://www.hackersdelight.org/permissions.htm:
106 * "You are free to use, copy, and distribute any of the code on this web
107 * site, whether modified by you or not. You need not give attribution."
108 *
109 * Slightly modified, comments are mine.
110 */
111static inline int
112nlz(uint64_t x)
113{
114 int n;
115
116 if (x == 0) return(64);
117
118 n = 0;
119 if (x <= 0x00000000FFFFFFFF) {n = n +32; x = x <<32;}
120 if (x <= 0x0000FFFFFFFFFFFF) {n = n +16; x = x <<16;}
121 if (x <= 0x00FFFFFFFFFFFFFF) {n = n + 8; x = x << 8;}
122 if (x <= 0x0FFFFFFFFFFFFFFF) {n = n + 4; x = x << 4;}
123 if (x <= 0x3FFFFFFFFFFFFFFF) {n = n + 2; x = x << 2;}
124 if (x <= 0x7FFFFFFFFFFFFFFF) {n = n + 1;}
125
126 return n;
127}
128
129static inline void
130_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
131 mpd_uint_t v)
132{
133 const mpd_uint_t b = 4294967296;
134 mpd_uint_t un1, un0,
135 vn1, vn0,
136 q1, q0,
137 un32, un21, un10,
138 rhat, t;
139 int s;
140
141 assert(u1 < v);
142
143 s = nlz(v);
144 v = v << s;
145 vn1 = v >> 32;
146 vn0 = v & 0xFFFFFFFF;
147
148 t = (s == 0) ? 0 : u0 >> (64 - s);
149 un32 = (u1 << s) | t;
150 un10 = u0 << s;
151
152 un1 = un10 >> 32;
153 un0 = un10 & 0xFFFFFFFF;
154
155 q1 = un32 / vn1;
156 rhat = un32 - q1*vn1;
157again1:
158 if (q1 >= b || q1*vn0 > b*rhat + un1) {
159 q1 = q1 - 1;
160 rhat = rhat + vn1;
161 if (rhat < b) goto again1;
162 }
163
164 /*
165 * Before again1 we had:
166 * (1) q1*vn1 + rhat = un32
167 * (2) q1*vn1*b + rhat*b + un1 = un32*b + un1
168 *
169 * The statements inside the if-clause do not change the value
170 * of the left-hand side of (2), and the loop is only exited
171 * if q1*vn0 <= rhat*b + un1, so:
172 *
173 * (3) q1*vn1*b + q1*vn0 <= un32*b + un1
174 * (4) q1*v <= un32*b + un1
175 * (5) 0 <= un32*b + un1 - q1*v
176 *
177 * By (5) we are certain that the possible add-back step from
178 * Knuth's algorithm D is never required.
179 *
180 * Since the final quotient is less than 2**64, the following
181 * must be true:
182 *
183 * (6) un32*b + un1 - q1*v <= UINT64_MAX
184 *
185 * This means that in the following line, the high words
186 * of un32*b and q1*v can be discarded without any effect
187 * on the result.
188 */
189 un21 = un32*b + un1 - q1*v;
190
191 q0 = un21 / vn1;
192 rhat = un21 - q0*vn1;
193again2:
194 if (q0 >= b || q0*vn0 > b*rhat + un0) {
195 q0 = q0 - 1;
196 rhat = rhat + vn1;
197 if (rhat < b) goto again2;
198 }
199
200 *q = q1*b + q0;
201 *r = (un21*b + un0 - q0*v) >> s;
202}
203#endif
204
205/* END ANSI */
206#elif defined(ASM)
207static inline void
208_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
209{
210 mpd_uint_t h, l;
211
212 __asm__ ( "mulq %3\n\t"
213 : "=d" (h), "=a" (l)
214 : "%a" (a), "rm" (b)
215 : "cc"
216 );
217
218 *hi = h;
219 *lo = l;
220}
221
222static inline void
223_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
224 mpd_uint_t d)
225{
226 mpd_uint_t qq, rr;
227
228 __asm__ ( "divq %4\n\t"
229 : "=a" (qq), "=d" (rr)
230 : "a" (lo), "d" (hi), "rm" (d)
231 : "cc"
232 );
233
234 *q = qq;
235 *r = rr;
236}
237/* END GCC ASM */
238#elif defined(MASM)
239#include <intrin.h>
240#pragma intrinsic(_umul128)
241
242static inline void
243_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
244{
245 *lo = _umul128(a, b, hi);
246}
247
248void _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
249 mpd_uint_t d);
250
251/* END MASM (_MSC_VER) */
252#else
253 #error "need platform specific 128 bit multiplication and division"
254#endif
255
256#define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
257static inline void
258_mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
259{
260 assert(exp <= 19);
261
262 if (exp <= 9) {
263 if (exp <= 4) {
264 switch (exp) {
265 case 0: *q = v; *r = 0; break;
266 case 1: DIVMOD(q, r, v, 10UL); break;
267 case 2: DIVMOD(q, r, v, 100UL); break;
268 case 3: DIVMOD(q, r, v, 1000UL); break;
269 case 4: DIVMOD(q, r, v, 10000UL); break;
270 }
271 }
272 else {
273 switch (exp) {
274 case 5: DIVMOD(q, r, v, 100000UL); break;
275 case 6: DIVMOD(q, r, v, 1000000UL); break;
276 case 7: DIVMOD(q, r, v, 10000000UL); break;
277 case 8: DIVMOD(q, r, v, 100000000UL); break;
278 case 9: DIVMOD(q, r, v, 1000000000UL); break;
279 }
280 }
281 }
282 else {
283 if (exp <= 14) {
284 switch (exp) {
285 case 10: DIVMOD(q, r, v, 10000000000ULL); break;
286 case 11: DIVMOD(q, r, v, 100000000000ULL); break;
287 case 12: DIVMOD(q, r, v, 1000000000000ULL); break;
288 case 13: DIVMOD(q, r, v, 10000000000000ULL); break;
289 case 14: DIVMOD(q, r, v, 100000000000000ULL); break;
290 }
291 }
292 else {
293 switch (exp) {
294 case 15: DIVMOD(q, r, v, 1000000000000000ULL); break;
295 case 16: DIVMOD(q, r, v, 10000000000000000ULL); break;
296 case 17: DIVMOD(q, r, v, 100000000000000000ULL); break;
297 case 18: DIVMOD(q, r, v, 1000000000000000000ULL); break;
298 case 19: DIVMOD(q, r, v, 10000000000000000000ULL); break; /* GCOV_NOT_REACHED */
299 }
300 }
301 }
302}
303
304/* END CONFIG_64 */
305#elif defined(CONFIG_32)
306#if defined(ANSI)
307#if !defined(LEGACY_COMPILER)
308static inline void
309_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
310{
311 mpd_uuint_t hl;
312
313 hl = (mpd_uuint_t)a * b;
314
315 *hi = hl >> 32;
316 *lo = (mpd_uint_t)hl;
317}
318
319static inline void
320_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
321 mpd_uint_t d)
322{
323 mpd_uuint_t hl;
324
325 hl = ((mpd_uuint_t)hi<<32) + lo;
326 *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
327 *r = (mpd_uint_t)(hl - (mpd_uuint_t)(*q) * d);
328}
329/* END ANSI + uint64_t */
330#else
331static inline void
332_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
333{
334 uint16_t w[4], carry;
335 uint16_t ah, al, bh, bl;
336 uint32_t hl;
337
338 ah = (uint16_t)(a>>16); al = (uint16_t)a;
339 bh = (uint16_t)(b>>16); bl = (uint16_t)b;
340
341 hl = (uint32_t)al * bl;
342 w[0] = (uint16_t)hl;
343 carry = (uint16_t)(hl>>16);
344
345 hl = (uint32_t)ah * bl + carry;
346 w[1] = (uint16_t)hl;
347 w[2] = (uint16_t)(hl>>16);
348
349 hl = (uint32_t)al * bh + w[1];
350 w[1] = (uint16_t)hl;
351 carry = (uint16_t)(hl>>16);
352
353 hl = ((uint32_t)ah * bh + w[2]) + carry;
354 w[2] = (uint16_t)hl;
355 w[3] = (uint16_t)(hl>>16);
356
357 *hi = ((uint32_t)w[3]<<16) + w[2];
358 *lo = ((uint32_t)w[1]<<16) + w[0];
359}
360
361/*
362 * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt
363 * http://www.hackersdelight.org/permissions.htm:
364 * "You are free to use, copy, and distribute any of the code on this web
365 * site, whether modified by you or not. You need not give attribution."
366 *
367 * Slightly modified, comments are mine.
368 */
369static inline int
370nlz(uint32_t x)
371{
372 int n;
373
374 if (x == 0) return(32);
375
376 n = 0;
377 if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
378 if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
379 if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
380 if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
381 if (x <= 0x7FFFFFFF) {n = n + 1;}
382
383 return n;
384}
385
386static inline void
387_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
388 mpd_uint_t v)
389{
390 const mpd_uint_t b = 65536;
391 mpd_uint_t un1, un0,
392 vn1, vn0,
393 q1, q0,
394 un32, un21, un10,
395 rhat, t;
396 int s;
397
398 assert(u1 < v);
399
400 s = nlz(v);
401 v = v << s;
402 vn1 = v >> 16;
403 vn0 = v & 0xFFFF;
404
405 t = (s == 0) ? 0 : u0 >> (32 - s);
406 un32 = (u1 << s) | t;
407 un10 = u0 << s;
408
409 un1 = un10 >> 16;
410 un0 = un10 & 0xFFFF;
411
412 q1 = un32 / vn1;
413 rhat = un32 - q1*vn1;
414again1:
415 if (q1 >= b || q1*vn0 > b*rhat + un1) {
416 q1 = q1 - 1;
417 rhat = rhat + vn1;
418 if (rhat < b) goto again1;
419 }
420
421 /*
422 * Before again1 we had:
423 * (1) q1*vn1 + rhat = un32
424 * (2) q1*vn1*b + rhat*b + un1 = un32*b + un1
425 *
426 * The statements inside the if-clause do not change the value
427 * of the left-hand side of (2), and the loop is only exited
428 * if q1*vn0 <= rhat*b + un1, so:
429 *
430 * (3) q1*vn1*b + q1*vn0 <= un32*b + un1
431 * (4) q1*v <= un32*b + un1
432 * (5) 0 <= un32*b + un1 - q1*v
433 *
434 * By (5) we are certain that the possible add-back step from
435 * Knuth's algorithm D is never required.
436 *
437 * Since the final quotient is less than 2**32, the following
438 * must be true:
439 *
440 * (6) un32*b + un1 - q1*v <= UINT32_MAX
441 *
442 * This means that in the following line, the high words
443 * of un32*b and q1*v can be discarded without any effect
444 * on the result.
445 */
446 un21 = un32*b + un1 - q1*v;
447
448 q0 = un21 / vn1;
449 rhat = un21 - q0*vn1;
450again2:
451 if (q0 >= b || q0*vn0 > b*rhat + un0) {
452 q0 = q0 - 1;
453 rhat = rhat + vn1;
454 if (rhat < b) goto again2;
455 }
456
457 *q = q1*b + q0;
458 *r = (un21*b + un0 - q0*v) >> s;
459}
460#endif /* END ANSI + LEGACY_COMPILER */
461
462/* END ANSI */
463#elif defined(ASM)
464static inline void
465_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
466{
467 mpd_uint_t h, l;
468
469 __asm__ ( "mull %3\n\t"
470 : "=d" (h), "=a" (l)
471 : "%a" (a), "rm" (b)
472 : "cc"
473 );
474
475 *hi = h;
476 *lo = l;
477}
478
479static inline void
480_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
481 mpd_uint_t d)
482{
483 mpd_uint_t qq, rr;
484
485 __asm__ ( "divl %4\n\t"
486 : "=a" (qq), "=d" (rr)
487 : "a" (lo), "d" (hi), "rm" (d)
488 : "cc"
489 );
490
491 *q = qq;
492 *r = rr;
493}
494/* END GCC ASM */
495#elif defined(MASM)
496static inline void __cdecl
497_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
498{
499 mpd_uint_t h, l;
500
501 __asm {
502 mov eax, a
503 mul b
504 mov h, edx
505 mov l, eax
506 }
507
508 *hi = h;
509 *lo = l;
510}
511
512static inline void __cdecl
513_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
514 mpd_uint_t d)
515{
516 mpd_uint_t qq, rr;
517
518 __asm {
519 mov eax, lo
520 mov edx, hi
521 div d
522 mov qq, eax
523 mov rr, edx
524 }
525
526 *q = qq;
527 *r = rr;
528}
529/* END MASM (_MSC_VER) */
530#else
531 #error "need platform specific 64 bit multiplication and division"
532#endif
533
534#define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
535static inline void
536_mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
537{
538 assert(exp <= 9);
539
540 if (exp <= 4) {
541 switch (exp) {
542 case 0: *q = v; *r = 0; break;
543 case 1: DIVMOD(q, r, v, 10UL); break;
544 case 2: DIVMOD(q, r, v, 100UL); break;
545 case 3: DIVMOD(q, r, v, 1000UL); break;
546 case 4: DIVMOD(q, r, v, 10000UL); break;
547 }
548 }
549 else {
550 switch (exp) {
551 case 5: DIVMOD(q, r, v, 100000UL); break;
552 case 6: DIVMOD(q, r, v, 1000000UL); break;
553 case 7: DIVMOD(q, r, v, 10000000UL); break;
554 case 8: DIVMOD(q, r, v, 100000000UL); break;
555 case 9: DIVMOD(q, r, v, 1000000000UL); break; /* GCOV_NOT_REACHED */
556 }
557 }
558}
559/* END CONFIG_32 */
560
561/* NO CONFIG */
562#else
563 #error "define CONFIG_64 or CONFIG_32"
564#endif /* CONFIG */
565
566
567static inline void
568_mpd_div_word(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t d)
569{
570 *q = v / d;
571 *r = v - *q * d;
572}
573
574static inline void
575_mpd_idiv_word(mpd_ssize_t *q, mpd_ssize_t *r, mpd_ssize_t v, mpd_ssize_t d)
576{
577 *q = v / d;
578 *r = v - *q * d;
579}
580
581
582/** ------------------------------------------------------------
583 ** Arithmetic with overflow checking
584 ** ------------------------------------------------------------
585 */
586
587/* The following macros do call exit() in case of an overflow.
588 If the library is used correctly (i.e. with valid context
589 parameters), such overflows cannot occur. The macros are used
590 as sanity checks in a couple of strategic places and should
591 be viewed as a handwritten version of gcc's -ftrapv option. */
592
593static inline mpd_size_t
594add_size_t(mpd_size_t a, mpd_size_t b)
595{
596 if (a > MPD_SIZE_MAX - b) {
597 mpd_err_fatal("add_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
598 }
599 return a + b;
600}
601
602static inline mpd_size_t
603sub_size_t(mpd_size_t a, mpd_size_t b)
604{
605 if (b > a) {
606 mpd_err_fatal("sub_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
607 }
608 return a - b;
609}
610
611#if MPD_SIZE_MAX != MPD_UINT_MAX
612 #error "adapt mul_size_t() and mulmod_size_t()"
613#endif
614
615static inline mpd_size_t
616mul_size_t(mpd_size_t a, mpd_size_t b)
617{
618 mpd_uint_t hi, lo;
619
620 _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
621 if (hi) {
622 mpd_err_fatal("mul_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
623 }
624 return lo;
625}
626
627static inline mpd_size_t
628add_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
629{
630 mpd_size_t ret;
631
632 *overflow = 0;
633 ret = a + b;
634 if (ret < a) *overflow = 1;
635 return ret;
636}
637
638static inline mpd_size_t
639mul_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
640{
641 mpd_uint_t hi, lo;
642
643 _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
644 *overflow = (mpd_size_t)hi;
645 return lo;
646}
647
648static inline mpd_ssize_t
649mod_mpd_ssize_t(mpd_ssize_t a, mpd_ssize_t m)
650{
651 mpd_ssize_t r = a % m;
652 return (r < 0) ? r + m : r;
653}
654
655static inline mpd_size_t
656mulmod_size_t(mpd_size_t a, mpd_size_t b, mpd_size_t m)
657{
658 mpd_uint_t hi, lo;
659 mpd_uint_t q, r;
660
661 _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
662 _mpd_div_words(&q, &r, hi, lo, (mpd_uint_t)m);
663
664 return r;
665}
666
667
668#endif /* LIBMPDEC_TYPEARITH_H_ */
669