jfdctfst.c source code [tensorflow/external/libjpeg_turbo/jfdctfst.c]

1	/*
2	* jfdctfst.c
3	*
4	* This file was part of the Independent JPEG Group's software:
5	* Copyright (C) 1994-1996, Thomas G. Lane.
6	* libjpeg-turbo Modifications:
7	* Copyright (C) 2015, D. R. Commander.
8	* For conditions of distribution and use, see the accompanying README.ijg
9	* file.
10	*
11	* This file contains a fast, not so accurate integer implementation of the
12	* forward DCT (Discrete Cosine Transform).
13	*
14	* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
15	* on each column. Direct algorithms are also available, but they are
16	* much more complex and seem not to be any faster when reduced to code.
17	*
18	* This implementation is based on Arai, Agui, and Nakajima's algorithm for
19	* scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
20	* Japanese, but the algorithm is described in the Pennebaker & Mitchell
21	* JPEG textbook (see REFERENCES section in file README.ijg). The following
22	* code is based directly on figure 4-8 in P&M.
23	* While an 8-point DCT cannot be done in less than 11 multiplies, it is
24	* possible to arrange the computation so that many of the multiplies are
25	* simple scalings of the final outputs. These multiplies can then be
26	* folded into the multiplications or divisions by the JPEG quantization
27	* table entries. The AA&N method leaves only 5 multiplies and 29 adds
28	* to be done in the DCT itself.
29	* The primary disadvantage of this method is that with fixed-point math,
30	* accuracy is lost due to imprecise representation of the scaled
31	* quantization values. The smaller the quantization table entry, the less
32	* precise the scaled value, so this implementation does worse with high-
33	* quality-setting files than with low-quality ones.
34	*/
35
36	#define JPEG_INTERNALS
37	#include "jinclude.h"
38	#include "jpeglib.h"
39	#include "jdct.h" /* Private declarations for DCT subsystem */
40
41	#ifdef DCT_IFAST_SUPPORTED
42
43
44	/*
45	* This module is specialized to the case DCTSIZE = 8.
46	*/
47
48	#if DCTSIZE != 8
49	Sorry, this code only copes with `8x8` DCTs. / deliberate syntax err /
50	#endif
51
52
/* Scaling decisions are generally the same as in the LL&M algorithm;
 * see jfdctint.c for more details.  However, we choose to descale
 * (right shift) multiplication products as soon as they are formed,
 * rather than carrying additional fractional bits into subsequent additions.
 * This compromises accuracy slightly, but it lets us save a few shifts.
 * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
 * everywhere except in the multiplications proper; this saves a good deal
 * of work on 16-bit-int machines.
 *
 * Again to save a few shifts, the intermediate results between pass 1 and
 * pass 2 are not upscaled, but are represented only to integral precision.
 *
 * A final compromise is to represent the multiplicative constants to only
 * 8 fractional bits, rather than 13.  This saves some shifting work on some
 * machines, and may also reduce the cost of multiplication (since there
 * are fewer one-bits in the constants).
 */

/* Number of fractional bits carried by the fixed-point multiplier constants. */
#define CONST_BITS  8
72
73
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 *
 * Each pre-calculated literal below is the named constant multiplied by
 * 2^CONST_BITS (i.e. 256) and rounded to the nearest integer.
 */

#if CONST_BITS == 8
#define FIX_0_382683433  ((JLONG)98)            /* FIX(0.382683433) */
#define FIX_0_541196100  ((JLONG)139)           /* FIX(0.541196100) */
#define FIX_0_707106781  ((JLONG)181)           /* FIX(0.707106781) */
#define FIX_1_306562965  ((JLONG)334)           /* FIX(1.306562965) */
#else
#define FIX_0_382683433  FIX(0.382683433)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_707106781  FIX(0.707106781)
#define FIX_1_306562965  FIX(1.306562965)
#endif
92
93
/* We can gain a little more speed, with a further compromise in accuracy,
 * by omitting the addition in a descaling shift.  This yields an incorrectly
 * rounded result half the time...
 *
 * Unless USE_ACCURATE_ROUNDING is defined, any previously defined DESCALE
 * is replaced by a plain right shift with no rounding term.
 */

#ifndef USE_ACCURATE_ROUNDING
#undef DESCALE
#define DESCALE(x, n)  RIGHT_SHIFT(x, n)
#endif
103
104
/* Multiply a DCTELEM variable by a JLONG constant, and immediately
 * descale to yield a DCTELEM result.
 *
 * The product is shifted right by CONST_BITS via DESCALE before being cast
 * back to DCTELEM.  Both arguments are evaluated exactly once.
 */

#define MULTIPLY(var, coef)  ((DCTELEM)DESCALE((var) * (coef), CONST_BITS))
110
111
112	/*
113	* Perform the forward DCT on one block of samples.
114	*/
115
116	GLOBAL(void)
117	jpeg_fdct_ifast(DCTELEM *data)
118	{
119	DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
120	DCTELEM tmp10, tmp11, tmp12, tmp13;
121	DCTELEM z1, z2, z3, z4, z5, z11, z13;
122	DCTELEM *dataptr;
123	int ctr;
124	SHIFT_TEMPS
125
126	/ Pass 1: process rows. /
127
128	dataptr = data;
129	for (ctr = DCTSIZE - `1`; ctr >= `0`; ctr--) {
130	tmp0 = dataptr[`0`] + dataptr[`7`];
131	tmp7 = dataptr[`0`] - dataptr[`7`];
132	tmp1 = dataptr[`1`] + dataptr[`6`];
133	tmp6 = dataptr[`1`] - dataptr[`6`];
134	tmp2 = dataptr[`2`] + dataptr[`5`];
135	tmp5 = dataptr[`2`] - dataptr[`5`];
136	tmp3 = dataptr[`3`] + dataptr[`4`];
137	tmp4 = dataptr[`3`] - dataptr[`4`];
138
139	/ Even part /
140
141	tmp10 = tmp0 + tmp3; / phase 2 /
142	tmp13 = tmp0 - tmp3;
143	tmp11 = tmp1 + tmp2;
144	tmp12 = tmp1 - tmp2;
145
146	dataptr[`0`] = tmp10 + tmp11; / phase 3 /
147	dataptr[`4`] = tmp10 - tmp11;
148
149	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); / c4 /
150	dataptr[`2`] = tmp13 + z1; / phase 5 /
151	dataptr[`6`] = tmp13 - z1;
152
153	/ Odd part /
154
155	tmp10 = tmp4 + tmp5; / phase 2 /
156	tmp11 = tmp5 + tmp6;
157	tmp12 = tmp6 + tmp7;
158
159	/ The rotator is modified from fig 4-8 to avoid extra negations. /
160	z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); / c6 /
161	z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; / c2-c6 /
162	z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; / c2+c6 /
163	z3 = MULTIPLY(tmp11, FIX_0_707106781); / c4 /
164
165	z11 = tmp7 + z3; / phase 5 /
166	z13 = tmp7 - z3;
167
168	dataptr[`5`] = z13 + z2; / phase 6 /
169	dataptr[`3`] = z13 - z2;
170	dataptr[`1`] = z11 + z4;
171	dataptr[`7`] = z11 - z4;
172
173	dataptr += DCTSIZE; / advance pointer to next row /
174	}
175
176	/ Pass 2: process columns. /
177
178	dataptr = data;
179	for (ctr = DCTSIZE - `1`; ctr >= `0`; ctr--) {
180	tmp0 = dataptr[DCTSIZE * `0`] + dataptr[DCTSIZE * `7`];
181	tmp7 = dataptr[DCTSIZE * `0`] - dataptr[DCTSIZE * `7`];
182	tmp1 = dataptr[DCTSIZE * `1`] + dataptr[DCTSIZE * `6`];
183	tmp6 = dataptr[DCTSIZE * `1`] - dataptr[DCTSIZE * `6`];
184	tmp2 = dataptr[DCTSIZE * `2`] + dataptr[DCTSIZE * `5`];
185	tmp5 = dataptr[DCTSIZE * `2`] - dataptr[DCTSIZE * `5`];
186	tmp3 = dataptr[DCTSIZE * `3`] + dataptr[DCTSIZE * `4`];
187	tmp4 = dataptr[DCTSIZE * `3`] - dataptr[DCTSIZE * `4`];
188
189	/ Even part /
190
191	tmp10 = tmp0 + tmp3; / phase 2 /
192	tmp13 = tmp0 - tmp3;
193	tmp11 = tmp1 + tmp2;
194	tmp12 = tmp1 - tmp2;
195
196	dataptr[DCTSIZE * `0`] = tmp10 + tmp11; / phase 3 /
197	dataptr[DCTSIZE * `4`] = tmp10 - tmp11;
198
199	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); / c4 /
200	dataptr[DCTSIZE * `2`] = tmp13 + z1; / phase 5 /
201	dataptr[DCTSIZE * `6`] = tmp13 - z1;
202
203	/ Odd part /
204
205	tmp10 = tmp4 + tmp5; / phase 2 /
206	tmp11 = tmp5 + tmp6;
207	tmp12 = tmp6 + tmp7;
208
209	/ The rotator is modified from fig 4-8 to avoid extra negations. /
210	z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); / c6 /
211	z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; / c2-c6 /
212	z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; / c2+c6 /
213	z3 = MULTIPLY(tmp11, FIX_0_707106781); / c4 /
214
215	z11 = tmp7 + z3; / phase 5 /
216	z13 = tmp7 - z3;
217
218	dataptr[DCTSIZE * `5`] = z13 + z2; / phase 6 /
219	dataptr[DCTSIZE * `3`] = z13 - z2;
220	dataptr[DCTSIZE * `1`] = z11 + z4;
221	dataptr[DCTSIZE * `7`] = z11 - z4;
222
223	dataptr++; / advance pointer to next column /
224	}
225	}
226
227	#endif /* DCT_IFAST_SUPPORTED */
228

Browse the source code of tensorflow/external/libjpeg_turbo/jfdctfst.c