jidctred.c source code [tensorflow/external/libjpeg_turbo/jidctred.c]

1	/*
2	* jidctred.c
3	*
4	* This file was part of the Independent JPEG Group's software:
5	* Copyright (C) 1994-1998, Thomas G. Lane.
6	* libjpeg-turbo Modifications:
7	* Copyright (C) 2015, D. R. Commander.
8	* For conditions of distribution and use, see the accompanying README.ijg
9	* file.
10	*
11	* This file contains inverse-DCT routines that produce reduced-size output:
12	* either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.
13	*
14	* The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)
15	* algorithm used in jidctint.c. We simply replace each 8-to-8 1-D IDCT step
16	* with an 8-to-4 step that produces the four averages of two adjacent outputs
17	* (or an 8-to-2 step producing two averages of four outputs, for 2x2 output).
18	* These steps were derived by computing the corresponding values at the end
19	* of the normal LL&M code, then simplifying as much as possible.
20	*
21	* 1x1 is trivial: just take the DC coefficient divided by 8.
22	*
23	* See jidctint.c for additional comments.
24	*/
25
26	#define JPEG_INTERNALS
27	#include "jinclude.h"
28	#include "jpeglib.h"
29	#include "jdct.h" /* Private declarations for DCT subsystem */
30
31	#ifdef IDCT_SCALING_SUPPORTED
32
33
34	/*
35	* This module is specialized to the case DCTSIZE = 8.
36	*/
37
38	#if DCTSIZE != 8
39	Sorry, this code only copes with `8x8` DCTs. / deliberate syntax err /
40	#endif
41
42
/* Scaling is the same as in jidctint.c.
 *
 * CONST_BITS is the number of fractional bits carried by the fixed-point
 * multiplier constants below (each FIX_x value is x * 2^CONST_BITS, rounded).
 * PASS1_BITS is the number of extra fractional bits kept in the intermediate
 * results that pass 1 of each IDCT routine stores in its workspace.
 */

#if BITS_IN_JSAMPLE == 8
#define CONST_BITS  13
#define PASS1_BITS  2
#else
#define CONST_BITS  13
#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
#endif

/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */

#if CONST_BITS == 13
#define FIX_0_211164243  ((JLONG)1730)          /* FIX(0.211164243) */
#define FIX_0_509795579  ((JLONG)4176)          /* FIX(0.509795579) */
#define FIX_0_601344887  ((JLONG)4926)          /* FIX(0.601344887) */
#define FIX_0_720959822  ((JLONG)5906)          /* FIX(0.720959822) */
#define FIX_0_765366865  ((JLONG)6270)          /* FIX(0.765366865) */
#define FIX_0_850430095  ((JLONG)6967)          /* FIX(0.850430095) */
#define FIX_0_899976223  ((JLONG)7373)          /* FIX(0.899976223) */
#define FIX_1_061594337  ((JLONG)8697)          /* FIX(1.061594337) */
#define FIX_1_272758580  ((JLONG)10426)         /* FIX(1.272758580) */
#define FIX_1_451774981  ((JLONG)11893)         /* FIX(1.451774981) */
#define FIX_1_847759065  ((JLONG)15137)         /* FIX(1.847759065) */
#define FIX_2_172734803  ((JLONG)17799)         /* FIX(2.172734803) */
#define FIX_2_562915447  ((JLONG)20995)         /* FIX(2.562915447) */
#define FIX_3_624509785  ((JLONG)29692)         /* FIX(3.624509785) */
#else
#define FIX_0_211164243  FIX(0.211164243)
#define FIX_0_509795579  FIX(0.509795579)
#define FIX_0_601344887  FIX(0.601344887)
#define FIX_0_720959822  FIX(0.720959822)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_850430095  FIX(0.850430095)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_061594337  FIX(1.061594337)
#define FIX_1_272758580  FIX(1.272758580)
#define FIX_1_451774981  FIX(1.451774981)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_2_172734803  FIX(2.172734803)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_624509785  FIX(3.624509785)
#endif
91
92
/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 * For 12-bit samples, a full 32-bit multiplication will be needed.
 *
 * MULTIPLY16C16 is not defined in this file; it is expected to come from
 * one of the included headers.
 */

#if BITS_IN_JSAMPLE == 8
#define MULTIPLY(var, const)  MULTIPLY16C16(var, const)
#else
#define MULTIPLY(var, const)  ((var) * (const))
#endif
105
106
/* Dequantize a coefficient by multiplying it by the multiplier-table
 * entry; produce an int result.  In this module, both inputs and result
 * are 16 bits or less, so either int or short multiply will work.
 *
 * The coefficient is cast to ISLOW_MULT_TYPE before the multiplication.
 */

#define DEQUANTIZE(coef, quantval)  (((ISLOW_MULT_TYPE)(coef)) * (quantval))
113
114
115	/*
116	* Perform dequantization and inverse DCT on one block of coefficients,
117	* producing a reduced-size 4x4 output block.
118	*/
119
120	GLOBAL(void)
121	jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
122	JCOEFPTR coef_block, JSAMPARRAY output_buf,
123	JDIMENSION output_col)
124	{
125	JLONG tmp0, tmp2, tmp10, tmp12;
126	JLONG z1, z2, z3, z4;
127	JCOEFPTR inptr;
128	ISLOW_MULT_TYPE *quantptr;
129	int *wsptr;
130	JSAMPROW outptr;
131	JSAMPLE *range_limit = IDCT_range_limit(cinfo);
132	int ctr;
133	int workspace[DCTSIZE * `4`]; / buffers data between passes /
134	SHIFT_TEMPS
135
136	/ Pass 1: process columns from input, store into work array. /
137
138	inptr = coef_block;
139	quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
140	wsptr = workspace;
141	for (ctr = DCTSIZE; ctr > `0`; inptr++, quantptr++, wsptr++, ctr--) {
142	/ Don't bother to process column 4, because second pass won't use it /
143	if (ctr == DCTSIZE - `4`)
144	continue;
145	if (inptr[DCTSIZE * `1`] == `0` && inptr[DCTSIZE * `2`] == `0` &&
146	inptr[DCTSIZE * `3`] == `0` && inptr[DCTSIZE * `5`] == `0` &&
147	inptr[DCTSIZE * `6`] == `0` && inptr[DCTSIZE * `7`] == `0`) {
148	/ AC terms all zero; we need not examine term 4 for 4x4 output /
149	int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * `0`],
150	quantptr[DCTSIZE * `0`]), PASS1_BITS);
151
152	wsptr[DCTSIZE * `0`] = dcval;
153	wsptr[DCTSIZE * `1`] = dcval;
154	wsptr[DCTSIZE * `2`] = dcval;
155	wsptr[DCTSIZE * `3`] = dcval;
156
157	continue;
158	}
159
160	/ Even part /
161
162	tmp0 = DEQUANTIZE(inptr[DCTSIZE * `0`], quantptr[DCTSIZE * `0`]);
163	tmp0 = LEFT_SHIFT(tmp0, CONST_BITS + `1`);
164
165	z2 = DEQUANTIZE(inptr[DCTSIZE * `2`], quantptr[DCTSIZE * `2`]);
166	z3 = DEQUANTIZE(inptr[DCTSIZE * `6`], quantptr[DCTSIZE * `6`]);
167
168	tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, -FIX_0_765366865);
169
170	tmp10 = tmp0 + tmp2;
171	tmp12 = tmp0 - tmp2;
172
173	/ Odd part /
174
175	z1 = DEQUANTIZE(inptr[DCTSIZE * `7`], quantptr[DCTSIZE * `7`]);
176	z2 = DEQUANTIZE(inptr[DCTSIZE * `5`], quantptr[DCTSIZE * `5`]);
177	z3 = DEQUANTIZE(inptr[DCTSIZE * `3`], quantptr[DCTSIZE * `3`]);
178	z4 = DEQUANTIZE(inptr[DCTSIZE * `1`], quantptr[DCTSIZE * `1`]);
179
180	tmp0 = MULTIPLY(z1, -FIX_0_211164243) + / sqrt(2) * ( c3-c1) /
181	MULTIPLY(z2, FIX_1_451774981) + / sqrt(2) * ( c3+c7) /
182	MULTIPLY(z3, -FIX_2_172734803) + / sqrt(2) * (-c1-c5) /
183	MULTIPLY(z4, FIX_1_061594337); / sqrt(2) * ( c5+c7) /
184
185	tmp2 = MULTIPLY(z1, -FIX_0_509795579) + / sqrt(2) * (c7-c5) /
186	MULTIPLY(z2, -FIX_0_601344887) + / sqrt(2) * (c5-c1) /
187	MULTIPLY(z3, FIX_0_899976223) + / sqrt(2) * (c3-c7) /
188	MULTIPLY(z4, FIX_2_562915447); / sqrt(2) * (c1+c3) /
189
190	/ Final output stage /
191
192	wsptr[DCTSIZE * `0`] =
193	(int)DESCALE(tmp10 + tmp2, CONST_BITS - PASS1_BITS + `1`);
194	wsptr[DCTSIZE * `3`] =
195	(int)DESCALE(tmp10 - tmp2, CONST_BITS - PASS1_BITS + `1`);
196	wsptr[DCTSIZE * `1`] =
197	(int)DESCALE(tmp12 + tmp0, CONST_BITS - PASS1_BITS + `1`);
198	wsptr[DCTSIZE * `2`] =
199	(int)DESCALE(tmp12 - tmp0, CONST_BITS - PASS1_BITS + `1`);
200	}
201
202	/ Pass 2: process 4 rows from work array, store into output array. /
203
204	wsptr = workspace;
205	for (ctr = `0`; ctr < `4`; ctr++) {
206	outptr = output_buf[ctr] + output_col;
207	/ It's not clear whether a zero row test is worthwhile here ... /
208
209	#ifndef NO_ZERO_ROW_TEST
210	if (wsptr[`1`] == `0` && wsptr[`2`] == `0` && wsptr[`3`] == `0` &&
211	wsptr[`5`] == `0` && wsptr[`6`] == `0` && wsptr[`7`] == `0`) {
212	/ AC terms all zero /
213	JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[`0`],
214	PASS1_BITS + `3`) & RANGE_MASK];
215
216	outptr[`0`] = dcval;
217	outptr[`1`] = dcval;
218	outptr[`2`] = dcval;
219	outptr[`3`] = dcval;
220
221	wsptr += DCTSIZE; / advance pointer to next row /
222	continue;
223	}
224	#endif
225
226	/ Even part /
227
228	tmp0 = LEFT_SHIFT((JLONG)wsptr[`0`], CONST_BITS + `1`);
229
230	tmp2 = MULTIPLY((JLONG)wsptr[`2`], FIX_1_847759065) +
231	MULTIPLY((JLONG)wsptr[`6`], -FIX_0_765366865);
232
233	tmp10 = tmp0 + tmp2;
234	tmp12 = tmp0 - tmp2;
235
236	/ Odd part /
237
238	z1 = (JLONG)wsptr[`7`];
239	z2 = (JLONG)wsptr[`5`];
240	z3 = (JLONG)wsptr[`3`];
241	z4 = (JLONG)wsptr[`1`];
242
243	tmp0 = MULTIPLY(z1, -FIX_0_211164243) + / sqrt(2) * ( c3-c1) /
244	MULTIPLY(z2, FIX_1_451774981) + / sqrt(2) * ( c3+c7) /
245	MULTIPLY(z3, -FIX_2_172734803) + / sqrt(2) * (-c1-c5) /
246	MULTIPLY(z4, FIX_1_061594337); / sqrt(2) * ( c5+c7) /
247
248	tmp2 = MULTIPLY(z1, -FIX_0_509795579) + / sqrt(2) * (c7-c5) /
249	MULTIPLY(z2, -FIX_0_601344887) + / sqrt(2) * (c5-c1) /
250	MULTIPLY(z3, FIX_0_899976223) + / sqrt(2) * (c3-c7) /
251	MULTIPLY(z4, FIX_2_562915447); / sqrt(2) * (c1+c3) /
252
253	/ Final output stage /
254
255	outptr[`0`] = range_limit[(int)DESCALE(tmp10 + tmp2,
256	CONST_BITS + PASS1_BITS + `3` + `1`) &
257	RANGE_MASK];
258	outptr[`3`] = range_limit[(int)DESCALE(tmp10 - tmp2,
259	CONST_BITS + PASS1_BITS + `3` + `1`) &
260	RANGE_MASK];
261	outptr[`1`] = range_limit[(int)DESCALE(tmp12 + tmp0,
262	CONST_BITS + PASS1_BITS + `3` + `1`) &
263	RANGE_MASK];
264	outptr[`2`] = range_limit[(int)DESCALE(tmp12 - tmp0,
265	CONST_BITS + PASS1_BITS + `3` + `1`) &
266	RANGE_MASK];
267
268	wsptr += DCTSIZE; / advance pointer to next row /
269	}
270	}
271
272
273	/*
274	* Perform dequantization and inverse DCT on one block of coefficients,
275	* producing a reduced-size 2x2 output block.
276	*/
277
278	GLOBAL(void)
279	jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
280	JCOEFPTR coef_block, JSAMPARRAY output_buf,
281	JDIMENSION output_col)
282	{
283	JLONG tmp0, tmp10, z1;
284	JCOEFPTR inptr;
285	ISLOW_MULT_TYPE *quantptr;
286	int *wsptr;
287	JSAMPROW outptr;
288	JSAMPLE *range_limit = IDCT_range_limit(cinfo);
289	int ctr;
290	int workspace[DCTSIZE * `2`]; / buffers data between passes /
291	SHIFT_TEMPS
292
293	/ Pass 1: process columns from input, store into work array. /
294
295	inptr = coef_block;
296	quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
297	wsptr = workspace;
298	for (ctr = DCTSIZE; ctr > `0`; inptr++, quantptr++, wsptr++, ctr--) {
299	/ Don't bother to process columns 2,4,6 /
300	if (ctr == DCTSIZE - `2` \|\| ctr == DCTSIZE - `4` \|\| ctr == DCTSIZE - `6`)
301	continue;
302	if (inptr[DCTSIZE * `1`] == `0` && inptr[DCTSIZE * `3`] == `0` &&
303	inptr[DCTSIZE * `5`] == `0` && inptr[DCTSIZE * `7`] == `0`) {
304	/ AC terms all zero; we need not examine terms 2,4,6 for 2x2 output /
305	int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * `0`],
306	quantptr[DCTSIZE * `0`]), PASS1_BITS);
307
308	wsptr[DCTSIZE * `0`] = dcval;
309	wsptr[DCTSIZE * `1`] = dcval;
310
311	continue;
312	}
313
314	/ Even part /
315
316	z1 = DEQUANTIZE(inptr[DCTSIZE * `0`], quantptr[DCTSIZE * `0`]);
317	tmp10 = LEFT_SHIFT(z1, CONST_BITS + `2`);
318
319	/ Odd part /
320
321	z1 = DEQUANTIZE(inptr[DCTSIZE * `7`], quantptr[DCTSIZE * `7`]);
322	tmp0 = MULTIPLY(z1, -FIX_0_720959822); / sqrt(2) * ( c7-c5+c3-c1) /
323	z1 = DEQUANTIZE(inptr[DCTSIZE * `5`], quantptr[DCTSIZE * `5`]);
324	tmp0 += MULTIPLY(z1, FIX_0_850430095); / sqrt(2) * (-c1+c3+c5+c7) /
325	z1 = DEQUANTIZE(inptr[DCTSIZE * `3`], quantptr[DCTSIZE * `3`]);
326	tmp0 += MULTIPLY(z1, -FIX_1_272758580); / sqrt(2) * (-c1+c3-c5-c7) /
327	z1 = DEQUANTIZE(inptr[DCTSIZE * `1`], quantptr[DCTSIZE * `1`]);
328	tmp0 += MULTIPLY(z1, FIX_3_624509785); / sqrt(2) * ( c1+c3+c5+c7) /
329
330	/ Final output stage /
331
332	wsptr[DCTSIZE * `0`] =
333	(int)DESCALE(tmp10 + tmp0, CONST_BITS - PASS1_BITS + `2`);
334	wsptr[DCTSIZE * `1`] =
335	(int)DESCALE(tmp10 - tmp0, CONST_BITS - PASS1_BITS + `2`);
336	}
337
338	/ Pass 2: process 2 rows from work array, store into output array. /
339
340	wsptr = workspace;
341	for (ctr = `0`; ctr < `2`; ctr++) {
342	outptr = output_buf[ctr] + output_col;
343	/ It's not clear whether a zero row test is worthwhile here ... /
344
345	#ifndef NO_ZERO_ROW_TEST
346	if (wsptr[`1`] == `0` && wsptr[`3`] == `0` && wsptr[`5`] == `0` && wsptr[`7`] == `0`) {
347	/ AC terms all zero /
348	JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[`0`],
349	PASS1_BITS + `3`) & RANGE_MASK];
350
351	outptr[`0`] = dcval;
352	outptr[`1`] = dcval;
353
354	wsptr += DCTSIZE; / advance pointer to next row /
355	continue;
356	}
357	#endif
358
359	/ Even part /
360
361	tmp10 = LEFT_SHIFT((JLONG)wsptr[`0`], CONST_BITS + `2`);
362
363	/ Odd part /
364
365	tmp0 = MULTIPLY((JLONG)wsptr[`7`], -FIX_0_720959822) + / sqrt(2) * ( c7-c5+c3-c1) /
366	MULTIPLY((JLONG)wsptr[`5`], FIX_0_850430095) + / sqrt(2) * (-c1+c3+c5+c7) /
367	MULTIPLY((JLONG)wsptr[`3`], -FIX_1_272758580) + / sqrt(2) * (-c1+c3-c5-c7) /
368	MULTIPLY((JLONG)wsptr[`1`], FIX_3_624509785); / sqrt(2) * ( c1+c3+c5+c7) /
369
370	/ Final output stage /
371
372	outptr[`0`] = range_limit[(int)DESCALE(tmp10 + tmp0,
373	CONST_BITS + PASS1_BITS + `3` + `2`) &
374	RANGE_MASK];
375	outptr[`1`] = range_limit[(int)DESCALE(tmp10 - tmp0,
376	CONST_BITS + PASS1_BITS + `3` + `2`) &
377	RANGE_MASK];
378
379	wsptr += DCTSIZE; / advance pointer to next row /
380	}
381	}
382
383
384	/*
385	* Perform dequantization and inverse DCT on one block of coefficients,
386	* producing a reduced-size 1x1 output block.
387	*/
388
389	GLOBAL(void)
390	jpeg_idct_1x1(j_decompress_ptr cinfo, jpeg_component_info *compptr,
391	JCOEFPTR coef_block, JSAMPARRAY output_buf,
392	JDIMENSION output_col)
393	{
394	int dcval;
395	ISLOW_MULT_TYPE *quantptr;
396	JSAMPLE *range_limit = IDCT_range_limit(cinfo);
397	SHIFT_TEMPS
398
399	/ We hardly need an inverse DCT routine for this: just take the*
400	* average pixel value, which is one-eighth of the DC coefficient.
401	*/
402	quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
403	dcval = DEQUANTIZE(coef_block[`0`], quantptr[`0`]);
404	dcval = (int)DESCALE((JLONG)dcval, `3`);
405
406	output_buf[`0`][output_col] = range_limit[dcval & RANGE_MASK];
407	}
408
409	#endif /* IDCT_SCALING_SUPPORTED */
410

Browse the source code of tensorflow/external/libjpeg_turbo/jidctred.c