jfdctflt.c source code [tensorflow/external/libjpeg_turbo/jfdctflt.c]

1	/*
2	* jfdctflt.c
3	*
4	* Copyright (C) 1994-1996, Thomas G. Lane.
5	* This file is part of the Independent JPEG Group's software.
6	* For conditions of distribution and use, see the accompanying README.ijg
7	* file.
8	*
9	* This file contains a floating-point implementation of the
10	* forward DCT (Discrete Cosine Transform).
11	*
12	* This implementation should be more accurate than either of the integer
13	* DCT implementations. However, it may not give the same results on all
14	* machines because of differences in roundoff behavior. Speed will depend
15	* on the hardware's floating point capacity.
16	*
17	* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
18	* on each column. Direct algorithms are also available, but they are
19	* much more complex and seem not to be any faster when reduced to code.
20	*
21	* This implementation is based on Arai, Agui, and Nakajima's algorithm for
22	* scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
23	* Japanese, but the algorithm is described in the Pennebaker & Mitchell
24	* JPEG textbook (see REFERENCES section in file README.ijg). The following
25	* code is based directly on figure 4-8 in P&M.
26	* While an 8-point DCT cannot be done in less than 11 multiplies, it is
27	* possible to arrange the computation so that many of the multiplies are
28	* simple scalings of the final outputs. These multiplies can then be
29	* folded into the multiplications or divisions by the JPEG quantization
30	* table entries. The AA&N method leaves only 5 multiplies and 29 adds
31	* to be done in the DCT itself.
32	* The primary disadvantage of this method is that with a fixed-point
33	* implementation, accuracy is lost due to imprecise representation of the
34	* scaled quantization values. However, that problem does not arise if
35	* we use floating point arithmetic.
36	*/
37
38	#define JPEG_INTERNALS
39	#include "jinclude.h"
40	#include "jpeglib.h"
41	#include "jdct.h" /* Private declarations for DCT subsystem */
42
43	#ifdef DCT_FLOAT_SUPPORTED
44
45
46	/*
47	* This module is specialized to the case DCTSIZE = 8.
48	*/
49
50	#if DCTSIZE != 8
51	Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
52	#endif
53
54
55	/*
56	* Perform the forward DCT on one block of samples.
57	*/
58
59	GLOBAL(void)
60	jpeg_fdct_float(FAST_FLOAT *data)
61	{
62	FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
63	FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
64	FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
65	FAST_FLOAT *dataptr;
66	int ctr;
67
68	/ Pass 1: process rows. /
69
70	dataptr = data;
71	for (ctr = DCTSIZE - `1`; ctr >= `0`; ctr--) {
72	tmp0 = dataptr[`0`] + dataptr[`7`];
73	tmp7 = dataptr[`0`] - dataptr[`7`];
74	tmp1 = dataptr[`1`] + dataptr[`6`];
75	tmp6 = dataptr[`1`] - dataptr[`6`];
76	tmp2 = dataptr[`2`] + dataptr[`5`];
77	tmp5 = dataptr[`2`] - dataptr[`5`];
78	tmp3 = dataptr[`3`] + dataptr[`4`];
79	tmp4 = dataptr[`3`] - dataptr[`4`];
80
81	/ Even part /
82
83	tmp10 = tmp0 + tmp3; / phase 2 /
84	tmp13 = tmp0 - tmp3;
85	tmp11 = tmp1 + tmp2;
86	tmp12 = tmp1 - tmp2;
87
88	dataptr[`0`] = tmp10 + tmp11; / phase 3 /
89	dataptr[`4`] = tmp10 - tmp11;
90
91	z1 = (tmp12 + tmp13) * ((FAST_FLOAT)`0.707106781`); / c4 /
92	dataptr[`2`] = tmp13 + z1; / phase 5 /
93	dataptr[`6`] = tmp13 - z1;
94
95	/ Odd part /
96
97	tmp10 = tmp4 + tmp5; / phase 2 /
98	tmp11 = tmp5 + tmp6;
99	tmp12 = tmp6 + tmp7;
100
101	/ The rotator is modified from fig 4-8 to avoid extra negations. /
102	z5 = (tmp10 - tmp12) * ((FAST_FLOAT)`0.382683433`); / c6 /
103	z2 = ((FAST_FLOAT)`0.541196100`) * tmp10 + z5; / c2-c6 /
104	z4 = ((FAST_FLOAT)`1.306562965`) * tmp12 + z5; / c2+c6 /
105	z3 = tmp11 * ((FAST_FLOAT)`0.707106781`); / c4 /
106
107	z11 = tmp7 + z3; / phase 5 /
108	z13 = tmp7 - z3;
109
110	dataptr[`5`] = z13 + z2; / phase 6 /
111	dataptr[`3`] = z13 - z2;
112	dataptr[`1`] = z11 + z4;
113	dataptr[`7`] = z11 - z4;
114
115	dataptr += DCTSIZE; / advance pointer to next row /
116	}
117
118	/ Pass 2: process columns. /
119
120	dataptr = data;
121	for (ctr = DCTSIZE - `1`; ctr >= `0`; ctr--) {
122	tmp0 = dataptr[DCTSIZE * `0`] + dataptr[DCTSIZE * `7`];
123	tmp7 = dataptr[DCTSIZE * `0`] - dataptr[DCTSIZE * `7`];
124	tmp1 = dataptr[DCTSIZE * `1`] + dataptr[DCTSIZE * `6`];
125	tmp6 = dataptr[DCTSIZE * `1`] - dataptr[DCTSIZE * `6`];
126	tmp2 = dataptr[DCTSIZE * `2`] + dataptr[DCTSIZE * `5`];
127	tmp5 = dataptr[DCTSIZE * `2`] - dataptr[DCTSIZE * `5`];
128	tmp3 = dataptr[DCTSIZE * `3`] + dataptr[DCTSIZE * `4`];
129	tmp4 = dataptr[DCTSIZE * `3`] - dataptr[DCTSIZE * `4`];
130
131	/ Even part /
132
133	tmp10 = tmp0 + tmp3; / phase 2 /
134	tmp13 = tmp0 - tmp3;
135	tmp11 = tmp1 + tmp2;
136	tmp12 = tmp1 - tmp2;
137
138	dataptr[DCTSIZE * `0`] = tmp10 + tmp11; / phase 3 /
139	dataptr[DCTSIZE * `4`] = tmp10 - tmp11;
140
141	z1 = (tmp12 + tmp13) * ((FAST_FLOAT)`0.707106781`); / c4 /
142	dataptr[DCTSIZE * `2`] = tmp13 + z1; / phase 5 /
143	dataptr[DCTSIZE * `6`] = tmp13 - z1;
144
145	/ Odd part /
146
147	tmp10 = tmp4 + tmp5; / phase 2 /
148	tmp11 = tmp5 + tmp6;
149	tmp12 = tmp6 + tmp7;
150
151	/ The rotator is modified from fig 4-8 to avoid extra negations. /
152	z5 = (tmp10 - tmp12) * ((FAST_FLOAT)`0.382683433`); / c6 /
153	z2 = ((FAST_FLOAT)`0.541196100`) * tmp10 + z5; / c2-c6 /
154	z4 = ((FAST_FLOAT)`1.306562965`) * tmp12 + z5; / c2+c6 /
155	z3 = tmp11 * ((FAST_FLOAT)`0.707106781`); / c4 /
156
157	z11 = tmp7 + z3; / phase 5 /
158	z13 = tmp7 - z3;
159
160	dataptr[DCTSIZE * `5`] = z13 + z2; / phase 6 /
161	dataptr[DCTSIZE * `3`] = z13 - z2;
162	dataptr[DCTSIZE * `1`] = z11 + z4;
163	dataptr[DCTSIZE * `7`] = z11 - z4;
164
165	dataptr++; / advance pointer to next column /
166	}
167	}
168
169	#endif /* DCT_FLOAT_SUPPORTED */
170

Browse the source code of tensorflow/external/libjpeg_turbo/jfdctflt.c