1/*
2 * jcphuff.c
3 *
4 * This file was part of the Independent JPEG Group's software:
5 * Copyright (C) 1995-1997, Thomas G. Lane.
6 * libjpeg-turbo Modifications:
7 * Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander.
8 * Copyright (C) 2016, 2018, Matthieu Darbois.
9 * Copyright (C) 2020, Arm Limited.
10 * For conditions of distribution and use, see the accompanying README.ijg
11 * file.
12 *
13 * This file contains Huffman entropy encoding routines for progressive JPEG.
14 *
15 * We do not support output suspension in this module, since the library
16 * currently does not allow multiple-scan files to be written with output
17 * suspension.
18 */
19
20#define JPEG_INTERNALS
21#include "jinclude.h"
22#include "jpeglib.h"
23#include "jsimd.h"
24#include "jconfigint.h"
25#include <limits.h>
26
27#ifdef HAVE_INTRIN_H
28#include <intrin.h>
29#ifdef _MSC_VER
30#ifdef HAVE_BITSCANFORWARD64
31#pragma intrinsic(_BitScanForward64)
32#endif
33#ifdef HAVE_BITSCANFORWARD
34#pragma intrinsic(_BitScanForward)
35#endif
36#endif
37#endif
38
39#ifdef C_PROGRESSIVE_SUPPORTED
40
41/*
42 * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
43 * used for bit counting rather than the lookup table. This will reduce the
44 * memory footprint by 64k, which is important for some mobile applications
45 * that create many isolated instances of libjpeg-turbo (web browsers, for
46 * instance.) This may improve performance on some mobile platforms as well.
47 * This feature is enabled by default only on Arm processors, because some x86
48 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
49 * shown to have a significant performance impact even on the x86 chips that
50 * have a fast implementation of it. When building for Armv6, you can
51 * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
52 * flags (this defines __thumb__).
53 */
54
/* Enable the clz/bsr code path on Arm targets, except when __thumb__ is
 * defined without __thumb2__.
 */
#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
    defined(_M_ARM64)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif

/* JPEG_NBITS(x) evaluates to the number of bits needed to represent the
 * non-negative magnitude x, and to 0 when x is 0.
 * JPEG_NBITS_NONZERO(x) is the same computation without the zero test; in
 * the intrinsic variant it must only be given a nonzero x, because the
 * count-leading-zeros intrinsic has no defined result for 0.
 *
 * Both macros may evaluate x more than once, so x must not have side
 * effects.  The argument is parenthesized in the zero test so that an
 * expression such as (c ? a : b) can be passed safely.
 */
#ifdef USE_CLZ_INTRINSIC
#if defined(_MSC_VER) && !defined(__clang__)
#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
#else
#define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
#endif
#define JPEG_NBITS(x)          ((x) ? JPEG_NBITS_NONZERO(x) : 0)
#else
#include "jpeg_nbits_table.h"
#define JPEG_NBITS(x)          (jpeg_nbits_table[x])
#define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
#endif
74
75
76/* Expanded entropy encoder object for progressive Huffman encoding. */
77
typedef struct {
  struct jpeg_entropy_encoder pub; /* public fields */

  /* Pointer to routine to prepare data for encode_mcu_AC_first().
   * start_pass_phuff() sets this to either the SIMD routine or the C
   * routine in this file.
   */
  void (*AC_first_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, JCOEF *values, size_t *zerobits);
  /* Pointer to routine to prepare data for encode_mcu_AC_refine().
   * Selected by start_pass_phuff() in the same way; the return value is
   * used by encode_mcu_AC_refine() as the EOB index.
   */
  int (*AC_refine_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, JCOEF *absvalues, size_t *bits);

  /* Mode flag: TRUE for optimization, FALSE for actual data output */
  boolean gather_statistics;

  /* Bit-level coding status.
   * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
   * Each encode_mcu_*() routine loads them on entry and stores them back
   * before returning.
   */
  JOCTET *next_output_byte;     /* => next byte to write in buffer */
  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
  size_t put_buffer;            /* current bit-accumulation buffer */
  int put_bits;                 /* # of bits now in it */
  j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */

  /* Coding status for DC components */
  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */

  /* Coding status for AC components */
  int ac_tbl_no;                /* the table number of the single component */
  unsigned int EOBRUN;          /* run length of EOBs */
  unsigned int BE;              /* # of buffered correction bits before MCU */
  char *bit_buffer;             /* buffer for correction bits (1 per char);
                                 * allocated by start_pass_phuff() only for
                                 * AC refinement scans, otherwise NULL */
  /* packing correction bits tightly would save some space but cost time... */

  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
  int next_restart_num;         /* next restart number to write (0-7) */

  /* Pointers to derived tables (these workspaces have image lifespan).
   * Since any one scan codes only DC or only AC, we only need one set
   * of tables, not one for DC and one for AC.
   */
  c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];

  /* Statistics tables for optimization; again, one set is enough.
   * Each table holds 257 counters (see start_pass_phuff()).
   */
  long *count_ptrs[NUM_HUFF_TBLS];
} phuff_entropy_encoder;

/* Convenience pointer type; cinfo->entropy is cast to this throughout. */
typedef phuff_entropy_encoder *phuff_entropy_ptr;
126
/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
 * buffer can hold.  Larger sizes may slightly improve compression, but
 * 1000 is already well into the realm of overkill.
 * The minimum safe size is 64 bits.
 *
 * The buffer stores one bit per char, so start_pass_phuff() allocates
 * MAX_CORR_BITS chars.  encode_mcu_AC_refine() forces the pending EOB run
 * out once more than (MAX_CORR_BITS - DCTSIZE2 + 1) bits are buffered.
 */

#define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */
134
/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
 * We assume that int right shift is unsigned if JLONG right shift is,
 * which should be safe.
 *
 * When right shift is unsigned, a negative operand loses its sign bits, so
 * they are OR'ed back in with a mask of ones starting at bit (16 - shft).
 * The mask is built from an unsigned all-ones constant because
 * left-shifting a negative int is undefined behavior in C; the combined
 * value is then converted back to int.
 *
 * ISHIFT_TEMPS must appear among the local declarations of any function
 * that uses IRIGHT_SHIFT; it declares the scratch variable that lets the
 * macro evaluate x only once.
 */

#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS  int ishift_temp;
#define IRIGHT_SHIFT(x, shft) \
  ((ishift_temp = (x)) < 0 ? \
   (int)((unsigned int)(ishift_temp >> (shft)) | \
         ((~0U) << (16 - (shft)))) : \
   (ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x, shft)  ((x) >> (shft))
#endif
150
/* Round v up to the next multiple of p.  p must be a power of 2.  Both
 * arguments are fully parenthesized so that arbitrary expressions can be
 * passed; p is evaluated twice.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
152
153/* Forward declarations */
154METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
155 JBLOCKROW *MCU_data);
156METHODDEF(void) encode_mcu_AC_first_prepare
157 (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
158 JCOEF *values, size_t *zerobits);
159METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
160 JBLOCKROW *MCU_data);
161METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
162 JBLOCKROW *MCU_data);
163METHODDEF(int) encode_mcu_AC_refine_prepare
164 (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
165 JCOEF *absvalues, size_t *bits);
166METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
167 JBLOCKROW *MCU_data);
168METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
169METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo);
170
171
172/* Count bit loop zeroes */
173INLINE
174METHODDEF(int)
175count_zeroes(size_t *x)
176{
177#if defined(HAVE_BUILTIN_CTZL)
178 int result;
179 result = __builtin_ctzl(*x);
180 *x >>= result;
181#elif defined(HAVE_BITSCANFORWARD64)
182 unsigned long result;
183 _BitScanForward64(&result, *x);
184 *x >>= result;
185#elif defined(HAVE_BITSCANFORWARD)
186 unsigned long result;
187 _BitScanForward(&result, *x);
188 *x >>= result;
189#else
190 int result = 0;
191 while ((*x & 1) == 0) {
192 ++result;
193 *x >>= 1;
194 }
195#endif
196 return (int)result;
197}
198
199
200/*
201 * Initialize for a Huffman-compressed scan using progressive JPEG.
202 */
203
204METHODDEF(void)
205start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
206{
207 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
208 boolean is_DC_band;
209 int ci, tbl;
210 jpeg_component_info *compptr;
211
212 entropy->cinfo = cinfo;
213 entropy->gather_statistics = gather_statistics;
214
215 is_DC_band = (cinfo->Ss == 0);
216
217 /* We assume jcmaster.c already validated the scan parameters. */
218
219 /* Select execution routines */
220 if (cinfo->Ah == 0) {
221 if (is_DC_band)
222 entropy->pub.encode_mcu = encode_mcu_DC_first;
223 else
224 entropy->pub.encode_mcu = encode_mcu_AC_first;
225 if (jsimd_can_encode_mcu_AC_first_prepare())
226 entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
227 else
228 entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
229 } else {
230 if (is_DC_band)
231 entropy->pub.encode_mcu = encode_mcu_DC_refine;
232 else {
233 entropy->pub.encode_mcu = encode_mcu_AC_refine;
234 if (jsimd_can_encode_mcu_AC_refine_prepare())
235 entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
236 else
237 entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
238 /* AC refinement needs a correction bit buffer */
239 if (entropy->bit_buffer == NULL)
240 entropy->bit_buffer = (char *)
241 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
242 MAX_CORR_BITS * sizeof(char));
243 }
244 }
245 if (gather_statistics)
246 entropy->pub.finish_pass = finish_pass_gather_phuff;
247 else
248 entropy->pub.finish_pass = finish_pass_phuff;
249
250 /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
251 * for AC coefficients.
252 */
253 for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
254 compptr = cinfo->cur_comp_info[ci];
255 /* Initialize DC predictions to 0 */
256 entropy->last_dc_val[ci] = 0;
257 /* Get table index */
258 if (is_DC_band) {
259 if (cinfo->Ah != 0) /* DC refinement needs no table */
260 continue;
261 tbl = compptr->dc_tbl_no;
262 } else {
263 entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
264 }
265 if (gather_statistics) {
266 /* Check for invalid table index */
267 /* (make_c_derived_tbl does this in the other path) */
268 if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
269 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
270 /* Allocate and zero the statistics tables */
271 /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
272 if (entropy->count_ptrs[tbl] == NULL)
273 entropy->count_ptrs[tbl] = (long *)
274 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
275 257 * sizeof(long));
276 MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long));
277 } else {
278 /* Compute derived values for Huffman table */
279 /* We may do this more than once for a table, but it's not expensive */
280 jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
281 &entropy->derived_tbls[tbl]);
282 }
283 }
284
285 /* Initialize AC stuff */
286 entropy->EOBRUN = 0;
287 entropy->BE = 0;
288
289 /* Initialize bit buffer to empty */
290 entropy->put_buffer = 0;
291 entropy->put_bits = 0;
292
293 /* Initialize restart stuff */
294 entropy->restarts_to_go = cinfo->restart_interval;
295 entropy->next_restart_num = 0;
296}
297
298
299/* Outputting bytes to the file.
300 * NB: these must be called only when actually outputting,
301 * that is, entropy->gather_statistics == FALSE.
302 */
303
/* Emit a byte: store val (converted to JOCTET) at the entropy object's
 * next_output_byte, advance the pointer, and call dump_buffer() when the
 * buffer has just become full.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves as
 * a single statement; invoke it with a trailing semicolon.  The entropy
 * argument is evaluated more than once and must not have side effects.
 */
#define emit_byte(entropy, val) \
  do { \
    *(entropy)->next_output_byte++ = (JOCTET)(val); \
    if (--(entropy)->free_in_buffer == 0) \
      dump_buffer(entropy); \
  } while (0)
310
311
312LOCAL(void)
313dump_buffer(phuff_entropy_ptr entropy)
314/* Empty the output buffer; we do not support suspension in this module. */
315{
316 struct jpeg_destination_mgr *dest = entropy->cinfo->dest;
317
318 if (!(*dest->empty_output_buffer) (entropy->cinfo))
319 ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
320 /* After a successful buffer dump, must reset buffer pointers */
321 entropy->next_output_byte = dest->next_output_byte;
322 entropy->free_in_buffer = dest->free_in_buffer;
323}
324
325
326/* Outputting bits to the file */
327
328/* Only the right 24 bits of put_buffer are used; the valid bits are
329 * left-justified in this part. At most 16 bits can be passed to emit_bits
330 * in one call, and we never retain more than 7 bits in put_buffer
331 * between calls, so 24 bits are sufficient.
332 */
333
334LOCAL(void)
335emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)
336/* Emit some bits, unless we are in gather mode */
337{
338 /* This routine is heavily used, so it's worth coding tightly. */
339 register size_t put_buffer = (size_t)code;
340 register int put_bits = entropy->put_bits;
341
342 /* if size is 0, caller used an invalid Huffman table entry */
343 if (size == 0)
344 ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
345
346 if (entropy->gather_statistics)
347 return; /* do nothing if we're only getting stats */
348
349 put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
350
351 put_bits += size; /* new number of bits in buffer */
352
353 put_buffer <<= 24 - put_bits; /* align incoming bits */
354
355 put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
356
357 while (put_bits >= 8) {
358 int c = (int)((put_buffer >> 16) & 0xFF);
359
360 emit_byte(entropy, c);
361 if (c == 0xFF) { /* need to stuff a zero byte? */
362 emit_byte(entropy, 0);
363 }
364 put_buffer <<= 8;
365 put_bits -= 8;
366 }
367
368 entropy->put_buffer = put_buffer; /* update variables */
369 entropy->put_bits = put_bits;
370}
371
372
373LOCAL(void)
374flush_bits(phuff_entropy_ptr entropy)
375{
376 emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */
377 entropy->put_buffer = 0; /* and reset bit-buffer to empty */
378 entropy->put_bits = 0;
379}
380
381
382/*
383 * Emit (or just count) a Huffman symbol.
384 */
385
386LOCAL(void)
387emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)
388{
389 if (entropy->gather_statistics)
390 entropy->count_ptrs[tbl_no][symbol]++;
391 else {
392 c_derived_tbl *tbl = entropy->derived_tbls[tbl_no];
393 emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
394 }
395}
396
397
398/*
399 * Emit bits from a correction bit buffer.
400 */
401
402LOCAL(void)
403emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,
404 unsigned int nbits)
405{
406 if (entropy->gather_statistics)
407 return; /* no real work */
408
409 while (nbits > 0) {
410 emit_bits(entropy, (unsigned int)(*bufstart), 1);
411 bufstart++;
412 nbits--;
413 }
414}
415
416
417/*
418 * Emit any pending EOBRUN symbol.
419 */
420
421LOCAL(void)
422emit_eobrun(phuff_entropy_ptr entropy)
423{
424 register int temp, nbits;
425
426 if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */
427 temp = entropy->EOBRUN;
428 nbits = JPEG_NBITS_NONZERO(temp) - 1;
429 /* safety check: shouldn't happen given limited correction-bit buffer */
430 if (nbits > 14)
431 ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
432
433 emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
434 if (nbits)
435 emit_bits(entropy, entropy->EOBRUN, nbits);
436
437 entropy->EOBRUN = 0;
438
439 /* Emit any buffered correction bits */
440 emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
441 entropy->BE = 0;
442 }
443}
444
445
446/*
447 * Emit a restart marker & resynchronize predictions.
448 */
449
450LOCAL(void)
451emit_restart(phuff_entropy_ptr entropy, int restart_num)
452{
453 int ci;
454
455 emit_eobrun(entropy);
456
457 if (!entropy->gather_statistics) {
458 flush_bits(entropy);
459 emit_byte(entropy, 0xFF);
460 emit_byte(entropy, JPEG_RST0 + restart_num);
461 }
462
463 if (entropy->cinfo->Ss == 0) {
464 /* Re-initialize DC predictions to 0 */
465 for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
466 entropy->last_dc_val[ci] = 0;
467 } else {
468 /* Re-initialize all AC-related fields to 0 */
469 entropy->EOBRUN = 0;
470 entropy->BE = 0;
471 }
472}
473
474
475/*
476 * MCU encoding for DC initial scan (either spectral selection,
477 * or first pass of successive approximation).
478 */
479
480METHODDEF(boolean)
481encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
482{
483 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
484 register int temp, temp2, temp3;
485 register int nbits;
486 int blkn, ci;
487 int Al = cinfo->Al;
488 JBLOCKROW block;
489 jpeg_component_info *compptr;
490 ISHIFT_TEMPS
491
492 entropy->next_output_byte = cinfo->dest->next_output_byte;
493 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
494
495 /* Emit restart marker if needed */
496 if (cinfo->restart_interval)
497 if (entropy->restarts_to_go == 0)
498 emit_restart(entropy, entropy->next_restart_num);
499
500 /* Encode the MCU data blocks */
501 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
502 block = MCU_data[blkn];
503 ci = cinfo->MCU_membership[blkn];
504 compptr = cinfo->cur_comp_info[ci];
505
506 /* Compute the DC value after the required point transform by Al.
507 * This is simply an arithmetic right shift.
508 */
509 temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al);
510
511 /* DC differences are figured on the point-transformed values. */
512 temp = temp2 - entropy->last_dc_val[ci];
513 entropy->last_dc_val[ci] = temp2;
514
515 /* Encode the DC coefficient difference per section G.1.2.1 */
516
517 /* This is a well-known technique for obtaining the absolute value without
518 * a branch. It is derived from an assembly language technique presented
519 * in "How to Optimize for the Pentium Processors", Copyright (c) 1996,
520 * 1997 by Agner Fog.
521 */
522 temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
523 temp ^= temp3;
524 temp -= temp3; /* temp is abs value of input */
525 /* For a negative input, want temp2 = bitwise complement of abs(input) */
526 temp2 = temp ^ temp3;
527
528 /* Find the number of bits needed for the magnitude of the coefficient */
529 nbits = JPEG_NBITS(temp);
530 /* Check for out-of-range coefficient values.
531 * Since we're encoding a difference, the range limit is twice as much.
532 */
533 if (nbits > MAX_COEF_BITS + 1)
534 ERREXIT(cinfo, JERR_BAD_DCT_COEF);
535
536 /* Count/emit the Huffman-coded symbol for the number of bits */
537 emit_symbol(entropy, compptr->dc_tbl_no, nbits);
538
539 /* Emit that number of bits of the value, if positive, */
540 /* or the complement of its magnitude, if negative. */
541 if (nbits) /* emit_bits rejects calls with size 0 */
542 emit_bits(entropy, (unsigned int)temp2, nbits);
543 }
544
545 cinfo->dest->next_output_byte = entropy->next_output_byte;
546 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
547
548 /* Update restart-interval state too */
549 if (cinfo->restart_interval) {
550 if (entropy->restarts_to_go == 0) {
551 entropy->restarts_to_go = cinfo->restart_interval;
552 entropy->next_restart_num++;
553 entropy->next_restart_num &= 7;
554 }
555 entropy->restarts_to_go--;
556 }
557
558 return TRUE;
559}
560
561
/*
 * Data preparation for encode_mcu_AC_first().
 */

/* COMPUTE_ABSVALUES_AC_FIRST(Sl) scans the first Sl coefficients (in the
 * order given by jpeg_natural_order_start) and, for each one that is still
 * nonzero after the point transform, stores its absolute value in values[k]
 * and the bits to be emitted in values[k + DCTSIZE2], and sets bit k of
 * zerobits.  Entries for coefficients that are (or become) zero are not
 * written.
 *
 * The macro is not self-contained.  It uses these names from the enclosing
 * function: block, jpeg_natural_order_start, Al, values, zerobits, and the
 * int scratch variables k, temp and temp2.
 */
#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    if (temp == 0) \
      continue; \
    /* We must apply the point transform by Al.  For AC coefficients this \
     * is an integer division with rounding towards 0.  To do this portably \
     * in C, we shift after obtaining the absolute value; so the code is \
     * interwoven with finding the abs value (temp) and output bits (temp2). \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp ^= temp2; \
    temp -= temp2;              /* temp is abs value of input */ \
    temp >>= Al;                /* apply the point transform */ \
    /* Watch out for case that nonzero coef is zero after point transform */ \
    if (temp == 0) \
      continue; \
    /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
    temp2 ^= temp; \
    values[k] = temp; \
    values[k + DCTSIZE2] = temp2; \
    zerobits |= ((size_t)1U) << k; \
  } \
}
590
/* C implementation of the AC_first_prepare hook.
 *
 * block                     coefficient block to read
 * jpeg_natural_order_start  natural-order index table, already offset to
 *                           the first coefficient of the band
 * Sl                        number of coefficients in the band
 * Al                        point transform (right shift)
 * values                    output: values[k] receives the absolute value
 *                           and values[k + DCTSIZE2] the bits to emit, for
 *                           each coefficient k that is nonzero after the
 *                           point transform
 * bits                      output: bitmap with bit k set for each such
 *                           coefficient.  One word when size_t is 8 bytes;
 *                           when size_t is 4 bytes, bits[0] covers
 *                           coefficients 0-31 and bits[1] covers 32-63.
 *
 * The local variable names are fixed by COMPUTE_ABSVALUES_AC_FIRST, which
 * refers to them directly.
 */
METHODDEF(void)
encode_mcu_AC_first_prepare(const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, JCOEF *values, size_t *bits)
{
  register int k, temp, temp2;
  size_t zerobits = 0U;
  int Sl0 = Sl;

#if SIZEOF_SIZE_T == 4
  /* A 32-bit word can only describe the first 32 coefficients */
  if (Sl0 > 32)
    Sl0 = 32;
#endif

  COMPUTE_ABSVALUES_AC_FIRST(Sl0);

  bits[0] = zerobits;
#if SIZEOF_SIZE_T == 4
  zerobits = 0U;

  if (Sl > 32) {
    /* Process the remaining coefficients into the second bitmap word */
    Sl -= 32;
    jpeg_natural_order_start += 32;
    values += 32;

    COMPUTE_ABSVALUES_AC_FIRST(Sl);
  }
  bits[1] = zerobits;
#endif
}
621
/*
 * MCU encoding for AC initial scan (either spectral selection,
 * or first pass of successive approximation).
 */

/* ENCODE_COEFS_AC_FIRST(label) emits every coefficient flagged in zerobits.
 * Each iteration skips the run of zero coefficients (r), advances cvalue to
 * the next nonzero one, and emits ZRL symbols for runs longer than 15, then
 * the run/size symbol, then the value bits.
 *
 * `label` is pasted in front of the per-coefficient code.  Passing a label
 * such as `first_iter_ac_first:` lets the caller jump into the loop after
 * computing r and cvalue itself; passing `(void)0;` adds nothing.
 *
 * The macro uses these names from the enclosing function: zerobits, cvalue,
 * r, temp, temp2, nbits, entropy and cinfo.
 */
#define ENCODE_COEFS_AC_FIRST(label) { \
  while (zerobits) { \
    r = count_zeroes(&zerobits); \
    cvalue += r; \
label \
    temp  = cvalue[0]; \
    temp2 = cvalue[DCTSIZE2]; \
    \
    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
    while (r > 15) { \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
    } \
    \
    /* Find the number of bits needed for the magnitude of the coefficient */ \
    nbits = JPEG_NBITS_NONZERO(temp);  /* there must be at least one 1 bit */ \
    /* Check for out-of-range coefficient values */ \
    if (nbits > MAX_COEF_BITS) \
      ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
    \
    /* Emit that number of bits of the value, if positive, */ \
    /* or the complement of its magnitude, if negative. */ \
    emit_bits(entropy, (unsigned int)temp2, nbits); \
    \
    cvalue++; \
    zerobits >>= 1; \
  } \
}
658
/* Encode one MCU of an AC initial scan.  Only MCU_data[0] is read.  The
 * band covers coefficients cinfo->Ss through cinfo->Se.  Always returns
 * TRUE.
 */
METHODDEF(boolean)
encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  register int temp, temp2;
  register int nbits, r;
  int Sl = cinfo->Se - cinfo->Ss + 1;   /* number of coefficients in band */
  int Al = cinfo->Al;
  /* Two planes of DCTSIZE2 entries (absolute values, then bits to emit);
   * the extra elements leave room for PAD() to align the start address.
   */
  JCOEF values_unaligned[2 * DCTSIZE2 + 15];
  JCOEF *values;
  const JCOEF *cvalue;          /* next coefficient to examine */
  size_t zerobits;              /* bit k set => coefficient k is nonzero */
  size_t bits[8 / SIZEOF_SIZE_T];

  entropy->next_output_byte = cinfo->dest->next_output_byte;
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;

  /* Emit restart marker if needed */
  if (cinfo->restart_interval)
    if (entropy->restarts_to_go == 0)
      emit_restart(entropy, entropy->next_restart_num);

#ifdef WITH_SIMD
  cvalue = values = (JCOEF *)PAD((size_t)values_unaligned, 16);
#else
  /* Not using SIMD, so alignment is not needed */
  cvalue = values = values_unaligned;
#endif

  /* Prepare data */
  entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
                            Sl, Al, values, bits);

  /* Combine the bitmap words to see whether any coefficient is nonzero */
  zerobits = bits[0];
#if SIZEOF_SIZE_T == 4
  zerobits |= bits[1];
#endif

  /* Emit any pending EOBRUN */
  if (zerobits && (entropy->EOBRUN > 0))
    emit_eobrun(entropy);

#if SIZEOF_SIZE_T == 4
  zerobits = bits[0];
#endif

  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */

  ENCODE_COEFS_AC_FIRST((void)0;);

#if SIZEOF_SIZE_T == 4
  /* Second bitmap word (coefficients 32-63).  The zero run that leads to
   * its first nonzero coefficient also includes whatever was left
   * unprocessed at the end of the first word (diff), so r and cvalue are
   * set up here and the loop is entered at its label.
   */
  zerobits = bits[1];
  if (zerobits) {
    int diff = ((values + DCTSIZE2 / 2) - cvalue);
    r = count_zeroes(&zerobits);
    r += diff;
    cvalue += r;
    goto first_iter_ac_first;
  }

  ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);
#endif

  /* cvalue now points just past the last coefficient that was emitted */
  if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */
    entropy->EOBRUN++;          /* count an EOB */
    if (entropy->EOBRUN == 0x7FFF)
      emit_eobrun(entropy);     /* force it out to avoid overflow */
  }

  cinfo->dest->next_output_byte = entropy->next_output_byte;
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;

  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0) {
      entropy->restarts_to_go = cinfo->restart_interval;
      entropy->next_restart_num++;
      entropy->next_restart_num &= 7;
    }
    entropy->restarts_to_go--;
  }

  return TRUE;
}
743
744
745/*
746 * MCU encoding for DC successive approximation refinement scan.
747 * Note: we assume such scans can be multi-component, although the spec
748 * is not very clear on the point.
749 */
750
751METHODDEF(boolean)
752encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
753{
754 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
755 register int temp;
756 int blkn;
757 int Al = cinfo->Al;
758 JBLOCKROW block;
759
760 entropy->next_output_byte = cinfo->dest->next_output_byte;
761 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
762
763 /* Emit restart marker if needed */
764 if (cinfo->restart_interval)
765 if (entropy->restarts_to_go == 0)
766 emit_restart(entropy, entropy->next_restart_num);
767
768 /* Encode the MCU data blocks */
769 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
770 block = MCU_data[blkn];
771
772 /* We simply emit the Al'th bit of the DC coefficient value. */
773 temp = (*block)[0];
774 emit_bits(entropy, (unsigned int)(temp >> Al), 1);
775 }
776
777 cinfo->dest->next_output_byte = entropy->next_output_byte;
778 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
779
780 /* Update restart-interval state too */
781 if (cinfo->restart_interval) {
782 if (entropy->restarts_to_go == 0) {
783 entropy->restarts_to_go = cinfo->restart_interval;
784 entropy->next_restart_num++;
785 entropy->next_restart_num &= 7;
786 }
787 entropy->restarts_to_go--;
788 }
789
790 return TRUE;
791}
792
793
/*
 * Data preparation for encode_mcu_AC_refine().
 */

/* COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) processes the first Sl
 * coefficients (in the order given by jpeg_natural_order_start).  For each
 * coefficient k it stores the point-transformed absolute value in
 * absvalues[k].  When that value is nonzero it sets bit k of zerobits, and
 * also sets bit k of signbits if the original coefficient was not negative.
 * EOB is set to k + koffset for the last coefficient whose transformed
 * absolute value is exactly 1.
 *
 * The macro uses these names from the enclosing function: block,
 * jpeg_natural_order_start, Al, absvalues, zerobits, signbits, EOB, and
 * the int scratch variables k, temp and temp2.
 */
#define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \
  /* It is convenient to make a pre-pass to determine the transformed \
   * coefficients' absolute values and the EOB position. \
   */ \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    /* We must apply the point transform by Al.  For AC coefficients this \
     * is an integer division with rounding towards 0.  To do this portably \
     * in C, we shift after obtaining the absolute value. \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp ^= temp2; \
    temp -= temp2;              /* temp is abs value of input */ \
    temp >>= Al;                /* apply the point transform */ \
    if (temp != 0) { \
      zerobits |= ((size_t)1U) << k; \
      /* temp2 is -1 for a negative input and 0 otherwise, so temp2 + 1 \
       * is 0 for negative and 1 for non-negative coefficients. \
       */ \
      signbits |= ((size_t)(temp2 + 1)) << k; \
    } \
    absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \
    if (temp == 1) \
      EOB = k + koffset;        /* EOB = index of last newly-nonzero coef */ \
  } \
}
821
/* C implementation of the AC_refine_prepare hook.
 *
 * block                     coefficient block to read
 * jpeg_natural_order_start  natural-order index table, already offset to
 *                           the first coefficient of the band
 * Sl                        number of coefficients in the band
 * Al                        point transform (right shift)
 * absvalues                 output: point-transformed absolute value of
 *                           each of the Sl coefficients
 * bits                      output bitmaps.  When size_t is 8 bytes:
 *                           bits[0] = nonzero map, bits[1] = sign map.
 *                           Otherwise: bits[0]/bits[1] = nonzero map for
 *                           coefficients 0-31/32-63 and bits[2]/bits[3] =
 *                           sign map for the same two halves.
 *
 * Returns the band-relative index of the last coefficient whose transformed
 * absolute value is 1, or 0 if there is none.
 *
 * The local variable names are fixed by COMPUTE_ABSVALUES_AC_REFINE, which
 * refers to them directly.
 */
METHODDEF(int)
encode_mcu_AC_refine_prepare(const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
                             int Al, JCOEF *absvalues, size_t *bits)
{
  register int k, temp, temp2;
  int EOB = 0;
  size_t zerobits = 0U, signbits = 0U;
  int Sl0 = Sl;

#if SIZEOF_SIZE_T == 4
  /* A 32-bit word can only describe the first 32 coefficients */
  if (Sl0 > 32)
    Sl0 = 32;
#endif

  COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0);

  bits[0] = zerobits;
#if SIZEOF_SIZE_T == 8
  bits[1] = signbits;
#else
  bits[2] = signbits;

  zerobits = 0U;
  signbits = 0U;

  if (Sl > 32) {
    /* Process the remaining coefficients; koffset = 32 keeps EOB relative
     * to the start of the band.
     */
    Sl -= 32;
    jpeg_natural_order_start += 32;
    absvalues += 32;

    COMPUTE_ABSVALUES_AC_REFINE(Sl, 32);
  }

  bits[1] = zerobits;
  bits[3] = signbits;
#endif

  return EOB;
}
862
863
/*
 * MCU encoding for AC successive approximation refinement scan.
 */

/* ENCODE_COEFS_AC_REFINE(label) walks the coefficients flagged in zerobits
 * (those whose transformed absolute value is nonzero).  A coefficient with
 * absolute value > 1 only contributes a correction bit, which is appended
 * to BR_buffer.  A coefficient with absolute value 1 causes the pending
 * EOB run, the run/size symbol, its sign bit and the buffered correction
 * bits to be emitted.  The `continue` inside the macro applies to the
 * macro's own while loop.
 *
 * `label` is pasted in front of the per-coefficient code.  Passing a label
 * such as `first_iter_ac_refine:` lets the caller jump into the loop after
 * setting up r, cabsvalue and signbits itself; passing `(void)0;` adds
 * nothing.
 *
 * The macro uses these names from the enclosing function: zerobits,
 * signbits, idx, r, temp, cabsvalue, EOBPTR, BR, BR_buffer and entropy.
 */
#define ENCODE_COEFS_AC_REFINE(label) { \
  while (zerobits) { \
    idx = count_zeroes(&zerobits); \
    r += idx; \
    cabsvalue += idx; \
    signbits >>= idx; \
label \
    /* Emit any required ZRLs, but not if they can be folded into EOB */ \
    while (r > 15 && (cabsvalue <= EOBPTR)) { \
      /* emit any pending EOBRUN and the BE correction bits */ \
      emit_eobrun(entropy); \
      /* Emit ZRL */ \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
      /* Emit buffered correction bits that must be associated with ZRL */ \
      emit_buffered_bits(entropy, BR_buffer, BR); \
      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
      BR = 0; \
    } \
    \
    temp = *cabsvalue++; \
    \
    /* If the coef was previously nonzero, it only needs a correction bit. \
     * NOTE: a straight translation of the spec's figure G.7 would suggest \
     * that we also need to test r > 15.  But if r > 15, we can only get here \
     * if k > EOB, which implies that this coefficient is not 1. \
     */ \
    if (temp > 1) { \
      /* The correction bit is the next bit of the absolute value. */ \
      BR_buffer[BR++] = (char)(temp & 1); \
      signbits >>= 1; \
      zerobits >>= 1; \
      continue; \
    } \
    \
    /* Emit any pending EOBRUN and the BE correction bits */ \
    emit_eobrun(entropy); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
    \
    /* Emit output bit for newly-nonzero coef */ \
    temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \
    emit_bits(entropy, (unsigned int)temp, 1); \
    \
    /* Emit buffered correction bits that must be associated with this code */ \
    emit_buffered_bits(entropy, BR_buffer, BR); \
    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
    BR = 0; \
    r = 0;                      /* reset zero run length */ \
    signbits >>= 1; \
    zerobits >>= 1; \
  } \
}
922
/* Encode one MCU of an AC refinement scan.  Only MCU_data[0] is read.  The
 * band covers coefficients cinfo->Ss through cinfo->Se.  Always returns
 * TRUE.
 */
METHODDEF(boolean)
encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  register int temp, r, idx;
  char *BR_buffer;              /* where this MCU's correction bits go */
  unsigned int BR;              /* number of correction bits stored there */
  int Sl = cinfo->Se - cinfo->Ss + 1;   /* number of coefficients in band */
  int Al = cinfo->Al;
  /* The extra elements leave room for PAD() to align the start address */
  JCOEF absvalues_unaligned[DCTSIZE2 + 15];
  JCOEF *absvalues;
  const JCOEF *cabsvalue, *EOBPTR;
  size_t zerobits, signbits;
  size_t bits[16 / SIZEOF_SIZE_T];

  entropy->next_output_byte = cinfo->dest->next_output_byte;
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;

  /* Emit restart marker if needed */
  if (cinfo->restart_interval)
    if (entropy->restarts_to_go == 0)
      emit_restart(entropy, entropy->next_restart_num);

#ifdef WITH_SIMD
  cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16);
#else
  /* Not using SIMD, so alignment is not needed */
  cabsvalue = absvalues = absvalues_unaligned;
#endif

  /* Prepare data.  The prepare routine returns the EOB index, which is
   * turned into a pointer into absvalues here.
   */
  EOBPTR = absvalues +
    entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
                               Sl, Al, absvalues, bits);

  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */

  r = 0;                        /* r = run length of zeros */
  BR = 0;                       /* BR = count of buffered bits added now */
  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */

  /* The position of the sign bitmap within bits[] depends on the size of
   * size_t.
   */
  zerobits = bits[0];
#if SIZEOF_SIZE_T == 8
  signbits = bits[1];
#else
  signbits = bits[2];
#endif
  ENCODE_COEFS_AC_REFINE((void)0;);

#if SIZEOF_SIZE_T == 4
  /* Second pair of bitmap words (coefficients 32-63).  The distance from
   * the current position to the start of the second half (diff) is added
   * to the run length and to cabsvalue, and the loop is entered at its
   * label.
   */
  zerobits = bits[1];
  signbits = bits[3];

  if (zerobits) {
    int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
    idx = count_zeroes(&zerobits);
    signbits >>= idx;
    idx += diff;
    r += idx;
    cabsvalue += idx;
    goto first_iter_ac_refine;
  }

  ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
#endif

  /* Make r nonzero if any coefficients remain after the last one that was
   * processed.
   */
  r |= (int)((absvalues + Sl) - cabsvalue);

  if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
    entropy->EOBRUN++;          /* count an EOB */
    entropy->BE += BR;          /* concat my correction bits to older ones */
    /* We force out the EOB if we risk either:
     * 1. overflow of the EOB counter;
     * 2. overflow of the correction bit buffer during the next MCU.
     */
    if (entropy->EOBRUN == 0x7FFF ||
        entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
      emit_eobrun(entropy);
  }

  cinfo->dest->next_output_byte = entropy->next_output_byte;
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;

  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0) {
      entropy->restarts_to_go = cinfo->restart_interval;
      entropy->next_restart_num++;
      entropy->next_restart_num &= 7;
    }
    entropy->restarts_to_go--;
  }

  return TRUE;
}
1018
1019
1020/*
1021 * Finish up at the end of a Huffman-compressed progressive scan.
1022 */
1023
1024METHODDEF(void)
1025finish_pass_phuff(j_compress_ptr cinfo)
1026{
1027 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1028
1029 entropy->next_output_byte = cinfo->dest->next_output_byte;
1030 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
1031
1032 /* Flush out any buffered data */
1033 emit_eobrun(entropy);
1034 flush_bits(entropy);
1035
1036 cinfo->dest->next_output_byte = entropy->next_output_byte;
1037 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
1038}
1039
1040
1041/*
1042 * Finish up a statistics-gathering pass and create the new Huffman tables.
1043 */
1044
1045METHODDEF(void)
1046finish_pass_gather_phuff(j_compress_ptr cinfo)
1047{
1048 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1049 boolean is_DC_band;
1050 int ci, tbl;
1051 jpeg_component_info *compptr;
1052 JHUFF_TBL **htblptr;
1053 boolean did[NUM_HUFF_TBLS];
1054
1055 /* Flush out buffered data (all we care about is counting the EOB symbol) */
1056 emit_eobrun(entropy);
1057
1058 is_DC_band = (cinfo->Ss == 0);
1059
1060 /* It's important not to apply jpeg_gen_optimal_table more than once
1061 * per table, because it clobbers the input frequency counts!
1062 */
1063 MEMZERO(did, sizeof(did));
1064
1065 for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
1066 compptr = cinfo->cur_comp_info[ci];
1067 if (is_DC_band) {
1068 if (cinfo->Ah != 0) /* DC refinement needs no table */
1069 continue;
1070 tbl = compptr->dc_tbl_no;
1071 } else {
1072 tbl = compptr->ac_tbl_no;
1073 }
1074 if (!did[tbl]) {
1075 if (is_DC_band)
1076 htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];
1077 else
1078 htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];
1079 if (*htblptr == NULL)
1080 *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
1081 jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
1082 did[tbl] = TRUE;
1083 }
1084 }
1085}
1086
1087
1088/*
1089 * Module initialization routine for progressive Huffman entropy encoding.
1090 */
1091
1092GLOBAL(void)
1093jinit_phuff_encoder(j_compress_ptr cinfo)
1094{
1095 phuff_entropy_ptr entropy;
1096 int i;
1097
1098 entropy = (phuff_entropy_ptr)
1099 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
1100 sizeof(phuff_entropy_encoder));
1101 cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
1102 entropy->pub.start_pass = start_pass_phuff;
1103
1104 /* Mark tables unallocated */
1105 for (i = 0; i < NUM_HUFF_TBLS; i++) {
1106 entropy->derived_tbls[i] = NULL;
1107 entropy->count_ptrs[i] = NULL;
1108 }
1109 entropy->bit_buffer = NULL; /* needed only in AC refinement scan */
1110}
1111
1112#endif /* C_PROGRESSIVE_SUPPORTED */
1113