luajitos

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

decoder_JPEG.c (39064B)


      1 /*
      2  * JPEG Decoder Implementation
      3  * Supports: Baseline DCT, Extended Sequential DCT, Progressive DCT
      4  * Chroma subsampling: 4:4:4, 4:2:2, 4:2:0, 4:1:1
      5  *
      6  * References:
      7  * - ITU-T T.81 (JPEG Standard)
      8  * - ISO/IEC 10918-1
      9  */
     10 
     11 #include "decoder_JPEG.h"
     12 #include <stdlib.h>
     13 #include <string.h>
     14 #include <lauxlib.h>
     15 
     16 /* Zigzag ordering table */
     17 const uint8_t jpeg_zigzag[64] = {
     18      0,  1,  8, 16,  9,  2,  3, 10,
     19     17, 24, 32, 25, 18, 11,  4,  5,
     20     12, 19, 26, 33, 40, 48, 41, 34,
     21     27, 20, 13,  6,  7, 14, 21, 28,
     22     35, 42, 49, 56, 57, 50, 43, 36,
     23     29, 22, 15, 23, 30, 37, 44, 51,
     24     58, 59, 52, 45, 38, 31, 39, 46,
     25     53, 60, 61, 54, 47, 55, 62, 63
     26 };
     27 
     28 /* Inverse zigzag (zigzag to normal order) */
     29 const uint8_t jpeg_unzigzag[64] = {
     30      0,  1,  5,  6, 14, 15, 27, 28,
     31      2,  4,  7, 13, 16, 26, 29, 42,
     32      3,  8, 12, 17, 25, 30, 41, 43,
     33      9, 11, 18, 24, 31, 40, 44, 53,
     34     10, 19, 23, 32, 39, 45, 52, 54,
     35     20, 22, 33, 38, 46, 51, 55, 60,
     36     21, 34, 37, 47, 50, 56, 59, 61,
     37     35, 36, 48, 49, 57, 58, 62, 63
     38 };
     39 
/*
 * IDCT constants - scaled by 2^12 for fixed-point math.
 * IDCT_ROUND is half of IDCT_SCALE, i.e. the round-to-nearest term for a
 * 12-bit right shift.
 */
#define IDCT_SCALE 4096
#define IDCT_ROUND 2048

/* Cosine values for IDCT, scaled by 2^14: Ck = round(cos(k*pi/16) * 2^14) */
#define C1 16069  /* cos(1*pi/16) * 2^14 */
#define C2 15137  /* cos(2*pi/16) * 2^14 */
#define C3 13623  /* cos(3*pi/16) * 2^14 */
#define C4 11585  /* cos(4*pi/16) * 2^14 = sqrt(2) * 2^13 */
#define C5 9102   /* cos(5*pi/16) * 2^14 */
#define C6 6270   /* cos(6*pi/16) * 2^14 */
#define C7 3196   /* cos(7*pi/16) * 2^14 */
     52 
     53 /* Clip value to 0-255 range */
     54 static inline uint8_t clip_uint8(int x) {
     55     if (x < 0) return 0;
     56     if (x > 255) return 255;
     57     return (uint8_t)x;
     58 }
     59 
     60 /* Read 16-bit big-endian value */
     61 static inline uint16_t read_be16(const uint8_t* data) {
     62     return (uint16_t)((data[0] << 8) | data[1]);
     63 }
     64 
     65 /* ============================================================
     66  * Memory Management
     67  * ============================================================ */
     68 
     69 jpeg_decoder_t* jpeg_decoder_create(void) {
     70     jpeg_decoder_t* dec = (jpeg_decoder_t*)calloc(1, sizeof(jpeg_decoder_t));
     71     if (!dec) return NULL;
     72 
     73     /* Initialize all table validity flags to 0 */
     74     memset(dec->dc_table_valid, 0, sizeof(dec->dc_table_valid));
     75     memset(dec->ac_table_valid, 0, sizeof(dec->ac_table_valid));
     76     memset(dec->quant_table_valid, 0, sizeof(dec->quant_table_valid));
     77 
     78     return dec;
     79 }
     80 
     81 void jpeg_decoder_destroy(jpeg_decoder_t* dec) {
     82     if (!dec) return;
     83 
     84     /* Free component data */
     85     for (int i = 0; i < JPEG_MAX_COMPONENTS; i++) {
     86         if (dec->components[i].data) {
     87             free(dec->components[i].data);
     88         }
     89     }
     90 
     91     /* Free progressive coefficient blocks */
     92     if (dec->coef_blocks) {
     93         for (int i = 0; i < dec->coef_blocks_count; i++) {
     94             if (dec->coef_blocks[i]) {
     95                 free(dec->coef_blocks[i]);
     96             }
     97         }
     98         free(dec->coef_blocks);
     99     }
    100 
    101     free(dec);
    102 }
    103 
    104 /* ============================================================
    105  * Bit Reading
    106  * ============================================================ */
    107 
    108 /* Fill bit buffer from stream */
    109 static int jpeg_fill_bits(jpeg_decoder_t* dec) {
    110     while (dec->bits_count < 24 && dec->pos < dec->data_size) {
    111         uint8_t byte = dec->data[dec->pos++];
    112 
    113         /* Handle byte stuffing (0xFF followed by 0x00) */
    114         if (byte == 0xFF) {
    115             if (dec->pos >= dec->data_size) {
    116                 dec->error = 1;
    117                 return -1;
    118             }
    119             uint8_t next = dec->data[dec->pos];
    120             if (next == 0x00) {
    121                 /* Stuffed byte, skip the 0x00 */
    122                 dec->pos++;
    123             } else if (next >= 0xD0 && next <= 0xD7) {
    124                 /* Restart marker - ignore and continue */
    125                 dec->pos++;
    126                 continue;
    127             } else {
    128                 /* Other marker - we've hit the end of scan data */
    129                 dec->pos--;
    130                 break;
    131             }
    132         }
    133 
    134         dec->bits_buffer = (dec->bits_buffer << 8) | byte;
    135         dec->bits_count += 8;
    136     }
    137     return 0;
    138 }
    139 
    140 int jpeg_get_bits(jpeg_decoder_t* dec, int nbits) {
    141     if (nbits == 0) return 0;
    142 
    143     while (dec->bits_count < nbits) {
    144         if (jpeg_fill_bits(dec) < 0) return -1;
    145         if (dec->bits_count < nbits) {
    146             dec->error = 1;
    147             return -1;
    148         }
    149     }
    150 
    151     dec->bits_count -= nbits;
    152     return (dec->bits_buffer >> dec->bits_count) & ((1 << nbits) - 1);
    153 }
    154 
    155 int jpeg_peek_bits(jpeg_decoder_t* dec, int nbits) {
    156     if (nbits == 0) return 0;
    157 
    158     while (dec->bits_count < nbits) {
    159         if (jpeg_fill_bits(dec) < 0) return -1;
    160         if (dec->bits_count < nbits) {
    161             dec->error = 1;
    162             return -1;
    163         }
    164     }
    165 
    166     return (dec->bits_buffer >> (dec->bits_count - nbits)) & ((1 << nbits) - 1);
    167 }
    168 
    169 void jpeg_skip_bits(jpeg_decoder_t* dec, int nbits) {
    170     dec->bits_count -= nbits;
    171 }
    172 
    173 void jpeg_align_bits(jpeg_decoder_t* dec) {
    174     dec->bits_count &= ~7;  /* Align to byte boundary */
    175 }
    176 
    177 int jpeg_next_marker(jpeg_decoder_t* dec) {
    178     /* Align to byte boundary */
    179     jpeg_align_bits(dec);
    180     dec->bits_count = 0;
    181     dec->bits_buffer = 0;
    182 
    183     /* Find next marker */
    184     while (dec->pos < dec->data_size - 1) {
    185         if (dec->data[dec->pos] == 0xFF) {
    186             uint8_t marker = dec->data[dec->pos + 1];
    187             if (marker != 0x00 && marker != 0xFF) {
    188                 dec->pos += 2;
    189                 return 0xFF00 | marker;
    190             }
    191         }
    192         dec->pos++;
    193     }
    194 
    195     return -1;
    196 }
    197 
    198 /* ============================================================
    199  * Huffman Table Handling
    200  * ============================================================ */
    201 
    202 int jpeg_build_huffman_table(jpeg_huffman_table_t* table) {
    203     int code = 0;
    204     int si = 1;
    205     int p = 0;
    206 
    207     /* Generate huffcode and huffsize arrays */
    208     for (int i = 1; i <= 16; i++) {
    209         for (int j = 0; j < table->bits[i]; j++) {
    210             table->huffcode[p] = code;
    211             table->huffsize[p] = i;
    212             p++;
    213             code++;
    214         }
    215         code <<= 1;
    216     }
    217     table->num_symbols = p;
    218 
    219     /* Generate mincode, maxcode, and valptr */
    220     p = 0;
    221     for (int i = 1; i <= 16; i++) {
    222         if (table->bits[i]) {
    223             table->valptr[i] = p;
    224             table->mincode[i] = table->huffcode[p];
    225             p += table->bits[i];
    226             table->maxcode[i] = table->huffcode[p - 1];
    227         } else {
    228             table->maxcode[i] = -1;
    229             table->mincode[i] = 0;
    230             table->valptr[i] = 0;
    231         }
    232     }
    233     table->maxcode[17] = 0xFFFFFF;  /* Sentinel */
    234 
    235     return 0;
    236 }
    237 
    238 int jpeg_decode_huffman(jpeg_decoder_t* dec, jpeg_huffman_table_t* table) {
    239     int code = 0;
    240     int size = 1;
    241 
    242     /* Read bits one at a time until we find a valid code */
    243     while (size <= 16) {
    244         int bit = jpeg_get_bits(dec, 1);
    245         if (bit < 0) return -1;
    246 
    247         code = (code << 1) | bit;
    248 
    249         if (code <= table->maxcode[size]) {
    250             int index = table->valptr[size] + code - table->mincode[size];
    251             return table->huffval[index];
    252         }
    253         size++;
    254     }
    255 
    256     dec->error = 1;
    257     snprintf(dec->error_msg, sizeof(dec->error_msg), "Invalid Huffman code");
    258     return -1;
    259 }
    260 
    261 /* Extend signed value from Huffman decoding */
    262 int jpeg_receive_extend(jpeg_decoder_t* dec, int nbits) {
    263     if (nbits == 0) return 0;
    264 
    265     int value = jpeg_get_bits(dec, nbits);
    266     if (value < 0) return 0;
    267 
    268     /* Sign extension */
    269     if (value < (1 << (nbits - 1))) {
    270         value = value - (1 << nbits) + 1;
    271     }
    272 
    273     return value;
    274 }
    275 
    276 /* ============================================================
    277  * Marker Parsing
    278  * ============================================================ */
    279 
    280 static int jpeg_parse_dqt(jpeg_decoder_t* dec) {
    281     uint16_t length = read_be16(dec->data + dec->pos);
    282     dec->pos += 2;
    283     int remaining = length - 2;
    284 
    285     while (remaining > 0) {
    286         uint8_t info = dec->data[dec->pos++];
    287         remaining--;
    288 
    289         int precision = (info >> 4) & 0x0F;  /* 0 = 8-bit, 1 = 16-bit */
    290         int table_id = info & 0x0F;
    291 
    292         if (table_id >= JPEG_MAX_QUANT_TABLES) {
    293             dec->error = 1;
    294             snprintf(dec->error_msg, sizeof(dec->error_msg),
    295                      "Invalid quantization table ID: %d", table_id);
    296             return -1;
    297         }
    298 
    299         jpeg_quant_table_t* qt = &dec->quant_tables[table_id];
    300         qt->precision = precision;
    301 
    302         if (precision == 0) {
    303             /* 8-bit values */
    304             for (int i = 0; i < 64; i++) {
    305                 qt->table[jpeg_zigzag[i]] = dec->data[dec->pos++];
    306             }
    307             remaining -= 64;
    308         } else {
    309             /* 16-bit values */
    310             for (int i = 0; i < 64; i++) {
    311                 qt->table[jpeg_zigzag[i]] = read_be16(dec->data + dec->pos);
    312                 dec->pos += 2;
    313             }
    314             remaining -= 128;
    315         }
    316 
    317         dec->quant_table_valid[table_id] = 1;
    318     }
    319 
    320     return 0;
    321 }
    322 
    323 static int jpeg_parse_dht(jpeg_decoder_t* dec) {
    324     uint16_t length = read_be16(dec->data + dec->pos);
    325     dec->pos += 2;
    326     int remaining = length - 2;
    327 
    328     while (remaining > 0) {
    329         uint8_t info = dec->data[dec->pos++];
    330         remaining--;
    331 
    332         int table_class = (info >> 4) & 0x0F;  /* 0 = DC, 1 = AC */
    333         int table_id = info & 0x0F;
    334 
    335         if (table_id >= JPEG_MAX_HUFFMAN_TABLES) {
    336             dec->error = 1;
    337             snprintf(dec->error_msg, sizeof(dec->error_msg),
    338                      "Invalid Huffman table ID: %d", table_id);
    339             return -1;
    340         }
    341 
    342         jpeg_huffman_table_t* ht;
    343         if (table_class == 0) {
    344             ht = &dec->dc_tables[table_id];
    345             dec->dc_table_valid[table_id] = 1;
    346         } else {
    347             ht = &dec->ac_tables[table_id];
    348             dec->ac_table_valid[table_id] = 1;
    349         }
    350 
    351         /* Read bits counts (BITS) */
    352         int total_symbols = 0;
    353         ht->bits[0] = 0;
    354         for (int i = 1; i <= 16; i++) {
    355             ht->bits[i] = dec->data[dec->pos++];
    356             total_symbols += ht->bits[i];
    357             remaining--;
    358         }
    359 
    360         /* Read symbols (HUFFVAL) */
    361         for (int i = 0; i < total_symbols; i++) {
    362             ht->huffval[i] = dec->data[dec->pos++];
    363             remaining--;
    364         }
    365 
    366         /* Build derived tables */
    367         jpeg_build_huffman_table(ht);
    368     }
    369 
    370     return 0;
    371 }
    372 
    373 static int jpeg_parse_sof(jpeg_decoder_t* dec, uint8_t marker) {
    374     uint16_t length = read_be16(dec->data + dec->pos);
    375     dec->pos += 2;
    376     (void)length;
    377 
    378     dec->frame_type = marker;
    379     dec->is_baseline = (marker == 0xC0);
    380     dec->is_progressive = (marker == 0xC2 || marker == 0xCA);
    381 
    382     dec->precision = dec->data[dec->pos++];
    383     dec->height = read_be16(dec->data + dec->pos);
    384     dec->pos += 2;
    385     dec->width = read_be16(dec->data + dec->pos);
    386     dec->pos += 2;
    387     dec->num_components = dec->data[dec->pos++];
    388 
    389     if (dec->num_components > JPEG_MAX_COMPONENTS) {
    390         dec->error = 1;
    391         snprintf(dec->error_msg, sizeof(dec->error_msg),
    392                  "Too many components: %d", dec->num_components);
    393         return -1;
    394     }
    395 
    396     if (dec->precision != 8 && dec->precision != 12) {
    397         dec->error = 1;
    398         snprintf(dec->error_msg, sizeof(dec->error_msg),
    399                  "Unsupported precision: %d", dec->precision);
    400         return -1;
    401     }
    402 
    403     dec->max_h_samp = 1;
    404     dec->max_v_samp = 1;
    405 
    406     for (int i = 0; i < dec->num_components; i++) {
    407         jpeg_component_t* comp = &dec->components[i];
    408         comp->id = dec->data[dec->pos++];
    409         uint8_t sampling = dec->data[dec->pos++];
    410         comp->h_samp = (sampling >> 4) & 0x0F;
    411         comp->v_samp = sampling & 0x0F;
    412         comp->quant_table_id = dec->data[dec->pos++];
    413         comp->dc_pred = 0;
    414 
    415         if (comp->h_samp > dec->max_h_samp) dec->max_h_samp = comp->h_samp;
    416         if (comp->v_samp > dec->max_v_samp) dec->max_v_samp = comp->v_samp;
    417 
    418         if (comp->quant_table_id >= JPEG_MAX_QUANT_TABLES) {
    419             dec->error = 1;
    420             snprintf(dec->error_msg, sizeof(dec->error_msg),
    421                      "Invalid quantization table ID: %d", comp->quant_table_id);
    422             return -1;
    423         }
    424     }
    425 
    426     /* Calculate MCU dimensions */
    427     dec->mcu_width = dec->max_h_samp * 8;
    428     dec->mcu_height = dec->max_v_samp * 8;
    429     dec->mcus_per_row = (dec->width + dec->mcu_width - 1) / dec->mcu_width;
    430     dec->mcu_rows = (dec->height + dec->mcu_height - 1) / dec->mcu_height;
    431 
    432     /* Allocate component buffers */
    433     for (int i = 0; i < dec->num_components; i++) {
    434         jpeg_component_t* comp = &dec->components[i];
    435 
    436         /* Calculate component dimensions */
    437         comp->width = dec->mcus_per_row * comp->h_samp * 8;
    438         comp->height = dec->mcu_rows * comp->v_samp * 8;
    439         comp->stride = comp->width;
    440 
    441         comp->data = (uint8_t*)calloc(comp->width * comp->height, 1);
    442         if (!comp->data) {
    443             dec->error = 1;
    444             snprintf(dec->error_msg, sizeof(dec->error_msg), "Out of memory");
    445             return -1;
    446         }
    447     }
    448 
    449     /* Allocate coefficient blocks for progressive JPEG */
    450     if (dec->is_progressive) {
    451         int total_blocks = 0;
    452         for (int i = 0; i < dec->num_components; i++) {
    453             jpeg_component_t* comp = &dec->components[i];
    454             int blocks_x = comp->width / 8;
    455             int blocks_y = comp->height / 8;
    456             total_blocks += blocks_x * blocks_y;
    457         }
    458 
    459         dec->coef_blocks = (int16_t**)calloc(total_blocks, sizeof(int16_t*));
    460         if (!dec->coef_blocks) {
    461             dec->error = 1;
    462             snprintf(dec->error_msg, sizeof(dec->error_msg), "Out of memory");
    463             return -1;
    464         }
    465 
    466         for (int i = 0; i < total_blocks; i++) {
    467             dec->coef_blocks[i] = (int16_t*)calloc(64, sizeof(int16_t));
    468             if (!dec->coef_blocks[i]) {
    469                 dec->error = 1;
    470                 snprintf(dec->error_msg, sizeof(dec->error_msg), "Out of memory");
    471                 return -1;
    472             }
    473         }
    474         dec->coef_blocks_count = total_blocks;
    475     }
    476 
    477     return 0;
    478 }
    479 
    480 static int jpeg_parse_dri(jpeg_decoder_t* dec) {
    481     uint16_t length = read_be16(dec->data + dec->pos);
    482     dec->pos += 2;
    483     (void)length;
    484 
    485     dec->restart_interval = read_be16(dec->data + dec->pos);
    486     dec->pos += 2;
    487 
    488     return 0;
    489 }
    490 
    491 static int jpeg_parse_sos(jpeg_decoder_t* dec, jpeg_scan_t* scan) {
    492     uint16_t length = read_be16(dec->data + dec->pos);
    493     dec->pos += 2;
    494     (void)length;
    495 
    496     scan->num_components = dec->data[dec->pos++];
    497 
    498     for (int i = 0; i < scan->num_components; i++) {
    499         uint8_t comp_id = dec->data[dec->pos++];
    500         uint8_t table_ids = dec->data[dec->pos++];
    501 
    502         scan->component_ids[i] = comp_id;
    503 
    504         /* Find matching component */
    505         for (int j = 0; j < dec->num_components; j++) {
    506             if (dec->components[j].id == comp_id) {
    507                 dec->components[j].dc_table_id = (table_ids >> 4) & 0x0F;
    508                 dec->components[j].ac_table_id = table_ids & 0x0F;
    509                 break;
    510             }
    511         }
    512     }
    513 
    514     scan->ss = dec->data[dec->pos++];  /* Start of spectral selection */
    515     scan->se = dec->data[dec->pos++];  /* End of spectral selection */
    516     uint8_t approx = dec->data[dec->pos++];
    517     scan->ah = (approx >> 4) & 0x0F;   /* Successive approximation high */
    518     scan->al = approx & 0x0F;          /* Successive approximation low */
    519 
    520     return 0;
    521 }
    522 
    523 int jpeg_parse_markers(jpeg_decoder_t* dec, const uint8_t* data, uint32_t size) {
    524     dec->data = data;
    525     dec->data_size = size;
    526     dec->pos = 0;
    527 
    528     /* Check SOI marker */
    529     if (size < 2 || data[0] != 0xFF || data[1] != 0xD8) {
    530         dec->error = 1;
    531         snprintf(dec->error_msg, sizeof(dec->error_msg), "Invalid JPEG: missing SOI marker");
    532         return -1;
    533     }
    534     dec->pos = 2;
    535 
    536     /* Parse markers */
    537     while (dec->pos < size - 1) {
    538         /* Find marker */
    539         if (dec->data[dec->pos] != 0xFF) {
    540             dec->pos++;
    541             continue;
    542         }
    543 
    544         uint8_t marker = dec->data[dec->pos + 1];
    545         dec->pos += 2;
    546 
    547         /* Skip padding bytes */
    548         if (marker == 0xFF || marker == 0x00) {
    549             continue;
    550         }
    551 
    552         switch (marker) {
    553             case 0xD8:  /* SOI - already handled */
    554                 break;
    555 
    556             case 0xD9:  /* EOI */
    557                 return 0;
    558 
    559             case 0xDB:  /* DQT */
    560                 if (jpeg_parse_dqt(dec) < 0) return -1;
    561                 break;
    562 
    563             case 0xC4:  /* DHT */
    564                 if (jpeg_parse_dht(dec) < 0) return -1;
    565                 break;
    566 
    567             case 0xC0:  /* SOF0 - Baseline DCT */
    568             case 0xC1:  /* SOF1 - Extended Sequential DCT */
    569             case 0xC2:  /* SOF2 - Progressive DCT */
    570                 if (jpeg_parse_sof(dec, marker) < 0) return -1;
    571                 break;
    572 
    573             case 0xC3:  /* SOF3 - Lossless */
    574             case 0xC5:  /* SOF5 */
    575             case 0xC6:  /* SOF6 */
    576             case 0xC7:  /* SOF7 */
    577             case 0xC9:  /* SOF9 */
    578             case 0xCA:  /* SOF10 */
    579             case 0xCB:  /* SOF11 */
    580             case 0xCD:  /* SOF13 */
    581             case 0xCE:  /* SOF14 */
    582             case 0xCF:  /* SOF15 */
    583                 if (marker == 0xCA) {
    584                     /* Progressive with arithmetic coding */
    585                     if (jpeg_parse_sof(dec, marker) < 0) return -1;
    586                 } else {
    587                     dec->error = 1;
    588                     snprintf(dec->error_msg, sizeof(dec->error_msg),
    589                              "Unsupported JPEG type: SOF%d", marker - 0xC0);
    590                     return -1;
    591                 }
    592                 break;
    593 
    594             case 0xDD:  /* DRI */
    595                 if (jpeg_parse_dri(dec) < 0) return -1;
    596                 break;
    597 
    598             case 0xDA:  /* SOS - Start of Scan */
    599                 return 0;  /* Stop parsing, scan data follows */
    600 
    601             case 0xE0: case 0xE1: case 0xE2: case 0xE3:
    602             case 0xE4: case 0xE5: case 0xE6: case 0xE7:
    603             case 0xE8: case 0xE9: case 0xEA: case 0xEB:
    604             case 0xEC: case 0xED: case 0xEE: case 0xEF:
    605             case 0xFE:  /* APP and COM markers - skip */
    606             {
    607                 uint16_t length = read_be16(dec->data + dec->pos);
    608                 dec->pos += length;
    609                 break;
    610             }
    611 
    612             case 0xD0: case 0xD1: case 0xD2: case 0xD3:
    613             case 0xD4: case 0xD5: case 0xD6: case 0xD7:
    614                 /* RST markers - no length */
    615                 break;
    616 
    617             default:
    618                 /* Unknown marker - try to skip */
    619                 if (dec->pos + 2 <= size) {
    620                     uint16_t length = read_be16(dec->data + dec->pos);
    621                     dec->pos += length;
    622                 }
    623                 break;
    624         }
    625     }
    626 
    627     return 0;
    628 }
    629 
    630 /* ============================================================
    631  * IDCT Implementation (AAN algorithm)
    632  * ============================================================ */
    633 
    634 void jpeg_idct_block(int16_t* block, uint8_t* output, int stride) {
    635     int tmp[64];
    636     int* tmpptr;
    637     int16_t* blkptr;
    638     int i;
    639 
    640     /* Pass 1: Process columns */
    641     blkptr = block;
    642     tmpptr = tmp;
    643     for (i = 0; i < 8; i++) {
    644         int s0 = blkptr[0*8];
    645         int s1 = blkptr[1*8];
    646         int s2 = blkptr[2*8];
    647         int s3 = blkptr[3*8];
    648         int s4 = blkptr[4*8];
    649         int s5 = blkptr[5*8];
    650         int s6 = blkptr[6*8];
    651         int s7 = blkptr[7*8];
    652 
    653         /* Check for all-zero AC coefficients */
    654         if ((s1 | s2 | s3 | s4 | s5 | s6 | s7) == 0) {
    655             int dc = s0 << 2;
    656             tmpptr[0*8] = dc;
    657             tmpptr[1*8] = dc;
    658             tmpptr[2*8] = dc;
    659             tmpptr[3*8] = dc;
    660             tmpptr[4*8] = dc;
    661             tmpptr[5*8] = dc;
    662             tmpptr[6*8] = dc;
    663             tmpptr[7*8] = dc;
    664         } else {
    665             /* Even part */
    666             int t0 = s0 + s4;
    667             int t1 = s0 - s4;
    668             int t2 = (s2 * C6 - s6 * C2) >> 14;
    669             int t3 = (s2 * C2 + s6 * C6) >> 14;
    670 
    671             int e0 = t0 + t3;
    672             int e1 = t1 + t2;
    673             int e2 = t1 - t2;
    674             int e3 = t0 - t3;
    675 
    676             /* Odd part */
    677             int t4 = (s1 * C7 - s7 * C1) >> 14;
    678             int t5 = (s5 * C3 - s3 * C5) >> 14;
    679             int t6 = (s5 * C5 + s3 * C3) >> 14;
    680             int t7 = (s1 * C1 + s7 * C7) >> 14;
    681 
    682             int o0 = t4 + t5;
    683             int o1 = t7 - t6;
    684             int o2 = ((t4 - t5 + t7 + t6) * C4) >> 14;
    685             int o3 = t7 + t6;
    686 
    687             int o4 = o2 - o0;
    688             int o5 = o1;
    689 
    690             /* Final output */
    691             tmpptr[0*8] = (e0 + o3) << 2;
    692             tmpptr[7*8] = (e0 - o3) << 2;
    693             tmpptr[1*8] = (e1 + o4) << 2;
    694             tmpptr[6*8] = (e1 - o4) << 2;
    695             tmpptr[2*8] = (e2 + o5) << 2;
    696             tmpptr[5*8] = (e2 - o5) << 2;
    697             tmpptr[3*8] = (e3 + o0 - o2 + o1) << 2;
    698             tmpptr[4*8] = (e3 - o0 + o2 - o1) << 2;
    699         }
    700 
    701         blkptr++;
    702         tmpptr++;
    703     }
    704 
    705     /* Pass 2: Process rows */
    706     tmpptr = tmp;
    707     for (i = 0; i < 8; i++) {
    708         int s0 = tmpptr[0];
    709         int s1 = tmpptr[1];
    710         int s2 = tmpptr[2];
    711         int s3 = tmpptr[3];
    712         int s4 = tmpptr[4];
    713         int s5 = tmpptr[5];
    714         int s6 = tmpptr[6];
    715         int s7 = tmpptr[7];
    716 
    717         /* Even part */
    718         int t0 = s0 + s4;
    719         int t1 = s0 - s4;
    720         int t2 = (s2 * C6 - s6 * C2) >> 14;
    721         int t3 = (s2 * C2 + s6 * C6) >> 14;
    722 
    723         int e0 = t0 + t3;
    724         int e1 = t1 + t2;
    725         int e2 = t1 - t2;
    726         int e3 = t0 - t3;
    727 
    728         /* Odd part */
    729         int t4 = (s1 * C7 - s7 * C1) >> 14;
    730         int t5 = (s5 * C3 - s3 * C5) >> 14;
    731         int t6 = (s5 * C5 + s3 * C3) >> 14;
    732         int t7 = (s1 * C1 + s7 * C7) >> 14;
    733 
    734         int o0 = t4 + t5;
    735         int o1 = t7 - t6;
    736         int o2 = ((t4 - t5 + t7 + t6) * C4) >> 14;
    737         int o3 = t7 + t6;
    738 
    739         int o4 = o2 - o0;
    740         int o5 = o1;
    741 
    742         /* Final output with level shift (add 128) and clamp */
    743         output[0] = clip_uint8(((e0 + o3) >> 5) + 128);
    744         output[7] = clip_uint8(((e0 - o3) >> 5) + 128);
    745         output[1] = clip_uint8(((e1 + o4) >> 5) + 128);
    746         output[6] = clip_uint8(((e1 - o4) >> 5) + 128);
    747         output[2] = clip_uint8(((e2 + o5) >> 5) + 128);
    748         output[5] = clip_uint8(((e2 - o5) >> 5) + 128);
    749         output[3] = clip_uint8(((e3 + o0 - o2 + o1) >> 5) + 128);
    750         output[4] = clip_uint8(((e3 - o0 + o2 - o1) >> 5) + 128);
    751 
    752         tmpptr += 8;
    753         output += stride;
    754     }
    755 }
    756 
    757 /* ============================================================
    758  * Block Decoding
    759  * ============================================================ */
    760 
    761 static int jpeg_decode_block(jpeg_decoder_t* dec, int16_t* block,
    762                              jpeg_component_t* comp, int is_dc_only) {
    763     jpeg_huffman_table_t* dc_table = &dec->dc_tables[comp->dc_table_id];
    764     jpeg_huffman_table_t* ac_table = &dec->ac_tables[comp->ac_table_id];
    765     jpeg_quant_table_t* qt = &dec->quant_tables[comp->quant_table_id];
    766 
    767     memset(block, 0, 64 * sizeof(int16_t));
    768 
    769     /* Decode DC coefficient */
    770     int dc_size = jpeg_decode_huffman(dec, dc_table);
    771     if (dc_size < 0) return -1;
    772 
    773     int dc_diff = jpeg_receive_extend(dec, dc_size);
    774     comp->dc_pred += dc_diff;
    775     block[0] = comp->dc_pred * qt->table[0];
    776 
    777     if (is_dc_only) return 0;
    778 
    779     /* Decode AC coefficients */
    780     int k = 1;
    781     while (k < 64) {
    782         int rs = jpeg_decode_huffman(dec, ac_table);
    783         if (rs < 0) return -1;
    784 
    785         int r = (rs >> 4) & 0x0F;  /* Run length */
    786         int s = rs & 0x0F;          /* Size */
    787 
    788         if (s == 0) {
    789             if (r == 0) {
    790                 /* EOB - End of Block */
    791                 break;
    792             } else if (r == 15) {
    793                 /* ZRL - Zero Run Length (16 zeros) */
    794                 k += 16;
    795             } else {
    796                 break;
    797             }
    798         } else {
    799             k += r;
    800             if (k >= 64) break;
    801 
    802             int ac = jpeg_receive_extend(dec, s);
    803             int zz_index = jpeg_zigzag[k];
    804             block[zz_index] = ac * qt->table[zz_index];
    805             k++;
    806         }
    807     }
    808 
    809     return 0;
    810 }
    811 
    812 /* ============================================================
    813  * Progressive JPEG Decoding
    814  * ============================================================ */
    815 
    816 int jpeg_decode_dc_first(jpeg_decoder_t* dec, jpeg_component_t* comp, int16_t* block) {
    817     jpeg_huffman_table_t* dc_table = &dec->dc_tables[comp->dc_table_id];
    818 
    819     int dc_size = jpeg_decode_huffman(dec, dc_table);
    820     if (dc_size < 0) return -1;
    821 
    822     int dc_diff = jpeg_receive_extend(dec, dc_size);
    823     comp->dc_pred += dc_diff;
    824     block[0] = comp->dc_pred;
    825 
    826     return 0;
    827 }
    828 
    829 int jpeg_decode_dc_refine(jpeg_decoder_t* dec, int16_t* block, int al) {
    830     int bit = jpeg_get_bits(dec, 1);
    831     if (bit < 0) return -1;
    832 
    833     block[0] |= (bit << al);
    834     return 0;
    835 }
    836 
    837 int jpeg_decode_ac_first(jpeg_decoder_t* dec, jpeg_component_t* comp,
    838                          int16_t* block, int ss, int se) {
    839     jpeg_huffman_table_t* ac_table = &dec->ac_tables[comp->ac_table_id];
    840 
    841     int k = ss;
    842     while (k <= se) {
    843         int rs = jpeg_decode_huffman(dec, ac_table);
    844         if (rs < 0) return -1;
    845 
    846         int r = (rs >> 4) & 0x0F;
    847         int s = rs & 0x0F;
    848 
    849         if (s == 0) {
    850             if (r == 15) {
    851                 k += 16;
    852             } else {
    853                 /* EOB run */
    854                 break;
    855             }
    856         } else {
    857             k += r;
    858             if (k > se) break;
    859 
    860             int ac = jpeg_receive_extend(dec, s);
    861             block[jpeg_zigzag[k]] = ac;
    862             k++;
    863         }
    864     }
    865 
    866     return 0;
    867 }
    868 
    869 int jpeg_decode_ac_refine(jpeg_decoder_t* dec, int16_t* block,
    870                           int ss, int se, int al) {
    871     int k = ss;
    872 
    873     while (k <= se) {
    874         int bit = jpeg_get_bits(dec, 1);
    875         if (bit < 0) return -1;
    876 
    877         int zz = jpeg_zigzag[k];
    878         if (block[zz] != 0) {
    879             block[zz] |= (bit << al);
    880         }
    881         k++;
    882     }
    883 
    884     return 0;
    885 }
    886 
    887 /* ============================================================
    888  * YCbCr to RGB Conversion
    889  * ============================================================ */
    890 
    891 void jpeg_ycbcr_to_rgb(jpeg_decoder_t* dec, image_t* img) {
    892     if (dec->num_components == 1) {
    893         /* Grayscale */
    894         jpeg_component_t* y_comp = &dec->components[0];
    895 
    896         for (uint32_t py = 0; py < img->height; py++) {
    897             for (uint32_t px = 0; px < img->width; px++) {
    898                 int y_x = px * y_comp->h_samp / dec->max_h_samp;
    899                 int y_y = py * y_comp->v_samp / dec->max_v_samp;
    900 
    901                 uint8_t y = y_comp->data[y_y * y_comp->stride + y_x];
    902 
    903                 image_set_pixel(img, px, py, y, y, y, 255);
    904             }
    905         }
    906     } else if (dec->num_components >= 3) {
    907         /* YCbCr to RGB */
    908         jpeg_component_t* y_comp = &dec->components[0];
    909         jpeg_component_t* cb_comp = &dec->components[1];
    910         jpeg_component_t* cr_comp = &dec->components[2];
    911 
    912         for (uint32_t py = 0; py < img->height; py++) {
    913             for (uint32_t px = 0; px < img->width; px++) {
    914                 /* Calculate sample positions for each component */
    915                 int y_x = px * y_comp->h_samp / dec->max_h_samp;
    916                 int y_y = py * y_comp->v_samp / dec->max_v_samp;
    917 
    918                 int cb_x = px * cb_comp->h_samp / dec->max_h_samp;
    919                 int cb_y = py * cb_comp->v_samp / dec->max_v_samp;
    920 
    921                 int cr_x = px * cr_comp->h_samp / dec->max_h_samp;
    922                 int cr_y = py * cr_comp->v_samp / dec->max_v_samp;
    923 
    924                 /* Clamp to valid range */
    925                 if (y_x >= y_comp->width) y_x = y_comp->width - 1;
    926                 if (y_y >= y_comp->height) y_y = y_comp->height - 1;
    927                 if (cb_x >= cb_comp->width) cb_x = cb_comp->width - 1;
    928                 if (cb_y >= cb_comp->height) cb_y = cb_comp->height - 1;
    929                 if (cr_x >= cr_comp->width) cr_x = cr_comp->width - 1;
    930                 if (cr_y >= cr_comp->height) cr_y = cr_comp->height - 1;
    931 
    932                 int y = y_comp->data[y_y * y_comp->stride + y_x];
    933                 int cb = cb_comp->data[cb_y * cb_comp->stride + cb_x] - 128;
    934                 int cr = cr_comp->data[cr_y * cr_comp->stride + cr_x] - 128;
    935 
    936                 /* YCbCr to RGB conversion (ITU-R BT.601) */
    937                 /* R = Y + 1.402 * Cr */
    938                 /* G = Y - 0.344136 * Cb - 0.714136 * Cr */
    939                 /* B = Y + 1.772 * Cb */
    940 
    941                 /* Using fixed-point arithmetic (scaled by 2^16) */
    942                 int r = y + ((91881 * cr) >> 16);
    943                 int g = y - ((22554 * cb + 46802 * cr) >> 16);
    944                 int b = y + ((116130 * cb) >> 16);
    945 
    946                 image_set_pixel(img, px, py,
    947                                clip_uint8(r),
    948                                clip_uint8(g),
    949                                clip_uint8(b), 255);
    950             }
    951         }
    952     }
    953 }
    954 
    955 /* ============================================================
    956  * Scan Decoding
    957  * ============================================================ */
    958 
    959 int jpeg_decode_scan(jpeg_decoder_t* dec) {
    960     jpeg_scan_t scan;
    961 
    962     if (jpeg_parse_sos(dec, &scan) < 0) {
    963         return -1;
    964     }
    965 
    966     /* Initialize bit reader */
    967     dec->bits_buffer = 0;
    968     dec->bits_count = 0;
    969 
    970     /* Reset DC predictors */
    971     for (int i = 0; i < dec->num_components; i++) {
    972         dec->components[i].dc_pred = 0;
    973     }
    974 
    975     int mcu_count = 0;
    976     int restart_count = dec->restart_interval;
    977 
    978     /* Non-interleaved scan (single component) */
    979     if (scan.num_components == 1) {
    980         /* Find component */
    981         jpeg_component_t* comp = NULL;
    982         int comp_idx = 0;
    983         for (int i = 0; i < dec->num_components; i++) {
    984             if (dec->components[i].id == scan.component_ids[0]) {
    985                 comp = &dec->components[i];
    986                 comp_idx = i;
    987                 break;
    988             }
    989         }
    990 
    991         if (!comp) {
    992             dec->error = 1;
    993             snprintf(dec->error_msg, sizeof(dec->error_msg), "Component not found");
    994             return -1;
    995         }
    996 
    997         int blocks_x = comp->width / 8;
    998         int blocks_y = comp->height / 8;
    999 
   1000         for (int by = 0; by < blocks_y; by++) {
   1001             for (int bx = 0; bx < blocks_x; bx++) {
   1002                 int16_t block[64];
   1003 
   1004                 if (dec->is_progressive) {
   1005                     int block_idx = 0;
   1006                     for (int c = 0; c < comp_idx; c++) {
   1007                         block_idx += (dec->components[c].width / 8) *
   1008                                     (dec->components[c].height / 8);
   1009                     }
   1010                     block_idx += by * blocks_x + bx;
   1011                     int16_t* coef_block = dec->coef_blocks[block_idx];
   1012 
   1013                     if (scan.ss == 0 && scan.se == 0) {
   1014                         /* DC scan */
   1015                         if (scan.ah == 0) {
   1016                             if (jpeg_decode_dc_first(dec, comp, coef_block) < 0) return -1;
   1017                             coef_block[0] <<= scan.al;
   1018                         } else {
   1019                             if (jpeg_decode_dc_refine(dec, coef_block, scan.al) < 0) return -1;
   1020                         }
   1021                     } else {
   1022                         /* AC scan */
   1023                         if (scan.ah == 0) {
   1024                             if (jpeg_decode_ac_first(dec, comp, coef_block, scan.ss, scan.se) < 0) return -1;
   1025                             /* Shift all AC coefficients */
   1026                             for (int k = scan.ss; k <= scan.se; k++) {
   1027                                 coef_block[jpeg_zigzag[k]] <<= scan.al;
   1028                             }
   1029                         } else {
   1030                             if (jpeg_decode_ac_refine(dec, coef_block, scan.ss, scan.se, scan.al) < 0) return -1;
   1031                         }
   1032                     }
   1033                 } else {
   1034                     /* Baseline/Sequential */
   1035                     if (jpeg_decode_block(dec, block, comp, 0) < 0) return -1;
   1036 
   1037                     /* IDCT and store */
   1038                     uint8_t* out = comp->data + by * 8 * comp->stride + bx * 8;
   1039                     jpeg_idct_block(block, out, comp->stride);
   1040                 }
   1041 
   1042                 /* Handle restart */
   1043                 if (dec->restart_interval > 0) {
   1044                     mcu_count++;
   1045                     if (mcu_count >= restart_count && (by < blocks_y - 1 || bx < blocks_x - 1)) {
   1046                         jpeg_align_bits(dec);
   1047                         jpeg_next_marker(dec);
   1048                         dec->bits_buffer = 0;
   1049                         dec->bits_count = 0;
   1050                         comp->dc_pred = 0;
   1051                         mcu_count = 0;
   1052                     }
   1053                 }
   1054             }
   1055         }
   1056     } else {
   1057         /* Interleaved scan */
   1058         for (int mcu_y = 0; mcu_y < dec->mcu_rows; mcu_y++) {
   1059             for (int mcu_x = 0; mcu_x < dec->mcus_per_row; mcu_x++) {
   1060                 /* Process each component in the MCU */
   1061                 for (int c = 0; c < scan.num_components; c++) {
   1062                     /* Find component */
   1063                     jpeg_component_t* comp = NULL;
   1064                     for (int i = 0; i < dec->num_components; i++) {
   1065                         if (dec->components[i].id == scan.component_ids[c]) {
   1066                             comp = &dec->components[i];
   1067                             break;
   1068                         }
   1069                     }
   1070 
   1071                     if (!comp) continue;
   1072 
   1073                     /* Process blocks for this component in the MCU */
   1074                     for (int v = 0; v < comp->v_samp; v++) {
   1075                         for (int h = 0; h < comp->h_samp; h++) {
   1076                             int16_t block[64];
   1077 
   1078                             int bx = mcu_x * comp->h_samp + h;
   1079                             int by = mcu_y * comp->v_samp + v;
   1080 
   1081                             if (bx * 8 >= comp->width || by * 8 >= comp->height) {
   1082                                 /* Skip blocks outside component dimensions */
   1083                                 if (jpeg_decode_block(dec, block, comp, 0) < 0) return -1;
   1084                                 continue;
   1085                             }
   1086 
   1087                             if (jpeg_decode_block(dec, block, comp, 0) < 0) return -1;
   1088 
   1089                             /* IDCT and store */
   1090                             uint8_t* out = comp->data + by * 8 * comp->stride + bx * 8;
   1091                             jpeg_idct_block(block, out, comp->stride);
   1092                         }
   1093                     }
   1094                 }
   1095 
   1096                 /* Handle restart */
   1097                 if (dec->restart_interval > 0) {
   1098                     mcu_count++;
   1099                     if (mcu_count >= (int)dec->restart_interval &&
   1100                         (mcu_y < dec->mcu_rows - 1 || mcu_x < dec->mcus_per_row - 1)) {
   1101                         jpeg_align_bits(dec);
   1102                         jpeg_next_marker(dec);
   1103                         dec->bits_buffer = 0;
   1104                         dec->bits_count = 0;
   1105 
   1106                         /* Reset DC predictors */
   1107                         for (int i = 0; i < dec->num_components; i++) {
   1108                             dec->components[i].dc_pred = 0;
   1109                         }
   1110                         mcu_count = 0;
   1111                     }
   1112                 }
   1113             }
   1114         }
   1115     }
   1116 
   1117     return 0;
   1118 }
   1119 
   1120 /* ============================================================
   1121  * Progressive Final Processing
   1122  * ============================================================ */
   1123 
   1124 static void jpeg_progressive_finish(jpeg_decoder_t* dec) {
   1125     int block_idx = 0;
   1126 
   1127     for (int c = 0; c < dec->num_components; c++) {
   1128         jpeg_component_t* comp = &dec->components[c];
   1129         jpeg_quant_table_t* qt = &dec->quant_tables[comp->quant_table_id];
   1130 
   1131         int blocks_x = comp->width / 8;
   1132         int blocks_y = comp->height / 8;
   1133 
   1134         for (int by = 0; by < blocks_y; by++) {
   1135             for (int bx = 0; bx < blocks_x; bx++) {
   1136                 int16_t* coef_block = dec->coef_blocks[block_idx++];
   1137                 int16_t block[64];
   1138 
   1139                 /* Dequantize */
   1140                 for (int k = 0; k < 64; k++) {
   1141                     block[k] = coef_block[k] * qt->table[k];
   1142                 }
   1143 
   1144                 /* IDCT and store */
   1145                 uint8_t* out = comp->data + by * 8 * comp->stride + bx * 8;
   1146                 jpeg_idct_block(block, out, comp->stride);
   1147             }
   1148         }
   1149     }
   1150 }
   1151 
   1152 /* ============================================================
   1153  * Main Decode Function
   1154  * ============================================================ */
   1155 
   1156 image_t* jpeg_decode(const uint8_t* data, uint32_t data_size) {
   1157     if (!data || data_size < 2) return NULL;
   1158 
   1159     jpeg_decoder_t* dec = jpeg_decoder_create();
   1160     if (!dec) return NULL;
   1161 
   1162     /* Parse markers up to first SOS */
   1163     if (jpeg_parse_markers(dec, data, data_size) < 0) {
   1164         jpeg_decoder_destroy(dec);
   1165         return NULL;
   1166     }
   1167 
   1168     /* Validate we have what we need */
   1169     if (dec->width == 0 || dec->height == 0) {
   1170         jpeg_decoder_destroy(dec);
   1171         return NULL;
   1172     }
   1173 
   1174     /* Decode scans */
   1175     if (dec->is_progressive) {
   1176         /* Progressive: may have multiple scans */
   1177         while (dec->pos < data_size - 1) {
   1178             /* Find SOS marker */
   1179             if (dec->data[dec->pos - 2] == 0xFF && dec->data[dec->pos - 1] == 0xDA) {
   1180                 if (jpeg_decode_scan(dec) < 0) {
   1181                     jpeg_decoder_destroy(dec);
   1182                     return NULL;
   1183                 }
   1184             }
   1185 
   1186             /* Find next marker */
   1187             int marker = jpeg_next_marker(dec);
   1188             if (marker < 0 || marker == JPEG_EOI) break;
   1189 
   1190             if (marker == JPEG_DHT) {
   1191                 dec->pos -= 2;
   1192                 jpeg_parse_dht(dec);
   1193             } else if (marker == JPEG_SOS) {
   1194                 dec->pos -= 2;
   1195             }
   1196         }
   1197 
   1198         /* Final IDCT pass */
   1199         jpeg_progressive_finish(dec);
   1200     } else {
   1201         /* Baseline/Sequential: single scan */
   1202         if (jpeg_decode_scan(dec) < 0) {
   1203             jpeg_decoder_destroy(dec);
   1204             return NULL;
   1205         }
   1206     }
   1207 
   1208     /* Create output image */
   1209     image_t* img = image_create(dec->width, dec->height, 24);
   1210     if (!img) {
   1211         jpeg_decoder_destroy(dec);
   1212         return NULL;
   1213     }
   1214 
   1215     /* Convert to RGB */
   1216     jpeg_ycbcr_to_rgb(dec, img);
   1217 
   1218     jpeg_decoder_destroy(dec);
   1219     return img;
   1220 }
   1221 
   1222 image_t* jpeg_load_file(const char* filename) {
   1223     (void)filename;
   1224     /* TODO: Implement file loading once filesystem is available */
   1225     return NULL;
   1226 }
   1227 
   1228 /* ============================================================
   1229  * Lua Bindings
   1230  * ============================================================ */
   1231 
   1232 int lua_jpeg_load(lua_State* L) {
   1233     size_t data_size;
   1234     const uint8_t* data = (const uint8_t*)luaL_checklstring(L, 1, &data_size);
   1235 
   1236     image_t* img = jpeg_decode(data, (uint32_t)data_size);
   1237 
   1238     if (img) {
   1239         lua_pushlightuserdata(L, img);
   1240         return 1;
   1241     } else {
   1242         lua_pushnil(L);
   1243         lua_pushstring(L, "Failed to decode JPEG");
   1244         return 2;
   1245     }
   1246 }