decoder_JPEG.c (39064B)
1 /* 2 * JPEG Decoder Implementation 3 * Supports: Baseline DCT, Extended Sequential DCT, Progressive DCT 4 * Chroma subsampling: 4:4:4, 4:2:2, 4:2:0, 4:1:1 5 * 6 * References: 7 * - ITU-T T.81 (JPEG Standard) 8 * - ISO/IEC 10918-1 9 */ 10 11 #include "decoder_JPEG.h" 12 #include <stdlib.h> 13 #include <string.h> 14 #include <lauxlib.h> 15 16 /* Zigzag ordering table */ 17 const uint8_t jpeg_zigzag[64] = { 18 0, 1, 8, 16, 9, 2, 3, 10, 19 17, 24, 32, 25, 18, 11, 4, 5, 20 12, 19, 26, 33, 40, 48, 41, 34, 21 27, 20, 13, 6, 7, 14, 21, 28, 22 35, 42, 49, 56, 57, 50, 43, 36, 23 29, 22, 15, 23, 30, 37, 44, 51, 24 58, 59, 52, 45, 38, 31, 39, 46, 25 53, 60, 61, 54, 47, 55, 62, 63 26 }; 27 28 /* Inverse zigzag (zigzag to normal order) */ 29 const uint8_t jpeg_unzigzag[64] = { 30 0, 1, 5, 6, 14, 15, 27, 28, 31 2, 4, 7, 13, 16, 26, 29, 42, 32 3, 8, 12, 17, 25, 30, 41, 43, 33 9, 11, 18, 24, 31, 40, 44, 53, 34 10, 19, 23, 32, 39, 45, 52, 54, 35 20, 22, 33, 38, 46, 51, 55, 60, 36 21, 34, 37, 47, 50, 56, 59, 61, 37 35, 36, 48, 49, 57, 58, 62, 63 38 }; 39 40 /* IDCT constants - scaled by 2^12 for fixed-point math */ 41 #define IDCT_SCALE 4096 42 #define IDCT_ROUND 2048 43 44 /* Cosine values for IDCT, scaled by 2^14 */ 45 #define C1 16069 /* cos(1*pi/16) * 2^14 */ 46 #define C2 15137 /* cos(2*pi/16) * 2^14 */ 47 #define C3 13623 /* cos(3*pi/16) * 2^14 */ 48 #define C4 11585 /* cos(4*pi/16) * 2^14 = sqrt(2) * 2^13 */ 49 #define C5 9102 /* cos(5*pi/16) * 2^14 */ 50 #define C6 6270 /* cos(6*pi/16) * 2^14 */ 51 #define C7 3196 /* cos(7*pi/16) * 2^14 */ 52 53 /* Clip value to 0-255 range */ 54 static inline uint8_t clip_uint8(int x) { 55 if (x < 0) return 0; 56 if (x > 255) return 255; 57 return (uint8_t)x; 58 } 59 60 /* Read 16-bit big-endian value */ 61 static inline uint16_t read_be16(const uint8_t* data) { 62 return (uint16_t)((data[0] << 8) | data[1]); 63 } 64 65 /* ============================================================ 66 * Memory Management 67 * ============================================================ */ 68 69 jpeg_decoder_t* jpeg_decoder_create(void) { 70 jpeg_decoder_t* dec = (jpeg_decoder_t*)calloc(1, sizeof(jpeg_decoder_t)); 71 if (!dec) return NULL; 72 73 /* Initialize all table validity flags to 0 */ 74 memset(dec->dc_table_valid, 0, sizeof(dec->dc_table_valid)); 75 memset(dec->ac_table_valid, 0, sizeof(dec->ac_table_valid)); 76 memset(dec->quant_table_valid, 0, sizeof(dec->quant_table_valid)); 77 78 return dec; 79 } 80 81 void jpeg_decoder_destroy(jpeg_decoder_t* dec) { 82 if (!dec) return; 83 84 /* Free component data */ 85 for (int i = 0; i < JPEG_MAX_COMPONENTS; i++) { 86 if (dec->components[i].data) { 87 free(dec->components[i].data); 88 } 89 } 90 91 /* Free progressive coefficient blocks */ 92 if (dec->coef_blocks) { 93 for (int i = 0; i < dec->coef_blocks_count; i++) { 94 if (dec->coef_blocks[i]) { 95 free(dec->coef_blocks[i]); 96 } 97 } 98 free(dec->coef_blocks); 99 } 100 101 free(dec); 102 } 103 104 /* ============================================================ 105 * Bit Reading 106 * ============================================================ */ 107 108 /* Fill bit buffer from stream */ 109 static int jpeg_fill_bits(jpeg_decoder_t* dec) { 110 while (dec->bits_count < 24 && dec->pos < dec->data_size) { 111 uint8_t byte = dec->data[dec->pos++]; 112 113 /* Handle byte stuffing (0xFF followed by 0x00) */ 114 if (byte == 0xFF) { 115 if (dec->pos >= dec->data_size) { 116 dec->error = 1; 117 return -1; 118 } 119 uint8_t next = dec->data[dec->pos]; 120 if (next == 0x00) { 121 /* Stuffed byte, skip the 0x00 */ 122 dec->pos++; 123 } else if (next >= 0xD0 && next <= 0xD7) { 124 /* Restart marker - ignore and continue */ 125 dec->pos++; 126 continue; 127 } else { 128 /* Other marker - we've hit the end of scan data */ 129 dec->pos--; 130 break; 131 } 132 } 133 134 dec->bits_buffer = (dec->bits_buffer << 8) | byte; 135 dec->bits_count += 8; 136 } 137 return 0; 138 } 139 140 int jpeg_get_bits(jpeg_decoder_t* dec, int nbits) { 141 if (nbits == 0) return 0; 142 143 while (dec->bits_count < nbits) { 144 if (jpeg_fill_bits(dec) < 0) return -1; 145 if (dec->bits_count < nbits) { 146 dec->error = 1; 147 return -1; 148 } 149 } 150 151 dec->bits_count -= nbits; 152 return (dec->bits_buffer >> dec->bits_count) & ((1 << nbits) - 1); 153 } 154 155 int jpeg_peek_bits(jpeg_decoder_t* dec, int nbits) { 156 if (nbits == 0) return 0; 157 158 while (dec->bits_count < nbits) { 159 if (jpeg_fill_bits(dec) < 0) return -1; 160 if (dec->bits_count < nbits) { 161 dec->error = 1; 162 return -1; 163 } 164 } 165 166 return (dec->bits_buffer >> (dec->bits_count - nbits)) & ((1 << nbits) - 1); 167 } 168 169 void jpeg_skip_bits(jpeg_decoder_t* dec, int nbits) { 170 dec->bits_count -= nbits; 171 } 172 173 void jpeg_align_bits(jpeg_decoder_t* dec) { 174 dec->bits_count &= ~7; /* Align to byte boundary */ 175 } 176 177 int jpeg_next_marker(jpeg_decoder_t* dec) { 178 /* Align to byte boundary */ 179 jpeg_align_bits(dec); 180 dec->bits_count = 0; 181 dec->bits_buffer = 0; 182 183 /* Find next marker */ 184 while (dec->pos < dec->data_size - 1) { 185 if (dec->data[dec->pos] == 0xFF) { 186 uint8_t marker = dec->data[dec->pos + 1]; 187 if (marker != 0x00 && marker != 0xFF) { 188 dec->pos += 2; 189 return 0xFF00 | marker; 190 } 191 } 192 dec->pos++; 193 } 194 195 return -1; 196 } 197 198 /* ============================================================ 199 * Huffman Table Handling 200 * ============================================================ */ 201 202 int jpeg_build_huffman_table(jpeg_huffman_table_t* table) { 203 int code = 0; 204 int si = 1; 205 int p = 0; 206 207 /* Generate huffcode and huffsize arrays */ 208 for (int i = 1; i <= 16; i++) { 209 for (int j = 0; j < table->bits[i]; j++) { 210 table->huffcode[p] = code; 211 table->huffsize[p] = i; 212 p++; 213 code++; 214 } 215 code <<= 1; 216 } 217 table->num_symbols = p; 218 219 /* Generate mincode, maxcode, and valptr */ 220 p = 0; 221 for (int i = 1; i <= 16; i++) { 222 if (table->bits[i]) { 223 table->valptr[i] = p; 224 table->mincode[i] = table->huffcode[p]; 225 p += table->bits[i]; 226 table->maxcode[i] = table->huffcode[p - 1]; 227 } else { 228 table->maxcode[i] = -1; 229 table->mincode[i] = 0; 230 table->valptr[i] = 0; 231 } 232 } 233 table->maxcode[17] = 0xFFFFFF; /* Sentinel */ 234 235 return 0; 236 } 237 238 int jpeg_decode_huffman(jpeg_decoder_t* dec, jpeg_huffman_table_t* table) { 239 int code = 0; 240 int size = 1; 241 242 /* Read bits one at a time until we find a valid code */ 243 while (size <= 16) { 244 int bit = jpeg_get_bits(dec, 1); 245 if (bit < 0) return -1; 246 247 code = (code << 1) | bit; 248 249 if (code <= table->maxcode[size]) { 250 int index = table->valptr[size] + code - table->mincode[size]; 251 return table->huffval[index]; 252 } 253 size++; 254 } 255 256 dec->error = 1; 257 snprintf(dec->error_msg, sizeof(dec->error_msg), "Invalid Huffman code"); 258 return -1; 259 } 260 261 /* Extend signed value from Huffman decoding */ 262 int jpeg_receive_extend(jpeg_decoder_t* dec, int nbits) { 263 if (nbits == 0) return 0; 264 265 int value = jpeg_get_bits(dec, nbits); 266 if (value < 0) return 0; 267 268 /* Sign extension */ 269 if (value < (1 << (nbits - 1))) { 270 value = value - (1 << nbits) + 1; 271 } 272 273 return value; 274 } 275 276 /* ============================================================ 277 * Marker Parsing 278 * ============================================================ */ 279 280 static int jpeg_parse_dqt(jpeg_decoder_t* dec) { 281 uint16_t length = read_be16(dec->data + dec->pos); 282 dec->pos += 2; 283 int remaining = length - 2; 284 285 while (remaining > 0) { 286 uint8_t info = dec->data[dec->pos++]; 287 remaining--; 288 289 int precision = (info >> 4) & 0x0F; /* 0 = 8-bit, 1 = 16-bit */ 290 int table_id = info & 0x0F; 291 292 if (table_id >= JPEG_MAX_QUANT_TABLES) { 293 dec->error = 1; 294 snprintf(dec->error_msg, sizeof(dec->error_msg), 295 "Invalid quantization table ID: %d", table_id); 296 return -1; 297 } 298 299 jpeg_quant_table_t* qt = &dec->quant_tables[table_id]; 300 qt->precision = precision; 301 302 if (precision == 0) { 303 /* 8-bit values */ 304 for (int i = 0; i < 64; i++) { 305 qt->table[jpeg_zigzag[i]] = dec->data[dec->pos++]; 306 } 307 remaining -= 64; 308 } else { 309 /* 16-bit values */ 310 for (int i = 0; i < 64; i++) { 311 qt->table[jpeg_zigzag[i]] = read_be16(dec->data + dec->pos); 312 dec->pos += 2; 313 } 314 remaining -= 128; 315 } 316 317 dec->quant_table_valid[table_id] = 1; 318 } 319 320 return 0; 321 } 322 323 static int jpeg_parse_dht(jpeg_decoder_t* dec) { 324 uint16_t length = read_be16(dec->data + dec->pos); 325 dec->pos += 2; 326 int remaining = length - 2; 327 328 while (remaining > 0) { 329 uint8_t info = dec->data[dec->pos++]; 330 remaining--; 331 332 int table_class = (info >> 4) & 0x0F; /* 0 = DC, 1 = AC */ 333 int table_id = info & 0x0F; 334 335 if (table_id >= JPEG_MAX_HUFFMAN_TABLES) { 336 dec->error = 1; 337 snprintf(dec->error_msg, sizeof(dec->error_msg), 338 "Invalid Huffman table ID: %d", table_id); 339 return -1; 340 } 341 342 jpeg_huffman_table_t* ht; 343 if (table_class == 0) { 344 ht = &dec->dc_tables[table_id]; 345 dec->dc_table_valid[table_id] = 1; 346 } else { 347 ht = &dec->ac_tables[table_id]; 348 dec->ac_table_valid[table_id] = 1; 349 } 350 351 /* Read bits counts (BITS) */ 352 int total_symbols = 0; 353 ht->bits[0] = 0; 354 for (int i = 1; i <= 16; i++) { 355 ht->bits[i] = dec->data[dec->pos++]; 356 total_symbols += ht->bits[i]; 357 remaining--; 358 } 359 360 /* Read symbols (HUFFVAL) */ 361 for (int i = 0; i < total_symbols; i++) { 362 ht->huffval[i] = dec->data[dec->pos++]; 363 remaining--; 364 } 365 366 /* Build derived tables */ 367 jpeg_build_huffman_table(ht); 368 } 369 370 return 0; 371 } 372 373 static int jpeg_parse_sof(jpeg_decoder_t* dec, uint8_t marker) { 374 uint16_t length = read_be16(dec->data + dec->pos); 375 dec->pos += 2; 376 (void)length; 377 378 dec->frame_type = marker; 379 dec->is_baseline = (marker == 0xC0); 380 dec->is_progressive = (marker == 0xC2 || marker == 0xCA); 381 382 dec->precision = dec->data[dec->pos++]; 383 dec->height = read_be16(dec->data + dec->pos); 384 dec->pos += 2; 385 dec->width = read_be16(dec->data + dec->pos); 386 dec->pos += 2; 387 dec->num_components = dec->data[dec->pos++]; 388 389 if (dec->num_components > JPEG_MAX_COMPONENTS) { 390 dec->error = 1; 391 snprintf(dec->error_msg, sizeof(dec->error_msg), 392 "Too many components: %d", dec->num_components); 393 return -1; 394 } 395 396 if (dec->precision != 8 && dec->precision != 12) { 397 dec->error = 1; 398 snprintf(dec->error_msg, sizeof(dec->error_msg), 399 "Unsupported precision: %d", dec->precision); 400 return -1; 401 } 402 403 dec->max_h_samp = 1; 404 dec->max_v_samp = 1; 405 406 for (int i = 0; i < dec->num_components; i++) { 407 jpeg_component_t* comp = &dec->components[i]; 408 comp->id = dec->data[dec->pos++]; 409 uint8_t sampling = dec->data[dec->pos++]; 410 comp->h_samp = (sampling >> 4) & 0x0F; 411 comp->v_samp = sampling & 0x0F; 412 comp->quant_table_id = dec->data[dec->pos++]; 413 comp->dc_pred = 0; 414 415 if (comp->h_samp > dec->max_h_samp) dec->max_h_samp = comp->h_samp; 416 if (comp->v_samp > dec->max_v_samp) dec->max_v_samp = comp->v_samp; 417 418 if (comp->quant_table_id >= JPEG_MAX_QUANT_TABLES) { 419 dec->error = 1; 420 snprintf(dec->error_msg, sizeof(dec->error_msg), 421 "Invalid quantization table ID: %d", comp->quant_table_id); 422 return -1; 423 } 424 } 425 426 /* Calculate MCU dimensions */ 427 dec->mcu_width = dec->max_h_samp * 8; 428 dec->mcu_height = dec->max_v_samp * 8; 429 dec->mcus_per_row = (dec->width + dec->mcu_width - 1) / dec->mcu_width; 430 dec->mcu_rows = (dec->height + dec->mcu_height - 1) / dec->mcu_height; 431 432 /* Allocate component buffers */ 433 for (int i = 0; i < dec->num_components; i++) { 434 jpeg_component_t* comp = &dec->components[i]; 435 436 /* Calculate component dimensions */ 437 comp->width = dec->mcus_per_row * comp->h_samp * 8; 438 comp->height = dec->mcu_rows * comp->v_samp * 8; 439 comp->stride = comp->width; 440 441 comp->data = (uint8_t*)calloc(comp->width * comp->height, 1); 442 if (!comp->data) { 443 dec->error = 1; 444 snprintf(dec->error_msg, sizeof(dec->error_msg), "Out of memory"); 445 return -1; 446 } 447 } 448 449 /* Allocate coefficient blocks for progressive JPEG */ 450 if (dec->is_progressive) { 451 int total_blocks = 0; 452 for (int i = 0; i < dec->num_components; i++) { 453 jpeg_component_t* comp = &dec->components[i]; 454 int blocks_x = comp->width / 8; 455 int blocks_y = comp->height / 8; 456 total_blocks += blocks_x * blocks_y; 457 } 458 459 dec->coef_blocks = (int16_t**)calloc(total_blocks, sizeof(int16_t*)); 460 if (!dec->coef_blocks) { 461 dec->error = 1; 462 snprintf(dec->error_msg, sizeof(dec->error_msg), "Out of memory"); 463 return -1; 464 } 465 466 for (int i = 0; i < total_blocks; i++) { 467 dec->coef_blocks[i] = (int16_t*)calloc(64, sizeof(int16_t)); 468 if (!dec->coef_blocks[i]) { 469 dec->error = 1; 470 snprintf(dec->error_msg, sizeof(dec->error_msg), "Out of memory"); 471 return -1; 472 } 473 } 474 dec->coef_blocks_count = total_blocks; 475 } 476 477 return 0; 478 } 479 480 static int jpeg_parse_dri(jpeg_decoder_t* dec) { 481 uint16_t length = read_be16(dec->data + dec->pos); 482 dec->pos += 2; 483 (void)length; 484 485 dec->restart_interval = read_be16(dec->data + dec->pos); 486 dec->pos += 2; 487 488 return 0; 489 } 490 491 static int jpeg_parse_sos(jpeg_decoder_t* dec, jpeg_scan_t* scan) { 492 uint16_t length = read_be16(dec->data + dec->pos); 493 dec->pos += 2; 494 (void)length; 495 496 scan->num_components = dec->data[dec->pos++]; 497 498 for (int i = 0; i < scan->num_components; i++) { 499 uint8_t comp_id = dec->data[dec->pos++]; 500 uint8_t table_ids = dec->data[dec->pos++]; 501 502 scan->component_ids[i] = comp_id; 503 504 /* Find matching component */ 505 for (int j = 0; j < dec->num_components; j++) { 506 if (dec->components[j].id == comp_id) { 507 dec->components[j].dc_table_id = (table_ids >> 4) & 0x0F; 508 dec->components[j].ac_table_id = table_ids & 0x0F; 509 break; 510 } 511 } 512 } 513 514 scan->ss = dec->data[dec->pos++]; /* Start of spectral selection */ 515 scan->se = dec->data[dec->pos++]; /* End of spectral selection */ 516 uint8_t approx = dec->data[dec->pos++]; 517 scan->ah = (approx >> 4) & 0x0F; /* Successive approximation high */ 518 scan->al = approx & 0x0F; /* Successive approximation low */ 519 520 return 0; 521 } 522 523 int jpeg_parse_markers(jpeg_decoder_t* dec, const uint8_t* data, uint32_t size) { 524 dec->data = data; 525 dec->data_size = size; 526 dec->pos = 0; 527 528 /* Check SOI marker */ 529 if (size < 2 || data[0] != 0xFF || data[1] != 0xD8) { 530 dec->error = 1; 531 snprintf(dec->error_msg, sizeof(dec->error_msg), "Invalid JPEG: missing SOI marker"); 532 return -1; 533 } 534 dec->pos = 2; 535 536 /* Parse markers */ 537 while (dec->pos < size - 1) { 538 /* Find marker */ 539 if (dec->data[dec->pos] != 0xFF) { 540 dec->pos++; 541 continue; 542 } 543 544 uint8_t marker = dec->data[dec->pos + 1]; 545 dec->pos += 2; 546 547 /* Skip padding bytes */ 548 if (marker == 0xFF || marker == 0x00) { 549 continue; 550 } 551 552 switch (marker) { 553 case 0xD8: /* SOI - already handled */ 554 break; 555 556 case 0xD9: /* EOI */ 557 return 0; 558 559 case 0xDB: /* DQT */ 560 if (jpeg_parse_dqt(dec) < 0) return -1; 561 break; 562 563 case 0xC4: /* DHT */ 564 if (jpeg_parse_dht(dec) < 0) return -1; 565 break; 566 567 case 0xC0: /* SOF0 - Baseline DCT */ 568 case 0xC1: /* SOF1 - Extended Sequential DCT */ 569 case 0xC2: /* SOF2 - Progressive DCT */ 570 if (jpeg_parse_sof(dec, marker) < 0) return -1; 571 break; 572 573 case 0xC3: /* SOF3 - Lossless */ 574 case 0xC5: /* SOF5 */ 575 case 0xC6: /* SOF6 */ 576 case 0xC7: /* SOF7 */ 577 case 0xC9: /* SOF9 */ 578 case 0xCA: /* SOF10 */ 579 case 0xCB: /* SOF11 */ 580 case 0xCD: /* SOF13 */ 581 case 0xCE: /* SOF14 */ 582 case 0xCF: /* SOF15 */ 583 if (marker == 0xCA) { 584 /* Progressive with arithmetic coding */ 585 if (jpeg_parse_sof(dec, marker) < 0) return -1; 586 } else { 587 dec->error = 1; 588 snprintf(dec->error_msg, sizeof(dec->error_msg), 589 "Unsupported JPEG type: SOF%d", marker - 0xC0); 590 return -1; 591 } 592 break; 593 594 case 0xDD: /* DRI */ 595 if (jpeg_parse_dri(dec) < 0) return -1; 596 break; 597 598 case 0xDA: /* SOS - Start of Scan */ 599 return 0; /* Stop parsing, scan data follows */ 600 601 case 0xE0: case 0xE1: case 0xE2: case 0xE3: 602 case 0xE4: case 0xE5: case 0xE6: case 0xE7: 603 case 0xE8: case 0xE9: case 0xEA: case 0xEB: 604 case 0xEC: case 0xED: case 0xEE: case 0xEF: 605 case 0xFE: /* APP and COM markers - skip */ 606 { 607 uint16_t length = read_be16(dec->data + dec->pos); 608 dec->pos += length; 609 break; 610 } 611 612 case 0xD0: case 0xD1: case 0xD2: case 0xD3: 613 case 0xD4: case 0xD5: case 0xD6: case 0xD7: 614 /* RST markers - no length */ 615 break; 616 617 default: 618 /* Unknown marker - try to skip */ 619 if (dec->pos + 2 <= size) { 620 uint16_t length = read_be16(dec->data + dec->pos); 621 dec->pos += length; 622 } 623 break; 624 } 625 } 626 627 return 0; 628 } 629 630 /* ============================================================ 631 * IDCT Implementation (AAN algorithm) 632 * ============================================================ */ 633 634 void jpeg_idct_block(int16_t* block, uint8_t* output, int stride) { 635 int tmp[64]; 636 int* tmpptr; 637 int16_t* blkptr; 638 int i; 639 640 /* Pass 1: Process columns */ 641 blkptr = block; 642 tmpptr = tmp; 643 for (i = 0; i < 8; i++) { 644 int s0 = blkptr[0*8]; 645 int s1 = blkptr[1*8]; 646 int s2 = blkptr[2*8]; 647 int s3 = blkptr[3*8]; 648 int s4 = blkptr[4*8]; 649 int s5 = blkptr[5*8]; 650 int s6 = blkptr[6*8]; 651 int s7 = blkptr[7*8]; 652 653 /* Check for all-zero AC coefficients */ 654 if ((s1 | s2 | s3 | s4 | s5 | s6 | s7) == 0) { 655 int dc = s0 << 2; 656 tmpptr[0*8] = dc; 657 tmpptr[1*8] = dc; 658 tmpptr[2*8] = dc; 659 tmpptr[3*8] = dc; 660 tmpptr[4*8] = dc; 661 tmpptr[5*8] = dc; 662 tmpptr[6*8] = dc; 663 tmpptr[7*8] = dc; 664 } else { 665 /* Even part */ 666 int t0 = s0 + s4; 667 int t1 = s0 - s4; 668 int t2 = (s2 * C6 - s6 * C2) >> 14; 669 int t3 = (s2 * C2 + s6 * C6) >> 14; 670 671 int e0 = t0 + t3; 672 int e1 = t1 + t2; 673 int e2 = t1 - t2; 674 int e3 = t0 - t3; 675 676 /* Odd part */ 677 int t4 = (s1 * C7 - s7 * C1) >> 14; 678 int t5 = (s5 * C3 - s3 * C5) >> 14; 679 int t6 = (s5 * C5 + s3 * C3) >> 14; 680 int t7 = (s1 * C1 + s7 * C7) >> 14; 681 682 int o0 = t4 + t5; 683 int o1 = t7 - t6; 684 int o2 = ((t4 - t5 + t7 + t6) * C4) >> 14; 685 int o3 = t7 + t6; 686 687 int o4 = o2 - o0; 688 int o5 = o1; 689 690 /* Final output */ 691 tmpptr[0*8] = (e0 + o3) << 2; 692 tmpptr[7*8] = (e0 - o3) << 2; 693 tmpptr[1*8] = (e1 + o4) << 2; 694 tmpptr[6*8] = (e1 - o4) << 2; 695 tmpptr[2*8] = (e2 + o5) << 2; 696 tmpptr[5*8] = (e2 - o5) << 2; 697 tmpptr[3*8] = (e3 + o0 - o2 + o1) << 2; 698 tmpptr[4*8] = (e3 - o0 + o2 - o1) << 2; 699 } 700 701 blkptr++; 702 tmpptr++; 703 } 704 705 /* Pass 2: Process rows */ 706 tmpptr = tmp; 707 for (i = 0; i < 8; i++) { 708 int s0 = tmpptr[0]; 709 int s1 = tmpptr[1]; 710 int s2 = tmpptr[2]; 711 int s3 = tmpptr[3]; 712 int s4 = tmpptr[4]; 713 int s5 = tmpptr[5]; 714 int s6 = tmpptr[6]; 715 int s7 = tmpptr[7]; 716 717 /* Even part */ 718 int t0 = s0 + s4; 719 int t1 = s0 - s4; 720 int t2 = (s2 * C6 - s6 * C2) >> 14; 721 int t3 = (s2 * C2 + s6 * C6) >> 14; 722 723 int e0 = t0 + t3; 724 int e1 = t1 + t2; 725 int e2 = t1 - t2; 726 int e3 = t0 - t3; 727 728 /* Odd part */ 729 int t4 = (s1 * C7 - s7 * C1) >> 14; 730 int t5 = (s5 * C3 - s3 * C5) >> 14; 731 int t6 = (s5 * C5 + s3 * C3) >> 14; 732 int t7 = (s1 * C1 + s7 * C7) >> 14; 733 734 int o0 = t4 + t5; 735 int o1 = t7 - t6; 736 int o2 = ((t4 - t5 + t7 + t6) * C4) >> 14; 737 int o3 = t7 + t6; 738 739 int o4 = o2 - o0; 740 int o5 = o1; 741 742 /* Final output with level shift (add 128) and clamp */ 743 output[0] = clip_uint8(((e0 + o3) >> 5) + 128); 744 output[7] = clip_uint8(((e0 - o3) >> 5) + 128); 745 output[1] = clip_uint8(((e1 + o4) >> 5) + 128); 746 output[6] = clip_uint8(((e1 - o4) >> 5) + 128); 747 output[2] = clip_uint8(((e2 + o5) >> 5) + 128); 748 output[5] = clip_uint8(((e2 - o5) >> 5) + 128); 749 output[3] = clip_uint8(((e3 + o0 - o2 + o1) >> 5) + 128); 750 output[4] = clip_uint8(((e3 - o0 + o2 - o1) >> 5) + 128); 751 752 tmpptr += 8; 753 output += stride; 754 } 755 } 756 757 /* ============================================================ 758 * Block Decoding 759 * ============================================================ */ 760 761 static int jpeg_decode_block(jpeg_decoder_t* dec, int16_t* block, 762 jpeg_component_t* comp, int is_dc_only) { 763 jpeg_huffman_table_t* dc_table = &dec->dc_tables[comp->dc_table_id]; 764 jpeg_huffman_table_t* ac_table = &dec->ac_tables[comp->ac_table_id]; 765 jpeg_quant_table_t* qt = &dec->quant_tables[comp->quant_table_id]; 766 767 memset(block, 0, 64 * sizeof(int16_t)); 768 769 /* Decode DC coefficient */ 770 int dc_size = jpeg_decode_huffman(dec, dc_table); 771 if (dc_size < 0) return -1; 772 773 int dc_diff = jpeg_receive_extend(dec, dc_size); 774 comp->dc_pred += dc_diff; 775 block[0] = comp->dc_pred * qt->table[0]; 776 777 if (is_dc_only) return 0; 778 779 /* Decode AC coefficients */ 780 int k = 1; 781 while (k < 64) { 782 int rs = jpeg_decode_huffman(dec, ac_table); 783 if (rs < 0) return -1; 784 785 int r = (rs >> 4) & 0x0F; /* Run length */ 786 int s = rs & 0x0F; /* Size */ 787 788 if (s == 0) { 789 if (r == 0) { 790 /* EOB - End of Block */ 791 break; 792 } else if (r == 15) { 793 /* ZRL - Zero Run Length (16 zeros) */ 794 k += 16; 795 } else { 796 break; 797 } 798 } else { 799 k += r; 800 if (k >= 64) break; 801 802 int ac = jpeg_receive_extend(dec, s); 803 int zz_index = jpeg_zigzag[k]; 804 block[zz_index] = ac * qt->table[zz_index]; 805 k++; 806 } 807 } 808 809 return 0; 810 } 811 812 /* ============================================================ 813 * Progressive JPEG Decoding 814 * ============================================================ */ 815 816 int jpeg_decode_dc_first(jpeg_decoder_t* dec, jpeg_component_t* comp, int16_t* block) { 817 jpeg_huffman_table_t* dc_table = &dec->dc_tables[comp->dc_table_id]; 818 819 int dc_size = jpeg_decode_huffman(dec, dc_table); 820 if (dc_size < 0) return -1; 821 822 int dc_diff = jpeg_receive_extend(dec, dc_size); 823 comp->dc_pred += dc_diff; 824 block[0] = comp->dc_pred; 825 826 return 0; 827 } 828 829 int jpeg_decode_dc_refine(jpeg_decoder_t* dec, int16_t* block, int al) { 830 int bit = jpeg_get_bits(dec, 1); 831 if (bit < 0) return -1; 832 833 block[0] |= (bit << al); 834 return 0; 835 } 836 837 int jpeg_decode_ac_first(jpeg_decoder_t* dec, jpeg_component_t* comp, 838 int16_t* block, int ss, int se) { 839 jpeg_huffman_table_t* ac_table = &dec->ac_tables[comp->ac_table_id]; 840 841 int k = ss; 842 while (k <= se) { 843 int rs = jpeg_decode_huffman(dec, ac_table); 844 if (rs < 0) return -1; 845 846 int r = (rs >> 4) & 0x0F; 847 int s = rs & 0x0F; 848 849 if (s == 0) { 850 if (r == 15) { 851 k += 16; 852 } else { 853 /* EOB run */ 854 break; 855 } 856 } else { 857 k += r; 858 if (k > se) break; 859 860 int ac = jpeg_receive_extend(dec, s); 861 block[jpeg_zigzag[k]] = ac; 862 k++; 863 } 864 } 865 866 return 0; 867 } 868 869 int jpeg_decode_ac_refine(jpeg_decoder_t* dec, int16_t* block, 870 int ss, int se, int al) { 871 int k = ss; 872 873 while (k <= se) { 874 int bit = jpeg_get_bits(dec, 1); 875 if (bit < 0) return -1; 876 877 int zz = jpeg_zigzag[k]; 878 if (block[zz] != 0) { 879 block[zz] |= (bit << al); 880 } 881 k++; 882 } 883 884 return 0; 885 } 886 887 /* ============================================================ 888 * YCbCr to RGB Conversion 889 * ============================================================ */ 890 891 void jpeg_ycbcr_to_rgb(jpeg_decoder_t* dec, image_t* img) { 892 if (dec->num_components == 1) { 893 /* Grayscale */ 894 jpeg_component_t* y_comp = &dec->components[0]; 895 896 for (uint32_t py = 0; py < img->height; py++) { 897 for (uint32_t px = 0; px < img->width; px++) { 898 int y_x = px * y_comp->h_samp / dec->max_h_samp; 899 int y_y = py * y_comp->v_samp / dec->max_v_samp; 900 901 uint8_t y = y_comp->data[y_y * y_comp->stride + y_x]; 902 903 image_set_pixel(img, px, py, y, y, y, 255); 904 } 905 } 906 } else if (dec->num_components >= 3) { 907 /* YCbCr to RGB */ 908 jpeg_component_t* y_comp = &dec->components[0]; 909 jpeg_component_t* cb_comp = &dec->components[1]; 910 jpeg_component_t* cr_comp = &dec->components[2]; 911 912 for (uint32_t py = 0; py < img->height; py++) { 913 for (uint32_t px = 0; px < img->width; px++) { 914 /* Calculate sample positions for each component */ 915 int y_x = px * y_comp->h_samp / dec->max_h_samp; 916 int y_y = py * y_comp->v_samp / dec->max_v_samp; 917 918 int cb_x = px * cb_comp->h_samp / dec->max_h_samp; 919 int cb_y = py * cb_comp->v_samp / dec->max_v_samp; 920 921 int cr_x = px * cr_comp->h_samp / dec->max_h_samp; 922 int cr_y = py * cr_comp->v_samp / dec->max_v_samp; 923 924 /* Clamp to valid range */ 925 if (y_x >= y_comp->width) y_x = y_comp->width - 1; 926 if (y_y >= y_comp->height) y_y = y_comp->height - 1; 927 if (cb_x >= cb_comp->width) cb_x = cb_comp->width - 1; 928 if (cb_y >= cb_comp->height) cb_y = cb_comp->height - 1; 929 if (cr_x >= cr_comp->width) cr_x = cr_comp->width - 1; 930 if (cr_y >= cr_comp->height) cr_y = cr_comp->height - 1; 931 932 int y = y_comp->data[y_y * y_comp->stride + y_x]; 933 int cb = cb_comp->data[cb_y * cb_comp->stride + cb_x] - 128; 934 int cr = cr_comp->data[cr_y * cr_comp->stride + cr_x] - 128; 935 936 /* YCbCr to RGB conversion (ITU-R BT.601) */ 937 /* R = Y + 1.402 * Cr */ 938 /* G = Y - 0.344136 * Cb - 0.714136 * Cr */ 939 /* B = Y + 1.772 * Cb */ 940 941 /* Using fixed-point arithmetic (scaled by 2^16) */ 942 int r = y + ((91881 * cr) >> 16); 943 int g = y - ((22554 * cb + 46802 * cr) >> 16); 944 int b = y + ((116130 * cb) >> 16); 945 946 image_set_pixel(img, px, py, 947 clip_uint8(r), 948 clip_uint8(g), 949 clip_uint8(b), 255); 950 } 951 } 952 } 953 } 954 955 /* ============================================================ 956 * Scan Decoding 957 * ============================================================ */ 958 959 int jpeg_decode_scan(jpeg_decoder_t* dec) { 960 jpeg_scan_t scan; 961 962 if (jpeg_parse_sos(dec, &scan) < 0) { 963 return -1; 964 } 965 966 /* Initialize bit reader */ 967 dec->bits_buffer = 0; 968 dec->bits_count = 0; 969 970 /* Reset DC predictors */ 971 for (int i = 0; i < dec->num_components; i++) { 972 dec->components[i].dc_pred = 0; 973 } 974 975 int mcu_count = 0; 976 int restart_count = dec->restart_interval; 977 978 /* Non-interleaved scan (single component) */ 979 if (scan.num_components == 1) { 980 /* Find component */ 981 jpeg_component_t* comp = NULL; 982 int comp_idx = 0; 983 for (int i = 0; i < dec->num_components; i++) { 984 if (dec->components[i].id == scan.component_ids[0]) { 985 comp = &dec->components[i]; 986 comp_idx = i; 987 break; 988 } 989 } 990 991 if (!comp) { 992 dec->error = 1; 993 snprintf(dec->error_msg, sizeof(dec->error_msg), "Component not found"); 994 return -1; 995 } 996 997 int blocks_x = comp->width / 8; 998 int blocks_y = comp->height / 8; 999 1000 for (int by = 0; by < blocks_y; by++) { 1001 for (int bx = 0; bx < blocks_x; bx++) { 1002 int16_t block[64]; 1003 1004 if (dec->is_progressive) { 1005 int block_idx = 0; 1006 for (int c = 0; c < comp_idx; c++) { 1007 block_idx += (dec->components[c].width / 8) * 1008 (dec->components[c].height / 8); 1009 } 1010 block_idx += by * blocks_x + bx; 1011 int16_t* coef_block = dec->coef_blocks[block_idx]; 1012 1013 if (scan.ss == 0 && scan.se == 0) { 1014 /* DC scan */ 1015 if (scan.ah == 0) { 1016 if (jpeg_decode_dc_first(dec, comp, coef_block) < 0) return -1; 1017 coef_block[0] <<= scan.al; 1018 } else { 1019 if (jpeg_decode_dc_refine(dec, coef_block, scan.al) < 0) return -1; 1020 } 1021 } else { 1022 /* AC scan */ 1023 if (scan.ah == 0) { 1024 if (jpeg_decode_ac_first(dec, comp, coef_block, scan.ss, scan.se) < 0) return -1; 1025 /* Shift all AC coefficients */ 1026 for (int k = scan.ss; k <= scan.se; k++) { 1027 coef_block[jpeg_zigzag[k]] <<= scan.al; 1028 } 1029 } else { 1030 if (jpeg_decode_ac_refine(dec, coef_block, scan.ss, scan.se, scan.al) < 0) return -1; 1031 } 1032 } 1033 } else { 1034 /* Baseline/Sequential */ 1035 if (jpeg_decode_block(dec, block, comp, 0) < 0) return -1; 1036 1037 /* IDCT and store */ 1038 uint8_t* out = comp->data + by * 8 * comp->stride + bx * 8; 1039 jpeg_idct_block(block, out, comp->stride); 1040 } 1041 1042 /* Handle restart */ 1043 if (dec->restart_interval > 0) { 1044 mcu_count++; 1045 if (mcu_count >= restart_count && (by < blocks_y - 1 || bx < blocks_x - 1)) { 1046 jpeg_align_bits(dec); 1047 jpeg_next_marker(dec); 1048 dec->bits_buffer = 0; 1049 dec->bits_count = 0; 1050 comp->dc_pred = 0; 1051 mcu_count = 0; 1052 } 1053 } 1054 } 1055 } 1056 } else { 1057 /* Interleaved scan */ 1058 for (int mcu_y = 0; mcu_y < dec->mcu_rows; mcu_y++) { 1059 for (int mcu_x = 0; mcu_x < dec->mcus_per_row; mcu_x++) { 1060 /* Process each component in the MCU */ 1061 for (int c = 0; c < scan.num_components; c++) { 1062 /* Find component */ 1063 jpeg_component_t* comp = NULL; 1064 for (int i = 0; i < dec->num_components; i++) { 1065 if (dec->components[i].id == scan.component_ids[c]) { 1066 comp = &dec->components[i]; 1067 break; 1068 } 1069 } 1070 1071 if (!comp) continue; 1072 1073 /* Process blocks for this component in the MCU */ 1074 for (int v = 0; v < comp->v_samp; v++) { 1075 for (int h = 0; h < comp->h_samp; h++) { 1076 int16_t block[64]; 1077 1078 int bx = mcu_x * comp->h_samp + h; 1079 int by = mcu_y * comp->v_samp + v; 1080 1081 if (bx * 8 >= comp->width || by * 8 >= comp->height) { 1082 /* Skip blocks outside component dimensions */ 1083 if (jpeg_decode_block(dec, block, comp, 0) < 0) return -1; 1084 continue; 1085 } 1086 1087 if (jpeg_decode_block(dec, block, comp, 0) < 0) return -1; 1088 1089 /* IDCT and store */ 1090 uint8_t* out = comp->data + by * 8 * comp->stride + bx * 8; 1091 jpeg_idct_block(block, out, comp->stride); 1092 } 1093 } 1094 } 1095 1096 /* Handle restart */ 1097 if (dec->restart_interval > 0) { 1098 mcu_count++; 1099 if (mcu_count >= (int)dec->restart_interval && 1100 (mcu_y < dec->mcu_rows - 1 || mcu_x < dec->mcus_per_row - 1)) { 1101 jpeg_align_bits(dec); 1102 jpeg_next_marker(dec); 1103 dec->bits_buffer = 0; 1104 dec->bits_count = 0; 1105 1106 /* Reset DC predictors */ 1107 for (int i = 0; i < dec->num_components; i++) { 1108 dec->components[i].dc_pred = 0; 1109 } 1110 mcu_count = 0; 1111 } 1112 } 1113 } 1114 } 1115 } 1116 1117 return 0; 1118 } 1119 1120 /* ============================================================ 1121 * Progressive Final Processing 1122 * ============================================================ */ 1123 1124 static void jpeg_progressive_finish(jpeg_decoder_t* dec) { 1125 int block_idx = 0; 1126 1127 for (int c = 0; c < dec->num_components; c++) { 1128 jpeg_component_t* comp = &dec->components[c]; 1129 jpeg_quant_table_t* qt = &dec->quant_tables[comp->quant_table_id]; 1130 1131 int blocks_x = comp->width / 8; 1132 int blocks_y = comp->height / 8; 1133 1134 for (int by = 0; by < blocks_y; by++) { 1135 for (int bx = 0; bx < blocks_x; bx++) { 1136 int16_t* coef_block = dec->coef_blocks[block_idx++]; 1137 int16_t block[64]; 1138 1139 /* Dequantize */ 1140 for (int k = 0; k < 64; k++) { 1141 block[k] = coef_block[k] * qt->table[k]; 1142 } 1143 1144 /* IDCT and store */ 1145 uint8_t* out = comp->data + by * 8 * comp->stride + bx * 8; 1146 jpeg_idct_block(block, out, comp->stride); 1147 } 1148 } 1149 } 1150 } 1151 1152 /* ============================================================ 1153 * Main Decode Function 1154 * ============================================================ */ 1155 1156 image_t* jpeg_decode(const uint8_t* data, uint32_t data_size) { 1157 if (!data || data_size < 2) return NULL; 1158 1159 jpeg_decoder_t* dec = jpeg_decoder_create(); 1160 if (!dec) return NULL; 1161 1162 /* Parse markers up to first SOS */ 1163 if (jpeg_parse_markers(dec, data, data_size) < 0) { 1164 jpeg_decoder_destroy(dec); 1165 return NULL; 1166 } 1167 1168 /* Validate we have what we need */ 1169 if (dec->width == 0 || dec->height == 0) { 1170 jpeg_decoder_destroy(dec); 1171 return NULL; 1172 } 1173 1174 /* Decode scans */ 1175 if (dec->is_progressive) { 1176 /* Progressive: may have multiple scans */ 1177 while (dec->pos < data_size - 1) { 1178 /* Find SOS marker */ 1179 if (dec->data[dec->pos - 2] == 0xFF && dec->data[dec->pos - 1] == 0xDA) { 1180 if (jpeg_decode_scan(dec) < 0) { 1181 jpeg_decoder_destroy(dec); 1182 return NULL; 1183 } 1184 } 1185 1186 /* Find next marker */ 1187 int marker = jpeg_next_marker(dec); 1188 if (marker < 0 || marker == JPEG_EOI) break; 1189 1190 if (marker == JPEG_DHT) { 1191 dec->pos -= 2; 1192 jpeg_parse_dht(dec); 1193 } else if (marker == JPEG_SOS) { 1194 dec->pos -= 2; 1195 } 1196 } 1197 1198 /* Final IDCT pass */ 1199 jpeg_progressive_finish(dec); 1200 } else { 1201 /* Baseline/Sequential: single scan */ 1202 if (jpeg_decode_scan(dec) < 0) { 1203 jpeg_decoder_destroy(dec); 1204 return NULL; 1205 } 1206 } 1207 1208 /* Create output image */ 1209 image_t* img = image_create(dec->width, dec->height, 24); 1210 if (!img) { 1211 jpeg_decoder_destroy(dec); 1212 return NULL; 1213 } 1214 1215 /* Convert to RGB */ 1216 jpeg_ycbcr_to_rgb(dec, img); 1217 1218 jpeg_decoder_destroy(dec); 1219 return img; 1220 } 1221 1222 image_t* jpeg_load_file(const char* filename) { 1223 (void)filename; 1224 /* TODO: Implement file loading once filesystem is available */ 1225 return NULL; 1226 } 1227 1228 /* ============================================================ 1229 * Lua Bindings 1230 * ============================================================ */ 1231 1232 int lua_jpeg_load(lua_State* L) { 1233 size_t data_size; 1234 const uint8_t* data = (const uint8_t*)luaL_checklstring(L, 1, &data_size); 1235 1236 image_t* img = jpeg_decode(data, (uint32_t)data_size); 1237 1238 if (img) { 1239 lua_pushlightuserdata(L, img); 1240 return 1; 1241 } else { 1242 lua_pushnil(L); 1243 lua_pushstring(L, "Failed to decode JPEG"); 1244 return 2; 1245 } 1246 }