luajitos

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

deflate_impl.c (20603B)


      1 #include "deflate_impl.h"
      2 #include <stdlib.h>
      3 #include <string.h>
      4 
      5 /* Fixed Huffman codes from RFC 1951 */
      6 static const uint8_t fixed_literal_lengths[288] = {
      7     8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
      8     8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
      9     8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
     10     8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
     11     8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
     12     9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
     13     9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
     14     9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
     15     7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
     16 };
     17 
     18 /* Code length alphabet order for dynamic blocks */
     19 static const uint8_t code_length_order[19] = {
     20     16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
     21 };
     22 
     23 /* Length extra bits and base values */
     24 static const uint8_t length_extra_bits[29] = {
     25     0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0
     26 };
     27 static const uint16_t length_base[29] = {
     28     3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258
     29 };
     30 
     31 /* Distance extra bits and base values */
     32 static const uint8_t distance_extra_bits[30] = {
     33     0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13
     34 };
     35 static const uint16_t distance_base[30] = {
     36     1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577
     37 };
     38 
     39 /* Bit buffer functions for compression */
     40 static void init_bit_buffer(bit_buffer_t* bb, uint32_t max_size) {
     41     bb->buffer = (uint8_t*)malloc(max_size);
     42     if (bb->buffer) {
     43         memset(bb->buffer, 0, max_size);
     44     }
     45     bb->buffer_size = 0;
     46     bb->byte_pos = 0;
     47     bb->bit_pos = 0;
     48     bb->max_size = max_size;
     49 }
     50 
     51 static void write_bits(bit_buffer_t* bb, uint32_t bits, uint32_t num_bits) {
     52     while (num_bits > 0) {
     53         if (bb->byte_pos >= bb->max_size) return;
     54 
     55         uint32_t bits_available = 8 - bb->bit_pos;
     56         uint32_t bits_to_write = (num_bits < bits_available) ? num_bits : bits_available;
     57 
     58         bb->buffer[bb->byte_pos] |= (bits & ((1 << bits_to_write) - 1)) << bb->bit_pos;
     59 
     60         bits >>= bits_to_write;
     61         num_bits -= bits_to_write;
     62         bb->bit_pos += bits_to_write;
     63 
     64         if (bb->bit_pos >= 8) {
     65             bb->bit_pos = 0;
     66             bb->byte_pos++;
     67             if (bb->byte_pos < bb->max_size) {
     68                 bb->buffer[bb->byte_pos] = 0;
     69             }
     70         }
     71     }
     72 
     73     if (bb->byte_pos + (bb->bit_pos > 0 ? 1 : 0) > bb->buffer_size) {
     74         bb->buffer_size = bb->byte_pos + (bb->bit_pos > 0 ? 1 : 0);
     75     }
     76 }
     77 
     78 static void align_bit_buffer(bit_buffer_t* bb) {
     79     if (bb->bit_pos > 0) {
     80         bb->byte_pos++;
     81         bb->bit_pos = 0;
     82         if (bb->byte_pos < bb->max_size) {
     83             bb->buffer[bb->byte_pos] = 0;
     84         }
     85         bb->buffer_size = bb->byte_pos;
     86     }
     87 }
     88 
     89 /* Build fixed Huffman codes for compression */
     90 static void build_fixed_huffman_codes(huffman_code_t* literal_codes, huffman_code_t* distance_codes) {
     91     int code = 0;
     92 
     93     /* 0-143: 8 bits, codes 00110000-10111111 */
     94     for (int i = 0; i <= 143; i++) {
     95         literal_codes[i].length = 8;
     96         literal_codes[i].code = code++;
     97     }
     98 
     99     /* 144-255: 9 bits, codes 110010000-111111111 */
    100     code = 0b110010000;
    101     for (int i = 144; i <= 255; i++) {
    102         literal_codes[i].length = 9;
    103         literal_codes[i].code = code++;
    104     }
    105 
    106     /* 256-279: 7 bits, codes 0000000-0010111 */
    107     code = 0;
    108     for (int i = 256; i <= 279; i++) {
    109         literal_codes[i].length = 7;
    110         literal_codes[i].code = code++;
    111     }
    112 
    113     /* 280-287: 8 bits, codes 11000000-11000111 */
    114     code = 0b11000000;
    115     for (int i = 280; i <= 287; i++) {
    116         literal_codes[i].length = 8;
    117         literal_codes[i].code = code++;
    118     }
    119 
    120     /* Fixed distance codes: all 5 bits */
    121     for (int i = 0; i < 30; i++) {
    122         distance_codes[i].length = 5;
    123         distance_codes[i].code = i;
    124     }
    125 }
    126 
    127 /* Reverse bits for Huffman codes */
    128 static uint16_t reverse_bits(uint16_t code, uint8_t length) {
    129     uint16_t result = 0;
    130     for (int i = 0; i < length; i++) {
    131         result = (result << 1) | (code & 1);
    132         code >>= 1;
    133     }
    134     return result;
    135 }
    136 
    137 /* Write Huffman coded literal/length */
    138 static void write_huffman_code(bit_buffer_t* bb, huffman_code_t* codes, uint16_t symbol) {
    139     huffman_code_t code = codes[symbol];
    140     uint16_t reversed = reverse_bits(code.code, code.length);
    141     write_bits(bb, reversed, code.length);
    142 }
    143 
    144 /* Find LZ77 match */
    145 static void find_best_match(const uint8_t* data, uint32_t pos, uint32_t size,
    146                            uint32_t* match_len, uint32_t* match_dist) {
    147     *match_len = 0;
    148     *match_dist = 0;
    149 
    150     if (pos < 3 || size - pos < 3) return;
    151 
    152     uint32_t window_start = (pos > 32768) ? (pos - 32768) : 0;
    153     uint32_t max_len = (size - pos < 258) ? (size - pos) : 258;
    154 
    155     for (uint32_t i = window_start; i < pos; i++) {
    156         uint32_t len = 0;
    157         while (len < max_len && data[i + len] == data[pos + len]) {
    158             len++;
    159         }
    160 
    161         if (len >= 3 && len > *match_len) {
    162             *match_len = len;
    163             *match_dist = pos - i;
    164         }
    165     }
    166 }
    167 
    168 /* Get length code */
    169 static uint16_t get_length_code(uint32_t length) {
    170     if (length < 3) return 0;
    171     if (length > 258) length = 258;
    172 
    173     for (int i = 0; i < 29; i++) {
    174         if (i == 28 || length < length_base[i + 1]) {
    175             return 257 + i;
    176         }
    177     }
    178     return 285;
    179 }
    180 
    181 /* Get distance code */
    182 static uint16_t get_distance_code(uint32_t distance) {
    183     if (distance < 1) return 0;
    184 
    185     for (int i = 0; i < 30; i++) {
    186         if (i == 29 || distance < distance_base[i + 1]) {
    187             return i;
    188         }
    189     }
    190     return 29;
    191 }
    192 
    193 /* Compress using fixed Huffman codes */
    194 int deflate_compress_full(const uint8_t* input, uint32_t input_size,
    195                           uint8_t** output, uint32_t* output_size, int level) {
    196     uint32_t max_output = input_size + (input_size / 1000) + 12 + 5;
    197     bit_buffer_t bb;
    198     init_bit_buffer(&bb, max_output);
    199 
    200     if (!bb.buffer) return -1;
    201 
    202     huffman_code_t literal_codes[288];
    203     huffman_code_t distance_codes[30];
    204     build_fixed_huffman_codes(literal_codes, distance_codes);
    205 
    206     /* Write block header: BFINAL=1, BTYPE=01 (fixed Huffman) */
    207     write_bits(&bb, 0b101, 3);
    208 
    209     uint32_t pos = 0;
    210     while (pos < input_size) {
    211         uint32_t match_len, match_dist;
    212 
    213         if (level > 1) {
    214             find_best_match(input, pos, input_size, &match_len, &match_dist);
    215         } else {
    216             match_len = 0;
    217         }
    218 
    219         if (match_len >= 3) {
    220             uint16_t length_code = get_length_code(match_len);
    221             write_huffman_code(&bb, literal_codes, length_code);
    222 
    223             uint8_t extra_len_bits = length_extra_bits[length_code - 257];
    224             if (extra_len_bits > 0) {
    225                 uint32_t extra_len = match_len - length_base[length_code - 257];
    226                 write_bits(&bb, extra_len, extra_len_bits);
    227             }
    228 
    229             uint16_t dist_code = get_distance_code(match_dist);
    230             write_huffman_code(&bb, distance_codes, dist_code);
    231 
    232             uint8_t extra_dist_bits = distance_extra_bits[dist_code];
    233             if (extra_dist_bits > 0) {
    234                 uint32_t extra_dist = match_dist - distance_base[dist_code];
    235                 write_bits(&bb, extra_dist, extra_dist_bits);
    236             }
    237 
    238             pos += match_len;
    239         } else {
    240             write_huffman_code(&bb, literal_codes, input[pos]);
    241             pos++;
    242         }
    243     }
    244 
    245     write_huffman_code(&bb, literal_codes, 256);
    246     align_bit_buffer(&bb);
    247 
    248     *output = bb.buffer;
    249     *output_size = bb.buffer_size;
    250 
    251     return 0;
    252 }
    253 
    254 /* ======== DECOMPRESSION ======== */
    255 
    256 /* Read bits from input */
    257 static uint32_t read_bits(inflate_state_t* state, uint32_t num_bits) {
    258     uint32_t result = 0;
    259     uint32_t bits_read = 0;
    260 
    261     while (bits_read < num_bits && state->byte_pos < state->input_size) {
    262         uint32_t bits_available = 8 - state->bit_pos;
    263         uint32_t bits_to_read = (num_bits - bits_read < bits_available) ?
    264                                 (num_bits - bits_read) : bits_available;
    265 
    266         uint32_t bits = (state->input[state->byte_pos] >> state->bit_pos) &
    267                        ((1 << bits_to_read) - 1);
    268         result |= bits << bits_read;
    269 
    270         bits_read += bits_to_read;
    271         state->bit_pos += bits_to_read;
    272 
    273         if (state->bit_pos >= 8) {
    274             state->bit_pos = 0;
    275             state->byte_pos++;
    276         }
    277     }
    278 
    279     return result;
    280 }
    281 
    282 /* Peek bits without advancing */
    283 static uint32_t peek_bits(inflate_state_t* state, uint32_t num_bits) {
    284     uint32_t save_byte = state->byte_pos;
    285     uint32_t save_bit = state->bit_pos;
    286     uint32_t result = read_bits(state, num_bits);
    287     state->byte_pos = save_byte;
    288     state->bit_pos = save_bit;
    289     return result;
    290 }
    291 
    292 /* Build Huffman decode table using canonical codes */
    293 static int build_huffman_table(uint16_t* table, const uint8_t* lengths, uint32_t num_codes, uint32_t max_bits) {
    294     /* Count codes of each length */
    295     uint32_t bl_count[16] = {0};
    296     for (uint32_t i = 0; i < num_codes; i++) {
    297         if (lengths[i] < 16) {
    298             bl_count[lengths[i]]++;
    299         }
    300     }
    301     bl_count[0] = 0;
    302 
    303     /* Find first code for each length */
    304     uint32_t next_code[16];
    305     uint32_t code = 0;
    306     for (int bits = 1; bits <= 15; bits++) {
    307         code = (code + bl_count[bits - 1]) << 1;
    308         next_code[bits] = code;
    309     }
    310 
    311     /* Build lookup table */
    312     memset(table, 0xFF, (1 << max_bits) * sizeof(uint16_t) * 2);
    313 
    314     for (uint32_t i = 0; i < num_codes; i++) {
    315         uint8_t len = lengths[i];
    316         if (len > 0) {
    317             uint32_t code = next_code[len]++;
    318             /* Reverse the code bits */
    319             uint32_t rev_code = 0;
    320             for (int b = 0; b < len; b++) {
    321                 rev_code = (rev_code << 1) | (code & 1);
    322                 code >>= 1;
    323             }
    324             /* Store in table: [reversed_code] = (length << 16) | symbol */
    325             if (rev_code < (1U << max_bits)) {
    326                 table[rev_code * 2] = len;
    327                 table[rev_code * 2 + 1] = i;
    328             }
    329         }
    330     }
    331 
    332     return 0;
    333 }
    334 
    335 /* Decode Huffman symbol using lookup table */
    336 static int decode_huffman_symbol(inflate_state_t* state, uint16_t* table, uint32_t max_bits) {
    337     /* Try to decode using table lookup */
    338     uint32_t code = 0;
    339 
    340     for (uint32_t bits = 1; bits <= max_bits; bits++) {
    341         code = (code << 1) | read_bits(state, 1);
    342 
    343         /* Check all entries for this bit length */
    344         for (uint32_t i = 0; i < (1U << max_bits); i++) {
    345             if (table[i * 2] == bits) {
    346                 /* Check if codes match (compare only 'bits' bits) */
    347                 uint32_t table_code = i;
    348                 if ((code & ((1 << bits) - 1)) == (table_code & ((1 << bits) - 1))) {
    349                     return table[i * 2 + 1];
    350                 }
    351             }
    352         }
    353     }
    354 
    355     return -1;  /* Decoding error */
    356 }
    357 
    358 /* Ensure output buffer has enough space */
    359 static int ensure_output_space(inflate_state_t* state, uint32_t needed) {
    360     while (state->output_pos + needed > state->output_capacity) {
    361         state->output_capacity *= 2;
    362         uint8_t* new_output = (uint8_t*)realloc(state->output, state->output_capacity);
    363         if (!new_output) {
    364             free(state->output);
    365             state->output = NULL;
    366             return -1;
    367         }
    368         state->output = new_output;
    369     }
    370     return 0;
    371 }
    372 
    373 /* Decompress deflate data */
    374 int deflate_decompress_full(const uint8_t* input, uint32_t input_size,
    375                             uint8_t** output, uint32_t* output_size) {
    376     inflate_state_t state;
    377     memset(&state, 0, sizeof(state));
    378 
    379     state.input = input;
    380     state.input_size = input_size;
    381     state.output_capacity = input_size * 3;  /* Initial guess */
    382     state.output = (uint8_t*)malloc(state.output_capacity);
    383 
    384     if (!state.output) return -1;
    385 
    386     /* Process blocks */
    387     int is_final = 0;
    388     while (!is_final && state.byte_pos < input_size) {
    389         is_final = read_bits(&state, 1);
    390         uint32_t btype = read_bits(&state, 2);
    391 
    392         if (btype == 0) {
    393             /* Uncompressed block */
    394             /* Align to byte boundary */
    395             state.bit_pos = 0;
    396             if (state.byte_pos < input_size) {
    397                 state.byte_pos++;
    398             }
    399 
    400             if (state.byte_pos + 4 > input_size) {
    401                 free(state.output);
    402                 return -1;
    403             }
    404 
    405             uint16_t len = input[state.byte_pos] | (input[state.byte_pos + 1] << 8);
    406             state.byte_pos += 4;  /* Skip LEN and NLEN */
    407 
    408             if (ensure_output_space(&state, len) < 0) return -1;
    409 
    410             if (state.byte_pos + len > input_size) {
    411                 free(state.output);
    412                 return -1;
    413             }
    414 
    415             memcpy(state.output + state.output_pos, input + state.byte_pos, len);
    416             state.output_pos += len;
    417             state.byte_pos += len;
    418 
    419         } else if (btype == 1) {
    420             /* Fixed Huffman codes */
    421             uint8_t lit_lengths[288];
    422             uint8_t dist_lengths[32];
    423 
    424             for (int i = 0; i < 288; i++) {
    425                 lit_lengths[i] = fixed_literal_lengths[i];
    426             }
    427             for (int i = 0; i < 32; i++) {
    428                 dist_lengths[i] = 5;
    429             }
    430 
    431             build_huffman_table(state.literal_tree, lit_lengths, 288, 9);
    432             build_huffman_table(state.distance_tree, dist_lengths, 32, 5);
    433 
    434             /* Decode literals and length/distance pairs */
    435             while (1) {
    436                 int symbol = decode_huffman_symbol(&state, state.literal_tree, 9);
    437 
    438                 if (symbol < 0) {
    439                     free(state.output);
    440                     return -1;
    441                 }
    442 
    443                 if (symbol < 256) {
    444                     /* Literal byte */
    445                     if (ensure_output_space(&state, 1) < 0) return -1;
    446                     state.output[state.output_pos++] = symbol;
    447 
    448                 } else if (symbol == 256) {
    449                     /* End of block */
    450                     break;
    451 
    452                 } else if (symbol <= 285) {
    453                     /* Length/distance pair */
    454                     uint32_t length = length_base[symbol - 257];
    455                     uint8_t extra_bits = length_extra_bits[symbol - 257];
    456                     if (extra_bits > 0) {
    457                         length += read_bits(&state, extra_bits);
    458                     }
    459 
    460                     int dist_symbol = decode_huffman_symbol(&state, state.distance_tree, 5);
    461                     if (dist_symbol < 0 || dist_symbol >= 30) {
    462                         free(state.output);
    463                         return -1;
    464                     }
    465 
    466                     uint32_t distance = distance_base[dist_symbol];
    467                     extra_bits = distance_extra_bits[dist_symbol];
    468                     if (extra_bits > 0) {
    469                         distance += read_bits(&state, extra_bits);
    470                     }
    471 
    472                     if (distance > state.output_pos) {
    473                         free(state.output);
    474                         return -1;
    475                     }
    476 
    477                     if (ensure_output_space(&state, length) < 0) return -1;
    478 
    479                     /* Copy from history */
    480                     for (uint32_t i = 0; i < length; i++) {
    481                         state.output[state.output_pos] = state.output[state.output_pos - distance];
    482                         state.output_pos++;
    483                     }
    484                 } else {
    485                     /* Invalid symbol */
    486                     free(state.output);
    487                     return -1;
    488                 }
    489             }
    490 
    491         } else if (btype == 2) {
    492             /* Dynamic Huffman codes */
    493             uint32_t hlit = read_bits(&state, 5) + 257;
    494             uint32_t hdist = read_bits(&state, 5) + 1;
    495             uint32_t hclen = read_bits(&state, 4) + 4;
    496 
    497             /* Read code length code lengths */
    498             uint8_t code_length_lengths[19] = {0};
    499             for (uint32_t i = 0; i < hclen; i++) {
    500                 code_length_lengths[code_length_order[i]] = read_bits(&state, 3);
    501             }
    502 
    503             /* Build code length Huffman table */
    504             uint16_t code_length_table[512];
    505             build_huffman_table(code_length_table, code_length_lengths, 19, 7);
    506 
    507             /* Decode literal/length and distance code lengths */
    508             uint8_t lengths[288 + 32];
    509             uint32_t length_idx = 0;
    510             uint32_t total_lengths = hlit + hdist;
    511 
    512             while (length_idx < total_lengths) {
    513                 int symbol = decode_huffman_symbol(&state, code_length_table, 7);
    514                 if (symbol < 0) {
    515                     free(state.output);
    516                     return -1;
    517                 }
    518 
    519                 if (symbol < 16) {
    520                     lengths[length_idx++] = symbol;
    521                 } else if (symbol == 16) {
    522                     /* Repeat previous length 3-6 times */
    523                     uint32_t repeat = 3 + read_bits(&state, 2);
    524                     if (length_idx == 0) {
    525                         free(state.output);
    526                         return -1;
    527                     }
    528                     uint8_t prev = lengths[length_idx - 1];
    529                     for (uint32_t i = 0; i < repeat && length_idx < total_lengths; i++) {
    530                         lengths[length_idx++] = prev;
    531                     }
    532                 } else if (symbol == 17) {
    533                     /* Repeat 0 for 3-10 times */
    534                     uint32_t repeat = 3 + read_bits(&state, 3);
    535                     for (uint32_t i = 0; i < repeat && length_idx < total_lengths; i++) {
    536                         lengths[length_idx++] = 0;
    537                     }
    538                 } else if (symbol == 18) {
    539                     /* Repeat 0 for 11-138 times */
    540                     uint32_t repeat = 11 + read_bits(&state, 7);
    541                     for (uint32_t i = 0; i < repeat && length_idx < total_lengths; i++) {
    542                         lengths[length_idx++] = 0;
    543                     }
    544                 }
    545             }
    546 
    547             /* Build literal/length and distance tables */
    548             build_huffman_table(state.literal_tree, lengths, hlit, 15);
    549             build_huffman_table(state.distance_tree, lengths + hlit, hdist, 15);
    550 
    551             /* Decode data (same as fixed Huffman) */
    552             while (1) {
    553                 int symbol = decode_huffman_symbol(&state, state.literal_tree, 15);
    554 
    555                 if (symbol < 0) {
    556                     free(state.output);
    557                     return -1;
    558                 }
    559 
    560                 if (symbol < 256) {
    561                     if (ensure_output_space(&state, 1) < 0) return -1;
    562                     state.output[state.output_pos++] = symbol;
    563 
    564                 } else if (symbol == 256) {
    565                     break;
    566 
    567                 } else if (symbol <= 285) {
    568                     uint32_t length = length_base[symbol - 257];
    569                     uint8_t extra_bits = length_extra_bits[symbol - 257];
    570                     if (extra_bits > 0) {
    571                         length += read_bits(&state, extra_bits);
    572                     }
    573 
    574                     int dist_symbol = decode_huffman_symbol(&state, state.distance_tree, 15);
    575                     if (dist_symbol < 0 || dist_symbol >= 30) {
    576                         free(state.output);
    577                         return -1;
    578                     }
    579 
    580                     uint32_t distance = distance_base[dist_symbol];
    581                     extra_bits = distance_extra_bits[dist_symbol];
    582                     if (extra_bits > 0) {
    583                         distance += read_bits(&state, extra_bits);
    584                     }
    585 
    586                     if (distance > state.output_pos) {
    587                         free(state.output);
    588                         return -1;
    589                     }
    590 
    591                     if (ensure_output_space(&state, length) < 0) return -1;
    592 
    593                     for (uint32_t i = 0; i < length; i++) {
    594                         state.output[state.output_pos] = state.output[state.output_pos - distance];
    595                         state.output_pos++;
    596                     }
    597                 } else {
    598                     free(state.output);
    599                     return -1;
    600                 }
    601             }
    602 
    603         } else {
    604             /* Invalid block type (btype == 3) */
    605             free(state.output);
    606             return -1;
    607         }
    608     }
    609 
    610     *output = state.output;
    611     *output_size = state.output_pos;
    612 
    613     return 0;
    614 }