luajitos

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

GZip.c (10251B)


      1 #include "compression.h"
      2 #include "deflate_impl.h"
      3 #include <stdlib.h>
      4 #include <string.h>
      5 #include <lauxlib.h>
      6 
      7 /*
      8  * GZip Compression Implementation
      9  *
     10  * NOTE: This is a SIMPLIFIED implementation for educational purposes.
     11  * For production use, integrate one of these proven libraries:
     12  *
     13  * 1. ZLIB (Recommended for GZip) ⭐
     14  *    - Has built-in gzip support
     15  *    - deflateInit2() with windowBits = 15 + 16 for gzip
     16  *    - inflateInit2() with windowBits = 15 + 16 for gzip
     17  *
     18  * 2. MINIZ (Single-file alternative)
     19  *    - Drop-in zlib replacement
     20  *    - Supports gzip format
     21  *    - Public domain
     22  *
     23  * This simplified version implements:
     24  * - GZip header structure
     25  * - Uncompressed storage (as placeholder)
     26  * - CRC32 checksum
     27  */
     28 
     29 /* GZip constants */
     30 #define GZIP_ID1 0x1F
     31 #define GZIP_ID2 0x8B
     32 #define GZIP_CM_DEFLATE 8
     33 
     34 /* GZip flags */
     35 #define GZIP_FTEXT    0x01  // Text hint
     36 #define GZIP_FHCRC    0x02  // Header CRC
     37 #define GZIP_FEXTRA   0x04  // Extra fields
     38 #define GZIP_FNAME    0x08  // Original filename
     39 #define GZIP_FCOMMENT 0x10  // Comment
     40 
     41 /* GZip header structure (10 bytes minimum) */
     42 typedef struct {
     43     uint8_t id1;        // 0x1F
     44     uint8_t id2;        // 0x8B
     45     uint8_t cm;         // Compression method (8 = deflate)
     46     uint8_t flg;        // Flags
     47     uint32_t mtime;     // Modification time (Unix timestamp)
     48     uint8_t xfl;        // Extra flags
     49     uint8_t os;         // Operating system
     50 } gzip_header_t;
     51 
     52 /* GZip footer (8 bytes) */
     53 typedef struct {
     54     uint32_t crc32;     // CRC32 of uncompressed data
     55     uint32_t isize;     // Size of uncompressed data (modulo 2^32)
     56 } gzip_footer_t;
     57 
     58 /* Write 32-bit little-endian */
     59 static void write_le32(uint8_t* buf, uint32_t value) {
     60     buf[0] = value & 0xFF;
     61     buf[1] = (value >> 8) & 0xFF;
     62     buf[2] = (value >> 16) & 0xFF;
     63     buf[3] = (value >> 24) & 0xFF;
     64 }
     65 
     66 /* Read 32-bit little-endian */
     67 static uint32_t read_le32(const uint8_t* buf) {
     68     return buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
     69 }
     70 
     71 /* Compress using GZip with full deflate compression */
     72 compression_result_t* gzip_compress(const uint8_t* input, uint32_t input_size, int level) {
     73     compression_result_t* result = compression_result_create();
     74     if (!result) return NULL;
     75 
     76     /* Compress the data using full deflate implementation */
     77     uint8_t* deflate_data = NULL;
     78     uint32_t deflate_size = 0;
     79 
     80     if (deflate_compress_full(input, input_size, &deflate_data, &deflate_size, level) != 0) {
     81         result->error = COMPRESS_ERROR;
     82         strcpy(result->error_msg, "Deflate compression failed");
     83         return result;
     84     }
     85 
     86     /* Calculate output size: header(10) + deflate_data + footer(8) */
     87     uint32_t output_size = 10 + deflate_size + 8;
     88     result->data = (uint8_t*)malloc(output_size);
     89     if (!result->data) {
     90         free(deflate_data);
     91         result->error = COMPRESS_MEM_ERROR;
     92         strcpy(result->error_msg, "Memory allocation failed");
     93         return result;
     94     }
     95 
     96     uint8_t* out = result->data;
     97     uint32_t out_pos = 0;
     98 
     99     /* Write GZip header */
    100     out[out_pos++] = GZIP_ID1;
    101     out[out_pos++] = GZIP_ID2;
    102     out[out_pos++] = GZIP_CM_DEFLATE;
    103     out[out_pos++] = 0;  // No flags
    104 
    105     /* Modification time (0 = not available) */
    106     write_le32(out + out_pos, 0);
    107     out_pos += 4;
    108 
    109     /* Extra flags (0=default, 2=max compression, 4=fastest) */
    110     out[out_pos++] = (level == COMPRESS_LEVEL_BEST) ? 2 :
    111                      (level == COMPRESS_LEVEL_FASTEST) ? 4 : 0;
    112 
    113     /* Operating system (255=unknown) */
    114     out[out_pos++] = 255;
    115 
    116     /* Copy deflate compressed data */
    117     memcpy(out + out_pos, deflate_data, deflate_size);
    118     out_pos += deflate_size;
    119     free(deflate_data);
    120 
    121     /* Write GZip footer */
    122     uint32_t crc = crc32_calc(0, input, input_size);
    123     write_le32(out + out_pos, crc);
    124     out_pos += 4;
    125 
    126     write_le32(out + out_pos, input_size);
    127     out_pos += 4;
    128 
    129     result->size = out_pos;
    130     result->error = COMPRESS_OK;
    131     return result;
    132 }
    133 
    134 /* Decompress GZip data */
    135 compression_result_t* gzip_decompress(const uint8_t* input, uint32_t input_size) {
    136     compression_result_t* result = compression_result_create();
    137     if (!result) return NULL;
    138 
    139     if (input_size < 18) {  // Minimum: header(10) + empty block(5) + footer(8)
    140         result->error = COMPRESS_DATA_ERROR;
    141         strcpy(result->error_msg, "Input too small");
    142         return result;
    143     }
    144 
    145     uint32_t in_pos = 0;
    146 
    147     /* Read GZip header */
    148     if (input[in_pos++] != GZIP_ID1 || input[in_pos++] != GZIP_ID2) {
    149         result->error = COMPRESS_DATA_ERROR;
    150         strcpy(result->error_msg, "Invalid GZip signature");
    151         return result;
    152     }
    153 
    154     uint8_t cm = input[in_pos++];
    155     if (cm != GZIP_CM_DEFLATE) {
    156         result->error = COMPRESS_DATA_ERROR;
    157         strcpy(result->error_msg, "Unsupported compression method");
    158         return result;
    159     }
    160 
    161     uint8_t flg = input[in_pos++];
    162     uint32_t mtime = read_le32(input + in_pos);
    163     in_pos += 4;
    164     uint8_t xfl = input[in_pos++];
    165     uint8_t os = input[in_pos++];
    166 
    167     /* Skip optional fields */
    168     if (flg & GZIP_FEXTRA) {
    169         if (in_pos + 2 > input_size) {
    170             result->error = COMPRESS_DATA_ERROR;
    171             strcpy(result->error_msg, "Truncated FEXTRA");
    172             return result;
    173         }
    174         uint16_t xlen = input[in_pos] | (input[in_pos + 1] << 8);
    175         in_pos += 2 + xlen;
    176     }
    177 
    178     if (flg & GZIP_FNAME) {
    179         while (in_pos < input_size && input[in_pos] != 0) in_pos++;
    180         in_pos++;  // Skip null terminator
    181     }
    182 
    183     if (flg & GZIP_FCOMMENT) {
    184         while (in_pos < input_size && input[in_pos] != 0) in_pos++;
    185         in_pos++;  // Skip null terminator
    186     }
    187 
    188     if (flg & GZIP_FHCRC) {
    189         in_pos += 2;  // Skip header CRC16
    190     }
    191 
    192     if (in_pos >= input_size - 8) {
    193         result->error = COMPRESS_DATA_ERROR;
    194         strcpy(result->error_msg, "No compressed data");
    195         return result;
    196     }
    197 
    198     /* Decompress the deflate data */
    199     uint8_t* decompressed = NULL;
    200     uint32_t decompressed_size = 0;
    201 
    202     /* Deflate data is between header and footer */
    203     const uint8_t* deflate_data = input + in_pos;
    204     uint32_t deflate_size = input_size - in_pos - 8;
    205 
    206     if (deflate_decompress_full(deflate_data, deflate_size, &decompressed, &decompressed_size) != 0) {
    207         result->error = COMPRESS_ERROR;
    208         strcpy(result->error_msg, "Deflate decompression failed");
    209         return result;
    210     }
    211 
    212     /* Verify CRC32 and size */
    213     uint32_t stored_crc = read_le32(input + input_size - 8);
    214     uint32_t stored_size = read_le32(input + input_size - 4);
    215 
    216     uint32_t calculated_crc = crc32_calc(0, decompressed, decompressed_size);
    217 
    218     if (stored_crc != calculated_crc) {
    219         free(decompressed);
    220         result->error = COMPRESS_DATA_ERROR;
    221         strcpy(result->error_msg, "CRC32 mismatch");
    222         return result;
    223     }
    224 
    225     if ((stored_size & 0xFFFFFFFF) != (decompressed_size & 0xFFFFFFFF)) {
    226         free(decompressed);
    227         result->error = COMPRESS_DATA_ERROR;
    228         strcpy(result->error_msg, "Size mismatch");
    229         return result;
    230     }
    231 
    232     result->data = decompressed;
    233     result->size = decompressed_size;
    234     result->error = COMPRESS_OK;
    235     return result;
    236 }
    237 
    238 /* Lua binding: Compress with GZip */
    239 int lua_gzip_compress(lua_State* L) {
    240     size_t input_size;
    241     const uint8_t* input = (const uint8_t*)luaL_checklstring(L, 1, &input_size);
    242     int level = luaL_optinteger(L, 2, COMPRESS_LEVEL_DEFAULT);
    243 
    244     compression_result_t* result = gzip_compress(input, input_size, level);
    245 
    246     if (result && result->error == COMPRESS_OK) {
    247         lua_pushlstring(L, (const char*)result->data, result->size);
    248         compression_result_destroy(result);
    249         return 1;
    250     } else {
    251         lua_pushnil(L);
    252         lua_pushstring(L, result ? result->error_msg : "Compression failed");
    253         if (result) compression_result_destroy(result);
    254         return 2;
    255     }
    256 }
    257 
    258 /* Lua binding: Decompress GZip */
    259 int lua_gzip_decompress(lua_State* L) {
    260     size_t input_size;
    261     const uint8_t* input = (const uint8_t*)luaL_checklstring(L, 1, &input_size);
    262 
    263     compression_result_t* result = gzip_decompress(input, input_size);
    264 
    265     if (result && result->error == COMPRESS_OK) {
    266         lua_pushlstring(L, (const char*)result->data, result->size);
    267         compression_result_destroy(result);
    268         return 1;
    269     } else {
    270         lua_pushnil(L);
    271         lua_pushstring(L, result ? result->error_msg : "Decompression failed");
    272         if (result) compression_result_destroy(result);
    273         return 2;
    274     }
    275 }
    276 
    277 /*
    278  * INTEGRATION GUIDE:
    279  *
    280  * To add full GZip support with Huffman coding, integrate ZLIB or MINIZ:
    281  *
    282  * Using ZLIB:
    283  *
    284  * 1. Cross-compile zlib for i686 or link with system zlib
    285  * 2. Include: #include <zlib.h>
    286  *
    287  * 3. Replace gzip_compress() with:
    288  *    z_stream stream;
    289  *    stream.zalloc = Z_NULL;
    290  *    stream.zfree = Z_NULL;
    291  *    stream.opaque = Z_NULL;
    292  *
    293  *    // Initialize with gzip format (windowBits = 15 + 16)
    294  *    int ret = deflateInit2(&stream, level,
    295  *                          Z_DEFLATED, 15 + 16,
    296  *                          8, Z_DEFAULT_STRATEGY);
    297  *
    298  *    stream.avail_in = input_size;
    299  *    stream.next_in = (unsigned char*)input;
    300  *    stream.avail_out = output_size;
    301  *    stream.next_out = output;
    302  *
    303  *    ret = deflate(&stream, Z_FINISH);
    304  *    deflateEnd(&stream);
    305  *
    306  * 4. Replace gzip_decompress() with:
    307  *    z_stream stream;
    308  *    stream.zalloc = Z_NULL;
    309  *    stream.zfree = Z_NULL;
    310  *    stream.opaque = Z_NULL;
    311  *
    312  *    // Initialize with gzip format (windowBits = 15 + 16)
    313  *    int ret = inflateInit2(&stream, 15 + 16);
    314  *
    315  *    stream.avail_in = input_size;
    316  *    stream.next_in = (unsigned char*)input;
    317  *    stream.avail_out = output_capacity;
    318  *    stream.next_out = output;
    319  *
    320  *    ret = inflate(&stream, Z_FINISH);
    321  *    inflateEnd(&stream);
    322  *
    323  * Using MINIZ (single-file alternative):
    324  *
    325  * 1. Download miniz.c from https://github.com/richgel999/miniz
    326  * 2. Add to project: compression/miniz.c
    327  * 3. Use same API as zlib above (miniz has compatible API)
    328  *
    329  * Benefits of GZip:
    330  * - Widely supported (web, curl, gunzip command)
    331  * - Includes CRC32 for data integrity
    332  * - File metadata (timestamp, filename)
    333  * - Streaming compression/decompression
    334  */