//+++2003-11-18 // Copyright (C) 2001,2002,2003 Mike Rieker, Beverly, MA USA // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; version 2 of the License. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //---2003-11-18 /* gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface * Copyright (C) 1992-1993 Jean-loup Gailly * The unzip code was written and put in the public domain by Mark Adler. * It was converted to a self-contained module by Mike Rieker. * * See the license_msg below and the file COPYING for the software license. * See the file algorithm.doc for the compression algorithms and file formats. */ static const char license_msg[] = { " Copyright (C) 1997,2002 Mike Rieker" " Copyright (C) 1992-1993 Jean-loup Gailly" " This program is free software; you can redistribute it and/or modify" " it under the terms of the GNU General Public License as published by" " the Free Software Foundation; either version 2, or (at your option)" " any later version." " This program is distributed in the hope that it will be useful," " but WITHOUT ANY WARRANTY; without even the implied warranty of" " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the" " GNU General Public License for more details." " You should have received a copy of the GNU General Public License" " along with this program; if not, write to the Free Software" " Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA." " *END*"}; #include #include #include "ozone.h" #include "oz_knl_hw.h" #include "gzip.h" /* tailor.h -- target dependent definitions * Copyright (C) 1992-1993 Jean-loup Gailly. * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ /* The target dependent definitions should be defined here only. * The target dependent functions should be defined in tailor.c. */ /* $Id: tailor.h,v 0.18 1993/06/14 19:32:20 jloup Exp $ */ #if defined(__MSDOS__) && !defined(MSDOS) # define MSDOS #endif #if defined(__OS2__) && !defined(OS2) # define OS2 #endif #if defined(OS2) && defined(MSDOS) /* MS C under OS/2 */ # undef MSDOS #endif #ifdef MSDOS # ifdef __GNUC__ /* DJGPP version 1.09+ on MS-DOS. * The DJGPP 1.09 stat() function must be upgraded before gzip will * fully work. * No need for DIRENT, since defines POSIX_SOURCE which * implies DIRENT. */ # define near # else # define MAXSEG_64K # ifdef __TURBOC__ # define NO_OFF_T # ifdef __BORLANDC__ # define DIRENT # else # define NO_UTIME # endif # else /* MSC */ # define HAVE_SYS_UTIME_H # define NO_UTIME_H # endif # endif # define PATH_SEP2 '\\' # define PATH_SEP3 ':' # define MAX_PATH_LEN 128 # define NO_MULTIPLE_DOTS # define MAX_EXT_CHARS 3 # define Z_SUFFIX "z" # define NO_CHOWN # define PROTO # define STDC_HEADERS # define NO_SIZE_CHECK # define casemap(c) tolow(c) /* Force file names to lower case */ # include # define OS_CODE 0x00 # define SET_BINARY_MODE(fd) setmode(fd, O_BINARY) # if !defined(NO_ASM) && !defined(ASMV) # define ASMV # endif #else # define near #endif #ifdef OS2 # define PATH_SEP2 '\\' # define PATH_SEP3 ':' # define MAX_PATH_LEN 260 # ifdef OS2FAT # define NO_MULTIPLE_DOTS # define MAX_EXT_CHARS 3 # define Z_SUFFIX "z" # define casemap(c) tolow(c) # endif # define NO_CHOWN # define PROTO # define STDC_HEADERS # include # define OS_CODE 0x06 # define SET_BINARY_MODE(fd) setmode(fd, O_BINARY) # ifdef _MSC_VER # define HAVE_SYS_UTIME_H # define NO_UTIME_H # define MAXSEG_64K # undef near # define near _near # endif # ifdef __EMX__ # define HAVE_SYS_UTIME_H # define NO_UTIME_H # define DIRENT # define EXPAND(argc,argv) \ {_response(&argc, &argv); _wildcard(&argc, &argv);} # endif # ifdef __BORLANDC__ # define DIRENT # endif # ifdef __ZTC__ # define NO_DIR # define NO_UTIME_H # include # define EXPAND(argc,argv) \ {response_expand(&argc, &argv);} # endif #endif #ifdef WIN32 /* Windows NT */ # include # include # define HAVE_SYS_UTIME_H # define NO_UTIME_H # define PATH_SEP2 '\\' # define PATH_SEP3 ':' # define MAX_PATH_LEN 260 # define NO_CHOWN # define PROTO # define STDC_HEADERS # define SET_BINARY_MODE(fd) setmode(fd, O_BINARY) # include # include # ifdef NTFAT # define NO_MULTIPLE_DOTS # define MAX_EXT_CHARS 3 # define Z_SUFFIX "z" # define casemap(c) tolow(c) /* Force file names to lower case */ # endif # define OS_CODE 0x0b #endif #ifdef MSDOS # ifdef __TURBOC__ # include # define DYN_ALLOC /* Turbo C 2.0 does not accept static allocations of large arrays */ void * fcalloc (unsigned items, unsigned size); void fcfree (void *ptr); # else /* MSC */ # include # define fcalloc(nitems,itemsize) halloc((long)(nitems),(itemsize)) # define fcfree(ptr) hfree(ptr) # endif #else # ifdef MAXSEG_64K # define fcalloc(items,size) calloc((items),(size)) # else # define fcalloc(items,size) malloc((size_t)(items)*(size_t)(size)) # endif # define fcfree(ptr) free(ptr) #endif #if defined(VAXC) || defined(VMS) # define PATH_SEP ']' # define PATH_SEP2 ':' # define SUFFIX_SEP ';' # define NO_MULTIPLE_DOTS # define Z_SUFFIX "-gz" # define RECORD_IO 1 # define casemap(c) tolow(c) # define OS_CODE 0x02 # define OPTIONS_VAR "GZIP_OPT" # define STDC_HEADERS # define NO_UTIME # define EXPAND(argc,argv) vms_expand_args(&argc,&argv); # include # define unlink delete # ifdef VAXC # define NO_FCNTL_H # include # endif #endif #ifdef AMIGA # define PATH_SEP2 ':' # define STDC_HEADERS # define OS_CODE 0x01 # define ASMV # ifdef __GNUC__ # define DIRENT # define HAVE_UNISTD_H # else /* SASC */ # define NO_STDIN_FSTAT # define SYSDIR # define NO_SYMLINK # define NO_CHOWN # define NO_FCNTL_H # include /* for read() and write() */ # define direct dirent extern void _expand_args(int *argc, char ***argv); # define EXPAND(argc,argv) _expand_args(&argc,&argv); # undef O_BINARY /* disable useless --ascii option */ # endif #endif #if defined(ATARI) || defined(atarist) # ifndef STDC_HEADERS # define STDC_HEADERS # define HAVE_UNISTD_H # define DIRENT # endif # define ASMV # define OS_CODE 0x05 # ifdef TOSFS # define PATH_SEP2 '\\' # define PATH_SEP3 ':' # define MAX_PATH_LEN 128 # define NO_MULTIPLE_DOTS # define MAX_EXT_CHARS 3 # define Z_SUFFIX "z" # define NO_CHOWN # define casemap(c) tolow(c) /* Force file names to lower case */ # define NO_SYMLINK # endif #endif #ifdef MACOS # define PATH_SEP ':' # define DYN_ALLOC # define PROTO # define NO_STDIN_FSTAT # define NO_CHOWN # define NO_UTIME # define chmod(file, mode) (0) # define OPEN(name, flags, mode) open(name, flags) # define OS_CODE 0x07 # ifdef MPW # define isatty(fd) ((fd) <= 2) # endif #endif #ifdef __50SERIES /* Prime/PRIMOS */ # define PATH_SEP '>' # define STDC_HEADERS # define NO_MEMORY_H # define NO_UTIME_H # define NO_UTIME # define NO_CHOWN # define NO_STDIN_FSTAT # define NO_SIZE_CHECK # define NO_SYMLINK # define RECORD_IO 1 # define casemap(c) tolow(c) /* Force file names to lower case */ # define put_char(c) put_byte((c) & 0x7F) # define get_char(c) ascii2pascii(get_byte()) # define OS_CODE 0x0F /* temporary, subject to change */ # ifdef SIGTERM # undef SIGTERM /* We don't want a signal handler for SIGTERM */ # endif #endif #if defined(pyr) && !defined(NOMEMCPY) /* Pyramid */ # define NOMEMCPY /* problem with overlapping copies */ #endif #ifdef TOPS20 # define OS_CODE 0x0a #endif #ifndef unix # define NO_ST_INO /* don't rely on inode numbers */ #endif /* Common defaults */ #ifndef OS_CODE # define OS_CODE 0x03 /* assume Unix */ #endif #ifndef PATH_SEP # define PATH_SEP '/' #endif #ifndef casemap # define casemap(c) (c) #endif #ifndef OPTIONS_VAR # define OPTIONS_VAR "GZIP" #endif #ifndef Z_SUFFIX # define Z_SUFFIX ".gz" #endif #ifdef MAX_EXT_CHARS # define MAX_SUFFIX MAX_EXT_CHARS #else # define MAX_SUFFIX 30 #endif #ifndef MAKE_LEGAL_NAME # ifdef NO_MULTIPLE_DOTS # define MAKE_LEGAL_NAME(name) make_simple_name(name) # else # define MAKE_LEGAL_NAME(name) # endif #endif #ifndef MIN_PART # define MIN_PART 3 /* keep at least MIN_PART chars between dots in a file name. */ #endif #ifndef EXPAND # define EXPAND(argc,argv) #endif #ifndef RECORD_IO # define RECORD_IO 0 #endif #ifndef SET_BINARY_MODE # define SET_BINARY_MODE(fd) #endif #ifndef OPEN # define OPEN(name, flags, mode) open(name, flags, mode) #endif #ifndef get_char # define get_char() get_byte() #endif #ifndef put_char # define put_char(c) put_byte(c) #endif /* gzip_int.h -- internal common declarations for all gzip modules * Copyright (C) 1992-1993 Jean-loup Gailly. * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ #ifndef NULL #define NULL ((void *)0) #endif #define local static typedef unsigned char uch; typedef unsigned short ush; typedef unsigned long ulg; #define BITS 16 #define memzero(a,s) memset (a, 0, s) #ifndef INBUFSIZ # ifdef SMALL_MEM # define INBUFSIZ 0x2000 /* input buffer size */ # else # define INBUFSIZ 0x8000 /* input buffer size */ # endif #endif #define INBUF_EXTRA 64 /* required by unlzw() */ #ifndef OUTBUFSIZ # ifdef SMALL_MEM # define OUTBUFSIZ 8192 /* output buffer size */ # else # define OUTBUFSIZ 16384 /* output buffer size */ # endif #endif #define OUTBUF_EXTRA 2048 /* required by unlzw() */ #ifndef DIST_BUFSIZE # ifdef SMALL_MEM # define DIST_BUFSIZE 0x2000 /* buffer for distances, see trees.c */ # else # define DIST_BUFSIZE 0x8000 /* buffer for distances, see trees.c */ # endif #endif /* Compression methods (see algorithm.doc) */ #define STORED 0 #define COMPRESSED 1 #define PACKED 2 #define LZHED 3 /* methods 4 to 7 reserved */ #define DEFLATED 8 #define MAX_METHODS 9 /* To save memory for 16 bit systems, some arrays are overlaid between * the various modules: * deflate: prev+head window d_buf l_buf outbuf * unlzw: tab_prefix tab_suffix stack inbuf outbuf * inflate: window inbuf * unpack: window inbuf prefix_len * unlzh: left+right window c_table inbuf c_len * For compression, input is done in window[]. For decompression, output * is done in window except for unlzw. */ #define GZIP_MAGIC "\037\213" /* Magic header for gzip files, 1F 8B */ /* gzip flag byte */ #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ #define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ #define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ #define ORIG_NAME 0x08 /* bit 3 set: original file name present */ #define COMMENT 0x10 /* bit 4 set: file comment present */ #define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ #define RESERVED 0xC0 /* bit 6,7: reserved */ /* internal file attribute */ #define UNKNOWN 0xffff #define BINARY 0 #define ASCII 1 #ifndef WSIZE # define WSIZE 0x8000 /* window size--must be a power of two, and */ #endif /* at least 32K for zip's deflate method */ #define MIN_MATCH 3 #define MAX_MATCH 258 /* The minimum and maximum match lengths */ #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) /* Minimum amount of lookahead, except at the end of the input file. * See deflate.c for comments about the MIN_MATCH+1. */ #define MAX_DIST (WSIZE-MIN_LOOKAHEAD) /* In order to simplify the code, particularly on 16 bit machines, match * distances are limited to MAX_DIST instead of WSIZE. */ /* Macro to get a byte from the input stream */ #define get_byte() (-- insize >= 0 ? *(inpnt ++) : fill_inbuf (pb)) /* put_byte is used for the compressed output, put_ubyte for the * uncompressed output. However unlzw() uses window for its * suffix table instead of its output buffer, so it does not use put_ubyte * (to be cleaned up). */ #define put_byte(c) {outbuf[outcnt++]=(uch)(c); if (outcnt==OUTBUFSIZ)\ flush_outbuf(pb);} #define put_ubyte(c) {window[outcnt++]=(uch)(c); if (outcnt==WSIZE)\ flush_window(pb);} /* Output a 16 bit value, lsb first */ #define put_short(w) \ { if (outcnt < OUTBUFSIZ-2) { \ outbuf[outcnt++] = (uch) ((w) & 0xff); \ outbuf[outcnt++] = (uch) ((ush)(w) >> 8); \ } else { \ put_byte((uch)((w) & 0xff)); \ put_byte((uch)((ush)(w) >> 8)); \ } \ } /* Output a 32 bit value to the bit stream, lsb first */ #define put_long(n) { \ put_short((n) & 0xffff); \ put_short(((ulg)(n)) >> 16); \ } #define seekable() 0 /* force sequential output */ /* Macros for getting two-byte and four-byte header values */ #define SH(p) ((ush)(uch)((p)[0]) | ((ush)(uch)((p)[1]) << 8)) #define LG(p) ((ulg)(SH(p)) | ((ulg)(SH((p)+2)) << 16)) /* Diagnostic functions */ #ifdef DEBUG # define Assert(cond,msg) {if(!(cond)) error(msg);} # define Trace(x) fprintf x # define Tracev(x) {if (verbose) fprintf x ;} # define Tracevv(x) {if (verbose>1) fprintf x ;} # define Tracec(c,x) {if (verbose && (c)) fprintf x ;} # define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} #else # define Assert(cond,msg) # define Trace(x) # define Tracev(x) # define Tracevv(x) # define Tracec(c,x) # define Tracecv(c,x) #endif #ifndef LIT_BUFSIZE # ifdef SMALL_MEM # define LIT_BUFSIZE 0x2000 # else # ifdef MEDIUM_MEM # define LIT_BUFSIZE 0x4000 # else # define LIT_BUFSIZE 0x8000 # endif # endif #endif #ifndef DIST_BUFSIZE # define DIST_BUFSIZE LIT_BUFSIZE #endif /* Sizes of match buffers for literals/lengths and distances. There are * 4 reasons for limiting LIT_BUFSIZE to 64K: * - frequencies can be kept in 16 bit counters * - if compression is not successful for the first block, all input data is * still in the window so we can still emit a stored block even when input * comes from standard input. (This can also be done for all blocks if * LIT_BUFSIZE is not greater than 32K.) * - if compression is not successful for a file smaller than 64K, we can * even emit a stored file instead of a stored block (saving 5 bytes). * - creating new Huffman trees less frequently may not provide fast * adaptation to changes in the input data statistics. (Take for * example a binary file with poorly compressible code followed by * a highly compressible string table.) Smaller buffer sizes give * fast adaptation but have of course the overhead of transmitting trees * more frequently. * - I can't count above 4 * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save * memory at the expense of compression). Some optimizations would be possible * if we rely on DIST_BUFSIZE == LIT_BUFSIZE. */ #if LIT_BUFSIZE > INBUFSIZ error cannot overlay l_buf and inbuf #endif /************************************************************************/ /* */ /* Type definitions from trees.c needed before pb struct definition */ /* */ /************************************************************************/ #define TREES_MAX_BITS 15 #define TREES_D_CODES 30 #define TREES_BL_CODES 19 #define TREES_LENGTH_CODES 29 #define TREES_LITERALS 256 #define TREES_L_CODES (TREES_LITERALS+1+TREES_LENGTH_CODES) #define TREES_HEAP_SIZE (2*TREES_L_CODES+1) /* Data structure describing a single value and its code string. */ typedef struct ct_data { union { ush freq; /* frequency count */ ush code; /* bit string */ } fc; union { ush dad; /* father node in Huffman tree */ ush len; /* length of bit string */ } dl; } ct_data; typedef struct tree_desc { ct_data *dyn_tree; /* the dynamic tree */ ct_data *static_tree; /* corresponding static tree or NULL */ const int *extra_bits; /* extra bits for each code or NULL */ int extra_base; /* base index for extra_bits */ int elems; /* max number of elements in the tree */ int max_length; /* max bit length for the codes */ int max_code; /* largest code with non zero frequency */ } tree_desc; /************************************************************************/ /* */ /* Global variable parameter block structure - */ /* Pointer 'pb' is passed as first arg to all routines */ /* then #defines define it to look like the old global variable name */ /* */ /************************************************************************/ typedef struct Pb { int pb_method; /* compression method */ int pb_insize; /* valid bytes in inbuf */ char *pb_inpnt; /* address of next byte to be processed in inbuf */ unsigned pb_outcnt; /* bytes in output buffer */ int (*pb_readrout) (void *param, int siz, char *buf, int *len, char **pnt); /* read stream */ int (*pb_writerout) (void *param, int siz, char *buf); /* write stream */ void (*pb_errorrout) (void *param, int code, char *msg); /* error message */ void *(*pb_mallocrout) (void *param, int size); /* mallocate memory block */ void (*pb_freerout) (void *param, void *buff); /* free memory block */ void *pb_rwparam; /* param to pass to readrout, writerout, errorrout, mallocrout, freerout */ long pb_bytes_in; /* number of input bytes */ long pb_bytes_out; /* number of output bytes */ long pb_time_stamp; /* original time stamp (modification time) */ int pb_exit_code; /* program exit code */ int pb_level; /* compression level */ uch pb_inbuf[INBUFSIZ+INBUF_EXTRA]; /* input buffer */ uch pb_outbuf[OUTBUFSIZ+OUTBUF_EXTRA]; /* output buffer */ ush pb_d_buf[DIST_BUFSIZE]; /* buffer for distances, see trees.c */ uch pb_window[2L*WSIZE]; /* Sliding window and suffix table (unlzw) */ ush pb_prev[1L< pb_method) #define insize (pb -> pb_insize) #define inpnt (pb -> pb_inpnt) #define outcnt (pb -> pb_outcnt) #define readrout (pb -> pb_readrout) #define writerout (pb -> pb_writerout) #define errorrout (pb -> pb_errorrout) #define mallocrout (pb -> pb_mallocrout) #define freerout (pb -> pb_freerout) #define rwparam (pb -> pb_rwparam) #define bytes_in (pb -> pb_bytes_in) #define bytes_out (pb -> pb_bytes_out) #define time_stamp (pb -> pb_time_stamp) #define exit_code (pb -> pb_exit_code) #define level (pb -> pb_level) #define inbuf (pb -> pb_inbuf) #define outbuf (pb -> pb_outbuf) #define d_buf (pb -> pb_d_buf) #define window (pb -> pb_window) #define prev (pb -> pb_prev) /************************************************************************/ /* */ /* Not so global routine prototypes */ /* */ /************************************************************************/ /* - bits.c */ static void bi_init (Pb *pb); static void send_bits (Pb *pb, int value, int length); static unsigned bi_reverse (Pb *pb, unsigned value, int length); static void bi_windup (Pb *pb); static void copy_block (Pb *pb, char *buf, unsigned len, int header); /* - deflate.c */ static void lm_init (Pb *pb, int pack_level, ush *flags); static ulg deflate (Pb *pb); /* - inflate.c */ static int inflate (Pb *pb); /* - trees.c */ static void ct_init (Pb *pb, ush *attr, int *methodp); static int ct_tally (Pb *pb, int dist, int lc); static ulg flush_block (Pb *pb, char *buf, ulg stored_len, int eof); /* - util.c */ static ulg updcrc (Pb *pb, uch *s, unsigned n); static void clear_bufs (Pb *pb); static int fill_inbuf (Pb *pb); static int file_read (Pb *pb, char *buf, int size); static void flush_outbuf (Pb *pb); static void flush_window (Pb *pb); static void error (Pb *pb, int code, char *m); /* - unzip.c */ static void unzip (Pb *pb); /* - zip.c */ static void zip (Pb *pb); /* lzw.h -- define the lzw functions. * Copyright (C) 1992-1993 Jean-loup Gailly. * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ #if !defined(OF) && defined(lint) # include "gzip.h" #endif #ifndef BITS # define BITS 16 #endif #define INIT_BITS 9 /* Initial number of bits per code */ #define LZW_MAGIC "\037\235" /* Magic header for lzw files, 1F 9D */ #define BIT_MASK 0x1f /* Mask for 'number of compression bits' */ /* Mask 0x20 is reserved to mean a fourth header byte, and 0x40 is free. * It's a pity that old uncompress does not check bit 0x20. That makes * extension of the format actually undesirable because old compress * would just crash on the new format instead of giving a meaningful * error message. It does check the number of bits, but it's more * helpful to say "unsupported format, get a new version" than * "can only handle 16 bits". */ #define BLOCK_MODE 0x80 /* Block compression: if table is full and compression rate is dropping, * clear the dictionary. */ #define LZW_RESERVED 0x60 /* reserved bits */ #define CLEAR 256 /* flush the dictionary */ #define FIRST (CLEAR+1) /* first free entry */ extern int lzw (int in, int out); extern int unlzw (int in, int out); /* revision.h -- define the version number * Copyright (C) 1992-1993 Jean-loup Gailly. * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ #define VERSION "1.2.4" #define PATCHLEVEL 0 #define REVDATE "18 Aug 93" /* This version does not support compression into old compress format: */ #ifdef LZW # undef LZW #endif /* $Id: revision.h,v 0.25 1993/06/24 08:29:52 jloup Exp $ */ /* local functions */ local void get_method (Pb *pb); /************************************************************************/ /* */ /* Stream (de)compression routine */ /* */ /* Input: */ /* */ /* readroutine = pointer to routine to read input bytes */ /* writeroutine = pointer to routine to write output bytes */ /* errorroutine = pointer to routine to call with error message */ /* callparam = pointer passed to read, write, error routines */ /* funcode = GZIP_FUNC_COMPRESS : this is to do compression */ /* GZIP_FUNC_EXPAND : this is to do decompression */ /* GZIP_FUNC_DUMMY : just copy input to output */ /* complevel = compression level (1..9) */ /* */ /* Output: */ /* */ /* gzip = GZIP_RTN_OK : successful */ /* _WARN : warnings given, but continued */ /* _RDERR : readroutine fatal error */ /* _WTERR : writeroutine fatal error */ /* _ERROR : errorroutine fatal error */ /* */ /* readroutine (void *param, int siz, char *buf, */ /* int *len, char **pnt) */ /* */ /* Input: */ /* */ /* param = as passed to gzip */ /* siz = size of buf buffer */ /* buf = address of buffer to read data into */ /* */ /* Output: */ /* */ /* readroutine = 0 : read error, abort operation */ /* 1 : read successful */ /* *len = 0 : end of file, no more input */ /* else : length of data retrieved */ /* *pnt = where data was actually read into, */ /* if different than buf */ /* */ /* writeroutine (void *param, int siz, char *buf) */ /* */ /* Input: */ /* */ /* param = as passed to gzip */ /* siz = number of bytes to write to output */ /* buf = address of data to write to output */ /* */ /* Output: */ /* */ /* writeroutine = 0 : write error, abort operation */ /* 1 : write successful */ /* */ /* errorroutine (void *param, int code, char *msg) */ /* */ /* Input: */ /* */ /* param = as passed to gzip */ /* code = error code */ /* msg = message text buffer */ /* */ /************************************************************************/ int gzip (int (*readroutine) (void *param, int siz, char *buf, int *len, char **pnt), int (*writeroutine) (void *param, int siz, char *buf), void (*errorroutine) (void *param, int code, char *msg), void *(*mallocroutine) (void *param, int size), void (*freeroutine) (void *param, void *buff), void *callparam, int funcode, int complevel) { char *buf, *pnt; int ec, len; Pb *pb; /* Initialize parameter block */ pb = NULL; if (funcode != GZIP_FUNC_DUMMY) { pb = (*mallocroutine) (callparam, sizeof *pb); memzero (pb, sizeof *pb); method = DEFLATED; level = complevel; exit_code = GZIP_RTN_OK; readrout = readroutine; writerout = writeroutine; errorrout = errorroutine; mallocrout = mallocroutine; freerout = freeroutine; rwparam = callparam; } /* Check out function code */ switch (funcode) { case GZIP_FUNC_DUMMY: { buf = (*mallocroutine) (callparam, INBUFSIZ); while (1) { len = INBUFSIZ; pnt = buf; ec = GZIP_RTN_RDERR; if (!(*readroutine) (callparam, INBUFSIZ, buf, &len, &pnt)) break; ec = GZIP_RTN_OK; if (len == 0) break; ec = GZIP_RTN_WTERR; if (!(*writeroutine) (callparam, len, pnt)) break; } (*freeroutine) (callparam, buf); return (ec); } case GZIP_FUNC_COMPRESS: { zip (pb); break; } case GZIP_FUNC_EXPAND: { get_method (pb); if (exit_code == GZIP_RTN_OK) unzip (pb); break; } default: { error (pb, GZIP_ERROR_BADFUNCODE, "bad function code"); break; } } ec = exit_code; (*freeroutine) (callparam, pb); return (ec); } /* ======================================================================== * Check the magic number of the input file and update ofname if an * original name was given and to_stdout is not set. * Return the compression method, -1 for error, -2 for warning. * Set inpnt to the offset of the next byte to be processed. * Updates time_stamp if there is one and --no-time is not used. * This function may be called repeatedly for an input file consisting * of several contiguous gzip'ed members. * IN assertions: there is at least one remaining compressed member. * If the member is a zip file, it must be the only one. */ local void get_method (Pb *pb) { char tempbuf[64]; uch flags; /* compression flags */ char magic[2]; /* magic header */ ulg stamp; /* time stamp */ magic[0] = (char)get_byte(); magic[1] = (char)get_byte(); if (memcmp(magic, GZIP_MAGIC, 2) != 0) { error (pb, GZIP_ERROR_INNOTGZIP, "input not in gzip format"); method = -1; return; } method = (int)get_byte(); if (method != DEFLATED) { /*sprintf (tempbuf, "unknown method %d", method);*/ error (pb, GZIP_ERROR_UNKNMETH, "unknown method"); method = -1; return; } flags = (uch)get_byte(); if ((flags & (ENCRYPTED | CONTINUATION | RESERVED)) != 0) { /*sprintf (tempbuf, "input has flags 0x%x", flags);*/ error (pb, GZIP_ERROR_UNKNFLAGS, "unknown flags"); method = -1; return; } stamp = (ulg)get_byte(); stamp |= ((ulg)get_byte()) << 8; stamp |= ((ulg)get_byte()) << 16; stamp |= ((ulg)get_byte()) << 24; (void)get_byte(); /* Ignore extra flags for the moment */ (void)get_byte(); /* Ignore OS type for the moment */ if ((flags & EXTRA_FIELD) != 0) { unsigned len = (unsigned)get_byte(); len |= ((unsigned)get_byte())<<8; while (len--) (void)get_byte(); } /* Discard original file name */ if ((flags & ORIG_NAME) != 0) { char c; do {c=get_byte();} while (c != 0); } /* Discard file comment if any */ if ((flags & COMMENT) != 0) { while (get_char() != 0) /* null */ ; } } /**************/ /**************/ /** **/ /** BITS.C **/ /** **/ /**************/ /**************/ /* bits.c -- output variable-length bit strings * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ /* * PURPOSE * * Output variable-length bit strings. Compression can be done * to a file or to memory. (The latter is not supported in this version.) * * DISCUSSION * * The PKZIP "deflate" file format interprets compressed file data * as a sequence of bits. Multi-bit strings in the file may cross * byte boundaries without restriction. * * The first bit of each byte is the low-order bit. * * The routines in this file allow a variable-length bit value to * be output right-to-left (useful for literal values). For * left-to-right output (useful for code strings from the tree routines), * the bits must have been reversed first with bi_reverse(). * * For in-memory compression, the compressed bit stream goes directly * into the requested output buffer. The input data is read in blocks * by the mem_read() function. The buffer is limited to 64K on 16 bit * machines. * * INTERFACE * * void bi_init () * Initialize the bit string routines. * * void send_bits (int value, int length) * Write out a bit string, taking the source bits right to * left. * * int bi_reverse (int value, int length) * Reverse the bits of a bit string, taking the source bits left to * right and emitting them right to left. * * void bi_windup (void) * Write out any remaining bits in an incomplete byte. * * void copy_block(char *buf, unsigned len, int header) * Copy a stored block to the zip file, storing first the length and * its one's complement if requested. * */ #ifdef DEBUG # include #endif /* =========================================================================== * Local data used by the "bit string" routines. */ #define bi_buf (pb -> bits_bi_buf) /* Output buffer. bits are inserted starting at the bottom (least significant * bits). */ #define Buf_size (8 * 2*sizeof(char)) /* Number of bits used within bi_buf. (bi_buf might be implemented on * more than 16 bits on some systems.) */ #define bi_valid (pb -> bits_bi_valid) /* Number of valid bits in bi_buf. All bits above the last valid bit * are always zero. */ #ifdef DEBUG ulg bits_sent; /* bit length of the compressed data */ #endif /* =========================================================================== * Initialize the bit string routines. */ static void bi_init (Pb *pb) { bi_buf = 0; bi_valid = 0; #ifdef DEBUG bits_sent = 0L; #endif } /* =========================================================================== * Send a value on a given number of bits. * IN assertion: length <= 16 and value fits in length bits. */ static void send_bits(Pb *pb, int value, int length) { #ifdef DEBUG Tracev((stderr," l %2d v %4x ", length, value)); Assert(length > 0 && length <= 15, "invalid length"); bits_sent += (ulg)length; #endif /* If not enough room in bi_buf, use (valid) bits from bi_buf and * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) * unused bits in value. */ if (bi_valid > (int)Buf_size - length) { bi_buf |= (value << bi_valid); put_short(bi_buf); bi_buf = (ush)value >> (Buf_size - bi_valid); bi_valid += length - Buf_size; } else { bi_buf |= value << bi_valid; bi_valid += length; } } /* =========================================================================== * Reverse the first len bits of a code, using straightforward code (a faster * method would use a table) * IN assertion: 1 <= len <= 15 */ static unsigned bi_reverse(Pb *pb, unsigned code, int len) { register unsigned res = 0; do { res |= code & 1; code >>= 1, res <<= 1; } while (--len > 0); return res >> 1; } /* =========================================================================== * Write out any remaining bits in an incomplete byte. */ static void bi_windup(Pb *pb) { if (bi_valid > 8) { put_short(bi_buf); } else if (bi_valid > 0) { put_byte(bi_buf); } bi_buf = 0; bi_valid = 0; #ifdef DEBUG bits_sent = (bits_sent+7) & ~7; #endif } /* =========================================================================== * Copy a stored block to the zip file, storing first the length and its * one's complement if requested. */ static void copy_block(Pb *pb, char *buf, unsigned len, int header) { bi_windup(pb); /* align on byte boundary */ if (header) { put_short((ush)len); put_short((ush)~len); #ifdef DEBUG bits_sent += 2*16; #endif } #ifdef DEBUG bits_sent += (ulg)len<<3; #endif while (len--) { put_byte(*buf++); } } /*****************/ /*****************/ /** **/ /** DEFLATE.C **/ /** **/ /*****************/ /*****************/ /* deflate.c -- compress data using the deflation algorithm * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ /* * PURPOSE * * Identify new text as repetitions of old text within a fixed- * length sliding window trailing behind the new text. * * DISCUSSION * * The "deflation" process depends on being able to identify portions * of the input text which are identical to earlier input (within a * sliding window trailing behind the input currently being processed). * * The most straightforward technique turns out to be the fastest for * most input files: try all possible matches and select the longest. * The key feature of this algorithm is that insertions into the string * dictionary are very simple and thus fast, and deletions are avoided * completely. Insertions are performed at each input character, whereas * string matches are performed only when the previous match ends. So it * is preferable to spend more time in matches to allow very fast string * insertions and avoid deletions. The matching algorithm for small * strings is inspired from that of Rabin & Karp. A brute force approach * is used to find longer strings when a small match has been found. * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze * (by Leonid Broukhis). * A previous version of this file used a more sophisticated algorithm * (by Fiala and Greene) which is guaranteed to run in linear amortized * time, but has a larger average cost, uses more memory and is patented. * However the F&G algorithm may be faster for some highly redundant * files if the parameter max_chain_length (described below) is too large. * * ACKNOWLEDGEMENTS * * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and * I found it in 'freeze' written by Leonid Broukhis. * Thanks to many info-zippers for bug reports and testing. * * REFERENCES * * APPNOTE.TXT documentation file in PKZIP 1.93a distribution. * * A description of the Rabin and Karp algorithm is given in the book * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. * * Fiala,E.R., and Greene,D.H. * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 * * INTERFACE * * void lm_init (int pack_level, ush *flags) * Initialize the "longest match" routines for a new file * * ulg deflate (void) * Processes a new input file and return its compressed length. Sets * the compressed length, crc, deflate flags and internal file * attributes. */ /* =========================================================================== * Configuration parameters */ /* Compile with MEDIUM_MEM to reduce the memory requirements or * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the * entire input file can be held in memory (not possible on 16 bit systems). * Warning: defining these symbols affects HASH_BITS (see below) and thus * affects the compression ratio. The compressed output * is still correct, and might even be smaller in some cases. */ #ifdef SMALL_MEM # define HASH_BITS 13 /* Number of bits used to hash strings */ #endif #ifdef MEDIUM_MEM # define HASH_BITS 14 #endif #ifndef HASH_BITS # define HASH_BITS 15 /* For portability to 16 bit machines, do not use values above 15. */ #endif /* To save space (see unlzw.c), we overlay prev+head with tab_prefix and * window with tab_suffix. Check that we can do this: */ #if (WSIZE<<1) > (1< BITS-1 error: cannot overlay head with tab_prefix1 #endif #define HASH_SIZE (unsigned)(1< deflate_block_start) /* window position at the beginning of the current output block. Gets * negative when the window is moved backwards. */ #define ins_h (pb -> deflate_ins_h) /* hash index of string to be inserted */ #define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH) /* Number of bits by which ins_h and del_h must be shifted at each * input step. It must be such that after MIN_MATCH steps, the oldest * byte no longer takes part in the hash key, that is: * H_SHIFT * MIN_MATCH >= HASH_BITS */ #define prev_length (pb -> deflate_prev_length) /* Length of the best match at previous step. Matches not greater than this * are discarded. This is used in the lazy match evaluation. */ #define strstart (pb -> deflate_strstart) /* start of string to insert */ #define match_start (pb -> deflate_match_start) /* start of matching string */ #define eofile (pb -> deflate_eofile) /* flag set at end of input file */ #define lookahead (pb -> deflate_lookahead) /* number of valid bytes ahead in window */ #define max_chain_length (pb -> deflate_max_chain_length) /* To speed up deflation, hash chains are never searched beyond this length. * A higher limit improves compression ratio but degrades the speed. */ #define max_lazy_match (pb -> deflate_max_lazy_match) /* Attempt to find a better match only when the current match is strictly * smaller than this value. This mechanism is used only for compression * levels >= 4. */ #define max_insert_length max_lazy_match /* Insert new strings in the hash table only if the match length * is not greater than this length. This saves time but degrades compression. * max_insert_length is used only for compression levels <= 3. */ #define compr_level (pb -> deflate_compr_level) /* compression level (1..9) */ #define good_match (pb -> deflate_good_match) /* Use a faster search when the previous match is longer than this */ /* Values for max_lazy_match, good_match and max_chain_length, depending on * the desired pack level (0..9). The values given below have been tuned to * exclude worst case performance for pathological files. Better values may be * found for specific files. */ typedef struct config { ush good_length; /* reduce lazy search above this match length */ ush max_lazy; /* do not perform lazy search above this match length */ ush nice_length; /* quit search above this match length */ ush max_chain; } config; #define nice_match (pb -> deflate_nice_match) /* Stop searching when current match exceeds this */ local const config configuration_table[10] = { /* good lazy nice chain */ /* 0 */ {0, 0, 0, 0}, /* store only */ /* 1 */ {4, 4, 8, 4}, /* maximum speed, no lazy matches */ /* 2 */ {4, 5, 16, 8}, /* 3 */ {4, 6, 32, 32}, /* 4 */ {4, 4, 16, 16}, /* lazy matches */ /* 5 */ {8, 16, 32, 32}, /* 6 */ {8, 16, 128, 128}, /* 7 */ {8, 32, 128, 256}, /* 8 */ {32, 128, 258, 1024}, /* 9 */ {32, 258, 258, 4096}}; /* maximum compression */ /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 * For deflate_fast() (levels <= 3) good is ignored and lazy has a different * meaning. */ #define EQUAL 0 /* result of memcmp for equal strings */ /* =========================================================================== * Prototypes for local functions. */ local void fill_window (Pb *pb); local ulg deflate_fast (Pb *pb); static int longest_match (Pb *pb, IPos cur_match); #ifdef ASMV void match_init (void); /* asm code initialization */ #endif #ifdef DEBUG local void check_match (IPos start, IPos match, int length); #endif /* =========================================================================== * Update a hash value with the given input byte * IN assertion: all calls to to UPDATE_HASH are made with consecutive * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. */ #define UPDATE_HASH(h,c) (h = (((h)< 9) { error(pb, GZIP_ERROR_BADPAKLVL, "bad pack level"); return; } compr_level = pack_level; /* Initialize the hash table. */ #if defined(MAXSEG_64K) && HASH_BITS == 15 for (j = 0; j < HASH_SIZE; j++) head[j] = NIL; #else memzero((char*)head, HASH_SIZE*sizeof(*head)); #endif /* prev will be initialized on the fly */ /* Set the default configuration parameters: */ max_lazy_match = configuration_table[pack_level].max_lazy; good_match = configuration_table[pack_level].good_length; nice_match = configuration_table[pack_level].nice_length; max_chain_length = configuration_table[pack_level].max_chain; if (pack_level == 1) { *flags |= FAST; } else if (pack_level == 9) { *flags |= SLOW; } /* ??? reduce max_chain_length for binary files */ strstart = 0; block_start = 0L; #ifdef ASMV match_init(); /* initialize the asm code */ #endif lookahead = file_read (pb, (char*)window, sizeof(int) <= 2 ? WSIZE : 2*WSIZE); if (lookahead == 0 || lookahead == (unsigned)(-1)) { eofile = 1, lookahead = 0; return; } eofile = 0; /* Make sure that we always have enough lookahead. This is important * if input comes from a device such as a tty. */ while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(pb); ins_h = 0; for (j=0; j= 1 */ #ifndef ASMV /* For MSDOS, OS/2 and 386 Unix, an optimized version is in match.asm or * match.s. The code is functionally equivalent, so you can use the C version * if desired. */ static int longest_match(Pb *pb, IPos cur_match) { unsigned chain_length = max_chain_length; /* max hash chain length */ register uch *scan = window + strstart; /* current string */ register uch *match; /* matched string */ register int len; /* length of current match */ int best_len = prev_length; /* best match length so far */ IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL; /* Stop when cur_match becomes <= limit. To simplify the code, * we prevent matches with the string of window index 0. */ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. * It is easy to get rid of this optimization if necessary. */ #if HASH_BITS < 8 || MAX_MATCH != 258 error: Code too clever #endif #ifdef UNALIGNED_OK /* Compare two bytes at a time. Note: this is not always beneficial. * Try with and without -DUNALIGNED_OK to check. */ register uch *strend = window + strstart + MAX_MATCH - 1; register ush scan_start = *(ush*)scan; register ush scan_end = *(ush*)(scan+best_len-1); #else register uch *strend = window + strstart + MAX_MATCH; register uch scan_end1 = scan[best_len-1]; register uch scan_end = scan[best_len]; #endif /* Do not waste too much time if we already have a good match: */ if (prev_length >= (int)good_match) { chain_length >>= 2; } Assert(strstart <= window_size-MIN_LOOKAHEAD, "insufficient lookahead"); do { Assert(cur_match < strstart, "no future"); match = window + cur_match; /* Skip to next match if the match length cannot increase * or if the match length is less than 2: */ #if (defined(UNALIGNED_OK) && MAX_MATCH == 258) /* This code assumes sizeof(unsigned short) == 2. Do not use * UNALIGNED_OK if your compiler uses a different size. */ if (*(ush*)(match+best_len-1) != scan_end || *(ush*)match != scan_start) continue; /* It is not necessary to compare scan[2] and match[2] since they are * always equal when the other bytes match, given that the hash keys * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at * strstart+3, +5, ... up to strstart+257. We check for insufficient * lookahead only every 4th comparison; the 128th check will be made * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is * necessary to put more guard bytes at the end of the window, or * to check more often for insufficient lookahead. */ scan++, match++; do { } while (*(ush*)(scan+=2) == *(ush*)(match+=2) && *(ush*)(scan+=2) == *(ush*)(match+=2) && *(ush*)(scan+=2) == *(ush*)(match+=2) && *(ush*)(scan+=2) == *(ush*)(match+=2) && scan < strend); /* The funny "do {}" generates better code on most compilers */ /* Here, scan <= window+strstart+257 */ Assert(scan <= window+(unsigned)(window_size-1), "wild scan"); if (*scan == *match) scan++; len = (MAX_MATCH - 1) - (int)(strend-scan); scan = strend - (MAX_MATCH-1); #else /* UNALIGNED_OK */ if (match[best_len] != scan_end || match[best_len-1] != scan_end1 || *match != *scan || *++match != scan[1]) continue; /* The check at best_len-1 can be removed because it will be made * again later. (This heuristic is not always a win.) * It is not necessary to compare scan[2] and match[2] since they * are always equal when the other bytes match, given that * the hash keys are equal and that HASH_BITS >= 8. */ scan += 2, match++; /* We check for insufficient lookahead only every 8th comparison; * the 256th check will be made at strstart+258. */ do { } while (*++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && scan < strend); len = MAX_MATCH - (int)(strend - scan); scan = strend - MAX_MATCH; #endif /* UNALIGNED_OK */ if (len > best_len) { match_start = cur_match; best_len = len; if (len >= nice_match) break; #ifdef UNALIGNED_OK scan_end = *(ush*)(scan+best_len-1); #else scan_end1 = scan[best_len-1]; scan_end = scan[best_len]; #endif } } while ((cur_match = prev[cur_match & WMASK]) > limit && --chain_length != 0); return best_len; } #endif /* ASMV */ #ifdef DEBUG /* =========================================================================== * Check that the match at match_start is indeed a match. */ local void check_match(start, match, length) IPos start, match; int length; { char erbuf[256]; /* check that the match is indeed a match */ if (memcmp((char*)window + match, (char*)window + start, length) != EQUAL) { sprintf(erbuf, " start %d, match %d, length %d\ninvalid match", start, match, length); error (erbuf); } if (verbose > 1) { fprintf(stderr,"\\[%d,%d]", start-match, length); do { putc(window[start++], stderr); } while (--length != 0); } } #else # define check_match(start, match, length) #endif /* =========================================================================== * Fill the window when the lookahead becomes insufficient. * Updates strstart and lookahead, and sets eofile if end of input file. * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0 * OUT assertions: at least one byte has been read, or eofile is set; * file reads are performed for at least two bytes (required for the * translate_eol option). */ local void fill_window(Pb *pb) { register unsigned n, m; int more = (unsigned)(window_size - (ulg)lookahead - (ulg)strstart); /* Amount of free space at the end of the window. */ /* If the window is almost full and there is insufficient lookahead, * move the upper half to the lower one to make room in the upper half. */ if (more == (unsigned)(-1)) { /* Very unlikely, but possible on 16 bit machine if strstart == 0 * and lookahead == 1 (input done one byte at time) */ more--; } else if (strstart >= WSIZE+MAX_DIST) { /* By the IN assertion, the window is not empty so we can't confuse * more == 0 with more == 64K on a 16 bit machine. */ Assert(window_size == (ulg)2*WSIZE, "no sliding with BIG_MEM"); memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE); match_start -= WSIZE; strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ block_start -= (long) WSIZE; for (n = 0; n < HASH_SIZE; n++) { m = head[n]; head[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL); } for (n = 0; n < WSIZE; n++) { m = prev[n]; prev[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL); /* If n is not on any hash chain, prev[n] is garbage but * its value will never be used. */ } more += WSIZE; } /* At this point, more >= 2 */ if (!eofile) { n = file_read (pb, (char*)window+strstart+lookahead, more); if (n == 0 || n == (unsigned)(-1)) { eofile = 1; } else { lookahead += n; } } } /* =========================================================================== * Flush the current block, with given end-of-file flag. * IN assertion: strstart is set to the end of the current match. */ #define FLUSH_BLOCK(eof) \ flush_block(pb, block_start >= 0L ? (char*)&window[(unsigned)block_start] : \ (char*)NULL, (long)strstart - block_start, (eof)) /* =========================================================================== * Processes a new input file and return its compressed length. This * function does not perform lazy evaluationof matches and inserts * new strings in the dictionary only for unmatched strings or for short * matches. It is used only for the fast compression options. */ local ulg deflate_fast(Pb *pb) { IPos hash_head; /* head of the hash chain */ int flush; /* set if current block must be flushed */ unsigned match_length = 0; /* length of best match */ prev_length = MIN_MATCH-1; while (lookahead != 0) { /* Insert the string window[strstart .. strstart+2] in the * dictionary, and set hash_head to the head of the hash chain: */ INSERT_STRING(strstart, hash_head); /* Find the longest match, discarding those <= prev_length. * At this point we have always match_length < MIN_MATCH */ if (hash_head != NIL && strstart - hash_head <= MAX_DIST) { /* To simplify the code, we prevent matches with the string * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ match_length = longest_match (pb, hash_head); /* longest_match() sets match_start */ if (match_length > lookahead) match_length = lookahead; } if (match_length >= MIN_MATCH) { check_match(strstart, match_start, match_length); flush = ct_tally(pb, strstart-match_start, match_length - MIN_MATCH); lookahead -= match_length; /* Insert new strings in the hash table only if the match length * is not too large. This saves time but degrades compression. */ if (match_length <= max_insert_length) { match_length--; /* string at strstart already in hash table */ do { strstart++; INSERT_STRING(strstart, hash_head); /* strstart never exceeds WSIZE-MAX_MATCH, so there are * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH * these bytes are garbage, but it does not matter since * the next lookahead bytes will be emitted as literals. */ } while (--match_length != 0); strstart++; } else { strstart += match_length; match_length = 0; ins_h = window[strstart]; UPDATE_HASH(ins_h, window[strstart+1]); #if MIN_MATCH != 3 Call UPDATE_HASH() MIN_MATCH-3 more times #endif } } else { /* No match, output a literal byte */ Tracevv((stderr,"%c",window[strstart])); flush = ct_tally (pb, 0, window[strstart]); lookahead--; strstart++; } if (flush) FLUSH_BLOCK(0), block_start = strstart; /* Make sure that we always have enough lookahead, except * at the end of the input file. We need MAX_MATCH bytes * for the next match, plus MIN_MATCH bytes to insert the * string following the next match. */ while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(pb); } return FLUSH_BLOCK(1); /* eof */ } /* =========================================================================== * Same as above, but achieves better compression. We use a lazy * evaluation for matches: a match is finally adopted only if there is * no better match at the next window position. */ static ulg deflate(Pb *pb) { IPos hash_head; /* head of hash chain */ IPos prev_match; /* previous match */ int flush; /* set if current block must be flushed */ int match_available = 0; /* set if previous match exists */ register unsigned match_length = MIN_MATCH-1; /* length of best match */ if (compr_level <= 3) return deflate_fast(pb); /* optimized for speed */ /* Process the input block. */ while (lookahead != 0) { /* Insert the string window[strstart .. strstart+2] in the * dictionary, and set hash_head to the head of the hash chain: */ INSERT_STRING(strstart, hash_head); /* Find the longest match, discarding those <= prev_length. */ prev_length = match_length, prev_match = match_start; match_length = MIN_MATCH-1; if ((hash_head != NIL) && (prev_length < (int)max_lazy_match) && (strstart - hash_head <= MAX_DIST)) { /* To simplify the code, we prevent matches with the string * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ match_length = longest_match (pb, hash_head); /* longest_match() sets match_start */ if (match_length > lookahead) match_length = lookahead; /* Ignore a length 3 match if it is too distant: */ if (match_length == MIN_MATCH && strstart-match_start > TOO_FAR){ /* If prev_match is also MIN_MATCH, match_start is garbage * but we will ignore the current match anyway. */ match_length--; } } /* If there was a match at the previous step and the current * match is not better, output the previous match: */ if (prev_length >= MIN_MATCH && (int)match_length <= prev_length) { check_match(strstart-1, prev_match, prev_length); flush = ct_tally(pb, strstart-1-prev_match, prev_length - MIN_MATCH); /* Insert in hash table all strings up to the end of the match. * strstart-1 and strstart are already inserted. */ lookahead -= prev_length-1; prev_length -= 2; do { strstart++; INSERT_STRING(strstart, hash_head); /* strstart never exceeds WSIZE-MAX_MATCH, so there are * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH * these bytes are garbage, but it does not matter since the * next lookahead bytes will always be emitted as literals. */ } while (--prev_length != 0); match_available = 0; match_length = MIN_MATCH-1; strstart++; if (flush) FLUSH_BLOCK(0), block_start = strstart; } else if (match_available) { /* If there was no match at the previous position, output a * single literal. If there was a match but the current match * is longer, truncate the previous match to a single literal. */ Tracevv((stderr,"%c",window[strstart-1])); if (ct_tally (pb, 0, window[strstart-1])) { FLUSH_BLOCK(0), block_start = strstart; } strstart++; lookahead--; } else { /* There is no previous match to compare with, wait for * the next step to decide. */ match_available = 1; strstart++; lookahead--; } Assert (strstart <= bytes_in && lookahead <= bytes_in, "a bit too far"); /* Make sure that we always have enough lookahead, except * at the end of the input file. We need MAX_MATCH bytes * for the next match, plus MIN_MATCH bytes to insert the * string following the next match. */ while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(pb); } if (match_available) ct_tally (pb, 0, window[strstart-1]); return FLUSH_BLOCK(1); /* eof */ } /*****************/ /*****************/ /** **/ /** INFLATE.C **/ /** **/ /*****************/ /*****************/ /* inflate.c -- Not copyrighted 1992 by Mark Adler version c10p1, 10 January 1993 */ /* You can do whatever you like with this source file, though I would prefer that if you modify it and redistribute it that you include comments to that effect with your name and the date. Thank you. [The history has been moved to the file ChangeLog.] */ /* Inflate deflated (PKZIP's method 8 compressed) data. The compression method searches for as much of the current string of bytes (up to a length of 258) in the previous 32K bytes. If it doesn't find any matches (of at least length 3), it codes the next byte. Otherwise, it codes the length of the matched string and its distance backwards from the current position. There is a single Huffman code that codes both single bytes (called "literals") and match lengths. A second Huffman code codes the distance information, which follows a length code. Each length or distance code actually represents a base value and a number of "extra" (sometimes zero) bits to get to add to the base value. At the end of each deflated block is a special end-of-block (EOB) literal/ length code. The decoding process is basically: get a literal/length code; if EOB then done; if a literal, emit the decoded byte; if a length then get the distance and emit the referred-to bytes from the sliding window of previously emitted data. There are (currently) three kinds of inflate blocks: stored, fixed, and dynamic. The compressor deals with some chunk of data at a time, and decides which method to use on a chunk-by-chunk basis. A chunk might typically be 32K or 64K. If the chunk is uncompressible, then the "stored" method is used. In this case, the bytes are simply stored as is, eight bits per byte, with none of the above coding. The bytes are preceded by a count, since there is no longer an EOB code. If the data is compressible, then either the fixed or dynamic methods are used. In the dynamic method, the compressed data is preceded by an encoding of the literal/length and distance Huffman codes that are to be used to decode this block. The representation is itself Huffman coded, and so is preceded by a description of that code. These code descriptions take up a little space, and so for small blocks, there is a predefined set of codes, called the fixed codes. The fixed method is used if the block codes up smaller that way (usually for quite small chunks), otherwise the dynamic method is used. In the latter case, the codes are customized to the probabilities in the current block, and so can code it much better than the pre-determined fixed codes. The Huffman codes themselves are decoded using a mutli-level table lookup, in order to maximize the speed of decoding plus the speed of building the decoding tables. See the comments below that precede the lbits and dbits tuning parameters. */ /* Notes beyond the 1.93a appnote.txt: 1. Distance pointers never point before the beginning of the output stream. 2. Distance pointers can point back across blocks, up to 32k away. 3. There is an implied maximum of 7 bits for the bit length table and 15 bits for the actual data. 4. If only one code exists, then it is encoded using one bit. (Zero would be more efficient, but perhaps a little confusing.) If two codes exist, they are coded using one bit each (0 and 1). 5. There is no way of sending zero distance codes--a dummy must be sent if there are none. (History: a pre 2.0 version of PKZIP would store blocks with no distance codes, but this was discovered to be too harsh a criterion.) Valid only for 1.93a. 2.04c does allow zero distance codes, which is sent as one code of zero bits in length. 6. There are up to 286 literal/length codes. Code 256 represents the end-of-block. Note however that the static length tree defines 288 codes just to fill out the Huffman codes. Codes 286 and 287 cannot be used though, since there is no length base or extra bits defined for them. Similarly, there are up to 30 distance codes. However, static trees define 32 codes (all 5 bits) to fill out the Huffman codes, but the last two had better not show up in the data. 7. Unzip can check dynamic Huffman blocks for complete code sets. The exception is that a single code would not be complete (see #4). 8. The five bits following the block type is really the number of literal codes sent minus 257. 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits (1+6+6). Therefore, to output three times the length, you output three codes (1+1+1), whereas to output four times the same length, you only need two codes (1+3). Hmm. 10. In the tree reconstruction algorithm, Code = Code + Increment only if BitLength(i) is not zero. (Pretty obvious.) 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) 12. Note: length code 284 can represent 227-258, but length code 285 really is 258. The last length deserves its own, short code since it gets used a lot in very redundant files. The length 258 is special since 258 - 3 (the min match length) is 255. 13. The literal/length and distance code bit lengths are read as a single stream of lengths. It is possible (and advantageous) for a repeat code (16, 17, or 18) to go across the boundary between the two sets of lengths. */ #if defined(STDC_HEADERS) || !defined(NO_STDLIB_H) # include #endif #define slide window /* Huffman code lookup table entry--this entry is four bytes for machines that have 16-bit pointers (e.g. PC's in the small or medium model). Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16 means that v is a literal, 16 < e < 32 means that v is a pointer to the next table, which codes e - 16 bits, and lastly e == 99 indicates an unused code. If a code with e == 99 is looked up, this implies an error in the data. */ struct huft { uch e; /* number of extra bits or operation */ uch b; /* number of bits in this code or subcode */ union { ush n; /* literal, length base, or distance base */ struct huft *t; /* pointer to next level of table */ } v; }; /* Function prototypes */ static int huft_build(Pb *pb, unsigned *b, unsigned n, unsigned s, const ush *d, const ush *e, struct huft **t, int *m); static int huft_free (Pb *pb, struct huft *); static int inflate_codes(Pb *pb, struct huft *tl, struct huft *td, int bl, int bd); static int inflate_stored (Pb *pb); static int inflate_fixed (Pb *pb); static int inflate_dynamic (Pb *pb); static int inflate_block (Pb *pb, int *e); /* The inflate algorithm uses a sliding 32K byte window on the uncompressed stream to find repeated byte strings. This is implemented here as a circular buffer. The index is updated simply by incrementing and then and'ing with 0x7fff (32K-1). */ /* It is left to other modules to supply the 32K area. It is assumed to be usable as if it were declared "uch slide[32768];" or as just "uch *slide;" and then malloc'ed in the latter case. The definition must be in unzip.h, included above. */ /* unsigned wp; current position in slide */ #define wp outcnt #define flush_output(w) (wp=(w),flush_window(pb)) /* Tables for deflate from PKZIP's appnote.txt. */ static const unsigned border[] = { /* Order of the bit length code lengths */ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; static const ush cplens[] = { /* Copy lengths for literal codes 257..285 */ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; /* note: see note #13 above about the 258 in this list. */ static const ush cplext[] = { /* Extra bits for literal codes 257..285 */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */ static const ush cpdist[] = { /* Copy offsets for distance codes 0..29 */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; static const ush cpdext[] = { /* Extra bits for distance codes */ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; /* Macros for inflate() bit peeking and grabbing. The usage is: NEEDBITS(j) x = b & mask_bits[j]; DUMPBITS(j) where NEEDBITS makes sure that b has at least j bits in it, and DUMPBITS removes the bits from b. The macros use the variable k for the number of bits in b. Normally, b and k are register variables for speed, and are initialized at the beginning of a routine that uses these macros from a global bit buffer and count. If we assume that EOB will be the longest code, then we will never ask for bits with NEEDBITS that are beyond the end of the stream. So, NEEDBITS should not read any more bytes than are needed to meet the request. Then no bytes need to be "returned" to the buffer at the end of the last block. However, this assumption is not true for fixed blocks--the EOB code is 7 bits, but the other literal/length codes can be 8 or 9 bits. (The EOB code is shorter than other codes because fixed blocks are generally short. So, while a block always has an EOB, many other literal/length codes have a significantly lower probability of showing up at all.) However, by making the first table have a lookup of seven bits, the EOB code will be found in that first lookup, and so will not require that too many bits be pulled from the stream. */ #define bb (pb -> inflate_bb) /* bit buffer */ #define bk (pb -> inflate_bk) /* bits in bit buffer */ static const ush mask_bits[] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff }; #define NEXTBYTE() (uch)get_byte() #define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<>=(n);k-=(n);} /* Huffman code decoding is performed using a multi-level table lookup. The fastest way to decode is to simply build a lookup table whose size is determined by the longest code. However, the time it takes to build this table can also be a factor if the data being decoded is not very long. The most common codes are necessarily the shortest codes, so those codes dominate the decoding time, and hence the speed. The idea is you can have a shorter table that decodes the shorter, more probable codes, and then point to subsidiary tables for the longer codes. The time it costs to decode the longer codes is then traded against the time it takes to make longer tables. This results of this trade are in the variables lbits and dbits below. lbits is the number of bits the first level table for literal/ length codes can decode in one step, and dbits is the same thing for the distance codes. Subsequent tables are also less than or equal to those sizes. These values may be adjusted either when all of the codes are shorter than that, in which case the longest code length in bits is used, or when the shortest code is *longer* than the requested table size, in which case the length of the shortest code in bits is used. There are two different values for the two tables, since they code a different number of possibilities each. The literal/length table codes 286 possible values, or in a flat code, a little over eight bits. The distance table codes 30 possible values, or a little less than five bits, flat. The optimum values for speed end up being about one bit more than those, so lbits is 8+1 and dbits is 5+1. The optimum values may differ though from machine to machine, and possibly even between compilers. Your mileage may vary. */ static const int lbits = 9; /* bits in base literal/length lookup table */ static const int dbits = 6; /* bits in base distance lookup table */ /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ #define BMAX 16 /* maximum bit length of any code (16 for explode) */ #define N_MAX 288 /* maximum number of codes in any set */ #define hufts (pb -> inflate_hufts) /* track memory usage */ /* Given a list of code lengths and a maximum table size, make a set of tables to decode that set of codes. Return zero on success, one if the given code set is incomplete (the tables are still built in this case), two if the input is invalid (all zero length codes or an oversubscribed set of lengths), and three if not enough memory. */ /* unsigned *b; code lengths in bits (all assumed <= BMAX) */ /* unsigned n; number of codes (assumed <= N_MAX) */ /* unsigned s; number of simple-valued codes (0..s-1) */ /* const ush *d; list of base values for non-simple codes */ /* const ush *e; list of extra bits for non-simple codes */ /* struct huft **t; result: starting table */ /* int *m; maximum lookup bits, returns actual */ static int huft_build(Pb *pb, unsigned *b, unsigned n, unsigned s, const ush *d, const ush *e, struct huft **t, int *m) { unsigned a; /* counter for codes of length k */ unsigned c[BMAX+1]; /* bit length count table */ unsigned f; /* i repeats in table every f entries */ int g; /* maximum code length */ int h; /* table level */ register unsigned i; /* counter, current code */ register unsigned j; /* counter */ register int k; /* number of bits in current code */ int l; /* bits per table (returned in m) */ register unsigned *p; /* pointer into c[], b[], or v[] */ register struct huft *q; /* points to current table */ struct huft r; /* table entry for structure assignment */ struct huft *u[BMAX]; /* table stack */ unsigned v[N_MAX]; /* values in order of bit length */ register int w; /* bits before this table == (l * h) */ unsigned x[BMAX+1]; /* bit offsets, then code stack */ unsigned *xp; /* pointer into x */ int y; /* number of dummy codes added */ unsigned z; /* number of entries in current table */ /* Generate counts for each bit length */ memzero(c, sizeof(c)); p = b; i = n; do { Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" : "0x%x %d\n"), n-i, *p)); c[*p]++; /* assume all entries <= BMAX */ p++; /* Can't combine with above line (Solaris bug) */ } while (--i); if (c[0] == n) /* null input--all zero length codes */ { *t = (struct huft *)NULL; *m = 0; return 0; } /* Find minimum and maximum length, bound *m by those */ l = *m; for (j = 1; j <= BMAX; j++) if (c[j]) break; k = j; /* minimum code length */ if ((unsigned)l < j) l = j; for (i = BMAX; i; i--) if (c[i]) break; g = i; /* maximum code length */ if ((unsigned)l > i) l = i; *m = l; /* Adjust last length count to fill out codes, if needed */ for (y = 1 << j; j < i; j++, y <<= 1) if ((y -= c[j]) < 0) return 2; /* bad input: more codes than bits */ if ((y -= c[i]) < 0) return 2; c[i] += y; /* Generate starting offsets into the value table for each length */ x[1] = j = 0; p = c + 1; xp = x + 2; while (--i) { /* note that i == g from above */ *xp++ = (j += *p++); } /* Make a table of values in order of bit lengths */ p = b; i = 0; do { if ((j = *p++) != 0) v[x[j]++] = i; } while (++i < n); /* Generate the Huffman codes and for each, make the table entries */ x[0] = i = 0; /* first Huffman code is zero */ p = v; /* grab values in bit order */ h = -1; /* no tables yet--level -1 */ w = -l; /* bits decoded == (l * h) */ u[0] = (struct huft *)NULL; /* just to keep compilers happy */ q = (struct huft *)NULL; /* ditto */ z = 0; /* ditto */ /* go through the bit lengths (k already is bits in shortest code) */ for (; k <= g; k++) { a = c[k]; while (a--) { /* here i is the Huffman code of length k bits for value *p */ /* make tables up to required level */ while (k > w + l) { h++; w += l; /* previous table always l bits */ /* compute minimum size table less than or equal to l bits */ z = (z = g - w) > (unsigned)l ? l : z; /* upper limit on table size */ if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ { /* too few codes for k-w bit table */ f -= a + 1; /* deduct codes from patterns left */ xp = c + k; while (++j < z) /* try smaller tables up to z bits */ { if ((f <<= 1) <= *++xp) break; /* enough codes to use up j bits */ f -= *xp; /* else deduct codes from patterns */ } } z = 1 << j; /* table entries for j-bit table */ /* allocate and link in new table */ if ((q = (struct huft *)mallocrout(rwparam, (z + 1)*sizeof(struct huft))) == (struct huft *)NULL) { if (h) huft_free(pb, u[0]); return 3; /* not enough memory */ } hufts += z + 1; /* track memory usage */ *t = q + 1; /* link to list for huft_free() */ *(t = &(q->v.t)) = (struct huft *)NULL; u[h] = ++q; /* table starts after link */ /* connect to last table, if there is one */ if (h) { x[h] = i; /* save pattern for backing up */ r.b = (uch)l; /* bits to dump before this table */ r.e = (uch)(16 + j); /* bits in this table */ r.v.t = q; /* pointer to this table */ j = i >> (w - l); /* (get around Turbo C bug) */ u[h-1][j] = r; /* connect to last table */ } } /* set up table entry in r */ r.b = (uch)(k - w); if (p >= v + n) r.e = 99; /* out of values--invalid code */ else if (*p < s) { r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */ r.v.n = (ush)(*p); /* simple code is just the value */ p++; /* one compiler does not like *p++ */ } else { r.e = (uch)e[*p - s]; /* non-simple--look up in lists */ r.v.n = d[*p++ - s]; } /* fill code-like entries with r */ f = 1 << (k - w); for (j = i >> w; j < z; j += f) q[j] = r; /* backwards increment the k-bit code i */ for (j = 1 << (k - 1); i & j; j >>= 1) i ^= j; i ^= j; /* backup over finished tables */ while ((i & ((1 << w) - 1)) != x[h]) { h--; /* don't need to update q */ w -= l; } } } /* Return true (1) if we were given an incomplete table */ return y != 0 && g != 1; } /* Free the malloc'ed tables built by huft_build(), which makes a linked list of the tables it made, with the links in a dummy first entry of each table. */ static int huft_free(Pb *pb, struct huft *t) { register struct huft *p, *q; /* Go through linked list, freeing from the malloced (t[-1]) address. */ p = t; while (p != (struct huft *)NULL) { q = (--p)->v.t; freerout(rwparam, (char*)p); p = q; } return 0; } /* inflate (decompress) the codes in a deflated (compressed) block. Return an error code or zero if it all goes ok. */ static int inflate_codes(Pb *pb, struct huft *tl, struct huft *td, int bl, int bd) { register unsigned e; /* table entry flag/number of extra bits */ unsigned n, d; /* length and index for copy */ unsigned w; /* current window position */ struct huft *t; /* pointer to table entry */ unsigned ml, md; /* masks for bl and bd bits */ register ulg b; /* bit buffer */ register unsigned k; /* number of bits in bit buffer */ /* make local copies of globals */ b = bb; /* initialize bit buffer */ k = bk; w = wp; /* initialize window position */ /* inflate the coded data */ ml = mask_bits[bl]; /* precompute masks for speed */ md = mask_bits[bd]; for (;;) /* do until end of block */ { NEEDBITS((unsigned)bl) if ((e = (t = tl + ((unsigned)b & ml))->e) > 16) do { if (e == 99) return 1; DUMPBITS(t->b) e -= 16; NEEDBITS(e) } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); DUMPBITS(t->b) if (e == 16) /* then it's a literal */ { slide[w++] = (uch)t->v.n; Tracevv((stderr, "%c", slide[w-1])); if (w == WSIZE) { flush_output(w); w = 0; } } else /* it's an EOB or a length */ { /* exit if end of block */ if (e == 15) break; /* get length of block to copy */ NEEDBITS(e) n = t->v.n + ((unsigned)b & mask_bits[e]); DUMPBITS(e); /* decode distance of block to copy */ NEEDBITS((unsigned)bd) if ((e = (t = td + ((unsigned)b & md))->e) > 16) do { if (e == 99) return 1; DUMPBITS(t->b) e -= 16; NEEDBITS(e) } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); DUMPBITS(t->b) NEEDBITS(e) d = w - t->v.n - ((unsigned)b & mask_bits[e]); DUMPBITS(e) Tracevv((stderr,"\\[%d,%d]", w-d, n)); /* do the copy */ do { n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e); #if !defined(NOMEMCPY) && !defined(DEBUG) if (w - d >= e) /* (this test assumes unsigned comparison) */ { memcpy(slide + w, slide + d, e); w += e; d += e; } else /* do it slow to avoid memcpy() overlap */ #endif /* !NOMEMCPY */ do { slide[w++] = slide[d++]; Tracevv((stderr, "%c", slide[w-1])); } while (--e); if (w == WSIZE) { flush_output(w); w = 0; } } while (n); } } /* restore the globals from the locals */ wp = w; /* restore global window pointer */ bb = b; /* restore global bit buffer */ bk = k; /* done */ return 0; } /* "decompress" an inflated type 0 (stored) block. */ static int inflate_stored(Pb *pb) { unsigned n; /* number of bytes in block */ unsigned w; /* current window position */ register ulg b; /* bit buffer */ register unsigned k; /* number of bits in bit buffer */ /* make local copies of globals */ b = bb; /* initialize bit buffer */ k = bk; w = wp; /* initialize window position */ /* go to byte boundary */ n = k & 7; DUMPBITS(n); /* get the length and its complement */ NEEDBITS(16) n = ((unsigned)b & 0xffff); DUMPBITS(16) NEEDBITS(16) if (n != (unsigned)((~b) & 0xffff)) return 1; /* error in compressed data */ DUMPBITS(16) /* read and output the compressed data */ while (n--) { NEEDBITS(8) slide[w++] = (uch)b; if (w == WSIZE) { flush_output(w); w = 0; } DUMPBITS(8) } /* restore the globals from the locals */ wp = w; /* restore global window pointer */ bb = b; /* restore global bit buffer */ bk = k; return 0; } /* decompress an inflated type 1 (fixed Huffman codes) block. We should either replace this with a custom decoder, or at least precompute the Huffman tables. */ static int inflate_fixed(Pb *pb) { int i; /* temporary variable */ struct huft *tl; /* literal/length code table */ struct huft *td; /* distance code table */ int bl; /* lookup bits for tl */ int bd; /* lookup bits for td */ unsigned l[288]; /* length list for huft_build */ /* set up literal table */ for (i = 0; i < 144; i++) l[i] = 8; for (; i < 256; i++) l[i] = 9; for (; i < 280; i++) l[i] = 7; for (; i < 288; i++) /* make a complete, but wrong code set */ l[i] = 8; bl = 7; if ((i = huft_build(pb, l, 288, 257, cplens, cplext, &tl, &bl)) != 0) return i; /* set up distance table */ for (i = 0; i < 30; i++) /* make an incomplete code set */ l[i] = 5; bd = 5; if ((i = huft_build(pb, l, 30, 0, cpdist, cpdext, &td, &bd)) > 1) { huft_free(pb, tl); return i; } /* decompress until an end-of-block code */ if (inflate_codes(pb, tl, td, bl, bd)) return 1; /* free the decoding tables, return */ huft_free(pb, tl); huft_free(pb, td); return 0; } /* decompress an inflated type 2 (dynamic Huffman codes) block. */ static int inflate_dynamic(Pb *pb) { int i; /* temporary variables */ unsigned j; unsigned l; /* last length */ unsigned m; /* mask for bit lengths table */ unsigned n; /* number of lengths to get */ struct huft *tl; /* literal/length code table */ struct huft *td; /* distance code table */ int bl; /* lookup bits for tl */ int bd; /* lookup bits for td */ unsigned nb; /* number of bit length codes */ unsigned nl; /* number of literal/length codes */ unsigned nd; /* number of distance codes */ #ifdef PKZIP_BUG_WORKAROUND unsigned ll[288+32]; /* literal/length and distance code lengths */ #else unsigned ll[286+30]; /* literal/length and distance code lengths */ #endif register ulg b; /* bit buffer */ register unsigned k; /* number of bits in bit buffer */ /* make local bit buffer */ b = bb; k = bk; /* read in table lengths */ NEEDBITS(5) nl = 257 + ((unsigned)b & 0x1f); /* number of literal/length codes */ DUMPBITS(5) NEEDBITS(5) nd = 1 + ((unsigned)b & 0x1f); /* number of distance codes */ DUMPBITS(5) NEEDBITS(4) nb = 4 + ((unsigned)b & 0xf); /* number of bit length codes */ DUMPBITS(4) #ifdef PKZIP_BUG_WORKAROUND if (nl > 288 || nd > 32) #else if (nl > 286 || nd > 30) #endif return 1; /* bad lengths */ /* read in bit-length-code lengths */ for (j = 0; j < nb; j++) { NEEDBITS(3) ll[border[j]] = (unsigned)b & 7; DUMPBITS(3) } for (; j < 19; j++) ll[border[j]] = 0; /* build decoding table for trees--single level, 7 bit lookup */ bl = 7; if ((i = huft_build(pb, ll, 19, 19, NULL, NULL, &tl, &bl)) != 0) { if (i == 1) huft_free(pb, tl); return i; /* incomplete code set */ } /* read in literal and distance code lengths */ n = nl + nd; m = mask_bits[bl]; i = l = 0; while ((unsigned)i < n) { NEEDBITS((unsigned)bl) j = (td = tl + ((unsigned)b & m))->b; DUMPBITS(j) j = td->v.n; if (j < 16) /* length of code in bits (0..15) */ ll[i++] = l = j; /* save last length in l */ else if (j == 16) /* repeat last length 3 to 6 times */ { NEEDBITS(2) j = 3 + ((unsigned)b & 3); DUMPBITS(2) if ((unsigned)i + j > n) return 1; while (j--) ll[i++] = l; } else if (j == 17) /* 3 to 10 zero length codes */ { NEEDBITS(3) j = 3 + ((unsigned)b & 7); DUMPBITS(3) if ((unsigned)i + j > n) return 1; while (j--) ll[i++] = 0; l = 0; } else /* j == 18: 11 to 138 zero length codes */ { NEEDBITS(7) j = 11 + ((unsigned)b & 0x7f); DUMPBITS(7) if ((unsigned)i + j > n) return 1; while (j--) ll[i++] = 0; l = 0; } } /* free decoding table for trees */ huft_free(pb, tl); /* restore the global bit buffer */ bb = b; bk = k; /* build the decoding tables for literal/length and distance codes */ bl = lbits; if ((i = huft_build(pb, ll, nl, 257, cplens, cplext, &tl, &bl)) != 0) { if (i == 1) { error (pb, GZIP_ERROR_INCLITREE, " incomplete literal tree"); huft_free(pb, tl); } return i; /* incomplete code set */ } bd = dbits; if ((i = huft_build(pb, ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0) { if (i == 1) { error (pb, GZIP_ERROR_INCDISTREE, " incomplete distance tree"); #ifdef PKZIP_BUG_WORKAROUND i = 0; } #else huft_free(pb, td); } huft_free(pb, tl); return i; /* incomplete code set */ #endif } /* decompress until an end-of-block code */ if (inflate_codes(pb, tl, td, bl, bd)) return 1; /* free the decoding tables, return */ huft_free(pb, tl); huft_free(pb, td); return 0; } /* decompress an inflated block */ static int inflate_block(Pb *pb, int *e) { unsigned t; /* block type */ register ulg b; /* bit buffer */ register unsigned k; /* number of bits in bit buffer */ /* make local bit buffer */ b = bb; k = bk; /* read in last block bit */ NEEDBITS(1) *e = (int)b & 1; DUMPBITS(1) /* read in block type */ NEEDBITS(2) t = (unsigned)b & 3; DUMPBITS(2) /* restore the global bit buffer */ bb = b; bk = k; /* inflate that block type */ if (t == 2) return inflate_dynamic(pb); if (t == 0) return inflate_stored(pb); if (t == 1) return inflate_fixed(pb); /* bad block type */ return 2; } /* decompress an inflated entry */ static int inflate(Pb *pb) { int e; /* last block flag */ int r; /* result code */ unsigned h; /* maximum struct huft's malloc'ed */ /* initialize window, bit buffer */ wp = 0; bk = 0; bb = 0; /* decompress until the last block */ h = 0; do { hufts = 0; if ((r = inflate_block(pb, &e)) != 0) return r; if (hufts > h) h = hufts; } while (!e); /* Undo too much lookahead. The next read will be byte aligned so we * can discard unused bits in the last meaningful byte. */ while (bk >= 8) { bk -= 8; insize ++; -- inpnt; } /* flush out slide */ flush_output(wp); /* return success */ #ifdef DEBUG fprintf(stderr, "<%u> ", h); #endif /* DEBUG */ return 0; } /***************/ /***************/ /** **/ /** TREES.C **/ /** **/ /***************/ /***************/ /* trees.c -- output deflated data using Huffman coding * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ /* * PURPOSE * * Encode various sets of source values using variable-length * binary code trees. * * DISCUSSION * * The PKZIP "deflation" process uses several Huffman trees. The more * common source values are represented by shorter bit sequences. * * Each code tree is stored in the ZIP file in a compressed form * which is itself a Huffman encoding of the lengths of * all the code strings (in ascending order by source values). * The actual code strings are reconstructed from the lengths in * the UNZIP process, as described in the "application note" * (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program. * * REFERENCES * * Lynch, Thomas J. * Data Compression: Techniques and Applications, pp. 53-55. * Lifetime Learning Publications, 1985. ISBN 0-534-03418-7. * * Storer, James A. * Data Compression: Methods and Theory, pp. 49-50. * Computer Science Press, 1988. ISBN 0-7167-8156-5. * * Sedgewick, R. * Algorithms, p290. * Addison-Wesley, 1983. ISBN 0-201-06672-6. * * INTERFACE * * void ct_init (ush *attr, int *methodp) * Allocate the match buffer, initialize the various tables and save * the location of the internal file attribute (ascii/binary) and * method (DEFLATE/STORE) * * void ct_tally (int dist, int lc); * Save the match info and tally the frequency counts. * * long flush_block (char *buf, ulg stored_len, int eof) * Determine the best encoding for the current block: dynamic trees, * static trees or store, and output the encoded block to the zip * file. Returns the total compressed length for the file so far. * */ /* =========================================================================== * Constants */ #define MAX_BITS (TREES_MAX_BITS) /* All codes must not exceed MAX_BITS bits */ #define MAX_BL_BITS 7 /* Bit length codes must not exceed MAX_BL_BITS bits */ #define LENGTH_CODES (TREES_LENGTH_CODES) /* number of length codes, not counting the special END_BLOCK code */ #define LITERALS (TREES_LITERALS) /* number of literal bytes 0..255 */ #define END_BLOCK 256 /* end of block literal code */ #define L_CODES (TREES_L_CODES) /* number of Literal or Length codes, including the END_BLOCK code */ #define D_CODES (TREES_D_CODES) /* number of distance codes */ #define BL_CODES (TREES_BL_CODES) /* number of codes used to transfer the bit lengths */ local const int near extra_lbits[LENGTH_CODES] /* extra bits for each length code */ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; local const int near extra_dbits[D_CODES] /* extra bits for each distance code */ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; local const int near extra_blbits[BL_CODES]/* extra bits for each bit length code */ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; #define STORED_BLOCK 0 #define STATIC_TREES 1 #define DYN_TREES 2 /* The three kinds of block type */ #define REP_3_6 16 /* repeat previous bit length 3-6 times (2 bits of repeat count) */ #define REPZ_3_10 17 /* repeat a zero length 3-10 times (3 bits of repeat count) */ #define REPZ_11_138 18 /* repeat a zero length 11-138 times (7 bits of repeat count) */ /* =========================================================================== * Local data */ /* Data structure describing a single value and its code string. */ #define Freq fc.freq #define Code fc.code #define Dad dl.dad #define Len dl.len #define HEAP_SIZE (TREES_HEAP_SIZE) /* maximum heap size */ #define dyn_ltree (pb -> trees_dyn_ltree) /* literal and length tree */ #define dyn_dtree (pb -> trees_dyn_dtree) /* distance tree */ #define static_ltree (pb -> trees_static_ltree) /* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see ct_init * below). */ #define static_dtree (pb -> trees_static_dtree) /* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */ #define bl_tree (pb -> trees_bl_tree) /* Huffman tree for the bit lengths */ #define l_desc (pb -> trees_l_desc) #define d_desc (pb -> trees_d_desc) #define bl_desc (pb -> trees_bl_desc) #define bl_count (pb -> trees_bl_count) /* number of codes at each bit length for an optimal tree */ local const uch near bl_order[BL_CODES] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; /* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */ #define heap (pb -> trees_heap) /* heap used to build the Huffman trees */ #define heap_len (pb -> trees_heap_len) /* number of elements in the heap */ #define heap_max (pb -> trees_heap_max) /* element of largest frequency */ /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. * The same heap array is used to build all trees. */ #define depth (pb -> trees_depth) /* Depth of each subtree used as tie breaker for trees of equal frequency */ #define length_code (pb -> trees_length_code) /* length code for each normalized match length (0 == MIN_MATCH) */ #define dist_code (pb -> trees_dist_code) /* distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */ #define base_length (pb -> trees_base_length) /* First normalized length for each code (0 = MIN_MATCH) */ #define base_dist (pb -> trees_base_dist) /* First normalized distance for each code (0 = distance of 1) */ #define l_buf inbuf /* DECLARE(uch, l_buf, LIT_BUFSIZE); buffer for literals or lengths */ /* DECLARE(ush, d_buf, DIST_BUFSIZE); buffer for distances */ #define flag_buf (pb -> trees_flag_buf) /* flag_buf is a bit array distinguishing literals from lengths in * l_buf, thus indicating the presence or absence of a distance. */ #define last_lit (pb -> trees_last_lit) /* running index in l_buf */ #define last_dist (pb -> trees_last_dist) /* running index in d_buf */ #define last_flags (pb -> trees_last_flags) /* running index in flag_buf */ #define flags (pb -> trees_flags) /* current flags not yet saved in flag_buf */ #define flag_bit (pb -> trees_flag_bit) /* current bit used in flags */ /* bits are filled in flags starting at bit 0 (least significant). * Note: these flags are overkill in the current code since we don't * take advantage of DIST_BUFSIZE == LIT_BUFSIZE. */ #define opt_len (pb -> trees_opt_len) /* bit length of current block with optimal trees */ #define static_len (pb -> trees_static_len) /* bit length of current block with static trees */ #define compressed_len (pb -> trees_compressed_len) /* total bit length of compressed file */ #define input_len (pb -> trees_input_len) /* total byte length of input file */ /* input_len is for debugging only since we can get it by other means. */ #define file_type (pb -> trees_file_type) /* pointer to UNKNOWN, BINARY or ASCII */ #define file_method (pb -> trees_file_method) /* pointer to DEFLATE or STORE */ #ifdef DEBUG extern ulg bits_sent; /* bit length of the compressed data */ extern long bytes_in; /* byte length of input file */ #endif #define block_start (pb -> deflate_block_start) /* window offset of current block */ #define strstart (pb -> deflate_strstart) /* window offset of current string */ /* =========================================================================== * Local (static) routines in this file. */ local void init_block (Pb *pb); local void pqdownheap (Pb *pb, ct_data near *tree, int k); local void gen_bitlen (Pb *pb, tree_desc near *desc); local void gen_codes (Pb *pb, ct_data near *tree, int max_code); local void build_tree (Pb *pb, tree_desc near *desc); local void scan_tree (Pb *pb, ct_data near *tree, int max_code); local void send_tree (Pb *pb, ct_data near *tree, int max_code); local int build_bl_tree (Pb *pb); local void send_all_trees (Pb *pb, int lcodes, int dcodes, int blcodes); local void compress_block (Pb *pb, ct_data near *ltree, ct_data near *dtree); local void set_file_type (Pb *pb); #ifndef DEBUG # define send_code(c, tree) send_bits(pb, tree[c].Code, tree[c].Len) /* Send a code of the given tree. c and tree must not have side effects */ #else /* DEBUG */ # define send_code(c, tree) \ { if (verbose>1) fprintf(stderr,"\ncd %3d ",(c)); \ send_bits(pb, tree[c].Code, tree[c].Len); } #endif #define d_code(dist) \ ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)]) /* Mapping from a distance to a distance code. dist is the distance - 1 and * must not have side effects. dist_code[256] and dist_code[257] are never * used. */ #define MAX(a,b) (a >= b ? a : b) /* the arguments must not have side effects */ /* =========================================================================== * Allocate the match buffer, initialize the various tables and save the * location of the internal file attribute (ascii/binary) and method * (DEFLATE/STORE). */ static void ct_init(Pb *pb, ush *attr, int *methodp) { int n; /* iterates over tree elements */ int bits; /* bit counter */ int length; /* length value */ int code; /* code value */ int dist; /* distance index */ l_desc.dyn_tree = dyn_ltree; l_desc.static_tree = static_ltree; l_desc.extra_bits = extra_lbits; l_desc.extra_base = LITERALS+1; l_desc.elems = L_CODES; l_desc.max_length = MAX_BITS; l_desc.max_code = 0; d_desc.dyn_tree = dyn_dtree; d_desc.static_tree = static_dtree; d_desc.extra_bits = extra_dbits; d_desc.extra_base = 0; d_desc.elems = D_CODES; d_desc.max_length = MAX_BITS; d_desc.max_code = 0; bl_desc.dyn_tree = bl_tree; bl_desc.static_tree = NULL; bl_desc.extra_bits = extra_blbits; bl_desc.extra_base = 0; bl_desc.elems = BL_CODES; bl_desc.max_length = MAX_BL_BITS; bl_desc.max_code = 0; file_type = attr; file_method = methodp; compressed_len = input_len = 0L; if (static_dtree[0].Len != 0) return; /* ct_init already called */ /* Initialize the mapping length (0..255) -> length code (0..28) */ length = 0; for (code = 0; code < LENGTH_CODES-1; code++) { base_length[code] = length; for (n = 0; n < (1< dist code (0..29) */ dist = 0; for (code = 0 ; code < 16; code++) { base_dist[code] = dist; for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ for ( ; code < D_CODES; code++) { base_dist[code] = dist << 7; for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { dist_code[256 + dist++] = (uch)code; } } Assert (dist == 256, "ct_init: 256+dist != 512"); /* Construct the codes of the static literal tree */ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; n = 0; while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; /* Codes 286 and 287 do not exist, but we must include them in the * tree construction to get a canonical Huffman tree (longest code * all ones) */ gen_codes(pb, (ct_data near *)static_ltree, L_CODES+1); /* The static distance tree is trivial: */ for (n = 0; n < D_CODES; n++) { static_dtree[n].Len = 5; static_dtree[n].Code = bi_reverse(pb, n, 5); } /* Initialize the first block of the first file: */ init_block(pb); } /* =========================================================================== * Initialize a new block. */ local void init_block(Pb *pb) { int n; /* iterates over tree elements */ /* Initialize the trees. */ for (n = 0; n < L_CODES; n++) dyn_ltree[n].Freq = 0; for (n = 0; n < D_CODES; n++) dyn_dtree[n].Freq = 0; for (n = 0; n < BL_CODES; n++) bl_tree[n].Freq = 0; dyn_ltree[END_BLOCK].Freq = 1; opt_len = static_len = 0L; last_lit = last_dist = last_flags = 0; flags = 0; flag_bit = 1; } #define SMALLEST 1 /* Index within the heap array of least frequent node in the Huffman tree */ /* =========================================================================== * Remove the smallest element from the heap and recreate the heap with * one less element. Updates heap and heap_len. */ #define pqremove(tree, top) \ {\ top = heap[SMALLEST]; \ heap[SMALLEST] = heap[heap_len--]; \ pqdownheap(pb, tree, SMALLEST); \ } /* =========================================================================== * Compares to subtrees, using the tree depth as tie breaker when * the subtrees have equal frequency. This minimizes the worst case length. */ #define smaller(tree, n, m) \ (tree[n].Freq < tree[m].Freq || \ (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) /* =========================================================================== * Restore the heap property by moving down the tree starting at node k, * exchanging a node with the smallest of its two sons if necessary, stopping * when the heap property is re-established (each father smaller than its * two sons). */ local void pqdownheap(Pb *pb, ct_data near *tree, int k) { int v = heap[k]; int j = k << 1; /* left son of k */ while (j <= heap_len) { /* Set j to the smallest of the two sons: */ if (j < heap_len && smaller(tree, heap[j+1], heap[j])) j++; /* Exit if v is smaller than both sons */ if (smaller(tree, v, heap[j])) break; /* Exchange v with the smallest son */ heap[k] = heap[j]; k = j; /* And continue down the tree, setting j to the left son of k */ j <<= 1; } heap[k] = v; } /* =========================================================================== * Compute the optimal bit lengths for a tree and update the total bit length * for the current block. * IN assertion: the fields freq and dad are set, heap[heap_max] and * above are the tree nodes sorted by increasing frequency. * OUT assertions: the field len is set to the optimal bit length, the * array bl_count contains the frequencies for each bit length. * The length opt_len is updated; static_len is also updated if stree is * not null. */ local void gen_bitlen(Pb *pb, tree_desc near *desc) { ct_data near *tree = desc->dyn_tree; int const *extra = desc->extra_bits; int base = desc->extra_base; int max_code = desc->max_code; int max_length = desc->max_length; ct_data near *stree = desc->static_tree; int h; /* heap index */ int n, m; /* iterate over the tree elements */ int bits; /* bit length */ int xbits; /* extra bits */ ush f; /* frequency */ int overflow = 0; /* number of elements with bit length too large */ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; /* In a first pass, compute the optimal bit lengths (which may * overflow in the case of the bit length tree). */ tree[heap[heap_max]].Len = 0; /* root of the heap */ for (h = heap_max+1; h < HEAP_SIZE; h++) { n = heap[h]; bits = tree[tree[n].Dad].Len + 1; if (bits > max_length) bits = max_length, overflow++; tree[n].Len = (ush)bits; /* We overwrite tree[n].Dad which is no longer needed */ if (n > max_code) continue; /* not a leaf node */ bl_count[bits]++; xbits = 0; if (n >= base) xbits = extra[n-base]; f = tree[n].Freq; opt_len += (ulg)f * (bits + xbits); if (stree) static_len += (ulg)f * (stree[n].Len + xbits); } if (overflow == 0) return; Trace((stderr,"\nbit length overflow\n")); /* This happens for example on obj2 and pic of the Calgary corpus */ /* Find the first bit length which could increase: */ do { bits = max_length-1; while (bl_count[bits] == 0) bits--; bl_count[bits]--; /* move one leaf down the tree */ bl_count[bits+1] += 2; /* move one overflow item as its brother */ bl_count[max_length]--; /* The brother of the overflow item also moves one step up, * but this does not affect bl_count[max_length] */ overflow -= 2; } while (overflow > 0); /* Now recompute all bit lengths, scanning in increasing frequency. * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all * lengths instead of fixing only the wrong ones. This idea is taken * from 'ar' written by Haruhiko Okumura.) */ for (bits = max_length; bits != 0; bits--) { n = bl_count[bits]; while (n != 0) { m = heap[--h]; if (m > max_code) continue; if (tree[m].Len != (unsigned) bits) { Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); opt_len += ((long)bits-(long)tree[m].Len)*(long)tree[m].Freq; tree[m].Len = (ush)bits; } n--; } } } /* =========================================================================== * Generate the codes for a given tree and bit counts (which need not be * optimal). * IN assertion: the array bl_count contains the bit length statistics for * the given tree and the field len is set for all tree elements. * OUT assertion: the field code is set for all tree elements of non * zero code length. */ local void gen_codes (Pb *pb, ct_data near *tree, int max_code) { ush next_code[MAX_BITS+1]; /* next code value for each bit length */ ush code = 0; /* running code value */ int bits; /* bit index */ int n; /* code index */ /* The distribution counts are first used to generate the code values * without bit reversal. */ for (bits = 1; bits <= MAX_BITS; bits++) { next_code[bits] = code = (code + bl_count[bits-1]) << 1; } /* Check that the bit counts in bl_count are consistent. The last code * must be all ones. */ Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; ct_data near *stree = desc->static_tree; int elems = desc->elems; int n, m; /* iterate over heap elements */ int max_code = -1; /* largest code with non zero frequency */ int node = elems; /* next internal node of the tree */ /* Construct the initial heap, with least frequent element in * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. * heap[0] is not used. */ heap_len = 0, heap_max = HEAP_SIZE; for (n = 0; n < elems; n++) { if (tree[n].Freq != 0) { heap[++heap_len] = max_code = n; depth[n] = 0; } else { tree[n].Len = 0; } } /* The pkzip format requires that at least one distance code exists, * and that at least one bit should be sent even if there is only one * possible code. So to avoid special checks later on we force at least * two codes of non zero frequency. */ while (heap_len < 2) { int new = heap[++heap_len] = (max_code < 2 ? ++max_code : 0); tree[new].Freq = 1; depth[new] = 0; opt_len--; if (stree) static_len -= stree[new].Len; /* new is 0 or 1 so it does not have extra bits */ } desc->max_code = max_code; /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, * establish sub-heaps of increasing lengths: */ for (n = heap_len/2; n >= 1; n--) pqdownheap(pb, tree, n); /* Construct the Huffman tree by repeatedly combining the least two * frequent nodes. */ do { pqremove(tree, n); /* n = node of least frequency */ m = heap[SMALLEST]; /* m = node of next least frequency */ heap[--heap_max] = n; /* keep the nodes sorted by frequency */ heap[--heap_max] = m; /* Create a new node father of n and m */ tree[node].Freq = tree[n].Freq + tree[m].Freq; depth[node] = (uch) (MAX(depth[n], depth[m]) + 1); tree[n].Dad = tree[m].Dad = (ush)node; #ifdef DUMP_BL_TREE if (tree == bl_tree) { fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); } #endif /* and insert the new node in the heap */ heap[SMALLEST] = node++; pqdownheap(pb, tree, SMALLEST); } while (heap_len >= 2); heap[--heap_max] = heap[SMALLEST]; /* At this point, the fields freq and dad are set. We can now * generate the bit lengths. */ gen_bitlen(pb, (tree_desc near *)desc); /* The field len is now set, we can generate the bit codes */ gen_codes (pb, (ct_data near *)tree, max_code); } /* =========================================================================== * Scan a literal or distance tree to determine the frequencies of the codes * in the bit length tree. Updates opt_len to take into account the repeat * counts. (The contribution of the bit length codes will be added later * during the construction of bl_tree.) */ local void scan_tree (Pb *pb, ct_data near *tree, int max_code) { int n; /* iterates over all tree elements */ int prevlen = -1; /* last emitted length */ int curlen; /* length of current code */ int nextlen = tree[0].Len; /* length of next code */ int count = 0; /* repeat count of the current code */ int max_count = 7; /* max repeat count */ int min_count = 4; /* min repeat count */ if (nextlen == 0) max_count = 138, min_count = 3; tree[max_code+1].Len = (ush)0xffff; /* guard */ for (n = 0; n <= max_code; n++) { curlen = nextlen; nextlen = tree[n+1].Len; if (++count < max_count && curlen == nextlen) { continue; } else if (count < min_count) { bl_tree[curlen].Freq += count; } else if (curlen != 0) { if (curlen != prevlen) bl_tree[curlen].Freq++; bl_tree[REP_3_6].Freq++; } else if (count <= 10) { bl_tree[REPZ_3_10].Freq++; } else { bl_tree[REPZ_11_138].Freq++; } count = 0; prevlen = curlen; if (nextlen == 0) { max_count = 138, min_count = 3; } else if (curlen == nextlen) { max_count = 6, min_count = 3; } else { max_count = 7, min_count = 4; } } } /* =========================================================================== * Send a literal or distance tree in compressed form, using the codes in * bl_tree. */ local void send_tree (Pb *pb, ct_data near *tree, int max_code) { int n; /* iterates over all tree elements */ int prevlen = -1; /* last emitted length */ int curlen; /* length of current code */ int nextlen = tree[0].Len; /* length of next code */ int count = 0; /* repeat count of the current code */ int max_count = 7; /* max repeat count */ int min_count = 4; /* min repeat count */ /* tree[max_code+1].Len = -1; */ /* guard already set */ if (nextlen == 0) max_count = 138, min_count = 3; for (n = 0; n <= max_code; n++) { curlen = nextlen; nextlen = tree[n+1].Len; if (++count < max_count && curlen == nextlen) { continue; } else if (count < min_count) { do { send_code(curlen, bl_tree); } while (--count != 0); } else if (curlen != 0) { if (curlen != prevlen) { send_code(curlen, bl_tree); count--; } Assert(count >= 3 && count <= 6, " 3_6?"); send_code(REP_3_6, bl_tree); send_bits(pb, count-3, 2); } else if (count <= 10) { send_code(REPZ_3_10, bl_tree); send_bits(pb, count-3, 3); } else { send_code(REPZ_11_138, bl_tree); send_bits(pb, count-11, 7); } count = 0; prevlen = curlen; if (nextlen == 0) { max_count = 138, min_count = 3; } else if (curlen == nextlen) { max_count = 6, min_count = 3; } else { max_count = 7, min_count = 4; } } } /* =========================================================================== * Construct the Huffman tree for the bit lengths and return the index in * bl_order of the last bit length code to send. */ local int build_bl_tree(Pb *pb) { int max_blindex; /* index of last bit length code of non zero freq */ /* Determine the bit length frequencies for literal and distance trees */ scan_tree(pb, (ct_data near *)dyn_ltree, l_desc.max_code); scan_tree(pb, (ct_data near *)dyn_dtree, d_desc.max_code); /* Build the bit length tree: */ build_tree(pb, &bl_desc); /* opt_len now includes the length of the tree representations, except * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. */ /* Determine the number of bit length codes to send. The pkzip format * requires that at least 4 bit length codes be sent. (appnote.txt says * 3 but the actual value used is 4.) */ for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { if (bl_tree[bl_order[max_blindex]].Len != 0) break; } /* Update opt_len to include the bit length tree and counts */ opt_len += 3*(max_blindex+1) + 5+5+4; Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", opt_len, static_len)); return max_blindex; } /* =========================================================================== * Send the header for a block using dynamic Huffman trees: the counts, the * lengths of the bit length codes, the literal tree and the distance tree. * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. */ local void send_all_trees(Pb *pb, int lcodes, int dcodes, int blcodes) { int rank; /* index in bl_order */ Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, "too many codes"); Tracev((stderr, "\nbl counts: ")); send_bits(pb, lcodes-257, 5); /* not +255 as stated in appnote.txt */ send_bits(pb, dcodes-1, 5); send_bits(pb, blcodes-4, 4); /* not -3 as stated in appnote.txt */ for (rank = 0; rank < blcodes; rank++) { Tracev((stderr, "\nbl code %2d ", bl_order[rank])); send_bits(pb, bl_tree[bl_order[rank]].Len, 3); } Tracev((stderr, "\nbl tree: sent %ld", bits_sent)); send_tree(pb, (ct_data near *)dyn_ltree, lcodes-1); /* send the literal tree */ Tracev((stderr, "\nlit tree: sent %ld", bits_sent)); send_tree(pb, (ct_data near *)dyn_dtree, dcodes-1); /* send the distance tree */ Tracev((stderr, "\ndist tree: sent %ld", bits_sent)); } /* =========================================================================== * Determine the best encoding for the current block: dynamic trees, static * trees or store, and output the encoded block to the zip file. This function * returns the total compressed length for the file so far. */ static ulg flush_block(Pb *pb, char *buf, ulg stored_len, int eof) { ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ int max_blindex; /* index of last bit length code of non zero freq */ flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */ /* Check if the file is ascii or binary */ if (*file_type == (ush)UNKNOWN) set_file_type(pb); /* Construct the literal and distance trees */ build_tree(pb, &l_desc); Tracev((stderr, "\nlit data: dyn %ld, stat %ld", opt_len, static_len)); build_tree(pb, &d_desc); Tracev((stderr, "\ndist data: dyn %ld, stat %ld", opt_len, static_len)); /* At this point, opt_len and static_len are the total bit lengths of * the compressed block data, excluding the tree representations. */ /* Build the bit length tree for the above two trees, and get the index * in bl_order of the last bit length code to send. */ max_blindex = build_bl_tree(pb); /* Determine the best encoding. Compute first the block length in bytes */ opt_lenb = (opt_len+3+7)>>3; static_lenb = (static_len+3+7)>>3; input_len += stored_len; /* for debugging only */ Trace((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ", opt_lenb, opt_len, static_lenb, static_len, stored_len, last_lit, last_dist)); if (static_lenb <= opt_lenb) opt_lenb = static_lenb; /* If compression failed and this is the first and last block, * and if the zip file can be seeked (to rewrite the local header), * the whole file is transformed into a stored file: */ #ifdef FORCE_METHOD if (level == 1 && eof && compressed_len == 0L) { /* force stored file */ #else if (stored_len <= opt_lenb && eof && compressed_len == 0L && seekable()) { #endif /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */ if (buf == (char*)0) { error (pb, GZIP_ERROR_BLKVANSHD, "block vanished"); return (0); } copy_block(pb, buf, (unsigned)stored_len, 0); /* without header */ compressed_len = stored_len << 3; *file_method = STORED; #ifdef FORCE_METHOD } else if (level == 2 && buf != (char*)0) { /* force stored block */ #else } else if (stored_len+4 <= opt_lenb && buf != (char*)0) { /* 4: two words for the lengths */ #endif /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. * Otherwise we can't have processed more than WSIZE input bytes since * the last block flush, because compression would have been * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to * transform a block into a stored block. */ send_bits(pb, (STORED_BLOCK<<1)+eof, 3); /* send block type */ compressed_len = (compressed_len + 3 + 7) & ~7L; compressed_len += (stored_len + 4) << 3; copy_block(pb, buf, (unsigned)stored_len, 1); /* with header */ #ifdef FORCE_METHOD } else if (level == 3) { /* force static trees */ #else } else if (static_lenb == opt_lenb) { #endif send_bits(pb, (STATIC_TREES<<1)+eof, 3); compress_block(pb, (ct_data near *)static_ltree, (ct_data near *)static_dtree); compressed_len += 3 + static_len; } else { send_bits(pb, (DYN_TREES<<1)+eof, 3); send_all_trees(pb, l_desc.max_code+1, d_desc.max_code+1, max_blindex+1); compress_block(pb, (ct_data near *)dyn_ltree, (ct_data near *)dyn_dtree); compressed_len += 3 + opt_len; } Assert (compressed_len == bits_sent, "bad compressed size"); init_block(pb); if (eof) { Assert (input_len == bytes_in, "bad input size"); bi_windup(pb); compressed_len += 7; /* align on byte boundary */ } Tracev((stderr,"\ncomprlen %lu(%lu) ", compressed_len>>3, compressed_len-7*eof)); return compressed_len >> 3; } /* =========================================================================== * Save the match info and tally the frequency counts. Return true if * the current block must be flushed. */ static int ct_tally (Pb *pb, int dist, int lc) { l_buf[last_lit++] = (uch)lc; if (dist == 0) { /* lc is the unmatched char */ dyn_ltree[lc].Freq++; } else { /* Here, lc is the match length - MIN_MATCH */ dist--; /* dist = match distance - 1 */ Assert((ush)dist < (ush)MAX_DIST && (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && (ush)d_code(dist) < (ush)D_CODES, "ct_tally: bad match"); dyn_ltree[length_code[lc]+LITERALS+1].Freq++; dyn_dtree[d_code(dist)].Freq++; d_buf[last_dist++] = (ush)dist; flags |= flag_bit; } flag_bit <<= 1; /* Output the flags if they fill a byte: */ if ((last_lit & 7) == 0) { flag_buf[last_flags++] = flags; flags = 0, flag_bit = 1; } /* Try to guess if it is profitable to stop the current block here */ if (level > 2 && (last_lit & 0xfff) == 0) { /* Compute an upper bound for the compressed length */ ulg out_length = (ulg)last_lit*8L; ulg in_length = (ulg)strstart-block_start; int dcode; for (dcode = 0; dcode < D_CODES; dcode++) { out_length += (ulg)dyn_dtree[dcode].Freq*(5L+extra_dbits[dcode]); } out_length >>= 3; Trace((stderr,"\nlast_lit %u, last_dist %u, in %ld, out ~%ld(%ld%%) ", last_lit, last_dist, in_length, out_length, 100L - out_length*100L/in_length)); if (last_dist < last_lit/2 && out_length < in_length/2) return 1; } return (last_lit == LIT_BUFSIZE-1 || last_dist == DIST_BUFSIZE); /* We avoid equality with LIT_BUFSIZE because of wraparound at 64K * on 16 bit machines and because stored blocks are restricted to * 64K-1 bytes. */ } /* =========================================================================== * Send the block data compressed using the given Huffman trees */ local void compress_block(Pb *pb, ct_data near *ltree, ct_data near *dtree) { unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ unsigned lx = 0; /* running index in l_buf */ unsigned dx = 0; /* running index in d_buf */ unsigned fx = 0; /* running index in flag_buf */ uch flag = 0; /* current flags */ unsigned code; /* the code to send */ int extra; /* number of extra bits to send */ if (last_lit != 0) do { if ((lx & 7) == 0) flag = flag_buf[fx++]; lc = l_buf[lx++]; if ((flag & 1) == 0) { send_code(lc, ltree); /* send a literal byte */ Tracecv(isgraph(lc), (stderr," '%c' ", lc)); } else { /* Here, lc is the match length - MIN_MATCH */ code = length_code[lc]; send_code(code+LITERALS+1, ltree); /* send the length code */ extra = extra_lbits[code]; if (extra != 0) { lc -= base_length[code]; send_bits(pb, lc, extra); /* send the extra length bits */ } dist = d_buf[dx++]; /* Here, dist is the match distance - 1 */ code = d_code(dist); Assert (code < D_CODES, "bad d_code"); send_code(code, dtree); /* send the distance code */ extra = extra_dbits[code]; if (extra != 0) { dist -= base_dist[code]; send_bits(pb, dist, extra); /* send the extra distance bits */ } } /* literal or match pair ? */ flag >>= 1; } while (lx < last_lit); send_code(END_BLOCK, ltree); } /* =========================================================================== * Set the file type to ASCII or BINARY, using a crude approximation: * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. * IN assertion: the fields freq of dyn_ltree are set and the total of all * frequencies does not exceed 64K (to fit in an int on 16 bit machines). */ local void set_file_type(Pb *pb) { int n = 0; unsigned ascii_freq = 0; unsigned bin_freq = 0; while (n < 7) bin_freq += dyn_ltree[n++].Freq; while (n < 128) ascii_freq += dyn_ltree[n++].Freq; while (n < LITERALS) bin_freq += dyn_ltree[n++].Freq; *file_type = bin_freq > (ascii_freq >> 2) ? BINARY : ASCII; } /***************/ /***************/ /** **/ /** UNZIP.C **/ /** **/ /***************/ /***************/ /* unzip.c -- decompress files in gzip format. * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. * * The code in this file is derived from the file funzip.c written * and put in the public domain by Mark Adler. */ /* This version can extract files in gzip format. */ /************************************************************************/ /* */ /* Unzip in to out. This routine works on gzip files. */ /* */ /* IN assertions: the buffer inpnt contains already the beginning of */ /* the compressed data, insize bytes long. The magic header has */ /* already been checked. The output buffer is cleared. */ /* */ /************************************************************************/ static void unzip (Pb *pb) { ulg orig_crc; /* original crc */ ulg orig_len; /* original uncompressed length */ int n; uch buf[8]; updcrc (pb, NULL, 0); /* initialize crc accumulator */ /* Decompress */ if (method == DEFLATED) { int res = inflate(pb); if (res == 3) { error(pb, GZIP_ERROR_OUTOFMEM, "out of memory"); } else if (res != 0) { error(pb, GZIP_ERROR_INVCOMPDAT, "invalid compressed data"); } } else { error(pb, GZIP_ERROR_BUGCHECK, "internal error, invalid method"); } /* Get the crc and original length */ /* crc32 (see algorithm.doc) * uncompressed input size modulo 2^32 */ for (n = 0; n < 8; n++) { buf[n] = (uch)get_byte(); /* may cause an error if EOF */ } orig_crc = LG(buf); orig_len = LG(buf+4); /* Validate decompression */ if (orig_crc != updcrc(pb, outbuf, 0)) { error(pb, GZIP_ERROR_CRCERROR, "invalid compressed data--crc error"); } if (orig_len != (ulg)bytes_out) { error(pb, GZIP_ERROR_LENERROR, "invalid compressed data--length error"); } } /**************/ /**************/ /** **/ /** UTIL.C **/ /** **/ /**************/ /**************/ /* util.c -- utility functions for gzip support * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ /* ======================================================================== * Table of CRC-32's of all single-byte values (made by makecrc.c) */ static const ulg crc_32_tab[] = { 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, 0x2d02ef8dL }; static void write_buf (Pb *pb, void *buf, unsigned cnt); static void read_error (Pb *pb); /************************************************************************/ /* */ /* Run a set of bytes through the crc shift register. If s is a NULL */ /* pointer, then initialize the crc shift register contents instead. */ /* Return the current crc in either case. */ /* */ /* Input: */ /* */ /* s = pointer to bytes to pump through */ /* (or NULL to re-initialize shift register) */ /* n = number of bytes in s[] */ /* */ /* Output: */ /* */ /* updcrc = crc so far */ /* */ /************************************************************************/ static ulg updcrc (Pb *pb, uch *s, unsigned n) { register ulg c; /* temporary variable */ if (s == NULL) { c = 0xffffffffL; } else { c = pb -> updcrc_crc; if (n) do { c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); } while (--n); } pb -> updcrc_crc = c; return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ } /************************************************************************/ /* */ /* Fill the input buffer with compressed data from the input file. */ /* This is called only when the buffer is empty. It is an error to */ /* read the end-of-file mark. */ /* */ /* Output: */ /* */ /* fill_inbuf = -1 : eof or read error */ /* else : first byte removed from input buffer */ /* inbuf = data bytes */ /* insize = length of inbuf data less the first byte taken out */ /* inpnt = address of next byte in input buffer */ /* */ /************************************************************************/ static int fill_inbuf (Pb *pb) { int len, ok; /* Set total read to zero, and if there has already been an */ /* error, return error flag (ie, don't try to read any more) */ insize = 0; inpnt = inbuf; if (exit_code != GZIP_RTN_OK) return (-1); /* Try to read as much as will fit in inbuf */ ok = (*readrout) (rwparam, INBUFSIZ, inbuf, &len, &inpnt); /* If successful, but hit eof, output an error message, and pretend error */ if (ok && (len == 0)) { error (pb, GZIP_ERROR_COMPEOF, "premature end-of-file on input"); ok = 0; } /* If error, return error status */ if (!ok) { exit_code = GZIP_RTN_RDERR; return (-1); } /* Ok, accumulate total bytes input, and return the size and first byte */ bytes_in += (ulg)len; insize = len - 1; return (*(inpnt ++)); } /************************************************************************/ /* */ /* Read uncompressed data from input file and update the crc and */ /* input file size. Eof returns a zero. */ /* */ /* Input: */ /* */ /* size = number of bytes wanted */ /* */ /* Output: */ /* */ /* file_read = -1 : read error */ /* 0 : end of file */ /* else : actual number of bytes read */ /* *buf = data bytes that were read */ /* updcrc_crc = updated */ /* */ /************************************************************************/ static int file_read (Pb *pb, char *buf, int size) { char *pnt; int len, ok; /* If previous error, return error status */ if (exit_code != GZIP_RTN_OK) return (-1); /* If stuff left over from last time, just use it */ if (pb -> util_frlos > 0) { /* see if anything left over */ len = pb -> util_frlos; /* if so, get its size */ pnt = pb -> util_frlop; /* get its pointer */ } /* Otherwise, read as much as we can get */ else { pnt = buf; /* assume it will read into buf */ ok = (*readrout) (rwparam, size, buf, &len, &pnt); /* read something */ if (!ok) { /* check for failure */ exit_code = GZIP_RTN_RDERR; /* if so, return failure status */ return (-1); } } /* See if we have more than requested. If so, save the rest */ /* in the file_read left_over variables and truncate to size */ /* requested. This should only happen when pnt != buf. */ pb -> util_frlos = 0; /* assume nothing left over */ if (len > size) { /* see if read more than we asked for */ pb -> util_frlos = len - size; /* if so, save what's left over */ pb -> util_frlop = pnt + size; len = size; /* ... and just get this much */ } /* If they read into a different buffer, copy it to buf */ if (pnt != buf) memcpy (buf, pnt, len); /* Update crc and accumulate total bytes read */ updcrc (pb, (uch*)buf, len); bytes_in += (ulg)len; /* Return actual length returned in buf */ return (len); } /************************************************************************/ /* */ /* Write the output buffer outbuf[0..outcnt-1] and update bytes_out. */ /* (used for the compressed data only) */ /* */ /************************************************************************/ static void flush_outbuf(Pb *pb) { if (outcnt != 0) { write_buf (pb, (char *)outbuf, outcnt); bytes_out += (ulg)outcnt; outcnt = 0; } } /************************************************************************/ /* */ /* Write the output window window[0..outcnt-1] and update crc and */ /* bytes_out. (Used for the decompressed data only.) */ /* */ /************************************************************************/ static void flush_window(Pb *pb) { if (outcnt != 0) { updcrc(pb, window, outcnt); write_buf (pb, (char *)window, outcnt); bytes_out += (ulg)outcnt; outcnt = 0; } } /************************************************************************/ /* */ /* Write data to output file. Abort if error */ /* */ /* Input: */ /* */ /* pb = param block pointer */ /* buf = address of data to be written */ /* cnt = number of bytes to write */ /* */ /************************************************************************/ static void write_buf (Pb *pb, void *buf, unsigned cnt) { int ok; if (exit_code == GZIP_RTN_OK) { ok = (*writerout) (rwparam, cnt, buf); if (!ok) exit_code = GZIP_RTN_WTERR; } } /************************************************************************/ /* */ /* Call error reporting routine */ /* */ /* Input: */ /* */ /* code = GZIP_ERROR_ error code */ /* m = pointer to message text */ /* */ /* Output: */ /* */ /* exit_code = GZIP_RTN_ERROR */ /* */ /************************************************************************/ static void error (Pb *pb, int code, char *m) { if (exit_code == GZIP_RTN_OK) { (*errorrout) (rwparam, code, m); exit_code = GZIP_RTN_ERROR; } } /*************/ /*************/ /** **/ /** ZIP.C **/ /** **/ /*************/ /*************/ /* zip.c -- compress files to the gzip format * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ /************************************************************************/ /* */ /* Deflate (compress) input stream to output stream */ /* */ /************************************************************************/ static void zip (Pb *pb) { uch general_flags = 0; /* general purpose bit flags */ ush attr = 0; /* ascii/binary flag */ ush deflate_flags = 0; /* pkzip -es, -en or -ex equivalent */ ulg finalcrc; /* final crc value */ outcnt = 0; /* Write the header to the gzip file. See algorithm.doc for the format */ method = DEFLATED; put_byte(GZIP_MAGIC[0]); /* magic header */ put_byte(GZIP_MAGIC[1]); put_byte(DEFLATED); /* compression method */ put_byte(general_flags); /* general flags */ put_long(0); /* time stamp = 0 for now */ /* Write deflated file to zip file */ updcrc (pb, NULL, 0); bi_init(pb); ct_init(pb, &attr, &method); lm_init(pb, level, &deflate_flags); put_byte((uch)deflate_flags); /* extra flags */ put_byte(OS_CODE); /* OS identifier */ deflate(pb); /* Write the crc and uncompressed size */ finalcrc = updcrc (pb, &general_flags, 0); put_long(finalcrc); put_long(bytes_in); flush_outbuf (pb); }