| | | 1 | | using System; |
| | | 2 | | using System.IO; |
| | | 3 | | using ICSharpCode.SharpZipLib.Checksum; |
| | | 4 | | using ICSharpCode.SharpZipLib.Zip.Compression; |
| | | 5 | | using ICSharpCode.SharpZipLib.Zip.Compression.Streams; |
| | | 6 | | |
namespace ICSharpCode.SharpZipLib.GZip
{

    /// <summary>
    /// This filter stream is used to decompress a "GZIP" format stream.
    /// The "GZIP" format is described in RFC 1952.
    ///
    /// author of the original java version : John Leuner
    /// </summary>
    /// <example> This sample shows how to unzip a gzipped file
    /// <code>
    /// using System;
    /// using System.IO;
    ///
    /// using ICSharpCode.SharpZipLib.Core;
    /// using ICSharpCode.SharpZipLib.GZip;
    ///
    /// class MainClass
    /// {
    ///     public static void Main(string[] args)
    ///     {
    ///         using (Stream inStream = new GZipInputStream(File.OpenRead(args[0])))
    ///         using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
    ///             byte[] buffer = new byte[4096];
    ///             StreamUtils.Copy(inStream, outStream, buffer);
    ///         }
    ///     }
    /// }
    /// </code>
    /// </example>
    public class GZipInputStream : InflaterInputStream
    {
        #region Instance Fields
        /// <summary>
        /// CRC-32 value for uncompressed data
        /// </summary>
        protected Crc32 crc;

        /// <summary>
        /// Flag to indicate if we've read the GZIP header yet for the current member (block of compressed data).
        /// This is tracked per-block as the file is parsed: it is set by <c>ReadHeader</c> and cleared
        /// again by <c>ReadFooter</c>.
        /// </summary>
        bool readGZIPHeader;
        #endregion

        #region Constructors
        /// <summary>
        /// Creates a GZipInputStream with the default buffer size (4096 bytes)
        /// </summary>
        /// <param name="baseInputStream">
        /// The stream to read compressed data from (in GZIP format)
        /// </param>
        public GZipInputStream(Stream baseInputStream)
            : this(baseInputStream, 4096)
        {
        }

        /// <summary>
        /// Creates a GZipInputStream with the specified buffer size
        /// </summary>
        /// <param name="baseInputStream">
        /// The stream to read compressed data from (in GZIP format)
        /// </param>
        /// <param name="size">
        /// Size of the buffer to use
        /// </param>
        public GZipInputStream(Stream baseInputStream, int size)
            : base(baseInputStream, new Inflater(true), size)
        {
        }
        #endregion

        #region Stream overrides
        /// <summary>
        /// Reads uncompressed data into an array of bytes
        /// </summary>
        /// <param name="buffer">
        /// The buffer to read uncompressed data into
        /// </param>
        /// <param name="offset">
        /// The offset indicating where the data should be placed
        /// </param>
        /// <param name="count">
        /// The number of uncompressed bytes to be read
        /// </param>
        /// <returns>
        /// Returns the number of bytes actually read. Zero is returned when there is no further
        /// GZIP member to read, or when <paramref name="count"/> is zero.
        /// </returns>
        /// <exception cref="GZipException">The header or footer of a member is invalid.</exception>
        /// <exception cref="EndOfStreamException">The header or footer of a member is truncated.</exception>
        public override int Read(byte[] buffer, int offset, int count)
        {
            // A GZIP file can contain multiple blocks of compressed data, although this is quite rare.
            // A compressed block could potentially be empty, so we need to loop until we reach EOF or
            // we find data.
            while (true) {

                // Try to read the header for this block if it has not been read yet. If there is no
                // header (0 bytes available), this is EOF. An incomplete header throws an exception.
                if (!readGZIPHeader && !ReadHeader()) {
                    return 0;
                }

                // Try to read compressed data, folding whatever was produced into the running CRC
                int bytesRead = base.Read(buffer, offset, count);
                if (bytesRead > 0) {
                    crc.Update(buffer, offset, bytesRead);
                }

                // If this is the end of the compressed block, validate its footer
                if (inf.IsFinished) {
                    ReadFooter();
                }

                // A request for zero bytes can never yield any data, so it must return here
                // instead of looping forever waiting for bytesRead to become positive.
                if (bytesRead > 0 || count == 0) {
                    return bytesRead;
                }
            }
        }
        #endregion

        #region Support routines
        /// <summary>
        /// Reads and validates the GZIP member header, leaving the input positioned at the
        /// start of the compressed data.
        /// </summary>
        /// <returns>
        /// true if a header was read; false if no input was available at all (end of file).
        /// </returns>
        /// <exception cref="GZipException">
        /// The magic bytes, compression type, reserved flag bits or header CRC are invalid.
        /// </exception>
        /// <exception cref="EndOfStreamException">The header is incomplete.</exception>
        bool ReadHeader()
        {
            // Initialize CRC for this block
            crc = new Crc32();

            // Make sure there is data in file. We can't rely on ReadLeByte() to fill the buffer, as this could be EOF,
            // which is fine, but ReadLeByte() throws an exception if it doesn't find data, so we do this part ourselves.
            if (inputBuffer.Available <= 0) {
                inputBuffer.Fill();
                if (inputBuffer.Available <= 0) {
                    // No header, EOF.
                    return false;
                }
            }

            // CRC over the header bytes, needed only if the optional FHCRC field is present
            var headCRC = new Crc32();

            // 1. Check the two magic bytes
            if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC >> 8)) {
                throw new GZipException("Error GZIP header, first magic byte doesn't match");
            }

            if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC & 0xFF)) {
                throw new GZipException("Error GZIP header, second magic byte doesn't match");
            }

            // 2. Check the compression type (must be 8)
            if (ReadHeaderByte(headCRC) != 8) {
                throw new GZipException("Error GZIP header, data not in deflate format");
            }

            // 3. Check the flags
            int flags = ReadHeaderByte(headCRC);

            /*  This flag byte is divided into individual bits as follows:

                bit 0   FTEXT
                bit 1   FHCRC
                bit 2   FEXTRA
                bit 3   FNAME
                bit 4   FCOMMENT
                bit 5   reserved
                bit 6   reserved
                bit 7   reserved
            */

            // 3.1 Check the reserved bits are zero
            if ((flags & 0xE0) != 0) {
                throw new GZipException("Reserved flag bits in GZIP header != 0");
            }

            // 4.-6. Skip the modification time, extra flags, and OS type
            for (int i = 0; i < 6; i++) {
                ReadHeaderByte(headCRC);
            }

            // 7. Read extra field
            if ((flags & GZipConstants.FEXTRA) != 0) {

                // XLEN is total length of extra subfields, we will skip them all
                int lengthLow = ReadHeaderByte(headCRC);
                int lengthHigh = ReadHeaderByte(headCRC);

                int extraLen = (lengthHigh << 8) | lengthLow; // gzip is LSB first
                for (int i = 0; i < extraLen; i++) {
                    ReadHeaderByte(headCRC);
                }
            }

            // 8. Read file name
            if ((flags & GZipConstants.FNAME) != 0) {
                SkipZeroTerminatedHeaderField(headCRC);
            }

            // 9. Read comment
            if ((flags & GZipConstants.FCOMMENT) != 0) {
                SkipZeroTerminatedHeaderField(headCRC);
            }

            // 10. Read header CRC
            if ((flags & GZipConstants.FHCRC) != 0) {
                // The CRC16 bytes themselves are not part of the header CRC, so read them raw
                int crcLow = inputBuffer.ReadLeByte();
                if (crcLow < 0) {
                    throw new EndOfStreamException("EOS reading GZIP header");
                }

                int crcHigh = inputBuffer.ReadLeByte();
                if (crcHigh < 0) {
                    throw new EndOfStreamException("EOS reading GZIP header");
                }

                // gzip is LSB first: the first byte read is the low-order byte of the CRC16
                int crcval = (crcHigh << 8) | crcLow;
                if (crcval != ((int)headCRC.Value & 0xffff)) {
                    throw new GZipException("Header CRC value mismatch");
                }
            }

            readGZIPHeader = true;
            return true;
        }

        /// <summary>
        /// Reads one header byte from the input buffer and adds it to the header CRC.
        /// </summary>
        /// <param name="headCRC">The running CRC of the header bytes.</param>
        /// <returns>The byte that was read.</returns>
        /// <exception cref="EndOfStreamException">A negative value was returned by the input buffer.</exception>
        int ReadHeaderByte(Crc32 headCRC)
        {
            int value = inputBuffer.ReadLeByte();
            if (value < 0) {
                throw new EndOfStreamException("EOS reading GZIP header");
            }

            headCRC.Update(value);
            return value;
        }

        /// <summary>
        /// Skips a zero-terminated header field (file name or comment), adding every byte
        /// including the terminator to the header CRC.
        /// </summary>
        /// <param name="headCRC">The running CRC of the header bytes.</param>
        /// <exception cref="EndOfStreamException">The field is not terminated before the input ends.</exception>
        void SkipZeroTerminatedHeaderField(Crc32 headCRC)
        {
            while (ReadHeaderByte(headCRC) != 0) {
                // Nothing to do: the field content is not used, only accumulated into the CRC
            }
        }

        /// <summary>
        /// Reads the 8 byte GZIP member footer and checks the stored CRC-32 and uncompressed
        /// length against the values computed while decompressing. On success the stream is
        /// made ready to read a following member header.
        /// </summary>
        /// <exception cref="GZipException">The CRC or the byte count does not match.</exception>
        /// <exception cref="EndOfStreamException">The footer is incomplete.</exception>
        void ReadFooter()
        {
            byte[] footer = new byte[8];

            // End of stream; reclaim all bytes from inf, read the final byte count, and reset the inflator
            long bytesRead = inf.TotalOut & 0xffffffff;
            inputBuffer.Available += inf.RemainingInput;
            inf.Reset();

            // Read footer from inputBuffer; a single call may deliver fewer bytes than requested
            int needed = footer.Length;
            while (needed > 0) {
                int count = inputBuffer.ReadClearTextBuffer(footer, footer.Length - needed, needed);
                if (count <= 0) {
                    throw new EndOfStreamException("EOS reading GZIP footer");
                }
                needed -= count;
            }

            // Stored CRC-32, LSB first
            int crcval = footer[0] | (footer[1] << 8) | (footer[2] << 16) | (footer[3] << 24);
            if (crcval != (int)crc.Value) {
                throw new GZipException("GZIP crc sum mismatch, theirs \"" + crcval + "\" and ours \"" + (int)crc.Value + "\"");
            }

            // NOTE The total here is the original total modulo 2 ^ 32.
            uint total =
                (uint)footer[4] |
                ((uint)footer[5] << 8) |
                ((uint)footer[6] << 16) |
                ((uint)footer[7] << 24);

            if (bytesRead != total) {
                throw new GZipException("Number of bytes mismatch in footer");
            }

            // Mark header read as false so if another header exists, we'll continue reading through the file
            readGZIPHeader = false;
        }
        #endregion
    }
}