/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.zip;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.math.BigInteger;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;

import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries. In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 *  <li>may return entries that are not part of the central directory
 *  at all and shouldn't be considered part of the archive.</li>
 *
 *  <li>may return several entries with the same name.</li>
 *
 *  <li>will not return internal or external attributes.</li>
 *
 *  <li>may return incomplete extra field data.</li>
 *
 *  <li>may return unknown sizes and CRC values for entries until the
 *  next entry has been reached if the archive uses the data
 *  descriptor feature.</li>
 *
 * </ul>
 *
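 * <p>A minimal read loop might look like the following sketch; the file
 * name is only a placeholder and error handling is omitted:</p>
 *
 * <pre>{@code
 * try (ZipArchiveInputStream zis = new ZipArchiveInputStream(
 *          Files.newInputStream(Paths.get("archive.zip")))) {
 *     ZipArchiveEntry entry;
 *     while ((entry = zis.getNextZipEntry()) != null) {
 *         if (!zis.canReadEntryData(entry)) {
 *             continue;
 *         }
 *         // read the entry's data from zis before asking for the next entry
 *     }
 * }
 * }</pre>
 *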
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {

    /** The zip encoding to use for file names and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it. This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Count decompressed bytes for current entry */
    private long uncompressedCount = 0;

    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        ((Buffer)buf).limit(0);
    }

    public ZipArchiveEntry getNextZipEntry() throws IOException {
        uncompressedCount = 0;

        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        long currentHeaderOffset = getBytesRead();
        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(lfhBuf);
            } else {
                readFully(lfhBuf);
            }
        } catch (final EOFException e) { //NOSONAR
            return null;
        }

        final ZipLong sig = new ZipLong(lfhBuf);
        if (!sig.equals(ZipLong.LFH_SIG)) {
            if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) {
                hitCentralDirectory = true;
                skipRemainderOfArchive();
                return null;
            }
            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
        }

        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ?
            ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
            off += WORD;

            cSize = new ZipLong(lfhBuf, off);
            off += WORD;

            size = new ZipLong(lfhBuf, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(lfhBuf, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(lfhBuf, off);
        off += SHORT; // NOSONAR - assignment as documentation

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);
        if (hasUTF8Flag) {
            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        current.entry.setLocalHeaderOffset(currentHeaderOffset);
        current.entry.setDataOffset(getBytesRead());
        current.entry.setStreamContiguous(true);

        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
                switch (m) {
                case UNSHRINKING:
                    current.in = new UnshrinkingInputStream(bis);
                    break;
                case IMPLODING:
                    current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        bis);
                    break;
                case BZIP2:
                    current.in = new BZip2CompressorInputStream(bis);
                    break;
                case ENHANCED_DEFLATED:
                    current.in = new Deflate64CompressorInputStream(bis);
                    break;
                default:
                    // we should never get here as all supported methods have been covered
                    // will cause an error when read is invoked, don't throw an exception here so people can
                    // skip unsupported entries
                    break;
                }
            }
        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
            current.in = new Deflate64CompressorInputStream(in);
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end. Just skip over the marker.
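            // The marker is four bytes long, so the buffer currently holds
            // the marker followed by the first LFH_LEN - 4 bytes of the real
            // local file header: read the four missing bytes and shift
            // everything into place.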
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                && (ZipLong.ZIP64_MAGIC.equals(cSize) || ZipLong.ZIP64_MAGIC.equals(size))) {
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else if (cSize != null && size != null) {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze)
                && supportsCompressedSizeFor(ze);
        }
        return false;
    }

    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }
        if (!supportsCompressedSizeFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
                    current.entry);
        }

        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            current.crc.update(buffer, offset, read);
            uncompressedCount += read;
        }

        return read;
    }

    /**
     * @since 1.17
     */
    @Override
    public long getCompressedCount() {
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            return current.bytesRead;
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            return getBytesInflated();
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
            return ((UnshrinkingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
            return ((ExplodingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
        } else {
            return -1;
        }
    }

    /**
     * @since 1.17
     */
    @Override
    public long getUncompressedCount() {
        return uncompressedCount;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        if (buf.position() >= buf.limit()) {
            ((Buffer)buf).position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                ((Buffer)buf).limit(0);
                throw new IOException("Truncated ZIP file");
            }
            ((Buffer)buf).limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
        final int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                                       + " which is not supported by Commons"
                                       + " Compress.");
            } else if (read == -1) {
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
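     *
     * <p>{@code Inflater.inflate} may legitimately return 0 while it still
     * needs more input, so the loop keeps refilling the inflater until it
     * produces output, reports that it has finished, or the underlying
     * stream is exhausted.</p>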
     */
    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                final int l = fill();
                if (l > 0) {
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (final DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
        } while (read == 0 && inf.needsInput());
        return read;
    }

    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                in.close();
            } finally {
                inf.end();
            }
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException - if an I/O error occurs.
     * @throws IllegalArgumentException - if value is negative.
     */
    @Override
    public long skip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
     * @param signature the bytes to check
     * @param length the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

    private static boolean checksig(final byte[] signature, final byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
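     * <p>
     * {@link #getNextZipEntry} calls this method before it starts parsing
     * the next local file header, so an entry does not have to be consumed
     * completely before moving on to the next one.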
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (currentEntryHasOutstandingBytes()) {
            drainCurrentEntryData();
        } else {
            // this is guaranteed to exhaust the stream
            skip(Long.MAX_VALUE); //NOSONAR

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                             ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
                current.bytesReadFromStream -= diff;
            }

            // Drain remainder of entry if not all data bytes were required
            if (currentEntryHasOutstandingBytes()) {
                drainCurrentEntryData();
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        inf.reset();
        ((Buffer)buf).clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * If the compressed size of the current entry is included in the entry header
     * and there are any outstanding bytes in the underlying stream, then
     * this returns true.
     *
     * @return true, if current entry is determined to have outstanding bytes, false otherwise
     */
    private boolean currentEntryHasOutstandingBytes() {
        return current.bytesReadFromStream <= current.entry.getCompressedSize()
            && !current.hasDataDescriptor;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: "
                                       + ArchiveUtils.sanitize(current.entry.getName()));
            }
            count(n);
            remaining -= n;
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java &lt; Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation.
     * Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        final int length = in.read(buf.array());
        if (length > 0) {
            ((Buffer)buf).limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    private void readFully(final byte[] b) throws IOException {
        readFully(b, 0);
    }

    private void readFully(final byte[] b, final int off) throws IOException {
        final int len = b.length - off;
        final int count = IOUtils.readFully(in, b, off, len);
        count(count);
        if (count < len) {
            throw new EOFException();
        }
    }

    private void readDataDescriptor() throws IOException {
        readFully(wordBuf);
        ZipLong val = new ZipLong(wordBuf);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(wordBuf);
            val = new ZipLong(wordBuf);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each. Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(twoDwordBuf);
        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(twoDwordBuf, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if the entry doesn't require any data descriptor, if the
     * method is DEFLATED or ENHANCED_DEFLATED, or if it is a STORED entry
     * and allowStoredEntriesWithDataDescriptor is true.
     */
    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
        return !entry.getGeneralPurposeBit().usesDataDescriptor()
            || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
            || entry.getMethod() == ZipEntry.DEFLATED
            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
    }

    /**
     * Whether the compressed size for the entry is either known or
     * not required by the compression method being used.
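     *
     * <p>DEFLATED and ENHANCED_DEFLATED streams signal their own end, so the
     * compressed size is not needed up front. STORED entries that rely on a
     * data descriptor can only be read when
     * {@code allowStoredEntriesWithDataDescriptor} is enabled, because the
     * whole entry has to be buffered to find its end.</p>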
     */
    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
            || entry.getMethod() == ZipEntry.DEFLATED
            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
            || (entry.getGeneralPurposeBit().usesDataDescriptor()
                && allowStoredEntriesWithDataDescriptor
                && entry.getMethod() == ZipEntry.STORED);
    }

    private static final String USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER =
        " while reading a stored entry using data descriptor. Either the archive is broken"
        + " or it can not be read using ZipArchiveInputStream and you must use ZipFile."
        + " A common cause for this is a ZIP archive containing a ZIP archive."
        + " See http://commons.apache.org/proper/commons-compress/zip.html#ZipArchiveInputStream_vs_ZipFile";

    /**
     * Caches a stored entry that uses the data descriptor.
     *
     * <ul>
     *   <li>Reads a stored entry until the signature of a local file
     *   header, central directory header or data descriptor has been
     *   found.</li>
     *   <li>Stores all entry data in lastStoredEntry.</li>
     *   <li>Rewinds the stream to position at the data
     *   descriptor.</li>
     *   <li>reads the data descriptor</li>
     * </ul>
     *
     * <p>After calling this method the entry should know its size,
     * the entry's data is cached and the stream is positioned at the
     * next local file or central directory header.</p>
     */
    private void readStoredEntry() throws IOException {
        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int off = 0;
        boolean done = false;

        // length of DD without signature
        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;

        while (!done) {
            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
            if (r <= 0) {
                // read the whole archive without ever finding a
                // central directory
                throw new IOException("Truncated ZIP file");
            }
            if (r + off < 4) {
                // buffer too small to check for a signature, loop
                off += r;
                continue;
            }

            done = bufferContainsSignature(bos, off, r, ddLen);
            if (!done) {
                off = cacheBytesRead(bos, off, r, ddLen);
            }
        }
        if (current.entry.getCompressedSize() != current.entry.getSize()) {
            throw new ZipException("compressed and uncompressed size don't match"
                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
        }
        final byte[] b = bos.toByteArray();
        if (b.length != current.entry.getSize()) {
            throw new ZipException("actual and claimed size don't match"
                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
        }
        lastStoredEntry = new ByteArrayInputStream(b);
    }

    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();

    /**
     * Checks whether the current buffer contains the signature of a
     * "data descriptor", "local file header" or
     * "central directory entry".
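     *
     * <p>All of these signatures start with the two bytes "PK", so only the
     * third and fourth byte decide which kind of record has been found.</p>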
     *
     * <p>If it contains such a signature, reads the data descriptor
     * and positions the stream right after the data descriptor.</p>
     */
    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
        throws IOException {

        boolean done = false;
        for (int i = 0; !done && i < offset + lastRead - 4; i++) {
            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
                int expectDDPos = i;
                if (i >= expectedDDLen &&
                    (buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
                    || (buf.array()[i + 2] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
                    // found a LFH or CFH:
                    expectDDPos = i - expectedDDLen;
                    done = true;
                }
                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
                    // found DD:
                    done = true;
                }
                if (done) {
                    // * push back bytes read in excess as well as the data
                    //   descriptor
                    // * copy the remaining bytes to cache
                    // * read data descriptor
                    pushback(buf.array(), expectDDPos, offset + lastRead - expectDDPos);
                    bos.write(buf.array(), 0, expectDDPos);
                    readDataDescriptor();
                }
            }
        }
        return done;
    }

    /**
     * If the last read bytes could hold a data descriptor and an
     * incomplete signature then save the last bytes to the front of
     * the buffer and cache everything in front of the potential data
     * descriptor into the given ByteArrayOutputStream.
     *
     * <p>Data descriptor plus incomplete signature (3 bytes in the
     * worst case) can be 20 bytes max.</p>
     */
    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expectedDDLen) {
        final int cacheable = offset + lastRead - expectedDDLen - 3;
        if (cacheable > 0) {
            bos.write(buf.array(), 0, cacheable);
            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expectedDDLen + 3);
            offset = expectedDDLen + 3;
        } else {
            offset += lastRead;
        }
        return offset;
    }

    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
        ((PushbackInputStream) in).unread(buf, offset, length);
        pushedBackBytes(length);
    }

    // End of Central Directory Record
    //   end of central dir signature    WORD
    //   number of this disk             SHORT
    //   number of the disk with the
    //   start of the central directory  SHORT
    //   total number of entries in the
    //   central directory on this disk  SHORT
    //   total number of entries in
    //   the central directory           SHORT
    //   size of the central directory   WORD
    //   offset of start of central
    //   directory with respect to
    //   the starting disk number        WORD
    //   .ZIP file comment length        SHORT
    //   .ZIP file comment up to 64KB
    //

    /**
     * Reads the stream until it finds the "End of central directory
     * record" and consumes it as well.
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory. One LFH has been read too much
        // already. The calculation discounts file names and extra
        // data so it will be too short.
        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
        findEocdRecord();
        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        readFully(shortBuf);
        // file comment
        realSkip(ZipShort.getValue(shortBuf));
    }

    /**
     * Reads forward until the signature of the "End of central
     * directory" record is found.
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        boolean skipReadCall = false;
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * Also updates bytes-read counter.
     */
    private void realSkip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     */
    private int readOneByte() throws IOException {
        final int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    private boolean isFirstByteOfEocdSig(final int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] {
        'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2',
    };
    private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);

    /**
     * Checks whether this might be an APK Signing Block.
     *
     * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It
     * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature
     * and if we've found it, return true.</p>
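     *
     * <p>According to the Android documentation linked below the block
     * consists of an eight byte length (which does not count the length field
     * itself), the signing data, the same eight byte length repeated and
     * finally the sixteen byte magic "APK Sig Block 42" - so the magic is
     * expected sixteen bytes before the end of the block.</p>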
     *
     * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold
     * the local file header of the next entry.
     *
     * @return true if this looks like an APK signing block
     *
     * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a>
     */
    private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException {
        // length of block excluding the size field itself
        BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader);
        // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block,
        // also subtract 16 bytes in order to position us at the magic string
        BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length
            - (long) APK_SIGNING_BLOCK_MAGIC.length));
        byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length];

        try {
            if (toSkip.signum() < 0) {
                // suspectLocalFileHeader contains the start of suspect magic string
                int off = suspectLocalFileHeader.length + toSkip.intValue();
                // length was shorter than magic length
                if (off < DWORD) {
                    return false;
                }
                int bytesInBuffer = Math.abs(toSkip.intValue());
                System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length));
                if (bytesInBuffer < magic.length) {
                    readFully(magic, bytesInBuffer);
                }
            } else {
                while (toSkip.compareTo(LONG_MAX) > 0) {
                    realSkip(Long.MAX_VALUE);
                    toSkip = toSkip.add(LONG_MAX.negate());
                }
                realSkip(toSkip.longValue());
                readFully(magic);
            }
        } catch (EOFException ex) { //NOSONAR
            // length was invalid
            return false;
        }
        return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC);
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }

    /**
     * Bounded input stream adapted from commons-io
     */
    private class BoundedInputStream extends InputStream {

        /** the wrapped input stream */
        private final InputStream in;

        /** the max length to provide */
        private final long max;

        /** the number of bytes already returned */
        private long pos = 0;

        /**
         * Creates a new <code>BoundedInputStream</code> that wraps the given input
         * stream and limits it to a certain size.
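         *
         * <p>Unlike a general purpose bounded stream, every byte read through
         * this wrapper is also counted towards the enclosing stream's
         * statistics and the current entry's bytesReadFromStream.</p>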
1232 * 1233 * @param in The wrapped input stream 1234 * @param size The maximum number of bytes to return 1235 */ 1236 public BoundedInputStream(final InputStream in, final long size) { 1237 this.max = size; 1238 this.in = in; 1239 } 1240 1241 @Override 1242 public int read() throws IOException { 1243 if (max >= 0 && pos >= max) { 1244 return -1; 1245 } 1246 final int result = in.read(); 1247 pos++; 1248 count(1); 1249 current.bytesReadFromStream++; 1250 return result; 1251 } 1252 1253 @Override 1254 public int read(final byte[] b) throws IOException { 1255 return this.read(b, 0, b.length); 1256 } 1257 1258 @Override 1259 public int read(final byte[] b, final int off, final int len) throws IOException { 1260 if (max >= 0 && pos >= max) { 1261 return -1; 1262 } 1263 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1264 final int bytesRead = in.read(b, off, (int) maxRead); 1265 1266 if (bytesRead == -1) { 1267 return -1; 1268 } 1269 1270 pos += bytesRead; 1271 count(bytesRead); 1272 current.bytesReadFromStream += bytesRead; 1273 return bytesRead; 1274 } 1275 1276 @Override 1277 public long skip(final long n) throws IOException { 1278 final long toSkip = max >= 0 ? Math.min(n, max - pos) : n; 1279 final long skippedBytes = IOUtils.skip(in, toSkip); 1280 pos += skippedBytes; 1281 return skippedBytes; 1282 } 1283 1284 @Override 1285 public int available() throws IOException { 1286 if (max >= 0 && pos >= max) { 1287 return 0; 1288 } 1289 return in.available(); 1290 } 1291 } 1292}