Some zips can't be opened via ZipFile in JDK6, because the central directory

either contains non-Latin entries or uses a compression type that can't be handled.
The workaround is to iterate over the zip stream instead of the central directory.


git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1736933 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2016-03-28 22:49:45 +00:00
parent 033580e1b3
commit 1e65636048
6 changed files with 48 additions and 53 deletions

View File

@ -18,6 +18,7 @@
package org.apache.poi.openxml4j.opc; package org.apache.poi.openxml4j.opc;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
@ -88,6 +89,7 @@ public final class ZipPackage extends Package {
*/ */
ZipPackage(InputStream in, PackageAccess access) throws IOException { ZipPackage(InputStream in, PackageAccess access) throws IOException {
super(access); super(access);
@SuppressWarnings("resource")
ThresholdInputStream zis = ZipHelper.openZipStream(in); ThresholdInputStream zis = ZipHelper.openZipStream(in);
this.zipArchive = new ZipInputStreamZipEntrySource(zis); this.zipArchive = new ZipInputStreamZipEntrySource(zis);
} }
@ -101,18 +103,7 @@ public final class ZipPackage extends Package {
* The package access mode. * The package access mode.
*/ */
ZipPackage(String path, PackageAccess access) { ZipPackage(String path, PackageAccess access) {
super(access); this(new File(path), access);
final ZipFile zipFile;
try {
zipFile = ZipHelper.openZipFile(path);
} catch (IOException e) {
throw new InvalidOperationException(
"Can't open the specified file: '" + path + "'", e);
}
this.zipArchive = new ZipFileZipEntrySource(zipFile);
} }
/** /**
@ -123,19 +114,33 @@ public final class ZipPackage extends Package {
* @param access * @param access
* The package access mode. * The package access mode.
*/ */
@SuppressWarnings("resource")
ZipPackage(File file, PackageAccess access) { ZipPackage(File file, PackageAccess access) {
super(access); super(access);
final ZipFile zipFile; ZipEntrySource ze;
try { try {
zipFile = ZipHelper.openZipFile(file); final ZipFile zipFile = ZipHelper.openZipFile(file);
ze = new ZipFileZipEntrySource(zipFile);
} catch (IOException e) { } catch (IOException e) {
throw new InvalidOperationException( // probably not happening with write access - not sure how to handle the default read-write access ...
"Can't open the specified file: '" + file + "'", e); if (access == PackageAccess.WRITE) {
throw new InvalidOperationException("Can't open the specified file: '" + file + "'", e);
} }
logger.log(POILogger.ERROR, "Error in zip file "+file+" - falling back to stream processing (i.e. ignoring zip central directory)");
this.zipArchive = new ZipFileZipEntrySource(zipFile); // some zips can't be opened via ZipFile in JDK6, as the central directory
// contains either non-latin entries or the compression type can't be handled
// the workaround is to iterate over the stream and not the directory
FileInputStream fis;
try {
fis = new FileInputStream(file);
ThresholdInputStream zis = ZipHelper.openZipStream(fis);
ze = new ZipInputStreamZipEntrySource(zis);
} catch (IOException e2) {
throw new InvalidOperationException("Can't open the specified file: '" + file + "'", e);
}
}
this.zipArchive = ze;
} }
/** /**

View File

@ -221,6 +221,7 @@ public final class ZipHelper {
* The stream to open. * The stream to open.
* @return The zip stream freshly open. * @return The zip stream freshly open.
*/ */
@SuppressWarnings("resource")
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException { public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
// Peek at the first few bytes to sanity check // Peek at the first few bytes to sanity check
InputStream checkedStream = prepareToCheckHeader(stream); InputStream checkedStream = prepareToCheckHeader(stream);
@ -228,8 +229,7 @@ public final class ZipHelper {
// Open as a proper zip stream // Open as a proper zip stream
InputStream zis = new ZipInputStream(checkedStream); InputStream zis = new ZipInputStream(checkedStream);
ThresholdInputStream tis = ZipSecureFile.addThreshold(zis); return ZipSecureFile.addThreshold(zis);
return tis;
} }
/** /**
@ -262,8 +262,6 @@ public final class ZipHelper {
* @return The zip archive freshly open. * @return The zip archive freshly open.
*/ */
public static ZipFile openZipFile(String path) throws IOException { public static ZipFile openZipFile(String path) throws IOException {
File f = new File(path); return openZipFile(new File(path));
return openZipFile(f);
} }
} }

View File

@ -32,6 +32,7 @@ import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.POITextExtractor; import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLException; import org.apache.poi.POIXMLException;
import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.UnsupportedFileFormatException;
import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
import org.apache.poi.hslf.extractor.PowerPointExtractor; import org.apache.poi.hslf.extractor.PowerPointExtractor;
@ -643,10 +644,7 @@ public class TestExtractorFactory {
public void testPackage() throws Exception { public void testPackage() throws Exception {
// Excel // Excel
POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ)); POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
assertTrue( assertTrue(extractor instanceof XSSFExcelExtractor);
extractor
instanceof XSSFExcelExtractor
);
extractor.close(); extractor.close();
extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString())); extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
assertTrue(extractor.getText().length() > 200); assertTrue(extractor.getText().length() > 200);
@ -654,48 +652,33 @@ public class TestExtractorFactory {
// Word // Word
extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString())); extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
assertTrue( assertTrue(extractor instanceof XWPFWordExtractor);
extractor
instanceof XWPFWordExtractor
);
extractor.close(); extractor.close();
extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString())); extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
assertTrue( assertTrue(extractor.getText().length() > 120);
extractor.getText().length() > 120
);
extractor.close(); extractor.close();
// PowerPoint // PowerPoint
extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString())); extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
assertTrue( assertTrue(extractor instanceof XSLFPowerPointExtractor);
extractor
instanceof XSLFPowerPointExtractor
);
extractor.close(); extractor.close();
extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString())); extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
assertTrue( assertTrue(extractor.getText().length() > 120);
extractor.getText().length() > 120
);
extractor.close(); extractor.close();
// Visio // Visio
extractor = ExtractorFactory.createExtractor(OPCPackage.open(vsdx.toString())); extractor = ExtractorFactory.createExtractor(OPCPackage.open(vsdx.toString()));
assertTrue( assertTrue(extractor instanceof XDGFVisioExtractor);
extractor assertTrue(extractor.getText().length() > 20);
instanceof XDGFVisioExtractor
);
assertTrue(
extractor.getText().length() > 20
);
extractor.close(); extractor.close();
// Text // Text
try { try {
ExtractorFactory.createExtractor(OPCPackage.open(txt.toString())); ExtractorFactory.createExtractor(OPCPackage.open(txt.toString()));
fail(); fail();
} catch(InvalidOperationException e) { } catch(UnsupportedFileFormatException e) {
// Good // Good
} }
} }

View File

@ -41,8 +41,6 @@ import java.util.List;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.zip.ZipEntry; import java.util.zip.ZipEntry;
import java.util.zip.ZipError;
import java.util.zip.ZipException;
import java.util.zip.ZipFile; import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream; import java.util.zip.ZipOutputStream;
@ -50,6 +48,7 @@ import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.POITestCase; import org.apache.poi.POITestCase;
import org.apache.poi.POIXMLException; import org.apache.poi.POIXMLException;
import org.apache.poi.UnsupportedFileFormatException;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@ -744,7 +743,7 @@ public final class TestPackage {
try { try {
OPCPackage.open(files.getFile("SampleSS.txt")); OPCPackage.open(files.getFile("SampleSS.txt"));
fail("Shouldn't be able to open Plain Text"); fail("Shouldn't be able to open Plain Text");
} catch (InvalidOperationException e) { } catch (UnsupportedFileFormatException e) {
// Unhelpful low-level error, sorry // Unhelpful low-level error, sorry
} }
} }

View File

@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail; import static org.junit.Assert.fail;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
@ -35,6 +36,8 @@ import org.apache.poi.POIXMLException;
import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.sl.usermodel.SlideShowFactory;
import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory; import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.XSSFTestDataSamples; import org.apache.poi.xssf.XSSFTestDataSamples;
@ -164,4 +167,11 @@ public class TestZipPackage {
ExtractorFactory.setThreadPrefersEventExtractors(before); ExtractorFactory.setThreadPrefersEventExtractors(before);
} }
} }
@Test
public void unparseableCentralDirectory() throws IOException {
File f = OpenXML4JTestDataSamples.getSampleFile("at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx");
SlideShow<?,?> ppt = SlideShowFactory.create(f);
ppt.close();
}
} }