mirror of https://github.com/apache/poi.git
Some zips can't be opened via ZipFile in JDK6, as the central directory
either contains non-Latin entries or uses a compression type that can't be handled. The workaround is to iterate over the stream and not the directory.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1736933 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
033580e1b3
commit
1e65636048
|
@ -18,6 +18,7 @@
|
||||||
package org.apache.poi.openxml4j.opc;
|
package org.apache.poi.openxml4j.opc;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
@ -88,6 +89,7 @@ public final class ZipPackage extends Package {
|
||||||
*/
|
*/
|
||||||
ZipPackage(InputStream in, PackageAccess access) throws IOException {
|
ZipPackage(InputStream in, PackageAccess access) throws IOException {
|
||||||
super(access);
|
super(access);
|
||||||
|
@SuppressWarnings("resource")
|
||||||
ThresholdInputStream zis = ZipHelper.openZipStream(in);
|
ThresholdInputStream zis = ZipHelper.openZipStream(in);
|
||||||
this.zipArchive = new ZipInputStreamZipEntrySource(zis);
|
this.zipArchive = new ZipInputStreamZipEntrySource(zis);
|
||||||
}
|
}
|
||||||
|
@ -101,18 +103,7 @@ public final class ZipPackage extends Package {
|
||||||
* The package access mode.
|
* The package access mode.
|
||||||
*/
|
*/
|
||||||
ZipPackage(String path, PackageAccess access) {
|
ZipPackage(String path, PackageAccess access) {
|
||||||
super(access);
|
this(new File(path), access);
|
||||||
|
|
||||||
final ZipFile zipFile;
|
|
||||||
|
|
||||||
try {
|
|
||||||
zipFile = ZipHelper.openZipFile(path);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new InvalidOperationException(
|
|
||||||
"Can't open the specified file: '" + path + "'", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
this.zipArchive = new ZipFileZipEntrySource(zipFile);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -123,19 +114,33 @@ public final class ZipPackage extends Package {
|
||||||
* @param access
|
* @param access
|
||||||
* The package access mode.
|
* The package access mode.
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("resource")
|
||||||
ZipPackage(File file, PackageAccess access) {
|
ZipPackage(File file, PackageAccess access) {
|
||||||
super(access);
|
super(access);
|
||||||
|
|
||||||
final ZipFile zipFile;
|
ZipEntrySource ze;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
zipFile = ZipHelper.openZipFile(file);
|
final ZipFile zipFile = ZipHelper.openZipFile(file);
|
||||||
|
ze = new ZipFileZipEntrySource(zipFile);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new InvalidOperationException(
|
// probably not happening with write access - not sure how to handle the default read-write access ...
|
||||||
"Can't open the specified file: '" + file + "'", e);
|
if (access == PackageAccess.WRITE) {
|
||||||
|
throw new InvalidOperationException("Can't open the specified file: '" + file + "'", e);
|
||||||
}
|
}
|
||||||
|
logger.log(POILogger.ERROR, "Error in zip file "+file+" - falling back to stream processing (i.e. ignoring zip central directory)");
|
||||||
this.zipArchive = new ZipFileZipEntrySource(zipFile);
|
// some zips can't be opened via ZipFile in JDK6, as the central directory
|
||||||
|
// contains either non-latin entries or the compression type can't be handled
|
||||||
|
// the workaround is to iterate over the stream and not the directory
|
||||||
|
FileInputStream fis;
|
||||||
|
try {
|
||||||
|
fis = new FileInputStream(file);
|
||||||
|
ThresholdInputStream zis = ZipHelper.openZipStream(fis);
|
||||||
|
ze = new ZipInputStreamZipEntrySource(zis);
|
||||||
|
} catch (IOException e2) {
|
||||||
|
throw new InvalidOperationException("Can't open the specified file: '" + file + "'", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.zipArchive = ze;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -221,6 +221,7 @@ public final class ZipHelper {
|
||||||
* The stream to open.
|
* The stream to open.
|
||||||
* @return The zip stream freshly open.
|
* @return The zip stream freshly open.
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("resource")
|
||||||
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
|
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
|
||||||
// Peek at the first few bytes to sanity check
|
// Peek at the first few bytes to sanity check
|
||||||
InputStream checkedStream = prepareToCheckHeader(stream);
|
InputStream checkedStream = prepareToCheckHeader(stream);
|
||||||
|
@ -228,8 +229,7 @@ public final class ZipHelper {
|
||||||
|
|
||||||
// Open as a proper zip stream
|
// Open as a proper zip stream
|
||||||
InputStream zis = new ZipInputStream(checkedStream);
|
InputStream zis = new ZipInputStream(checkedStream);
|
||||||
ThresholdInputStream tis = ZipSecureFile.addThreshold(zis);
|
return ZipSecureFile.addThreshold(zis);
|
||||||
return tis;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -262,8 +262,6 @@ public final class ZipHelper {
|
||||||
* @return The zip archive freshly open.
|
* @return The zip archive freshly open.
|
||||||
*/
|
*/
|
||||||
public static ZipFile openZipFile(String path) throws IOException {
|
public static ZipFile openZipFile(String path) throws IOException {
|
||||||
File f = new File(path);
|
return openZipFile(new File(path));
|
||||||
|
|
||||||
return openZipFile(f);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.POIXMLException;
|
import org.apache.poi.POIXMLException;
|
||||||
import org.apache.poi.POIXMLTextExtractor;
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
|
import org.apache.poi.UnsupportedFileFormatException;
|
||||||
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||||
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
|
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
|
||||||
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
||||||
|
@ -643,10 +644,7 @@ public class TestExtractorFactory {
|
||||||
public void testPackage() throws Exception {
|
public void testPackage() throws Exception {
|
||||||
// Excel
|
// Excel
|
||||||
POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
|
POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
|
||||||
assertTrue(
|
assertTrue(extractor instanceof XSSFExcelExtractor);
|
||||||
extractor
|
|
||||||
instanceof XSSFExcelExtractor
|
|
||||||
);
|
|
||||||
extractor.close();
|
extractor.close();
|
||||||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
|
extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
|
||||||
assertTrue(extractor.getText().length() > 200);
|
assertTrue(extractor.getText().length() > 200);
|
||||||
|
@ -654,48 +652,33 @@ public class TestExtractorFactory {
|
||||||
|
|
||||||
// Word
|
// Word
|
||||||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
|
extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
|
||||||
assertTrue(
|
assertTrue(extractor instanceof XWPFWordExtractor);
|
||||||
extractor
|
|
||||||
instanceof XWPFWordExtractor
|
|
||||||
);
|
|
||||||
extractor.close();
|
extractor.close();
|
||||||
|
|
||||||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
|
extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
|
||||||
assertTrue(
|
assertTrue(extractor.getText().length() > 120);
|
||||||
extractor.getText().length() > 120
|
|
||||||
);
|
|
||||||
extractor.close();
|
extractor.close();
|
||||||
|
|
||||||
// PowerPoint
|
// PowerPoint
|
||||||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
|
extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
|
||||||
assertTrue(
|
assertTrue(extractor instanceof XSLFPowerPointExtractor);
|
||||||
extractor
|
|
||||||
instanceof XSLFPowerPointExtractor
|
|
||||||
);
|
|
||||||
extractor.close();
|
extractor.close();
|
||||||
|
|
||||||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
|
extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
|
||||||
assertTrue(
|
assertTrue(extractor.getText().length() > 120);
|
||||||
extractor.getText().length() > 120
|
|
||||||
);
|
|
||||||
extractor.close();
|
extractor.close();
|
||||||
|
|
||||||
// Visio
|
// Visio
|
||||||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(vsdx.toString()));
|
extractor = ExtractorFactory.createExtractor(OPCPackage.open(vsdx.toString()));
|
||||||
assertTrue(
|
assertTrue(extractor instanceof XDGFVisioExtractor);
|
||||||
extractor
|
assertTrue(extractor.getText().length() > 20);
|
||||||
instanceof XDGFVisioExtractor
|
|
||||||
);
|
|
||||||
assertTrue(
|
|
||||||
extractor.getText().length() > 20
|
|
||||||
);
|
|
||||||
extractor.close();
|
extractor.close();
|
||||||
|
|
||||||
// Text
|
// Text
|
||||||
try {
|
try {
|
||||||
ExtractorFactory.createExtractor(OPCPackage.open(txt.toString()));
|
ExtractorFactory.createExtractor(OPCPackage.open(txt.toString()));
|
||||||
fail();
|
fail();
|
||||||
} catch(InvalidOperationException e) {
|
} catch(UnsupportedFileFormatException e) {
|
||||||
// Good
|
// Good
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,8 +41,6 @@ import java.util.List;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.zip.ZipEntry;
|
import java.util.zip.ZipEntry;
|
||||||
import java.util.zip.ZipError;
|
|
||||||
import java.util.zip.ZipException;
|
|
||||||
import java.util.zip.ZipFile;
|
import java.util.zip.ZipFile;
|
||||||
import java.util.zip.ZipOutputStream;
|
import java.util.zip.ZipOutputStream;
|
||||||
|
|
||||||
|
@ -50,6 +48,7 @@ import org.apache.poi.EncryptedDocumentException;
|
||||||
import org.apache.poi.POIDataSamples;
|
import org.apache.poi.POIDataSamples;
|
||||||
import org.apache.poi.POITestCase;
|
import org.apache.poi.POITestCase;
|
||||||
import org.apache.poi.POIXMLException;
|
import org.apache.poi.POIXMLException;
|
||||||
|
import org.apache.poi.UnsupportedFileFormatException;
|
||||||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
||||||
|
@ -744,7 +743,7 @@ public final class TestPackage {
|
||||||
try {
|
try {
|
||||||
OPCPackage.open(files.getFile("SampleSS.txt"));
|
OPCPackage.open(files.getFile("SampleSS.txt"));
|
||||||
fail("Shouldn't be able to open Plain Text");
|
fail("Shouldn't be able to open Plain Text");
|
||||||
} catch (InvalidOperationException e) {
|
} catch (UnsupportedFileFormatException e) {
|
||||||
// Unhelpful low-level error, sorry
|
// Unhelpful low-level error, sorry
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStreamWriter;
|
import java.io.OutputStreamWriter;
|
||||||
|
@ -35,6 +36,8 @@ import org.apache.poi.POIXMLException;
|
||||||
import org.apache.poi.extractor.ExtractorFactory;
|
import org.apache.poi.extractor.ExtractorFactory;
|
||||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
||||||
|
import org.apache.poi.sl.usermodel.SlideShow;
|
||||||
|
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||||
import org.apache.poi.ss.usermodel.Workbook;
|
import org.apache.poi.ss.usermodel.Workbook;
|
||||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||||
import org.apache.poi.xssf.XSSFTestDataSamples;
|
import org.apache.poi.xssf.XSSFTestDataSamples;
|
||||||
|
@ -164,4 +167,11 @@ public class TestZipPackage {
|
||||||
ExtractorFactory.setThreadPrefersEventExtractors(before);
|
ExtractorFactory.setThreadPrefersEventExtractors(before);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void unparseableCentralDirectory() throws IOException {
|
||||||
|
File f = OpenXML4JTestDataSamples.getSampleFile("at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx");
|
||||||
|
SlideShow<?,?> ppt = SlideShowFactory.create(f);
|
||||||
|
ppt.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue