mirror of https://github.com/apache/poi.git
[bug-65581] support configurable temp file threshold
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1893421 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
944ea414cd
commit
8d7af95fed
|
@ -17,37 +17,82 @@
|
||||||
|
|
||||||
package org.apache.poi.openxml4j.util;
|
package org.apache.poi.openxml4j.util;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.Closeable;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
||||||
import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
|
import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
|
||||||
|
import org.apache.logging.log4j.LogManager;
|
||||||
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.apache.poi.util.IOUtils;
|
import org.apache.poi.util.IOUtils;
|
||||||
|
import org.apache.poi.util.TempFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* So we can close the real zip entry and still
|
* So we can close the real zip entry and still
|
||||||
* effectively work with it.
|
* effectively work with it.
|
||||||
* Holds the (decompressed!) data in memory, so
|
* Holds the (decompressed!) data in memory (or since POI 5.1.0, possibly in a temp file), so
|
||||||
* close this as soon as you can!
|
* close this as soon as you can!
|
||||||
|
* @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)
|
||||||
*/
|
*/
|
||||||
/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry {
|
/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry implements Closeable {
|
||||||
private final byte[] data;
|
private static Logger LOG = LogManager.getLogger(ZipArchiveFakeEntry.class);
|
||||||
|
private byte[] data;
|
||||||
|
private File tempFile;
|
||||||
|
|
||||||
ZipArchiveFakeEntry(ZipArchiveEntry entry, InputStream inp) throws IOException {
|
ZipArchiveFakeEntry(ZipArchiveEntry entry, InputStream inp) throws IOException {
|
||||||
super(entry.getName());
|
super(entry.getName());
|
||||||
|
|
||||||
final long entrySize = entry.getSize();
|
final long entrySize = entry.getSize();
|
||||||
|
|
||||||
if (entrySize < -1 || entrySize>=Integer.MAX_VALUE) {
|
final int threshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
|
||||||
throw new IOException("ZIP entry size is too large or invalid");
|
if (threshold >= 0 && entrySize >= threshold) {
|
||||||
}
|
tempFile = TempFile.createTempFile("poi-zip-entry", ".tmp");
|
||||||
|
LOG.atInfo().log("created for temp file {} for zip entry {} of size {} bytes",
|
||||||
|
tempFile.getAbsolutePath(), entry.getName(), entrySize);
|
||||||
|
IOUtils.copy(inp, tempFile);
|
||||||
|
} else {
|
||||||
|
if (entrySize < -1 || entrySize >= Integer.MAX_VALUE) {
|
||||||
|
throw new IOException("ZIP entry size is too large or invalid");
|
||||||
|
}
|
||||||
|
|
||||||
// Grab the de-compressed contents for later
|
// Grab the de-compressed contents for later
|
||||||
data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize);
|
data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an input stream over the (decompressed) data of this zip entry.
|
||||||
|
* @return input stream
|
||||||
|
* @throws RuntimeException since POI 5.1.0,
|
||||||
|
* a RuntimeException can occur if the optional temp file has been removed
|
||||||
|
* @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)
|
||||||
|
*/
|
||||||
public InputStream getInputStream() {
|
public InputStream getInputStream() {
|
||||||
return new UnsynchronizedByteArrayInputStream(data);
|
if (tempFile != null) {
|
||||||
|
try {
|
||||||
|
return new FileInputStream(tempFile);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new RuntimeException("temp file " + tempFile.getAbsolutePath() + " is missing");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return new UnsynchronizedByteArrayInputStream(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deletes any temp files and releases any byte arrays.
|
||||||
|
* @throws IOException
|
||||||
|
* @since POI 5.1.0
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
data = null;
|
||||||
|
if (tempFile != null) {
|
||||||
|
tempFile.delete();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,15 +34,40 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
||||||
* done, to free up that memory!
|
* done, to free up that memory!
|
||||||
*/
|
*/
|
||||||
public class ZipInputStreamZipEntrySource implements ZipEntrySource {
|
public class ZipInputStreamZipEntrySource implements ZipEntrySource {
|
||||||
|
private static int thresholdForTempFiles = -1;
|
||||||
private final Map<String, ZipArchiveFakeEntry> zipEntries = new HashMap<>();
|
private final Map<String, ZipArchiveFakeEntry> zipEntries = new HashMap<>();
|
||||||
|
|
||||||
private InputStream streamToClose;
|
private InputStream streamToClose;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the threshold at which a zip entry is regarded as too large for holding in memory
|
||||||
|
* and the data is put in a temp file instead
|
||||||
|
* @param thresholdBytes number of bytes at which a zip entry is regarded as too large for holding in memory
|
||||||
|
* and the data is put in a temp file instead - defaults to -1 meaning temp files are not used
|
||||||
|
* and that zip entries with more than 2GB of data after decompressing will fail, 0 means all
|
||||||
|
* zip entries are stored in temp files. A threshold like 50000000 (approx 50MB) is recommended.
|
||||||
|
* @since POI 5.1.0
|
||||||
|
*/
|
||||||
|
public static void setThresholdBytesForTempFiles(int thresholdBytes) {
|
||||||
|
thresholdForTempFiles = thresholdBytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the threshold at which a zip entry is regarded as too large for holding in memory
|
||||||
|
* and the data is put in a temp file instead (defaults to -1 meaning temp files are not used)
|
||||||
|
* @return threshold in bytes
|
||||||
|
* @since POI 5.1.0
|
||||||
|
*/
|
||||||
|
public static int getThresholdBytesForTempFiles() {
|
||||||
|
return thresholdForTempFiles;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads all the entries from the ZipInputStream
|
* Reads all the entries from the ZipInputStream
|
||||||
* into memory, and don't close (since POI 4.0.1) the source stream.
|
* into memory, and don't close (since POI 4.0.1) the source stream.
|
||||||
* We'll then eat lots of memory, but be able to
|
* We'll then eat lots of memory, but be able to
|
||||||
* work with the entries at-will.
|
* work with the entries at-will.
|
||||||
|
* @see #setThresholdBytesForTempFiles
|
||||||
*/
|
*/
|
||||||
public ZipInputStreamZipEntrySource(ZipArchiveThresholdInputStream inp) throws IOException {
|
public ZipInputStreamZipEntrySource(ZipArchiveThresholdInputStream inp) throws IOException {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
@ -69,6 +94,10 @@ public class ZipInputStreamZipEntrySource implements ZipEntrySource {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
for (ZipArchiveFakeEntry entry : zipEntries.values()) {
|
||||||
|
entry.close();
|
||||||
|
}
|
||||||
|
|
||||||
// Free the memory
|
// Free the memory
|
||||||
zipEntries.clear();
|
zipEntries.clear();
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,7 @@ import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
||||||
import org.apache.poi.openxml4j.opc.internal.FileHelper;
|
import org.apache.poi.openxml4j.opc.internal.FileHelper;
|
||||||
import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart;
|
import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart;
|
||||||
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
|
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
|
||||||
|
import org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource;
|
||||||
import org.apache.poi.ss.tests.usermodel.BaseTestXWorkbook;
|
import org.apache.poi.ss.tests.usermodel.BaseTestXWorkbook;
|
||||||
import org.apache.poi.ss.usermodel.*;
|
import org.apache.poi.ss.usermodel.*;
|
||||||
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
|
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
|
||||||
|
@ -167,6 +168,26 @@ public final class TestXSSFWorkbook extends BaseTestXWorkbook {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void existingWithZipEntryTempFiles() throws Exception {
|
||||||
|
int defaultThreshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
|
||||||
|
ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(100);
|
||||||
|
try (XSSFWorkbook workbook = openSampleWorkbook("Formatting.xlsx");
|
||||||
|
OPCPackage pkg = OPCPackage.open(openSampleFileStream("Formatting.xlsx"))) {
|
||||||
|
assertNotNull(workbook.getSharedStringSource());
|
||||||
|
assertNotNull(workbook.getStylesSource());
|
||||||
|
|
||||||
|
// And check a few low level bits too
|
||||||
|
PackagePart wbPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/workbook.xml"));
|
||||||
|
|
||||||
|
// Links to the three sheets, shared, styles and themes
|
||||||
|
assertTrue(wbPart.hasRelationships());
|
||||||
|
assertEquals(6, wbPart.getRelationships().size());
|
||||||
|
} finally {
|
||||||
|
ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(defaultThreshold);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void getCellStyleAt() throws IOException{
|
void getCellStyleAt() throws IOException{
|
||||||
try (XSSFWorkbook workbook = new XSSFWorkbook()) {
|
try (XSSFWorkbook workbook = new XSSFWorkbook()) {
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<Configuration status="WARN">
|
||||||
|
<Appenders>
|
||||||
|
<Console name="Console" target="SYSTEM_OUT">
|
||||||
|
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
|
||||||
|
</Console>
|
||||||
|
</Appenders>
|
||||||
|
<Loggers>
|
||||||
|
<Root level="info">
|
||||||
|
<AppenderRef ref="Console"/>
|
||||||
|
</Root>
|
||||||
|
</Loggers>
|
||||||
|
</Configuration>
|
Loading…
Reference in New Issue