[bug-65581] support configurable temp file threshold

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1893421 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2021-09-18 20:06:25 +00:00
parent 944ea414cd
commit 8d7af95fed
4 changed files with 119 additions and 11 deletions

View File

@ -17,37 +17,82 @@
package org.apache.poi.openxml4j.util; package org.apache.poi.openxml4j.util;
import java.io.IOException; import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream; import java.io.InputStream;
import java.io.IOException;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream; import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.TempFile;
/** /**
* So we can close the real zip entry and still * So we can close the real zip entry and still
* effectively work with it. * effectively work with it.
* Holds the (decompressed!) data in memory, so * Holds the (decompressed!) data in memory (or since POI 5.1.0, possibly in a temp file), so
* close this as soon as you can! * close this as soon as you can!
* @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)
*/ */
/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry { /* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry implements Closeable {
private final byte[] data; private static Logger LOG = LogManager.getLogger(ZipArchiveFakeEntry.class);
private byte[] data;
private File tempFile;
ZipArchiveFakeEntry(ZipArchiveEntry entry, InputStream inp) throws IOException { ZipArchiveFakeEntry(ZipArchiveEntry entry, InputStream inp) throws IOException {
super(entry.getName()); super(entry.getName());
final long entrySize = entry.getSize(); final long entrySize = entry.getSize();
if (entrySize < -1 || entrySize>=Integer.MAX_VALUE) { final int threshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
if (threshold >= 0 && entrySize >= threshold) {
tempFile = TempFile.createTempFile("poi-zip-entry", ".tmp");
LOG.atInfo().log("created for temp file {} for zip entry {} of size {} bytes",
tempFile.getAbsolutePath(), entry.getName(), entrySize);
IOUtils.copy(inp, tempFile);
} else {
if (entrySize < -1 || entrySize >= Integer.MAX_VALUE) {
throw new IOException("ZIP entry size is too large or invalid"); throw new IOException("ZIP entry size is too large or invalid");
} }
// Grab the de-compressed contents for later // Grab the de-compressed contents for later
data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize); data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize);
} }
}
/**
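* Returns a new input stream over the (decompressed) data of this zip entry.
* @return input stream reading from the in-memory data or, if one was created, from the temp file
* @throws RuntimeException since POI 5.1.0,
* a RuntimeException is thrown if the optional temp file is missing (for instance, because it has been removed)
* @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)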
*/
public InputStream getInputStream() { public InputStream getInputStream() {
if (tempFile != null) {
try {
return new FileInputStream(tempFile);
} catch (FileNotFoundException e) {
throw new RuntimeException("temp file " + tempFile.getAbsolutePath() + " is missing");
}
} else {
return new UnsynchronizedByteArrayInputStream(data); return new UnsynchronizedByteArrayInputStream(data);
} }
}
/**
* Releases the in-memory data and deletes the temp file, if one was created for this entry.
* <p>
* Deleting the temp file is best effort: a failure is logged as a warning instead of being
* thrown, so that a caller closing many entries in a loop still gets to close the remaining
* ones. Calling this method again after the temp file is gone is a no-op.
*
* @throws IOException declared by {@link Closeable}; this implementation does not throw it itself
* @since POI 5.1.0
*/
@Override
public void close() throws IOException {
    // drop the reference so the (possibly large) decompressed array can be garbage collected
    data = null;
    // File.delete() reports failure through its return value only - don't ignore it silently.
    // The exists() check keeps a repeated close() from logging a spurious warning.
    if (tempFile != null && tempFile.exists() && !tempFile.delete()) {
        LOG.atWarn().log("failed to delete temp file {}", tempFile.getAbsolutePath());
    }
}
} }

View File

@ -34,15 +34,40 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
* done, to free up that memory! * done, to free up that memory!
*/ */
public class ZipInputStreamZipEntrySource implements ZipEntrySource { public class ZipInputStreamZipEntrySource implements ZipEntrySource {
private static int thresholdForTempFiles = -1;
private final Map<String, ZipArchiveFakeEntry> zipEntries = new HashMap<>(); private final Map<String, ZipArchiveFakeEntry> zipEntries = new HashMap<>();
private InputStream streamToClose; private InputStream streamToClose;
/**
* Set the threshold at which a zip entry is regarded as too large for holding in memory
* and the data is put in a temp file instead.
* <p>
* The value is kept in a static field, so it is shared by all instances of this class.
*
* @param thresholdBytes number of bytes at which a zip entry is regarded as too large for holding in memory
* and the data is put in a temp file instead - defaults to -1 meaning temp files are not used
* and that zip entries with approximately 2GB or more of data after decompressing will fail.
* 0 means every zip entry that reports its size is stored in a temp file (an entry whose size
* is reported as -1, i.e. unknown, is still read into memory).
* A threshold like 50000000 (approx 50MB) is recommended.
* @since POI 5.1.0
*/
public static void setThresholdBytesForTempFiles(int thresholdBytes) {
thresholdForTempFiles = thresholdBytes;
}
/**
* Get the threshold at which a zip entry is regarded as too large for holding in memory
* and the data is put in a temp file instead (defaults to -1 meaning temp files are not used).
* @return threshold in bytes, as last set via {@link #setThresholdBytesForTempFiles(int)}
* @since POI 5.1.0
*/
public static int getThresholdBytesForTempFiles() {
return thresholdForTempFiles;
}
/** /**
* Reads all the entries from the ZipInputStream * Reads all the entries from the ZipInputStream
* into memory, and don't close (since POI 4.0.1) the source stream. * into memory, and don't close (since POI 4.0.1) the source stream.
* We'll then eat lots of memory, but be able to * We'll then eat lots of memory, but be able to
* work with the entries at-will. * work with the entries at-will.
* @see #setThresholdBytesForTempFiles
*/ */
public ZipInputStreamZipEntrySource(ZipArchiveThresholdInputStream inp) throws IOException { public ZipInputStreamZipEntrySource(ZipArchiveThresholdInputStream inp) throws IOException {
for (;;) { for (;;) {
@ -69,6 +94,10 @@ public class ZipInputStreamZipEntrySource implements ZipEntrySource {
@Override @Override
public void close() throws IOException { public void close() throws IOException {
for (ZipArchiveFakeEntry entry : zipEntries.values()) {
entry.close();
}
// Free the memory // Free the memory
zipEntries.clear(); zipEntries.clear();

View File

@ -53,6 +53,7 @@ import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.openxml4j.opc.internal.FileHelper; import org.apache.poi.openxml4j.opc.internal.FileHelper;
import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart; import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart;
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
import org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource;
import org.apache.poi.ss.tests.usermodel.BaseTestXWorkbook; import org.apache.poi.ss.tests.usermodel.BaseTestXWorkbook;
import org.apache.poi.ss.usermodel.*; import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy; import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
@ -167,6 +168,26 @@ public final class TestXSSFWorkbook extends BaseTestXWorkbook {
} }
} }
@Test
void existingWithZipEntryTempFiles() throws Exception {
    // The threshold is a static setting: remember it so it can be restored for other tests
    final int savedThreshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
    // Deliberately tiny, so that entries of 100 bytes or more are spooled to temp files
    ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(100);
    try (
        XSSFWorkbook workbook = openSampleWorkbook("Formatting.xlsx");
        OPCPackage pkg = OPCPackage.open(openSampleFileStream("Formatting.xlsx"))
    ) {
        // High level: the workbook parts must still be readable
        assertNotNull(workbook.getSharedStringSource());
        assertNotNull(workbook.getStylesSource());

        // Low level: the workbook part and its relationships
        // (links to the three sheets, shared strings, styles and theme)
        PackagePart workbookPart = pkg.getPart(
            PackagingURIHelper.createPartName("/xl/workbook.xml"));
        assertTrue(workbookPart.hasRelationships());
        assertEquals(6, workbookPart.getRelationships().size());
    } finally {
        ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(savedThreshold);
    }
}
@Test @Test
void getCellStyleAt() throws IOException{ void getCellStyleAt() throws IOException{
try (XSSFWorkbook workbook = new XSSFWorkbook()) { try (XSSFWorkbook workbook = new XSSFWorkbook()) {

View File

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Log4j2 configuration: every logger inherits the root level "info" and writes to a single
console appender on standard out, formatted as time, thread, level, logger name and message.
-->
<Configuration status="WARN">
<Appenders>
<Console name="Console" target="SYSTEM_OUT">
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
</Console>
</Appenders>
<Loggers>
<Root level="info">
<AppenderRef ref="Console"/>
</Root>
</Loggers>
</Configuration>