mirror of https://github.com/apache/poi.git
[bug-65581] support configurable temp file threshold
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1893421 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
944ea414cd
commit
8d7af95fed
|
@ -17,37 +17,82 @@
|
|||
|
||||
package org.apache.poi.openxml4j.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
||||
import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
||||
import org.apache.poi.util.TempFile;
|
||||
|
||||
/**
|
||||
* So we can close the real zip entry and still
|
||||
* effectively work with it.
|
||||
* Holds the (decompressed!) data in memory, so
|
||||
* Holds the (decompressed!) data in memory (or since POI 5.1.0, possibly in a temp file), so
|
||||
* close this as soon as you can!
|
||||
* @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)
|
||||
*/
|
||||
/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry {
|
||||
private final byte[] data;
|
||||
/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry implements Closeable {
|
||||
private static Logger LOG = LogManager.getLogger(ZipArchiveFakeEntry.class);
|
||||
private byte[] data;
|
||||
private File tempFile;
|
||||
|
||||
ZipArchiveFakeEntry(ZipArchiveEntry entry, InputStream inp) throws IOException {
|
||||
super(entry.getName());
|
||||
|
||||
final long entrySize = entry.getSize();
|
||||
|
||||
if (entrySize < -1 || entrySize>=Integer.MAX_VALUE) {
|
||||
throw new IOException("ZIP entry size is too large or invalid");
|
||||
}
|
||||
final int threshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
|
||||
if (threshold >= 0 && entrySize >= threshold) {
|
||||
tempFile = TempFile.createTempFile("poi-zip-entry", ".tmp");
|
||||
LOG.atInfo().log("created for temp file {} for zip entry {} of size {} bytes",
|
||||
tempFile.getAbsolutePath(), entry.getName(), entrySize);
|
||||
IOUtils.copy(inp, tempFile);
|
||||
} else {
|
||||
if (entrySize < -1 || entrySize >= Integer.MAX_VALUE) {
|
||||
throw new IOException("ZIP entry size is too large or invalid");
|
||||
}
|
||||
|
||||
// Grab the de-compressed contents for later
|
||||
data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize);
|
||||
// Grab the de-compressed contents for later
|
||||
data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an input stream over the (decompressed) data of this zip entry.
|
||||
* @return input stream
|
||||
* @throws RuntimeException since POI 5.1.0,
|
||||
* a RuntimeException can occur if the optional temp file has been removed
|
||||
* @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)
|
||||
*/
|
||||
public InputStream getInputStream() {
|
||||
return new UnsynchronizedByteArrayInputStream(data);
|
||||
if (tempFile != null) {
|
||||
try {
|
||||
return new FileInputStream(tempFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new RuntimeException("temp file " + tempFile.getAbsolutePath() + " is missing");
|
||||
}
|
||||
} else {
|
||||
return new UnsynchronizedByteArrayInputStream(data);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes any temp files and releases any byte arrays.
|
||||
* @throws IOException not thrown by this implementation, but declared by {@link Closeable#close()}
|
||||
* @since POI 5.1.0
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
data = null;
|
||||
if (tempFile != null) {
|
||||
tempFile.delete();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,15 +34,40 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
|||
* done, to free up that memory!
|
||||
*/
|
||||
public class ZipInputStreamZipEntrySource implements ZipEntrySource {
|
||||
private static int thresholdForTempFiles = -1;
|
||||
private final Map<String, ZipArchiveFakeEntry> zipEntries = new HashMap<>();
|
||||
|
||||
private InputStream streamToClose;
|
||||
|
||||
/**
|
||||
* Set the threshold at which a zip entry is regarded as too large for holding in memory
|
||||
* and the data is put in a temp file instead
|
||||
* @param thresholdBytes number of bytes at which a zip entry is regarded as too large for holding in memory
|
||||
* and the data is put in a temp file instead - defaults to -1 meaning temp files are not used
|
||||
* and that zip entries with more than 2GB of data after decompressing will fail, 0 means all
|
||||
* zip entries are stored in temp files. A threshold like 50000000 (approx 50MB) is recommended
|
||||
* @since POI 5.1.0
|
||||
*/
|
||||
public static void setThresholdBytesForTempFiles(int thresholdBytes) {
|
||||
thresholdForTempFiles = thresholdBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the threshold at which a zip entry is regarded as too large for holding in memory
|
||||
* and the data is put in a temp file instead (defaults to -1 meaning temp files are not used)
|
||||
* @return threshold in bytes
|
||||
* @since POI 5.1.0
|
||||
*/
|
||||
public static int getThresholdBytesForTempFiles() {
|
||||
return thresholdForTempFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all the entries from the ZipInputStream
|
||||
* into memory, and doesn't close (since POI 4.0.1) the source stream.
|
||||
* We'll then eat lots of memory, but be able to
|
||||
* work with the entries at-will.
|
||||
* @see #setThresholdBytesForTempFiles
|
||||
*/
|
||||
public ZipInputStreamZipEntrySource(ZipArchiveThresholdInputStream inp) throws IOException {
|
||||
for (;;) {
|
||||
|
@ -69,6 +94,10 @@ public class ZipInputStreamZipEntrySource implements ZipEntrySource {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
for (ZipArchiveFakeEntry entry : zipEntries.values()) {
|
||||
entry.close();
|
||||
}
|
||||
|
||||
// Free the memory
|
||||
zipEntries.clear();
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@ import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
|||
import org.apache.poi.openxml4j.opc.internal.FileHelper;
|
||||
import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart;
|
||||
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
|
||||
import org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource;
|
||||
import org.apache.poi.ss.tests.usermodel.BaseTestXWorkbook;
|
||||
import org.apache.poi.ss.usermodel.*;
|
||||
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
|
||||
|
@ -167,6 +168,26 @@ public final class TestXSSFWorkbook extends BaseTestXWorkbook {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void existingWithZipEntryTempFiles() throws Exception {
|
||||
int defaultThreshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
|
||||
ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(100);
|
||||
try (XSSFWorkbook workbook = openSampleWorkbook("Formatting.xlsx");
|
||||
OPCPackage pkg = OPCPackage.open(openSampleFileStream("Formatting.xlsx"))) {
|
||||
assertNotNull(workbook.getSharedStringSource());
|
||||
assertNotNull(workbook.getStylesSource());
|
||||
|
||||
// And check a few low level bits too
|
||||
PackagePart wbPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/workbook.xml"));
|
||||
|
||||
// Links to the three sheets, shared, styles and themes
|
||||
assertTrue(wbPart.hasRelationships());
|
||||
assertEquals(6, wbPart.getRelationships().size());
|
||||
} finally {
|
||||
ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(defaultThreshold);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void getCellStyleAt() throws IOException{
|
||||
try (XSSFWorkbook workbook = new XSSFWorkbook()) {
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Configuration status="WARN">
|
||||
<Appenders>
|
||||
<Console name="Console" target="SYSTEM_OUT">
|
||||
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
|
||||
</Console>
|
||||
</Appenders>
|
||||
<Loggers>
|
||||
<Root level="info">
|
||||
<AppenderRef ref="Console"/>
|
||||
</Root>
|
||||
</Loggers>
|
||||
</Configuration>
|
Loading…
Reference in New Issue