Bug 66425: Avoid exceptions found via poi-fuzz

Processing formats uses regular expressions. Very complex formats
can recurse very deeply and thus can cause StackOVerflows depending
on the used stack-size.

In order to handle this a bit more gracefully, we now catch this
and report a better exception with details about the parsed 
format and potential mitigation.

Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=66137

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1919342 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2024-07-18 07:09:32 +00:00
parent 0dac5680c3
commit 0dea4a301c
6 changed files with 34 additions and 12 deletions

View File

@ -136,6 +136,7 @@ public class TestAllFiles {
"spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4977868385681408.xls", "spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4977868385681408.xls",
"spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4651309315719168.xls", "spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4651309315719168.xls",
"document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.doc", "document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.doc",
"spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls"
}); });
private static final Set<String> EXPECTED_FAILURES = StressTestUtils.unmodifiableHashSet( private static final Set<String> EXPECTED_FAILURES = StressTestUtils.unmodifiableHashSet(

View File

@ -46,7 +46,8 @@ public class TestExcelConverterSuite {
// not failing, but requires more memory // not failing, but requires more memory
"ex45698-22488.xls", "ex45698-22488.xls",
// broken documents // broken documents
"clusterfuzz-testcase-minimized-POIHSSFFuzzer-5436547081830400.xls" "clusterfuzz-testcase-minimized-POIHSSFFuzzer-5436547081830400.xls",
"clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls"
); );
public static Stream<Arguments> files() { public static Stream<Arguments> files() {

View File

@ -28,7 +28,6 @@ import java.util.regex.Pattern;
import javax.swing.JLabel; import javax.swing.JLabel;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Cell;
@ -182,19 +181,28 @@ public class CellFormat {
Matcher m = ONE_PART.matcher(format); Matcher m = ONE_PART.matcher(format);
List<CellFormatPart> parts = new ArrayList<>(); List<CellFormatPart> parts = new ArrayList<>();
while (m.find()) { try {
try { while (m.find()) {
String valueDesc = m.group(); try {
String valueDesc = m.group();
// Strip out the semicolon if it's there // Strip out the semicolon if it's there
if (valueDesc.endsWith(";")) if (valueDesc.endsWith(";"))
valueDesc = valueDesc.substring(0, valueDesc.length() - 1); valueDesc = valueDesc.substring(0, valueDesc.length() - 1);
parts.add(new CellFormatPart(locale, valueDesc)); parts.add(new CellFormatPart(locale, valueDesc));
} catch (RuntimeException e) { } catch (RuntimeException e) {
LOG.warn("Invalid format: {}", CellFormatter.quote(m.group()), e); LOG.warn("Invalid format: {}", CellFormatter.quote(m.group()), e);
parts.add(null); parts.add(null);
}
} }
} catch (StackOverflowError e) {
// very complex formats can cause the regex-parsing to exceed the available stack
// we want to handle this more gracefully by catching it and reporting a bit more
// details in the error message
throw new IllegalStateException("The provided format is too complex: " + format +
", you can try to increase Java Stack size via commandline argument '-Xss' " +
"to allow handling this format");
} }
formatPartCount = parts.size(); formatPartCount = parts.size();

View File

@ -382,4 +382,16 @@ final class TestExcelExtractor {
assertContains(txt, "Macro2"); assertContains(txt, "Macro2");
} }
} }
@Test
void testStackOverflowInRegex() throws IOException {
try (ExcelExtractor extractor = createExtractor("clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls")) {
extractor.getText();
} catch (IllegalStateException e) {
// we either get a StackOverflow or a parsing error depending on the stack-size of the current JVM,
// so we expect both here
assertTrue(e.getMessage().contains("Provided formula is too complex") ||
e.getMessage().contains("Did not have a ExtendedFormatRecord"));
}
}
} }

Binary file not shown.