Bug 66425: Avoid exceptions found via poi-fuzz

Prevent excessive memory usage while building list bullet text

Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=67413

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1919237 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2024-07-15 05:41:04 +00:00
parent 5085e3d1b2
commit e2044c958b
5 changed files with 126 additions and 155 deletions

View File

@ -138,6 +138,7 @@ public class TestAllFiles {
"spreadsheet/clusterfuzz-testcase-minimized-POIXSSFFuzzer-5089447305609216.xlsx", "spreadsheet/clusterfuzz-testcase-minimized-POIXSSFFuzzer-5089447305609216.xlsx",
"spreadsheet/clusterfuzz-testcase-minimized-POIXSSFFuzzer-5089447305609216.xlsx", "spreadsheet/clusterfuzz-testcase-minimized-POIXSSFFuzzer-5089447305609216.xlsx",
"spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4651309315719168.xls", "spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4651309315719168.xls",
"document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.doc",
}); });
private static final Set<String> EXPECTED_FAILURES = StressTestUtils.unmodifiableHashSet( private static final Set<String> EXPECTED_FAILURES = StressTestUtils.unmodifiableHashSet(

View File

@ -37,6 +37,7 @@
<Logger name="org.apache.poi.xssf.usermodel.XSSFWorkbook" level="ERROR" /> <Logger name="org.apache.poi.xssf.usermodel.XSSFWorkbook" level="ERROR" />
<Logger name="org.apache.poi.hslf.usermodel.HSLFGroupShape" level="WARN" /> <Logger name="org.apache.poi.hslf.usermodel.HSLFGroupShape" level="WARN" />
<Logger name="org.apache.poi.hslf.record.Record" level="ERROR" /> <Logger name="org.apache.poi.hslf.record.Record" level="ERROR" />
<Logger name="org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor" level="FATAL" />
<!-- Change to DEBUG or another level to get log output --> <!-- Change to DEBUG or another level to get log output -->
<Root level="ERROR"> <Root level="ERROR">

View File

@ -40,6 +40,7 @@ import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.Beta; import org.apache.poi.util.Beta;
import org.apache.poi.util.IOUtils;
import org.w3c.dom.Attr; import org.w3c.dom.Attr;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap; import org.w3c.dom.NamedNodeMap;
@ -49,8 +50,7 @@ import org.w3c.dom.NodeList;
import static org.apache.logging.log4j.util.Unbox.box; import static org.apache.logging.log4j.util.Unbox.box;
@Beta @Beta
public class AbstractWordUtils public class AbstractWordUtils {
{
static final String EMPTY = ""; static final String EMPTY = "";
private static final Logger LOG = LogManager.getLogger(AbstractWordUtils.class); private static final Logger LOG = LogManager.getLogger(AbstractWordUtils.class);
@ -58,6 +58,17 @@ public class AbstractWordUtils
public static final float TWIPS_PER_INCH = 1440.0f; public static final float TWIPS_PER_INCH = 1440.0f;
public static final int TWIPS_PER_PT = 20; public static final int TWIPS_PER_PT = 20;
/**
* Limits the amount of main memory which can be used for bullet information.
*
* If this limit is too small, it can be raised via IOUtils.setByteArrayMaxOverride().
*
* The chosen limit is fairly arbitrary: it should allow almost all valid
* documents to be processed while preventing unexpectedly high memory
* allocation caused by malicious files.
*/
private static final int MAX_BULLET_BUFFER_SIZE = 1_000_000;
/** /**
* Creates array of all possible cell edges. In HTML (and FO) cells from * Creates array of all possible cell edges. In HTML (and FO) cells from
* different rows and same column should have same width, otherwise spanning * different rows and same column should have same width, otherwise spanning
@ -67,15 +78,12 @@ public class AbstractWordUtils
* table to build cell edges array from * table to build cell edges array from
* @return array of cell edges (including leftest one) in twips * @return array of cell edges (including leftest one) in twips
*/ */
static int[] buildTableCellEdgesArray( Table table ) static int[] buildTableCellEdgesArray( Table table ) {
{
Set<Integer> edges = new TreeSet<>(); Set<Integer> edges = new TreeSet<>();
for ( int r = 0; r < table.numRows(); r++ ) for ( int r = 0; r < table.numRows(); r++ ) {
{
TableRow tableRow = table.getRow( r ); TableRow tableRow = table.getRow( r );
for ( int c = 0; c < tableRow.numCells(); c++ ) for ( int c = 0; c < tableRow.numCells(); c++ ) {
{
TableCell tableCell = tableRow.getCell( c ); TableCell tableCell = tableRow.getCell( c );
edges.add(tableCell.getLeftEdge()); edges.add(tableCell.getLeftEdge());
@ -85,16 +93,14 @@ public class AbstractWordUtils
Integer[] sorted = edges.toArray(new Integer[0]); Integer[] sorted = edges.toArray(new Integer[0]);
int[] result = new int[sorted.length]; int[] result = new int[sorted.length];
for ( int i = 0; i < sorted.length; i++ ) for ( int i = 0; i < sorted.length; i++ ) {
{
result[i] = sorted[i]; result[i] = sorted[i];
} }
return result; return result;
} }
static boolean canBeMerged( Node node1, Node node2, String requiredTagName ) static boolean canBeMerged( Node node1, Node node2, String requiredTagName ) {
{
if ( node1.getNodeType() != Node.ELEMENT_NODE if ( node1.getNodeType() != Node.ELEMENT_NODE
|| node2.getNodeType() != Node.ELEMENT_NODE ) || node2.getNodeType() != Node.ELEMENT_NODE )
return false; return false;
@ -112,8 +118,7 @@ public class AbstractWordUtils
if ( attributes1.getLength() != attributes2.getLength() ) if ( attributes1.getLength() != attributes2.getLength() )
return false; return false;
for ( int i = 0; i < attributes1.getLength(); i++ ) for ( int i = 0; i < attributes1.getLength(); i++ ) {
{
final Attr attr1 = (Attr) attributes1.item( i ); final Attr attr1 = (Attr) attributes1.item( i );
final Attr attr2; final Attr attr2;
if ( isNotEmpty( attr1.getNamespaceURI() ) ) if ( isNotEmpty( attr1.getNamespaceURI() ) )
@ -130,11 +135,9 @@ public class AbstractWordUtils
return true; return true;
} }
static void compactChildNodesR( Element parentElement, String childTagName ) static void compactChildNodesR( Element parentElement, String childTagName ) {
{
NodeList childNodes = parentElement.getChildNodes(); NodeList childNodes = parentElement.getChildNodes();
for ( int i = 0; i < childNodes.getLength() - 1; i++ ) for ( int i = 0; i < childNodes.getLength() - 1; i++ ) {
{
Node child1 = childNodes.item( i ); Node child1 = childNodes.item( i );
Node child2 = childNodes.item( i + 1 ); Node child2 = childNodes.item( i + 1 );
if ( !AbstractWordUtils.canBeMerged( child1, child2, childTagName ) ) if ( !AbstractWordUtils.canBeMerged( child1, child2, childTagName ) )
@ -148,23 +151,19 @@ public class AbstractWordUtils
} }
childNodes = parentElement.getChildNodes(); childNodes = parentElement.getChildNodes();
for ( int i = 0; i < childNodes.getLength() - 1; i++ ) for ( int i = 0; i < childNodes.getLength() - 1; i++ ) {
{
Node child = childNodes.item( i ); Node child = childNodes.item( i );
if ( child instanceof Element ) if ( child instanceof Element ) {
{
compactChildNodesR( (Element) child, childTagName ); compactChildNodesR( (Element) child, childTagName );
} }
} }
} }
public static String getBorderType( BorderCode borderCode ) public static String getBorderType( BorderCode borderCode ) {
{
if ( borderCode == null ) if ( borderCode == null )
throw new IllegalArgumentException( "borderCode is null" ); throw new IllegalArgumentException( "borderCode is null" );
switch ( borderCode.getBorderType() ) switch ( borderCode.getBorderType() ) {
{
case 3: case 3:
case 10: case 10:
case 11: case 11:
@ -199,8 +198,7 @@ public class AbstractWordUtils
} }
} }
public static String getBorderWidth( BorderCode borderCode ) public static String getBorderWidth( BorderCode borderCode ) {
{
int lineWidth = borderCode.getLineWidth(); int lineWidth = borderCode.getLineWidth();
int pt = lineWidth / 8; int pt = lineWidth / 8;
int pte = lineWidth - pt * 8; int pte = lineWidth - pt * 8;
@ -208,47 +206,35 @@ public class AbstractWordUtils
return pt + "." + 1000 / 8 * pte + "pt"; return pt + "." + 1000 / 8 * pte + "pt";
} }
public static class NumberingState public static class NumberingState {
{
private final Map<String, Integer> levels = new HashMap<>(); private final Map<String, Integer> levels = new HashMap<>();
} }
public static String getBulletText( NumberingState numberingState, public static String getBulletText( NumberingState numberingState,
HWPFList list, char level ) HWPFList list, char level ) {
{
StringBuilder bulletBuffer = new StringBuilder(); StringBuilder bulletBuffer = new StringBuilder();
char[] xst = list.getNumberText( level ).toCharArray(); char[] xst = list.getNumberText( level ).toCharArray();
for ( char element : xst ) for ( char element : xst ) {
{ if ( element < 9 ) {
if ( element < 9 )
{
int lsid = list.getLsid(); int lsid = list.getLsid();
final String key = lsid + "#" + ( (int) element ); final String key = lsid + "#" + ( (int) element );
int num; int num;
if ( !list.isStartAtOverridden( element ) if ( !list.isStartAtOverridden( element )
&& numberingState.levels.containsKey( key ) ) && numberingState.levels.containsKey( key ) ) {
{
num = numberingState.levels.get( key ); num = numberingState.levels.get( key );
if ( level == element ) if ( level == element ) {
{
num++; num++;
numberingState.levels.put( key, num ); numberingState.levels.put( key, num );
} }
} } else {
else
{
num = list.getStartAt( element ); num = list.getStartAt( element );
numberingState.levels.put( key, num ); numberingState.levels.put( key, num );
} }
if ( level == element ) if ( level == element ) {
{
// cleaning states of nested levels to reset numbering // cleaning states of nested levels to reset numbering
for ( int i = element + 1; i < 9; i++ ) for ( int i = element + 1; i < 9; i++ ) {
{
final String childKey = lsid + "#" + i; final String childKey = lsid + "#" + i;
numberingState.levels.remove( childKey ); numberingState.levels.remove( childKey );
} }
@ -256,31 +242,32 @@ public class AbstractWordUtils
bulletBuffer.append( NumberFormatter.getNumber( num, bulletBuffer.append( NumberFormatter.getNumber( num,
list.getNumberFormat( level ) ) ); list.getNumberFormat( level ) ) );
} } else {
else
{
bulletBuffer.append( element ); bulletBuffer.append( element );
} }
// ensure this buffer does not grow too much; this should avoid cases where
// it can "explode", i.e. a small input file consumes huge amounts of
// main memory
IOUtils.safelyAllocateCheck(bulletBuffer.length(), MAX_BULLET_BUFFER_SIZE);
} }
byte follow = list.getTypeOfCharFollowingTheNumber( level ); byte follow = list.getTypeOfCharFollowingTheNumber( level );
switch ( follow ) switch ( follow ) {
{ case 0:
case 0: bulletBuffer.append( "\t" );
bulletBuffer.append( "\t" ); break;
break; case 1:
case 1: bulletBuffer.append( " " );
bulletBuffer.append( " " ); break;
break; default:
default: break;
break;
} }
return bulletBuffer.toString(); return bulletBuffer.toString();
} }
public static String getColor( int ico ) public static String getColor( int ico ) {
{
switch ( ico ) { switch ( ico ) {
case 2: case 2:
return "blue"; return "blue";
@ -318,8 +305,7 @@ public class AbstractWordUtils
} }
} }
public static String getOpacity( int argbValue ) public static String getOpacity( int argbValue ) {
{
int opacity = (int) ( ( argbValue & 0xFF000000L) >>> 24 ); int opacity = (int) ( ( argbValue & 0xFF000000L) >>> 24 );
if ( opacity == 0 || opacity == 0xFF ) if ( opacity == 0 || opacity == 0xFF )
return ".0"; return ".0";
@ -327,8 +313,7 @@ public class AbstractWordUtils
return "" + ( opacity / (float) 0xFF ); return "" + ( opacity / (float) 0xFF );
} }
public static String getColor24( int argbValue ) public static String getColor24( int argbValue ) {
{
if ( argbValue == -1 ) if ( argbValue == -1 )
throw new IllegalArgumentException( "This colorref is empty" ); throw new IllegalArgumentException( "This colorref is empty" );
@ -337,96 +322,88 @@ public class AbstractWordUtils
| ( bgrValue & 0xFF0000 ) >> 16; | ( bgrValue & 0xFF0000 ) >> 16;
// http://www.w3.org/TR/REC-html40/types.html#h-6.5 // http://www.w3.org/TR/REC-html40/types.html#h-6.5
switch ( rgbValue ) switch ( rgbValue ) {
{ case 0xFFFFFF:
case 0xFFFFFF: return "white";
return "white"; case 0xC0C0C0:
case 0xC0C0C0: return "silver";
return "silver"; case 0x808080:
case 0x808080: return "gray";
return "gray"; case 0x000000:
case 0x000000: return "black";
return "black"; case 0xFF0000:
case 0xFF0000: return "red";
return "red"; case 0x800000:
case 0x800000: return "maroon";
return "maroon"; case 0xFFFF00:
case 0xFFFF00: return "yellow";
return "yellow"; case 0x808000:
case 0x808000: return "olive";
return "olive"; case 0x00FF00:
case 0x00FF00: return "lime";
return "lime"; case 0x008000:
case 0x008000: return "green";
return "green"; case 0x00FFFF:
case 0x00FFFF: return "aqua";
return "aqua"; case 0x008080:
case 0x008080: return "teal";
return "teal"; case 0x0000FF:
case 0x0000FF: return "blue";
return "blue"; case 0x000080:
case 0x000080: return "navy";
return "navy"; case 0xFF00FF:
case 0xFF00FF: return "fuchsia";
return "fuchsia"; case 0x800080:
case 0x800080: return "purple";
return "purple";
} }
StringBuilder result = new StringBuilder( "#" ); StringBuilder result = new StringBuilder( "#" );
String hex = Integer.toHexString( rgbValue ); String hex = Integer.toHexString( rgbValue );
for ( int i = hex.length(); i < 6; i++ ) for ( int i = hex.length(); i < 6; i++ ) {
{
result.append( '0' ); result.append( '0' );
} }
result.append( hex ); result.append( hex );
return result.toString(); return result.toString();
} }
public static String getJustification( int js ) public static String getJustification( int js ) {
{ switch ( js ) {
switch ( js ) case 0:
{ case 7:
case 0: return "start";
case 7: case 1:
return "start"; case 5:
case 1: return "center";
case 5: case 2:
return "center"; case 8:
case 2: return "end";
case 8: case 3:
return "end"; case 4:
case 3: case 9:
case 4: return "justify";
case 9: case 6:
return "justify"; return "left";
case 6:
return "left";
} }
return ""; return "";
} }
public static String getLanguage( int languageCode ) public static String getLanguage( int languageCode ) {
{ switch ( languageCode ) {
switch ( languageCode ) case 1024:
{ return EMPTY;
case 1024: case 1033:
return EMPTY; return "en-us";
case 1033: case 1049:
return "en-us"; return "ru-ru";
case 1049: case 2057:
return "ru-ru"; return "en-uk";
case 2057: default:
return "en-uk"; LOG.atWarn().log("Unknown or unmapped language code: {}", box(languageCode));
default: return EMPTY;
LOG.atWarn().log("Unknown or unmapped language code: {}", box(languageCode));
return EMPTY;
} }
} }
public static String getListItemNumberLabel( int number, int format ) public static String getListItemNumberLabel( int number, int format ) {
{
if ( format != 0 ) if ( format != 0 )
LOG.atInfo().log("NYI: toListItemNumberLabel(): {}", box(format)); LOG.atInfo().log("NYI: toListItemNumberLabel(): {}", box(format));
@ -444,35 +421,27 @@ public class AbstractWordUtils
} }
public static HWPFDocumentCore loadDoc( final DirectoryNode root ) public static HWPFDocumentCore loadDoc( final DirectoryNode root )
throws IOException throws IOException {
{ try {
try
{
return new HWPFDocument( root ); return new HWPFDocument( root );
} } catch ( OldWordFileFormatException exc ) {
catch ( OldWordFileFormatException exc )
{
return new HWPFOldDocument( root ); return new HWPFOldDocument( root );
} }
} }
public static HWPFDocumentCore loadDoc( File docFile ) throws IOException public static HWPFDocumentCore loadDoc( File docFile ) throws IOException {
{
try (InputStream istream = Files.newInputStream(docFile.toPath())) { try (InputStream istream = Files.newInputStream(docFile.toPath())) {
return loadDoc(istream); return loadDoc(istream);
} }
} }
public static HWPFDocumentCore loadDoc( InputStream inputStream ) public static HWPFDocumentCore loadDoc( InputStream inputStream )
throws IOException throws IOException {
{
return loadDoc( HWPFDocumentCore.verifyAndBuildPOIFS( inputStream ) ); return loadDoc( HWPFDocumentCore.verifyAndBuildPOIFS( inputStream ) );
} }
public static HWPFDocumentCore loadDoc( public static HWPFDocumentCore loadDoc(
final POIFSFileSystem poifsFileSystem ) throws IOException final POIFSFileSystem poifsFileSystem ) throws IOException {
{
return loadDoc( poifsFileSystem.getRoot() ); return loadDoc( poifsFileSystem.getRoot() );
} }
} }

Binary file not shown.