From a7ce117bb5620d998e644680b3995c684e240db2 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Tue, 5 Jul 2011 14:13:27 +0000 Subject: [PATCH] rewrite table bounds detection for Word 97, including inner table support git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143070 13f79535-47bb-0310-9956-ffa450edef68 --- .../hwpf/converter/AbstractWordConverter.java | 8 + .../apache/poi/hwpf/usermodel/Paragraph.java | 4 +- .../org/apache/poi/hwpf/usermodel/Range.java | 64 ++-- .../org/apache/poi/hwpf/usermodel/Table.java | 8 +- .../apache/poi/hwpf/usermodel/TableCell.java | 18 +- .../apache/poi/hwpf/usermodel/TableRow.java | 299 ++++++++++-------- .../converter/TestWordToConverterSuite.java | 3 +- .../converter/TestWordToHtmlConverter.java | 7 +- .../apache/poi/hwpf/usermodel/TestRange.java | 4 +- 9 files changed, 240 insertions(+), 175 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java index 31641cffce..9ac5633c1d 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java @@ -241,6 +241,14 @@ public abstract class AbstractWordConverter if ( paragraph.isInTable() && paragraph.getTableLevel() != currentTableLevel ) { + if ( paragraph.getTableLevel() < currentTableLevel ) + throw new IllegalStateException( + "Trying to process table cell with higher level (" + + paragraph.getTableLevel() + + ") than current table level (" + + currentTableLevel + + ") as inner table part" ); + Table table = range.getTable( paragraph ); processTable( wordDocument, flow, table ); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java index a56e2f0adf..35b71966cc 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java @@ -84,9 +84,9 @@ public class Paragraph extends Range implements Cloneable { protected ParagraphProperties _props; protected SprmBuffer _papx; - protected Paragraph(int startIdx, int endIdx, Table parent) + protected Paragraph(int startIdxInclusive, int endIdxExclusive, Table parent) { - super(startIdx, endIdx, Range.TYPE_PARAGRAPH, parent); + super(startIdxInclusive, endIdxExclusive, Range.TYPE_PARAGRAPH, parent); PAPX papx = _paragraphs.get(_parEnd - 1); _props = papx.getParagraphProperties(_doc.getStyleSheet()); _papx = papx.getSprmBuf(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index 6b738d318b..a764524d09 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -90,10 +90,10 @@ public class Range { // TODO -instantiable superclass /** All paragraphs that belong to the document this Range belongs to. */ protected List _paragraphs; - /** The start index in the paragraphs list for this Range */ + /** The start index in the paragraphs list for this Range, inclusive */ protected int _parStart; - /** The end index in the paragraphs list for this Range. */ + /** The end index in the paragraphs list for this Range, exclusive */ protected int _parEnd; /** Have we loaded the characterRun indexes yet. */ @@ -178,9 +178,9 @@ public class Range { // TODO -instantiable superclass * lists. * * @param startIdx - * The starting index in the list. + * The starting index in the list, inclusive * @param endIdx - * The ending index in the list. + * The ending index in the list, exclusive * @param idxType * The list type. * @param parent @@ -199,27 +199,27 @@ public class Range { // TODO -instantiable superclass _parStart = parent._parStart + startIdx; _parEnd = parent._parStart + endIdx; _start = _paragraphs.get(_parStart).getStart(); - _end = _paragraphs.get(_parEnd).getEnd(); + _end = _paragraphs.get(_parEnd - 1).getEnd(); _parRangeFound = true; break; case TYPE_CHARACTER: _charStart = parent._charStart + startIdx; _charEnd = parent._charStart + endIdx; - _start = _characters.get(_charStart).getStart(); + _start = _characters.get(_charStart - 1).getStart(); _end = _characters.get(_charEnd).getEnd(); _charRangeFound = true; break; case TYPE_SECTION: _sectionStart = parent._sectionStart + startIdx; _sectionEnd = parent._sectionStart + endIdx; - _start = _sections.get(_sectionStart).getStart(); + _start = _sections.get(_sectionStart - 1).getStart(); _end = _sections.get(_sectionEnd).getEnd(); _sectionRangeFound = true; break; case TYPE_TEXT: _textStart = parent._textStart + startIdx; _textEnd = parent._textStart + endIdx; - _start = _text.get(_textStart).getStart(); + _start = _text.get(_textStart - 1).getStart(); _end = _text.get(_textEnd).getEnd(); _textRangeFound = true; break; @@ -832,7 +832,13 @@ public class Range { // TODO -instantiable superclass */ public Paragraph getParagraph(int index) { - initParagraphs(); + initParagraphs(); + + if ( index + _parStart >= _parEnd ) + throw new IndexOutOfBoundsException( "Paragraph #" + index + " (" + + (index + _parStart) + ") not in range [" + _parStart + + "; " + _parEnd + ")" ); + PAPX papx = _paragraphs.get(index + _parStart); ParagraphProperties props = papx.getParagraphProperties(_doc.getStyleSheet()); @@ -880,7 +886,7 @@ public class Range { // TODO -instantiable superclass r.initAll(); int tableLevel = paragraph.getTableLevel(); - int tableEnd = r._parEnd; + int tableEndInclusive = r._parEnd ; if ( r._parStart != 0 ) { @@ -895,25 +901,31 @@ public class Range { // TODO -instantiable superclass } } + final Range overallrange = getDocument() instanceof HWPFDocument ? ((HWPFDocument) getDocument()) + .getOverallRange() : getDocument().getRange(); int limit = _paragraphs.size(); - for ( ; tableEnd < limit; tableEnd++ ) + for ( ; tableEndInclusive < limit - 1; tableEndInclusive++ ) { - Paragraph next = new Paragraph( _paragraphs.get( tableEnd ), this ); + Paragraph next = new Paragraph( _paragraphs.get( tableEndInclusive + 1 ), + overallrange ); if ( !next.isInTable() || next.getTableLevel() < tableLevel ) break; } - initAll(); - if (tableEnd > _parEnd) { - throw new ArrayIndexOutOfBoundsException( - "The table's bounds fall outside of this Range"); - } - if (tableEnd < 0) { - throw new ArrayIndexOutOfBoundsException( - "The table's end is negative, which isn't allowed!"); - } - return new Table(r._parStart, tableEnd, r._doc.getRange(), paragraph.getTableLevel()); - } + initAll(); + if ( tableEndInclusive + 1 > _parEnd ) + { + throw new ArrayIndexOutOfBoundsException( + "The table's bounds fall outside of this Range" ); + } + if ( tableEndInclusive < 0 ) + { + throw new ArrayIndexOutOfBoundsException( + "The table's end is negative, which isn't allowed!" ); + } + return new Table( r._parStart, tableEndInclusive + 1, r._doc.getRange(), + paragraph.getTableLevel() ); + } /** * loads all of the list indexes. @@ -989,7 +1001,11 @@ public class Range { // TODO -instantiable superclass */ private int[] findRange(List rpl, int min, int start, int end) { int x = min; - PropertyNode node = rpl.get(x); + + if ( rpl.size() == min ) + return new int[] { min, min }; + + PropertyNode node = rpl.get( x ); while (node==null || (node.getEnd() <= start && x < rpl.size() - 1)) { x++; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Table.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Table.java index f676afd1d4..6988766212 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Table.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Table.java @@ -25,9 +25,9 @@ public final class Table extends Range private int _tableLevel; - Table( int startIdx, int endIdx, Range parent, int levelNum ) + Table( int startIdxInclusive, int endIdxExclusive, Range parent, int levelNum ) { - super( startIdx, endIdx, Range.TYPE_PARAGRAPH, parent ); + super( startIdxInclusive, endIdxExclusive, Range.TYPE_PARAGRAPH, parent ); _rows = new ArrayList(); _tableLevel = levelNum; @@ -41,8 +41,8 @@ public final class Table extends Range rowEnd++; if ( p.isTableRowEnd() && p.getTableLevel() == levelNum ) { - _rows.add( new TableRow( rowStart, rowEnd, this, levelNum ) ); - rowStart = rowEnd; + _rows.add( new TableRow( rowStart, rowEnd + 1, this, levelNum ) ); + rowStart = rowEnd + 1; } } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableCell.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableCell.java index ec8425c3e6..e319307bf7 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableCell.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableCell.java @@ -25,14 +25,16 @@ public final class TableCell private int _leftEdge; private int _width; - public TableCell(int startIdx, int endIdx, TableRow parent, int levelNum, TableCellDescriptor tcd, int leftEdge, int width) - { - super(startIdx, endIdx, Range.TYPE_PARAGRAPH, parent); - _tcd = tcd; - _leftEdge = leftEdge; - _width = width; - _levelNum = levelNum; - } + public TableCell( int startIdxInclusive, int endIdxExclusive, + TableRow parent, int levelNum, TableCellDescriptor tcd, + int leftEdge, int width ) + { + super( startIdxInclusive, endIdxExclusive, Range.TYPE_PARAGRAPH, parent ); + _tcd = tcd; + _leftEdge = leftEdge; + _width = width; + _levelNum = levelNum; + } public boolean isFirstMerged() { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java index 015c6141b2..52d81cafcd 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java @@ -17,155 +17,198 @@ package org.apache.poi.hwpf.usermodel; +import java.util.ArrayList; +import java.util.List; + import org.apache.poi.hwpf.sprm.TableSprmUncompressor; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; -public final class TableRow - extends Paragraph +public final class TableRow extends Paragraph { - private final static char TABLE_CELL_MARK = '\u0007'; + private final static char TABLE_CELL_MARK = '\u0007'; - private final static short SPRM_TJC = 0x5400; - private final static short SPRM_DXAGAPHALF = (short)0x9602; - private final static short SPRM_FCANTSPLIT = 0x3403; - private final static short SPRM_FTABLEHEADER = 0x3404; - private final static short SPRM_DYAROWHEIGHT = (short)0x9407; + private final static short SPRM_TJC = 0x5400; + private final static short SPRM_DXAGAPHALF = (short) 0x9602; + private final static short SPRM_FCANTSPLIT = 0x3403; + private final static short SPRM_FTABLEHEADER = 0x3404; + private final static short SPRM_DYAROWHEIGHT = (short) 0x9407; - int _levelNum; - private TableProperties _tprops; - private TableCell[] _cells; + private static final POILogger logger = POILogFactory + .getLogger( TableRow.class ); - public TableRow(int startIdx, int endIdx, Table parent, int levelNum) - { - super(startIdx, endIdx, parent); + int _levelNum; + private TableProperties _tprops; + private TableCell[] _cells; - _tprops = TableSprmUncompressor.uncompressTAP(_papx.toByteArray(), 2); - _levelNum = levelNum; - _cells = new TableCell[_tprops.getItcMac()]; - - int start = 0; - int end = 0; - - for (int cellIndex = 0; cellIndex < _cells.length; cellIndex++) + public TableRow( int startIdxInclusive, int endIdxExclusive, Table parent, + int levelNum ) { - Paragraph p = getParagraph(start); - String s = p.text(); + super( startIdxInclusive, endIdxExclusive, parent ); - while (! ( s.length() > 0 && (s.charAt(s.length() - 1) == TABLE_CELL_MARK) || - p.isEmbeddedCellMark() && p.getTableLevel() == levelNum) && end < endIdx) - { - end++; - p = getParagraph(end); - s = p.text(); - } + _tprops = TableSprmUncompressor.uncompressTAP( _papx.toByteArray(), 2 ); + _levelNum = levelNum; + final short expectedCellsCount = _tprops.getItcMac(); - // Create it for the correct paragraph range - _cells[cellIndex] = new TableCell(start, end, this, levelNum, - _tprops.getRgtc()[cellIndex], - _tprops.getRgdxaCenter()[cellIndex], - _tprops.getRgdxaCenter()[cellIndex+1]-_tprops.getRgdxaCenter()[cellIndex]); - // Now we've decided where everything is, tweak the - // record of the paragraph end so that the - // paragraph level counts work - // This is a bit hacky, we really need a better fix... - _cells[cellIndex]._parEnd++; - - // Next! - end++; - start = end; + int lastCellStart = 0; + List cells = new ArrayList( expectedCellsCount ); + for ( int p = 0; p < (endIdxExclusive - startIdxInclusive); p++ ) + { + Paragraph paragraph = getParagraph( p ); + String s = paragraph.text(); + + if ( s.length() > 0 + && s.charAt( s.length() - 1 ) == TABLE_CELL_MARK + && paragraph.getTableLevel() == levelNum ) + { + TableCellDescriptor tableCellDescriptor = _tprops.getRgtc() != null + && _tprops.getRgtc().length > cells.size() ? _tprops + .getRgtc()[cells.size()] : new TableCellDescriptor(); + final short leftEdge = _tprops.getRgdxaCenter() != null + && _tprops.getRgdxaCenter().length > cells.size() ? _tprops + .getRgdxaCenter()[cells.size()] : 0; + final short rightEdge = _tprops.getRgdxaCenter() != null + && _tprops.getRgdxaCenter().length > cells.size() + 1 ? _tprops + .getRgdxaCenter()[cells.size() + 1] : 0; + + TableCell tableCell = new TableCell( lastCellStart, p + 1, + this, levelNum, tableCellDescriptor, leftEdge, + rightEdge - leftEdge ); + cells.add( tableCell ); + lastCellStart = p + 1; + } + } + + if ( lastCellStart < (endIdxExclusive - startIdxInclusive - 1) ) + { + TableCellDescriptor tableCellDescriptor = _tprops.getRgtc() != null + && _tprops.getRgtc().length > cells.size() ? _tprops + .getRgtc()[cells.size()] : new TableCellDescriptor(); + final short leftEdge = _tprops.getRgdxaCenter() != null + && _tprops.getRgdxaCenter().length > cells.size() ? _tprops + .getRgdxaCenter()[cells.size()] : 0; + final short rightEdge = _tprops.getRgdxaCenter() != null + && _tprops.getRgdxaCenter().length > cells.size() + 1 ? _tprops + .getRgdxaCenter()[cells.size() + 1] : 0; + + TableCell tableCell = new TableCell( lastCellStart, + (endIdxExclusive - startIdxInclusive - 1), this, levelNum, + tableCellDescriptor, leftEdge, rightEdge - leftEdge ); + cells.add( tableCell ); + } + + if ( cells.size() != expectedCellsCount ) + { + logger.log( POILogger.WARN, + "Number of found table cells (" + cells.size() + + ") for table row [" + getStartOffset() + "c; " + + getEndOffset() + + "c] not equals to stored property value " + + expectedCellsCount ); + _tprops.setItcMac( (short) cells.size() ); + } + + _cells = cells.toArray( new TableCell[cells.size()] ); } - } - public int getRowJustification() - { - return _tprops.getJc(); - } + public int getRowJustification() + { + return _tprops.getJc(); + } - public void setRowJustification(int jc) - { - _tprops.setJc((short) jc); - _papx.updateSprm(SPRM_TJC, (short)jc); - } + public void setRowJustification( int jc ) + { + _tprops.setJc( (short) jc ); + _papx.updateSprm( SPRM_TJC, (short) jc ); + } - public int getGapHalf() - { - return _tprops.getDxaGapHalf(); - } + public int getGapHalf() + { + return _tprops.getDxaGapHalf(); + } - public void setGapHalf(int dxaGapHalf) - { - _tprops.setDxaGapHalf(dxaGapHalf); - _papx.updateSprm(SPRM_DXAGAPHALF, (short)dxaGapHalf); - } + public void setGapHalf( int dxaGapHalf ) + { + _tprops.setDxaGapHalf( dxaGapHalf ); + _papx.updateSprm( SPRM_DXAGAPHALF, (short) dxaGapHalf ); + } - public int getRowHeight() - { - return _tprops.getDyaRowHeight(); - } + public int getRowHeight() + { + return _tprops.getDyaRowHeight(); + } - public void setRowHeight(int dyaRowHeight) - { - _tprops.setDyaRowHeight(dyaRowHeight); - _papx.updateSprm(SPRM_DYAROWHEIGHT, (short)dyaRowHeight); - } + public void setRowHeight( int dyaRowHeight ) + { + _tprops.setDyaRowHeight( dyaRowHeight ); + _papx.updateSprm( SPRM_DYAROWHEIGHT, (short) dyaRowHeight ); + } - public boolean cantSplit() - { - return _tprops.getFCantSplit(); - } + public boolean cantSplit() + { + return _tprops.getFCantSplit(); + } - public void setCantSplit(boolean cantSplit) - { - _tprops.setFCantSplit(cantSplit); - _papx.updateSprm(SPRM_FCANTSPLIT, (byte)(cantSplit ? 1 : 0)); - } + public void setCantSplit( boolean cantSplit ) + { + _tprops.setFCantSplit( cantSplit ); + _papx.updateSprm( SPRM_FCANTSPLIT, (byte) (cantSplit ? 1 : 0) ); + } - public boolean isTableHeader() - { - return _tprops.getFTableHeader(); - } + public boolean isTableHeader() + { + return _tprops.getFTableHeader(); + } - public void setTableHeader(boolean tableHeader) - { - _tprops.setFTableHeader(tableHeader); - _papx.updateSprm(SPRM_FTABLEHEADER, (byte)(tableHeader ? 1 : 0)); - } + public void setTableHeader( boolean tableHeader ) + { + _tprops.setFTableHeader( tableHeader ); + _papx.updateSprm( SPRM_FTABLEHEADER, (byte) (tableHeader ? 1 : 0) ); + } - public int numCells() - { - return _cells.length; - } + public int numCells() + { + return _cells.length; + } + + public TableCell getCell( int index ) + { + return _cells[index]; + } + + public BorderCode getTopBorder() + { + return _tprops.getBrcBottom(); + } + + public BorderCode getBottomBorder() + { + return _tprops.getBrcBottom(); + } + + public BorderCode getLeftBorder() + { + return _tprops.getBrcLeft(); + } + + public BorderCode getRightBorder() + { + return _tprops.getBrcRight(); + } + + public BorderCode getHorizontalBorder() + { + return _tprops.getBrcHorizontal(); + } + + public BorderCode getVerticalBorder() + { + return _tprops.getBrcVertical(); + } + + public BorderCode getBarBorder() + { + throw new UnsupportedOperationException( "not applicable for TableRow" ); + } - public TableCell getCell(int index) - { - return _cells[index]; - } - - public BorderCode getTopBorder() { - return _tprops.getBrcBottom(); - } - - public BorderCode getBottomBorder() { - return _tprops.getBrcBottom(); - } - - public BorderCode getLeftBorder() { - return _tprops.getBrcLeft(); - } - - public BorderCode getRightBorder() { - return _tprops.getBrcRight(); - } - - public BorderCode getHorizontalBorder() { - return _tprops.getBrcHorizontal(); - } - - public BorderCode getVerticalBorder() { - return _tprops.getBrcVertical(); - } - - public BorderCode getBarBorder() { - throw new UnsupportedOperationException("not applicable for TableRow"); - } - } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java index 976ccd9810..0de7ab3ef7 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java @@ -40,7 +40,8 @@ public class TestWordToConverterSuite /** * YK: a quick hack to exclude failing documents from the suite. */ - private static List failingFiles = Arrays.asList(); + private static List failingFiles = Arrays + .asList( "ProblemExtracting.doc" ); public static Test suite() { diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java index 04032f22e6..ced950965a 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java @@ -68,18 +68,13 @@ public class TestWordToHtmlConverter extends TestCase assertTrue( result.substring( 0, 2000 ).contains( "" ) ); } - public void testBug33519() throws Exception - { - getHtmlText( "Bug33519.doc" ); - } - public void testBug46610_2() throws Exception { String result = getHtmlText( "Bug46610_2.doc" ); assertTrue( result .contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) ); } - + public void testBug46817() throws Exception { String result = getHtmlText( "Bug46817.doc" ); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java index cedb303eab..e6de35a736 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java @@ -79,14 +79,14 @@ public final class TestRange extends TestCase assertEquals( range.getStartOffset(), 0 ); assertEquals( range.getEndOffset(), 766 ); - Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() ); + Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() - 1); assertTrue( lastInMainRange.getEndOffset() <= 766 ); Section section = range.getSection( 0 ); assertTrue( section.getEndOffset() <= 766 ); Paragraph lastInMainSection = section.getParagraph( section - .numParagraphs() ); + .numParagraphs() - 1); assertTrue( lastInMainSection.getEndOffset() <= 766 ); } }