Fix for bug #48245 - tweak HWPF table cell detection to work across more files

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@953694 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-06-11 13:29:44 +00:00
parent 034e5c4a97
commit 5977a9db00
4 changed files with 361 additions and 166 deletions

View File

@ -34,6 +34,7 @@
<changes> <changes>
<release version="3.7-SNAPSHOT" date="2010-??-??"> <release version="3.7-SNAPSHOT" date="2010-??-??">
<action dev="POI-DEVELOPERS" type="add">48245 - tweak HWPF table cell detection to work across more files</action>
<action dev="POI-DEVELOPERS" type="add">48996 - initial support for External Name References in HSSF formula evaluation</action> <action dev="POI-DEVELOPERS" type="add">48996 - initial support for External Name References in HSSF formula evaluation</action>
<action dev="POI-DEVELOPERS" type="fix">46664 - fix up Tab IDs when adding new sheets, so that print areas don't end up invalid</action> <action dev="POI-DEVELOPERS" type="fix">46664 - fix up Tab IDs when adding new sheets, so that print areas don't end up invalid</action>
<action dev="POI-DEVELOPERS" type="fix">45269 - improve replaceText on HWPF ranges</action> <action dev="POI-DEVELOPERS" type="fix">45269 - improve replaceText on HWPF ranges</action>

View File

@ -17,6 +17,7 @@
package org.apache.poi.hwpf.usermodel; package org.apache.poi.hwpf.usermodel;
import org.apache.poi.hwpf.model.PropertyNode;
import org.apache.poi.hwpf.sprm.TableSprmUncompressor; import org.apache.poi.hwpf.sprm.TableSprmUncompressor;
public final class TableRow public final class TableRow
@ -57,10 +58,19 @@ public final class TableRow
p = getParagraph(end); p = getParagraph(end);
s = p.text(); s = p.text();
} }
_cells[cellIndex] = new TableCell(start, end+1, this, levelNum,
// Create it for the correct paragraph range
_cells[cellIndex] = new TableCell(start, end, this, levelNum,
_tprops.getRgtc()[cellIndex], _tprops.getRgtc()[cellIndex],
_tprops.getRgdxaCenter()[cellIndex], _tprops.getRgdxaCenter()[cellIndex],
_tprops.getRgdxaCenter()[cellIndex+1]-_tprops.getRgdxaCenter()[cellIndex]); _tprops.getRgdxaCenter()[cellIndex+1]-_tprops.getRgdxaCenter()[cellIndex]);
// Now we've decided where everything is, tweak the
// record of the paragraph end so that the
// paragraph level counts work
// This is a bit hacky, we really need a better fix...
_cells[cellIndex]._parEnd++;
// Next!
end++; end++;
start = end; start = end;
} }

View File

@ -30,172 +30,356 @@ import org.apache.poi.hwpf.model.StyleSheet;
*/ */
public final class TestProblems extends HWPFTestCase { public final class TestProblems extends HWPFTestCase {
/** /**
* ListEntry passed no ListTable * ListEntry passed no ListTable
*/ */
public void testListEntryNoListTable() { public void testListEntryNoListTable() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ListEntryNoListTable.doc"); HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ListEntryNoListTable.doc");
Range r = doc.getRange(); Range r = doc.getRange();
StyleSheet styleSheet = doc.getStyleSheet(); StyleSheet styleSheet = doc.getStyleSheet();
for (int x = 0; x < r.numSections(); x++) { for (int x = 0; x < r.numSections(); x++) {
Section s = r.getSection(x); Section s = r.getSection(x);
for (int y = 0; y < s.numParagraphs(); y++) { for (int y = 0; y < s.numParagraphs(); y++) {
Paragraph paragraph = s.getParagraph(y); Paragraph paragraph = s.getParagraph(y);
// System.out.println(paragraph.getCharacterRun(0).text()); // System.out.println(paragraph.getCharacterRun(0).text());
} }
}
}
/**
* AIOOB for TableSprmUncompressor.unCompressTAPOperation
*/
public void testSprmAIOOB() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("AIOOB-Tap.doc");
Range r = doc.getRange();
StyleSheet styleSheet = doc.getStyleSheet();
for (int x = 0; x < r.numSections(); x++) {
Section s = r.getSection(x);
for (int y = 0; y < s.numParagraphs(); y++) {
Paragraph paragraph = s.getParagraph(y);
// System.out.println(paragraph.getCharacterRun(0).text());
}
}
}
/**
* Test for TableCell not skipping the last paragraph. Bugs #45062 and
* #44292
*/
public void testTableCellLastParagraph() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug44292.doc");
Range r = doc.getRange();
assertEquals(6, r.numParagraphs());
assertEquals(0, r.getStartOffset());
assertEquals(87, r.getEndOffset());
// Paragraph with table
Paragraph p = r.getParagraph(0);
assertEquals(0, p.getStartOffset());
assertEquals(20, p.getEndOffset());
// Get the table
Table t = r.getTable(p);
// get the only row
assertEquals(1, t.numRows());
TableRow row = t.getRow(0);
// get the first cell
TableCell cell = row.getCell(0);
// First cell should have one paragraph
assertEquals(1, cell.numParagraphs());
assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
// get the second
cell = row.getCell(1);
// Second cell should be detected as having two paragraphs
assertEquals(2, cell.numParagraphs());
assertEquals("First para is ok\r", cell.getParagraph(0).text());
assertEquals("Second paragraph is skipped\7", cell.getParagraph(1).text());
// get the last cell
cell = row.getCell(2);
// Last cell should have one paragraph
assertEquals(1, cell.numParagraphs());
assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
}
public void testRangeDelete() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug28627.doc");
Range range = doc.getRange();
int numParagraphs = range.numParagraphs();
int totalLength = 0, deletedLength = 0;
for (int i = 0; i < numParagraphs; i++) {
Paragraph para = range.getParagraph(i);
String text = para.text();
totalLength += text.length();
if (text.indexOf("{delete me}") > -1) {
para.delete();
deletedLength = text.length();
}
}
// check the text length after deletion
int newLength = 0;
range = doc.getRange();
numParagraphs = range.numParagraphs();
for (int i = 0; i < numParagraphs; i++) {
Paragraph para = range.getParagraph(i);
String text = para.text();
newLength += text.length();
}
assertEquals(newLength, totalLength - deletedLength);
}
/**
* With an encrypted file, we should give a suitable exception, and not OOM
*/
public void testEncryptedFile() {
try {
HWPFTestDataSamples.openSampleFile("PasswordProtected.doc");
fail();
} catch (EncryptedDocumentException e) {
// Good
}
}
public void testWriteProperties() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("SampleDoc.doc");
assertEquals("Nick Burch", doc.getSummaryInformation().getAuthor());
// Write and read
HWPFDocument doc2 = writeOutAndRead(doc);
assertEquals("Nick Burch", doc2.getSummaryInformation().getAuthor());
}
/**
* Test for reading paragraphs from Range after replacing some
* text in this Range.
* Bug #45269
*/
public void testReadParagraphsAfterReplaceText()throws Exception{
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
Range range = doc.getRange();
String toFind = "campo1";
String longer = " foi porraaaaa ";
String shorter = " foi ";
//check replace with longer text
for (int x = 0; x < range.numParagraphs(); x++) {
Paragraph para = range.getParagraph(x);
int offset = para.text().indexOf(toFind);
if (offset >= 0) {
para.replaceText(toFind, longer, offset);
assertEquals(offset, para.text().indexOf(longer));
} }
} }
doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc"); /**
range = doc.getRange(); * AIOOB for TableSprmUncompressor.unCompressTAPOperation
*/
public void testSprmAIOOB() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("AIOOB-Tap.doc");
//check replace with shorter text Range r = doc.getRange();
for (int x = 0; x < range.numParagraphs(); x++) { StyleSheet styleSheet = doc.getStyleSheet();
Paragraph para = range.getParagraph(x); for (int x = 0; x < r.numSections(); x++) {
int offset = para.text().indexOf(toFind); Section s = r.getSection(x);
if (offset >= 0) { for (int y = 0; y < s.numParagraphs(); y++) {
para.replaceText(toFind, shorter, offset); Paragraph paragraph = s.getParagraph(y);
assertEquals(offset, para.text().indexOf(shorter)); // System.out.println(paragraph.getCharacterRun(0).text());
}
} }
} }
}
/**
* Test for TableCell not skipping the last paragraph. Bugs #45062 and
* #44292
*/
public void testTableCellLastParagraph() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug44292.doc");
Range r = doc.getRange();
assertEquals(6, r.numParagraphs());
assertEquals(0, r.getStartOffset());
assertEquals(87, r.getEndOffset());
// Paragraph with table
Paragraph p = r.getParagraph(0);
assertEquals(0, p.getStartOffset());
assertEquals(20, p.getEndOffset());
// Check a few bits of the table directly
assertEquals("One paragraph is ok\7", r.getParagraph(0).text());
assertEquals("First para is ok\r", r.getParagraph(1).text());
assertEquals("Second paragraph is skipped\7", r.getParagraph(2).text());
assertEquals("One paragraph is ok\7", r.getParagraph(3).text());
assertEquals("\7", r.getParagraph(4).text());
assertEquals("\r", r.getParagraph(5).text());
for(int i=0; i<=5; i++) {
assertFalse(r.getParagraph(i).usesUnicode());
}
// Get the table
Table t = r.getTable(p);
// get the only row
assertEquals(1, t.numRows());
TableRow row = t.getRow(0);
// sanity check our row
assertEquals(5, row.numParagraphs());
assertEquals(0, row._parStart);
assertEquals(5, row._parEnd);
assertEquals(0, row.getStartOffset());
assertEquals(87, row.getEndOffset());
// get the first cell
TableCell cell = row.getCell(0);
// First cell should have one paragraph
assertEquals(1, cell.numParagraphs());
assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
assertEquals(0, cell._parStart);
assertEquals(1, cell._parEnd);
assertEquals(0, cell.getStartOffset());
assertEquals(20, cell.getEndOffset());
// get the second
cell = row.getCell(1);
// Second cell should be detected as having two paragraphs
assertEquals(2, cell.numParagraphs());
assertEquals("First para is ok\r", cell.getParagraph(0).text());
assertEquals("Second paragraph is skipped\7", cell.getParagraph(1).text());
assertEquals(1, cell._parStart);
assertEquals(3, cell._parEnd);
assertEquals(20, cell.getStartOffset());
assertEquals(65, cell.getEndOffset());
// get the last cell
cell = row.getCell(2);
// Last cell should have one paragraph
assertEquals(1, cell.numParagraphs());
assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
assertEquals(3, cell._parStart);
assertEquals(4, cell._parEnd);
assertEquals(65, cell.getStartOffset());
assertEquals(85, cell.getEndOffset());
}
public void testRangeDelete() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug28627.doc");
Range range = doc.getRange();
int numParagraphs = range.numParagraphs();
int totalLength = 0, deletedLength = 0;
for (int i = 0; i < numParagraphs; i++) {
Paragraph para = range.getParagraph(i);
String text = para.text();
totalLength += text.length();
if (text.indexOf("{delete me}") > -1) {
para.delete();
deletedLength = text.length();
}
}
// check the text length after deletion
int newLength = 0;
range = doc.getRange();
numParagraphs = range.numParagraphs();
for (int i = 0; i < numParagraphs; i++) {
Paragraph para = range.getParagraph(i);
String text = para.text();
newLength += text.length();
}
assertEquals(newLength, totalLength - deletedLength);
}
/**
* With an encrypted file, we should give a suitable exception, and not OOM
*/
public void testEncryptedFile() {
try {
HWPFTestDataSamples.openSampleFile("PasswordProtected.doc");
fail();
} catch (EncryptedDocumentException e) {
// Good
}
}
public void testWriteProperties() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("SampleDoc.doc");
assertEquals("Nick Burch", doc.getSummaryInformation().getAuthor());
// Write and read
HWPFDocument doc2 = writeOutAndRead(doc);
assertEquals("Nick Burch", doc2.getSummaryInformation().getAuthor());
}
/**
* Test for reading paragraphs from Range after replacing some
* text in this Range.
* Bug #45269
*/
public void testReadParagraphsAfterReplaceText()throws Exception{
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
Range range = doc.getRange();
String toFind = "campo1";
String longer = " foi porraaaaa ";
String shorter = " foi ";
//check replace with longer text
for (int x = 0; x < range.numParagraphs(); x++) {
Paragraph para = range.getParagraph(x);
int offset = para.text().indexOf(toFind);
if (offset >= 0) {
para.replaceText(toFind, longer, offset);
assertEquals(offset, para.text().indexOf(longer));
}
}
doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
range = doc.getRange();
//check replace with shorter text
for (int x = 0; x < range.numParagraphs(); x++) {
Paragraph para = range.getParagraph(x);
int offset = para.text().indexOf(toFind);
if (offset >= 0) {
para.replaceText(toFind, shorter, offset);
assertEquals(offset, para.text().indexOf(shorter));
}
}
}
/**
* Bug #48245 - don't include the text from the
* next cell in the current one
*/
public void testTableIterator() throws Exception {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("simple-table2.doc");
Range r = doc.getRange();
// Check the text is as we'd expect
assertEquals(13, r.numParagraphs());
assertEquals("Row 1/Cell 1\u0007", r.getParagraph(0).text());
assertEquals("Row 1/Cell 2\u0007", r.getParagraph(1).text());
assertEquals("Row 1/Cell 3\u0007", r.getParagraph(2).text());
assertEquals("\u0007", r.getParagraph(3).text());
assertEquals("Row 2/Cell 1\u0007", r.getParagraph(4).text());
assertEquals("Row 2/Cell 2\u0007", r.getParagraph(5).text());
assertEquals("Row 2/Cell 3\u0007", r.getParagraph(6).text());
assertEquals("\u0007", r.getParagraph(7).text());
assertEquals("Row 3/Cell 1\u0007", r.getParagraph(8).text());
assertEquals("Row 3/Cell 2\u0007", r.getParagraph(9).text());
assertEquals("Row 3/Cell 3\u0007", r.getParagraph(10).text());
assertEquals("\u0007", r.getParagraph(11).text());
assertEquals("\r", r.getParagraph(12).text());
for(int i=0; i<=12; i++) {
assertFalse(r.getParagraph(i).usesUnicode());
}
Paragraph p;
// Take a look in detail at the first couple of
// paragraphs
p = r.getParagraph(0);
assertEquals(1, p.numParagraphs());
assertEquals(0, p.getStartOffset());
assertEquals(13, p.getEndOffset());
assertEquals(0, p._parStart);
assertEquals(1, p._parEnd);
p = r.getParagraph(1);
assertEquals(1, p.numParagraphs());
assertEquals(13, p.getStartOffset());
assertEquals(26, p.getEndOffset());
assertEquals(1, p._parStart);
assertEquals(2, p._parEnd);
p = r.getParagraph(2);
assertEquals(1, p.numParagraphs());
assertEquals(26, p.getStartOffset());
assertEquals(39, p.getEndOffset());
assertEquals(2, p._parStart);
assertEquals(3, p._parEnd);
// Now look at the table
Table table = r.getTable(r.getParagraph(0));
assertEquals(3, table.numRows());
TableRow row;
TableCell cell;
row = table.getRow(0);
assertEquals(0, row._parStart);
assertEquals(4, row._parEnd);
cell = row.getCell(0);
assertEquals(1, cell.numParagraphs());
assertEquals(0, cell._parStart);
assertEquals(1, cell._parEnd);
assertEquals(0, cell.getStartOffset());
assertEquals(13, cell.getEndOffset());
assertEquals("Row 1/Cell 1\u0007", cell.text());
cell = row.getCell(1);
assertEquals(1, cell.numParagraphs());
assertEquals(1, cell._parStart);
assertEquals(2, cell._parEnd);
assertEquals(13, cell.getStartOffset());
assertEquals(26, cell.getEndOffset());
assertEquals("Row 1/Cell 2\u0007", cell.text());
cell = row.getCell(2);
assertEquals(1, cell.numParagraphs());
assertEquals(2, cell._parStart);
assertEquals(3, cell._parEnd);
assertEquals(26, cell.getStartOffset());
assertEquals(39, cell.getEndOffset());
assertEquals("Row 1/Cell 3\u0007", cell.text());
// Onto row #2
row = table.getRow(1);
assertEquals(4, row._parStart);
assertEquals(8, row._parEnd);
cell = row.getCell(0);
assertEquals(1, cell.numParagraphs());
assertEquals(4, cell._parStart);
assertEquals(5, cell._parEnd);
assertEquals(40, cell.getStartOffset());
assertEquals(53, cell.getEndOffset());
assertEquals("Row 2/Cell 1\u0007", cell.text());
cell = row.getCell(1);
assertEquals(1, cell.numParagraphs());
assertEquals(5, cell._parStart);
assertEquals(6, cell._parEnd);
assertEquals(53, cell.getStartOffset());
assertEquals(66, cell.getEndOffset());
assertEquals("Row 2/Cell 2\u0007", cell.text());
cell = row.getCell(2);
assertEquals(1, cell.numParagraphs());
assertEquals(6, cell._parStart);
assertEquals(7, cell._parEnd);
assertEquals(66, cell.getStartOffset());
assertEquals(79, cell.getEndOffset());
assertEquals("Row 2/Cell 3\u0007", cell.text());
// Finally row 3
row = table.getRow(2);
assertEquals(8, row._parStart);
assertEquals(12, row._parEnd);
cell = row.getCell(0);
assertEquals(1, cell.numParagraphs());
assertEquals(8, cell._parStart);
assertEquals(9, cell._parEnd);
assertEquals(80, cell.getStartOffset());
assertEquals(93, cell.getEndOffset());
assertEquals("Row 3/Cell 1\u0007", cell.text());
cell = row.getCell(1);
assertEquals(1, cell.numParagraphs());
assertEquals(9, cell._parStart);
assertEquals(10, cell._parEnd);
assertEquals(93, cell.getStartOffset());
assertEquals(106, cell.getEndOffset());
assertEquals("Row 3/Cell 2\u0007", cell.text());
cell = row.getCell(2);
assertEquals(1, cell.numParagraphs());
assertEquals(10, cell._parStart);
assertEquals(11, cell._parEnd);
assertEquals(106, cell.getStartOffset());
assertEquals(119, cell.getEndOffset());
assertEquals("Row 3/Cell 3\u0007", cell.text());
}
} }

Binary file not shown.