From e544419a2294b6bf4196894fbb44ef470c9e710a Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Tue, 9 Aug 2011 04:46:36 +0000 Subject: [PATCH] extract Bugzilla tests from TestProblems into TestBugs git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1155205 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/hwpf/usermodel/TestBugs.java | 476 ++++++++++++++++++ .../poi/hwpf/usermodel/TestProblems.java | 418 --------------- 2 files changed, 476 insertions(+), 418 deletions(-) create mode 100644 src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java new file mode 100644 index 0000000000..d524a17728 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java @@ -0,0 +1,476 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.hwpf.usermodel; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.List; + +import junit.framework.AssertionFailedError; +import junit.framework.TestCase; +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.poi.POIDataSamples; +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.HWPFOldDocument; +import org.apache.poi.hwpf.HWPFTestDataSamples; +import org.apache.poi.hwpf.extractor.Word6Extractor; +import org.apache.poi.hwpf.extractor.WordExtractor; +import org.apache.poi.hwpf.model.FieldsDocumentPart; +import org.apache.poi.hwpf.model.PlexOfField; +import org.apache.poi.util.IOUtils; + +/** + * Test different problems reported in Apache Bugzilla + * + * @author Nick Burch (nick at torchbox dot com) + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + */ +public class TestBugs extends TestCase +{ + + private static void assertTableStructures( Range expected, Range actual ) + { + assertEquals( expected.numParagraphs(), actual.numParagraphs() ); + for ( int p = 0; p < expected.numParagraphs(); p++ ) + { + Paragraph expParagraph = expected.getParagraph( p ); + Paragraph actParagraph = actual.getParagraph( p ); + + assertEquals( expParagraph.text(), actParagraph.text() ); + assertEquals( "Diffent isInTable flags for paragraphs #" + p + + " -- " + expParagraph + " -- " + actParagraph + ".", + expParagraph.isInTable(), actParagraph.isInTable() ); + assertEquals( expParagraph.isTableRowEnd(), + actParagraph.isTableRowEnd() ); + + if ( expParagraph.isInTable() && actParagraph.isInTable() ) + { + Table expTable, actTable; + try + { + expTable = expected.getTable( expParagraph ); + actTable = actual.getTable( actParagraph ); + } + catch ( Exception exc ) + { + continue; + } + + assertEquals( expTable.numRows(), actTable.numRows() ); + assertEquals( expTable.numParagraphs(), + 
actTable.numParagraphs() ); + } + } + } + + static void fixed( String bugzillaId ) + { + throw new Error( + "Bug " + + bugzillaId + + " seems to be fixed. " + + "Please resolve the issue in Bugzilla and remove fail() from the test" ); + } + + private static void test47563_insertTable( int rows, int columns ) + { + // POI apparently can't create a document from scratch, + // so we need an existing empty dummy document + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "empty.doc" ); + + Range range = doc.getRange(); + Table table = range.insertBefore( + new TableProperties( (short) columns ), rows ); + table.sanityCheck(); + range.sanityCheck(); + + for ( int rowIdx = 0; rowIdx < table.numRows(); rowIdx++ ) + { + TableRow row = table.getRow( rowIdx ); + row.sanityCheck(); + for ( int colIdx = 0; colIdx < row.numCells(); colIdx++ ) + { + TableCell cell = row.getCell( colIdx ); + cell.sanityCheck(); + + Paragraph par = cell.getParagraph( 0 ); + par.sanityCheck(); + + par.insertBefore( "" + ( rowIdx * row.numCells() + colIdx ) ); + + par.sanityCheck(); + cell.sanityCheck(); + row.sanityCheck(); + table.sanityCheck(); + range.sanityCheck(); + } + } + + String text = range.text(); + int mustBeAfter = 0; + for ( int i = 0; i < rows * columns; i++ ) + { + int next = text.indexOf( Integer.toString( i ), mustBeAfter ); + assertFalse( next == -1 ); + mustBeAfter = next; + } + } + + /** + * Bug 33519 - HWPF fails to read a file + */ + public void test33519() + { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug33519.doc" ); + WordExtractor extractor = new WordExtractor( doc ); + extractor.getText(); + } + + /** + * Bug 34898 - WordExtractor doesn't read the whole string from the file + */ + public void test34898() + { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug34898.doc" ); + WordExtractor extractor = new WordExtractor( doc ); + assertEquals( "\u30c7\u30a3\u30ec\u30af\u30c8\u30ea", extractor + .getText().trim() ); + } + + /** + * Bug 44431 - 
HWPFDocument.write destroys fields + */ + public void test44431() + { + HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug44431.doc" ); + WordExtractor extractor1 = new WordExtractor( doc1 ); + + HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 ); + WordExtractor extractor2 = new WordExtractor( doc2 ); + + assertEquals( extractor1.getFooterText(), extractor2.getFooterText() ); + assertEquals( extractor1.getHeaderText(), extractor2.getHeaderText() ); + assertEquals( Arrays.toString( extractor1.getParagraphText() ), + Arrays.toString( extractor2.getParagraphText() ) ); + + assertEquals( extractor1.getText(), extractor2.getText() ); + } + + /** + * Bug 45473 - HWPF cannot read file after save + */ + public void test45473() + { + HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug45473.doc" ); + String text1 = new WordExtractor( doc1 ).getText().trim(); + + HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 ); + String text2 = new WordExtractor( doc2 ).getText().trim(); + + // the text in the saved document has some differences in line + // separators but we tolerate that + assertEquals( text1.replaceAll( "\n", "" ), text2.replaceAll( "\n", "" ) ); + } + + /** + * Bug 46220 - images are not properly extracted + */ + public void test46220() + { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46220.doc" ); + // reference checksums as in Bugzilla + String[] md5 = { "851be142bce6d01848e730cb6903f39e", + "7fc6d8fb58b09ababd036d10a0e8c039", + "a7dc644c40bc2fbf17b2b62d07f99248", + "72d07b8db5fad7099d90bc4c304b4666" }; + List pics = doc.getPicturesTable().getAllPictures(); + assertEquals( 4, pics.size() ); + for ( int i = 0; i < pics.size(); i++ ) + { + Picture pic = pics.get( i ); + byte[] data = pic.getRawContent(); + // use Apache Commons Codec utils to compute md5 + assertEquals( md5[i], DigestUtils.md5Hex( data ) ); + } + } + + /** + * [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells + * 
missing + */ + public void test46817() + { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46817.doc" ); + WordExtractor extractor = new WordExtractor( doc ); + String text = extractor.getText().trim(); + + assertTrue( text.contains( "Nazwa wykonawcy" ) ); + assertTrue( text.contains( "kujawsko-pomorskie" ) ); + assertTrue( text.contains( "ekomel@ekomel.com.pl" ) ); + } + + /** + * [FAILING] Bug 47286 - Word documents saves in wrong format if source + * contains form elements + * + * @throws IOException + */ + public void test47286() throws IOException + { + HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug47286.doc" ); + String text1 = new WordExtractor( doc1 ).getText().trim(); + + HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 ); + String text2 = new WordExtractor( doc2 ).getText().trim(); + + // the text in the saved document has some differences in line + // separators but we tolerate that + assertEquals( text1.replaceAll( "\n", "" ), text2.replaceAll( "\n", "" ) ); + + assertEquals( doc1.getCharacterTable().getTextRuns().size(), doc2 + .getCharacterTable().getTextRuns().size() ); + + List expectedFields = doc1.getFieldsTables() + .getFieldsPLCF( FieldsDocumentPart.MAIN ); + List actualFields = doc2.getFieldsTables().getFieldsPLCF( + FieldsDocumentPart.MAIN ); + assertEquals( expectedFields.size(), actualFields.size() ); + + assertTableStructures( doc1.getRange(), doc2.getRange() ); + } + + /** + * [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in + * CharacterRun.replaceText() + */ + public void test47287() + { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" ); + String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7", + "1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", }; + int usedVal = 0; + String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002"; + Range r = doc.getRange(); + for ( int x = 0; x < r.numSections(); x++ ) + { + Section s = r.getSection( x ); 
+ for ( int y = 0; y < s.numParagraphs(); y++ ) + { + Paragraph p = s.getParagraph( y ); + + for ( int z = 0; z < p.numCharacterRuns(); z++ ) + { + boolean isFound = false; + + // character run + CharacterRun run = p.getCharacterRun( z ); + // character run text + String text = run.text(); + String oldText = text; + int c = text.indexOf( "FORMTEXT " ); + if ( c < 0 ) + { + int k = text.indexOf( PLACEHOLDER ); + if ( k >= 0 ) + { + text = text.substring( 0, k ) + values[usedVal] + + text.substring( k + PLACEHOLDER.length() ); + usedVal++; + isFound = true; + } + } + else + { + for ( ; c >= 0; c = text.indexOf( "FORMTEXT ", c + + "FORMTEXT ".length() ) ) + { + int k = text.indexOf( PLACEHOLDER, c ); + if ( k >= 0 ) + { + text = text.substring( 0, k ) + + values[usedVal] + + text.substring( k + + PLACEHOLDER.length() ); + usedVal++; + isFound = true; + } + } + } + if ( isFound ) + { + run.replaceText( oldText, text, 0 ); + } + + } + } + } + + String docText = r.text(); + + assertTrue( docText.contains( "1-1" ) ); + assertTrue( docText.contains( "1-12" ) ); + + assertFalse( docText.contains( "1-13" ) ); + assertFalse( docText.contains( "1-15" ) ); + } + + /** + * [RESOLVED FIXED] Bug 47563 - Exception when working with table + */ + public void test47563() + { + test47563_insertTable( 1, 5 ); + test47563_insertTable( 1, 6 ); + test47563_insertTable( 5, 1 ); + test47563_insertTable( 6, 1 ); + test47563_insertTable( 2, 2 ); + test47563_insertTable( 3, 2 ); + test47563_insertTable( 2, 3 ); + test47563_insertTable( 3, 3 ); + } + + /** + * [FAILING] Bug 47731 - Word Extractor considers text copied from some + * website as an embedded object + */ + public void test47731() throws Exception + { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47731.doc" ); + String foundText = new WordExtractor( doc ).getText(); + + try + { + assertTrue( foundText + .contains( "Soak the rice in water for three to four hours" ) ); + + fixed( "47731" ); + } + catch ( 
AssertionFailedError exc ) + { + // expected + } + } + + /** + * Bug 47742 - text extracted by WordExtractor is broken + */ + public void test47742() throws Exception + { + + // (1) extract text from MS Word document via POI + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47742.doc" ); + String foundText = new WordExtractor( doc ).getText(); + + // (2) read text from text document (retrieved by saving the word + // document as text file using encoding UTF-8) + InputStream is = POIDataSamples.getDocumentInstance() + .openResourceAsStream( "Bug47742-text.txt" ); + byte[] expectedBytes = IOUtils.toByteArray( is ); + String expectedText = new String( expectedBytes, "utf-8" ) + .substring( 1 ); // strip-off the unicode marker + + assertEquals( expectedText, foundText ); + } + + /** + * [FAILING] Bug 47958 - Exception during Escher walk of pictures + */ + public void test47958() + { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47958.doc" ); + try + { + for ( Picture pic : doc.getPicturesTable().getAllPictures() ) + { + System.out.println( pic.suggestFullFileName() ); + } + fixed( "47958" ); + } + catch ( Exception e ) + { + // expected exception + } + } + + /** + * [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing + * formatting) + */ + public void test48065() + { + HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug48065.doc" ); + HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 ); + + Range expected = doc1.getRange(); + Range actual = doc2.getRange(); + + assertEquals( + expected.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ), + actual.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ) ); + + assertTableStructures( expected, actual ); + } + + public void test49933() + { + HWPFOldDocument document = HWPFTestDataSamples + .openOldSampleFile( "Bug49933.doc" ); + + Word6Extractor word6Extractor = new Word6Extractor( document ); + String text = word6Extractor.getText(); + + assertTrue( 
text.contains( "best.wine.jump.ru" ) ); + } + + /** + * Bug 50936 - HWPF fails to read a file + */ + public void test50936() + { + HWPFTestDataSamples.openSampleFile( "Bug50936.doc" ); + } + + /** + * [FAILING] Bug 50955 - error while retrieving the text file + */ + public void test50955() + { + try + { + HWPFOldDocument doc = HWPFTestDataSamples + .openOldSampleFile( "Bug50955.doc" ); + Word6Extractor extractor = new Word6Extractor( doc ); + extractor.getText(); + + fixed( "50955" ); + } + catch ( Exception e ) + { + // expected exception + } + } + + /** + * Bug 51524 - PapBinTable constructor is slow + */ + public void test51524() + { + HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" ); + } + +} diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java index 6cb3a03f8f..3623629585 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java @@ -17,26 +17,14 @@ package org.apache.poi.hwpf.usermodel; -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; -import java.util.List; -import junit.framework.AssertionFailedError; -import org.apache.commons.codec.digest.DigestUtils; import org.apache.poi.EncryptedDocumentException; -import org.apache.poi.POIDataSamples; import org.apache.poi.hwpf.HWPFDocument; -import org.apache.poi.hwpf.HWPFOldDocument; import org.apache.poi.hwpf.HWPFTestCase; import org.apache.poi.hwpf.HWPFTestDataSamples; -import org.apache.poi.hwpf.extractor.Word6Extractor; import org.apache.poi.hwpf.extractor.WordExtractor; -import org.apache.poi.hwpf.model.FieldsDocumentPart; -import org.apache.poi.hwpf.model.PlexOfField; import org.apache.poi.hwpf.model.StyleSheet; -import org.apache.poi.util.IOUtils; /** * Test various problem documents @@ -427,410 +415,4 @@ public final class TestProblems extends 
HWPFTestCase { assertEquals("Row 3/Cell 3\u0007", cell.text()); } - static void fixed(String bugzillaId) { - throw new Error("Bug " + bugzillaId + " seems to be fixed. " + - "Please resolve the issue in Bugzilla and remove fail() from the test"); - } - - /** - * Bug 33519 - HWPF fails to read a file - */ - public void test33519() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug33519.doc"); - WordExtractor extractor = new WordExtractor(doc); - String text = extractor.getText(); - } - - /** - * Bug 34898 - WordExtractor doesn't read the whole string from the file - */ - public void test34898() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug34898.doc"); - WordExtractor extractor = new WordExtractor(doc); - assertEquals("\u30c7\u30a3\u30ec\u30af\u30c8\u30ea", extractor.getText().trim()); - } - - /** - * Bug 44331 - HWPFDocument.write destroys fields - */ - public void test44431() - { - HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug44431.doc" ); - WordExtractor extractor1 = new WordExtractor( doc1 ); - - HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 ); - WordExtractor extractor2 = new WordExtractor( doc2 ); - - assertEquals( extractor1.getFooterText(), extractor2.getFooterText() ); - assertEquals( extractor1.getHeaderText(), extractor2.getHeaderText() ); - assertEquals( Arrays.toString( extractor1.getParagraphText() ), - Arrays.toString( extractor2.getParagraphText() ) ); - - assertEquals( extractor1.getText(), extractor2.getText() ); - } - - /** - * [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells - * missing - */ - public void test46817() - { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46817.doc" ); - WordExtractor extractor = new WordExtractor( doc ); - String text = extractor.getText().trim(); - - assertTrue( text.contains( "Nazwa wykonawcy" ) ); - assertTrue( text.contains( "kujawsko-pomorskie" ) ); - assertTrue( text.contains( "ekomel@ekomel.com.pl" ) ); - } - - /** 
- * Bug 46220 - images are not properly extracted - */ - public void test46220() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug46220.doc"); - // reference checksums as in Bugzilla - String[] md5 = { - "851be142bce6d01848e730cb6903f39e", - "7fc6d8fb58b09ababd036d10a0e8c039", - "a7dc644c40bc2fbf17b2b62d07f99248", - "72d07b8db5fad7099d90bc4c304b4666" - }; - List pics = doc.getPicturesTable().getAllPictures(); - assertEquals(4, pics.size()); - for (int i = 0; i < pics.size(); i++) { - Picture pic = pics.get(i); - byte[] data = pic.getRawContent(); - // use Apache Commons Codec utils to compute md5 - assertEquals(md5[i], DigestUtils.md5Hex(data)); - } - } - - /** - * Bug 45473 - HWPF cannot read file after save - */ - public void test45473() { - HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug45473.doc"); - String text1 = new WordExtractor(doc1).getText().trim(); - - HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1); - String text2 = new WordExtractor(doc2).getText().trim(); - - // the text in the saved document has some differences in line separators but we tolerate that - assertEquals(text1.replaceAll("\n", ""), text2.replaceAll("\n", "")); - } - - /** - * [FAILING] Bug 47286 - Word documents saves in wrong format if source - * contains form elements - * - * @throws IOException - */ - public void test47286() throws IOException - { - HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug47286.doc" ); - String text1 = new WordExtractor( doc1 ).getText().trim(); - - HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 ); - String text2 = new WordExtractor( doc2 ).getText().trim(); - - // the text in the saved document has some differences in line - // separators but we tolerate that - assertEquals( text1.replaceAll( "\n", "" ), text2.replaceAll( "\n", "" ) ); - - assertEquals( doc1.getCharacterTable().getTextRuns().size(), doc2 - .getCharacterTable().getTextRuns().size() ); - - List expectedFields = 
doc1.getFieldsTables() - .getFieldsPLCF( FieldsDocumentPart.MAIN ); - List actualFields = doc2.getFieldsTables().getFieldsPLCF( - FieldsDocumentPart.MAIN ); - assertEquals( expectedFields.size(), actualFields.size() ); - - assertTableStructures( doc1.getRange(), doc2.getRange() ); - } - - /** - * [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in - * CharacterRun.replaceText() - */ - public void test47287() - { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" ); - String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7", - "1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", }; - int usedVal = 0; - String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002"; - Range r = doc.getRange(); - for ( int x = 0; x < r.numSections(); x++ ) - { - Section s = r.getSection( x ); - for ( int y = 0; y < s.numParagraphs(); y++ ) - { - Paragraph p = s.getParagraph( y ); - - for ( int z = 0; z < p.numCharacterRuns(); z++ ) - { - boolean isFound = false; - - // character run - CharacterRun run = p.getCharacterRun( z ); - // character run text - String text = run.text(); - String oldText = text; - int c = text.indexOf( "FORMTEXT " ); - if ( c < 0 ) - { - int k = text.indexOf( PLACEHOLDER ); - if ( k >= 0 ) - { - text = text.substring( 0, k ) + values[usedVal] - + text.substring( k + PLACEHOLDER.length() ); - usedVal++; - isFound = true; - } - } - else - { - for ( ; c >= 0; c = text.indexOf( "FORMTEXT ", c - + "FORMTEXT ".length() ) ) - { - int k = text.indexOf( PLACEHOLDER, c ); - if ( k >= 0 ) - { - text = text.substring( 0, k ) - + values[usedVal] - + text.substring( k - + PLACEHOLDER.length() ); - usedVal++; - isFound = true; - } - } - } - if ( isFound ) - { - run.replaceText( oldText, text, 0 ); - } - - } - } - } - - String docText = r.text(); - - assertTrue( docText.contains( "1-1" ) ); - assertTrue( docText.contains( "1-12" ) ); - - assertFalse( docText.contains( "1-13" ) ); - assertFalse( docText.contains( "1-15" ) ); - } - - 
private static void insertTable( int rows, int columns ) - { - // POI apparently can't create a document from scratch, - // so we need an existing empty dummy document - HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "empty.doc" ); - - Range range = doc.getRange(); - Table table = range.insertBefore( - new TableProperties( (short) columns ), rows ); - table.sanityCheck(); - range.sanityCheck(); - - for ( int rowIdx = 0; rowIdx < table.numRows(); rowIdx++ ) - { - TableRow row = table.getRow( rowIdx ); - row.sanityCheck(); - for ( int colIdx = 0; colIdx < row.numCells(); colIdx++ ) - { - TableCell cell = row.getCell( colIdx ); - cell.sanityCheck(); - - Paragraph par = cell.getParagraph( 0 ); - par.sanityCheck(); - - par.insertBefore( "" + ( rowIdx * row.numCells() + colIdx ) ); - - par.sanityCheck(); - cell.sanityCheck(); - row.sanityCheck(); - table.sanityCheck(); - range.sanityCheck(); - } - } - - String text = range.text(); - int mustBeAfter = 0; - for ( int i = 0; i < rows * columns; i++ ) - { - int next = text.indexOf( Integer.toString( i ), mustBeAfter ); - assertFalse( next == -1 ); - mustBeAfter = next; - } - } - - /** - * [RESOLVED FIXED] Bug 47563 - Exception when working with table - */ - public void test47563() - { - insertTable( 1, 5 ); - insertTable( 1, 6 ); - insertTable( 5, 1 ); - insertTable( 6, 1 ); - insertTable( 2, 2 ); - insertTable( 3, 2 ); - insertTable( 2, 3 ); - insertTable( 3, 3 ); - } - - /** - * [FAILING] Bug 47731 - Word Extractor considers text copied from some - * website as an embedded object - */ - public void test47731() throws Exception - { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47731.doc" ); - String foundText = new WordExtractor( doc ).getText(); - - try - { - assertTrue( foundText - .contains( "Soak the rice in water for three to four hours" ) ); - - fixed( "47731" ); - } - catch ( AssertionFailedError exc ) - { - // expected - } - } - - /** - * Bug 4774 - text extracted by WordExtractor is broken - 
*/ - public void test47742() throws Exception { - - // (1) extract text from MS Word document via POI - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47742.doc"); - String foundText = new WordExtractor(doc).getText(); - - // (2) read text from text document (retrieved by saving the word - // document as text file using encoding UTF-8) - InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("Bug47742-text.txt"); - byte[] expectedBytes = IOUtils.toByteArray(is); - String expectedText = new String(expectedBytes, "utf-8").substring(1); // strip-off the unicode marker - - assertEquals(expectedText, foundText); - } - - /** - * [FAILING] Bug 47958 - Exception during Escher walk of pictures - */ - public void test47958() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47958.doc"); - try { - for (Picture pic : doc.getPicturesTable().getAllPictures()) { - System.out.println(pic.suggestFullFileName()); - } - fixed("47958"); - } catch (Exception e) { - // expected exception - } - } - - /** - * [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing - * formatting) - */ - public void test48065() - { - HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug48065.doc" ); - HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 ); - - Range expected = doc1.getRange(); - Range actual = doc2.getRange(); - - assertEquals( - expected.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ), - actual.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ) ); - - assertTableStructures( expected, actual ); - } - - private static void assertTableStructures( Range expected, Range actual ) - { - assertEquals( expected.numParagraphs(), actual.numParagraphs() ); - for ( int p = 0; p < expected.numParagraphs(); p++ ) - { - Paragraph expParagraph = expected.getParagraph( p ); - Paragraph actParagraph = actual.getParagraph( p ); - - assertEquals( expParagraph.text(), actParagraph.text() ); - assertEquals( "Diffent 
isInTable flags for paragraphs #" + p - + " -- " + expParagraph + " -- " + actParagraph + ".", - expParagraph.isInTable(), actParagraph.isInTable() ); - assertEquals( expParagraph.isTableRowEnd(), - actParagraph.isTableRowEnd() ); - - if ( expParagraph.isInTable() && actParagraph.isInTable() ) - { - Table expTable, actTable; - try - { - expTable = expected.getTable( expParagraph ); - actTable = actual.getTable( actParagraph ); - } - catch ( Exception exc ) - { - continue; - } - - assertEquals( expTable.numRows(), actTable.numRows() ); - assertEquals( expTable.numParagraphs(), - actTable.numParagraphs() ); - } - } - } - - public void test49933() - { - HWPFOldDocument document = HWPFTestDataSamples - .openOldSampleFile( "Bug49933.doc" ); - - Word6Extractor word6Extractor = new Word6Extractor( document ); - String text = word6Extractor.getText(); - - assertTrue( text.contains( "best.wine.jump.ru" ) ); - } - - /** - * Bug 50936 - HWPF fails to read a file - */ - public void test50936() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug50936.doc"); - } - - /** - * [FAILING] Bug 50955 - error while retrieving the text file - */ - public void test50955() { - try { - HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug50955.doc"); - Word6Extractor extractor = new Word6Extractor(doc); - String text = extractor.getText(); - fixed("50955"); - } catch (Exception e) { - // expected exception - } - } - - /** - * Bug 51524 - PapBinTable constructor is slow - */ - public void test51524() - { - HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" ); - } - }