extract Bugzilla tests from TestProblems into TestBugs

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1155205 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-08-09 04:46:36 +00:00
parent 2d5db78a3b
commit e544419a22
2 changed files with 476 additions and 418 deletions

View File

@ -0,0 +1,476 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf.usermodel;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.model.FieldsDocumentPart;
import org.apache.poi.hwpf.model.PlexOfField;
import org.apache.poi.util.IOUtils;
/**
* Test different problems reported in Apache Bugzilla
*
* @author Nick Burch (nick at torchbox dot com)
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
*/
public class TestBugs extends TestCase
{
/**
 * Asserts that two ranges hold the same paragraphs and equivalent table
 * structures.
 * <p>
 * Paragraphs are compared pairwise by text, by {@code isInTable()} flag and
 * by {@code isTableRowEnd()} flag. For each pair of in-table paragraphs for
 * which both ranges can resolve a table, the row count and paragraph count
 * of the two tables are compared as well.
 *
 * @param expected
 *            range holding the reference structure
 * @param actual
 *            range that is verified against {@code expected}
 */
private static void assertTableStructures( Range expected, Range actual )
{
    assertEquals( "Different number of paragraphs",
            expected.numParagraphs(), actual.numParagraphs() );
    for ( int p = 0; p < expected.numParagraphs(); p++ )
    {
        Paragraph expParagraph = expected.getParagraph( p );
        Paragraph actParagraph = actual.getParagraph( p );

        assertEquals( "Different text of paragraphs #" + p,
                expParagraph.text(), actParagraph.text() );
        assertEquals( "Different isInTable flags for paragraphs #" + p
                + " -- " + expParagraph + " -- " + actParagraph + ".",
                expParagraph.isInTable(), actParagraph.isInTable() );
        assertEquals( "Different isTableRowEnd flags for paragraphs #" + p,
                expParagraph.isTableRowEnd(), actParagraph.isTableRowEnd() );

        if ( expParagraph.isInTable() && actParagraph.isInTable() )
        {
            Table expTable, actTable;
            try
            {
                expTable = expected.getTable( expParagraph );
                actTable = actual.getTable( actParagraph );
            }
            catch ( Exception ignored )
            {
                // Deliberate best effort: when either range cannot resolve
                // a table for this paragraph the table comparison is
                // skipped. NOTE(review): presumably getTable() only accepts
                // the first paragraph of a table -- confirm.
                continue;
            }

            assertEquals( "Different number of table rows at paragraph #"
                    + p, expTable.numRows(), actTable.numRows() );
            assertEquals(
                    "Different number of table paragraphs at paragraph #"
                            + p, expTable.numParagraphs(),
                    actTable.numParagraphs() );
        }
    }
}
/**
 * Signals that a test written for a known-open bug has unexpectedly
 * succeeded.
 * <p>
 * Always throws a plain {@link Error}, so the {@code catch ( Exception )}
 * blocks of the calling tests do not intercept it.
 *
 * @param bugzillaId
 *            identifier of the bug, embedded in the error message
 */
static void fixed( String bugzillaId )
{
    StringBuilder message = new StringBuilder();
    message.append( "Bug " ).append( bugzillaId )
            .append( " seems to be fixed. " );
    message.append( "Please resolve the issue in Bugzilla and remove fail() from the test" );
    throw new Error( message.toString() );
}
/**
 * Inserts a {@code rows} x {@code columns} table into the "empty.doc"
 * sample, writes a running number into the first paragraph of every cell
 * and runs the sanity checks of every touched object after each step.
 * Finally verifies that the numbers {@code 0 .. rows * columns - 1} occur
 * in the range text in ascending order.
 *
 * @param rows
 *            number of table rows to insert
 * @param columns
 *            number of table columns to insert
 */
private static void test47563_insertTable( int rows, int columns )
{
    // POI apparently can't create a document from scratch,
    // so we need an existing empty dummy document
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "empty.doc" );

    Range range = doc.getRange();
    Table table = range.insertBefore(
            new TableProperties( (short) columns ), rows );
    table.sanityCheck();
    range.sanityCheck();

    for ( int rowIdx = 0; rowIdx < table.numRows(); rowIdx++ )
    {
        TableRow row = table.getRow( rowIdx );
        row.sanityCheck();
        for ( int colIdx = 0; colIdx < row.numCells(); colIdx++ )
        {
            TableCell cell = row.getCell( colIdx );
            cell.sanityCheck();

            Paragraph par = cell.getParagraph( 0 );
            par.sanityCheck();

            par.insertBefore( Integer.toString( rowIdx * row.numCells()
                    + colIdx ) );

            // re-validate the whole chain after the modification
            par.sanityCheck();
            cell.sanityCheck();
            row.sanityCheck();
            table.sanityCheck();
            range.sanityCheck();
        }
    }

    String text = range.text();
    int mustBeAfter = 0;
    for ( int i = 0; i < rows * columns; i++ )
    {
        String cellLabel = Integer.toString( i );
        int next = text.indexOf( cellLabel, mustBeAfter );
        assertTrue( "Cell text '" + cellLabel
                + "' not found at or after offset " + mustBeAfter
                + " of the range text", next != -1 );
        // continue behind the match so that consecutive labels can never
        // be satisfied by overlapping characters
        mustBeAfter = next + cellLabel.length();
    }
}
/**
 * Bug 33519 - HWPF fails to read a file.
 * <p>
 * Opens the sample document and extracts its text; the extracted text is
 * not inspected, the test only requires that no exception is thrown.
 */
public void test33519()
{
    new WordExtractor( HWPFTestDataSamples.openSampleFile( "Bug33519.doc" ) )
            .getText();
}
/**
 * Bug 34898 - WordExtractor doesn't read the whole string from the file.
 * <p>
 * The trimmed text of the sample document must equal a fixed six-character
 * string.
 */
public void test34898()
{
    String expectedText = "\u30c7\u30a3\u30ec\u30af\u30c8\u30ea";

    HWPFDocument document = HWPFTestDataSamples
            .openSampleFile( "Bug34898.doc" );
    String extractedText = new WordExtractor( document ).getText();

    assertEquals( expectedText, extractedText.trim() );
}
/**
 * Bug 44431 - HWPFDocument.write destroys fields.
 * <p>
 * Writes the sample document out, reads it back and requires footer text,
 * header text, paragraph texts and full text to be unchanged.
 */
public void test44431()
{
    HWPFDocument original = HWPFTestDataSamples
            .openSampleFile( "Bug44431.doc" );
    WordExtractor before = new WordExtractor( original );

    HWPFDocument reloaded = HWPFTestDataSamples
            .writeOutAndReadBack( original );
    WordExtractor after = new WordExtractor( reloaded );

    assertEquals( before.getFooterText(), after.getFooterText() );
    assertEquals( before.getHeaderText(), after.getHeaderText() );

    String paragraphsBefore = Arrays.toString( before.getParagraphText() );
    String paragraphsAfter = Arrays.toString( after.getParagraphText() );
    assertEquals( paragraphsBefore, paragraphsAfter );

    assertEquals( before.getText(), after.getText() );
}
/**
 * Bug 45473 - HWPF cannot read file after save.
 * <p>
 * The trimmed text before and after a write/read round trip must match
 * once all line feeds are removed.
 */
public void test45473()
{
    HWPFDocument original = HWPFTestDataSamples
            .openSampleFile( "Bug45473.doc" );
    String originalText = new WordExtractor( original ).getText().trim();

    HWPFDocument reloaded = HWPFTestDataSamples
            .writeOutAndReadBack( original );
    String reloadedText = new WordExtractor( reloaded ).getText().trim();

    // saving changes some line separators; that difference is tolerated
    String originalFlat = originalText.replace( "\n", "" );
    String reloadedFlat = reloadedText.replace( "\n", "" );
    assertEquals( originalFlat, reloadedFlat );
}
/**
 * Bug 46220 - images are not properly extracted.
 * <p>
 * The sample document must yield exactly four pictures whose raw content
 * hashes to the reference MD5 checksums, in order.
 */
public void test46220()
{
    // reference checksums as in Bugzilla
    String[] expectedMd5 = { "851be142bce6d01848e730cb6903f39e",
            "7fc6d8fb58b09ababd036d10a0e8c039",
            "a7dc644c40bc2fbf17b2b62d07f99248",
            "72d07b8db5fad7099d90bc4c304b4666" };

    HWPFDocument document = HWPFTestDataSamples
            .openSampleFile( "Bug46220.doc" );
    List<Picture> pictures = document.getPicturesTable().getAllPictures();
    assertEquals( 4, pictures.size() );

    int index = 0;
    for ( Picture picture : pictures )
    {
        // md5 is computed with the Apache Commons Codec utilities
        assertEquals( expectedMd5[index++],
                DigestUtils.md5Hex( picture.getRawContent() ) );
    }
}
/**
 * [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells
 * missing.
 * <p>
 * The extracted text must contain three known fragments.
 */
public void test46817()
{
    HWPFDocument document = HWPFTestDataSamples
            .openSampleFile( "Bug46817.doc" );
    String text = new WordExtractor( document ).getText().trim();

    String[] requiredFragments = { "Nazwa wykonawcy", "kujawsko-pomorskie",
            "ekomel@ekomel.com.pl" };
    for ( String fragment : requiredFragments )
    {
        assertTrue( text.contains( fragment ) );
    }
}
/**
 * [FAILING] Bug 47286 - Word documents saves in wrong format if source
 * contains form elements.
 * <p>
 * After a write/read round trip the text (ignoring line feeds), the number
 * of text runs, the number of main-document fields and the table
 * structures must be unchanged.
 *
 * @throws IOException
 */
public void test47286() throws IOException
{
    HWPFDocument original = HWPFTestDataSamples
            .openSampleFile( "Bug47286.doc" );
    String originalText = new WordExtractor( original ).getText().trim();

    HWPFDocument reloaded = HWPFTestDataSamples
            .writeOutAndReadBack( original );
    String reloadedText = new WordExtractor( reloaded ).getText().trim();

    // saving changes some line separators; that difference is tolerated
    assertEquals( originalText.replace( "\n", "" ),
            reloadedText.replace( "\n", "" ) );

    int expectedRuns = original.getCharacterTable().getTextRuns().size();
    int actualRuns = reloaded.getCharacterTable().getTextRuns().size();
    assertEquals( expectedRuns, actualRuns );

    int expectedFieldCount = original.getFieldsTables()
            .getFieldsPLCF( FieldsDocumentPart.MAIN ).size();
    int actualFieldCount = reloaded.getFieldsTables()
            .getFieldsPLCF( FieldsDocumentPart.MAIN ).size();
    assertEquals( expectedFieldCount, actualFieldCount );

    assertTableStructures( original.getRange(), reloaded.getRange() );
}
/**
* [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
* CharacterRun.replaceText()
* <p>
* Walks every character run of every paragraph of every section, replaces
* placeholder sequences in the run text with successive entries of
* {@code values} and writes the changed text back through
* {@code CharacterRun.replaceText()}. Afterwards the range text must
* contain "1-1" and "1-12" and must not contain "1-13" or "1-15".
*/
public void test47287()
{
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" );
// replacement values, consumed in order through usedVal
String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
"1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
int usedVal = 0;
// placeholder: five consecutive U+2002 characters
String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002";
Range r = doc.getRange();
for ( int x = 0; x < r.numSections(); x++ )
{
Section s = r.getSection( x );
for ( int y = 0; y < s.numParagraphs(); y++ )
{
Paragraph p = s.getParagraph( y );
for ( int z = 0; z < p.numCharacterRuns(); z++ )
{
// set as soon as at least one placeholder of this run was replaced
boolean isFound = false;
// character run
CharacterRun run = p.getCharacterRun( z );
// character run text
String text = run.text();
// unmodified run text, passed to replaceText() as the text to replace
String oldText = text;
int c = text.indexOf( "FORMTEXT " );
if ( c < 0 )
{
// no "FORMTEXT " marker in this run: replace only the first placeholder
int k = text.indexOf( PLACEHOLDER );
if ( k >= 0 )
{
// NOTE(review): values[usedVal] is not bounds-checked; more than
// fifteen replacements would throw ArrayIndexOutOfBoundsException
text = text.substring( 0, k ) + values[usedVal]
+ text.substring( k + PLACEHOLDER.length() );
usedVal++;
isFound = true;
}
}
else
{
// one pass per "FORMTEXT " marker; the next marker is searched in the
// already modified text, starting behind the current marker
for ( ; c >= 0; c = text.indexOf( "FORMTEXT ", c
+ "FORMTEXT ".length() ) )
{
// first placeholder at or after the current marker
int k = text.indexOf( PLACEHOLDER, c );
if ( k >= 0 )
{
text = text.substring( 0, k )
+ values[usedVal]
+ text.substring( k
+ PLACEHOLDER.length() );
usedVal++;
isFound = true;
}
}
}
if ( isFound )
{
// write the modified text back into the run
run.replaceText( oldText, text, 0 );
}
}
}
}
// "1-12" must be present and "1-13" absent in the resulting range text
String docText = r.text();
assertTrue( docText.contains( "1-1" ) );
assertTrue( docText.contains( "1-12" ) );
assertFalse( docText.contains( "1-13" ) );
assertFalse( docText.contains( "1-15" ) );
}
/**
 * [RESOLVED FIXED] Bug 47563 - Exception when working with table.
 * <p>
 * Runs the table insertion check for eight row/column combinations.
 */
public void test47563()
{
    // { rows, columns } pairs, processed in this order
    int[][] dimensions = { { 1, 5 }, { 1, 6 }, { 5, 1 }, { 6, 1 },
            { 2, 2 }, { 3, 2 }, { 2, 3 }, { 3, 3 } };
    for ( int[] dimension : dimensions )
    {
        test47563_insertTable( dimension[0], dimension[1] );
    }
}
/**
 * [FAILING] Bug 47731 - Word Extractor considers text copied from some
 * website as an embedded object.
 * <p>
 * The test passes while the expected sentence is missing from the
 * extracted text. Once the sentence is found, {@code fixed()} is called to
 * report that the bug appears to be resolved.
 */
public void test47731() throws Exception
{
    HWPFDocument document = HWPFTestDataSamples
            .openSampleFile( "Bug47731.doc" );
    String extracted = new WordExtractor( document ).getText();

    boolean sentencePresent = extracted
            .contains( "Soak the rice in water for three to four hours" );
    if ( sentencePresent )
    {
        fixed( "47731" );
    }
    // a missing sentence is the currently expected outcome
}
/**
 * Bug 47742 - text extracted by WordExtractor is broken.
 * <p>
 * Compares the text extracted from the Word document with the content of a
 * reference text file. The first character of the decoded reference text
 * is dropped before the comparison.
 *
 * @throws Exception
 *             if the reference resource cannot be read or decoded
 */
public void test47742() throws Exception
{
    // (1) extract text from MS Word document via POI
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47742.doc" );
    String foundText = new WordExtractor( doc ).getText();

    // (2) read text from text document (retrieved by saving the word
    // document as text file using encoding UTF-8)
    byte[] expectedBytes;
    InputStream is = POIDataSamples.getDocumentInstance()
            .openResourceAsStream( "Bug47742-text.txt" );
    try
    {
        expectedBytes = IOUtils.toByteArray( is );
    }
    finally
    {
        // release the resource stream even when reading fails
        is.close();
    }
    String expectedText = new String( expectedBytes, "utf-8" )
            .substring( 1 ); // strip-off the unicode marker

    assertEquals( expectedText, foundText );
}
/**
 * [FAILING] Bug 47958 - Exception during Escher walk of pictures.
 * <p>
 * Prints the suggested file name of every picture. An exception during the
 * walk is the currently expected outcome; finishing the walk without one
 * triggers {@code fixed()}.
 */
public void test47958()
{
    HWPFDocument document = HWPFTestDataSamples
            .openSampleFile( "Bug47958.doc" );
    try
    {
        List<Picture> pictures = document.getPicturesTable()
                .getAllPictures();
        for ( int i = 0; i < pictures.size(); i++ )
        {
            System.out.println( pictures.get( i ).suggestFullFileName() );
        }
        fixed( "47958" );
    }
    catch ( Exception expected )
    {
        // expected exception
    }
}
/**
 * [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing
 * formatting).
 * <p>
 * After a write/read round trip the range text (with carriage returns
 * turned into line feeds and double line feeds collapsed) and the table
 * structures must be unchanged.
 */
public void test48065()
{
    HWPFDocument original = HWPFTestDataSamples
            .openSampleFile( "Bug48065.doc" );
    HWPFDocument reloaded = HWPFTestDataSamples
            .writeOutAndReadBack( original );

    Range expected = original.getRange();
    Range actual = reloaded.getRange();

    String expectedText = expected.text().replace( "\r", "\n" )
            .replace( "\n\n", "\n" );
    String actualText = actual.text().replace( "\r", "\n" )
            .replace( "\n\n", "\n" );
    assertEquals( expectedText, actualText );

    assertTableStructures( expected, actual );
}
/**
 * Bug 49933 - the text extracted by {@code Word6Extractor} from the old
 * format sample document must contain "best.wine.jump.ru".
 */
public void test49933()
{
    Word6Extractor extractor = new Word6Extractor(
            HWPFTestDataSamples.openOldSampleFile( "Bug49933.doc" ) );
    String extracted = extractor.getText();

    assertTrue( extracted.contains( "best.wine.jump.ru" ) );
}
/**
 * Bug 50936 - HWPF fails to read a file.
 * <p>
 * Passes when the sample document can be opened without an exception.
 */
public void test50936()
{
    // the loaded document is not inspected any further
    HWPFTestDataSamples
            .openSampleFile( "Bug50936.doc" );
}
/**
 * [FAILING] Bug 50955 - error while retrieving the text file.
 * <p>
 * An exception while opening the old format document or extracting its
 * text is the currently expected outcome; success triggers
 * {@code fixed()}.
 */
public void test50955()
{
    try
    {
        new Word6Extractor(
                HWPFTestDataSamples.openOldSampleFile( "Bug50955.doc" ) )
                .getText();
        fixed( "50955" );
    }
    catch ( Exception expected )
    {
        // expected exception
    }
}
/**
 * Bug 51524 - PapBinTable constructor is slow.
 * <p>
 * Opens the sample document stored in a zip archive; the test only
 * requires the call to return without an exception.
 */
public void test51524()
{
    // no timing assertion is made here
    HWPFTestDataSamples
            .openSampleFileFromArchive( "Bug51524.zip" );
}
}

View File

@ -17,26 +17,14 @@
package org.apache.poi.hwpf.usermodel;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import junit.framework.AssertionFailedError;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.HWPFTestCase;
import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.model.FieldsDocumentPart;
import org.apache.poi.hwpf.model.PlexOfField;
import org.apache.poi.hwpf.model.StyleSheet;
import org.apache.poi.util.IOUtils;
/**
* Test various problem documents
@ -427,410 +415,4 @@ public final class TestProblems extends HWPFTestCase {
assertEquals("Row 3/Cell 3\u0007", cell.text());
}
/**
 * Signals that a test written for a known-open bug has unexpectedly
 * succeeded.
 * <p>
 * Always throws a plain {@link Error}, so the {@code catch (Exception)}
 * blocks of the calling tests do not intercept it.
 *
 * @param bugzillaId identifier of the bug, embedded in the error message
 */
static void fixed(String bugzillaId) {
    StringBuilder message = new StringBuilder();
    message.append("Bug ").append(bugzillaId).append(" seems to be fixed. ");
    message.append("Please resolve the issue in Bugzilla and remove fail() from the test");
    throw new Error(message.toString());
}
/**
 * Bug 33519 - HWPF fails to read a file.
 * <p>
 * Opens the sample document and extracts its text; the extracted text is
 * not inspected, the test only requires that no exception is thrown.
 */
public void test33519() {
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug33519.doc");
    WordExtractor extractor = new WordExtractor(doc);
    // result intentionally discarded: only successful extraction matters
    extractor.getText();
}
/**
 * Bug 34898 - WordExtractor doesn't read the whole string from the file.
 * <p>
 * The trimmed text of the sample document must equal a fixed six-character
 * string.
 */
public void test34898() {
    String expectedText = "\u30c7\u30a3\u30ec\u30af\u30c8\u30ea";

    HWPFDocument document = HWPFTestDataSamples.openSampleFile("Bug34898.doc");
    String extractedText = new WordExtractor(document).getText();

    assertEquals(expectedText, extractedText.trim());
}
/**
 * Bug 44431 - HWPFDocument.write destroys fields.
 * <p>
 * Writes the sample document out, reads it back and requires footer text,
 * header text, paragraph texts and full text to be unchanged.
 */
public void test44431()
{
    HWPFDocument original = HWPFTestDataSamples
            .openSampleFile( "Bug44431.doc" );
    WordExtractor before = new WordExtractor( original );

    HWPFDocument reloaded = HWPFTestDataSamples
            .writeOutAndReadBack( original );
    WordExtractor after = new WordExtractor( reloaded );

    assertEquals( before.getFooterText(), after.getFooterText() );
    assertEquals( before.getHeaderText(), after.getHeaderText() );

    String paragraphsBefore = Arrays.toString( before.getParagraphText() );
    String paragraphsAfter = Arrays.toString( after.getParagraphText() );
    assertEquals( paragraphsBefore, paragraphsAfter );

    assertEquals( before.getText(), after.getText() );
}
/**
 * [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells
 * missing.
 * <p>
 * The extracted text must contain three known fragments.
 */
public void test46817()
{
    HWPFDocument document = HWPFTestDataSamples
            .openSampleFile( "Bug46817.doc" );
    String text = new WordExtractor( document ).getText().trim();

    String[] requiredFragments = { "Nazwa wykonawcy", "kujawsko-pomorskie",
            "ekomel@ekomel.com.pl" };
    for ( String fragment : requiredFragments )
    {
        assertTrue( text.contains( fragment ) );
    }
}
/**
 * Bug 46220 - images are not properly extracted.
 * <p>
 * The sample document must yield exactly four pictures whose raw content
 * hashes to the reference MD5 checksums, in order.
 */
public void test46220() {
    // reference checksums as in Bugzilla
    String[] expectedMd5 = {
        "851be142bce6d01848e730cb6903f39e",
        "7fc6d8fb58b09ababd036d10a0e8c039",
        "a7dc644c40bc2fbf17b2b62d07f99248",
        "72d07b8db5fad7099d90bc4c304b4666"
    };

    HWPFDocument document = HWPFTestDataSamples.openSampleFile("Bug46220.doc");
    List<Picture> pictures = document.getPicturesTable().getAllPictures();
    assertEquals(4, pictures.size());

    int index = 0;
    for (Picture picture : pictures) {
        // md5 is computed with the Apache Commons Codec utilities
        assertEquals(expectedMd5[index++], DigestUtils.md5Hex(picture.getRawContent()));
    }
}
/**
 * Bug 45473 - HWPF cannot read file after save.
 * <p>
 * The trimmed text before and after a write/read round trip must match
 * once all line feeds are removed.
 */
public void test45473() {
    HWPFDocument original = HWPFTestDataSamples.openSampleFile("Bug45473.doc");
    String originalText = new WordExtractor(original).getText().trim();

    HWPFDocument reloaded = HWPFTestDataSamples.writeOutAndReadBack(original);
    String reloadedText = new WordExtractor(reloaded).getText().trim();

    // saving changes some line separators; that difference is tolerated
    String originalFlat = originalText.replace("\n", "");
    String reloadedFlat = reloadedText.replace("\n", "");
    assertEquals(originalFlat, reloadedFlat);
}
/**
 * [FAILING] Bug 47286 - Word documents saves in wrong format if source
 * contains form elements.
 * <p>
 * After a write/read round trip the text (ignoring line feeds), the number
 * of text runs, the number of main-document fields and the table
 * structures must be unchanged.
 *
 * @throws IOException
 */
public void test47286() throws IOException
{
    HWPFDocument original = HWPFTestDataSamples
            .openSampleFile( "Bug47286.doc" );
    String originalText = new WordExtractor( original ).getText().trim();

    HWPFDocument reloaded = HWPFTestDataSamples
            .writeOutAndReadBack( original );
    String reloadedText = new WordExtractor( reloaded ).getText().trim();

    // saving changes some line separators; that difference is tolerated
    assertEquals( originalText.replace( "\n", "" ),
            reloadedText.replace( "\n", "" ) );

    int expectedRuns = original.getCharacterTable().getTextRuns().size();
    int actualRuns = reloaded.getCharacterTable().getTextRuns().size();
    assertEquals( expectedRuns, actualRuns );

    int expectedFieldCount = original.getFieldsTables()
            .getFieldsPLCF( FieldsDocumentPart.MAIN ).size();
    int actualFieldCount = reloaded.getFieldsTables()
            .getFieldsPLCF( FieldsDocumentPart.MAIN ).size();
    assertEquals( expectedFieldCount, actualFieldCount );

    assertTableStructures( original.getRange(), reloaded.getRange() );
}
/**
* [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
* CharacterRun.replaceText()
* <p>
* Walks every character run of every paragraph of every section, replaces
* placeholder sequences in the run text with successive entries of
* {@code values} and writes the changed text back through
* {@code CharacterRun.replaceText()}. Afterwards the range text must
* contain "1-1" and "1-12" and must not contain "1-13" or "1-15".
*/
public void test47287()
{
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" );
// replacement values, consumed in order through usedVal
String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
"1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
int usedVal = 0;
// placeholder: five consecutive U+2002 characters
String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002";
Range r = doc.getRange();
for ( int x = 0; x < r.numSections(); x++ )
{
Section s = r.getSection( x );
for ( int y = 0; y < s.numParagraphs(); y++ )
{
Paragraph p = s.getParagraph( y );
for ( int z = 0; z < p.numCharacterRuns(); z++ )
{
// set as soon as at least one placeholder of this run was replaced
boolean isFound = false;
// character run
CharacterRun run = p.getCharacterRun( z );
// character run text
String text = run.text();
// unmodified run text, passed to replaceText() as the text to replace
String oldText = text;
int c = text.indexOf( "FORMTEXT " );
if ( c < 0 )
{
// no "FORMTEXT " marker in this run: replace only the first placeholder
int k = text.indexOf( PLACEHOLDER );
if ( k >= 0 )
{
// NOTE(review): values[usedVal] is not bounds-checked; more than
// fifteen replacements would throw ArrayIndexOutOfBoundsException
text = text.substring( 0, k ) + values[usedVal]
+ text.substring( k + PLACEHOLDER.length() );
usedVal++;
isFound = true;
}
}
else
{
// one pass per "FORMTEXT " marker; the next marker is searched in the
// already modified text, starting behind the current marker
for ( ; c >= 0; c = text.indexOf( "FORMTEXT ", c
+ "FORMTEXT ".length() ) )
{
// first placeholder at or after the current marker
int k = text.indexOf( PLACEHOLDER, c );
if ( k >= 0 )
{
text = text.substring( 0, k )
+ values[usedVal]
+ text.substring( k
+ PLACEHOLDER.length() );
usedVal++;
isFound = true;
}
}
}
if ( isFound )
{
// write the modified text back into the run
run.replaceText( oldText, text, 0 );
}
}
}
}
// "1-12" must be present and "1-13" absent in the resulting range text
String docText = r.text();
assertTrue( docText.contains( "1-1" ) );
assertTrue( docText.contains( "1-12" ) );
assertFalse( docText.contains( "1-13" ) );
assertFalse( docText.contains( "1-15" ) );
}
/**
 * Inserts a {@code rows} x {@code columns} table into the "empty.doc"
 * sample, writes a running number into the first paragraph of every cell
 * and runs the sanity checks of every touched object after each step.
 * Finally verifies that the numbers {@code 0 .. rows * columns - 1} occur
 * in the range text in ascending order.
 *
 * @param rows
 *            number of table rows to insert
 * @param columns
 *            number of table columns to insert
 */
private static void insertTable( int rows, int columns )
{
    // POI apparently can't create a document from scratch,
    // so we need an existing empty dummy document
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "empty.doc" );

    Range range = doc.getRange();
    Table table = range.insertBefore(
            new TableProperties( (short) columns ), rows );
    table.sanityCheck();
    range.sanityCheck();

    for ( int rowIdx = 0; rowIdx < table.numRows(); rowIdx++ )
    {
        TableRow row = table.getRow( rowIdx );
        row.sanityCheck();
        for ( int colIdx = 0; colIdx < row.numCells(); colIdx++ )
        {
            TableCell cell = row.getCell( colIdx );
            cell.sanityCheck();

            Paragraph par = cell.getParagraph( 0 );
            par.sanityCheck();

            par.insertBefore( Integer.toString( rowIdx * row.numCells()
                    + colIdx ) );

            // re-validate the whole chain after the modification
            par.sanityCheck();
            cell.sanityCheck();
            row.sanityCheck();
            table.sanityCheck();
            range.sanityCheck();
        }
    }

    String text = range.text();
    int mustBeAfter = 0;
    for ( int i = 0; i < rows * columns; i++ )
    {
        String cellLabel = Integer.toString( i );
        int next = text.indexOf( cellLabel, mustBeAfter );
        assertTrue( "Cell text '" + cellLabel
                + "' not found at or after offset " + mustBeAfter
                + " of the range text", next != -1 );
        // continue behind the match so that consecutive labels can never
        // be satisfied by overlapping characters
        mustBeAfter = next + cellLabel.length();
    }
}
/**
 * [RESOLVED FIXED] Bug 47563 - Exception when working with table.
 * <p>
 * Runs the table insertion check for eight row/column combinations.
 */
public void test47563()
{
    // { rows, columns } pairs, processed in this order
    int[][] dimensions = { { 1, 5 }, { 1, 6 }, { 5, 1 }, { 6, 1 },
            { 2, 2 }, { 3, 2 }, { 2, 3 }, { 3, 3 } };
    for ( int[] dimension : dimensions )
    {
        insertTable( dimension[0], dimension[1] );
    }
}
/**
 * [FAILING] Bug 47731 - Word Extractor considers text copied from some
 * website as an embedded object.
 * <p>
 * The test passes while the expected sentence is missing from the
 * extracted text. Once the sentence is found, {@code fixed()} is called to
 * report that the bug appears to be resolved.
 */
public void test47731() throws Exception
{
    HWPFDocument document = HWPFTestDataSamples
            .openSampleFile( "Bug47731.doc" );
    String extracted = new WordExtractor( document ).getText();

    boolean sentencePresent = extracted
            .contains( "Soak the rice in water for three to four hours" );
    if ( sentencePresent )
    {
        fixed( "47731" );
    }
    // a missing sentence is the currently expected outcome
}
/**
 * Bug 47742 - text extracted by WordExtractor is broken.
 * <p>
 * Compares the text extracted from the Word document with the content of a
 * reference text file. The first character of the decoded reference text
 * is dropped before the comparison.
 *
 * @throws Exception if the reference resource cannot be read or decoded
 */
public void test47742() throws Exception {
    // (1) extract text from MS Word document via POI
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47742.doc");
    String foundText = new WordExtractor(doc).getText();

    // (2) read text from text document (retrieved by saving the word
    // document as text file using encoding UTF-8)
    byte[] expectedBytes;
    InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("Bug47742-text.txt");
    try {
        expectedBytes = IOUtils.toByteArray(is);
    } finally {
        // release the resource stream even when reading fails
        is.close();
    }
    String expectedText = new String(expectedBytes, "utf-8").substring(1); // strip-off the unicode marker

    assertEquals(expectedText, foundText);
}
/**
 * [FAILING] Bug 47958 - Exception during Escher walk of pictures.
 * <p>
 * Prints the suggested file name of every picture. An exception during the
 * walk is the currently expected outcome; finishing the walk without one
 * triggers {@code fixed()}.
 */
public void test47958() {
    HWPFDocument document = HWPFTestDataSamples.openSampleFile("Bug47958.doc");
    try {
        List<Picture> pictures = document.getPicturesTable().getAllPictures();
        for (int i = 0; i < pictures.size(); i++) {
            System.out.println(pictures.get(i).suggestFullFileName());
        }
        fixed("47958");
    } catch (Exception expected) {
        // expected exception
    }
}
/**
 * [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing
 * formatting).
 * <p>
 * After a write/read round trip the range text (with carriage returns
 * turned into line feeds and double line feeds collapsed) and the table
 * structures must be unchanged.
 */
public void test48065()
{
    HWPFDocument original = HWPFTestDataSamples
            .openSampleFile( "Bug48065.doc" );
    HWPFDocument reloaded = HWPFTestDataSamples
            .writeOutAndReadBack( original );

    Range expected = original.getRange();
    Range actual = reloaded.getRange();

    String expectedText = expected.text().replace( "\r", "\n" )
            .replace( "\n\n", "\n" );
    String actualText = actual.text().replace( "\r", "\n" )
            .replace( "\n\n", "\n" );
    assertEquals( expectedText, actualText );

    assertTableStructures( expected, actual );
}
/**
 * Asserts that two ranges hold the same paragraphs and equivalent table
 * structures.
 * <p>
 * Paragraphs are compared pairwise by text, by {@code isInTable()} flag and
 * by {@code isTableRowEnd()} flag. For each pair of in-table paragraphs for
 * which both ranges can resolve a table, the row count and paragraph count
 * of the two tables are compared as well.
 *
 * @param expected
 *            range holding the reference structure
 * @param actual
 *            range that is verified against {@code expected}
 */
private static void assertTableStructures( Range expected, Range actual )
{
    assertEquals( "Different number of paragraphs",
            expected.numParagraphs(), actual.numParagraphs() );
    for ( int p = 0; p < expected.numParagraphs(); p++ )
    {
        Paragraph expParagraph = expected.getParagraph( p );
        Paragraph actParagraph = actual.getParagraph( p );

        assertEquals( "Different text of paragraphs #" + p,
                expParagraph.text(), actParagraph.text() );
        assertEquals( "Different isInTable flags for paragraphs #" + p
                + " -- " + expParagraph + " -- " + actParagraph + ".",
                expParagraph.isInTable(), actParagraph.isInTable() );
        assertEquals( "Different isTableRowEnd flags for paragraphs #" + p,
                expParagraph.isTableRowEnd(), actParagraph.isTableRowEnd() );

        if ( expParagraph.isInTable() && actParagraph.isInTable() )
        {
            Table expTable, actTable;
            try
            {
                expTable = expected.getTable( expParagraph );
                actTable = actual.getTable( actParagraph );
            }
            catch ( Exception ignored )
            {
                // Deliberate best effort: when either range cannot resolve
                // a table for this paragraph the table comparison is
                // skipped. NOTE(review): presumably getTable() only accepts
                // the first paragraph of a table -- confirm.
                continue;
            }

            assertEquals( "Different number of table rows at paragraph #"
                    + p, expTable.numRows(), actTable.numRows() );
            assertEquals(
                    "Different number of table paragraphs at paragraph #"
                            + p, expTable.numParagraphs(),
                    actTable.numParagraphs() );
        }
    }
}
/**
 * Bug 49933 - the text extracted by {@code Word6Extractor} from the old
 * format sample document must contain "best.wine.jump.ru".
 */
public void test49933()
{
    Word6Extractor extractor = new Word6Extractor(
            HWPFTestDataSamples.openOldSampleFile( "Bug49933.doc" ) );
    String extracted = extractor.getText();

    assertTrue( extracted.contains( "best.wine.jump.ru" ) );
}
/**
 * Bug 50936 - HWPF fails to read a file.
 * <p>
 * Passes when the sample document can be opened without an exception.
 */
public void test50936() {
    // the loaded document is not inspected, so no local variable is kept
    HWPFTestDataSamples.openSampleFile("Bug50936.doc");
}
/**
 * [FAILING] Bug 50955 - error while retrieving the text file.
 * <p>
 * An exception while opening the old format document or extracting its
 * text is the currently expected outcome; success triggers
 * {@code fixed()}.
 */
public void test50955() {
    try {
        HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug50955.doc");
        Word6Extractor extractor = new Word6Extractor(doc);
        // result intentionally discarded: only successful extraction matters
        extractor.getText();
        fixed("50955");
    } catch (Exception expected) {
        // expected exception
    }
}
/**
 * Bug 51524 - PapBinTable constructor is slow.
 * <p>
 * Opens the sample document stored in a zip archive; the test only
 * requires the call to return without an exception.
 */
public void test51524()
{
    // no timing assertion is made here
    HWPFTestDataSamples
            .openSampleFileFromArchive( "Bug51524.zip" );
}
}