mirror of https://github.com/apache/poi.git
extract Bugzilla tests from TestProblems into TestBugs
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1155205 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2d5db78a3b
commit
e544419a22
|
@ -0,0 +1,476 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.usermodel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.AssertionFailedError;
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFOldDocument;
|
||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||
import org.apache.poi.hwpf.extractor.Word6Extractor;
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.hwpf.model.FieldsDocumentPart;
|
||||
import org.apache.poi.hwpf.model.PlexOfField;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Test different problems reported in Apache Bugzilla
|
||||
*
|
||||
* @author Nick Burch (nick at torchbox dot com)
|
||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||
*/
|
||||
public class TestBugs extends TestCase
|
||||
{
|
||||
|
||||
private static void assertTableStructures( Range expected, Range actual )
|
||||
{
|
||||
assertEquals( expected.numParagraphs(), actual.numParagraphs() );
|
||||
for ( int p = 0; p < expected.numParagraphs(); p++ )
|
||||
{
|
||||
Paragraph expParagraph = expected.getParagraph( p );
|
||||
Paragraph actParagraph = actual.getParagraph( p );
|
||||
|
||||
assertEquals( expParagraph.text(), actParagraph.text() );
|
||||
assertEquals( "Diffent isInTable flags for paragraphs #" + p
|
||||
+ " -- " + expParagraph + " -- " + actParagraph + ".",
|
||||
expParagraph.isInTable(), actParagraph.isInTable() );
|
||||
assertEquals( expParagraph.isTableRowEnd(),
|
||||
actParagraph.isTableRowEnd() );
|
||||
|
||||
if ( expParagraph.isInTable() && actParagraph.isInTable() )
|
||||
{
|
||||
Table expTable, actTable;
|
||||
try
|
||||
{
|
||||
expTable = expected.getTable( expParagraph );
|
||||
actTable = actual.getTable( actParagraph );
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
assertEquals( expTable.numRows(), actTable.numRows() );
|
||||
assertEquals( expTable.numParagraphs(),
|
||||
actTable.numParagraphs() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void fixed( String bugzillaId )
|
||||
{
|
||||
throw new Error(
|
||||
"Bug "
|
||||
+ bugzillaId
|
||||
+ " seems to be fixed. "
|
||||
+ "Please resolve the issue in Bugzilla and remove fail() from the test" );
|
||||
}
|
||||
|
||||
private static void test47563_insertTable( int rows, int columns )
|
||||
{
|
||||
// POI apparently can't create a document from scratch,
|
||||
// so we need an existing empty dummy document
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "empty.doc" );
|
||||
|
||||
Range range = doc.getRange();
|
||||
Table table = range.insertBefore(
|
||||
new TableProperties( (short) columns ), rows );
|
||||
table.sanityCheck();
|
||||
range.sanityCheck();
|
||||
|
||||
for ( int rowIdx = 0; rowIdx < table.numRows(); rowIdx++ )
|
||||
{
|
||||
TableRow row = table.getRow( rowIdx );
|
||||
row.sanityCheck();
|
||||
for ( int colIdx = 0; colIdx < row.numCells(); colIdx++ )
|
||||
{
|
||||
TableCell cell = row.getCell( colIdx );
|
||||
cell.sanityCheck();
|
||||
|
||||
Paragraph par = cell.getParagraph( 0 );
|
||||
par.sanityCheck();
|
||||
|
||||
par.insertBefore( "" + ( rowIdx * row.numCells() + colIdx ) );
|
||||
|
||||
par.sanityCheck();
|
||||
cell.sanityCheck();
|
||||
row.sanityCheck();
|
||||
table.sanityCheck();
|
||||
range.sanityCheck();
|
||||
}
|
||||
}
|
||||
|
||||
String text = range.text();
|
||||
int mustBeAfter = 0;
|
||||
for ( int i = 0; i < rows * columns; i++ )
|
||||
{
|
||||
int next = text.indexOf( Integer.toString( i ), mustBeAfter );
|
||||
assertFalse( next == -1 );
|
||||
mustBeAfter = next;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 33519 - HWPF fails to read a file
|
||||
*/
|
||||
public void test33519()
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug33519.doc" );
|
||||
WordExtractor extractor = new WordExtractor( doc );
|
||||
extractor.getText();
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 34898 - WordExtractor doesn't read the whole string from the file
|
||||
*/
|
||||
public void test34898()
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug34898.doc" );
|
||||
WordExtractor extractor = new WordExtractor( doc );
|
||||
assertEquals( "\u30c7\u30a3\u30ec\u30af\u30c8\u30ea", extractor
|
||||
.getText().trim() );
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 44331 - HWPFDocument.write destroys fields
|
||||
*/
|
||||
public void test44431()
|
||||
{
|
||||
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug44431.doc" );
|
||||
WordExtractor extractor1 = new WordExtractor( doc1 );
|
||||
|
||||
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
|
||||
WordExtractor extractor2 = new WordExtractor( doc2 );
|
||||
|
||||
assertEquals( extractor1.getFooterText(), extractor2.getFooterText() );
|
||||
assertEquals( extractor1.getHeaderText(), extractor2.getHeaderText() );
|
||||
assertEquals( Arrays.toString( extractor1.getParagraphText() ),
|
||||
Arrays.toString( extractor2.getParagraphText() ) );
|
||||
|
||||
assertEquals( extractor1.getText(), extractor2.getText() );
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 45473 - HWPF cannot read file after save
|
||||
*/
|
||||
public void test45473()
|
||||
{
|
||||
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug45473.doc" );
|
||||
String text1 = new WordExtractor( doc1 ).getText().trim();
|
||||
|
||||
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
|
||||
String text2 = new WordExtractor( doc2 ).getText().trim();
|
||||
|
||||
// the text in the saved document has some differences in line
|
||||
// separators but we tolerate that
|
||||
assertEquals( text1.replaceAll( "\n", "" ), text2.replaceAll( "\n", "" ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 46220 - images are not properly extracted
|
||||
*/
|
||||
public void test46220()
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46220.doc" );
|
||||
// reference checksums as in Bugzilla
|
||||
String[] md5 = { "851be142bce6d01848e730cb6903f39e",
|
||||
"7fc6d8fb58b09ababd036d10a0e8c039",
|
||||
"a7dc644c40bc2fbf17b2b62d07f99248",
|
||||
"72d07b8db5fad7099d90bc4c304b4666" };
|
||||
List<Picture> pics = doc.getPicturesTable().getAllPictures();
|
||||
assertEquals( 4, pics.size() );
|
||||
for ( int i = 0; i < pics.size(); i++ )
|
||||
{
|
||||
Picture pic = pics.get( i );
|
||||
byte[] data = pic.getRawContent();
|
||||
// use Apache Commons Codec utils to compute md5
|
||||
assertEquals( md5[i], DigestUtils.md5Hex( data ) );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells
|
||||
* missing
|
||||
*/
|
||||
public void test46817()
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46817.doc" );
|
||||
WordExtractor extractor = new WordExtractor( doc );
|
||||
String text = extractor.getText().trim();
|
||||
|
||||
assertTrue( text.contains( "Nazwa wykonawcy" ) );
|
||||
assertTrue( text.contains( "kujawsko-pomorskie" ) );
|
||||
assertTrue( text.contains( "ekomel@ekomel.com.pl" ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* [FAILING] Bug 47286 - Word documents saves in wrong format if source
|
||||
* contains form elements
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void test47286() throws IOException
|
||||
{
|
||||
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug47286.doc" );
|
||||
String text1 = new WordExtractor( doc1 ).getText().trim();
|
||||
|
||||
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
|
||||
String text2 = new WordExtractor( doc2 ).getText().trim();
|
||||
|
||||
// the text in the saved document has some differences in line
|
||||
// separators but we tolerate that
|
||||
assertEquals( text1.replaceAll( "\n", "" ), text2.replaceAll( "\n", "" ) );
|
||||
|
||||
assertEquals( doc1.getCharacterTable().getTextRuns().size(), doc2
|
||||
.getCharacterTable().getTextRuns().size() );
|
||||
|
||||
List<PlexOfField> expectedFields = doc1.getFieldsTables()
|
||||
.getFieldsPLCF( FieldsDocumentPart.MAIN );
|
||||
List<PlexOfField> actualFields = doc2.getFieldsTables().getFieldsPLCF(
|
||||
FieldsDocumentPart.MAIN );
|
||||
assertEquals( expectedFields.size(), actualFields.size() );
|
||||
|
||||
assertTableStructures( doc1.getRange(), doc2.getRange() );
|
||||
}
|
||||
|
||||
/**
|
||||
* [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
|
||||
* CharacterRun.replaceText()
|
||||
*/
|
||||
public void test47287()
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" );
|
||||
String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
|
||||
"1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
|
||||
int usedVal = 0;
|
||||
String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002";
|
||||
Range r = doc.getRange();
|
||||
for ( int x = 0; x < r.numSections(); x++ )
|
||||
{
|
||||
Section s = r.getSection( x );
|
||||
for ( int y = 0; y < s.numParagraphs(); y++ )
|
||||
{
|
||||
Paragraph p = s.getParagraph( y );
|
||||
|
||||
for ( int z = 0; z < p.numCharacterRuns(); z++ )
|
||||
{
|
||||
boolean isFound = false;
|
||||
|
||||
// character run
|
||||
CharacterRun run = p.getCharacterRun( z );
|
||||
// character run text
|
||||
String text = run.text();
|
||||
String oldText = text;
|
||||
int c = text.indexOf( "FORMTEXT " );
|
||||
if ( c < 0 )
|
||||
{
|
||||
int k = text.indexOf( PLACEHOLDER );
|
||||
if ( k >= 0 )
|
||||
{
|
||||
text = text.substring( 0, k ) + values[usedVal]
|
||||
+ text.substring( k + PLACEHOLDER.length() );
|
||||
usedVal++;
|
||||
isFound = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for ( ; c >= 0; c = text.indexOf( "FORMTEXT ", c
|
||||
+ "FORMTEXT ".length() ) )
|
||||
{
|
||||
int k = text.indexOf( PLACEHOLDER, c );
|
||||
if ( k >= 0 )
|
||||
{
|
||||
text = text.substring( 0, k )
|
||||
+ values[usedVal]
|
||||
+ text.substring( k
|
||||
+ PLACEHOLDER.length() );
|
||||
usedVal++;
|
||||
isFound = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( isFound )
|
||||
{
|
||||
run.replaceText( oldText, text, 0 );
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String docText = r.text();
|
||||
|
||||
assertTrue( docText.contains( "1-1" ) );
|
||||
assertTrue( docText.contains( "1-12" ) );
|
||||
|
||||
assertFalse( docText.contains( "1-13" ) );
|
||||
assertFalse( docText.contains( "1-15" ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* [RESOLVED FIXED] Bug 47563 - Exception when working with table
|
||||
*/
|
||||
public void test47563()
|
||||
{
|
||||
test47563_insertTable( 1, 5 );
|
||||
test47563_insertTable( 1, 6 );
|
||||
test47563_insertTable( 5, 1 );
|
||||
test47563_insertTable( 6, 1 );
|
||||
test47563_insertTable( 2, 2 );
|
||||
test47563_insertTable( 3, 2 );
|
||||
test47563_insertTable( 2, 3 );
|
||||
test47563_insertTable( 3, 3 );
|
||||
}
|
||||
|
||||
/**
|
||||
* [FAILING] Bug 47731 - Word Extractor considers text copied from some
|
||||
* website as an embedded object
|
||||
*/
|
||||
public void test47731() throws Exception
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47731.doc" );
|
||||
String foundText = new WordExtractor( doc ).getText();
|
||||
|
||||
try
|
||||
{
|
||||
assertTrue( foundText
|
||||
.contains( "Soak the rice in water for three to four hours" ) );
|
||||
|
||||
fixed( "47731" );
|
||||
}
|
||||
catch ( AssertionFailedError exc )
|
||||
{
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 4774 - text extracted by WordExtractor is broken
|
||||
*/
|
||||
public void test47742() throws Exception
|
||||
{
|
||||
|
||||
// (1) extract text from MS Word document via POI
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47742.doc" );
|
||||
String foundText = new WordExtractor( doc ).getText();
|
||||
|
||||
// (2) read text from text document (retrieved by saving the word
|
||||
// document as text file using encoding UTF-8)
|
||||
InputStream is = POIDataSamples.getDocumentInstance()
|
||||
.openResourceAsStream( "Bug47742-text.txt" );
|
||||
byte[] expectedBytes = IOUtils.toByteArray( is );
|
||||
String expectedText = new String( expectedBytes, "utf-8" )
|
||||
.substring( 1 ); // strip-off the unicode marker
|
||||
|
||||
assertEquals( expectedText, foundText );
|
||||
}
|
||||
|
||||
/**
|
||||
* [FAILING] Bug 47958 - Exception during Escher walk of pictures
|
||||
*/
|
||||
public void test47958()
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47958.doc" );
|
||||
try
|
||||
{
|
||||
for ( Picture pic : doc.getPicturesTable().getAllPictures() )
|
||||
{
|
||||
System.out.println( pic.suggestFullFileName() );
|
||||
}
|
||||
fixed( "47958" );
|
||||
}
|
||||
catch ( Exception e )
|
||||
{
|
||||
// expected exception
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing
|
||||
* formatting)
|
||||
*/
|
||||
public void test48065()
|
||||
{
|
||||
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug48065.doc" );
|
||||
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
|
||||
|
||||
Range expected = doc1.getRange();
|
||||
Range actual = doc2.getRange();
|
||||
|
||||
assertEquals(
|
||||
expected.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ),
|
||||
actual.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ) );
|
||||
|
||||
assertTableStructures( expected, actual );
|
||||
}
|
||||
|
||||
public void test49933()
|
||||
{
|
||||
HWPFOldDocument document = HWPFTestDataSamples
|
||||
.openOldSampleFile( "Bug49933.doc" );
|
||||
|
||||
Word6Extractor word6Extractor = new Word6Extractor( document );
|
||||
String text = word6Extractor.getText();
|
||||
|
||||
assertTrue( text.contains( "best.wine.jump.ru" ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 50936 - HWPF fails to read a file
|
||||
*/
|
||||
public void test50936()
|
||||
{
|
||||
HWPFTestDataSamples.openSampleFile( "Bug50936.doc" );
|
||||
}
|
||||
|
||||
/**
|
||||
* [FAILING] Bug 50955 - error while retrieving the text file
|
||||
*/
|
||||
public void test50955()
|
||||
{
|
||||
try
|
||||
{
|
||||
HWPFOldDocument doc = HWPFTestDataSamples
|
||||
.openOldSampleFile( "Bug50955.doc" );
|
||||
Word6Extractor extractor = new Word6Extractor( doc );
|
||||
extractor.getText();
|
||||
|
||||
fixed( "50955" );
|
||||
}
|
||||
catch ( Exception e )
|
||||
{
|
||||
// expected exception
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 51524 - PapBinTable constructor is slow
|
||||
*/
|
||||
public void test51524()
|
||||
{
|
||||
HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" );
|
||||
}
|
||||
|
||||
}
|
|
@ -17,26 +17,14 @@
|
|||
|
||||
package org.apache.poi.hwpf.usermodel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.AssertionFailedError;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFOldDocument;
|
||||
import org.apache.poi.hwpf.HWPFTestCase;
|
||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||
import org.apache.poi.hwpf.extractor.Word6Extractor;
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.hwpf.model.FieldsDocumentPart;
|
||||
import org.apache.poi.hwpf.model.PlexOfField;
|
||||
import org.apache.poi.hwpf.model.StyleSheet;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Test various problem documents
|
||||
|
@ -427,410 +415,4 @@ public final class TestProblems extends HWPFTestCase {
|
|||
assertEquals("Row 3/Cell 3\u0007", cell.text());
|
||||
}
|
||||
|
||||
static void fixed(String bugzillaId) {
|
||||
throw new Error("Bug " + bugzillaId + " seems to be fixed. " +
|
||||
"Please resolve the issue in Bugzilla and remove fail() from the test");
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 33519 - HWPF fails to read a file
|
||||
*/
|
||||
public void test33519() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug33519.doc");
|
||||
WordExtractor extractor = new WordExtractor(doc);
|
||||
String text = extractor.getText();
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 34898 - WordExtractor doesn't read the whole string from the file
|
||||
*/
|
||||
public void test34898() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug34898.doc");
|
||||
WordExtractor extractor = new WordExtractor(doc);
|
||||
assertEquals("\u30c7\u30a3\u30ec\u30af\u30c8\u30ea", extractor.getText().trim());
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 44331 - HWPFDocument.write destroys fields
|
||||
*/
|
||||
public void test44431()
|
||||
{
|
||||
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug44431.doc" );
|
||||
WordExtractor extractor1 = new WordExtractor( doc1 );
|
||||
|
||||
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
|
||||
WordExtractor extractor2 = new WordExtractor( doc2 );
|
||||
|
||||
assertEquals( extractor1.getFooterText(), extractor2.getFooterText() );
|
||||
assertEquals( extractor1.getHeaderText(), extractor2.getHeaderText() );
|
||||
assertEquals( Arrays.toString( extractor1.getParagraphText() ),
|
||||
Arrays.toString( extractor2.getParagraphText() ) );
|
||||
|
||||
assertEquals( extractor1.getText(), extractor2.getText() );
|
||||
}
|
||||
|
||||
/**
|
||||
* [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells
|
||||
* missing
|
||||
*/
|
||||
public void test46817()
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46817.doc" );
|
||||
WordExtractor extractor = new WordExtractor( doc );
|
||||
String text = extractor.getText().trim();
|
||||
|
||||
assertTrue( text.contains( "Nazwa wykonawcy" ) );
|
||||
assertTrue( text.contains( "kujawsko-pomorskie" ) );
|
||||
assertTrue( text.contains( "ekomel@ekomel.com.pl" ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 46220 - images are not properly extracted
|
||||
*/
|
||||
public void test46220() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug46220.doc");
|
||||
// reference checksums as in Bugzilla
|
||||
String[] md5 = {
|
||||
"851be142bce6d01848e730cb6903f39e",
|
||||
"7fc6d8fb58b09ababd036d10a0e8c039",
|
||||
"a7dc644c40bc2fbf17b2b62d07f99248",
|
||||
"72d07b8db5fad7099d90bc4c304b4666"
|
||||
};
|
||||
List<Picture> pics = doc.getPicturesTable().getAllPictures();
|
||||
assertEquals(4, pics.size());
|
||||
for (int i = 0; i < pics.size(); i++) {
|
||||
Picture pic = pics.get(i);
|
||||
byte[] data = pic.getRawContent();
|
||||
// use Apache Commons Codec utils to compute md5
|
||||
assertEquals(md5[i], DigestUtils.md5Hex(data));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 45473 - HWPF cannot read file after save
|
||||
*/
|
||||
public void test45473() {
|
||||
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug45473.doc");
|
||||
String text1 = new WordExtractor(doc1).getText().trim();
|
||||
|
||||
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
|
||||
String text2 = new WordExtractor(doc2).getText().trim();
|
||||
|
||||
// the text in the saved document has some differences in line separators but we tolerate that
|
||||
assertEquals(text1.replaceAll("\n", ""), text2.replaceAll("\n", ""));
|
||||
}
|
||||
|
||||
/**
|
||||
* [FAILING] Bug 47286 - Word documents saves in wrong format if source
|
||||
* contains form elements
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void test47286() throws IOException
|
||||
{
|
||||
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug47286.doc" );
|
||||
String text1 = new WordExtractor( doc1 ).getText().trim();
|
||||
|
||||
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
|
||||
String text2 = new WordExtractor( doc2 ).getText().trim();
|
||||
|
||||
// the text in the saved document has some differences in line
|
||||
// separators but we tolerate that
|
||||
assertEquals( text1.replaceAll( "\n", "" ), text2.replaceAll( "\n", "" ) );
|
||||
|
||||
assertEquals( doc1.getCharacterTable().getTextRuns().size(), doc2
|
||||
.getCharacterTable().getTextRuns().size() );
|
||||
|
||||
List<PlexOfField> expectedFields = doc1.getFieldsTables()
|
||||
.getFieldsPLCF( FieldsDocumentPart.MAIN );
|
||||
List<PlexOfField> actualFields = doc2.getFieldsTables().getFieldsPLCF(
|
||||
FieldsDocumentPart.MAIN );
|
||||
assertEquals( expectedFields.size(), actualFields.size() );
|
||||
|
||||
assertTableStructures( doc1.getRange(), doc2.getRange() );
|
||||
}
|
||||
|
||||
/**
|
||||
* [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
|
||||
* CharacterRun.replaceText()
|
||||
*/
|
||||
public void test47287()
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" );
|
||||
String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
|
||||
"1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
|
||||
int usedVal = 0;
|
||||
String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002";
|
||||
Range r = doc.getRange();
|
||||
for ( int x = 0; x < r.numSections(); x++ )
|
||||
{
|
||||
Section s = r.getSection( x );
|
||||
for ( int y = 0; y < s.numParagraphs(); y++ )
|
||||
{
|
||||
Paragraph p = s.getParagraph( y );
|
||||
|
||||
for ( int z = 0; z < p.numCharacterRuns(); z++ )
|
||||
{
|
||||
boolean isFound = false;
|
||||
|
||||
// character run
|
||||
CharacterRun run = p.getCharacterRun( z );
|
||||
// character run text
|
||||
String text = run.text();
|
||||
String oldText = text;
|
||||
int c = text.indexOf( "FORMTEXT " );
|
||||
if ( c < 0 )
|
||||
{
|
||||
int k = text.indexOf( PLACEHOLDER );
|
||||
if ( k >= 0 )
|
||||
{
|
||||
text = text.substring( 0, k ) + values[usedVal]
|
||||
+ text.substring( k + PLACEHOLDER.length() );
|
||||
usedVal++;
|
||||
isFound = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for ( ; c >= 0; c = text.indexOf( "FORMTEXT ", c
|
||||
+ "FORMTEXT ".length() ) )
|
||||
{
|
||||
int k = text.indexOf( PLACEHOLDER, c );
|
||||
if ( k >= 0 )
|
||||
{
|
||||
text = text.substring( 0, k )
|
||||
+ values[usedVal]
|
||||
+ text.substring( k
|
||||
+ PLACEHOLDER.length() );
|
||||
usedVal++;
|
||||
isFound = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( isFound )
|
||||
{
|
||||
run.replaceText( oldText, text, 0 );
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String docText = r.text();
|
||||
|
||||
assertTrue( docText.contains( "1-1" ) );
|
||||
assertTrue( docText.contains( "1-12" ) );
|
||||
|
||||
assertFalse( docText.contains( "1-13" ) );
|
||||
assertFalse( docText.contains( "1-15" ) );
|
||||
}
|
||||
|
||||
private static void insertTable( int rows, int columns )
|
||||
{
|
||||
// POI apparently can't create a document from scratch,
|
||||
// so we need an existing empty dummy document
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "empty.doc" );
|
||||
|
||||
Range range = doc.getRange();
|
||||
Table table = range.insertBefore(
|
||||
new TableProperties( (short) columns ), rows );
|
||||
table.sanityCheck();
|
||||
range.sanityCheck();
|
||||
|
||||
for ( int rowIdx = 0; rowIdx < table.numRows(); rowIdx++ )
|
||||
{
|
||||
TableRow row = table.getRow( rowIdx );
|
||||
row.sanityCheck();
|
||||
for ( int colIdx = 0; colIdx < row.numCells(); colIdx++ )
|
||||
{
|
||||
TableCell cell = row.getCell( colIdx );
|
||||
cell.sanityCheck();
|
||||
|
||||
Paragraph par = cell.getParagraph( 0 );
|
||||
par.sanityCheck();
|
||||
|
||||
par.insertBefore( "" + ( rowIdx * row.numCells() + colIdx ) );
|
||||
|
||||
par.sanityCheck();
|
||||
cell.sanityCheck();
|
||||
row.sanityCheck();
|
||||
table.sanityCheck();
|
||||
range.sanityCheck();
|
||||
}
|
||||
}
|
||||
|
||||
String text = range.text();
|
||||
int mustBeAfter = 0;
|
||||
for ( int i = 0; i < rows * columns; i++ )
|
||||
{
|
||||
int next = text.indexOf( Integer.toString( i ), mustBeAfter );
|
||||
assertFalse( next == -1 );
|
||||
mustBeAfter = next;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* [RESOLVED FIXED] Bug 47563 - Exception when working with table
|
||||
*/
|
||||
public void test47563()
|
||||
{
|
||||
insertTable( 1, 5 );
|
||||
insertTable( 1, 6 );
|
||||
insertTable( 5, 1 );
|
||||
insertTable( 6, 1 );
|
||||
insertTable( 2, 2 );
|
||||
insertTable( 3, 2 );
|
||||
insertTable( 2, 3 );
|
||||
insertTable( 3, 3 );
|
||||
}
|
||||
|
||||
/**
|
||||
* [FAILING] Bug 47731 - Word Extractor considers text copied from some
|
||||
* website as an embedded object
|
||||
*/
|
||||
public void test47731() throws Exception
|
||||
{
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47731.doc" );
|
||||
String foundText = new WordExtractor( doc ).getText();
|
||||
|
||||
try
|
||||
{
|
||||
assertTrue( foundText
|
||||
.contains( "Soak the rice in water for three to four hours" ) );
|
||||
|
||||
fixed( "47731" );
|
||||
}
|
||||
catch ( AssertionFailedError exc )
|
||||
{
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 4774 - text extracted by WordExtractor is broken
|
||||
*/
|
||||
public void test47742() throws Exception {
|
||||
|
||||
// (1) extract text from MS Word document via POI
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47742.doc");
|
||||
String foundText = new WordExtractor(doc).getText();
|
||||
|
||||
// (2) read text from text document (retrieved by saving the word
|
||||
// document as text file using encoding UTF-8)
|
||||
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("Bug47742-text.txt");
|
||||
byte[] expectedBytes = IOUtils.toByteArray(is);
|
||||
String expectedText = new String(expectedBytes, "utf-8").substring(1); // strip-off the unicode marker
|
||||
|
||||
assertEquals(expectedText, foundText);
|
||||
}
|
||||
|
||||
/**
|
||||
* [FAILING] Bug 47958 - Exception during Escher walk of pictures
|
||||
*/
|
||||
public void test47958() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47958.doc");
|
||||
try {
|
||||
for (Picture pic : doc.getPicturesTable().getAllPictures()) {
|
||||
System.out.println(pic.suggestFullFileName());
|
||||
}
|
||||
fixed("47958");
|
||||
} catch (Exception e) {
|
||||
// expected exception
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing
|
||||
* formatting)
|
||||
*/
|
||||
public void test48065()
|
||||
{
|
||||
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug48065.doc" );
|
||||
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
|
||||
|
||||
Range expected = doc1.getRange();
|
||||
Range actual = doc2.getRange();
|
||||
|
||||
assertEquals(
|
||||
expected.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ),
|
||||
actual.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ) );
|
||||
|
||||
assertTableStructures( expected, actual );
|
||||
}
|
||||
|
||||
private static void assertTableStructures( Range expected, Range actual )
|
||||
{
|
||||
assertEquals( expected.numParagraphs(), actual.numParagraphs() );
|
||||
for ( int p = 0; p < expected.numParagraphs(); p++ )
|
||||
{
|
||||
Paragraph expParagraph = expected.getParagraph( p );
|
||||
Paragraph actParagraph = actual.getParagraph( p );
|
||||
|
||||
assertEquals( expParagraph.text(), actParagraph.text() );
|
||||
assertEquals( "Diffent isInTable flags for paragraphs #" + p
|
||||
+ " -- " + expParagraph + " -- " + actParagraph + ".",
|
||||
expParagraph.isInTable(), actParagraph.isInTable() );
|
||||
assertEquals( expParagraph.isTableRowEnd(),
|
||||
actParagraph.isTableRowEnd() );
|
||||
|
||||
if ( expParagraph.isInTable() && actParagraph.isInTable() )
|
||||
{
|
||||
Table expTable, actTable;
|
||||
try
|
||||
{
|
||||
expTable = expected.getTable( expParagraph );
|
||||
actTable = actual.getTable( actParagraph );
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
assertEquals( expTable.numRows(), actTable.numRows() );
|
||||
assertEquals( expTable.numParagraphs(),
|
||||
actTable.numParagraphs() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void test49933()
|
||||
{
|
||||
HWPFOldDocument document = HWPFTestDataSamples
|
||||
.openOldSampleFile( "Bug49933.doc" );
|
||||
|
||||
Word6Extractor word6Extractor = new Word6Extractor( document );
|
||||
String text = word6Extractor.getText();
|
||||
|
||||
assertTrue( text.contains( "best.wine.jump.ru" ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 50936 - HWPF fails to read a file
|
||||
*/
|
||||
public void test50936() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug50936.doc");
|
||||
}
|
||||
|
||||
/**
|
||||
* [FAILING] Bug 50955 - error while retrieving the text file
|
||||
*/
|
||||
public void test50955() {
|
||||
try {
|
||||
HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug50955.doc");
|
||||
Word6Extractor extractor = new Word6Extractor(doc);
|
||||
String text = extractor.getText();
|
||||
fixed("50955");
|
||||
} catch (Exception e) {
|
||||
// expected exception
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 51524 - PapBinTable constructor is slow
|
||||
*/
|
||||
public void test51524()
|
||||
{
|
||||
HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue