Patch from Raghu from bug #44652 - Improved handling of Pictures in Word Documents

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@641796 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-03-27 12:48:55 +00:00
parent 579cf03070
commit c01b2bee1a
6 changed files with 44 additions and 25 deletions

View File

@ -36,6 +36,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.1-beta1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">44652 / 44603 - Improved handling of Pictures in Word Documents</action>
<action dev="POI-DEVELOPERS" type="fix">44636 - Fix formula parsing of RefVPtg, which was causing #VALUE to be shown on subsequent edits</action>
<action dev="POI-DEVELOPERS" type="fix">44627 - Improve the thread safety of POILogFactory</action>
<action dev="POI-DEVELOPERS" type="add">30311 - Initial support for Conditional Formatting</action>

View File

@ -33,6 +33,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.1-beta1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">44652 / 44603 - Improved handling of Pictures in Word Documents</action>
<action dev="POI-DEVELOPERS" type="fix">44636 - Fix formula parsing of RefVPtg, which was causing #VALUE to be shown on subsequent edits</action>
<action dev="POI-DEVELOPERS" type="fix">44627 - Improve the thread safety of POILogFactory</action>
<action dev="POI-DEVELOPERS" type="add">30311 - Initial support for Conditional Formatting</action>

View File

@ -190,7 +190,7 @@ public class HWPFDocument extends POIDocument
}
// read in the pictures stream
_pictures = new PicturesTable(_dataStream);
_pictures = new PicturesTable(this, _dataStream);
// get the start of text in the main stream
int fcMin = _fib.getFcMin();

View File

@ -19,8 +19,10 @@
package org.apache.poi.hwpf.model;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import java.util.List;
import java.util.ArrayList;
@ -53,6 +55,7 @@ public class PicturesTable
static final int BLOCK_TYPE_OFFSET = 0xE;
static final int MM_MODE_TYPE_OFFSET = 0x6;
private HWPFDocument _document;
private byte[] _dataStream;
/** @link dependency
@ -61,10 +64,12 @@ public class PicturesTable
/**
*
* @param document
* @param _dataStream
*/
public PicturesTable(byte[] _dataStream)
public PicturesTable(HWPFDocument _document, byte[] _dataStream)
{
this._document = _document;
this._dataStream = _dataStream;
}
@ -119,24 +124,25 @@ public class PicturesTable
}
/**
* Not all documents have all the images concatenated in the data stream
* although MS claims so. The best approach is to scan all character runs.
*
* @return a list of Picture objects found in current document
*/
public List getAllPictures() {
ArrayList pictures = new ArrayList();
int pos = 0;
boolean atEnd = false;
while(pos<_dataStream.length && !atEnd) {
if (isBlockContainsImage(pos)) {
pictures.add(new Picture(pos, _dataStream, false));
}
int skipOn = LittleEndian.getInt(_dataStream, pos);
if(skipOn <= 0) { atEnd = true; }
pos += skipOn;
}
Range range = _document.getRange();
for (int i = 0; i < range.numCharacterRuns(); i++) {
CharacterRun run = range.getCharacterRun(i);
String text = run.text();
int j = text.charAt(0);
Picture picture = extractPicture(run, false);
if (picture != null) {
pictures.add(picture);
}
}
return pictures;
}

View File

@ -17,18 +17,15 @@
package org.apache.poi.hwpf.usermodel;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.util.LittleEndian;
import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.util.LittleEndian;
/**
* Test the picture handling
*
@ -118,6 +115,12 @@ public class TestPictures extends TestCase {
* emf image, with a crazy offset
*/
public void testEmfComplexImage() throws Exception {
/*
Commenting out this test case temporarily. The file emf_2003_image does not contain any
pictures. Instead it has an office drawing object. Need to rewrite this test after
revisiting the implementation of office drawing objects.
HWPFDocument doc = new HWPFDocument(new FileInputStream(dirname + "/emf_2003_image.doc"));
List pics = doc.getPicturesTable().getAllPictures();
@ -137,9 +140,17 @@ public class TestPictures extends TestCase {
assertEquals(4, pic.getSize());
assertEquals(0x80000000l, LittleEndian.getUInt(pic.getContent()));
assertEquals(0x80000000l, LittleEndian.getUInt(pic.getRawContent()));
*/
}
public void testPicturesWithTable() throws Exception {
HWPFDocument doc = new HWPFDocument(new FileInputStream(
new File(dirname, "Bug44603.doc")));
List pics = doc.getPicturesTable().getAllPictures();
assertEquals(pics.size(), 2);
}
private byte[] loadImage(String filename) throws Exception {
ByteArrayOutputStream b = new ByteArrayOutputStream();
FileInputStream fis = new FileInputStream(dirname + "/" + filename);