avoid exceptions when using POI in Tika, see Bugs 51771 and 51770

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1169679 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2011-09-12 10:19:50 +00:00
parent 686f2ca6d0
commit 3ca4e3bd2d
7 changed files with 130 additions and 40 deletions

View File

@ -34,6 +34,8 @@
<changes> <changes>
<release version="3.8-beta5" date="2011-??-??"> <release version="3.8-beta5" date="2011-??-??">
<action dev="poi-developers" type="add">51196 - prevent NPE in XWPFPicture.getPictureData() </action>
<action dev="poi-developers" type="add">51771 - prevent NPE when getting object data from OLEShape in HSLF</action>
<action dev="poi-developers" type="add">51196 - more progress with Chart APi in XSSF</action> <action dev="poi-developers" type="add">51196 - more progress with Chart APi in XSSF</action>
<action dev="poi-developers" type="fix">51785 - Allow XSSF setForceFormulaRecalculation to work with the minimal ooxml-schemas jar</action> <action dev="poi-developers" type="fix">51785 - Allow XSSF setForceFormulaRecalculation to work with the minimal ooxml-schemas jar</action>
<action dev="poi-developers" type="fix">51772 - IllegalArgumentException Parsing MS Word 97 - 2003</action> <action dev="poi-developers" type="fix">51772 - IllegalArgumentException Parsing MS Word 97 - 2003</action>

View File

@ -18,7 +18,6 @@
package org.apache.poi.hssf.record; package org.apache.poi.hssf.record;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -26,6 +25,7 @@ import java.util.Map;
import org.apache.poi.ddf.DefaultEscherRecordFactory; import org.apache.poi.ddf.DefaultEscherRecordFactory;
import org.apache.poi.ddf.EscherBoolProperty; import org.apache.poi.ddf.EscherBoolProperty;
import org.apache.poi.ddf.EscherChildAnchorRecord;
import org.apache.poi.ddf.EscherClientAnchorRecord; import org.apache.poi.ddf.EscherClientAnchorRecord;
import org.apache.poi.ddf.EscherClientDataRecord; import org.apache.poi.ddf.EscherClientDataRecord;
import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.ddf.EscherContainerRecord;
@ -33,7 +33,6 @@ import org.apache.poi.ddf.EscherDgRecord;
import org.apache.poi.ddf.EscherDggRecord; import org.apache.poi.ddf.EscherDggRecord;
import org.apache.poi.ddf.EscherOptRecord; import org.apache.poi.ddf.EscherOptRecord;
import org.apache.poi.ddf.EscherProperties; import org.apache.poi.ddf.EscherProperties;
import org.apache.poi.ddf.EscherProperty;
import org.apache.poi.ddf.EscherRecord; import org.apache.poi.ddf.EscherRecord;
import org.apache.poi.ddf.EscherRecordFactory; import org.apache.poi.ddf.EscherRecordFactory;
import org.apache.poi.ddf.EscherSerializationListener; import org.apache.poi.ddf.EscherSerializationListener;
@ -46,14 +45,16 @@ import org.apache.poi.hssf.model.CommentShape;
import org.apache.poi.hssf.model.ConvertAnchor; import org.apache.poi.hssf.model.ConvertAnchor;
import org.apache.poi.hssf.model.DrawingManager2; import org.apache.poi.hssf.model.DrawingManager2;
import org.apache.poi.hssf.model.TextboxShape; import org.apache.poi.hssf.model.TextboxShape;
import org.apache.poi.hssf.usermodel.HSSFAnchor;
import org.apache.poi.hssf.usermodel.HSSFChildAnchor;
import org.apache.poi.hssf.usermodel.HSSFClientAnchor; import org.apache.poi.hssf.usermodel.HSSFClientAnchor;
import org.apache.poi.hssf.usermodel.HSSFPatriarch; import org.apache.poi.hssf.usermodel.HSSFPatriarch;
import org.apache.poi.hssf.usermodel.HSSFPicture; import org.apache.poi.hssf.usermodel.HSSFPicture;
import org.apache.poi.hssf.usermodel.HSSFShape; import org.apache.poi.hssf.usermodel.HSSFShape;
import org.apache.poi.hssf.usermodel.HSSFShapeContainer; import org.apache.poi.hssf.usermodel.HSSFShapeContainer;
import org.apache.poi.hssf.usermodel.HSSFShapeGroup; import org.apache.poi.hssf.usermodel.HSSFShapeGroup;
import org.apache.poi.hssf.usermodel.HSSFTextbox;
import org.apache.poi.hssf.usermodel.HSSFSimpleShape; import org.apache.poi.hssf.usermodel.HSSFSimpleShape;
import org.apache.poi.hssf.usermodel.HSSFTextbox;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger; import org.apache.poi.util.POILogger;
@ -584,28 +585,42 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
); );
} }
convertRecordsToUserModelRecursive(tcc, patriarch, null);
// Now, clear any trace of what records make up
// the patriarch
// Otherwise, everything will go horribly wrong
// when we try to write out again....
// clearEscherRecords();
drawingManager.getDgg().setFileIdClusters(new EscherDggRecord.FileIdCluster[0]);
// TODO: Support converting our records
// back into shapes
// log.log(POILogger.WARN, "Not processing objects into Patriarch!");
}
private static void convertRecordsToUserModelRecursive(List tcc, HSSFShapeContainer container, HSSFShape parent) {
// Now process the containers for each group // Now process the containers for each group
// and objects // and objects
for(int i=1; i<tcc.size(); i++) { for(int i=1; i<tcc.size(); i++) {
EscherContainerRecord shapeContainer = EscherContainerRecord shapeContainer = (EscherContainerRecord)tcc.get(i);
(EscherContainerRecord)tcc.get(i);
//System.err.println("\n\n*****\n\n");
//System.err.println(shapeContainer);
// Could be a group, or a base object // Could be a group, or a base object
if (shapeContainer.getRecordId() == EscherContainerRecord.SPGR_CONTAINER) if (shapeContainer.getRecordId() == EscherContainerRecord.SPGR_CONTAINER)
{ {
// Group // Group
if (shapeContainer.getChildRecords().size() > 0) final int shapeChildren = shapeContainer.getChildRecords().size();
if (shapeChildren > 0)
{ {
HSSFShapeGroup group = new HSSFShapeGroup( null, HSSFShapeGroup group = new HSSFShapeGroup( parent, new HSSFClientAnchor() );
new HSSFClientAnchor() ); addToParentOrContainer(group, container, parent);
patriarch.getChildren().add( group );
EscherContainerRecord groupContainer = (EscherContainerRecord) shapeContainer EscherContainerRecord groupContainer = (EscherContainerRecord) shapeContainer.getChild( 0 );
.getChild( 0 );
convertRecordsToUserModel( groupContainer, group ); convertRecordsToUserModel( groupContainer, group );
if (shapeChildren>1){
convertRecordsToUserModelRecursive(shapeContainer.getChildRecords(), container, group);
}
} else } else
{ {
log.log( POILogger.WARN, log.log( POILogger.WARN,
@ -621,9 +636,9 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
switch (type) switch (type)
{ {
case ST_TEXTBOX: case ST_TEXTBOX:
HSSFTextbox box = new HSSFTextbox( null, HSSFTextbox box = new HSSFTextbox( parent,
new HSSFClientAnchor() ); new HSSFClientAnchor() );
patriarch.addShape( box ); addToParentOrContainer(box, container, parent);
convertRecordsToUserModel( shapeContainer, box ); convertRecordsToUserModel( shapeContainer, box );
break; break;
@ -645,14 +660,34 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
EscherClientAnchorRecord anchorRecord = (EscherClientAnchorRecord) getEscherChild( EscherClientAnchorRecord anchorRecord = (EscherClientAnchorRecord) getEscherChild(
shapeContainer, shapeContainer,
EscherClientAnchorRecord.RECORD_ID ); EscherClientAnchorRecord.RECORD_ID );
HSSFClientAnchor anchor = toClientAnchor(anchorRecord);
HSSFPicture picture = new HSSFPicture( null, anchor ); EscherChildAnchorRecord childRecord = (EscherChildAnchorRecord) getEscherChild(
shapeContainer,
EscherChildAnchorRecord.RECORD_ID );
if (anchorRecord!=null && childRecord!=null){
log.log( POILogger.WARN, "Picture with both CLIENT and CHILD anchor: "+ type );
}
HSSFAnchor anchor;
if (anchorRecord!=null){
anchor = toClientAnchor(anchorRecord);
}else{
anchor = toChildAnchor(childRecord);
}
HSSFPicture picture = new HSSFPicture( parent, anchor );
picture.setPictureIndex( pictureIndex ); picture.setPictureIndex( pictureIndex );
patriarch.addShape( picture );
addToParentOrContainer(picture, container, parent);
} }
break; break;
default: default:
final HSSFSimpleShape shape = new HSSFSimpleShape( parent,
new HSSFClientAnchor() );
addToParentOrContainer(shape, container, parent);
convertRecordsToUserModel( shapeContainer, shape);
log.log( POILogger.WARN, "Unhandled shape type: " log.log( POILogger.WARN, "Unhandled shape type: "
+ type ); + type );
break; break;
@ -663,20 +698,19 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
} }
} }
// Now, clear any trace of what records make up
// the patriarch
// Otherwise, everything will go horribly wrong
// when we try to write out again....
// clearEscherRecords();
drawingManager.getDgg().setFileIdClusters(new EscherDggRecord.FileIdCluster[0]);
// TODO: Support converting our records
// back into shapes
// log.log(POILogger.WARN, "Not processing objects into Patriarch!");
} }
private HSSFClientAnchor toClientAnchor(EscherClientAnchorRecord anchorRecord){ private static void addToParentOrContainer(HSSFShape shape, HSSFShapeContainer container, HSSFShape parent) {
if (parent instanceof HSSFShapeGroup)
((HSSFShapeGroup) parent).addShape(shape);
else if (container instanceof HSSFPatriarch)
((HSSFPatriarch) container).addShape(shape);
else
container.getChildren().add(shape);
}
private static HSSFClientAnchor toClientAnchor(EscherClientAnchorRecord anchorRecord){
HSSFClientAnchor anchor = new HSSFClientAnchor(); HSSFClientAnchor anchor = new HSSFClientAnchor();
anchor.setAnchorType(anchorRecord.getFlag()); anchor.setAnchorType(anchorRecord.getFlag());
anchor.setCol1( anchorRecord.getCol1() ); anchor.setCol1( anchorRecord.getCol1() );
@ -690,7 +724,21 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
return anchor; return anchor;
} }
private void convertRecordsToUserModel(EscherContainerRecord shapeContainer, Object model) { private static HSSFChildAnchor toChildAnchor(EscherChildAnchorRecord anchorRecord){
HSSFChildAnchor anchor = new HSSFChildAnchor();
// anchor.setAnchorType(anchorRecord.getFlag());
// anchor.setCol1( anchorRecord.getCol1() );
// anchor.setCol2( anchorRecord.getCol2() );
anchor.setDx1( anchorRecord.getDx1() );
anchor.setDx2( anchorRecord.getDx2() );
anchor.setDy1( anchorRecord.getDy1() );
anchor.setDy2( anchorRecord.getDy2() );
// anchor.setRow1( anchorRecord.getRow1() );
// anchor.setRow2( anchorRecord.getRow2() );
return anchor;
}
private static void convertRecordsToUserModel(EscherContainerRecord shapeContainer, Object model) {
for(Iterator<EscherRecord> it = shapeContainer.getChildIterator(); it.hasNext();) { for(Iterator<EscherRecord> it = shapeContainer.getChildIterator(); it.hasNext();) {
EscherRecord r = it.next(); EscherRecord r = it.next();
if(r instanceof EscherSpgrRecord) { if(r instanceof EscherSpgrRecord) {
@ -728,6 +776,10 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
} }
else if(r instanceof EscherSpRecord) { else if(r instanceof EscherSpRecord) {
// Use flags if needed // Use flags if needed
final EscherSpRecord spr = (EscherSpRecord) r;
if (model instanceof HSSFShape){
final HSSFShape s = (HSSFShape) model;
}
} }
else if(r instanceof EscherOptRecord) { else if(r instanceof EscherOptRecord) {
// Use properties if needed // Use properties if needed

View File

@ -56,6 +56,11 @@ public class HSSFShapeGroup
return group; return group;
} }
/**
 * Adds an already-constructed shape to this group.
 * <p>
 * The shape's patriarch reference is overwritten with this group's
 * patriarch, then the shape is appended to this group's list of shapes.
 *
 * @param shape the shape to attach to this group
 */
public void addShape(HSSFShape shape){
// make the shape share this group's patriarch before registering it
shape._patriarch = this._patriarch;
shapes.add(shape);
}
/** /**
* Create a new simple shape under this group. * Create a new simple shape under this group.
* @param anchor the position of the shape. * @param anchor the position of the shape.
@ -177,4 +182,4 @@ public class HSSFShapeGroup
} }
return count; return count;
} }
} }

View File

@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
import org.apache.poi.POIXMLDocumentPart; import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.openxml4j.opc.PackageRelationship; import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.openxmlformats.schemas.drawingml.x2006.main.CTBlipFillProperties;
import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture; import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
@ -58,7 +59,14 @@ public class XWPFPicture {
* Note - not all kinds of picture have data * Note - not all kinds of picture have data
*/ */
public XWPFPictureData getPictureData(){ public XWPFPictureData getPictureData(){
String blipId = ctPic.getBlipFill().getBlip().getEmbed(); CTBlipFillProperties blipProps = ctPic.getBlipFill();
if(blipProps == null || !blipProps.isSetBlip()) {
// return null if Blip data is missing
return null;
}
String blipId = blipProps.getBlip().getEmbed();
POIXMLDocumentPart part = run.getParagraph().getPart(); POIXMLDocumentPart part = run.getParagraph().getPart();
if (part != null) if (part != null)
{ {

View File

@ -129,4 +129,26 @@ public class TestXWPFPictureData extends TestCase {
public void testGetChecksum() { public void testGetChecksum() {
} }
/**
 * Smoke test for bug 51770: walks every embedded picture in the default
 * header of the sample document and calls {@code getPictureData()} on it.
 * <p>
 * The test makes no assertions; it passes as long as no exception is
 * thrown while iterating. A {@code null} picture data result is tolerated.
 * <p>
 * NOTE(review): the sample file is named "Bug51170.docx" while the test
 * refers to bug 51770 - confirm the file name against the test data directory.
 */
public void testBug51770() throws InvalidFormatException, IOException {
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug51170.docx");
XWPFHeaderFooterPolicy policy = doc.getHeaderFooterPolicy();
XWPFHeader header = policy.getDefaultHeader();
for (XWPFParagraph paragraph : header.getParagraphs()) {
for (XWPFRun run : paragraph.getRuns()) {
for (XWPFPicture picture : run.getEmbeddedPictures()) {
if (paragraph.getDocument() != null) {
System.out.println(picture.getCTPicture());
// may be null when the picture carries no data; only print when present
XWPFPictureData data = picture.getPictureData();
if(data != null) System.out.println(data.getFileName());
}
}
}
}
}
private void process(XWPFParagraph paragraph){
}
} }

View File

@ -83,16 +83,17 @@ public final class OLEShape extends Picture {
ObjectData[] ole = ppt.getEmbeddedObjects(); ObjectData[] ole = ppt.getEmbeddedObjects();
//persist reference //persist reference
int ref = getExEmbed().getExOleObjAtom().getObjStgDataRef(); ExEmbed exEmbed = getExEmbed();
ObjectData data = null; ObjectData data = null;
if(exEmbed != null) {
int ref = exEmbed.getExOleObjAtom().getObjStgDataRef();
for (int i = 0; i < ole.length; i++) { for (int i = 0; i < ole.length; i++) {
if(ole[i].getExOleObjStg().getPersistId() == ref) { if(ole[i].getExOleObjStg().getPersistId() == ref) {
data=ole[i]; data=ole[i];
}
} }
} }
if (data==null) { if (data==null) {
logger.log(POILogger.WARN, "OLE data not found"); logger.log(POILogger.WARN, "OLE data not found");
} }

Binary file not shown.