improved aggregating drawing records in documents with charts,

fixed reading EscherContainer records from byte array

git-svn-id: https://svn.apache.org/repos/asf/poi/branches/gsoc2012@1351850 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Evgeniy Berlog 2012-06-19 21:00:04 +00:00
parent 0e8a727b35
commit 0c5bf44cba
4 changed files with 195 additions and 11 deletions

View File

@ -65,8 +65,7 @@ public class DefaultEscherRecordFactory implements EscherRecordFactory {
// However, EscherTextboxRecord are containers of records for the // However, EscherTextboxRecord are containers of records for the
// host application, not of other Escher records, so treat them // host application, not of other Escher records, so treat them
// differently // differently
if ( ( options & (short) 0x000F ) == (short) 0x000F if (isContainer(options, recordId)) {
&& recordId != EscherTextboxRecord.RECORD_ID ) {
EscherContainerRecord r = new EscherContainerRecord(); EscherContainerRecord r = new EscherContainerRecord();
r.setRecordId( recordId ); r.setRecordId( recordId );
r.setOptions( options ); r.setOptions( options );
@ -145,4 +144,17 @@ public class DefaultEscherRecordFactory implements EscherRecordFactory {
} }
return result; return result;
} }
/**
 * Tells whether a record header describes a container record.
 * <p>
 * Record ids from {@link EscherContainerRecord#DGG_CONTAINER} up to and including
 * {@link EscherContainerRecord#SOLVER_CONTAINER} are always containers, whatever
 * the options say. A textbox record is never reported as a container. Any other
 * record is a container only when the low four bits of its options are all set.
 *
 * @param options  the options field of the record header
 * @param recordId the record id field of the record header
 * @return <code>true</code> if the record should be read as a container
 */
public static boolean isContainer(short options, short recordId) {
    boolean idInContainerRange = recordId >= EscherContainerRecord.DGG_CONTAINER
            && recordId <= EscherContainerRecord.SOLVER_CONTAINER;
    if (idInContainerRange) {
        return true;
    }
    if (recordId == EscherTextboxRecord.RECORD_ID) {
        return false;
    }
    int lowNibble = options & 0x000F;
    return lowNibble == 0x000F;
}
} }

View File

@ -25,6 +25,8 @@ import java.util.NoSuchElementException;
import org.apache.poi.util.HexDump; import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/** /**
* Escher container records store other escher records as children. * Escher container records store other escher records as children.
@ -42,6 +44,32 @@ public final class EscherContainerRecord extends EscherRecord {
public static final short SP_CONTAINER = (short)0xF004; public static final short SP_CONTAINER = (short)0xF004;
public static final short SOLVER_CONTAINER = (short)0xF005; public static final short SOLVER_CONTAINER = (short)0xF005;
private static POILogger log = POILogFactory.getLogger(EscherContainerRecord.class);
/**
* If a document contains charts, its record stream has the following structure:
* BOF
* ...
* DrawingRecord
* ...
* ObjRecord|TxtObjRecord
* ...
* EOF
* ...
* BOF(Chart begin)
* ...
* DrawingRecord
* ...
* ObjRecord|TxtObjRecord
* ...
* EOF
* So when EscherAggregate.createAggregate() is called, not all of the needed data is available.
* When fillFields() reaches the end of the data while bytesRemaining is still positive
* (the "... bytes remaining but no space left" warning), the value of bytesRemaining
* is saved here and added to the container size when the container is serialized.
*/
private int _remainingLength;
private final List<EscherRecord> _childRecords = new ArrayList<EscherRecord>(); private final List<EscherRecord> _childRecords = new ArrayList<EscherRecord>();
public int fillFields(byte[] data, int pOffset, EscherRecordFactory recordFactory) { public int fillFields(byte[] data, int pOffset, EscherRecordFactory recordFactory) {
@ -56,7 +84,8 @@ public final class EscherContainerRecord extends EscherRecord {
bytesRemaining -= childBytesWritten; bytesRemaining -= childBytesWritten;
addChildRecord(child); addChildRecord(child);
if (offset >= data.length && bytesRemaining > 0) { if (offset >= data.length && bytesRemaining > 0) {
System.out.println("WARNING: " + bytesRemaining + " bytes remaining but no space left"); _remainingLength = bytesRemaining;
log.log(POILogger.WARN, "Not enough Escher data: " + bytesRemaining + " bytes remaining but no space left");
} }
} }
return bytesWritten; return bytesWritten;
@ -74,6 +103,7 @@ public final class EscherContainerRecord extends EscherRecord {
EscherRecord r = iterator.next(); EscherRecord r = iterator.next();
remainingBytes += r.getRecordSize(); remainingBytes += r.getRecordSize();
} }
remainingBytes += _remainingLength;
LittleEndian.putInt(data, offset+4, remainingBytes); LittleEndian.putInt(data, offset+4, remainingBytes);
int pos = offset+8; int pos = offset+8;
iterator = _childRecords.iterator(); iterator = _childRecords.iterator();

View File

@ -17,8 +17,7 @@
package org.apache.poi.hssf.model; package org.apache.poi.hssf.model;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.ddf.*;
import org.apache.poi.ddf.EscherDggRecord;
import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.record.ContinueRecord; import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.hssf.record.DrawingRecord; import org.apache.poi.hssf.record.DrawingRecord;
@ -36,6 +35,7 @@ import org.apache.poi.hssf.usermodel.HSSFPatriarch;
import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFTestHelper; import org.apache.poi.hssf.usermodel.HSSFTestHelper;
import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.HexRead; import org.apache.poi.util.HexRead;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
@ -188,15 +188,53 @@ public class TestDrawingAggregate extends TestCase {
// System.out.println("[WARN] Cannot read " + file.getName()); // System.out.println("[WARN] Cannot read " + file.getName());
continue; continue;
} }
try { assertWriteAndReadBack(wb);
assertWriteAndReadBack(wb);
} catch (Throwable e){
//e.printStackTrace();
System.err.println("[ERROR] assertion failed for " + file.getName() + ": " + e.getMessage());
}
} }
} }
/**
 * When reading incomplete data, ensure that the serialized bytes
 * match the source.
 */
public void testIncompleteData() {
    // The lengths declared by EscherDgContainer and EscherSpgrContainer exceed
    // the actual length of the data
    String hexDump =
            " 0F 00 02 F0 30 03 00 00 10 00 08 F0 08 00 00 " +
            " 00 07 00 00 00 B2 04 00 00 0F 00 03 F0 18 03 00 " +
            " 00 0F 00 04 F0 28 00 00 00 01 00 09 F0 10 00 00 " +
            " 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 " +
            " 00 02 00 0A F0 08 00 00 00 00 04 00 00 05 00 00 " +
            " 00 0F 00 04 F0 74 00 00 00 92 0C 0A F0 08 00 00 " +
            " 00 AD 04 00 00 00 0A 00 00 63 00 0B F0 3A 00 00 " +
            " 00 7F 00 04 01 E5 01 BF 00 08 00 08 00 81 01 4E " +
            " 00 00 08 BF 01 10 00 10 00 80 C3 16 00 00 00 BF " +
            " 03 00 00 02 00 44 00 69 00 61 00 67 00 72 00 61 " +
            " 00 6D 00 6D 00 20 00 32 00 00 00 00 00 10 F0 12 " +
            " 00 00 00 00 00 05 00 00 00 01 00 00 00 0B 00 00 " +
            " 00 0F 00 66 00 00 00 11 F0 00 00 00 00 ";
    byte[] source = HexRead.readFromString(hexDump);

    // parse the buffer record by record until it is exhausted
    EscherRecordFactory factory = new DefaultEscherRecordFactory();
    List<EscherRecord> parsed = new ArrayList<EscherRecord>();
    int offset = 0;
    while (offset < source.length) {
        EscherRecord record = factory.createRecord(source, offset);
        offset += record.fillFields(source, offset, factory);
        parsed.add(record);
    }
    assertEquals("data was not fully read", source.length, offset);

    // serialize the parsed records back into one byte array
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    for (EscherRecord record : parsed) {
        byte[] recordBytes = record.serialize();
        serialized.write(recordBytes, 0, recordBytes.length);
    }

    String expectedHex = HexDump.toHex(source, 10);
    String actualHex = HexDump.toHex(serialized.toByteArray(), 10);
    assertEquals(expectedHex, actualHex);
}
/** /**
* TODO: figure out why it fails with "RecordFormatException: 0 bytes written but getRecordSize() reports 80" * TODO: figure out why it fails with "RecordFormatException: 0 bytes written but getRecordSize() reports 80"
*/ */
@ -266,6 +304,28 @@ public class TestDrawingAggregate extends TestCase {
assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave)); assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
} }
/**
 * Checks that the drawing records of the first sheet of "49581.xls"
 * (a workbook with charts, going by the test name) serialize back to
 * exactly the same bytes after they have been aggregated.
 */
public void testFileWithCharts() {
    HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("49581.xls");
    HSSFSheet sh = wb.getSheetAt(0);
    InternalSheet ish = HSSFTestHelper.getSheetForTest(sh);

    List<RecordBase> records = ish.getRecords();
    // records to be aggregated
    List<RecordBase> dgRecords = records.subList(19, 21);
    byte[] dgBytes = toByteArray(dgRecords);

    // NOTE(review): presumably this call is what creates the EscherAggregate
    // looked up below - confirm against HSSFSheet
    sh.getDrawingPatriarch();

    EscherAggregate agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid);
    assertNotNull("no EscherAggregate found in the sheet", agg);
    byte[] dgBytesAfterSave = agg.serialize();

    assertEquals("different size of drawing data before and after save",
            dgBytes.length, dgBytesAfterSave.length);
    // Report the first differing position in the failure itself instead of
    // printing positions to stdout and failing with a generic message.
    for (int i = 0; i < dgBytes.length; i++) {
        if (dgBytes[i] != dgBytesAfterSave[i]) {
            fail("drawing data before and after save is different at pos = " + i
                    + ": expected " + dgBytes[i] + " but was " + dgBytesAfterSave[i]);
        }
    }
}
/** /**
* test reading drawing aggregate from a test file from Bugzilla 45129 * test reading drawing aggregate from a test file from Bugzilla 45129
*/ */

View File

@ -0,0 +1,82 @@
package org.apache.poi.hssf.model;
import junit.framework.TestCase;
import org.apache.poi.ddf.DefaultEscherRecordFactory;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherTextboxRecord;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.record.EscherAggregate;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RecordBase;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFTestHelper;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
/**
 * Tests for {@link DefaultEscherRecordFactory#isContainer(short, short)} and for
 * the Escher record tree aggregated from the drawing records of an HSSF sheet.
 *
 * Created: 18.06.12
 *
 * @author Evgeniy Berlog
 */
public class TestEscherRecordFactory extends TestCase {

    /**
     * Serializes the given records one after another into a single byte array.
     * Every element is cast to {@link Record}, so a list holding anything else
     * fails with a ClassCastException.
     *
     * @param records the records to serialize, in order
     * @return the concatenated serialized bytes
     */
    private static byte[] toByteArray(List<RecordBase> records) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            for (RecordBase rb : records) {
                out.write(((Record) rb).serialize());
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return out.toByteArray();
    }

    /**
     * Checks the three rules of container detection: the fixed container id
     * range, the low-nibble options check outside that range, and the textbox
     * record exception.
     */
    public void testDetectContainer() {
        final short noOptions = (short) 0x0;

        // ids inside the container range are containers even with empty options
        assertTrue(DefaultEscherRecordFactory.isContainer(noOptions, EscherContainerRecord.DG_CONTAINER));
        assertTrue(DefaultEscherRecordFactory.isContainer(noOptions, EscherContainerRecord.SOLVER_CONTAINER));
        assertTrue(DefaultEscherRecordFactory.isContainer(noOptions, EscherContainerRecord.SP_CONTAINER));
        assertTrue(DefaultEscherRecordFactory.isContainer(noOptions, EscherContainerRecord.DGG_CONTAINER));
        assertTrue(DefaultEscherRecordFactory.isContainer(noOptions, EscherContainerRecord.BSTORE_CONTAINER));
        assertTrue(DefaultEscherRecordFactory.isContainer(noOptions, EscherContainerRecord.SPGR_CONTAINER));

        // ... and that holds for every possible options value. Checking all of
        // them is cheap and keeps the test deterministic. The inner counter is
        // an int so that it cannot wrap around at Short.MAX_VALUE.
        for (short id = EscherContainerRecord.DGG_CONTAINER; id <= EscherContainerRecord.SOLVER_CONTAINER; id++) {
            for (int options = Short.MIN_VALUE; options <= Short.MAX_VALUE; options++) {
                if (!DefaultEscherRecordFactory.isContainer((short) options, id)) {
                    fail("expected a container for options=" + options + ", recordId=" + id);
                }
            }
        }

        // ids just outside the range fall back to the options check
        final short belowRange = (short) (EscherContainerRecord.DGG_CONTAINER - 1);
        final short aboveRange = (short) (EscherContainerRecord.SOLVER_CONTAINER + 1);

        assertFalse(DefaultEscherRecordFactory.isContainer(noOptions, belowRange));
        assertFalse(DefaultEscherRecordFactory.isContainer(noOptions, aboveRange));

        assertTrue(DefaultEscherRecordFactory.isContainer((short) 0x000F, belowRange));
        assertTrue(DefaultEscherRecordFactory.isContainer((short) 0xFFFF, belowRange));
        assertFalse(DefaultEscherRecordFactory.isContainer((short) 0x000C, belowRange));
        assertFalse(DefaultEscherRecordFactory.isContainer((short) 0xCCCC, belowRange));

        // a textbox record is never a container, whatever its options are
        assertFalse(DefaultEscherRecordFactory.isContainer((short) 0x000F, EscherTextboxRecord.RECORD_ID));
        assertFalse(DefaultEscherRecordFactory.isContainer((short) 0xCCCC, EscherTextboxRecord.RECORD_ID));
    }

    /**
     * Checks that the first Escher record aggregated from the first sheet of
     * "47251.xls" is a DG container with empty options, and that the aggregate
     * serializes back to the original drawing bytes.
     */
    public void testDgContainerMustBeRootOfHSSFSheetEscherRecords() throws IOException {
        HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("47251.xls");
        HSSFSheet sh = wb.getSheetAt(0);
        InternalSheet ish = HSSFTestHelper.getSheetForTest(sh);

        List<RecordBase> records = ish.getRecords();
        // records to be aggregated
        List<RecordBase> dgRecords = records.subList(19, 23);
        byte[] dgBytes = toByteArray(dgRecords);

        // NOTE(review): presumably this call is what creates the EscherAggregate
        // looked up below - confirm against HSSFSheet
        sh.getDrawingPatriarch();

        EscherAggregate agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid);
        assertNotNull("no EscherAggregate found in the sheet", agg);

        assertTrue(agg.getEscherRecords().get(0) instanceof EscherContainerRecord);
        assertEquals(EscherContainerRecord.DG_CONTAINER, agg.getEscherRecords().get(0).getRecordId());
        assertEquals((short) 0x0, agg.getEscherRecords().get(0).getOptions());

        byte[] dgBytesAfterSave = agg.serialize();
        assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length);
        assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
    }
}