Fix 43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@645560 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-04-07 15:20:57 +00:00
parent 0a4b7bec9f
commit 4700af1209
11 changed files with 236 additions and 26 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.0.3-beta1" date="2008-04-??">
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
<action dev="POI-DEVELOPERS" type="fix">refactored all junits' usage of HSSF.testdata.path to one place</action>
<action dev="POI-DEVELOPERS" type="fix">44739 - Small fixes for conditional formatting (regions with max row/col index)</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.0.3-beta1" date="2008-04-??">
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
<action dev="POI-DEVELOPERS" type="fix">refactored all junits' usage of HSSF.testdata.path to one place</action>
<action dev="POI-DEVELOPERS" type="fix">44739 - Small fixes for conditional formatting (regions with max row/col index)</action>

View File

@ -47,10 +47,32 @@ public abstract class ChunkHeader {
ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 18);
return ch;
} else if(documentVersion == 5) {
throw new RuntimeException("TODO");
} else if(documentVersion == 5 || documentVersion == 4) {
ChunkHeaderV4V5 ch = new ChunkHeaderV4V5();
ch.type = (int)LittleEndian.getShort(data, offset + 0);
ch.id = (int)LittleEndian.getShort(data, offset + 2);
ch.unknown2 = (short)LittleEndian.getUnsignedByte(data, offset + 4);
ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 5);
ch.unknown1 = (short)LittleEndian.getShort(data, offset + 6);
ch.length = (int)LittleEndian.getUInt(data, offset + 8);
return ch;
} else {
throw new IllegalArgumentException("Visio files with versions below 5 are not supported, yours was " + documentVersion);
throw new IllegalArgumentException("Visio files with versions below 4 are not supported, yours was " + documentVersion);
}
}
/**
 * Looks up how many bytes a chunk header occupies for a given
 *  document version.
 * Any version below 6 is reported with the v4/v5 header size,
 *  version 6 has its own size, and everything above 6 uses
 *  the v11 header size.
 *
 * @param documentVersion the version of the Visio document
 * @return the header size, in bytes, as reported by the matching
 *          ChunkHeader subclass
 */
public static int getHeaderSize(int documentVersion) {
	// Oldest layout first: v4 / v5 (and anything lower) share one size
	if(documentVersion < 6) {
		return ChunkHeaderV4V5.getHeaderSize();
	}
	// v6 has a layout of its own
	if(documentVersion == 6) {
		return ChunkHeaderV6.getHeaderSize();
	}
	// Everything newer than v6 uses the v11 layout
	return ChunkHeaderV11.getHeaderSize();
}

View File

@ -0,0 +1,56 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hdgf.chunks;
/**
 * The header of a chunk, in the form used by v4 and v5 documents.
 * This form of header is a fixed 12 bytes long, and is never
 *  followed by a trailer or a separator.
 */
public class ChunkHeaderV4V5 extends ChunkHeader {
	/** Number of bytes a v4 / v5 chunk header takes up */
	private static final int HEADER_SIZE = 12;

	protected short unknown2;
	protected short unknown3;

	/**
	 * How big is a v4 / v5 chunk header, in bytes?
	 */
	protected static int getHeaderSize() {
		return HEADER_SIZE;
	}

	/**
	 * How big is this header, in bytes? Always the
	 *  same as the fixed v4 / v5 header size.
	 */
	public int getSizeInBytes() {
		return HEADER_SIZE;
	}

	/**
	 * Is the chunk followed by a trailer?
	 */
	public boolean hasTrailer() {
		// Trailers are never present on v4 or v5 chunks
		return false;
	}

	/**
	 * Is the chunk followed by a separator?
	 */
	public boolean hasSeparator() {
		// Separators are never present on v4 or v5 chunks
		return false;
	}

	/**
	 * The second unknown value held in the header
	 */
	public short getUnknown2() {
		return unknown2;
	}

	/**
	 * The third unknown value held in the header
	 */
	public short getUnknown3() {
		return unknown3;
	}
}

View File

@ -30,9 +30,13 @@ public class ChunkHeaderV6 extends ChunkHeader {
return unknown3;
}
public int getSizeInBytes() {
protected static int getHeaderSize() {
// Looks like it ought to be 19...
return 19;
}
public int getSizeInBytes() {
return getHeaderSize();
}
/**
* Does the chunk have a trailer?

View File

@ -23,6 +23,7 @@ import java.util.ArrayList;
import org.apache.poi.POITextExtractor;
import org.apache.poi.hdgf.HDGFDiagram;
import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.Chunk.Command;
import org.apache.poi.hdgf.streams.ChunkStream;
import org.apache.poi.hdgf.streams.PointerContainingStream;
@ -71,11 +72,13 @@ public class VisioTextExtractor extends POITextExtractor {
if(stream instanceof ChunkStream) {
ChunkStream cs = (ChunkStream)stream;
for(int i=0; i<cs.getChunks().length; i++) {
if(cs.getChunks()[i] != null &&
cs.getChunks()[i].getName() != null &&
cs.getChunks()[i].getName().equals("Text")) {
Chunk chunk = cs.getChunks()[i];
if(chunk != null &&
chunk.getName() != null &&
chunk.getName().equals("Text") &&
chunk.getCommands().length > 0) {
// First command
Command cmd = cs.getChunks()[i].getCommands()[0];
Command cmd = chunk.getCommands()[0];
if(cmd != null && cmd.getValue() != null) {
text.add( cmd.getValue().toString() );
}

View File

@ -20,6 +20,7 @@ import java.util.ArrayList;
import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.ChunkFactory;
import org.apache.poi.hdgf.chunks.ChunkHeader;
import org.apache.poi.hdgf.pointers.Pointer;
public class ChunkStream extends Stream {
@ -51,10 +52,17 @@ public class ChunkStream extends Stream {
int pos = 0;
byte[] contents = getStore().getContents();
while(pos < contents.length) {
Chunk chunk = chunkFactory.createChunk(contents, pos);
chunksA.add(chunk);
pos += chunk.getOnDiskSize();
// Ensure we have enough data to create a chunk from
int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion());
if(pos+headerSize <= contents.length) {
Chunk chunk = chunkFactory.createChunk(contents, pos);
chunksA.add(chunk);
pos += chunk.getOnDiskSize();
} else {
System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data");
pos = contents.length;
}
}
chunks = (Chunk[])chunksA.toArray(new Chunk[chunksA.size()]);

View File

@ -17,25 +17,21 @@
package org.apache.poi.hdgf.extractor;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.PrintStream;
import junit.framework.TestCase;
import org.apache.poi.hdgf.HDGFDiagram;
import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.ChunkFactory;
import org.apache.poi.hdgf.pointers.Pointer;
import org.apache.poi.hdgf.pointers.PointerFactory;
import org.apache.poi.hssf.record.formula.eval.StringOperationEval;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class TestVisioExtractor extends TestCase {
private String filename;
private String dirname;
private String defFilename;
protected void setUp() throws Exception {
String dirname = System.getProperty("HDGF.testdata.path");
filename = dirname + "/Test_Visio-Some_Random_Text.vsd";
dirname = System.getProperty("HDGF.testdata.path");
defFilename = dirname + "/Test_Visio-Some_Random_Text.vsd";
}
/**
@ -44,14 +40,14 @@ public class TestVisioExtractor extends TestCase {
public void testCreation() throws Exception {
VisioTextExtractor extractor;
extractor = new VisioTextExtractor(new FileInputStream(filename));
extractor = new VisioTextExtractor(new FileInputStream(defFilename));
assertNotNull(extractor);
assertNotNull(extractor.getAllText());
assertEquals(3, extractor.getAllText().length);
extractor = new VisioTextExtractor(
new POIFSFileSystem(
new FileInputStream(filename)
new FileInputStream(defFilename)
)
);
assertNotNull(extractor);
@ -61,7 +57,7 @@ public class TestVisioExtractor extends TestCase {
extractor = new VisioTextExtractor(
new HDGFDiagram(
new POIFSFileSystem(
new FileInputStream(filename)
new FileInputStream(defFilename)
)
)
);
@ -72,7 +68,7 @@ public class TestVisioExtractor extends TestCase {
public void testExtraction() throws Exception {
VisioTextExtractor extractor =
new VisioTextExtractor(new FileInputStream(filename));
new VisioTextExtractor(new FileInputStream(defFilename));
// Check the array fetch
String[] text = extractor.getAllText();
@ -88,13 +84,30 @@ public class TestVisioExtractor extends TestCase {
assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS);
}
/**
 * Runs the extractor's command line entry point over each
 *  of the problem files in turn. Nothing is asserted on the
 *  output, the extractor simply must not throw.
 */
public void testProblemFiles() throws Exception {
	String[] problemFiles = new String[] {
		"44594.vsd",
		"44594-2.vsd",
		"ShortChunk1.vsd",
		"ShortChunk2.vsd",
		"ShortChunk3.vsd"
	};

	for(int i=0; i<problemFiles.length; i++) {
		File problemFile = new File(dirname, problemFiles[i]);
		VisioTextExtractor.main(new String[] { problemFile.toString() });
	}
}
public void testMain() throws Exception {
PrintStream oldOut = System.out;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream capture = new PrintStream(baos);
System.setOut(capture);
VisioTextExtractor.main(new String[] {filename});
VisioTextExtractor.main(new String[] {defFilename});
// Put things back
System.setOut(oldOut);

View File

@ -0,0 +1,102 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hdgf.streams;
import java.io.FileInputStream;
import org.apache.poi.hdgf.HDGFDiagram;
import org.apache.poi.hdgf.chunks.ChunkFactory;
import org.apache.poi.hdgf.pointers.Pointer;
import org.apache.poi.hdgf.pointers.PointerFactory;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Tests for bugs with streams.
 * All the tests work against the 44594.vsd sample file, and only
 *  check that the file can be processed without an exception
 *  being thrown.
 */
public class TestStreamBugs extends StreamTest {
	private byte[] contents;
	private ChunkFactory chunkFactory;
	private PointerFactory ptrFactory;
	private POIFSFileSystem filesystem;

	/**
	 * Opens the sample file, and loads the raw bytes of its
	 *  "VisioDocument" entry into {@link #contents}.
	 */
	protected void setUp() throws Exception {
		String dirname = System.getProperty("HDGF.testdata.path");
		String filename = dirname + "/44594.vsd";

		ptrFactory = new PointerFactory(11);
		chunkFactory = new ChunkFactory(11);

		// Don't leak a file handle for every test that gets run.
		// NOTE(review): assumes POIFSFileSystem has finished with the
		//  input stream once its constructor returns - confirm
		FileInputStream fin = new FileInputStream(filename);
		try {
			filesystem = new POIFSFileSystem(fin);
		} finally {
			fin.close();
		}

		DocumentEntry docProps =
			(DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");

		// Grab the document stream
		contents = new byte[docProps.getSize()];
		filesystem.createDocumentInputStream("VisioDocument").read(contents);
	}

	/**
	 * Checks a stream can be created from the pointer at 0x24
	 */
	public void testGetTrailer() throws Exception {
		Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24);
		Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory);
	}

	/**
	 * Placeholder, not yet implemented. Only records the
	 *  two offsets of interest for now.
	 */
	public void TOIMPLEMENTtestGetCertainChunks() throws Exception {
		int offsetA = 3708;
		int offsetB = 3744;
	}

	/**
	 * Checks the children of the trailer stream can be created,
	 *  first on their own, then with their chunks located, then
	 *  with their child pointers followed.
	 */
	public void testGetChildren() throws Exception {
		Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24);
		TrailerStream trailer = (TrailerStream)
			Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory);

		// Get without recursing
		Pointer[] ptrs = trailer.getChildPointers();
		for(int i=0; i<ptrs.length; i++) {
			Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
		}

		// Get with recursing into chunks
		for(int i=0; i<ptrs.length; i++) {
			Stream stream =
				Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
			if(stream instanceof ChunkStream) {
				ChunkStream cStream = (ChunkStream)stream;
				cStream.findChunks();
			}
		}

		// Get with recursing into chunks and pointers
		for(int i=0; i<ptrs.length; i++) {
			Stream stream =
				Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
			if(stream instanceof PointerContainingStream) {
				PointerContainingStream pStream =
					(PointerContainingStream)stream;
				pStream.findChildren(contents);
			}
		}

		trailer.findChildren(contents);
	}

	/**
	 * Checks the whole diagram can be opened from the filesystem
	 */
	public void testOpen() throws Exception {
		// Only the construction matters, the result isn't inspected
		new HDGFDiagram(filesystem);
	}
}