mirror of https://github.com/apache/poi.git
Fix 43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@645560 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0a4b7bec9f
commit
4700af1209
|
@ -37,6 +37,7 @@
|
|||
|
||||
<!-- Don't forget to update status.xml too! -->
|
||||
<release version="3.0.3-beta1" date="2008-04-??">
|
||||
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">refactored all junits' usage of HSSF.testdata.path to one place</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">44739 - Small fixes for conditional formatting (regions with max row/col index)</action>
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
<!-- Don't forget to update changes.xml too! -->
|
||||
<changes>
|
||||
<release version="3.0.3-beta1" date="2008-04-??">
|
||||
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">refactored all junits' usage of HSSF.testdata.path to one place</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">44739 - Small fixes for conditional formatting (regions with max row/col index)</action>
|
||||
|
|
|
@ -47,10 +47,32 @@ public abstract class ChunkHeader {
|
|||
ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 18);
|
||||
|
||||
return ch;
|
||||
} else if(documentVersion == 5) {
|
||||
throw new RuntimeException("TODO");
|
||||
} else if(documentVersion == 5 || documentVersion == 4) {
|
||||
ChunkHeaderV4V5 ch = new ChunkHeaderV4V5();
|
||||
|
||||
ch.type = (int)LittleEndian.getShort(data, offset + 0);
|
||||
ch.id = (int)LittleEndian.getShort(data, offset + 2);
|
||||
ch.unknown2 = (short)LittleEndian.getUnsignedByte(data, offset + 4);
|
||||
ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 5);
|
||||
ch.unknown1 = (short)LittleEndian.getShort(data, offset + 6);
|
||||
ch.length = (int)LittleEndian.getUInt(data, offset + 8);
|
||||
|
||||
return ch;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Visio files with versions below 5 are not supported, yours was " + documentVersion);
|
||||
throw new IllegalArgumentException("Visio files with versions below 4 are not supported, yours was " + documentVersion);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of a chunk header for the given document version.
|
||||
*/
|
||||
public static int getHeaderSize(int documentVersion) {
|
||||
if(documentVersion > 6) {
|
||||
return ChunkHeaderV11.getHeaderSize();
|
||||
} else if(documentVersion == 6) {
|
||||
return ChunkHeaderV6.getHeaderSize();
|
||||
} else {
|
||||
return ChunkHeaderV4V5.getHeaderSize();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hdgf.chunks;
|
||||
|
||||
/**
|
||||
* A chunk header from v4 or v5
|
||||
*/
|
||||
public class ChunkHeaderV4V5 extends ChunkHeader {
|
||||
protected short unknown2;
|
||||
protected short unknown3;
|
||||
|
||||
public short getUnknown2() {
|
||||
return unknown2;
|
||||
}
|
||||
public short getUnknown3() {
|
||||
return unknown3;
|
||||
}
|
||||
|
||||
protected static int getHeaderSize() {
|
||||
return 12;
|
||||
}
|
||||
|
||||
public int getSizeInBytes() {
|
||||
return getHeaderSize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the chunk have a trailer?
|
||||
*/
|
||||
public boolean hasTrailer() {
|
||||
// V4 and V5 never has trailers
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the chunk have a separator?
|
||||
*/
|
||||
public boolean hasSeparator() {
|
||||
// V4 and V5 never has separators
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -30,9 +30,13 @@ public class ChunkHeaderV6 extends ChunkHeader {
|
|||
return unknown3;
|
||||
}
|
||||
|
||||
public int getSizeInBytes() {
|
||||
protected static int getHeaderSize() {
|
||||
// Looks like it ought to be 19...
|
||||
return 19;
|
||||
}
|
||||
public int getSizeInBytes() {
|
||||
return getHeaderSize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the chunk have a trailer?
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.ArrayList;
|
|||
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.hdgf.HDGFDiagram;
|
||||
import org.apache.poi.hdgf.chunks.Chunk;
|
||||
import org.apache.poi.hdgf.chunks.Chunk.Command;
|
||||
import org.apache.poi.hdgf.streams.ChunkStream;
|
||||
import org.apache.poi.hdgf.streams.PointerContainingStream;
|
||||
|
@ -71,11 +72,13 @@ public class VisioTextExtractor extends POITextExtractor {
|
|||
if(stream instanceof ChunkStream) {
|
||||
ChunkStream cs = (ChunkStream)stream;
|
||||
for(int i=0; i<cs.getChunks().length; i++) {
|
||||
if(cs.getChunks()[i] != null &&
|
||||
cs.getChunks()[i].getName() != null &&
|
||||
cs.getChunks()[i].getName().equals("Text")) {
|
||||
Chunk chunk = cs.getChunks()[i];
|
||||
if(chunk != null &&
|
||||
chunk.getName() != null &&
|
||||
chunk.getName().equals("Text") &&
|
||||
chunk.getCommands().length > 0) {
|
||||
// First command
|
||||
Command cmd = cs.getChunks()[i].getCommands()[0];
|
||||
Command cmd = chunk.getCommands()[0];
|
||||
if(cmd != null && cmd.getValue() != null) {
|
||||
text.add( cmd.getValue().toString() );
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.util.ArrayList;
|
|||
|
||||
import org.apache.poi.hdgf.chunks.Chunk;
|
||||
import org.apache.poi.hdgf.chunks.ChunkFactory;
|
||||
import org.apache.poi.hdgf.chunks.ChunkHeader;
|
||||
import org.apache.poi.hdgf.pointers.Pointer;
|
||||
|
||||
public class ChunkStream extends Stream {
|
||||
|
@ -51,10 +52,17 @@ public class ChunkStream extends Stream {
|
|||
int pos = 0;
|
||||
byte[] contents = getStore().getContents();
|
||||
while(pos < contents.length) {
|
||||
Chunk chunk = chunkFactory.createChunk(contents, pos);
|
||||
chunksA.add(chunk);
|
||||
|
||||
pos += chunk.getOnDiskSize();
|
||||
// Ensure we have enough data to create a chunk from
|
||||
int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion());
|
||||
if(pos+headerSize <= contents.length) {
|
||||
Chunk chunk = chunkFactory.createChunk(contents, pos);
|
||||
chunksA.add(chunk);
|
||||
|
||||
pos += chunk.getOnDiskSize();
|
||||
} else {
|
||||
System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data");
|
||||
pos = contents.length;
|
||||
}
|
||||
}
|
||||
|
||||
chunks = (Chunk[])chunksA.toArray(new Chunk[chunksA.size()]);
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -17,25 +17,21 @@
|
|||
package org.apache.poi.hdgf.extractor;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.PrintStream;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.hdgf.HDGFDiagram;
|
||||
import org.apache.poi.hdgf.chunks.Chunk;
|
||||
import org.apache.poi.hdgf.chunks.ChunkFactory;
|
||||
import org.apache.poi.hdgf.pointers.Pointer;
|
||||
import org.apache.poi.hdgf.pointers.PointerFactory;
|
||||
import org.apache.poi.hssf.record.formula.eval.StringOperationEval;
|
||||
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
|
||||
public class TestVisioExtractor extends TestCase {
|
||||
private String filename;
|
||||
private String dirname;
|
||||
private String defFilename;
|
||||
protected void setUp() throws Exception {
|
||||
String dirname = System.getProperty("HDGF.testdata.path");
|
||||
filename = dirname + "/Test_Visio-Some_Random_Text.vsd";
|
||||
dirname = System.getProperty("HDGF.testdata.path");
|
||||
defFilename = dirname + "/Test_Visio-Some_Random_Text.vsd";
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -44,14 +40,14 @@ public class TestVisioExtractor extends TestCase {
|
|||
public void testCreation() throws Exception {
|
||||
VisioTextExtractor extractor;
|
||||
|
||||
extractor = new VisioTextExtractor(new FileInputStream(filename));
|
||||
extractor = new VisioTextExtractor(new FileInputStream(defFilename));
|
||||
assertNotNull(extractor);
|
||||
assertNotNull(extractor.getAllText());
|
||||
assertEquals(3, extractor.getAllText().length);
|
||||
|
||||
extractor = new VisioTextExtractor(
|
||||
new POIFSFileSystem(
|
||||
new FileInputStream(filename)
|
||||
new FileInputStream(defFilename)
|
||||
)
|
||||
);
|
||||
assertNotNull(extractor);
|
||||
|
@ -61,7 +57,7 @@ public class TestVisioExtractor extends TestCase {
|
|||
extractor = new VisioTextExtractor(
|
||||
new HDGFDiagram(
|
||||
new POIFSFileSystem(
|
||||
new FileInputStream(filename)
|
||||
new FileInputStream(defFilename)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
@ -72,7 +68,7 @@ public class TestVisioExtractor extends TestCase {
|
|||
|
||||
public void testExtraction() throws Exception {
|
||||
VisioTextExtractor extractor =
|
||||
new VisioTextExtractor(new FileInputStream(filename));
|
||||
new VisioTextExtractor(new FileInputStream(defFilename));
|
||||
|
||||
// Check the array fetch
|
||||
String[] text = extractor.getAllText();
|
||||
|
@ -88,13 +84,30 @@ public class TestVisioExtractor extends TestCase {
|
|||
assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS);
|
||||
}
|
||||
|
||||
public void testProblemFiles() throws Exception {
|
||||
File a = new File(dirname, "44594.vsd");
|
||||
VisioTextExtractor.main(new String[] {a.toString()});
|
||||
|
||||
File b = new File(dirname, "44594-2.vsd");
|
||||
VisioTextExtractor.main(new String[] {b.toString()});
|
||||
|
||||
File c = new File(dirname, "ShortChunk1.vsd");
|
||||
VisioTextExtractor.main(new String[] {c.toString()});
|
||||
|
||||
File d = new File(dirname, "ShortChunk2.vsd");
|
||||
VisioTextExtractor.main(new String[] {d.toString()});
|
||||
|
||||
File e = new File(dirname, "ShortChunk3.vsd");
|
||||
VisioTextExtractor.main(new String[] {e.toString()});
|
||||
}
|
||||
|
||||
public void testMain() throws Exception {
|
||||
PrintStream oldOut = System.out;
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
PrintStream capture = new PrintStream(baos);
|
||||
System.setOut(capture);
|
||||
|
||||
VisioTextExtractor.main(new String[] {filename});
|
||||
VisioTextExtractor.main(new String[] {defFilename});
|
||||
|
||||
// Put things back
|
||||
System.setOut(oldOut);
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hdgf.streams;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
|
||||
import org.apache.poi.hdgf.HDGFDiagram;
|
||||
import org.apache.poi.hdgf.chunks.ChunkFactory;
|
||||
import org.apache.poi.hdgf.pointers.Pointer;
|
||||
import org.apache.poi.hdgf.pointers.PointerFactory;
|
||||
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
|
||||
/**
|
||||
* Tests for bugs with streams
|
||||
*/
|
||||
public class TestStreamBugs extends StreamTest {
|
||||
private byte[] contents;
|
||||
private ChunkFactory chunkFactory;
|
||||
private PointerFactory ptrFactory;
|
||||
private POIFSFileSystem filesystem;
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
String dirname = System.getProperty("HDGF.testdata.path");
|
||||
String filename = dirname + "/44594.vsd";
|
||||
ptrFactory = new PointerFactory(11);
|
||||
chunkFactory = new ChunkFactory(11);
|
||||
|
||||
FileInputStream fin = new FileInputStream(filename);
|
||||
filesystem = new POIFSFileSystem(fin);
|
||||
|
||||
DocumentEntry docProps =
|
||||
(DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");
|
||||
|
||||
// Grab the document stream
|
||||
contents = new byte[docProps.getSize()];
|
||||
filesystem.createDocumentInputStream("VisioDocument").read(contents);
|
||||
}
|
||||
|
||||
public void testGetTrailer() throws Exception {
|
||||
Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24);
|
||||
Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory);
|
||||
}
|
||||
|
||||
public void TOIMPLEMENTtestGetCertainChunks() throws Exception {
|
||||
int offsetA = 3708;
|
||||
int offsetB = 3744;
|
||||
}
|
||||
|
||||
public void testGetChildren() throws Exception {
|
||||
Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24);
|
||||
TrailerStream trailer = (TrailerStream)
|
||||
Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory);
|
||||
|
||||
// Get without recursing
|
||||
Pointer[] ptrs = trailer.getChildPointers();
|
||||
for(int i=0; i<ptrs.length; i++) {
|
||||
Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
|
||||
}
|
||||
|
||||
// Get with recursing into chunks
|
||||
for(int i=0; i<ptrs.length; i++) {
|
||||
Stream stream =
|
||||
Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
|
||||
if(stream instanceof ChunkStream) {
|
||||
ChunkStream cStream = (ChunkStream)stream;
|
||||
cStream.findChunks();
|
||||
}
|
||||
}
|
||||
|
||||
// Get with recursing into chunks and pointers
|
||||
for(int i=0; i<ptrs.length; i++) {
|
||||
Stream stream =
|
||||
Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
|
||||
if(stream instanceof PointerContainingStream) {
|
||||
PointerContainingStream pStream =
|
||||
(PointerContainingStream)stream;
|
||||
pStream.findChildren(contents);
|
||||
}
|
||||
}
|
||||
|
||||
trailer.findChildren(contents);
|
||||
}
|
||||
|
||||
public void testOpen() throws Exception {
|
||||
HDGFDiagram dg = new HDGFDiagram(filesystem);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue