First stab at hdgf, an implementation of the visio format. Basic support for processing the equivalent of records - pointers and blocks. Now to refactor it into something sane!

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@548428 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2007-06-18 18:10:57 +00:00
parent f05388b261
commit 0b42cc2713
3 changed files with 468 additions and 0 deletions

View File

@ -0,0 +1,287 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hdgf;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;
/**
* See
* http://www.redferni.uklinux.net/visio/
* http://www.gnome.ru/projects/docs/vsdocs.html
* http://www.gnome.ru/projects/docs/slide1.png
* http://www.gnome.ru/projects/docs/slide2.png
*/
public class HDGFDiagram {
private static final String VISIO_HEADER = "Visio (TM) Drawing\r\n";
private POIFSFileSystem filesystem;
private byte[] _docstream;
private short version;
private long docSize;
private VisioPointer trailerPointer;
private PointerBlock trailer;
/**
 * Loads the "VisioDocument" stream from the supplied filesystem,
 * checks that it starts with the Visio header text, and then reads
 * the version, the document size, the pointer to the trailer and
 * finally the (compressed) trailer block itself.
 *
 * @param fs the OLE2 filesystem holding the Visio drawing
 * @throws IOException if the stream is missing or can't be read in full
 * @throws IllegalArgumentException if the stream doesn't start with
 *  the Visio header text
 */
public HDGFDiagram(POIFSFileSystem fs) throws IOException {
    filesystem = fs;

    DocumentEntry docProps =
        (DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");

    // Grab the document stream. A single read() call is allowed to
    //  hand back fewer bytes than were asked for, so keep going until
    //  the whole buffer has been filled, and always close the stream.
    _docstream = new byte[docProps.getSize()];
    java.io.InputStream in = filesystem.createDocumentInputStream("VisioDocument");
    try {
        int filled = 0;
        while(filled < _docstream.length) {
            int got = in.read(_docstream, filled, _docstream.length - filled);
            if(got <= 0) {
                throw new IOException(
                        "VisioDocument stream ended after " + filled +
                        " of " + _docstream.length + " bytes"
                );
            }
            filled += got;
        }
    } finally {
        in.close();
    }

    // Check it's really visio. Too short to hold the header means
    //  it can't be, so report that rather than an index error
    if(_docstream.length < VISIO_HEADER.length()) {
        throw new IllegalArgumentException("Wasn't a valid visio document, was only " + _docstream.length + " bytes long");
    }
    // The header is plain ASCII, so don't depend on the platform charset
    String typeString = new String(_docstream, 0, VISIO_HEADER.length(), "US-ASCII");
    if(! typeString.equals(VISIO_HEADER)) {
        throw new IllegalArgumentException("Wasn't a valid visio document, started with " + typeString);
    }

    // Grab the version number, 0x1a -> 0x1b
    version = LittleEndian.getShort(_docstream, 0x1a);
    // Grab the document size, 0x1c -> 0x1f
    docSize = LittleEndian.getUInt(_docstream, 0x1c);
    // ??? 0x20 -> 0x23

    // Grab the pointer to the trailer
    trailerPointer = VisioPointer.getPointerAt(_docstream, 0x24);

    // And now grab the trailer
    trailer = new CompressedPointerBlock(trailerPointer, _docstream);
}
/**
 * Prints details of the trailer pointer to System.err, followed
 * by details of every pointer held in the trailer. Where a pointer
 * leads to a block of further pointers, the type and length of each
 * of those is listed; where it leads to strings, the size of the
 * block contents is printed.
 *
 * @throws IOException if a pointed-to block can't be loaded
 */
public void debug() throws IOException {
    System.err.println("Trailer is at " + trailerPointer.offset);
    System.err.println("Trailer has type " + trailerPointer.type);
    System.err.println("Trailer has length " + trailerPointer.length);
    System.err.println("Trailer has format " + trailerPointer.format);

    VisioPointer[] topLevel = trailer.getPointers();
    for(int idx=0; idx<topLevel.length; idx++) {
        VisioPointer ptr = topLevel[idx];

        System.err.println("Looking at pointer " + idx);
        System.err.println("\tType is " + ptr.type + "\t\t" + Integer.toHexString(ptr.type));
        System.err.println("\tOffset is " + ptr.offset + "\t\t" + Long.toHexString(ptr.offset));
        System.err.println("\tAddress is " + ptr.address + "\t" + Long.toHexString(ptr.address));
        System.err.println("\tLength is " + ptr.length + "\t\t" + Long.toHexString(ptr.length));
        System.err.println("\tFormat is " + ptr.format + "\t\t" + Long.toHexString(ptr.format));
        System.err.println("\tCompressed is " + ptr.destinationCompressed());

        // Blocks of pointers get their children summarised
        if(ptr.destinationHasPointers()) {
            VisioPointer[] children =
                PointerBlock.createAppropriateBlock(ptr, _docstream).getPointers();
            int childCount = (children == null) ? 0 : children.length;
            if(childCount > 0) {
                System.err.println("\tContains " + childCount + " other pointers");
                for(int c=0; c<childCount; c++) {
                    VisioPointer child = children[c];
                    String lead = "\t\t" + c;
                    System.err.println(lead + " - Type is " + child.type + "\t\t" + Integer.toHexString(child.type));
                    System.err.println(lead + " - Length is " + child.length + "\t\t" + Long.toHexString(child.length));
                }
            }
        }

        // Blocks of strings just report how much data they hold
        if(ptr.destinationHasStrings()) {
            System.err.println("**strings**");
            PointerBlock stringBlock = PointerBlock.createAppropriateBlock(ptr, _docstream);
            System.err.println(stringBlock.contents.length);
        }
    }
}
/**
 * Follows a fixed chain of pointers down from the trailer
 * (entries 8, 4, 5 and 1 in turn, then entry 0), printing the
 * type of each one on the way and fuller details of the last.
 * The indexes are hard coded, so this will only work on
 * Test_Visio-Some_Random_Text.vsd !
 */
public void debugTestFile() throws Exception {
    System.err.println();

    // Which entry to follow at each level, starting from the trailer
    int[] path = new int[] { 8, 4, 5, 1 };

    PointerBlock block = trailer;
    for(int step=0; step<path.length; step++) {
        VisioPointer hop = block.pointers[path[step]];
        System.err.println(hop.type + " " + Integer.toHexString(hop.type));
        block = PointerBlock.createAppropriateBlock(hop, _docstream);
    }

    // The first entry of the last block is the one of interest
    VisioPointer leaf = block.pointers[0];
    System.err.println();
    System.err.println(leaf.type + " " + Integer.toHexString(leaf.type));
    System.err.println(leaf.offset + " " + Long.toHexString(leaf.offset));
    System.err.println(leaf.length + " " + Long.toHexString(leaf.length));
    System.err.println("Has Strings - " + leaf.destinationHasStrings());
    System.err.println("Compressed - " + leaf.destinationCompressed());

    // Loading the block is done for the output it generates
    PointerBlock.createAppropriateBlock(leaf, _docstream);
}
/**
 * For testing only. Opens the Visio file named by the first
 * argument, and runs both of the debug dumps over it.
 *
 * @param args the path of the file to open, as the first entry
 * @throws IllegalArgumentException if no file was given
 */
public static void main(String args[]) throws Exception {
    if(args.length < 1) {
        throw new IllegalArgumentException("Use: HDGFDiagram <file.vsd>");
    }

    // Make sure the file handle is released whether or not the
    //  document could be loaded
    FileInputStream in = new FileInputStream(args[0]);
    HDGFDiagram hdgf;
    try {
        hdgf = new HDGFDiagram(new POIFSFileSystem(in));
    } finally {
        in.close();
    }

    hdgf.debug();
    hdgf.debugTestFile();
}
/**
 * A block containing lots of pointers to other blocks.
 * Holds a copy of the bytes that its own pointer refers to and,
 *  where that pointer says the block has child pointers, the
 *  decoded child pointers too.
 */
public static class PointerBlock {
    /** The pointer that led to this block */
    protected VisioPointer pointer;
    /** Copy of the block's bytes, set by {@link #processData(byte[], int, int)} */
    private byte[] contents;
    /** The child pointers, or null if none were decoded */
    protected VisioPointer[] pointers;

    /**
     * Records the pointer only. Callers are expected to follow up
     *  with {@link #processData(byte[], int, int)} themselves.
     */
    protected PointerBlock(VisioPointer pointer) {
        this.pointer = pointer;
    }

    /**
     * Builds the block from the part of the data that the
     *  pointer's offset and length refer to.
     */
    protected PointerBlock(VisioPointer pointer, byte[] data) {
        this(pointer);
        processData(data, (int)pointer.offset, (int)pointer.length);
    }

    /**
     * Creates a {@link CompressedPointerBlock} if the pointer says its
     *  destination is compressed, and a plain PointerBlock otherwise.
     */
    protected static PointerBlock createAppropriateBlock(VisioPointer pointer, byte[] data) throws IOException {
        if(pointer.destinationCompressed()) {
            return new CompressedPointerBlock(pointer,data);
        } else {
            return new PointerBlock(pointer,data);
        }
    }

    /**
     * @return the child pointers, or null if this block had none decoded
     */
    public VisioPointer[] getPointers() { return pointers; }

    /**
     * Copies the requested part of the data into the contents, then
     *  decodes any child pointers. For blocks of strings, the start
     *  of the contents is also dumped to System.err as an aid to
     *  working out the format.
     * A range that falls partly or wholly outside of the data is
     *  trimmed to the part that really exists, which may be nothing.
     *
     * @param data the bytes holding the block
     * @param offset where in data the block starts
     * @param len how many bytes long the block is
     */
    protected void processData(byte[] data, int offset, int len) {
        // Trim the range to the data. The offset and length come from
        //  unsigned 32 bit values cast to int, so either could be
        //  negative or far too big
        if(offset < 0 || offset > data.length) {
            len = 0;
        } else if(len > data.length - offset) {
            len = data.length - offset;
        }
        if(len < 0) { len = 0; }

        contents = new byte[len];
        if(len > 0)
            System.arraycopy(data, offset, contents, 0, contents.length);

        // If we're of type 20, we have child pointers
        if(len > 0 && (pointer.type == 20 || pointer.destinationHasPointers())) {
            // Grab the offset to the number of pointers
            int nPointersAt = (int)LittleEndian.getUInt(contents, 0);
            int numPointers = (int)LittleEndian.getUInt(contents, nPointersAt);
            // Meaning not yet known, read but not used
            int unknownA = (int)LittleEndian.getUInt(contents, nPointersAt+4);

            pointers = new VisioPointer[numPointers];
            // The pointers follow on, each one 18 bytes long
            int pos = nPointersAt + 8;
            for(int i=0; i<numPointers; i++) {
                pointers[i] = VisioPointer.getPointerAt(contents, pos);
                pos += 18;
            }
        }

        // If we have strings, try to make sense of them
        if(len > 0 && (pointer.destinationHasStrings())) {
            // Dump the value at each of the first 64 positions, but
            //  never read beyond the end of a short block
            for(int i=0; i<64 && i+4 <= contents.length; i+=1) {
                short s = LittleEndian.getShort(contents, i);
                long l = LittleEndian.getUInt(contents, i);
                System.err.println(i + "\t" + s + "\t" + Integer.toHexString(s));
                System.err.println(i + "\t" + l + "\t" + Long.toHexString(l));
            }
        }
    }
}
/**
 * A block containing lots of pointers to other blocks, that
 *  is itself compressed.
 * The compressed bytes are decompressed with {@link LZW4HDGF}. The
 *  first 4 bytes of the result are kept aside as a header, and
 *  the remainder is processed as the block's data.
 */
public static class CompressedPointerBlock extends PointerBlock {
    /** The bytes of the block, as found in the file */
    protected byte[] compressedContents;
    /** The first 4 bytes of the decompressed data. Meaning not yet known */
    private byte[] blockHeader = new byte[4];

    /**
     * @param pointer the pointer to the compressed block
     * @param data the bytes that the pointer's offset is relative to
     * @throws IOException if the decompression fails
     * @throws IllegalArgumentException if the pointer refers to bytes
     *  outside of the data, or if the data decompresses to
     *  less than the 4 byte header
     */
    protected CompressedPointerBlock(VisioPointer pointer, byte[] data) throws IOException {
        super(pointer);

        // The offset and length are unsigned 32 bit values held in
        //  longs, so check them as longs before casting down to int
        if(pointer.offset < 0 || pointer.length < 0 ||
                pointer.offset + pointer.length > data.length) {
            throw new IllegalArgumentException(
                    "Pointer refers to " + pointer.length + " bytes at " +
                    pointer.offset + ", but there are only " + data.length +
                    " bytes of data"
            );
        }

        compressedContents = new byte[(int)pointer.length];
        System.arraycopy(data, (int)pointer.offset, compressedContents, 0, compressedContents.length);

        // Decompress
        LZW4HDGF lzw = new LZW4HDGF();
        byte[] decomp = lzw.decode(new ByteArrayInputStream(compressedContents));
        if(decomp.length < blockHeader.length) {
            throw new IllegalArgumentException(
                    "Compressed block only held " + decomp.length +
                    " bytes, which is too short for the " +
                    blockHeader.length + " byte header"
            );
        }

        // Split into the header, and the real contents
        System.arraycopy(decomp, 0, blockHeader, 0, blockHeader.length);
        processData(decomp, blockHeader.length, decomp.length - blockHeader.length);
    }
}
/**
 * A visio pointer, for visio versions 6+
 * Each one is 18 bytes long, made up of a 4 byte type, a 4 byte
 *  address, a 4 byte offset, a 4 byte length and a 2 byte format,
 *  all little endian.
 */
public static class VisioPointer {
    private int type;
    private long address;
    private long offset;
    private long length;
    private short format;

    /**
     * @return true if the format is in the range 0x40 to 0x4f
     */
    public boolean destinationHasStrings() {
        return (0x40 <= format && format < 0x50);
    }

    /**
     * @return true if the format is 0x1d or 0x1e, or is in
     *  the range 0x50 to 0x5f
     */
    public boolean destinationHasPointers() {
        if(format == 0x1d || format == 0x1e) return true;
        return (0x50 <= format && format < 0x60);
    }

    /**
     * @return true if the format is in the range 0xd0 to 0xde
     */
    public boolean destinationHasChunks() {
        // NOTE(review): the upper bound used to be 0xd0 too, which
        //  made the range empty so this could never be true. 0xdf
        //  is believed to be the intended limit - confirm
        //  against the format notes
        return (0xd0 <= format && format < 0xdf);
    }

    /**
     * @return true if the format has the compression flag set
     */
    public boolean destinationCompressed() {
        // Apparently, it's the second least significant bit
        return (format & 2) > 0;
    }

    /**
     * Decodes the 18 byte pointer found at the given position.
     *
     * @param data the bytes holding the pointer
     * @param offset where in data the pointer starts
     * @return the newly built pointer
     */
    public static VisioPointer getPointerAt(byte[] data, int offset) {
        VisioPointer p = new VisioPointer();
        p.type = LittleEndian.getInt(data, offset+0);
        p.address = LittleEndian.getUInt(data, offset+4);
        p.offset = LittleEndian.getUInt(data, offset+8);
        p.length = LittleEndian.getUInt(data, offset+12);
        p.format = LittleEndian.getShort(data, offset+16);
        return p;
    }
}
}

View File

@ -0,0 +1,80 @@
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
package org.apache.poi.hdgf;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * A decoder for the crazy LZW implementation used
 *  in Visio.
 * This is a port of vsd_inflate.c from vsdump
 *  (http://www.gnome.ru/projects/vsdump_en.html)
 *
 * The compressed data is a series of groups. Each group starts
 *  with a flag byte, whose 8 bits (lowest first) describe the
 *  up to 8 items which follow it. A set bit means the item is a
 *  single byte to be copied to the output as-is. A clear bit means
 *  the item is a 2 byte reference to between 3 and 18 bytes that
 *  were output earlier, which are looked up in a 4096 byte
 *  window of the recent output.
 */
public class LZW4HDGF {
    /** Size of the window of recent output. Must be a power of two */
    private static final int WINDOW_SIZE = 4096;
    /** Wraps a position so that it falls inside the window */
    private static final int WINDOW_MASK = WINDOW_SIZE - 1;
    /** The shortest run that a reference can describe */
    private static final int MIN_RUN_LENGTH = 3;
    /** How far the window positions in the data are offset from ours */
    private static final int POSITION_SHIFT = 18;

    /**
     * Turns an unsigned byte value (0 to 255), as given by
     *  {@link InputStream#read()}, into the matching signed byte.
     */
    public byte fromInt(int b) {
        // The narrowing cast keeps the low 8 bits, which gives
        //  the same answer as subtracting 256 from 128 and above
        return (byte)b;
    }

    /**
     * Decompresses everything that is left in the stream.
     * Data which stops part way through a group or a reference is
     *  not an error, the decoding just finishes at that point.
     *
     * @param src the compressed data. It is not closed
     * @return the decompressed data, which may be empty
     * @throws IOException if the stream can't be read
     */
    public byte[] decode(InputStream src) throws IOException {
        ByteArrayOutputStream res = new ByteArrayOutputStream();

        // Holds the most recent output. Starts off as all zeros,
        //  and references into parts not yet written rely on that
        byte[] window = new byte[WINDOW_SIZE];
        // Total number of bytes output so far
        int pos = 0;

        int flag;
        while ( (flag = src.read()) != -1 ) {
            for (int mask = 1; mask < 0x100 ; mask <<= 1) {
                if ( (flag & mask) != 0) {
                    // A byte to be used as-is
                    int literal = src.read();
                    if(literal == -1) break;

                    window[pos & WINDOW_MASK] = (byte)literal;
                    pos++;
                    res.write(literal);
                } else {
                    // A reference, which is made up of 12 bits of
                    //  window position and 4 bits of length
                    int low = src.read();
                    if(low == -1) break;
                    int high = src.read();
                    if(high == -1) break;

                    int len = (high & 0x0f) + MIN_RUN_LENGTH;
                    int from = ( ((high & 0xf0) << 4) | low ) + POSITION_SHIFT;

                    // Copy one byte at a time, as the run is allowed
                    //  to overlap with the bytes it is producing
                    for(int i=0; i<len; i++) {
                        byte data = window[(from + i) & WINDOW_MASK];
                        window[(pos + i) & WINDOW_MASK] = data;
                        res.write(data);
                    }
                    pos += len;
                }
            }
        }

        return res.toByteArray();
    }
}

View File

@ -0,0 +1,101 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.apache.poi.hdgf;
import java.io.ByteArrayInputStream;
import junit.framework.TestCase;
public class TestLZW4HDGF extends TestCase {
public static final byte[] testTrailerComp = new byte[] {
123, -60, 2, -21, -16, 1, 0, 0, -72, -13, -16, 78, -32, -5, 1,
0, 3, -21, -16, 10, 5, 4, -21, -16, 21, 9, -21, -16, 103, -21,
-16, 34, -36, -1, 52, 15, 70, 15, 120, 88, 15, -7, -2, -28, -9,
-123, 21, 0, 44, -122, 1, -4, 104, 15, -24, -13, 40, -98, 32,
78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
-85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
-34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
-12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23,
-21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17,
-43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79,
-9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13,
-16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1,
17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29,
-21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86,
0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3,
125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41,
-21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85,
17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64,
85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21,
-16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17,
1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68,
85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69,
85, 1, 102, -119, 72, 37, 0, 97, 33 };
public static final byte[] testTrailerDecomp = new byte[] {
-60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0,
0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67,
-2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66,
123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30,
64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0,
0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67,
1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1,
-44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8,
0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0,
0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0,
-106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0,
50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0,
36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100,
-4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4,
79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79,
1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85,
1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119,
0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127,
0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120,
0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119,
0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
};
/**
 * Decompresses the sample trailer, and checks that both the
 *  length and every byte of the result are as expected.
 */
public void testCounts() throws Exception {
    assertEquals(339, testTrailerComp.length);
    assertEquals(632, testTrailerDecomp.length);

    // Decode it using our engine
    LZW4HDGF lzw2 = new LZW4HDGF();
    byte[] dec = lzw2.decode(new ByteArrayInputStream(testTrailerComp));

    // Check it's of the right size
    assertEquals(632, dec.length);

    // Now check it matches. A difference has to fail the test,
    //  just printing it would let bad output go unnoticed
    for(int i=0; i<dec.length; i++) {
        assertEquals("Decompressed byte " + i + " is wrong", testTrailerDecomp[i], dec[i]);
    }
}
}