More work understanding hpbf

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@686625 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-08-17 18:02:31 +00:00
parent caa6292337
commit e9a647c3f4
2 changed files with 97 additions and 6 deletions

View File

@ -71,5 +71,25 @@ Root Entry -
<p>If you set the background colour of a textbox, but make <p>If you set the background colour of a textbox, but make
no changes to the text, no changes to the text,
</section> </section>
<section><title>Structure of CONTENTS</title>
<p>First we have "CHNKINK ", followed by 24 bytes.</p>
<p>Next we have 20 sequences of 24 bytes each. If the first two bytes
are 0x1800, then that sequence entry exists, but if they're 0x0000 then
the entry doesn't exist. If it does exist, we then have 4 bytes of
upper case ASCII text, followed by three little endian shorts.
The first of these seems to be the count of that type, the second is
usually 1, the third is usually zero. Then we have another 4 bytes of
upper case ASCII text, normally but not always the same as the first
text. Finally, we have an unsigned little endian 32 bit offset to
the start of the data for this, then an unsigned little endian
32 bit length of this section.</p>
<p>Normally, the first sequence entry is for TEXT, and the text data
will start at 0x200. After that is normally two or three STSH entries
(so the first short has values 0, then 1, then 2). After that it
seems to vary.</p>
<p>At 0x200 we have the text, stored as little endian 16 bit unicode.</p>
<p>After the text comes all sorts of other stuff, presumably as
described by the sequences.</p>
</section>
</body> </body>
</document> </document>

View File

@ -175,20 +175,23 @@ public class HPBFDumper {
// 00 00 00 88 1E 00 00 00 // 00 00 00 88 1E 00 00 00
} }
public void dumpCONTENTS(DirectoryNode dir) throws IOException { public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
byte[] data = getData(dir, "CONTENTS"); byte[] data = getData(dir, "CONTENTS");
System.out.println(""); System.out.println("");
System.out.println("CONTENTS - " + data.length + " bytes long:"); System.out.println("CONTENTS - " + data.length + " bytes long:");
// Between the start and 0x200 we have // Between the start and 0x200 we have
// CHNKINK(space) + 24 bytes + 0x1800 // CHNKINK(space) + 24 bytes
// 0x1800
// TEXT + 6 bytes // TEXT + 6 bytes
// TEXT + 8 bytes + 0x1800 // TEXT + 8 bytes
// 0x1800
// STSH + 6 bytes // STSH + 6 bytes
// STSH + 8 bytes + 0x1800 // STSH + 8 bytes
// 0x1800
// STSH + 6 bytes // STSH + 6 bytes
// STSH + 8 bytes + 0x1800 // STSH + 8 bytes
// but towards 0x200 the pattern may // but towards 0x200 the pattern may
// break down a little bit // break down a little bit
@ -237,6 +240,73 @@ public class HPBFDumper {
// The hyperlinks may come before the fonts, // The hyperlinks may come before the fonts,
// or slightly in front // or slightly in front
} }
/**
 * Dumps the CONTENTS stream using the current best guess at its layout:
 *  a table of up to 20 entries of 24 bytes each, starting at 0x20.
 * An entry is treated as present when its first two bytes are
 *  0x18 0x00. A present entry is read as a 4 byte type, three
 *  little endian unsigned shorts, a second 4 byte type, and then
 *  an unsigned 32 bit offset and an unsigned 32 bit length.
 * After the table has been printed, the data that entry 0 points
 *  to is printed, decoded as little endian 16 bit unicode.
 *
 * @param dir the directory holding the CONTENTS stream
 * @throws IOException if the CONTENTS stream can't be read
 */
public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
    final int maxEntries = 20;
    final int entrySize = 24;
    final int tableStart = 0x20;

    byte[] data = getData(dir, "CONTENTS");

    System.out.println("");
    System.out.println("CONTENTS - " + data.length + " bytes long:");

    String[] startType = new String[maxEntries];
    String[] endType = new String[maxEntries];
    int[] optA = new int[maxEntries];
    int[] optB = new int[maxEntries];
    int[] optC = new int[maxEntries];
    // Offset and length are unsigned 32 bit values, so hold them
    //  in longs to stop large values showing up as negative
    long[] from = new long[maxEntries];
    long[] len = new long[maxEntries];

    for(int i=0; i<maxEntries; i++) {
        int offset = tableStart + i*entrySize;
        if(offset + entrySize > data.length) {
            // Stream is too short to hold this entry (or any later
            //  one), so leave the rest marked as not present
            break;
        }
        if(data[offset] != 0x18 || data[offset+1] != 0x00) {
            // Doesn't have data
            continue;
        }

        // Has data. The type names are plain ASCII, so decode them
        //  explicitly rather than with the platform default charset
        startType[i] = new String(data, offset+2, 4, "US-ASCII");
        optA[i] = LittleEndian.getUShort(data, offset+6);
        optB[i] = LittleEndian.getUShort(data, offset+8);
        optC[i] = LittleEndian.getUShort(data, offset+10);
        endType[i] = new String(data, offset+12, 4, "US-ASCII");
        from[i] = LittleEndian.getUInt(data, offset+16);
        len[i] = LittleEndian.getUInt(data, offset+20);
    }

    // Entry 0 is used as the pointer to the text. Only decode it
    //  if the entry exists and its range fits inside the stream
    String text = "";
    if(startType[0] != null && len[0] >= 2) {
        if(from[0] + len[0] <= data.length) {
            text = StringUtil.getFromUnicodeLE(
                    data, (int)from[0], (int)(len[0]/2)
            );
        } else {
            text = "(text range runs past the end of the stream)";
        }
    }

    // Dump
    for(int i=0; i<maxEntries; i++) {
        String num = Integer.toString(i);
        if(i < 10) {
            num = "0" + i;
        }
        System.out.print(num + " ");

        if(startType[i] == null) {
            System.out.println("(not present)");
        } else {
            System.out.println(
                    "\t" +
                    startType[i] + " " +
                    optA[i] + " " +
                    optB[i] + " " +
                    optC[i]
            );
            System.out.println(
                    "\t" +
                    endType[i] + " " +
                    "from: " +
                    Long.toHexString(from[i]) +
                    " (" + from[i] + ")" +
                    ", len: " +
                    Long.toHexString(len[i]) +
                    " (" + len[i] + ")"
            );
        }
    }

    System.out.println("");
    System.out.println(text);
}
protected void dump001CompObj(DirectoryNode dir) { protected void dump001CompObj(DirectoryNode dir) {
// TODO // TODO
@ -249,6 +319,7 @@ public class HPBFDumper {
quillDir.getEntry("QuillSub"); quillDir.getEntry("QuillSub");
dump001CompObj(quillSubDir); dump001CompObj(quillSubDir);
dumpCONTENTS(quillSubDir); dumpCONTENTSraw(quillSubDir);
dumpCONTENTSguessed(quillSubDir);
} }
} }