mirror of https://github.com/apache/poi.git
convert some tabs to spaces
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1871921 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
66471836f5
commit
93a7b81ed9
|
@ -34,49 +34,49 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|||
* file format.
|
||||
*/
|
||||
public final class HPBFDocument extends POIReadOnlyDocument {
|
||||
private MainContents mainContents;
|
||||
private QuillContents quillContents;
|
||||
private EscherStm escherStm;
|
||||
private EscherDelayStm escherDelayStm;
|
||||
private MainContents mainContents;
|
||||
private QuillContents quillContents;
|
||||
private EscherStm escherStm;
|
||||
private EscherDelayStm escherDelayStm;
|
||||
|
||||
/**
|
||||
* Opens a new publisher document
|
||||
*/
|
||||
public HPBFDocument(POIFSFileSystem fs) throws IOException {
|
||||
this(fs.getRoot());
|
||||
}
|
||||
/**
|
||||
* Opens a new publisher document
|
||||
*/
|
||||
public HPBFDocument(POIFSFileSystem fs) throws IOException {
|
||||
this(fs.getRoot());
|
||||
}
|
||||
|
||||
public HPBFDocument(InputStream inp) throws IOException {
|
||||
this(new POIFSFileSystem(inp));
|
||||
}
|
||||
public HPBFDocument(InputStream inp) throws IOException {
|
||||
this(new POIFSFileSystem(inp));
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens an embedded publisher document,
|
||||
* at the given directory.
|
||||
*/
|
||||
public HPBFDocument(DirectoryNode dir) throws IOException {
|
||||
super(dir);
|
||||
/**
|
||||
* Opens an embedded publisher document,
|
||||
* at the given directory.
|
||||
*/
|
||||
public HPBFDocument(DirectoryNode dir) throws IOException {
|
||||
super(dir);
|
||||
|
||||
// Go looking for our interesting child
|
||||
// streams
|
||||
mainContents = new MainContents(dir);
|
||||
quillContents = new QuillContents(dir);
|
||||
// Go looking for our interesting child
|
||||
// streams
|
||||
mainContents = new MainContents(dir);
|
||||
quillContents = new QuillContents(dir);
|
||||
|
||||
// Now the Escher bits
|
||||
escherStm = new EscherStm(dir);
|
||||
escherDelayStm = new EscherDelayStm(dir);
|
||||
}
|
||||
// Now the Escher bits
|
||||
escherStm = new EscherStm(dir);
|
||||
escherDelayStm = new EscherDelayStm(dir);
|
||||
}
|
||||
|
||||
public MainContents getMainContents() {
|
||||
return mainContents;
|
||||
}
|
||||
public QuillContents getQuillContents() {
|
||||
return quillContents;
|
||||
}
|
||||
public EscherStm getEscherStm() {
|
||||
return escherStm;
|
||||
}
|
||||
public EscherDelayStm getEscherDelayStm() {
|
||||
return escherDelayStm;
|
||||
}
|
||||
public MainContents getMainContents() {
|
||||
return mainContents;
|
||||
}
|
||||
public QuillContents getQuillContents() {
|
||||
return quillContents;
|
||||
}
|
||||
public EscherStm getEscherStm() {
|
||||
return escherStm;
|
||||
}
|
||||
public EscherDelayStm getEscherDelayStm() {
|
||||
return escherDelayStm;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,319 +36,319 @@ import org.apache.poi.util.StringUtil;
|
|||
* constructed.
|
||||
*/
|
||||
public final class HPBFDumper {
|
||||
private POIFSFileSystem fs;
|
||||
public HPBFDumper(POIFSFileSystem fs) {
|
||||
this.fs = fs;
|
||||
}
|
||||
|
||||
@SuppressWarnings("resource")
|
||||
private POIFSFileSystem fs;
|
||||
public HPBFDumper(POIFSFileSystem fs) {
|
||||
this.fs = fs;
|
||||
}
|
||||
|
||||
@SuppressWarnings("resource")
|
||||
public HPBFDumper(InputStream inp) throws IOException {
|
||||
this(new POIFSFileSystem(inp));
|
||||
}
|
||||
this(new POIFSFileSystem(inp));
|
||||
}
|
||||
|
||||
private static byte[] getData(DirectoryNode dir, String name) throws IOException {
|
||||
// Grab the document stream
|
||||
InputStream is = dir.createDocumentInputStream(name);
|
||||
byte[] d = IOUtils.toByteArray(is);
|
||||
is.close();
|
||||
private static byte[] getData(DirectoryNode dir, String name) throws IOException {
|
||||
// Grab the document stream
|
||||
InputStream is = dir.createDocumentInputStream(name);
|
||||
byte[] d = IOUtils.toByteArray(is);
|
||||
is.close();
|
||||
|
||||
// All done
|
||||
return d;
|
||||
}
|
||||
// All done
|
||||
return d;
|
||||
}
|
||||
|
||||
/**
|
||||
* Dumps out the given number of bytes as hex,
|
||||
* two chars
|
||||
*/
|
||||
private String dumpBytes(byte[] data, int offset, int len) {
|
||||
StringBuilder ret = new StringBuilder();
|
||||
for(int i=0; i<len; i++) {
|
||||
int j = i + offset;
|
||||
int b = data[j];
|
||||
if(b < 0) { b += 256; }
|
||||
/**
|
||||
* Dumps out the given number of bytes as hex,
|
||||
* two chars
|
||||
*/
|
||||
private String dumpBytes(byte[] data, int offset, int len) {
|
||||
StringBuilder ret = new StringBuilder();
|
||||
for(int i=0; i<len; i++) {
|
||||
int j = i + offset;
|
||||
int b = data[j];
|
||||
if(b < 0) { b += 256; }
|
||||
|
||||
String bs = Integer.toHexString(b);
|
||||
if(bs.length() == 1)
|
||||
ret.append('0');
|
||||
ret.append(bs);
|
||||
ret.append(' ');
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
String bs = Integer.toHexString(b);
|
||||
if(bs.length() == 1)
|
||||
ret.append('0');
|
||||
ret.append(bs);
|
||||
ret.append(' ');
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
|
||||
@SuppressWarnings("resource")
|
||||
@SuppressWarnings("resource")
|
||||
public static void main(String[] args) throws Exception {
|
||||
if(args.length < 1) {
|
||||
System.err.println("Use:");
|
||||
System.err.println(" HPBFDumper <filename>");
|
||||
System.exit(1);
|
||||
}
|
||||
HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0])));
|
||||
if(args.length < 1) {
|
||||
System.err.println("Use:");
|
||||
System.err.println(" HPBFDumper <filename>");
|
||||
System.exit(1);
|
||||
}
|
||||
HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0])));
|
||||
|
||||
System.out.println("Dumping " + args[0]);
|
||||
dump.dumpContents();
|
||||
dump.dumpEnvelope();
|
||||
dump.dumpEscher();
|
||||
dump.dump001CompObj(dump.fs.getRoot());
|
||||
dump.dumpQuill();
|
||||
System.out.println("Dumping " + args[0]);
|
||||
dump.dumpContents();
|
||||
dump.dumpEnvelope();
|
||||
dump.dumpEscher();
|
||||
dump.dump001CompObj(dump.fs.getRoot());
|
||||
dump.dumpQuill();
|
||||
|
||||
// Still to go:
|
||||
// (0x03)Internal
|
||||
// Objects
|
||||
}
|
||||
// Still to go:
|
||||
// (0x03)Internal
|
||||
// Objects
|
||||
}
|
||||
|
||||
/**
|
||||
* Dump out the escher parts of the file.
|
||||
* Escher -> EscherStm and EscherDelayStm
|
||||
*/
|
||||
public void dumpEscher() throws IOException {
|
||||
DirectoryNode escherDir = (DirectoryNode)
|
||||
fs.getRoot().getEntry("Escher");
|
||||
/**
|
||||
* Dump out the escher parts of the file.
|
||||
* Escher -> EscherStm and EscherDelayStm
|
||||
*/
|
||||
public void dumpEscher() throws IOException {
|
||||
DirectoryNode escherDir = (DirectoryNode)
|
||||
fs.getRoot().getEntry("Escher");
|
||||
|
||||
dumpEscherStm(escherDir);
|
||||
dumpEscherDelayStm(escherDir);
|
||||
}
|
||||
private void dumpEscherStream(byte[] data) {
|
||||
DefaultEscherRecordFactory erf =
|
||||
new DefaultEscherRecordFactory();
|
||||
dumpEscherStm(escherDir);
|
||||
dumpEscherDelayStm(escherDir);
|
||||
}
|
||||
private void dumpEscherStream(byte[] data) {
|
||||
DefaultEscherRecordFactory erf =
|
||||
new DefaultEscherRecordFactory();
|
||||
|
||||
// Dump
|
||||
int left = data.length;
|
||||
while(left > 0) {
|
||||
EscherRecord er = erf.createRecord(data, 0);
|
||||
er.fillFields(data, 0, erf);
|
||||
left -= er.getRecordSize();
|
||||
// Dump
|
||||
int left = data.length;
|
||||
while(left > 0) {
|
||||
EscherRecord er = erf.createRecord(data, 0);
|
||||
er.fillFields(data, 0, erf);
|
||||
left -= er.getRecordSize();
|
||||
|
||||
System.out.println(er);
|
||||
}
|
||||
}
|
||||
protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
|
||||
byte[] data = getData(escherDir, "EscherStm");
|
||||
System.out.println();
|
||||
System.out.println("EscherStm - " + data.length + " bytes long:");
|
||||
if(data.length > 0)
|
||||
dumpEscherStream(data);
|
||||
}
|
||||
protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
|
||||
byte[] data = getData(escherDir, "EscherDelayStm");
|
||||
System.out.println();
|
||||
System.out.println("EscherDelayStm - " + data.length + " bytes long:");
|
||||
if(data.length > 0)
|
||||
dumpEscherStream(data);
|
||||
}
|
||||
System.out.println(er);
|
||||
}
|
||||
}
|
||||
protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
|
||||
byte[] data = getData(escherDir, "EscherStm");
|
||||
System.out.println();
|
||||
System.out.println("EscherStm - " + data.length + " bytes long:");
|
||||
if(data.length > 0)
|
||||
dumpEscherStream(data);
|
||||
}
|
||||
protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
|
||||
byte[] data = getData(escherDir, "EscherDelayStm");
|
||||
System.out.println();
|
||||
System.out.println("EscherDelayStm - " + data.length + " bytes long:");
|
||||
if(data.length > 0)
|
||||
dumpEscherStream(data);
|
||||
}
|
||||
|
||||
public void dumpEnvelope() throws IOException {
|
||||
byte[] data = getData(fs.getRoot(), "Envelope");
|
||||
public void dumpEnvelope() throws IOException {
|
||||
byte[] data = getData(fs.getRoot(), "Envelope");
|
||||
|
||||
System.out.println();
|
||||
System.out.println("Envelope - " + data.length + " bytes long:");
|
||||
}
|
||||
System.out.println();
|
||||
System.out.println("Envelope - " + data.length + " bytes long:");
|
||||
}
|
||||
|
||||
public void dumpContents() throws IOException {
|
||||
byte[] data = getData(fs.getRoot(), "Contents");
|
||||
public void dumpContents() throws IOException {
|
||||
byte[] data = getData(fs.getRoot(), "Contents");
|
||||
|
||||
System.out.println();
|
||||
System.out.println("Contents - " + data.length + " bytes long:");
|
||||
System.out.println();
|
||||
System.out.println("Contents - " + data.length + " bytes long:");
|
||||
|
||||
// 8 bytes, always seems to be
|
||||
// E8 AC 2C 00 E8 03 05 01
|
||||
// E8 AC 2C 00 E8 03 05 01
|
||||
// 8 bytes, always seems to be
|
||||
// E8 AC 2C 00 E8 03 05 01
|
||||
// E8 AC 2C 00 E8 03 05 01
|
||||
|
||||
// 4 bytes - size of contents
|
||||
// 13/15 00 00 01
|
||||
// 4 bytes - size of contents
|
||||
// 13/15 00 00 01
|
||||
|
||||
// ....
|
||||
// ....
|
||||
|
||||
// E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
|
||||
// E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
|
||||
|
||||
// 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
|
||||
// 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
|
||||
|
||||
// 01 18 30 00 03 20 00 00
|
||||
// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
|
||||
// 00 00 00 88 1E 00 00 00
|
||||
// 01 18 30 00 03 20 00 00
|
||||
// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
|
||||
// 00 00 00 88 1E 00 00 00
|
||||
|
||||
// 01 18 31 00 03 20 00 00
|
||||
// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
|
||||
// 00 00 00 88 1E 00 00 00
|
||||
// 01 18 31 00 03 20 00 00
|
||||
// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
|
||||
// 00 00 00 88 1E 00 00 00
|
||||
|
||||
// 01 18 32 00 03 20 00 00
|
||||
// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
|
||||
// 00 00 00 88 1E 00 00 00
|
||||
}
|
||||
// 01 18 32 00 03 20 00 00
|
||||
// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
|
||||
// 00 00 00 88 1E 00 00 00
|
||||
}
|
||||
|
||||
public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
|
||||
byte[] data = getData(dir, "CONTENTS");
|
||||
public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
|
||||
byte[] data = getData(dir, "CONTENTS");
|
||||
|
||||
System.out.println();
|
||||
System.out.println("CONTENTS - " + data.length + " bytes long:");
|
||||
System.out.println();
|
||||
System.out.println("CONTENTS - " + data.length + " bytes long:");
|
||||
|
||||
// Between the start and 0x200 we have
|
||||
// CHNKINK(space) + 24 bytes
|
||||
// 0x1800
|
||||
// TEXT + 6 bytes
|
||||
// TEXT + 8 bytes
|
||||
// 0x1800
|
||||
// STSH + 6 bytes
|
||||
// STSH + 8 bytes
|
||||
// 0x1800
|
||||
// STSH + 6 bytes
|
||||
// STSH + 8 bytes
|
||||
// but towards 0x200 the pattern may
|
||||
// break down a little bit
|
||||
// Between the start and 0x200 we have
|
||||
// CHNKINK(space) + 24 bytes
|
||||
// 0x1800
|
||||
// TEXT + 6 bytes
|
||||
// TEXT + 8 bytes
|
||||
// 0x1800
|
||||
// STSH + 6 bytes
|
||||
// STSH + 8 bytes
|
||||
// 0x1800
|
||||
// STSH + 6 bytes
|
||||
// STSH + 8 bytes
|
||||
// but towards 0x200 the pattern may
|
||||
// break down a little bit
|
||||
|
||||
// After the second of a given type,
|
||||
// it seems to be 4 bytes giving the start,
|
||||
// then 4 bytes giving the length, then
|
||||
// 18 00
|
||||
System.out.println(
|
||||
new String(data, 0, 8, LocaleUtil.CHARSET_1252) +
|
||||
dumpBytes(data, 8, 0x20-8)
|
||||
);
|
||||
// After the second of a given type,
|
||||
// it seems to be 4 bytes giving the start,
|
||||
// then 4 bytes giving the length, then
|
||||
// 18 00
|
||||
System.out.println(
|
||||
new String(data, 0, 8, LocaleUtil.CHARSET_1252) +
|
||||
dumpBytes(data, 8, 0x20-8)
|
||||
);
|
||||
|
||||
int pos = 0x20;
|
||||
boolean sixNotEight = true;
|
||||
while(pos < 0x200) {
|
||||
if(sixNotEight) {
|
||||
System.out.println(
|
||||
dumpBytes(data, pos, 2)
|
||||
);
|
||||
pos += 2;
|
||||
}
|
||||
String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252);
|
||||
int blen = 8;
|
||||
if(sixNotEight)
|
||||
blen = 6;
|
||||
System.out.println(
|
||||
text + " " + dumpBytes(data, pos+4, blen)
|
||||
);
|
||||
int pos = 0x20;
|
||||
boolean sixNotEight = true;
|
||||
while(pos < 0x200) {
|
||||
if(sixNotEight) {
|
||||
System.out.println(
|
||||
dumpBytes(data, pos, 2)
|
||||
);
|
||||
pos += 2;
|
||||
}
|
||||
String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252);
|
||||
int blen = 8;
|
||||
if(sixNotEight)
|
||||
blen = 6;
|
||||
System.out.println(
|
||||
text + " " + dumpBytes(data, pos+4, blen)
|
||||
);
|
||||
|
||||
pos += 4 + blen;
|
||||
sixNotEight = ! sixNotEight;
|
||||
}
|
||||
pos += 4 + blen;
|
||||
sixNotEight = ! sixNotEight;
|
||||
}
|
||||
|
||||
// Text from 0x200 onwards until we get
|
||||
// to \r(00)\n(00)(00)(00)
|
||||
int textStop = -1;
|
||||
for(int i=0x200; i<data.length-2 && textStop == -1; i++) {
|
||||
if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) {
|
||||
textStop = i;
|
||||
}
|
||||
}
|
||||
if(textStop > 0) {
|
||||
int len = (textStop - 0x200) / 2;
|
||||
System.out.println();
|
||||
System.out.println(
|
||||
StringUtil.getFromUnicodeLE(data, 0x200, len)
|
||||
);
|
||||
}
|
||||
// Text from 0x200 onwards until we get
|
||||
// to \r(00)\n(00)(00)(00)
|
||||
int textStop = -1;
|
||||
for(int i=0x200; i<data.length-2 && textStop == -1; i++) {
|
||||
if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) {
|
||||
textStop = i;
|
||||
}
|
||||
}
|
||||
if(textStop > 0) {
|
||||
int len = (textStop - 0x200) / 2;
|
||||
System.out.println();
|
||||
System.out.println(
|
||||
StringUtil.getFromUnicodeLE(data, 0x200, len)
|
||||
);
|
||||
}
|
||||
|
||||
// The font list comes slightly later
|
||||
// The font list comes slightly later
|
||||
|
||||
// The hyperlinks may come before the fonts,
|
||||
// or slightly in front
|
||||
}
|
||||
public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
|
||||
byte[] data = getData(dir, "CONTENTS");
|
||||
// The hyperlinks may come before the fonts,
|
||||
// or slightly in front
|
||||
}
|
||||
public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
|
||||
byte[] data = getData(dir, "CONTENTS");
|
||||
|
||||
System.out.println();
|
||||
System.out.println("CONTENTS - " + data.length + " bytes long:");
|
||||
System.out.println();
|
||||
System.out.println("CONTENTS - " + data.length + " bytes long:");
|
||||
|
||||
String[] startType = new String[20];
|
||||
String[] endType = new String[20];
|
||||
int[] optA = new int[20];
|
||||
int[] optB = new int[20];
|
||||
int[] optC = new int[20];
|
||||
int[] from = new int[20];
|
||||
int[] len = new int[20];
|
||||
String[] startType = new String[20];
|
||||
String[] endType = new String[20];
|
||||
int[] optA = new int[20];
|
||||
int[] optB = new int[20];
|
||||
int[] optC = new int[20];
|
||||
int[] from = new int[20];
|
||||
int[] len = new int[20];
|
||||
|
||||
for(int i=0; i<20; i++) {
|
||||
int offset = 0x20 + i*24;
|
||||
if(data[offset] == 0x18 && data[offset+1] == 0x00) {
|
||||
// Has data
|
||||
startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252);
|
||||
optA[i] = LittleEndian.getUShort(data, offset+6);
|
||||
optB[i] = LittleEndian.getUShort(data, offset+8);
|
||||
optC[i] = LittleEndian.getUShort(data, offset+10);
|
||||
endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252);
|
||||
from[i] = (int)LittleEndian.getUInt(data, offset+16);
|
||||
len[i] = (int)LittleEndian.getUInt(data, offset+20);
|
||||
} else {
|
||||
// Doesn't have data
|
||||
}
|
||||
}
|
||||
for(int i=0; i<20; i++) {
|
||||
int offset = 0x20 + i*24;
|
||||
if(data[offset] == 0x18 && data[offset+1] == 0x00) {
|
||||
// Has data
|
||||
startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252);
|
||||
optA[i] = LittleEndian.getUShort(data, offset+6);
|
||||
optB[i] = LittleEndian.getUShort(data, offset+8);
|
||||
optC[i] = LittleEndian.getUShort(data, offset+10);
|
||||
endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252);
|
||||
from[i] = (int)LittleEndian.getUInt(data, offset+16);
|
||||
len[i] = (int)LittleEndian.getUInt(data, offset+20);
|
||||
} else {
|
||||
// Doesn't have data
|
||||
}
|
||||
}
|
||||
|
||||
String text = StringUtil.getFromUnicodeLE(
|
||||
data, from[0], len[0]/2
|
||||
);
|
||||
String text = StringUtil.getFromUnicodeLE(
|
||||
data, from[0], len[0]/2
|
||||
);
|
||||
|
||||
// Dump
|
||||
for(int i=0; i<20; i++) {
|
||||
String num = Integer.toString(i);
|
||||
if(i < 10) {
|
||||
num = "0" + i;
|
||||
}
|
||||
System.out.print(num + " ");
|
||||
// Dump
|
||||
for(int i=0; i<20; i++) {
|
||||
String num = Integer.toString(i);
|
||||
if(i < 10) {
|
||||
num = "0" + i;
|
||||
}
|
||||
System.out.print(num + " ");
|
||||
|
||||
if(startType[i] == null) {
|
||||
System.out.println("(not present)");
|
||||
} else {
|
||||
System.out.println(
|
||||
"\t" +
|
||||
startType[i] + " " +
|
||||
optA[i] + " " +
|
||||
optB[i] + " " +
|
||||
optC[i]
|
||||
);
|
||||
System.out.println(
|
||||
"\t" +
|
||||
endType[i] + " " +
|
||||
"from: " +
|
||||
Integer.toHexString(from[i]) +
|
||||
" (" + from[i] + ")" +
|
||||
", len: " +
|
||||
Integer.toHexString(len[i]) +
|
||||
" (" + len[i] + ")"
|
||||
);
|
||||
}
|
||||
}
|
||||
if(startType[i] == null) {
|
||||
System.out.println("(not present)");
|
||||
} else {
|
||||
System.out.println(
|
||||
"\t" +
|
||||
startType[i] + " " +
|
||||
optA[i] + " " +
|
||||
optB[i] + " " +
|
||||
optC[i]
|
||||
);
|
||||
System.out.println(
|
||||
"\t" +
|
||||
endType[i] + " " +
|
||||
"from: " +
|
||||
Integer.toHexString(from[i]) +
|
||||
" (" + from[i] + ")" +
|
||||
", len: " +
|
||||
Integer.toHexString(len[i]) +
|
||||
" (" + len[i] + ")"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Text
|
||||
System.out.println();
|
||||
System.out.println("TEXT:");
|
||||
System.out.println(text);
|
||||
System.out.println();
|
||||
// Text
|
||||
System.out.println();
|
||||
System.out.println("TEXT:");
|
||||
System.out.println(text);
|
||||
System.out.println();
|
||||
|
||||
// All the others
|
||||
for(int i=0; i<20; i++) {
|
||||
if(startType[i] == null) {
|
||||
continue;
|
||||
}
|
||||
int start = from[i];
|
||||
// All the others
|
||||
for(int i=0; i<20; i++) {
|
||||
if(startType[i] == null) {
|
||||
continue;
|
||||
}
|
||||
int start = from[i];
|
||||
|
||||
System.out.println(
|
||||
startType[i] + " -> " + endType[i] +
|
||||
" @ " + Integer.toHexString(start) +
|
||||
" (" + start + ")"
|
||||
);
|
||||
System.out.println("\t" + dumpBytes(data, start, 4));
|
||||
System.out.println("\t" + dumpBytes(data, start+4, 4));
|
||||
System.out.println("\t" + dumpBytes(data, start+8, 4));
|
||||
System.out.println("\t(etc)");
|
||||
}
|
||||
}
|
||||
System.out.println(
|
||||
startType[i] + " -> " + endType[i] +
|
||||
" @ " + Integer.toHexString(start) +
|
||||
" (" + start + ")"
|
||||
);
|
||||
System.out.println("\t" + dumpBytes(data, start, 4));
|
||||
System.out.println("\t" + dumpBytes(data, start+4, 4));
|
||||
System.out.println("\t" + dumpBytes(data, start+8, 4));
|
||||
System.out.println("\t(etc)");
|
||||
}
|
||||
}
|
||||
|
||||
protected void dump001CompObj(DirectoryNode dir) {
|
||||
// TODO
|
||||
}
|
||||
protected void dump001CompObj(DirectoryNode dir) {
|
||||
// TODO
|
||||
}
|
||||
|
||||
public void dumpQuill() throws IOException {
|
||||
DirectoryNode quillDir = (DirectoryNode)
|
||||
fs.getRoot().getEntry("Quill");
|
||||
DirectoryNode quillSubDir = (DirectoryNode)
|
||||
quillDir.getEntry("QuillSub");
|
||||
public void dumpQuill() throws IOException {
|
||||
DirectoryNode quillDir = (DirectoryNode)
|
||||
fs.getRoot().getEntry("Quill");
|
||||
DirectoryNode quillSubDir = (DirectoryNode)
|
||||
quillDir.getEntry("QuillSub");
|
||||
|
||||
dump001CompObj(quillSubDir);
|
||||
dumpCONTENTSraw(quillSubDir);
|
||||
dumpCONTENTSguessed(quillSubDir);
|
||||
}
|
||||
dump001CompObj(quillSubDir);
|
||||
dumpCONTENTSraw(quillSubDir);
|
||||
dumpCONTENTSguessed(quillSubDir);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,53 +33,53 @@ import org.apache.poi.util.HexDump;
|
|||
* what the format of them is.
|
||||
*/
|
||||
public final class PLCDumper {
|
||||
private HPBFDocument doc;
|
||||
private QuillContents qc;
|
||||
private HPBFDocument doc;
|
||||
private QuillContents qc;
|
||||
|
||||
public PLCDumper(HPBFDocument hpbfDoc) {
|
||||
doc = hpbfDoc;
|
||||
qc = doc.getQuillContents();
|
||||
}
|
||||
public PLCDumper(POIFSFileSystem fs) throws IOException {
|
||||
this(new HPBFDocument(fs));
|
||||
}
|
||||
public PLCDumper(InputStream inp) throws IOException {
|
||||
this(new POIFSFileSystem(inp));
|
||||
}
|
||||
public PLCDumper(HPBFDocument hpbfDoc) {
|
||||
doc = hpbfDoc;
|
||||
qc = doc.getQuillContents();
|
||||
}
|
||||
public PLCDumper(POIFSFileSystem fs) throws IOException {
|
||||
this(new HPBFDocument(fs));
|
||||
}
|
||||
public PLCDumper(InputStream inp) throws IOException {
|
||||
this(new POIFSFileSystem(inp));
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
if(args.length < 1) {
|
||||
System.err.println("Use:");
|
||||
System.err.println(" PLCDumper <filename>");
|
||||
System.exit(1);
|
||||
}
|
||||
public static void main(String[] args) throws Exception {
|
||||
if(args.length < 1) {
|
||||
System.err.println("Use:");
|
||||
System.err.println(" PLCDumper <filename>");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
try (FileInputStream fis = new FileInputStream(args[0])) {
|
||||
PLCDumper dump = new PLCDumper(fis);
|
||||
try (FileInputStream fis = new FileInputStream(args[0])) {
|
||||
PLCDumper dump = new PLCDumper(fis);
|
||||
|
||||
System.out.println("Dumping " + args[0]);
|
||||
dump.dumpPLC();
|
||||
}
|
||||
}
|
||||
System.out.println("Dumping " + args[0]);
|
||||
dump.dumpPLC();
|
||||
}
|
||||
}
|
||||
|
||||
private void dumpPLC() {
|
||||
QCBit[] bits = qc.getBits();
|
||||
private void dumpPLC() {
|
||||
QCBit[] bits = qc.getBits();
|
||||
|
||||
for(int i=0; i<bits.length; i++) {
|
||||
if(bits[i] == null) continue;
|
||||
if(bits[i].getBitType().equals("PLC ")) {
|
||||
dumpBit(bits[i], i);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int i=0; i<bits.length; i++) {
|
||||
if(bits[i] == null) continue;
|
||||
if(bits[i].getBitType().equals("PLC ")) {
|
||||
dumpBit(bits[i], i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void dumpBit(QCBit bit, int index) {
|
||||
System.out.println();
|
||||
System.out.println("Dumping " + bit.getBitType() + " bit at " + index);
|
||||
System.out.println(" Is a " + bit.getThingType() + ", number is " + bit.getOptA());
|
||||
System.out.println(" Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")");
|
||||
System.out.println(" Runs for " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")");
|
||||
private void dumpBit(QCBit bit, int index) {
|
||||
System.out.println();
|
||||
System.out.println("Dumping " + bit.getBitType() + " bit at " + index);
|
||||
System.out.println(" Is a " + bit.getThingType() + ", number is " + bit.getOptA());
|
||||
System.out.println(" Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")");
|
||||
System.out.println(" Runs for " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")");
|
||||
|
||||
System.out.println(HexDump.dump(bit.getData(), 0, 0));
|
||||
}
|
||||
System.out.println(HexDump.dump(bit.getData(), 0, 0));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,65 +50,65 @@ public final class PublisherTextExtractor extends POIOLE2TextExtractor {
|
|||
this(new POIFSFileSystem(is));
|
||||
}
|
||||
|
||||
/**
|
||||
* Should a call to getText() return hyperlinks inline
|
||||
* with the text?
|
||||
* Default is no
|
||||
*/
|
||||
public void setHyperlinksByDefault(boolean hyperlinksByDefault) {
|
||||
this.hyperlinksByDefault = hyperlinksByDefault;
|
||||
}
|
||||
/**
|
||||
* Should a call to getText() return hyperlinks inline
|
||||
* with the text?
|
||||
* Default is no
|
||||
*/
|
||||
public void setHyperlinksByDefault(boolean hyperlinksByDefault) {
|
||||
this.hyperlinksByDefault = hyperlinksByDefault;
|
||||
}
|
||||
|
||||
|
||||
public String getText() {
|
||||
StringBuilder text = new StringBuilder();
|
||||
public String getText() {
|
||||
StringBuilder text = new StringBuilder();
|
||||
|
||||
// Get the text from the Quill Contents
|
||||
QCBit[] bits = doc.getQuillContents().getBits();
|
||||
for (QCBit bit1 : bits) {
|
||||
if (bit1 != null && bit1 instanceof QCTextBit) {
|
||||
QCTextBit t = (QCTextBit) bit1;
|
||||
text.append(t.getText().replace('\r', '\n'));
|
||||
}
|
||||
}
|
||||
// Get the text from the Quill Contents
|
||||
QCBit[] bits = doc.getQuillContents().getBits();
|
||||
for (QCBit bit1 : bits) {
|
||||
if (bit1 != null && bit1 instanceof QCTextBit) {
|
||||
QCTextBit t = (QCTextBit) bit1;
|
||||
text.append(t.getText().replace('\r', '\n'));
|
||||
}
|
||||
}
|
||||
|
||||
// If requested, add in the hyperlinks
|
||||
// Ideally, we'd do these inline, but the hyperlink
|
||||
// positions are relative to the text area the
|
||||
// hyperlink is in, and we have yet to figure out
|
||||
// how to tie that together.
|
||||
if(hyperlinksByDefault) {
|
||||
for (QCBit bit : bits) {
|
||||
if (bit != null && bit instanceof Type12) {
|
||||
Type12 hyperlinks = (Type12) bit;
|
||||
for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
|
||||
text.append("<");
|
||||
text.append(hyperlinks.getHyperlink(j));
|
||||
text.append(">\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// If requested, add in the hyperlinks
|
||||
// Ideally, we'd do these inline, but the hyperlink
|
||||
// positions are relative to the text area the
|
||||
// hyperlink is in, and we have yet to figure out
|
||||
// how to tie that together.
|
||||
if(hyperlinksByDefault) {
|
||||
for (QCBit bit : bits) {
|
||||
if (bit != null && bit instanceof Type12) {
|
||||
Type12 hyperlinks = (Type12) bit;
|
||||
for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
|
||||
text.append("<");
|
||||
text.append(hyperlinks.getHyperlink(j));
|
||||
text.append(">\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get more text
|
||||
// TODO
|
||||
// Get more text
|
||||
// TODO
|
||||
|
||||
return text.toString();
|
||||
}
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
if(args.length == 0) {
|
||||
System.err.println("Use:");
|
||||
System.err.println(" PublisherTextExtractor <file.pub>");
|
||||
}
|
||||
public static void main(String[] args) throws Exception {
|
||||
if(args.length == 0) {
|
||||
System.err.println("Use:");
|
||||
System.err.println(" PublisherTextExtractor <file.pub>");
|
||||
}
|
||||
|
||||
for (String arg : args) {
|
||||
try (FileInputStream fis = new FileInputStream(arg)) {
|
||||
PublisherTextExtractor te = new PublisherTextExtractor(fis);
|
||||
System.out.println(te.getText());
|
||||
te.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
for (String arg : args) {
|
||||
try (FileInputStream fis = new FileInputStream(arg)) {
|
||||
PublisherTextExtractor te = new PublisherTextExtractor(fis);
|
||||
System.out.println(te.getText());
|
||||
te.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,158 +53,158 @@ import org.apache.poi.util.LittleEndian;
|
|||
* lucene indexers) that would ever want to use this!
|
||||
*/
|
||||
public final class QuickButCruddyTextExtractor {
|
||||
private POIFSFileSystem fs;
|
||||
private InputStream is;
|
||||
private byte[] pptContents;
|
||||
private POIFSFileSystem fs;
|
||||
private InputStream is;
|
||||
private byte[] pptContents;
|
||||
|
||||
/**
|
||||
* Really basic text extractor, that will also return lots of crud text.
|
||||
* Takes a single argument, the file to extract from
|
||||
*/
|
||||
public static void main(String[] args) throws IOException
|
||||
{
|
||||
if(args.length < 1) {
|
||||
System.err.println("Useage:");
|
||||
System.err.println("\tQuickButCruddyTextExtractor <file>");
|
||||
System.exit(1);
|
||||
}
|
||||
/**
|
||||
* Really basic text extractor, that will also return lots of crud text.
|
||||
* Takes a single argument, the file to extract from
|
||||
*/
|
||||
public static void main(String[] args) throws IOException
|
||||
{
|
||||
if(args.length < 1) {
|
||||
System.err.println("Useage:");
|
||||
System.err.println("\tQuickButCruddyTextExtractor <file>");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
String file = args[0];
|
||||
String file = args[0];
|
||||
|
||||
QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file);
|
||||
System.out.println(ppe.getTextAsString());
|
||||
ppe.close();
|
||||
}
|
||||
QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file);
|
||||
System.out.println(ppe.getTextAsString());
|
||||
ppe.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an extractor from a given file name
|
||||
* @param fileName
|
||||
*/
|
||||
@SuppressWarnings("resource")
|
||||
public QuickButCruddyTextExtractor(String fileName) throws IOException {
|
||||
this(new POIFSFileSystem(new File(fileName)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an extractor from a given input stream
|
||||
* @param iStream
|
||||
*/
|
||||
/**
|
||||
* Creates an extractor from a given file name
|
||||
* @param fileName
|
||||
*/
|
||||
@SuppressWarnings("resource")
|
||||
public QuickButCruddyTextExtractor(InputStream iStream) throws IOException {
|
||||
this(new POIFSFileSystem(iStream));
|
||||
is = iStream;
|
||||
}
|
||||
public QuickButCruddyTextExtractor(String fileName) throws IOException {
|
||||
this(new POIFSFileSystem(new File(fileName)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an extractor from a POIFS Filesystem
|
||||
* @param poifs
|
||||
*/
|
||||
public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException {
|
||||
fs = poifs;
|
||||
/**
|
||||
* Creates an extractor from a given input stream
|
||||
* @param iStream
|
||||
*/
|
||||
@SuppressWarnings("resource")
|
||||
public QuickButCruddyTextExtractor(InputStream iStream) throws IOException {
|
||||
this(new POIFSFileSystem(iStream));
|
||||
is = iStream;
|
||||
}
|
||||
|
||||
// Find the PowerPoint bit, and get out the bytes
|
||||
InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT);
|
||||
pptContents = IOUtils.toByteArray(pptIs);
|
||||
pptIs.close();
|
||||
}
|
||||
/**
|
||||
* Creates an extractor from a POIFS Filesystem
|
||||
* @param poifs
|
||||
*/
|
||||
public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException {
|
||||
fs = poifs;
|
||||
|
||||
// Find the PowerPoint bit, and get out the bytes
|
||||
InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT);
|
||||
pptContents = IOUtils.toByteArray(pptIs);
|
||||
pptIs.close();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Shuts down the underlying streams
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
if(is != null) { is.close(); }
|
||||
fs = null;
|
||||
}
|
||||
/**
|
||||
* Shuts down the underlying streams
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
if(is != null) { is.close(); }
|
||||
fs = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches the ALL the text of the powerpoint file, as a single string
|
||||
*/
|
||||
public String getTextAsString() {
|
||||
StringBuilder ret = new StringBuilder();
|
||||
List<String> textV = getTextAsVector();
|
||||
for(String text : textV) {
|
||||
ret.append(text);
|
||||
if(! text.endsWith("\n")) {
|
||||
ret.append('\n');
|
||||
}
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
/**
|
||||
* Fetches the ALL the text of the powerpoint file, as a single string
|
||||
*/
|
||||
public String getTextAsString() {
|
||||
StringBuilder ret = new StringBuilder();
|
||||
List<String> textV = getTextAsVector();
|
||||
for(String text : textV) {
|
||||
ret.append(text);
|
||||
if(! text.endsWith("\n")) {
|
||||
ret.append('\n');
|
||||
}
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches the ALL the text of the powerpoint file, in a List of
|
||||
* strings, one per text record
|
||||
*/
|
||||
public List<String> getTextAsVector() {
|
||||
List<String> textV = new ArrayList<>();
|
||||
/**
|
||||
* Fetches the ALL the text of the powerpoint file, in a List of
|
||||
* strings, one per text record
|
||||
*/
|
||||
public List<String> getTextAsVector() {
|
||||
List<String> textV = new ArrayList<>();
|
||||
|
||||
// Set to the start of the file
|
||||
int walkPos = 0;
|
||||
// Set to the start of the file
|
||||
int walkPos = 0;
|
||||
|
||||
// Start walking the file, looking for the records
|
||||
while(walkPos != -1) {
|
||||
// Start walking the file, looking for the records
|
||||
while(walkPos != -1) {
|
||||
walkPos = findTextRecords(walkPos,textV);
|
||||
}
|
||||
}
|
||||
|
||||
// Return what we find
|
||||
return textV;
|
||||
}
|
||||
// Return what we find
|
||||
return textV;
|
||||
}
|
||||
|
||||
/**
|
||||
* For the given position, look if the record is a text record, and wind
|
||||
* on after.
|
||||
* If it is a text record, grabs out the text. Whatever happens, returns
|
||||
* the position of the next record, or -1 if no more.
|
||||
*/
|
||||
public int findTextRecords(int startPos, List<String> textV) {
|
||||
// Grab the length, and the first option byte
|
||||
// Note that the length doesn't include the 8 byte atom header
|
||||
int len = (int)LittleEndian.getUInt(pptContents,startPos+4);
|
||||
byte opt = pptContents[startPos];
|
||||
/**
|
||||
* For the given position, look if the record is a text record, and wind
|
||||
* on after.
|
||||
* If it is a text record, grabs out the text. Whatever happens, returns
|
||||
* the position of the next record, or -1 if no more.
|
||||
*/
|
||||
public int findTextRecords(int startPos, List<String> textV) {
|
||||
// Grab the length, and the first option byte
|
||||
// Note that the length doesn't include the 8 byte atom header
|
||||
int len = (int)LittleEndian.getUInt(pptContents,startPos+4);
|
||||
byte opt = pptContents[startPos];
|
||||
|
||||
// If it's a container, step into it and return
|
||||
// (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f)
|
||||
int container = opt & 0x0f;
|
||||
if(container == 0x0f) {
|
||||
return (startPos+8);
|
||||
}
|
||||
// If it's a container, step into it and return
|
||||
// (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f)
|
||||
int container = opt & 0x0f;
|
||||
if(container == 0x0f) {
|
||||
return (startPos+8);
|
||||
}
|
||||
|
||||
// Otherwise, check the type to see if it's text
|
||||
int type = LittleEndian.getUShort(pptContents,startPos+2);
|
||||
// Otherwise, check the type to see if it's text
|
||||
int type = LittleEndian.getUShort(pptContents,startPos+2);
|
||||
|
||||
// TextBytesAtom
|
||||
if(type == RecordTypes.TextBytesAtom.typeID) {
|
||||
TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
|
||||
String text = HSLFTextParagraph.toExternalString(tba.getText(), -1);
|
||||
textV.add(text);
|
||||
}
|
||||
// TextCharsAtom
|
||||
if(type == RecordTypes.TextCharsAtom.typeID) {
|
||||
TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
|
||||
// TextBytesAtom
|
||||
if(type == RecordTypes.TextBytesAtom.typeID) {
|
||||
TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
|
||||
String text = HSLFTextParagraph.toExternalString(tba.getText(), -1);
|
||||
textV.add(text);
|
||||
}
|
||||
// TextCharsAtom
|
||||
if(type == RecordTypes.TextCharsAtom.typeID) {
|
||||
TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
|
||||
String text = HSLFTextParagraph.toExternalString(tca.getText(), -1);
|
||||
textV.add(text);
|
||||
}
|
||||
}
|
||||
|
||||
// CString (doesn't go via a TextRun)
|
||||
if(type == RecordTypes.CString.typeID) {
|
||||
CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8);
|
||||
String text = cs.getText();
|
||||
// CString (doesn't go via a TextRun)
|
||||
if(type == RecordTypes.CString.typeID) {
|
||||
CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8);
|
||||
String text = cs.getText();
|
||||
|
||||
// Ignore the ones we know to be rubbish
|
||||
if(text.equals("___PPT10")) {
|
||||
} else if(text.equals("Default Design")) {
|
||||
} else {
|
||||
textV.add(text);
|
||||
}
|
||||
}
|
||||
// Ignore the ones we know to be rubbish
|
||||
if(text.equals("___PPT10")) {
|
||||
} else if(text.equals("Default Design")) {
|
||||
} else {
|
||||
textV.add(text);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Wind on by the atom length, and check we're not at the end
|
||||
int newPos = (startPos + 8 + len);
|
||||
if(newPos > (pptContents.length - 8)) {
|
||||
newPos = -1;
|
||||
}
|
||||
return newPos;
|
||||
}
|
||||
// Wind on by the atom length, and check we're not at the end
|
||||
int newPos = (startPos + 8 + len);
|
||||
if(newPos > (pptContents.length - 8)) {
|
||||
newPos = -1;
|
||||
}
|
||||
return newPos;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue