mirror of https://github.com/apache/poi.git
Contribution from Yegor Kozlov (Bug 35630):
Dumps out the raw contents of a PPT file in XML format git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ab7fb64929
commit
9a2dcc327b
|
@ -0,0 +1,254 @@
|
|||
/* ====================================================================
|
||||
Copyright 2002-2004 Apache Software Foundation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hslf.dev;
|
||||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.hslf.record.RecordTypes;
|
||||
import org.apache.poi.poifs.filesystem.*;
|
||||
import java.io.*;
|
||||
|
||||
/**
|
||||
* Utility class which dumps raw contents of a ppt file into XML format
|
||||
*
|
||||
* @author Yegor Kozlov
|
||||
*/
|
||||
|
||||
public class PPTXMLDump {
|
||||
public static final int HEADER_SIZE = 8; //size of the record header
|
||||
public static final int PICT_HEADER_SIZE = 25; //size of the picture header
|
||||
public final static String PPDOC_ENTRY = "PowerPoint Document";
|
||||
public final static String PICTURES_ENTRY = "Pictures";
|
||||
public static String CR = System.getProperty("line.separator");
|
||||
|
||||
protected Writer out;
|
||||
protected byte[] docstream;
|
||||
protected byte[] pictstream;
|
||||
protected boolean hexHeader = true;
|
||||
|
||||
public PPTXMLDump(File ppt) throws IOException {
|
||||
FileInputStream fis = new FileInputStream(ppt);
|
||||
POIFSFileSystem fs = new POIFSFileSystem(fis);
|
||||
fis.close();
|
||||
|
||||
//read the document entry from OLE file system
|
||||
DocumentEntry entry = (DocumentEntry)fs.getRoot().getEntry(PPDOC_ENTRY);
|
||||
docstream = new byte[entry.getSize()];
|
||||
DocumentInputStream is = fs.createDocumentInputStream(PPDOC_ENTRY);
|
||||
is.read(docstream);
|
||||
|
||||
try {
|
||||
entry = (DocumentEntry)fs.getRoot().getEntry(PICTURES_ENTRY);
|
||||
pictstream = new byte[entry.getSize()];
|
||||
is = fs.createDocumentInputStream(PICTURES_ENTRY);
|
||||
is.read(pictstream);
|
||||
} catch(FileNotFoundException e){
|
||||
//silently catch errors if the presentation does not contain pictures
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Dump the structure of the supplied PPT file into XML
|
||||
* @param out <code>Writer</code> to write out
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public void dump(Writer out) throws IOException {
|
||||
this.out = out;
|
||||
|
||||
int padding = 0;
|
||||
write(out, "<Presentation>" + CR, padding);
|
||||
padding++;
|
||||
if (pictstream != null){
|
||||
write(out, "<Pictures>" + CR, padding);
|
||||
dumpPictures(pictstream, padding);
|
||||
write(out, "</Pictures>" + CR, padding);
|
||||
}
|
||||
//dump the structure of the powerpoint document
|
||||
write(out, "<PowerPointDocument>" + CR, padding);
|
||||
padding++;
|
||||
dump(docstream, 0, docstream.length, padding);
|
||||
padding--;
|
||||
write(out, "</PowerPointDocument>" + CR, padding);
|
||||
padding--;
|
||||
write(out, "</Presentation>", padding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Dump a part of the document stream into XML
|
||||
* @param data PPT binary data
|
||||
* @param offset offset from the beginning of the document
|
||||
* @param length of the document
|
||||
* @param padding used for formatting results
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public void dump(byte[] data, int offset, int length, int padding) throws IOException {
|
||||
int pos = offset;
|
||||
while (pos <= (offset + length - HEADER_SIZE)){
|
||||
if (pos < 0) break;
|
||||
|
||||
//read record header
|
||||
int info = LittleEndian.getUShort(data, pos);
|
||||
pos += LittleEndian.SHORT_SIZE;
|
||||
int type = LittleEndian.getUShort(data, pos);
|
||||
pos += LittleEndian.SHORT_SIZE;
|
||||
int size = (int)LittleEndian.getUInt(data, pos);
|
||||
pos += LittleEndian.INT_SIZE;
|
||||
|
||||
//get name of the record by type
|
||||
String recname = RecordTypes.recordName(type);
|
||||
write(out, "<"+recname + " info=\""+info+"\" type=\""+type+"\" size=\""+size+"\" offset=\""+(pos-8)+"\"", padding);
|
||||
if (hexHeader){
|
||||
out.write(" header=\"");
|
||||
dump(out, data, pos-8, 8, 0, false);
|
||||
out.write("\"");
|
||||
}
|
||||
out.write(">" + CR);
|
||||
padding++;
|
||||
//this check works both for Escher and PowerPoint records
|
||||
boolean isContainer = (info & 0x000F) == 0x000F;
|
||||
if (isContainer) {
|
||||
//continue to dump child records
|
||||
dump(data, pos, size, padding);
|
||||
} else {
|
||||
//dump first 100 bytes of the atom data
|
||||
dump(out, data, pos, Math.min(size, 100), padding, true);
|
||||
}
|
||||
padding--;
|
||||
write(out, "</"+recname + ">" + CR, padding);
|
||||
|
||||
pos += size;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Dumps the Pictures OLE stream into XML.
|
||||
*
|
||||
* @param data from the Pictures OLE data stream
|
||||
* @param padding
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public void dumpPictures(byte[] data, int padding) throws IOException {
|
||||
int pos = 0;
|
||||
while (pos < data.length) {
|
||||
byte[] header = new byte[PICT_HEADER_SIZE];
|
||||
|
||||
System.arraycopy(data, pos, header, 0, header.length);
|
||||
int size = LittleEndian.getInt(header, 4) - 17;
|
||||
byte[] pictdata = new byte[size];
|
||||
System.arraycopy(data, pos + PICT_HEADER_SIZE, pictdata, 0, pictdata.length);
|
||||
pos += PICT_HEADER_SIZE + size;
|
||||
|
||||
padding++;
|
||||
write(out, "<picture size=\""+size+"\" type=\""+getPictureType(header)+"\">" + CR, padding);
|
||||
padding++;
|
||||
write(out, "<header>" + CR, padding);
|
||||
dump(out, header, 0, header.length, padding, true);
|
||||
write(out, "</header>" + CR, padding);
|
||||
write(out, "<imgdata>" + CR, padding);
|
||||
dump(out, pictdata, 0, Math.min(pictdata.length, 100), padding, true);
|
||||
write(out, "</imgdata>" + CR, padding);
|
||||
padding--;
|
||||
write(out, "</picture>" + CR, padding);
|
||||
padding--;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
if (args.length == 0){
|
||||
System.out.println(
|
||||
"Usage: PPTXMLDump (options) pptfile\n" +
|
||||
"Where options include:\n" +
|
||||
" -f write output to <pptfile>.xml file in the current directory"
|
||||
);
|
||||
return;
|
||||
}
|
||||
boolean outFile = false;
|
||||
for (int i = 0; i < args.length; i++){
|
||||
|
||||
if (args[i].startsWith("-")) {
|
||||
if ("-f".equals(args[i])){
|
||||
//write ouput to a file
|
||||
outFile = true;
|
||||
}
|
||||
} else {
|
||||
File ppt = new File(args[i]);
|
||||
PPTXMLDump dump = new PPTXMLDump(ppt);
|
||||
System.out.println("Dumping " + args[i]);
|
||||
|
||||
if (outFile){
|
||||
FileWriter out = new FileWriter(ppt.getName() + ".xml");
|
||||
dump.dump(out);
|
||||
out.close();
|
||||
} else {
|
||||
StringWriter out = new StringWriter();
|
||||
dump.dump(out);
|
||||
System.out.println(out.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* write a string to <code>out</code> with the specified padding
|
||||
*/
|
||||
private static void write(Writer out, String str, int padding) throws IOException {
|
||||
for (int i = 0; i < padding; i++) out.write(" ");
|
||||
out.write(str);
|
||||
}
|
||||
|
||||
private String getPictureType(byte[] header){
|
||||
String type;
|
||||
int meta = LittleEndian.getUShort(header, 0);
|
||||
|
||||
switch(meta){
|
||||
case 0x46A0: type = "jpeg"; break;
|
||||
case 0x2160: type = "wmf"; break;
|
||||
case 0x6E00: type = "png"; break;
|
||||
default: type = "unknown"; break;
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
/**
|
||||
* dump binary data to <code>out</code> with the specified padding
|
||||
*/
|
||||
private static void dump(Writer out, byte[] data, int offset, int length, int padding, boolean nl) throws IOException {
|
||||
int linesize = 25;
|
||||
for (int i = 0; i < padding; i++) out.write(" ");
|
||||
int i;
|
||||
for (i = offset; i < (offset + length); i++) {
|
||||
int c = data[i];
|
||||
out.write((char) hexval[(c & 0xF0) >> 4]);
|
||||
out.write((char) hexval[(c & 0x0F) >> 0]);
|
||||
out.write(' ');
|
||||
if((i+1-offset) % linesize == 0 && i != (offset + length-1)) {
|
||||
out.write(CR);
|
||||
for (int j = 0; j < padding; j++) out.write(" ");
|
||||
}
|
||||
}
|
||||
if(nl && length > 0)out.write(CR);
|
||||
}
|
||||
|
||||
private static final byte hexval[] =
|
||||
{(byte) '0', (byte) '1', (byte) '2', (byte) '3',
|
||||
(byte) '4', (byte) '5', (byte) '6', (byte) '7',
|
||||
(byte) '8', (byte) '9', (byte) 'A', (byte) 'B',
|
||||
(byte) 'C', (byte) 'D', (byte) 'E', (byte) 'F'};
|
||||
|
||||
}
|
Loading…
Reference in New Issue