Initial Powerpoint support, by Nick Burch

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353701 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Avik Sengupta 2005-05-28 05:36:00 +00:00
parent 865c8bb4c4
commit 6424e17b17
47 changed files with 4876 additions and 0 deletions

View File

@ -0,0 +1,347 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf;
import java.util.*;
import java.io.*;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSDocument;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.MutablePropertySet;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hslf.record.*;
/**
* This class contains the main functionality for the Powerpoint file
* "reader". It is only a very basic class for now
*
* @author Nick Burch
*/
public class HSLFSlideShow
{
    // Name of the POIFS entry that holds the main PowerPoint record stream
    private static final String PPT_DOCUMENT_ENTRY = "PowerPoint Document";

    // The stream we were opened from, if any. Closed again by close()
    private InputStream istream;
    private POIFSFileSystem filesystem;

    // Holds metadata on our document
    private SummaryInformation sInf;
    private DocumentSummaryInformation dsInf;
    private CurrentUserAtom currentUser;

    // Raw bytes of the "PowerPoint Document" stream, as read on open
    private byte[] _docstream;

    // Top level records parsed out of _docstream
    private Record[] _records;

    /**
     * Constructs a Powerpoint document from fileName. Parses the document
     * and places all the important stuff into data structures.
     * The file stream that gets opened here is only closed by
     * {@link #close()}.
     *
     * @param fileName The name of the file to read.
     * @throws IOException if there is a problem while parsing the document.
     */
    public HSLFSlideShow(String fileName) throws IOException
    {
        this(new FileInputStream(fileName));
    }

    /**
     * Constructs a Powerpoint document from an input stream. Parses the
     * document and places all the important stuff into data structures.
     * The stream is remembered, and closed by {@link #close()}.
     *
     * @param inputStream the source of the data
     * @throws IOException if there is a problem while parsing the document.
     */
    public HSLFSlideShow(InputStream inputStream) throws IOException
    {
        //do Ole stuff
        this(new POIFSFileSystem(inputStream));
        istream = inputStream;
    }

    /**
     * Constructs a Powerpoint document from a POIFS Filesystem. Parses the
     * document and places all the important stuff into data structures.
     *
     * @param filesystem the POIFS FileSystem to read from
     * @throws IOException if there is a problem while parsing the document.
     */
    public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException
    {
        this.filesystem = filesystem;

        // Go find a PowerPoint document in the stream
        // Save anything useful we come across
        readFIB();

        // Look for Property Streams:
        readProperties();
    }

    /**
     * Shuts things down. Closes the input stream we were built from
     * (if there was one) and drops the reference to the filesystem.
     *
     * @throws IOException if closing the underlying stream fails
     */
    public void close() throws IOException
    {
        if(istream != null) {
            istream.close();
        }
        filesystem = null;
    }

    /**
     * Extracts the main document stream from the POI file then hands off
     * to other functions that parse other areas.
     *
     * @throws IOException if the stream can't be found, or ends before
     *  the number of bytes its directory entry promises have been read
     */
    private void readFIB() throws IOException
    {
        // Get the main document stream
        DocumentEntry docProps =
            (DocumentEntry)filesystem.getRoot().getEntry(PPT_DOCUMENT_ENTRY);

        // Grab the document stream. A single read() call is allowed to
        // return fewer bytes than asked for, so keep going until the
        // buffer is full, and complain loudly if the stream runs dry
        _docstream = new byte[docProps.getSize()];
        InputStream docIn = filesystem.createDocumentInputStream(PPT_DOCUMENT_ENTRY);
        try {
            int filled = 0;
            while(filled < _docstream.length) {
                int got = docIn.read(_docstream, filled, _docstream.length - filled);
                if(got < 0) {
                    throw new IOException(
                        "Stream '" + PPT_DOCUMENT_ENTRY + "' ended after " + filled +
                        " bytes, but " + _docstream.length + " were expected"
                    );
                }
                filled += got;
            }
        } finally {
            docIn.close();
        }

        // The format of records in a powerpoint file are:
        //   <little endian 2 byte "info">
        //   <little endian 2 byte "type">
        //   <little endian 4 byte "length">
        // If it has a zero length, following it will be another record
        //   <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
        // If it has a length, depending on its type it may have children or data
        // If it has children, these will follow straight away
        //   <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
        // If it has data, this will come straight after, and run for the length
        //   <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
        // All lengths given exclude the 8 byte record header
        // (Data records are known as Atoms)

        // Document should start with:
        //   0F 00 E8 03 ## ## ## ##
        //     (type 1000 = document, info 00 0f is normal, rest is document length)
        //   01 00 E9 03 28 00 00 00
        //     (type 1001 = document atom, info 00 01 normal, 28 bytes long)
        //   80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
        //   05 00 00 00 0A 00 00 00 xx xx xx
        //     (the contents of the document atom, not sure what it means yet)
        //   (records then follow)

        // When parsing a document, look to see if you know about that type
        //  of the current record. If you know it's a type that has children,
        //  process the record's data area looking for more records
        // If you know about the type and it doesn't have children, either do
        //  something with the data (eg TextRun) or skip over it
        // If you don't know about the type, play safe and skip over it (using
        //  its length to know where the next record will start)
        //
        // For now, this work is handled by Record.findChildRecords
        _records = Record.findChildRecords(_docstream,0,_docstream.length);
    }

    /**
     * Find the properties from the filesystem, and load them.
     * A property set that can't be loaded is left as null. If the
     * Current User atom can't be read, an empty one is used instead.
     */
    public void readProperties() {
        // DocumentSummaryInformation
        dsInf = (DocumentSummaryInformation)getPropertySet("\005DocumentSummaryInformation");

        // SummaryInformation
        sInf = (SummaryInformation)getPropertySet("\005SummaryInformation");

        // Current User
        try {
            currentUser = new CurrentUserAtom(filesystem);
        } catch(IOException ie) {
            System.err.println("Error finding Current User Atom:\n" + ie);
            currentUser = new CurrentUserAtom();
        }
    }

    /**
     * For a given named property entry, either return it or null
     * if it wasn't found. Problems are reported on System.err rather
     * than being thrown.
     *
     * @param setName the name of the POIFS entry holding the property set
     * @return the property set, or null if it was missing or unreadable
     */
    public PropertySet getPropertySet(String setName) {
        DocumentInputStream dis;
        try {
            // Find the entry, and get an input stream for it
            dis = filesystem.createDocumentInputStream(setName);
        } catch(IOException ie) {
            // Oh well, doesn't exist
            System.err.println("Error getting property set with name " + setName + "\n" + ie);
            return null;
        }

        try {
            // Create the Property Set
            PropertySet set = PropertySetFactory.create(dis);
            return set;
        } catch(IOException ie) {
            // Must be corrupt or something like that
            System.err.println("Error creating property set with name " + setName + "\n" + ie);
        } catch(org.apache.poi.hpsf.HPSFException he) {
            // HPSF couldn't make sense of the contents
            System.err.println("Error creating property set with name " + setName + "\n" + he);
        }
        return null;
    }

    /**
     * Writes out the slideshow file that is represented by an instance of
     * this class. The offsets stored by PersistPtrHolder and UserEditAtom
     * records are updated to where those records land in the new stream,
     * and the Current User atom is pointed at the last UserEditAtom written.
     *
     * @param out The OutputStream to write to.
     * @throws IOException If there is an unexpected IOException from the passed
     *  in OutputStream, or if a UserEditAtom refers to an offset at which
     *  no matching record has been written yet
     */
    public void write(OutputStream out) throws IOException {
        // Get a new Filesystem to write into
        POIFSFileSystem outFS = new POIFSFileSystem();

        // Write out the Property Streams
        if(sInf != null) {
            writePropertySet("\005SummaryInformation",sInf,outFS);
        }
        if(dsInf != null) {
            writePropertySet("\005DocumentSummaryInformation",dsInf,outFS);
        }

        // Need to take special care of PersistPtrHolder and UserEditAtoms
        // Store where they used to be, and where they are now
        Hashtable persistPtrHolderPos = new Hashtable();
        Hashtable userEditAtomsPos = new Hashtable();
        int lastUserEditAtomPos = -1;

        // Write ourselves out
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        for(int i=0; i<_records.length; i++) {
            // If it's a special record, record where it was and now is
            if(_records[i] instanceof PersistPtrHolder) {
                // Update position
                PersistPtrHolder pph = (PersistPtrHolder)_records[i];
                int oldPos = pph.getLastOnDiskOffset();
                int newPos = baos.size();
                pph.setLastOnDiskOffet(newPos);
                persistPtrHolderPos.put(new Integer(oldPos),new Integer(newPos));
            }
            if(_records[i] instanceof UserEditAtom) {
                // Update position
                UserEditAtom uea = (UserEditAtom)_records[i];
                int oldPos = uea.getLastOnDiskOffset();
                int newPos = baos.size();
                lastUserEditAtomPos = newPos;
                uea.setLastOnDiskOffet(newPos);
                userEditAtomsPos.put(new Integer(oldPos),new Integer(newPos));

                // Update internal positions. An offset of zero is
                // left untouched
                int oldLastEdit = uea.getLastUserEditAtomOffset();
                if(oldLastEdit != 0) {
                    uea.setLastUserEditAtomOffset(
                        relocatedOffset(userEditAtomsPos, oldLastEdit, "UserEditAtom")
                    );
                }
                int oldPersistPtr = uea.getPersistPointersOffset();
                if(oldPersistPtr != 0) {
                    uea.setPersistPointersOffset(
                        relocatedOffset(persistPtrHolderPos, oldPersistPtr, "PersistPtrHolder")
                    );
                }
            }

            // Finally, write out
            _records[i].writeOut(baos);
        }

        ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
        outFS.createDocument(bais,PPT_DOCUMENT_ENTRY);

        // Update and write out the Current User atom
        if(lastUserEditAtomPos != -1) {
            currentUser.setCurrentEditOffset(lastUserEditAtomPos);
        }
        currentUser.writeToFS(outFS);

        // Send the POIFSFileSystem object out
        outFS.writeFilesystem(out);
    }

    /**
     * Looks up where a record that used to live at oldOffset has been
     * written to, failing with a descriptive error (rather than a
     * NullPointerException) if nothing was recorded for that offset.
     */
    private static int relocatedOffset(Hashtable moved, int oldOffset, String recordKind) throws IOException {
        Integer newOffset = (Integer)moved.get(new Integer(oldOffset));
        if(newOffset == null) {
            throw new IOException(
                "A UserEditAtom refers to a " + recordKind + " at offset " + oldOffset +
                ", but no such record has been written before it"
            );
        }
        return newOffset.intValue();
    }

    /**
     * Writes out a given PropertySet, as an entry with the given name.
     * If HPSF reports that it can't write the set, a message is
     * printed to System.err and the set is skipped.
     */
    private void writePropertySet(String name, PropertySet set, POIFSFileSystem fs) throws IOException {
        try {
            MutablePropertySet mSet = new MutablePropertySet(set);
            ByteArrayOutputStream bOut = new ByteArrayOutputStream();
            mSet.write(bOut);
            byte[] data = bOut.toByteArray();
            ByteArrayInputStream bIn = new ByteArrayInputStream(data);
            fs.createDocument(bIn,name);
            System.out.println("Wrote property set " + name + " of size " + data.length);
        } catch(org.apache.poi.hpsf.WritingNotSupportedException wnse) {
            System.err.println("Couldn't write property set with name " + name + " as not supported by HPSF yet");
        }
    }


    /* ******************* fetching methods follow ********************* */

    /**
     * Returns an array of all the records found in the slideshow
     */
    public Record[] getRecords() { return _records; }

    /**
     * Returns an array of the bytes of the "PowerPoint Document" stream,
     * as they were when the document was opened. These are not
     * refreshed by a call to write, so may be out of date afterwards!
     */
    public byte[] getUnderlyingBytes() { return _docstream; }

    /**
     * Fetch the Document Summary Information of the document
     * (null if it couldn't be loaded)
     */
    public DocumentSummaryInformation getDocumentSummaryInformation() { return dsInf; }

    /**
     * Fetch the Summary Information of the document
     * (null if it couldn't be loaded)
     */
    public SummaryInformation getSummaryInformation() { return sInf; }

    /**
     * Fetch the Current User Atom of the document
     */
    public CurrentUserAtom getCurrentUserAtom() { return currentUser; }
}

View File

@ -0,0 +1,86 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.dev;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.usermodel.*;
import java.io.*;
/**
* Uses record level code to locate PPDrawing entries.
* Having found them, it sees if they have DDF Textbox records, and if so,
* searches those for text. Prints out any text it finds
*/
public class PPDrawingTextListing {
    /**
     * Opens the PowerPoint file named by the first argument, and for
     * every PPDrawing sitting directly below a top level record, prints
     * the text of its textbox wrappers.
     *
     * @param args command line arguments; args[0] is the file to read
     */
    public static void main(String[] args) throws Exception {
        if(args.length < 1) {
            System.err.println("Need to give a filename");
            System.exit(1);
        }

        HSLFSlideShow ss = new HSLFSlideShow(args[0]);

        // Find PPDrawings at any second level position
        Record[] topLevel = ss.getRecords();
        for(int top=0; top<topLevel.length; top++) {
            Record[] kids = topLevel[top].getChildRecords();
            if(kids == null || kids.length == 0) {
                continue;
            }

            for(int kid=0; kid<kids.length; kid++) {
                if(!(kids[kid] instanceof PPDrawing)) {
                    continue;
                }
                System.out.println("Found PPDrawing at " + kid + " in top level record " + top + " (" + topLevel[top].getRecordType() + ")" );
                listTextboxes((PPDrawing)kids[kid]);
            }
        }
    }

    /**
     * Prints how many EscherTextboxWrappers the drawing has, then the
     * contents of each one in turn.
     */
    private static void listTextboxes(PPDrawing ppd) {
        EscherTextboxWrapper[] wrappers = ppd.getTextboxWrappers();
        System.out.println(" Has " + wrappers.length + " textbox wrappers within");

        for(int w=0; w<wrappers.length; w++) {
            Record[] pptatoms = wrappers[w].getChildRecords();
            System.out.println(" " + w + " has " + pptatoms.length + " PPT atoms within");

            for(int a=0; a<pptatoms.length; a++) {
                printText(pptatoms[a]);
            }
        }
    }

    /**
     * Prints the text of the atom if it is one of the two text holding
     * atom types, and does nothing for anything else.
     */
    private static void printText(Record atom) {
        String text = null;
        if(atom instanceof TextBytesAtom) {
            text = ((TextBytesAtom)atom).getText();
        }
        if(atom instanceof TextCharsAtom) {
            text = ((TextCharsAtom)atom).getText();
        }
        if(text == null) {
            return;
        }
        // Carriage returns separate lines inside the text, show as newlines
        System.out.println(" ''" + text.replace('\r','\n') + "''");
    }
}

View File

@ -0,0 +1,91 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.dev;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.usermodel.*;
import java.io.*;
/**
* Uses record level code to locate SlideListWithText entries.
* Having found them, it sees if they have any text, and prints out
* what it finds.
*/
public class SLWTTextListing {
    /**
     * Opens the PowerPoint file named by the first argument, and for
     * every SlideListWithText found directly inside a Document record
     * (type 1000), prints its slide atom sets and the text in them.
     *
     * @param args command line arguments; args[0] is the file to read
     */
    public static void main(String[] args) throws Exception {
        if(args.length < 1) {
            System.err.println("Need to give a filename");
            System.exit(1);
        }

        HSLFSlideShow ss = new HSLFSlideShow(args[0]);

        // Find the documents, and then their SLWT
        Record[] topLevel = ss.getRecords();
        for(int docIdx=0; docIdx<topLevel.length; docIdx++) {
            if(topLevel[docIdx].getRecordType() != 1000L) {
                continue;
            }

            Record[] docChildren = topLevel[docIdx].getChildRecords();
            for(int c=0; c<docChildren.length; c++) {
                if(!(docChildren[c] instanceof SlideListWithText)) {
                    continue;
                }
                System.out.println("Found SLWT in document at " + docIdx);
                System.out.println(" Has " + docChildren[c].getChildRecords().length + " children");
                listAtomSets((SlideListWithText)docChildren[c]);
            }
        }
    }

    /**
     * Walks the SlideAtomsSets of the SLWT. Each set holds a
     * SlidePersistAtom, followed by a bunch of text and related records.
     */
    private static void listAtomSets(SlideListWithText slwt) {
        SlideListWithText.SlideAtomsSet[] sets = slwt.getSlideAtomsSets();
        System.out.println(" Has " + sets.length + " AtomSets in it");

        for(int s=0; s<sets.length; s++) {
            SlidePersistAtom spa = sets[s].getSlidePersistAtom();
            System.out.println(" " + s + " has slide id " + spa.getSlideIdentifier() );
            System.out.println(" " + s + " has ref id " + spa.getRefID() );

            Record[] slideRecords = sets[s].getSlideRecords();
            for(int r=0; r<slideRecords.length; r++) {
                printText(slideRecords[r]);
            }
        }
    }

    /**
     * Prints the text of the record if it is one of the two text
     * holding atom types, and does nothing for anything else.
     */
    private static void printText(Record atom) {
        String text = null;
        if(atom instanceof TextBytesAtom) {
            text = ((TextBytesAtom)atom).getText();
        }
        if(atom instanceof TextCharsAtom) {
            text = ((TextCharsAtom)atom).getText();
        }
        if(text == null) {
            return;
        }
        // Carriage returns separate lines inside the text, show as newlines
        System.out.println(" ''" + text.replace('\r','\n') + "''");
    }
}

View File

@ -0,0 +1,66 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.dev;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.record.*;
import java.io.*;
/**
* Uses record level code to locate Notes and Slide records.
* Having found them, it asks their SlideAtom or NotesAtom entries
* what they are all about. Useful for checking the matching between
* Slides, Master Slides and Notes
*/
public class SlideAndNotesAtomListing {
    /**
     * Opens the PowerPoint file named by the first argument, and prints
     * the IDs held by the SlideAtom / NotesAtom of every top level
     * Slide and Notes record.
     *
     * @param args command line arguments; args[0] is the file to read
     */
    public static void main(String[] args) throws Exception {
        if(args.length < 1) {
            System.err.println("Need to give a filename");
            System.exit(1);
        }

        HSLFSlideShow ss = new HSLFSlideShow(args[0]);
        System.out.println("");

        // Find either Slides or Notes, and report on each
        Record[] topLevel = ss.getRecords();
        for(int idx=0; idx<topLevel.length; idx++) {
            Record candidate = topLevel[idx];
            if(candidate instanceof Slide) {
                describeSlide((Slide)candidate, idx);
            }
            if(candidate instanceof Notes) {
                describeNotes((Notes)candidate, idx);
            }
        }
    }

    /**
     * Prints the master and notes IDs that the slide's SlideAtom holds
     */
    private static void describeSlide(Slide slide, int index) {
        SlideAtom atom = slide.getSlideAtom();
        System.out.println("Found Slide at " + index);
        System.out.println(" Slide's master ID is " + atom.getMasterID());
        System.out.println(" Slide's notes ID is " + atom.getNotesID());
        System.out.println("");
    }

    /**
     * Prints the slide ID that the notes' NotesAtom holds
     */
    private static void describeNotes(Notes notes, int index) {
        NotesAtom atom = notes.getNotesAtom();
        System.out.println("Found Notes at " + index);
        System.out.println(" Notes ID is " + atom.getSlideID());
        System.out.println("");
    }
}

View File

@ -0,0 +1,167 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.dev;
import java.util.*;
import java.io.*;
import org.apache.poi.ddf.*;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.record.*;
import org.apache.poi.util.LittleEndian;
/**
* This class provides a way to view the contents of a powerpoint file.
* It will use the record layer to grok the contents of the file, and
* will print out what it finds.
*
* @author Nick Burch
*/
public class SlideShowRecordDumper
{
    // Package prefix stripped from record class names when displaying them
    private static final String RECORD_PACKAGE = "org.apache.poi.hslf.record.";

    private HSLFSlideShow doc;

    /**
     * right now this function takes one parameter: a ppt file, and outputs
     * a dump of what it contains
     */
    public static void main(String args[]) throws IOException
    {
        if(args.length == 0) {
            System.err.println("Usage: SlideShowRecordDumper <filename>");
            return;
        }

        SlideShowRecordDumper dumper = new SlideShowRecordDumper(args[0]);
        try {
            dumper.printDump();
        } finally {
            // Release the file even if the dump blew up part way through
            dumper.close();
        }
    }

    /**
     * Constructs a Powerpoint dump from fileName. Parses the document,
     * ready for printDump to be called
     *
     * @param fileName The name of the file to read.
     * @throws IOException if there is a problem while parsing the document.
     */
    public SlideShowRecordDumper(String fileName) throws IOException
    {
        doc = new HSLFSlideShow(fileName);
    }

    /**
     * Shuts things down. Closes underlying streams etc. Safe to
     * call more than once.
     *
     * @throws IOException if closing the underlying document fails
     */
    public void close() throws IOException
    {
        if(doc != null) {
            doc.close();
        }
        doc = null;
    }

    /**
     * Prints out all the records in the tree, starting from the top
     * level ones at position 0
     */
    public void printDump() throws IOException {
        walkTree(0,0,doc.getRecords());
    }

    /**
     * Turns the number into upper case hex, left padded with zeros
     * until it is at least padding characters long
     *
     * @param number the value to display
     * @param padding the minimum width of the result
     */
    public String makeHex(int number, int padding) {
        String digits = Integer.toHexString(number).toUpperCase();
        StringBuffer hex = new StringBuffer();
        for(int i=digits.length(); i<padding; i++) {
            hex.append('0');
        }
        hex.append(digits);
        return hex.toString();
    }

    /**
     * Reverses the order of the bytes in a hex string, putting a space
     * between each byte, eg "03E8" becomes "E8 03". A string with an odd
     * number of digits has a zero put on the front first.
     *
     * @param s the hex digits, most significant first
     */
    public String reverseHex(String s) {
        StringBuffer ret = new StringBuffer();

        // Get to a multiple of two
        if((s.length() % 2) != 0) { s = "0" + s; }

        // Work backwards through it, a byte (two digits) at a time
        char[] c = s.toCharArray();
        for(int i=c.length; i>0; i-=2) {
            ret.append(c[i-2]);
            ret.append(c[i-1]);
            if(i != 2) { ret.append(' '); }
        }
        return ret.toString();
    }

    /**
     * Figures out how many bytes the record takes up when written out,
     * by writing it to a scratch buffer and counting
     */
    public int getDiskLen(Record r) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        r.writeOut(baos);
        return baos.size();
    }

    /**
     * Prints details of each of the records, then recurses into the
     * children of any that have them.
     *
     * @param depth how far to indent the output by
     * @param pos the position of the first of the records
     * @param records the records to display
     */
    public void walkTree(int depth, int pos, Record[] records) throws IOException {
        StringBuffer indent = new StringBuffer();
        for(int i=0; i<depth; i++) { indent.append(" "); }
        String ind = indent.toString();

        for(int i=0; i<records.length; i++) {
            Record r = records[i];

            // Figure out how big it is
            int len = getDiskLen(r);

            // Grab the type as hex
            String hexType = makeHex((int)r.getRecordType(),4);
            String rHexType = reverseHex(hexType);

            // Grab the hslf.record type, without the package if it's ours
            String cname = r.getClass().getName();
            if(cname.startsWith(RECORD_PACKAGE)) {
                cname = cname.substring(RECORD_PACKAGE.length());
            }

            // Display the record
            System.out.println(ind + "At position " + pos + " (" + makeHex(pos,6) + "):");
            System.out.println(ind + " Record is of type " + cname);
            System.out.println(ind + " Type is " + r.getRecordType() + " (" + hexType + " -> " + rHexType + " )");
            System.out.println(ind + " Len is " + (len-8) + " (" + makeHex((len-8),8) + "), on disk len is " + len );
            System.out.println();

            // If it has children, show them. They start straight
            // after this record's 8 byte header
            Record[] children = r.getChildRecords();
            if(children != null) {
                walkTree((depth+3),pos+8,children);
            }

            // Wind on the position marker
            pos += len;
        }
    }
}

View File

@ -0,0 +1,95 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.dev;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.record.*;
import java.io.*;
/**
* Uses record level code to locate UserEditAtom records, and other
* persistence related atoms. Tries to match them together, to help
* illuminate quite what all the offsets mean
*/
public class UserEditAndPersistListing {
    /**
     * Opens the PowerPoint file named by the first argument, then
     * prints where the persist pointer blocks and the UserEditAtoms
     * sit among the top level records, along with the offsets that the
     * UserEditAtoms and the CurrentUserAtom hold.
     *
     * @param args command line arguments; args[0] is the file to read
     */
    public static void main(String[] args) throws Exception {
        if(args.length < 1) {
            System.err.println("Need to give a filename");
            System.exit(1);
        }

        HSLFSlideShow ss = new HSLFSlideShow(args[0]);
        System.out.println("");

        Record[] topLevel = ss.getRecords();

        // Find any persist ones first
        int offset = 0;
        for(int i=0; i<topLevel.length; i++) {
            Record rec = topLevel[i];
            long type = rec.getRecordType();
            String where = offset + " (" + Integer.toHexString(offset) + ")";

            if(type == 6001L) {
                // PersistPtrFullBlock
                System.out.println("Found PersistPtrFullBlock at " + where);
            } else if(type == 6002L) {
                // PersistPtrIncrementalBlock
                System.out.println("Found PersistPtrIncrementalBlock at " + where);
            }

            offset += sizeOnDisk(rec);
        }
        System.out.println("");

        // Now look for UserEditAtoms, counting from the start again
        offset = 0;
        for(int i=0; i<topLevel.length; i++) {
            Record rec = topLevel[i];
            if(rec instanceof UserEditAtom) {
                UserEditAtom uea = (UserEditAtom)rec;
                System.out.println("Found UserEditAtom at " + offset + " (" + Integer.toHexString(offset) + ")");
                System.out.println(" lastUserEditAtomOffset = " + uea.getLastUserEditAtomOffset() );
                System.out.println(" persistPointersOffset = " + uea.getPersistPointersOffset() );
                System.out.println(" docPersistRef = " + uea.getDocPersistRef() );
                System.out.println(" maxPersistWritten = " + uea.getMaxPersistWritten() );
            }

            offset += sizeOnDisk(rec);
        }
        System.out.println("");

        // Query the CurrentUserAtom
        CurrentUserAtom cua = ss.getCurrentUserAtom();
        System.out.println("Checking Current User Atom");
        System.out.println(" Thinks the CurrentEditOffset is " + cua.getCurrentEditOffset());
        System.out.println("");
    }

    /**
     * Returns the number of bytes the record takes up when written out,
     * found by writing it to a scratch buffer
     */
    private static int sizeOnDisk(Record rec) throws IOException {
        ByteArrayOutputStream scratch = new ByteArrayOutputStream();
        rec.writeOut(scratch);
        return scratch.size();
    }
}

View File

@ -0,0 +1,34 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.exceptions;
/**
* This exception is thrown when we try to create a record, and the
* underlying data just doesn't match up
*
* @author Nick Burch
*/
public class InvalidRecordFormatException extends Exception
{
    /**
     * Creates the exception, with a description of what didn't match up
     *
     * @param s the detail message, handed on to Exception unchanged
     */
    public InvalidRecordFormatException(String s)
    {
        super(s);
    }
}

View File

@ -0,0 +1,182 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.extractor;
import java.io.*;
import java.util.HashSet;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*;
import org.apache.poi.hslf.usermodel.*;
/**
* This class can be used to extract text from a PowerPoint file.
* Can optionally also get the notes from one.
*
* @author Nick Burch
*/
public class PowerPointExtractor
{
private HSLFSlideShow _hslfshow;
private SlideShow _show;
private Slide[] _slides;
private Notes[] _notes;
/**
* Basic extractor. Returns all the text, and optionally all the notes
*/
public static void main(String args[]) throws IOException
{
if(args.length < 1) {
System.err.println("Useage:");
System.err.println("\tPowerPointExtractor [-notes] <file>");
System.exit(1);
}
boolean notes = false;
String file;
if(args.length > 1) {
notes = true;
file = args[1];
} else {
file = args[0];
}
PowerPointExtractor ppe = new PowerPointExtractor(file);
System.out.println(ppe.getText(true,notes));
ppe.close();
}
/**
* Creates a PowerPointExtractor
* @param fileName
*/
public PowerPointExtractor(String fileName) throws IOException {
_hslfshow = new HSLFSlideShow(fileName);
_show = new SlideShow(_hslfshow);
_slides = _show.getSlides();
_notes = _show.getNotes();
}
/**
* Creates a PowerPointExtractor
* @param iStream
*/
public PowerPointExtractor(InputStream iStream) throws IOException {
_hslfshow = new HSLFSlideShow(iStream);
_show = new SlideShow(_hslfshow);
_slides = _show.getSlides();
_notes = _show.getNotes();
}
/**
* Creates a PowerPointExtractor
* @param fs
*/
public PowerPointExtractor(POIFSFileSystem fs) throws IOException {
_hslfshow = new HSLFSlideShow(fs);
_show = new SlideShow(_hslfshow);
_slides = _show.getSlides();
_notes = _show.getNotes();
}
/**
* Shuts down the underlying streams
*/
public void close() throws IOException {
_hslfshow.close();
_hslfshow = null;
_show = null;
_slides = null;
_notes = null;
}
/**
* Fetches all the slide text from the slideshow, but not the notes
*/
public String getText() {
return getText(true,false);
}
/**
* Fetches all the notes text from the slideshow, but not the slide text
*/
public String getNotes() {
return getText(false,true);
}
/**
* Fetches text from the slideshow, be it slide text or note text
* @param getSlideText fetch slide text
* @param getNoteText fetch note text
*/
public String getText(boolean getSlideText, boolean getNoteText) {
StringBuffer ret = new StringBuffer();
if(getSlideText) {
for(int i=0; i<_slides.length; i++) {
Slide slide = _slides[i];
TextRun[] runs = slide.getTextRuns();
for(int j=0; j<runs.length; j++) {
TextRun run = runs[j];
String text = run.getText();
ret.append(text);
if(! text.endsWith("\n")) {
ret.append("\n");
}
}
}
if(getNoteText) {
ret.append(" ");
}
}
if(getNoteText) {
// Not currently using _notes, as that can have the notes of
// master sheets in. Grab Slide list, then work from there,
// but ensure no duplicates
HashSet seenNotes = new HashSet();
for(int i=0; i<_slides.length; i++) {
Notes notes = _slides[i].getNotesSheet();
if(notes == null) { continue; }
Integer id = new Integer(notes.getSheetNumber());
if(seenNotes.contains(id)) { continue; }
seenNotes.add(id);
TextRun[] runs = notes.getTextRuns();
if(runs != null && runs.length > 0) {
for(int j=0; j<runs.length; j++) {
TextRun run = runs[j];
String text = run.getText();
ret.append(text);
if(! text.endsWith("\n")) {
ret.append("\n");
}
}
}
}
}
return ret.toString();
}
}

View File

@ -0,0 +1,73 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.model;
import java.util.*;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.record.SlideListWithText.*;
import org.apache.poi.util.LittleEndian;
/**
* This class represents a slide's notes in a PowerPoint Document. It
* allows access to the text within, and the layout. For now, it only
* does the text side of things though
*
* @author Nick Burch
*/
public class Notes extends Sheet
{
    private int _sheetNo;
    private org.apache.poi.hslf.record.Notes _notes;
    private TextRun[] _runs;

    /**
     * Builds a Notes sheet on top of the given Notes record. The sheet
     * number is taken from the slide ID of the record's NotesAtom, and
     * the TextRuns are found from the record's PPDrawing.
     *
     * @param notes the Notes record to read from
     */
    public Notes (org.apache.poi.hslf.record.Notes notes) {
        this._notes = notes;

        // The NotesAtom is what knows the sheet number
        this._sheetNo = this._notes.getNotesAtom().getSlideID();

        // TextRuns get built from pairs of TextHeaderAtom and one of
        //  TextBytesAtom or TextCharsAtom, which live inside the
        //  EscherTextboxWrapper's of the PPDrawing
        this._runs = findTextRuns(this._notes.getPPDrawing());
    }


    // Accessor methods follow

    /**
     * Returns an array of all the TextRuns that were found when this
     * sheet was constructed
     */
    public TextRun[] getTextRuns() {
        return this._runs;
    }

    /**
     * Returns the sheet number, as read from the NotesAtom
     */
    public int getSheetNumber() {
        return this._sheetNo;
    }
}

View File

@ -0,0 +1,92 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.model;
import org.apache.poi.hslf.record.*;
import java.util.Vector;
/**
 * This class defines the common format of "Sheets" in a powerpoint
 * document. Such sheets could be Slides, Notes, Master etc.
 * It also holds the shared helpers that turn runs of text records
 * into TextRun objects.
 *
 * @author Nick Burch
 */
public abstract class Sheet
{
    /**
     * Returns an array of all the TextRuns in the sheet.
     */
    public abstract TextRun[] getTextRuns();

    /**
     * Returns the sheet number
     */
    public abstract int getSheetNumber();

    /**
     * For a given PPDrawing, grab all the TextRuns. Every textbox
     * wrapper of the drawing is scanned in order.
     *
     * @param ppdrawing the drawing whose textbox wrappers are searched
     * @return the TextRuns found, in the order encountered (never
     *          containing null entries)
     */
    public static TextRun[] findTextRuns(PPDrawing ppdrawing) {
        Vector runsV = new Vector();
        EscherTextboxWrapper[] wrappers = ppdrawing.getTextboxWrappers();
        for(int i=0; i<wrappers.length; i++) {
            findTextRuns(wrappers[i].getChildRecords(),runsV);
        }

        // Hand back an array, rather than the vector
        TextRun[] runs = new TextRun[runsV.size()];
        for(int i=0; i<runs.length; i++) {
            runs[i] = (TextRun)runsV.get(i);
        }
        return runs;
    }

    /**
     * Scans through the supplied record array, looking for
     * a TextHeaderAtom followed by one of a TextBytesAtom or
     * a TextCharsAtom. Builds up TextRuns from these.
     * A TextHeaderAtom followed by a record of type 4010 carries no
     * text; the pair is skipped and nothing is added for it.
     *
     * @param records the records to build from
     * @param found vector to add any found to (only TextRun instances
     *         are added, never null)
     */
    protected static void findTextRuns(Record[] records, Vector found) {
        // Stop one short of the end, since we always look at pairs
        for(int i=0; i<(records.length-1); i++) {
            if(!(records[i] instanceof TextHeaderAtom)) {
                continue;
            }

            TextHeaderAtom tha = (TextHeaderAtom)records[i];
            Record next = records[i+1];
            TextRun trun = null;

            if(next instanceof TextCharsAtom) {
                trun = new TextRun(tha,(TextCharsAtom)next);
            } else if(next instanceof TextBytesAtom) {
                trun = new TextRun(tha,(TextBytesAtom)next);
            } else if(next.getRecordType() == 4010L) {
                // Safe to ignore - there's no text to build a run from
                // NOTE(review): 4010 is presumably TextSpecInfoAtom - confirm
            } else {
                System.err.println("Found a TextHeaderAtom not followed by a TextBytesAtom or TextCharsAtom: Followed by " + records[i+1].getRecordType());
                // Don't skip the following record, it may be a header itself
                continue;
            }

            // Only record real runs. Adding the null left over from the
            //  ignorable case would surface later as a NullPointerException
            //  in whoever walks the returned TextRun array
            if(trun != null) {
                found.add(trun);
            }

            // The following record has been dealt with, step over it
            i++;
        }
    }
}

View File

@ -0,0 +1,118 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.model;
import java.util.*;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.record.SlideListWithText.*;
import org.apache.poi.util.LittleEndian;
/**
 * Model-level view of a single slide in a PowerPoint Document. It
 * gives access to the text held within the slide. Layout information
 * is not handled yet, only the text.
 *
 * @author Nick Burch
 */
public class Slide extends Sheet
{
    /** Not yet read from the Slide record; always -1 for now */
    private int _sheetNo;
    private org.apache.poi.hslf.record.Slide _slide;
    private SlideAtomsSet[] _atomSet;
    /** Runs built from the SlideAtomsSets */
    private TextRun[] _runs;
    /** Runs from the PPDrawing; shouldn't really be any though */
    private TextRun[] _otherRuns;
    private Notes _notes;

    /**
     * Builds a Slide from its Slide record, its (optional) Notes sheet,
     * and the SlideAtomsSets holding the text that is not embedded in
     * the PPDrawing. TextRuns are worked out up front, to provide
     * easier access to the text.
     *
     * @param slide the Slide record we're based on
     * @param notes the Notes sheet of this slide, handed back as-is by
     *         getNotesSheet()
     * @param atomSet the SlideAtomsSets to get the text from
     */
    public Slide(org.apache.poi.hslf.record.Slide slide, Notes notes, SlideAtomsSet[] atomSet) {
        _slide = slide;
        _notes = notes;
        _atomSet = atomSet;

        // The sheet number isn't taken from the SlideAtom yet
        _sheetNo = -1;

        // Text embedded directly in the PPDrawing
        _otherRuns = findTextRuns(_slide.getPPDrawing());

        // De-duplicate the SlideAtomsSets on their ref ID. A later set
        //  with an already seen ID takes over the slot of the earlier one,
        //  so the order of first appearance is retained
        Hashtable slotForRefId = new Hashtable();
        Vector chosenSets = new Vector();
        for(int pos=0; pos<_atomSet.length; pos++) {
            SlideAtomsSet candidate = _atomSet[pos];
            Integer refId = new Integer(candidate.getSlidePersistAtom().getRefID());

            Integer existingSlot = (Integer)slotForRefId.get(refId);
            if(existingSlot == null) {
                // First time we've met this ID, give it the next slot
                chosenSets.add(candidate);
                slotForRefId.put(refId, new Integer(chosenSets.size()-1));
            } else {
                // Met before, the later one wins
                chosenSets.set(existingSlot.intValue(), candidate);
            }
        }

        // Turn TextHeaderAtom + TextBytesAtom/TextCharsAtom pairs from
        //  the retained sets into TextRuns
        Vector collectedRuns = new Vector();
        for(int pos=0; pos<chosenSets.size(); pos++) {
            SlideAtomsSet chosen = (SlideAtomsSet)chosenSets.get(pos);
            findTextRuns(chosen.getSlideRecords(), collectedRuns);
        }

        // Callers get an array, which is handier than a vector
        TextRun[] runs = new TextRun[collectedRuns.size()];
        for(int pos=0; pos<runs.length; pos++) {
            runs[pos] = (TextRun)collectedRuns.get(pos);
        }
        _runs = runs;
    }

    // Accessor methods follow

    /**
     * Returns an array of all the TextRuns found
     */
    public TextRun[] getTextRuns() {
        return _runs;
    }

    /**
     * Returns the sheet number
     */
    public int getSheetNumber() {
        return _sheetNo;
    }

    /**
     * Returns the Notes Sheet for this slide, or null if there isn't one
     */
    public Notes getNotesSheet() {
        return _notes;
    }
}

View File

@ -0,0 +1,141 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.model;
import org.apache.poi.hslf.record.*;
import org.apache.poi.util.StringUtil;
/**
 * A run of text in a powerpoint document, which may be text on a
 * sheet or text in a note. The text is backed either by a
 * TextCharsAtom (unicode) or by a TextBytesAtom (8 bit).
 * It is only a very basic class for now
 *
 * @author Nick Burch
 */
public class TextRun
{
    private TextHeaderAtom _headerAtom;
    /** Set only for 8 bit runs */
    private TextBytesAtom _byteAtom;
    /** Set only for unicode runs */
    private TextCharsAtom _charAtom;
    /** Tells us which of the two text atoms is in use */
    private boolean _isUnicode;

    /**
     * Constructs a Text Run from a Unicode text block
     *
     * @param tha the TextHeaderAtom that defines what's what
     * @param tca the TextCharsAtom containing the text
     */
    public TextRun(TextHeaderAtom tha, TextCharsAtom tca) {
        this._isUnicode = true;
        this._headerAtom = tha;
        this._charAtom = tca;
    }

    /**
     * Constructs a Text Run from a Ascii text block
     *
     * @param tha the TextHeaderAtom that defines what's what
     * @param tba the TextBytesAtom containing the text
     */
    public TextRun(TextHeaderAtom tha, TextBytesAtom tba) {
        this._isUnicode = false;
        this._headerAtom = tha;
        this._byteAtom = tba;
    }

    // Accessor methods follow

    /**
     * Returns the text content of the run, with every \r turned into
     * a \n so that it is safe for printing and other use.
     */
    public String getText() {
        // PowerPoint seems to store \r as the line break, which messes
        //  things up on everything but a Mac
        return getRawText().replace('\r','\n');
    }

    /**
     * Returns the text exactly as held by the underlying text atom.
     * No changes have been applied, so it probably won't print out
     * nicely.
     */
    public String getRawText() {
        return _isUnicode ? _charAtom.getText() : _byteAtom.getText();
    }

    /**
     * Changes the text. Chances are, this won't work just yet, because
     * we also need to update some other bits of the powerpoint file
     * to match the change in the Text Atom, especially byte offsets.
     * A warning is printed if the new text differs in length from the
     * current text.
     *
     * @param s the new text
     * @throws RuntimeException if this is an 8 bit run and the text
     *          contains multibyte characters
     */
    public void setText(String s) {
        // A change in size is likely to upset powerpoint
        if(s.length() != getText().length()) {
            System.err.println("Warning: Your powerpoint file is probably no longer readable by powerpoint, as the text run has changed size!");
        }

        // A unicode atom can take anything we throw at it
        if(_isUnicode) {
            _charAtom.setText(s);
            return;
        }

        // An 8 bit atom can't hold multibyte text
        if(StringUtil.hasMultibyte(s)) {
            throw new RuntimeException("Setting of unicode text is currently only possible for Text Runs that are Unicode in the file, sorry. For now, please convert that text to us-ascii and re-try it");
        }

        // One byte per character, and into the atom it goes
        byte[] eightBit = new byte[s.length()];
        StringUtil.putCompressedUnicode(s,eightBit,0);
        _byteAtom.setText(eightBit);
    }

    /**
     * Returns the type of the text, from the TextHeaderAtom.
     * Possible values can be seen from TextHeaderAtom
     * @see org.apache.poi.hslf.record.TextHeaderAtom
     */
    public int getRunType() {
        return _headerAtom.getTextType();
    }

    /**
     * Changes the type of the text. Values should be taken
     * from TextHeaderAtom. No checking is done to ensure you
     * set this to a valid value!
     * @see org.apache.poi.hslf.record.TextHeaderAtom
     */
    public void setRunType(int type) {
        _headerAtom.setTextType(type);
    }
}

View File

@ -0,0 +1,218 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import java.io.*;
import org.apache.poi.poifs.filesystem.*;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;
/**
 * This is a special kind of Atom, because it doesn't live inside the
 * PowerPoint document. Instead, it lives in a separate stream in the
 * document ("Current User"). As such, it has to be treated specially
 *
 * @author Nick Burch
 */
public class CurrentUserAtom
{
    /** Standard Atom header */
    public static final byte[] atomHeader = new byte[] { 0, 0, -10, 15 };
    /** The Powerpoint magic number */
    public static final byte[] magicNumber = new byte[] { 95, -64, -111, -29 };
    /** The Powerpoint 97 version, major and minor numbers */
    public static final byte[] ppt97FileVer = new byte[] { 8, 00, -13, 03, 03, 00 };

    /**
     * The version, major and minor numbers.
     * Note that docFinalVersionA is read from bytes 20-21, which are
     *  the same two bytes that init() uses as the username length.
     */
    private int docFinalVersionA;
    private int docFinalVersionB;
    private byte docMajorNo;
    private byte docMinorNo;

    /** The Offset into the file for the current edit */
    private long currentEditOffset;
    /** The Username of the last person to edit the file */
    private String lastEditUser;
    /** The document release version */
    private long releaseVersion;

    /** Only correct after reading in or writing out */
    private byte[] _contents;


    /* ********************* getter/setter follows *********************** */

    public int getDocFinalVersionA() { return docFinalVersionA; }
    public int getDocFinalVersionB() { return docFinalVersionB; }
    public byte getDocMajorNo()      { return docMajorNo; }
    public byte getDocMinorNo()      { return docMinorNo; }

    public long getReleaseVersion()  { return releaseVersion; }
    public void setReleaseVersion(long rv) { releaseVersion = rv; }

    /** Points to the UserEditAtom */
    public long getCurrentEditOffset() { return currentEditOffset; }
    public void setCurrentEditOffset(long id ) { currentEditOffset = id; }

    public String getLastEditUsername() { return lastEditUser; }
    public void setLastEditUsername(String u) { lastEditUser = u; }


    /* ********************* real code follows *************************** */

    /**
     * Create a new Current User Atom. Not supported yet, so this
     *  always throws.
     *
     * @throws RuntimeException always
     */
    public CurrentUserAtom() {
        _contents = new byte[0];
        throw new RuntimeException("Creation support for Current User Atom not complete");
    }

    /**
     * Find the Current User in the filesystem, and create from that
     *
     * @param fs the filesystem holding the "Current User" stream
     * @throws IOException if the stream can't be read, or ends before
     *          the size reported by its directory entry
     */
    public CurrentUserAtom(POIFSFileSystem fs) throws IOException {
        // Decide how big it is
        DocumentEntry docProps =
            (DocumentEntry)fs.getRoot().getEntry("Current User");
        _contents = new byte[docProps.getSize()];

        // Grab the contents. A single read() is allowed to hand back
        //  fewer bytes than asked for, so keep going until we're full
        InputStream in = fs.createDocumentInputStream("Current User");
        try {
            int filled = 0;
            while(filled < _contents.length) {
                int got = in.read(_contents, filled, _contents.length - filled);
                if(got < 0) {
                    throw new IOException("Current User stream ended after " + filled + " bytes, expected " + _contents.length);
                }
                filled += got;
            }
        } finally {
            in.close();
        }

        // Set everything up
        init();
    }

    /**
     * Create things from the bytes
     *
     * @param b the raw contents of the "Current User" stream. The
     *           array is kept, not copied
     */
    public CurrentUserAtom(byte[] b) {
        _contents = b;
        init();
    }

    /**
     * Actually do the creation from a block of bytes.
     * Layout as read here: edit offset at 16, username length /
     *  version A at 20, version B at 22, major and minor at 24 and 25,
     *  8 bit username from 28, release version after that, and then
     *  (if present) the unicode username.
     */
    private void init() {
        // Grab the edit offset
        currentEditOffset = LittleEndian.getUInt(_contents,16);

        // Grab the versions
        docFinalVersionA = LittleEndian.getUShort(_contents,20);
        docFinalVersionB = LittleEndian.getUShort(_contents,22);
        docMajorNo = _contents[24];
        docMinorNo = _contents[25];

        // Get the username length
        long usernameLen = LittleEndian.getUShort(_contents,20);

        // Use this to grab the revision
        releaseVersion = LittleEndian.getUInt(_contents,28+(int)usernameLen);

        // Grab the unicode username, if stored
        int start = 28+(int)usernameLen+4;
        int len = 2*(int)usernameLen;

        if(_contents.length >= start+len) {
            byte[] textBytes = new byte[len];
            System.arraycopy(_contents,start,textBytes,0,len);
            lastEditUser = StringUtil.getFromUnicodeLE(textBytes);
        } else {
            // Fake from the 8 bit version
            byte[] textBytes = new byte[(int)usernameLen];
            System.arraycopy(_contents,28,textBytes,0,(int)usernameLen);
            lastEditUser = StringUtil.getFromCompressedUnicode(textBytes,0,(int)usernameLen);
        }
    }


    /**
     * Writes ourselves back out
     *
     * @param out the stream to write the rebuilt contents to
     * @throws IOException if the write fails
     */
    public void writeOut(OutputStream out) throws IOException {
        int nameLen = lastEditUser.length();

        // Decide on the size
        //  8       = atom header
        //  20      = up to name
        //  4       = revision
        //  3 * len = ascii + unicode
        int size = 8 + 20 + 4 + (3 * nameLen);
        _contents = new byte[size];

        // First we have a 8 byte atom header
        System.arraycopy(atomHeader,0,_contents,0,4);
        // Size is 20+user len + revision len(4)
        int atomSize = 20+4+nameLen;
        LittleEndian.putInt(_contents,4,atomSize);

        // Now we have the size of the details, which is 20
        LittleEndian.putInt(_contents,8,20);

        // Now the ppt magic number (4 bytes)
        System.arraycopy(magicNumber,0,_contents,12,4);

        // Now the current edit offset
        LittleEndian.putInt(_contents,16,(int)currentEditOffset);

        // Bytes 20-21 are what init() reads back as the username length,
        //  so they must describe the name we are about to write, and not
        //  the value picked up when the atom was read in. (For an
        //  unchanged username, the two are the same)
        LittleEndian.putShort(_contents,20,(short)nameLen);
        // Now the rest of the file versions, 2+1+1
        LittleEndian.putShort(_contents,22,(short)docFinalVersionB);
        _contents[24] = docMajorNo;
        _contents[25] = docMinorNo;

        // 2 bytes blank
        _contents[26] = 0;
        _contents[27] = 0;

        // username in bytes in us ascii
        byte[] asciiUN = new byte[nameLen];
        StringUtil.putCompressedUnicode(lastEditUser,asciiUN,0);
        System.arraycopy(asciiUN,0,_contents,28,asciiUN.length);

        // 4 byte release version
        LittleEndian.putInt(_contents,28+asciiUN.length,(int)releaseVersion);

        // username in unicode
        byte [] ucUN = new byte[nameLen*2];
        StringUtil.putUnicodeLE(lastEditUser,ucUN,0);
        System.arraycopy(ucUN,0,_contents,28+asciiUN.length+4,ucUN.length);

        // Write out
        out.write(_contents);
    }

    /**
     * Writes ourselves back out to a filesystem, as a document
     *  called "Current User"
     *
     * @param fs the filesystem to create the document in
     * @throws IOException if the contents can't be written
     */
    public void writeToFS(POIFSFileSystem fs) throws IOException {
        // Grab contents
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        writeOut(baos);
        ByteArrayInputStream bais =
            new ByteArrayInputStream(baos.toByteArray());

        // Write out
        fs.createDocument(bais,"Current User");
    }
}

View File

@ -0,0 +1,70 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
/**
 * Stand-in for a record which is boring in itself, but which is known
 * to have children of (potential) interest. It lets us get at the
 * children, but not much else
 *
 * @author Nick Burch
 */
public class DummyRecordWithChildren extends RecordContainer
{
    private Record[] _children;
    /** The 8 byte record header, as found in the source */
    private byte[] _header;
    /** Record type, read from bytes 2-3 of the header */
    private long _type;

    /**
     * Create a new holder for a boring record with children
     *
     * @param source the bytes holding the record
     * @param start where in source the record's header begins
     * @param len length of the record, including its 8 byte header
     */
    protected DummyRecordWithChildren(byte[] source, int start, int len) {
        final int headerLen = 8;

        // Only the header is kept, not the whole contents
        byte[] header = new byte[headerLen];
        System.arraycopy(source, start, header, 0, headerLen);
        this._header = header;
        this._type = LittleEndian.getUShort(header, 2);

        // Everything after the header is children
        this._children = Record.findChildRecords(source, start + headerLen, len - headerLen);
    }

    /**
     * Return the value we were given at creation
     */
    public long getRecordType() {
        return this._type;
    }

    /**
     * Return any children
     */
    public Record[] getChildRecords() {
        return this._children;
    }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk. The first two header bytes, the type and the children
     *  are handed to the shared container writer
     */
    public void writeOut(OutputStream out) throws IOException {
        byte headerA = this._header[0];
        byte headerB = this._header[1];
        writeOut(headerA, headerB, this._type, this._children, out);
    }
}

View File

@ -0,0 +1,90 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.ddf.*;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
/**
 * A wrapper around a DDF (Escher) EscherTextbox Record, which makes
 * the DDF Record accessible as if it were a HSLF record.
 * Note: when asked to write out, this simply puts the child records
 * back into the Escher layer. A call to the escher layer to write out
 * (by the parent PPDrawing) will do the actual write out
 *
 * @author Nick Burch
 */
public class EscherTextboxWrapper extends RecordContainer
{
    private EscherTextboxRecord _escherRecord;
    /** HSLF records parsed out of the escher record's data */
    private Record[] _children;
    // NOTE(review): if getRecordId() returns a signed short, ids in the
    //  0xFnnn range will end up negative here - confirm
    private long _type;

    /**
     * Returns the underlying DDF Escher Record
     */
    public EscherTextboxRecord getEscherRecord() {
        return this._escherRecord;
    }

    /**
     * Creates the wrapper for the given DDF Escher Record and children
     *
     * @param textbox the escher textbox record to wrap
     */
    protected EscherTextboxWrapper(EscherTextboxRecord textbox) {
        this._escherRecord = textbox;
        this._type = (long)textbox.getRecordId();

        // The escher data is a series of HSLF records, so parse them out
        byte[] escherData = textbox.getData();
        this._children = Record.findChildRecords(escherData, 0, escherData.length);
    }

    /**
     * Return the type of the escher record (normally in the 0xFnnn range)
     */
    public long getRecordType() {
        return this._type;
    }

    /**
     * Return any children
     */
    public Record[] getChildRecords() {
        return this._children;
    }

    /**
     * Stores the data for the child records back into the Escher layer.
     * Doesn't actually do the writing out, that's left to the Escher
     *  layer to do. Must be called before writeOut/serialize is called
     *  on the underlying Escher object!
     *
     * @param out not used, nothing is written to it
     */
    public void writeOut(OutputStream out) throws IOException {
        // Serialise each child in turn into a scratch buffer
        ByteArrayOutputStream scratch = new ByteArrayOutputStream();
        Record[] kids = this._children;
        for(int k=0; k<kids.length; k++) {
            kids[k].writeOut(scratch);
        }

        // Then hand the lot over to the escher layer
        this._escherRecord.setData(scratch.toByteArray());
    }
}

View File

@ -0,0 +1,95 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
/**
 * Master container for Notes. There is one of these for every page of
 * notes, and they have certain specific children (a NotesAtom and a
 * PPDrawing) which are picked out for easy access
 *
 * @author Nick Burch
 */
public class Notes extends RecordContainer
{
    private Record[] _children;
    /** The 8 byte record header, as found in the source */
    private byte[] _header;
    private static long _type = 1008L;

    // Links to our more interesting children
    private NotesAtom notesAtom;
    private PPDrawing ppDrawing;

    /**
     * Returns the NotesAtom of this Notes
     */
    public NotesAtom getNotesAtom() {
        return this.notesAtom;
    }

    /**
     * Returns the PPDrawing of this Notes, which has all the
     *  interesting data in it
     */
    public PPDrawing getPPDrawing() {
        return this.ppDrawing;
    }

    /**
     * Set things up, and find our more interesting children
     *
     * @param source the bytes holding the record
     * @param start where in source the record's header begins
     * @param len length of the record, including its 8 byte header
     */
    protected Notes(byte[] source, int start, int len) {
        final int headerLen = 8;

        // Grab the header
        byte[] header = new byte[headerLen];
        System.arraycopy(source, start, header, 0, headerLen);
        this._header = header;

        // Everything after the header is children
        Record[] kids = Record.findChildRecords(source, start + headerLen, len - headerLen);
        this._children = kids;

        // Remember the children we care about. Should there be several
        //  of a kind, the last one found is the one that's kept
        for(int k=0; k<kids.length; k++) {
            Record kid = kids[k];
            if(kid instanceof NotesAtom) {
                this.notesAtom = (NotesAtom)kid;
            }
            if(kid instanceof PPDrawing) {
                this.ppDrawing = (PPDrawing)kid;
            }
        }
    }

    /**
     * We are of type 1008
     */
    public long getRecordType() {
        return _type;
    }

    /**
     * Return any children
     */
    public Record[] getChildRecords() {
        return this._children;
    }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk
     */
    public void writeOut(OutputStream out) throws IOException {
        byte headerA = this._header[0];
        byte headerB = this._header[1];
        writeOut(headerA, headerB, _type, this._children, out);
    }
}

View File

@ -0,0 +1,120 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
/**
 * A Notes Atom (type 1009). Holds information on the parent Notes, such
 * as what slide it is tied to, and which parts of the master it follows
 *
 * @author Nick Burch
 */
public class NotesAtom extends RecordAtom
{
    /** The 8 byte record header, as found in the source */
    private byte[] _header;
    private static long _type = 1009L;

    private int slideID;
    private boolean followMasterObjects;
    private boolean followMasterScheme;
    private boolean followMasterBackground;
    /** Whatever follows the flags in the record, written back untouched */
    private byte[] reserved;


    public int getSlideID() { return slideID; }
    public void setSlideID(int id) { slideID = id; }

    public boolean getFollowMasterObjects()    { return followMasterObjects; }
    public boolean getFollowMasterScheme()     { return followMasterScheme; }
    public boolean getFollowMasterBackground() { return followMasterBackground; }
    public void setFollowMasterObjects(boolean flag)    { followMasterObjects = flag; }
    public void setFollowMasterScheme(boolean flag)     { followMasterScheme = flag; }
    public void setFollowMasterBackground(boolean flag) { followMasterBackground = flag; }


    /* *************** record code follows ********************** */

    /**
     * For the Notes Atom
     *
     * @param source the bytes holding the record
     * @param start where in source the record's header begins
     * @param len length of the record, including its 8 byte header
     */
    protected NotesAtom(byte[] source, int start, int len) {
        // Sanity Checking
        // NOTE(review): a length from 8 to 13 still gives a negative size
        //  for the reserved array further down - confirm intended handling
        int recordLen = (len < 8) ? 8 : len;

        // Get the header
        _header = new byte[8];
        System.arraycopy(source, start, _header, 0, 8);

        // The slide ID comes straight after the header
        slideID = (int)LittleEndian.getInt(source, start+8);

        // Then the flags, one bit each
        int flagBits = LittleEndian.getUShort(source, start+12);
        followMasterObjects    = (flagBits & 1) == 1;
        followMasterScheme     = (flagBits & 2) == 2;
        followMasterBackground = (flagBits & 4) == 4;

        // There might be 2 more bytes, which are a reserved field
        byte[] tail = new byte[recordLen-14];
        System.arraycopy(source, start+14, tail, 0, tail.length);
        reserved = tail;
    }

    /**
     * We are of type 1009
     */
    public long getRecordType() {
        return _type;
    }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk. Order is header, slide ID, flags, reserved bytes
     */
    public void writeOut(OutputStream out) throws IOException {
        // Header
        out.write(_header);

        // Slide ID
        writeLittleEndian(slideID, out);

        // Flags, packed back into their bits
        int flagBits = 0;
        if(followMasterObjects)    { flagBits |= 1; }
        if(followMasterScheme)     { flagBits |= 2; }
        if(followMasterBackground) { flagBits |= 4; }
        short flags = (short)flagBits;
        writeLittleEndian(flags, out);

        // Reserved fields
        out.write(reserved);
    }
}

View File

@ -0,0 +1,191 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.ddf.*;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.Vector;
/**
 * These are actually wrappers onto Escher drawings. Make use of
 *  the DDF classes to do useful things with them.
 * For now, creates a tree of the Escher records, and then creates any
 *  PowerPoint (hslf) records found within the EscherTextboxRecord
 *  (msofbtClientTextbox) records.
 * Also provides easy access to the EscherTextboxRecords, so that their
 *  text may be extracted and used in Sheets
 *
 * @author Nick Burch
 */

// For now, pretending to be an atom. Might not always be, but that
//  would require a wrapping class
public class PPDrawing extends RecordAtom
{
    /** The 8 byte record header; bytes 4-7 are refreshed on write out */
    private byte[] _header;
    /** Record type, read from bytes 2-3 of the header */
    private long _type;

    /** Top level escher records found after the header */
    private EscherRecord[] childRecords;
    /** One wrapper for each EscherTextboxRecord found in the tree */
    private EscherTextboxWrapper[] textboxWrappers;


    /**
     * Get access to the underlying Escher Records
     */
    public EscherRecord[] getEscherRecords() {
        return this.childRecords;
    }

    /**
     * Get access to the atoms inside Textboxes
     */
    public EscherTextboxWrapper[] getTextboxWrappers() {
        return this.textboxWrappers;
    }


    /* ******************** record stuff follows ********************** */

    /**
     * Sets everything up, groks the escher etc
     *
     * @param source the bytes holding the record
     * @param start where in source the record's header begins
     * @param len length of the record, including its 8 byte header
     */
    protected PPDrawing(byte[] source, int start, int len) {
        // Get the header, and the type from within it
        _header = new byte[8];
        System.arraycopy(source, start, _header, 0, 8);
        _type = LittleEndian.getUShort(_header, 2);

        // Work on our own copy of the record's bytes
        byte[] recordBytes = new byte[len];
        System.arraycopy(source, start, recordBytes, 0, len);

        // Build up the Escher records that follow the header
        DefaultEscherRecordFactory factory = new DefaultEscherRecordFactory();
        Vector topLevel = new Vector();
        findEscherChildren(factory, recordBytes, 8, len-8, topLevel);
        childRecords = (EscherRecord[])topLevel.toArray(new EscherRecord[topLevel.size()]);

        // Find any EscherTextboxRecords, and wrap them up
        Vector wrapped = new Vector();
        findEscherTextboxRecord(childRecords, wrapped);
        textboxWrappers = (EscherTextboxWrapper[])wrapped.toArray(new EscherTextboxWrapper[wrapped.size()]);
    }

    /**
     * Tree walking way of finding Escher Child Records. Creates and
     *  fills the record at startPos, then carries on with the record
     *  after it for as long as 8 or more bytes remain
     *
     * @param erf factory used to create and fill the records
     * @param source bytes to read the records from
     * @param startPos where in source the next record begins
     * @param lenToGo how many bytes are left to look through
     * @param found vector the records are added to
     */
    private void findEscherChildren(DefaultEscherRecordFactory erf, byte[] source, int startPos, int lenToGo, Vector found) {
        // Find the record, fill it in and save it
        EscherRecord record = erf.createRecord(source, startPos);
        record.fillFields( source, startPos, erf );
        found.add(record);

        // See how far to wind on
        int recordSize = record.getRecordSize();
        if(recordSize < 8) {
            System.err.println("Hit short DDF record at " + startPos + " - " + recordSize);
        }

        // No room for another record header means we're done
        int remaining = lenToGo - recordSize;
        if(remaining < 8) {
            return;
        }
        findEscherChildren(erf, source, startPos + recordSize, remaining, found);
    }

    /**
     * Look for EscherTextboxRecords, descending into any container
     *  records along the way
     *
     * @param toSearch the records to look through
     * @param found vector an EscherTextboxWrapper is added to for every
     *         textbox record met
     */
    private void findEscherTextboxRecord(EscherRecord[] toSearch, Vector found) {
        for(int pos=0; pos<toSearch.length; pos++) {
            EscherRecord candidate = toSearch[pos];

            // Textboxes get wrapped, and aren't searched any further
            if(candidate instanceof EscherTextboxRecord) {
                found.add(new EscherTextboxWrapper((EscherTextboxRecord)candidate));
                continue;
            }

            // Anything without children is of no interest
            if(! candidate.isContainerRecord()) {
                continue;
            }

            // Otherwise, walk the children
            List nestedL = candidate.getChildRecords();
            EscherRecord[] nested = new EscherRecord[nestedL.size()];
            for(int n=0; n<nested.length; n++) {
                nested[n] = (EscherRecord)nestedL.get(n);
            }
            findEscherTextboxRecord(nested, found);
        }
    }

    /**
     * We are type 1036
     */
    public long getRecordType() {
        return this._type;
    }

    /**
     * We're pretending to be an atom, so return null
     */
    public Record[] getChildRecords() {
        return null;
    }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk
     * Walks the escher layer to get the contents
     */
    public void writeOut(OutputStream out) throws IOException {
        // Push any text changes down into the escher layer first. The
        //  wrappers don't write to a stream, so none is given
        for(int w=0; w<textboxWrappers.length; w++) {
            textboxWrappers[w].writeOut(null);
        }

        // Work out how big the escher children are now
        int escherSize = 0;
        for(int c=0; c<childRecords.length; c++) {
            escherSize += childRecords[c].getRecordSize();
        }

        // Put the new size in the header (bytes 5-8), then write that out
        LittleEndian.putInt(_header, 4, escherSize);
        out.write(_header);

        // Serialise the children one after another into a buffer
        byte[] escherBytes = new byte[escherSize];
        int offset = 0;
        for(int c=0; c<childRecords.length; c++) {
            offset += childRecords[c].serialize( offset, escherBytes );
        }

        // Finally, write out the children
        out.write(escherBytes);
    }
}

View File

@ -0,0 +1,67 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
/**
 * General holder for PersistPtrFullBlock and PersistPtrIncrementalBlock
 * records. We need to handle them specially, since we have to go around
 * updating UserEditAtoms if they shuffle about on disk.
 *
 * The record's bytes (header included) are held as-is, and written
 * back out unchanged.
 *
 * @author Nick Burch
 */
public class PersistPtrHolder extends PositionDependentRecordAtom
{
    /** The whole record, header and all, exactly as it was read */
    private byte[] _contents;
    /** The record type, taken from bytes 3 and 4 of the header */
    private long _type;

    /**
     * Create a new holder for a PersistPtr record
     *
     * @param source the bytes to read the record from
     * @param start offset in source at which the record (header first) begins
     * @param len length of the record, header included
     */
    protected PersistPtrHolder(byte[] source, int start, int len) {
        // The length given covers the header too. We always take at
        // least 4 bytes, so that the type can be read back out
        int toCopy = (len < 4) ? 4 : len;

        // Remember where on disk we were found
        myLastOnDiskOffset = start;

        // Behave as an atom: take a copy of everything, and hold on to it
        byte[] copy = new byte[toCopy];
        System.arraycopy(source, start, copy, 0, toCopy);
        _contents = copy;
        _type = LittleEndian.getUShort(copy, 2);
    }

    /**
     * Returns the record type that was read from the header at creation
     */
    public long getRecordType() {
        return _type;
    }

    /**
     * Writes the held bytes back out, unchanged, so the record can be
     * written to disk
     */
    public void writeOut(OutputStream out) throws IOException {
        out.write(_contents);
    }
}

View File

@ -0,0 +1,44 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
/**
 * A special (and dangerous) kind of Record Atom that cares about where
 * it lives on the disk, or who has other Atoms that care about where
 * this is on the disk.
 *
 * @author Nick Burch
 */
public abstract class PositionDependentRecordAtom extends RecordAtom
{
    /** Our location on the disk, as of the last write out */
    protected int myLastOnDiskOffset;

    /** Fetch our location on the disk, as of the last write out */
    public int getLastOnDiskOffset() { return myLastOnDiskOffset; }

    /**
     * Update the Record's idea of where on disk it lives, after a write out.
     * Use with care...
     *
     * @param offset the new on-disk offset of this record
     */
    public void setLastOnDiskOffset(int offset) {
        myLastOnDiskOffset = offset;
    }

    /**
     * The original, misspelt name for {@link #setLastOnDiskOffset(int)}.
     * Kept so that existing callers carry on working; it simply passes
     * the offset along.
     *
     * @param offset the new on-disk offset of this record
     * @deprecated use {@link #setLastOnDiskOffset(int)} instead
     */
    public void setLastOnDiskOffet(int offset) {
        setLastOnDiskOffset(offset);
    }
}

View File

@ -0,0 +1,192 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.util.Vector;
import org.apache.poi.util.LittleEndian;
/**
 * This abstract class represents a record in the PowerPoint document.
 * Record classes should extend with RecordContainer or RecordAtom, which
 * extend this in turn.
 *
 * It also holds the shared code for splitting a block of bytes up into
 * records, and for picking the class that handles each record type.
 *
 * @author Nick Burch
 */
public abstract class Record
{
    /**
     * Is this record type an Atom record (only has data),
     *  or is it a non-Atom record (has other records)?
     */
    public abstract boolean isAnAtom();

    /**
     * Returns the type (held as a little endian in bytes 3 and 4)
     *  that this class handles
     */
    public abstract long getRecordType();

    /**
     * Fetch all the child records of this record
     * If this record is an atom, will return null
     * If this record is a non-atom, but has no children, will return
     *  an empty array
     */
    public abstract Record[] getChildRecords();

    /**
     * Have the contents printed out into an OutputStream, used when
     *  writing a file back out to disk
     * (Normally, atom classes will keep their bytes around, but
     *  non atom classes will just request the bytes from their
     *  children, then chuck on their header and return)
     */
    public abstract void writeOut(OutputStream o) throws IOException;

    /**
     * When writing out, write out a signed int (32bit) in Little Endian format
     *
     * @param i the value to write
     * @param o the stream to write the 4 bytes to
     */
    public static void writeLittleEndian(int i,OutputStream o) throws IOException {
        byte[] bi = new byte[4];
        LittleEndian.putInt(bi,i);
        o.write(bi);
    }

    /**
     * When writing out, write out a signed short (16bit) in Little Endian format
     *
     * @param s the value to write
     * @param o the stream to write the 2 bytes to
     */
    public static void writeLittleEndian(short s,OutputStream o) throws IOException {
        byte[] bs = new byte[2];
        LittleEndian.putShort(bs,s);
        o.write(bs);
    }

    /**
     * Default method for finding child records of a given record.
     *
     * Steps through the bytes from start, for as long as there is room
     * left for another 8 byte header. For each header, the type is read
     * from bytes 3 and 4 and the length from bytes 5 to 8, and a Record
     * is built with createRecordForType.
     *
     * @param b the bytes to search
     * @param start offset in b of the first child's header
     * @param len number of bytes, from start, that hold children
     * @return the children found, in order (empty if there were none)
     */
    public static Record[] findChildRecords(byte[] b, int start, int len) {
        Vector children = new Vector(5);

        // Jump our little way along, creating records as we go
        int pos = start;
        while(pos <= (start+len-8)) {
            long type = LittleEndian.getUShort(b,pos+2);
            long rlen = LittleEndian.getUInt(b,pos+4);

            // Sanity check the length - anything that doesn't fit in
            //  a positive int is treated as being empty
            int rleni = (int)rlen;
            if(rleni < 0) { rleni = 0; }

            Record r = createRecordForType(type,b,pos,8+rleni);
            children.add(r);

            // Wind on by the same (sanity checked) length we built the
            //  record with. Adding on the raw unsigned length instead can
            //  wrap around when narrowed back to an int, and for some
            //  values would leave us stuck at the same position for ever
            long next = (long)pos + 8L + rleni;
            if(next > Integer.MAX_VALUE) { break; }
            pos = (int)next;
        }

        // Turn the vector into an array, and return
        return (Record[])children.toArray(new Record[children.size()]);
    }

    /**
     * For a given type (little endian bytes 3 and 4 in record header),
     *  byte array, start position and length:
     *  will return a Record object that will handle that record
     *
     * Remember that while PPT stores the record lengths as 8 bytes short
     *  (not including the size of the header), this code assumes you're
     *  passing in corrected lengths
     *
     * @param type the record type
     * @param b the bytes holding the record
     * @param start offset in b of the record's header
     * @param len length of the record, including the 8 byte header
     */
    protected static Record createRecordForType(long type, byte[] b, int start, int len) {
        // Default is to use UnknownRecordPlaceholder
        // When you create classes for new Records, add them here
        switch((int)type) {
            // Document
            case 1000:
            // MainMaster (MetaSheet lives inside the PPDrawing inside this)
            case 1016:
                return new DummyRecordWithChildren(b,start,len);

            // "Slide"
            case 1006:
                return new Slide(b,start,len);

            // "SlideAtom"
            case 1007:
                return new SlideAtom(b,start,len);

            // "Notes"
            case 1008:
                return new Notes(b,start,len);

            // "NotesAtom" (Details on Notes sheets)
            case 1009:
                return new NotesAtom(b,start,len);

            // "SlidePersistAtom" (Details on text for a sheet)
            case 1011:
                return new SlidePersistAtom(b,start,len);

            // PPDrawing (MetaSheet lives inside this)
            case 1036:
                return new PPDrawing(b,start,len);

            // TextHeaderAtom (Holds details on following text)
            case 3999:
                return new TextHeaderAtom(b,start,len);

            // TextCharsAtom (Text in Unicode format)
            case 4000:
                return new TextCharsAtom(b,start,len);

            // TextByteAtom (Text in ascii format)
            case 4008:
                return new TextBytesAtom(b,start,len);

            // SlideListWithText (Many Sheets live inside here)
            case 4080:
                return new SlideListWithText(b,start,len);

            // UserEditAtom (Holds pointers, last viewed etc)
            case 4085:
                return new UserEditAtom(b,start,len);

            // PersistPtrFullBlock and PersistPtrIncrementalBlock
            // (Don't know what they hold, but do care about where they live)
            case 6001:
            case 6002:
                return new PersistPtrHolder(b,start,len);

            default:
                return new UnknownRecordPlaceholder(b,start,len);
        }
    }
}

View File

@ -0,0 +1,38 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
/**
 * Abstract class which all atom records will extend. It supplies the
 * two answers that are the same for every atom.
 *
 * @author Nick Burch
 */
public abstract class RecordAtom extends Record
{
    /**
     * Always true, as we are an atom
     */
    public boolean isAnAtom() {
        return true;
    }

    /**
     * Always null, as atoms have no child records
     */
    public Record[] getChildRecords() {
        return null;
    }
}

View File

@ -0,0 +1,109 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hslf.util.MutableByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
/**
 * Abstract class which all container records will extend. Provides
 *  helpful methods for writing child records out to disk
 *
 * @author Nick Burch
 */
public abstract class RecordContainer extends Record
{
    /**
     * Always false, as containers are not atoms
     */
    public boolean isAnAtom() {
        return false;
    }

    /**
     * Write out our header, and our children. The length in the header
     *  is only known once the children are written, so it goes out as
     *  zeros to start with, and is filled in afterwards.
     * @param headerA the first byte of the header
     * @param headerB the second byte of the header
     * @param type the record type
     * @param children our child records
     * @param out the stream to write to
     */
    public void writeOut(byte headerA, byte headerB, long type, Record[] children, OutputStream out) throws IOException {
        if(!(out instanceof MutableByteArrayOutputStream)) {
            // The slower way: build the whole record up in a buffer of
            //  our own, fix up the length, then pass it all on in one go
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            writeHeaderLessSize(headerA, headerB, type, buffer);
            for(int c = 0; c < children.length; c++) {
                children[c].writeOut(buffer);
            }

            // The length doesn't include the 8 byte header
            byte[] whole = buffer.toByteArray();
            LittleEndian.putInt(whole, 4, (whole.length-8));
            out.write(whole);
            return;
        }

        // A mutable stream lets us go back and overwrite the length,
        //  so we can write straight into it
        MutableByteArrayOutputStream mout = (MutableByteArrayOutputStream)out;

        // Note where our header is about to start
        int headerStart = mout.getBytesWritten();
        writeHeaderLessSize(headerA, headerB, type, mout);
        for(int c = 0; c < children.length; c++) {
            children[c].writeOut(mout);
        }

        // Everything written since the start, less the 8 byte header,
        //  is the length to put into header bytes 5-8
        int length = mout.getBytesWritten() - headerStart - 8;
        byte[] size = new byte[4];
        LittleEndian.putInt(size, 0, length);
        mout.overwrite(size, headerStart+4);
    }

    /**
     * Writes the 8 byte record header: the two leading bytes, the type
     *  as a little endian short, and four zero bytes for the length.
     */
    private void writeHeaderLessSize(byte headerA, byte headerB, long type, OutputStream target) throws IOException {
        target.write(new byte[] {headerA,headerB});
        byte[] typeB = new byte[2];
        LittleEndian.putShort(typeB,(short)type);
        target.write(typeB);
        target.write(new byte[4]);
    }
}

View File

@ -0,0 +1,95 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
/**
 * Master container for Slides. There is one of these for every slide,
 *  and they have certain specific children
 *
 * @author Nick Burch
 */
public class Slide extends RecordContainer
{
    private Record[] _children;
    private byte[] _header;
    private static long _type = 1006L;

    // Links to our more interesting children
    private SlideAtom slideAtom;
    private PPDrawing ppDrawing;

    /**
     * Returns the SlideAtom of this Slide (null if no child was one)
     */
    public SlideAtom getSlideAtom() {
        return slideAtom;
    }

    /**
     * Returns the PPDrawing of this Slide, which has all the
     *  interesting data in it (null if no child was one)
     */
    public PPDrawing getPPDrawing() {
        return ppDrawing;
    }

    /**
     * Keeps a copy of the header, finds the child records, and picks
     *  out the SlideAtom and PPDrawing from among them. Should there
     *  be several of either kind, the last one is the one kept.
     *
     * @param source the bytes to read the record from
     * @param start offset in source of the record's header
     * @param len length of the record, including the 8 byte header
     */
    protected Slide(byte[] source, int start, int len) {
        // Hold on to the 8 byte header
        byte[] header = new byte[8];
        System.arraycopy(source, start, header, 0, header.length);
        _header = header;

        // Everything past the header is our children
        _children = Record.findChildRecords(source, start+8, len-8);

        // Link up to the ones that are of special interest
        for(int c = 0; c < _children.length; c++) {
            Record child = _children[c];
            if(child instanceof SlideAtom) {
                slideAtom = (SlideAtom)child;
            } else if(child instanceof PPDrawing) {
                ppDrawing = (PPDrawing)child;
            }
        }
    }

    /**
     * Returns our type, which is 1006
     */
    public long getRecordType() {
        return _type;
    }

    /**
     * Returns all of our children
     */
    public Record[] getChildRecords() {
        return _children;
    }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk. The first two header bytes are passed through as read.
     */
    public void writeOut(OutputStream out) throws IOException {
        writeOut(_header[0], _header[1], _type, _children, out);
    }
}

View File

@ -0,0 +1,206 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
/**
 * A Slide Atom (type 1007). Holds information on the parent Slide, what
 *  Master Slide it uses, what Notes is attached to it, that sort of thing.
 *  It also has a SSlideLayoutAtom embedded in it, but without the Atom header
 *
 * @author Nick Burch
 */
public class SlideAtom extends RecordAtom
{
    private byte[] _header;
    private static long _type = 1007l;
    public static final int MASTER_SLIDE_ID = 0;
    public static final int USES_MASTER_SLIDE_ID = -2147483648;

    private int masterID;
    private int notesID;

    private boolean followMasterObjects;
    private boolean followMasterScheme;
    private boolean followMasterBackground;
    private SSlideLayoutAtom layoutAtom;
    /** Any bytes found after the flags, kept so they can be written back out */
    private byte[] reserved;

    /** Get the ID of the master slide used. 0 if this is a master slide, otherwise -2147483648 */
    public int getMasterID() { return masterID; }
    /** Get the ID of the notes for this slide. 0 if doesn't have one */
    public int getNotesID() { return notesID; }
    /** Get the embedded SSlideLayoutAtom */
    public SSlideLayoutAtom getSSlideLayoutAtom() { return layoutAtom; }

    public boolean getFollowMasterObjects() { return followMasterObjects; }
    public boolean getFollowMasterScheme() { return followMasterScheme; }
    public boolean getFollowMasterBackground() { return followMasterBackground; }
    public void setFollowMasterObjects(boolean flag) { followMasterObjects = flag; }
    public void setFollowMasterScheme(boolean flag) { followMasterScheme = flag; }
    public void setFollowMasterBackground(boolean flag) { followMasterBackground = flag; }

    /* *************** record code follows ********************** */

    /**
     * For the Slide Atom
     *
     * @param source the bytes to read the record from
     * @param start offset in source of the record's header
     * @param len length of the record, including the 8 byte header
     */
    protected SlideAtom(byte[] source, int start, int len) {
        // Sanity Checking - 30 bytes are always read (see below)
        if(len < 30) { len = 30; }

        // Get the header
        _header = new byte[8];
        System.arraycopy(source,start,_header,0,8);

        // Grab the 12 bytes that is "SSlideLayoutAtom"
        byte[] SSlideLayoutAtomData = new byte[12];
        System.arraycopy(source,start+8,SSlideLayoutAtomData,0,12);
        // Use them to build up the SSlideLayoutAtom
        layoutAtom = new SSlideLayoutAtom(SSlideLayoutAtomData);

        // Get the IDs of the master and notes
        masterID = (int)LittleEndian.getInt(source,start+12+8);
        notesID = (int)LittleEndian.getInt(source,start+16+8);

        // Grok the flags, stored as bits
        int flags = LittleEndian.getUShort(source,start+20+8);
        followMasterBackground = ((flags&4) == 4);
        followMasterScheme = ((flags&2) == 2);
        followMasterObjects = ((flags&1) == 1);

        // If there's any other bits of data, keep them about
        // 8 bytes header + 20 bytes to flags + 2 bytes flags = 30 bytes
        reserved = new byte[len-30];
        System.arraycopy(source,start+30,reserved,0,reserved.length);
    }

    /**
     * We are of type 1007
     */
    public long getRecordType() { return _type; }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk. Only the three follow-master bits of the flags are
     *  written; no other flag bits are kept when reading.
     */
    public void writeOut(OutputStream out) throws IOException {
        // Header
        out.write(_header);

        // SSSlideLayoutAtom stuff
        layoutAtom.writeOut(out);

        // IDs
        writeLittleEndian(masterID,out);
        writeLittleEndian(notesID,out);

        // Flags
        short flags = 0;
        if(followMasterObjects) { flags |= 1; }
        if(followMasterScheme) { flags |= 2; }
        if(followMasterBackground) { flags |= 4; }
        writeLittleEndian(flags,out);

        // Reserved data
        out.write(reserved);
    }

    /**
     * Holds the geometry of the Slide, and the ID of the placeholders
     *  on the slide.
     * (Embedded inside SlideAtom is a SSlideLayoutAtom, without the
     *  usual record header. Since it's a fixed size and tied to
     *  the SlideAtom, we'll hold it here.)
     */
    public class SSlideLayoutAtom {
        // The different kinds of geometry
        public static final int TITLE_SLIDE = 0;
        public static final int TITLE_BODY_SLIDE = 1;
        public static final int TITLE_MASTER_SLIDE = 2;
        public static final int MASTER_SLIDE = 3;
        public static final int MASTER_NOTES = 4;
        public static final int NOTES_TITLE_BODY = 5;
        public static final int HANDOUT = 6; // Only header, footer and date placeholders
        public static final int TITLE_ONLY = 7;
        public static final int TITLE_2_COLUMN_BODY = 8;
        public static final int TITLE_2_ROW_BODY = 9;
        public static final int TITLE_2_COLUNM_RIGHT_2_ROW_BODY = 10;
        public static final int TITLE_2_COLUNM_LEFT_2_ROW_BODY = 11;
        public static final int TITLE_2_ROW_BOTTOM_2_COLUMN_BODY = 12;
        public static final int TITLE_2_ROW_TOP_2_COLUMN_BODY = 13;
        public static final int FOUR_OBJECTS = 14;
        public static final int BIG_OBJECT = 15;
        public static final int BLANK_SLIDE = 16;
        public static final int VERTICAL_TITLE_BODY_LEFT = 17;
        // This used to be 17 as well, which made it impossible to tell
        //  apart from VERTICAL_TITLE_BODY_LEFT. The vertical two row
        //  layout follows straight on from it, at 18 (0x12)
        public static final int VERTICAL_TITLE_2_ROW_BODY_LEFT = 18;

        /** What geometry type we are */
        private int geometry;
        /** What placeholder IDs we have */
        private byte[] placeholderIDs;

        /** Retrieve the geometry type */
        public int getGeometryType() { return geometry; }

        /**
         * Create a new Embedded SSlideLayoutAtom, from 12 bytes of data:
         *  a 4 byte geometry type, then 8 bytes of placeholder IDs
         *
         * @throws RuntimeException if data is not exactly 12 bytes long
         */
        public SSlideLayoutAtom(byte[] data) {
            if(data.length != 12) {
                throw new RuntimeException("SSlideLayoutAtom created with byte array not 12 bytes long - was " + data.length + " bytes in size");
            }

            // Grab out our data
            geometry = (int)LittleEndian.getInt(data,0);
            placeholderIDs = new byte[8];
            System.arraycopy(data,4,placeholderIDs,0,8);
        }

        /**
         * Write the contents of the record back, so it can be written
         *  to disk. Skips the record header
         */
        public void writeOut(OutputStream out) throws IOException {
            // Write the geometry
            writeLittleEndian(geometry,out);
            // Write the placeholder IDs
            out.write(placeholderIDs);
        }
    }
}

View File

@ -0,0 +1,148 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hslf.model.Sheet;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Vector;
/**
 * These are tricky beasts. They contain the text of potentially
 *  many (normal) slides. They are made up of several sets of
 *   - SlidePersistAtom
 *   - TextHeaderAtom
 *   - TextBytesAtom / TextCharsAtom
 *   - StyleTextPropAtom (optional)
 *   - TextSpecInfoAtom (optional)
 *   - InteractiveInfo (optional)
 *   - TxInteractiveInfoAtom (optional)
 *  and then the next SlidePersistAtom.
 *
 * Eventually, Slides will find the blocks that interest them from all
 *  the SlideListWithText entries, and refer to them
 *
 * For now, we scan through the children, and group each
 *  SlidePersistAtom with the records that follow it into a
 *  SlideAtomsSet, for the model code to pick up
 *
 * @author Nick Burch
 */
public class SlideListWithText extends RecordContainer
{
    private Record[] _children;
    private byte[] _header;
    private static long _type = 4080;

    private SlideAtomsSet[] slideAtomsSets;

    /**
     * Create a new holder for slide records
     *
     * @param source the bytes to read the record from
     * @param start offset in source of the record's header
     * @param len length of the record, including the 8 byte header
     */
    protected SlideListWithText(byte[] source, int start, int len) {
        // Hold on to the 8 byte header
        byte[] header = new byte[8];
        System.arraycopy(source, start, header, 0, header.length);
        _header = header;

        // Everything past the header is our children
        _children = Record.findChildRecords(source, start+8, len-8);

        // Gather the children up into SlideAtomsSets, so that the model
        //  layer code can just take the sets, rather than having to
        //  match the children up for itself
        Vector sets = new Vector();
        int at = 0;
        while(at < _children.length) {
            // Children ahead of the first SlidePersistAtom belong to no set
            if(!(_children[at] instanceof SlidePersistAtom)) {
                at++;
                continue;
            }

            // Look ahead for the next SlidePersistAtom (or the end)
            int next = at + 1;
            while(next < _children.length && !(_children[next] instanceof SlidePersistAtom)) {
                next++;
            }

            // A SlidePersistAtom with nothing after it gets no set
            int count = next - at - 1;
            if(count != 0) {
                Record[] following = new Record[count];
                System.arraycopy(_children, at+1, following, 0, count);
                sets.add(new SlideAtomsSet((SlidePersistAtom)_children[at], following));
            }

            // Carry on from the next SlidePersistAtom
            at = next;
        }

        // Turn the vector into an array
        slideAtomsSets = (SlideAtomsSet[])sets.toArray(new SlideAtomsSet[sets.size()]);
    }

    /**
     * Get access to the SlideAtomsSets of the children of this record
     */
    public SlideAtomsSet[] getSlideAtomsSets() {
        return slideAtomsSets;
    }

    /**
     * Returns our type, which is 4080
     */
    public long getRecordType() {
        return _type;
    }

    /**
     * Returns all of our children, grouped into sets or not
     */
    public Record[] getChildRecords() {
        return _children;
    }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk. The first two header bytes are passed through as read.
     */
    public void writeOut(OutputStream out) throws IOException {
        writeOut(_header[0], _header[1], _type, _children, out);
    }

    /**
     * Inner class to wrap up a matching set of records that hold the
     *  text for a given sheet. Contains the leading SlidePersistAtom,
     *  and all of the records until the next SlidePersistAtom. This
     *  includes sets of TextHeaderAtom and TextBytesAtom/TextCharsAtom,
     *  along with some others.
     */
    public class SlideAtomsSet {
        private SlidePersistAtom slidePersistAtom;
        private Record[] slideRecords;

        /** Get the SlidePersistAtom, which gives details on the Slide this text is associated with */
        public SlidePersistAtom getSlidePersistAtom() {
            return slidePersistAtom;
        }

        /** Get the Text related records for this slide */
        public Record[] getSlideRecords() {
            return slideRecords;
        }

        /** Create one to hold the Records for one Slide's text */
        public SlideAtomsSet(SlidePersistAtom s, Record[] r) {
            slidePersistAtom = s;
            slideRecords = r;
        }
    }
}

View File

@ -0,0 +1,114 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
/**
 * A SlidePersist Atom (type 1011). Holds information on the text of a
 *  given slide, which are stored in the same SlideListWithText
 *
 * @author Nick Burch
 */
public class SlidePersistAtom extends RecordAtom
{
    private byte[] _header;
    private static long _type = 1011l;

    /** Slide reference ID. A machine readable "page id" */
    private int refID;
    private boolean hasShapesOtherThanPlaceholders;
    /** Number of placeholder texts that will follow in the SlideListWithText */
    private int numPlaceholderTexts;
    /** Less useful identifier */
    private int slideIdentifier;
    /** Reserved fields. Who knows what they do */
    private byte[] reservedFields;

    public int getRefID() { return refID; }
    public int getSlideIdentifier() { return slideIdentifier; }
    public int getNumPlaceholderTexts() { return numPlaceholderTexts; }
    public boolean getHasShapesOtherThanPlaceholders() { return hasShapesOtherThanPlaceholders; }

    /* *************** record code follows ********************** */

    /**
     * For the SlidePersist Atom
     *
     * @param source the bytes to read the record from
     * @param start offset in source of the record's header
     * @param len length of the record, including the 8 byte header
     */
    protected SlidePersistAtom(byte[] source, int start, int len) {
        // Sanity Checking - the 8 byte header and four 4 byte fields
        //  are always read, so anything shorter than 24 bytes is
        //  treated as 24. (Otherwise, the reserved fields array below
        //  would be asked for with a negative size)
        if(len < 24) { len = 24; }

        // Get the header
        _header = new byte[8];
        System.arraycopy(source,start,_header,0,8);

        // Grab the reference ID
        refID = (int)LittleEndian.getInt(source,start+8);

        // Next up is a set of flags, but only bit 3 is used!
        // NOTE(review): this compares the whole value against 4, so
        //  the flag reads as false if any other bit is set too.
        //  Left as it was - confirm whether a bit mask was intended
        int flags = (int)LittleEndian.getInt(source,start+12);
        hasShapesOtherThanPlaceholders = (flags == 4);

        // Now the number of Placeholder Texts
        numPlaceholderTexts = (int)LittleEndian.getInt(source,start+16);

        // Last useful one is the unique slide identifier
        slideIdentifier = (int)LittleEndian.getInt(source,start+20);

        // Finally you have typically 4 or 8 bytes of reserved fields,
        //  all zero running from 24 bytes in to the end
        reservedFields = new byte[len-24];
        System.arraycopy(source,start+24,reservedFields,0,reservedFields.length);
    }

    /**
     * We are of type 1011
     */
    public long getRecordType() { return _type; }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk
     */
    public void writeOut(OutputStream out) throws IOException {
        // Header - size or type unchanged
        out.write(_header);

        // Compute the flags part - only bit 3 is used
        int flags = 0;
        if(hasShapesOtherThanPlaceholders) {
            flags = 4;
        }

        // Write out our fields
        writeLittleEndian(refID,out);
        writeLittleEndian(flags,out);
        writeLittleEndian(numPlaceholderTexts,out);
        writeLittleEndian(slideIdentifier,out);
        out.write(reservedFields);
    }
}

View File

@ -0,0 +1,91 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;
import java.io.IOException;
import java.io.OutputStream;
/**
 * A TextBytesAtom (type 4008). Holds text in ascii form (unknown
 *  code page, for now assumed to be the default of
 *  org.apache.poi.util.StringUtil, which is the Excel default).
 * NOTE(review): the earlier comment here said the trailing return
 *  character is always stripped from this text. Nothing in this class
 *  does any stripping, so presumably it is simply absent from the
 *  stored bytes - confirm
 *
 * @author Nick Burch
 */
public class TextBytesAtom extends RecordAtom
{
    private byte[] _header;
    private static long _type = 4008L;

    /** The bytes that make up the text */
    private byte[] _text;

    /** Decodes all of the stored bytes into a String, using the default codepage */
    public String getText() {
        int numBytes = _text.length;
        return StringUtil.getFromCompressedUnicode(_text, 0, numBytes);
    }

    /**
     * Replaces the text in the Atom, which must be 8 bit ascii. The
     *  array is held on to as given (no copy is taken), and the length
     *  in the header is changed to match it.
     */
    public void setText(byte[] b) {
        _text = b;

        // The length lives in header bytes 5-8
        LittleEndian.putInt(_header, 4, b.length);
    }

    /* *************** record code follows ********************** */

    /**
     * For the TextBytes Atom
     *
     * @param source the bytes to read the record from
     * @param start offset in source of the record's header
     * @param len length of the record, including the 8 byte header
     */
    protected TextBytesAtom(byte[] source, int start, int len) {
        // There is always a header, so never go below 8 bytes
        int total = (len < 8) ? 8 : len;

        // The first 8 bytes are the header
        byte[] header = new byte[8];
        System.arraycopy(source, start, header, 0, header.length);
        _header = header;

        // Whatever follows the header is the text
        int textLength = total - 8;
        byte[] text = new byte[textLength];
        System.arraycopy(source, start+8, text, 0, textLength);
        _text = text;
    }

    /**
     * Returns our type, which is 4008
     */
    public long getRecordType() {
        return _type;
    }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk: the header as it stands, then the text bytes
     */
    public void writeOut(OutputStream out) throws IOException {
        out.write(_header);
        out.write(_text);
    }
}

View File

@ -0,0 +1,91 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
/**
 * A TextCharsAtom (type 4000). Holds text as two bytes per character,
 *  in little endian unicode form.
 *
 * NOTE(review): the original comment stated that the trailing return
 *  character is always stripped from this text. Nothing in this class
 *  strips it - confirm where that happens.
 *
 * @author Nick Burch
 */
public class TextCharsAtom extends RecordAtom
{
    private static long _type = 4000L;

    /** The 8 byte record header */
    private byte[] _header;

    /** The bytes that make up the text */
    private byte[] _text;

    /**
     * Grabs the text.
     *
     * @return the stored bytes decoded as little endian unicode
     */
    public String getText() {
        return StringUtil.getFromUnicodeLE(_text);
    }

    /**
     * Updates the text in the Atom.
     *
     * @param text the new text. It is re-encoded as two bytes per
     *  character, and the length in the record header is updated
     */
    public void setText(String text) {
        // Two bytes for every character
        final int byteCount = text.length()*2;

        // Convert to little endian unicode
        _text = new byte[byteCount];
        StringUtil.putUnicodeLE(text,_text,0);

        // Update the size (header bytes 5-8)
        LittleEndian.putInt(_header,4,byteCount);
    }

    /* *************** record code follows ********************** */

    /**
     * For the TextChars Atom
     *
     * @param source the array holding the record
     * @param start the offset into source at which the record header begins
     * @param len the length of the record, including the 8 byte
     *  header. Anything less than 8 is treated as 8 (i.e. no text)
     */
    protected TextCharsAtom(byte[] source, int start, int len) {
        // Sanity Checking - never shorter than the header
        final int recordLen = (len < 8) ? 8 : len;
        final int textLen = recordLen - 8;

        // Get the header
        _header = new byte[8];
        System.arraycopy(source,start,_header,0,8);

        // Grab the text, which follows straight after the header
        _text = new byte[textLen];
        System.arraycopy(source,start+8,_text,0,textLen);
    }

    /**
     * We are of type 4000
     */
    public long getRecordType() { return _type; }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk
     *
     * @param out the stream to write the header and text bytes to
     * @throws IOException if the underlying stream fails
     */
    public void writeOut(OutputStream out) throws IOException {
        // Header first, then the text
        out.write(_header);
        out.write(_text);
    }
}

View File

@ -0,0 +1,91 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
/**
 * A TextHeaderAtom (type 3999). Holds information on what kind of
 *  text is contained in the TextBytesAtom / TextCharsAtom that follows
 *  straight after
 *
 * @author Nick Burch
 */
public class TextHeaderAtom extends RecordAtom
{
    // The kinds of text we know about
    public static final int TITLE_TYPE = 0;
    public static final int BODY_TYPE = 1;
    public static final int NOTES_TYPE = 2;
    public static final int OTHER_TYPE = 4;
    public static final int CENTRE_BODY_TYPE = 5;
    public static final int CENTER_TITLE_TYPE = 6;
    public static final int HALF_BODY_TYPE = 7;
    public static final int QUARTER_BODY_TYPE = 8;

    private static long _type = 3999L;

    /** The 8 byte record header */
    private byte[] _header;

    /** The kind of text it is */
    private int textType;

    /** @return the kind of text, normally one of the *_TYPE constants */
    public int getTextType() { return textType; }

    /** @param type the kind of text. The value is stored as given, no checking is done */
    public void setTextType(int type) { textType = type; }

    /* *************** record code follows ********************** */

    /**
     * For the TextHeader Atom
     *
     * @param source the array holding the record
     * @param start the offset into source at which the record header begins
     * @param len the claimed length of the record. Only used for
     *  the sanity check, as we always read exactly 12 bytes
     * @throws RuntimeException if the claimed length is under 12 and
     *  fewer than 12 bytes remain in source from start
     */
    protected TextHeaderAtom(byte[] source, int start, int len) {
        // Sanity Checking - we're always 12 bytes long. If we've been
        //  told we're shorter, make sure there really are 12 bytes there
        final int available = source.length - start;
        if(len < 12 && available < 12) {
            throw new RuntimeException("Not enough data to form a TextHeaderAtom (always 12 bytes long) - found " + (source.length - start));
        }

        // Get the header
        _header = new byte[8];
        System.arraycopy(source,start,_header,0,8);

        // Grab the type, which is the 4 bytes after the header
        textType = (int)LittleEndian.getInt(source,start+8);
    }

    /**
     * We are of type 3999
     */
    public long getRecordType() { return _type; }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk
     *
     * @param out the stream to write the header and text type to
     * @throws IOException if the underlying stream fails
     */
    public void writeOut(OutputStream out) throws IOException {
        // Header - size or type unchanged
        out.write(_header);

        // Followed by our text type
        writeLittleEndian(textType,out);
    }
}

View File

@ -0,0 +1,64 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
/**
 * Stand-in for any record we don't know about. It keeps hold of the
 *  record's raw bytes (header included), so that the record can be
 *  written back out to disk unchanged
 *
 * @author Nick Burch
 */
public class UnknownRecordPlaceholder extends RecordAtom
{
    /** The whole record, header and all */
    private byte[] _contents;

    /** The type read from the record's own header */
    private long _type;

    /**
     * Create a new holder for a record we don't grok
     *
     * @param source the array holding the record
     * @param start the offset into source at which the record header begins
     * @param len the number of bytes to hold on to, counted from the
     *  start of the header. Negative values are treated as 0
     */
    protected UnknownRecordPlaceholder(byte[] source, int start, int len) {
        // Sanity Checking - we include the whole header, so the
        //  smallest sensible length is 0 rather than 8
        final int size = Math.max(len, 0);

        // Treat as an atom, grab and hold everything
        _contents = new byte[size];
        System.arraycopy(source,start,_contents,0,size);

        // The type is the unsigned short at offset 2 of what we hold
        // NOTE(review): this needs at least 4 bytes to have been
        //  copied - presumably shorter records fail here, confirm
        _type = LittleEndian.getUShort(_contents,2);
    }

    /**
     * Return the value we were given at creation
     */
    public long getRecordType() { return _type; }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk
     *
     * @param out the stream to write the held bytes to
     * @throws IOException if the underlying stream fails
     */
    public void writeOut(OutputStream out) throws IOException {
        out.write(_contents);
    }
}

View File

@ -0,0 +1,141 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
/**
 * A UserEdit Atom (type 4085). Holds information which bits of the file
 *  were last used by powerpoint, the version of powerpoint last used etc.
 *
 * ** WARNING ** stores byte offsets from the start of the PPT stream to
 *  other records! If you change the size of any elements before one of
 *  these, you'll need to update the offsets!
 *
 * @author Nick Burch
 */
public class UserEditAtom extends PositionDependentRecordAtom
{
    public static final int LAST_VIEW_NONE = 0;
    public static final int LAST_VIEW_SLIDE_VIEW = 1;
    public static final int LAST_VIEW_OUTLINE_VIEW = 2;
    public static final int LAST_VIEW_NOTES = 3;

    private static long _type = 4085L;

    /** The 8 byte record header */
    private byte[] _header;

    // The fields, in the order they are held in the record
    private int lastViewedSlideID;
    private int pptVersion;
    private int lastUserEditAtomOffset;
    private int persistPointersOffset;
    private int docPersistRef;
    private int maxPersistWritten;
    private short lastViewType;

    /** Whatever follows the fields above, kept to write back out */
    private byte[] reserved;

    // Somewhat user facing getters
    public int getLastViewedSlideID() { return lastViewedSlideID; }
    public short getLastViewType()    { return lastViewType; }

    // Scary internal getters
    public int getLastUserEditAtomOffset() { return lastUserEditAtomOffset; }
    public int getPersistPointersOffset()  { return persistPointersOffset; }
    public int getDocPersistRef()          { return docPersistRef; }
    public int getMaxPersistWritten()      { return maxPersistWritten; }

    // More scary internal setters
    public void setLastUserEditAtomOffset(int offset) { lastUserEditAtomOffset = offset; }
    public void setPersistPointersOffset(int offset)  { persistPointersOffset = offset; }

    /* *************** record code follows ********************** */

    /**
     * For the UserEdit Atom
     *
     * @param source the array holding the record
     * @param start the offset into source at which the record header
     *  begins. Also recorded as our last on-disk position
     * @param len the length of the record, including the 8 byte
     *  header. Anything less than 34 is treated as 34
     */
    protected UserEditAtom(byte[] source, int start, int len) {
        // Sanity Checking - 8 bytes of header plus 26 of fields
        final int recordLen = (len < 34) ? 34 : len;

        // Store where we currently live on disk
        myLastOnDiskOffset = start;

        // Get the header
        _header = new byte[8];
        System.arraycopy(source,start,_header,0,8);

        // The fields follow on from the header, one after another
        int pos = start + 8;

        // Get the last viewed slide ID
        lastViewedSlideID = (int)LittleEndian.getInt(source,pos);
        pos += 4;

        // Get the PPT version
        pptVersion = (int)LittleEndian.getInt(source,pos);
        pos += 4;

        // Get the offset to the previous incremental save's UserEditAtom
        // This will be the byte offset on disk where the previous one
        //  starts, or 0 if this is the first one
        lastUserEditAtomOffset = (int)LittleEndian.getInt(source,pos);
        pos += 4;

        // Get the offset to the persist pointers
        // This will be the byte offset on disk where the preceding
        //  PersistPtrFullBlock or PersistPtrIncrementalBlock starts
        persistPointersOffset = (int)LittleEndian.getInt(source,pos);
        pos += 4;

        // Get the persist reference for the document persist object
        // Normally seems to be 1
        docPersistRef = (int)LittleEndian.getInt(source,pos);
        pos += 4;

        // Maximum number of persist objects written
        maxPersistWritten = (int)LittleEndian.getInt(source,pos);
        pos += 4;

        // Last view type - only two bytes for this one
        lastViewType = (short)LittleEndian.getShort(source,pos);
        pos += 2;

        // There might be a few more bytes, which are a reserved field
        reserved = new byte[recordLen-34];
        System.arraycopy(source,pos,reserved,0,reserved.length);
    }

    /**
     * We are of type 4085
     */
    public long getRecordType() { return _type; }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk
     *
     * @param out the stream to write the header, the fields and any
     *  reserved bytes to, in the order they were read
     * @throws IOException if the underlying stream fails
     */
    public void writeOut(OutputStream out) throws IOException {
        // Header
        out.write(_header);

        // Write out the values, same order as they were read
        writeLittleEndian(lastViewedSlideID,out);
        writeLittleEndian(pptVersion,out);
        writeLittleEndian(lastUserEditAtomOffset,out);
        writeLittleEndian(persistPointersOffset,out);
        writeLittleEndian(docPersistRef,out);
        writeLittleEndian(maxPersistWritten,out);
        writeLittleEndian(lastViewType,out);

        // Reserved fields
        out.write(reserved);
    }
}

View File

@ -0,0 +1,281 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.usermodel;
import java.util.*;
import java.io.*;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.SlideAtom;
import org.apache.poi.hslf.record.SlideListWithText;
import org.apache.poi.hslf.record.SlideListWithText.*;
/**
 * This class is a friendly wrapper on top of the more scary HSLFSlideShow.
 *
 * TODO:
 *  - figure out how to match notes to their correct sheet
 *     (will involve understanding DocSlideList and DocNotesList)
 *  - handle Slide creation cleaner
 *
 * @author Nick Burch
 */
public class SlideShow
{
    // What we're based on
    private HSLFSlideShow _hslfSlideShow;

    // Low level contents, as taken from HSLFSlideShow
    private Record[] _records;

    // Friendly objects for people to deal with
    private Slide[] _slides;
    private Notes[] _notes;
    // private MetaSheets[] _msheets;

    /**
     *  right now this function takes one parameter: a ppt file, and outputs
     *  the text it can find for it
     *
     * @param args the command line, of which the first entry is the
     *  name of the ppt file to read
     * @throws IOException if the file can't be read
     */
    public static void main(String args[]) throws IOException
    {
        // Without a filename there's nothing for us to do
        if(args.length < 1) {
            System.err.println("Usage: SlideShow <powerpoint file>");
            System.exit(1);
        }

        HSLFSlideShow basefoo = new HSLFSlideShow(args[0]);
        SlideShow foo = new SlideShow(basefoo);

        Slide[] slides = foo.getSlides();
        for(int i=0; i<slides.length; i++) {
            Slide slide = slides[i];
            System.out.println("*Slide " + slide.getSheetNumber() + ":");
            TextRun[] runs = slide.getTextRuns();
            for(int j=0; j<runs.length; j++) {
                TextRun run = runs[j];
                System.out.println("  * Text run " + run.getRunType());
                System.out.println("\n" + run.getText() + "\n");
            }
        }
    }

    /**
     * Constructs a Powerpoint document from the underlying
     *  HSLFSlideShow object. Finds the model stuff from this
     *
     * @param hslfSlideShow the HSLFSlideShow to base on
     */
    public SlideShow(HSLFSlideShow hslfSlideShow) throws IOException
    {
        // Get useful things from our base slideshow
        _hslfSlideShow = hslfSlideShow;
        _records = _hslfSlideShow.getRecords();

        // For holding the Slide Records
        Vector slidesV = new Vector(10);
        // For holding the Notes Records
        Vector notesV = new Vector(10);
        // For holding Document Records
        Vector documentsV = new Vector(10);

        // Look for Notes, Slides and Documents
        for(int i=0; i<_records.length; i++) {
            if(_records[i] instanceof org.apache.poi.hslf.record.Notes) {
                notesV.add(_records[i]);
            }
            if(_records[i] instanceof org.apache.poi.hslf.record.Slide) {
                slidesV.add(_records[i]);
            }
            if(_records[i].getRecordType() == 1000L) {
                documentsV.add(_records[i]);
            }
        }

        // Also look for SlideListWithTexts in Documents
        //
        // Need to get the SlideAtomsSets for all of these. Then, query the
        //  SlidePersistAtom, and group stuff together between SLWT blocks
        //  based on the refID/slideID. Finally, build up a list of all the
        //  SlideAtomsSets for a given refID / slideID, and pass them on to
        //  the Slide when creating
        //
        // If a notes sheet exists, can normally match the Notes sheet ID
        //  to the slide ID in the SlidePersistAtom. Since there isn't always,
        //  and we can't find the ID in the slide, just order on the slide ID,
        //  and hand off to the Slides in turn.
        // (Based on output from dev.SLWTTextListing and dev.SlideAndNotesAtomListing)
        //
        // There is often duplicate text, especially for the first few
        //  Slides. Currently, it's up to the Slide model code to detect
        //  and ignore those
        Vector setsV = findSlideAtomsSets(documentsV);
        Vector[] sortedSetsV = groupSetsBySlideID(setsV);

        // ******************* Do the real model layer creation ****************

        // Create our Notes
        // (Need to create first, as passed to the Slides)
        _notes = new Notes[notesV.size()];
        for(int i=0; i<_notes.length; i++) {
            _notes[i] = new Notes((org.apache.poi.hslf.record.Notes)notesV.get(i));
        }

        // Create our Slides
        _slides = new Slide[slidesV.size()];
        for(int i=0; i<_slides.length; i++) {
            // Grab the slide Record
            org.apache.poi.hslf.record.Slide slideRecord = (org.apache.poi.hslf.record.Slide)slidesV.get(i);

            // Do they have a Notes?
            Notes thisNotes = findNotesFor(slideRecord);

            // Grab the (hopefully) corresponding block of Atoms. The
            //  i'th slide record gets the sets for the i'th lowest slide ID
            SlideAtomsSet[] sets;
            if(sortedSetsV.length > i) {
                Vector thisSetsV = sortedSetsV[i];
                sets = (SlideAtomsSet[])thisSetsV.toArray(new SlideAtomsSet[thisSetsV.size()]);
            } else {
                // Didn't find enough SlideAtomSets to give any to this sheet
                sets = new SlideAtomsSet[0];
            }

            // Create the Slide model layer
            _slides[i] = new Slide(slideRecord,thisNotes,sets);
        }
    }

    /**
     * Pulls the SlideAtomsSets out of every SlideListWithText that is
     *  a direct child of one of the given Document records.
     *
     * @param documentsV the Document records to search
     * @return a Vector of SlideAtomsSet, in the order they were found
     */
    private static Vector findSlideAtomsSets(Vector documentsV) {
        Vector setsV = new Vector();
        for(int i=0; i<documentsV.size(); i++) {
            Record docRecord = (Record)documentsV.get(i);
            Record[] docChildren = docRecord.getChildRecords();
            for(int j=0; j<docChildren.length; j++) {
                if(docChildren[j] instanceof SlideListWithText) {
                    SlideListWithText slwt = (SlideListWithText)docChildren[j];
                    SlideAtomsSet[] thisSets = slwt.getSlideAtomsSets();
                    for(int k=0; k<thisSets.length; k++) {
                        setsV.add(thisSets[k]);
                    }
                }
            }
        }
        return setsV;
    }

    /**
     * Sorts the SlideAtomsSets together into groups for the same
     *  slide ID, with the groups ordered by ascending slide ID.
     *
     * @param setsV a Vector of SlideAtomsSet
     * @return one Vector of SlideAtomsSet per distinct slide ID. Within
     *  a group, the sets keep the order they had in setsV
     */
    private static Vector[] groupSetsBySlideID(Vector setsV) {
        // A TreeMap hands its values back in key order, which
        //  gives us the grouping and the sorting in one go
        TreeMap setsByID = new TreeMap();
        for(int i=0; i<setsV.size(); i++) {
            SlideAtomsSet thisSet = (SlideAtomsSet)setsV.get(i);
            Integer id = new Integer(thisSet.getSlidePersistAtom().getSlideIdentifier());

            Vector group = (Vector)setsByID.get(id);
            if(group == null) {
                group = new Vector();
                setsByID.put(id, group);
            }
            group.add(thisSet);
        }

        Vector[] sortedSetsV = new Vector[setsByID.size()];
        int pos = 0;
        for(Iterator groups = setsByID.values().iterator(); groups.hasNext(); pos++) {
            sortedSetsV[pos] = (Vector)groups.next();
        }
        return sortedSetsV;
    }

    /**
     * Finds the Notes that goes with a slide, by looking up the notes
     *  ID from the slide's SlideAtom in our (already built) Notes.
     *
     * @param slideRecord the slide record to find the Notes for
     * @return the Notes whose sheet number matches the notes ID, or
     *  null if the notes ID is 0 or nothing matches. Should there be
     *  several matches, the last one found is used
     */
    private Notes findNotesFor(org.apache.poi.hslf.record.Slide slideRecord) {
        Notes thisNotes = null;

        // Find their SlideAtom, and use this to check for a Notes
        Record[] slideRecordChildren = slideRecord.getChildRecords();
        for(int j=0; j<slideRecordChildren.length; j++) {
            if(slideRecordChildren[j] instanceof SlideAtom) {
                SlideAtom sa = (SlideAtom)slideRecordChildren[j];
                int notesID = sa.getNotesID();
                if(notesID != 0) {
                    for(int k=0; k<_notes.length; k++) {
                        if(_notes[k].getSheetNumber() == notesID) {
                            thisNotes = _notes[k];
                        }
                    }
                }
            }
        }
        return thisNotes;
    }

    /**
     * Writes out the slideshow file the is represented by an instance of
     *  this class
     * @param out The OutputStream to write to.
     *  @throws IOException If there is an unexpected IOException from the passed
     *            in OutputStream
     */
    public void write(OutputStream out) throws IOException {
        _hslfSlideShow.write(out);
    }

    // Accesser methods follow

    /**
     * Returns an array of all the normal Slides found in the slideshow
     */
    public Slide[] getSlides() { return _slides; }

    /**
     * Returns an array of all the normal Notes found in the slideshow
     */
    public Notes[] getNotes() { return _notes; }

    /**
     * Returns an array of all the meta Sheets (master sheets etc)
     * found in the slideshow
     */
    //public MetaSheet[] getMetaSheets() { return _msheets; }
}

View File

@ -0,0 +1,42 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.util;
import java.io.ByteArrayOutputStream;
/**
 * A ByteArrayOutputStream where you can track how many bytes you've
 *  already written, and go back and write over a previous part of
 *  the stream
 *
 * @author Nick Burch
 */
public class MutableByteArrayOutputStream extends ByteArrayOutputStream
{
    /**
     * Return how many bytes we've stuffed in so far
     *
     * @return the number of bytes written to the stream
     */
    public int getBytesWritten() { return size(); }

    /**
     * Write some bytes to the array
     *
     * @param b the bytes to append, all of which are written
     */
    public void write(byte[] b) {
        super.write(b,0,b.length);
    }

    /**
     * Write a single byte to the array
     *
     * @param b the byte to append
     */
    public void write(int b) {
        super.write(b);
    }

    /**
     * Write some bytes to an earlier bit of the array. The stream
     *  never grows as a result of this, only bytes that have already
     *  been written can be replaced.
     *
     * @param b the replacement bytes
     * @param startPos the position in the stream of the first byte to replace
     * @throws IndexOutOfBoundsException if the range being replaced
     *  isn't wholly within what has been written so far
     */
    public synchronized void overwrite(byte[] b, int startPos) {
        // Compare against what's left after startPos, rather than
        //  adding the two up, so huge values can't overflow past the check
        if(startPos < 0 || b.length > count - startPos) {
            throw new IndexOutOfBoundsException(
                "Can't overwrite " + b.length + " bytes at position " + startPos +
                ", as only " + count + " bytes have been written"
            );
        }
        System.arraycopy(b,0,buf,startPos,b.length);
    }
}

View File

@ -0,0 +1,73 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf;
import junit.framework.TestCase;
import java.io.*;
import org.apache.poi.poifs.filesystem.*;
/**
 * Tests that HSLFSlideShow writes the powerpoint bit of data back out
 *  correctly. Currently, that means being the same as what it read in
 *
 * @author Nick Burch (nick at torchbox dot com)
 */
public class TestReWrite extends TestCase {
    // HSLFSlideShow primed on the test data
    private HSLFSlideShow ss;
    // POIFS primed on the test data
    private POIFSFileSystem pfs;

    /**
     * Loads basic_test_ppt_file.ppt from the directory named by the
     *  HSLF.testdata.path system property
     */
    public TestReWrite() throws Exception {
        String dirname = System.getProperty("HSLF.testdata.path");
        String filename = dirname + "/basic_test_ppt_file.ppt";

        // Don't leave the file open once POIFS has been built from it
        FileInputStream fis = new FileInputStream(filename);
        try {
            pfs = new POIFSFileSystem(fis);
        } finally {
            fis.close();
        }
        ss = new HSLFSlideShow(pfs);
    }

    /**
     * Writes the slideshow out, reads the result back in, and checks
     *  that the "PowerPoint Document" stream is byte for byte the
     *  same as the one in the file we started from
     */
    public void testWritesOutTheSame() throws Exception {
        // Write out to a byte array
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ss.write(baos);

        // Build an input stream of it
        ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());

        // Use POIFS to query that lot
        POIFSFileSystem npfs = new POIFSFileSystem(bais);

        // Check that the "PowerPoint Document" sections have the same size
        DocumentEntry oProps = (DocumentEntry)pfs.getRoot().getEntry("PowerPoint Document");
        DocumentEntry nProps = (DocumentEntry)npfs.getRoot().getEntry("PowerPoint Document");
        assertEquals(oProps.getSize(),nProps.getSize());

        // Check that they contain the same data
        byte[] _oData = new byte[oProps.getSize()];
        byte[] _nData = new byte[nProps.getSize()];
        pfs.createDocumentInputStream("PowerPoint Document").read(_oData);
        npfs.createDocumentInputStream("PowerPoint Document").read(_nData);
        for(int i=0; i<_oData.length; i++) {
            // Report the offset of the first difference on failure,
            //  rather than printing a line for every byte checked
            assertEquals(
                "Byte at offset " + i + " (0x" + Integer.toHexString(i) + ")",
                _oData[i], _nData[i]
            );
        }
    }
}

View File

@ -0,0 +1,100 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf;
import junit.framework.TestCase;
import java.io.*;
import java.util.*;
import org.apache.poi.hslf.record.*;
import org.apache.poi.poifs.filesystem.*;
/**
 * Tests that HSLFSlideShow writes the powerpoint bit of data back out
 *  in a sane manner - i.e. records end up in the right place
 *
 * @author Nick Burch (nick at torchbox dot com)
 */
public class TestReWriteSanity extends TestCase {
    // HSLFSlideShow primed on the test data
    private HSLFSlideShow ss;
    // POIFS primed on the test data
    private POIFSFileSystem pfs;

    /**
     * Loads basic_test_ppt_file.ppt from the directory named by the
     *  HSLF.testdata.path system property
     */
    public TestReWriteSanity() throws Exception {
        String dirname = System.getProperty("HSLF.testdata.path");
        String filename = dirname + "/basic_test_ppt_file.ppt";

        // Don't leave the file open once POIFS has been built from it
        FileInputStream fis = new FileInputStream(filename);
        try {
            pfs = new POIFSFileSystem(fis);
        } finally {
            fis.close();
        }
        ss = new HSLFSlideShow(pfs);
    }

    /**
     * Writes the slideshow out, reads it back in, and checks that
     *  the offsets held in the UserEditAtoms and the CurrentUserAtom
     *  match where the records they refer to have ended up
     */
    public void testUserEditAtomsRight() throws Exception {
        // Write out to a byte array
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ss.write(baos);

        // Build an input stream of it
        ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());

        // Create a new one from that
        HSLFSlideShow wss = new HSLFSlideShow(bais);

        // Find the location of the PersistPtrIncrementalBlocks and
        // UserEditAtoms. Both are keyed on the position at which the
        // record starts
        Record[] r = wss.getRecords();
        Hashtable pp = new Hashtable();
        Hashtable ue = new Hashtable();
        ue.put(new Integer(0),new Integer(0)); // Will show 0 if first
        int pos = 0;
        int lastUEPos = -1;

        for(int i=0; i<r.length; i++) {
            if(r[i] instanceof PersistPtrHolder) {
                pp.put(new Integer(pos), r[i]);
            }
            if(r[i] instanceof UserEditAtom) {
                ue.put(new Integer(pos), r[i]);
                lastUEPos = pos;
            }

            // Writing the record out tells us how much space it
            // takes up, and so where the next one starts
            ByteArrayOutputStream bc = new ByteArrayOutputStream();
            r[i].writeOut(bc);
            pos += bc.size();
        }

        // Check that the UserEditAtom's point to right stuff
        for(int i=0; i<r.length; i++) {
            if(r[i] instanceof UserEditAtom) {
                UserEditAtom uea = (UserEditAtom)r[i];
                int luPos = uea.getLastUserEditAtomOffset();
                int ppPos = uea.getPersistPointersOffset();

                assertTrue(pp.containsKey(new Integer(ppPos)));
                assertTrue(ue.containsKey(new Integer(luPos)));
            }
        }

        // Check that the CurrentUserAtom points to the right UserEditAtom
        CurrentUserAtom cua = wss.getCurrentUserAtom();
        int listedUEPos = (int)cua.getCurrentEditOffset();
        assertEquals(lastUEPos,listedUEPos);
    }
}

View File

@ -0,0 +1,86 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf;
import junit.framework.TestCase;
import org.apache.poi.hslf.record.*;
/**
 * Tests that HSLFSlideShow returns the right numbers of key records when
 *  it parses the test file
 *
 * @author Nick Burch (nick at torchbox dot com)
 */
public class TestRecordCounts extends TestCase {
    // HSLFSlideShow primed on the test data
    private HSLFSlideShow ss;

    /**
     * Loads basic_test_ppt_file.ppt from the directory named by the
     *  HSLF.testdata.path system property
     */
    public TestRecordCounts() throws Exception {
        String dirname = System.getProperty("HSLF.testdata.path");
        ss = new HSLFSlideShow(dirname + "/basic_test_ppt_file.ppt");
    }

    /** Counts how many of the records are instances of the given class */
    private static int countInstances(Record[] records, Class type) {
        int found = 0;
        for(int idx=0; idx<records.length; idx++) {
            if(type.isInstance(records[idx])) {
                found++;
            }
        }
        return found;
    }

    /**
     * Counts how many of the records are instances of the given
     *  class, and also report the given record type
     */
    private static int countInstances(Record[] records, Class type, long recordType) {
        int found = 0;
        for(int idx=0; idx<records.length; idx++) {
            if(type.isInstance(records[idx]) &&
                    records[idx].getRecordType() == recordType) {
                found++;
            }
        }
        return found;
    }

    public void testSheetsCount() throws Exception {
        // Top level
        Record[] topLevel = ss.getRecords();

        // Currently still sees the Master Sheet, but might not in the future
        assertEquals(3, countInstances(topLevel, Slide.class));
    }

    public void testNotesCount() throws Exception {
        // Top level
        Record[] topLevel = ss.getRecords();

        // Two real sheets, plus the master sheet
        assertEquals(3, countInstances(topLevel, Notes.class, 1008L));
    }

    public void testSlideListWithTextCount() throws Exception {
        // Second level - the children of the first top level record
        Record[] topLevel = ss.getRecords();
        Record[] secondLevel = topLevel[0].getChildRecords();

        // We expect three of them in the test file
        // NOTE(review): the original comment here was the same "two
        //  real sheets, plus the master sheet" as for the notes -
        //  confirm that's what the three really are
        assertEquals(3, countInstances(secondLevel, SlideListWithText.class, 4080L));
    }
}

View File

@ -0,0 +1,67 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.extractor;
import junit.framework.TestCase;
/**
 * Tests that the extractor correctly gets the text out of our sample file
 *
 * @author Nick Burch (nick at torchbox dot com)
 */
public class TextExtractor extends TestCase {
    // Extractor primed on the test data
    private PowerPointExtractor ppe;

    /**
     * Points the extractor at basic_test_ppt_file.ppt, in the
     *  directory named by the HSLF.testdata.path system property
     */
    public TextExtractor() throws Exception {
        String dirname = System.getProperty("HSLF.testdata.path");
        ppe = new PowerPointExtractor(dirname + "/basic_test_ppt_file.ppt");
    }

    /**
     * Checks two strings are the same - first on length, then
     *  character by character (printing each pair as it goes, so the
     *  point of any difference can be seen), and then as a whole
     */
    private static void assertSameText(String expected, String actual) {
        assertEquals(expected.length(),actual.length());

        final int total = expected.length();
        for(int i=0; i<total; i++) {
            System.out.println(i + "\t" + expected.charAt(i) + " " + actual.charAt(i));
            assertEquals(expected.charAt(i),actual.charAt(i));
        }

        assertEquals(expected,actual);
    }

    public void testReadSheetText() throws Exception {
        String sheetText = ppe.getText();
        String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";

        assertSameText(expectText,sheetText);
    }

    public void testReadNoteText() throws Exception {
        String notesText = ppe.getNotes();
        String expectText = "These are the notes for page 1\nThese are the notes on page two, again lacking formatting\n";

        assertSameText(expectText,notesText);
    }
}

View File

@ -0,0 +1,60 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import junit.framework.TestCase;
import java.io.ByteArrayOutputStream;
/**
* Tests that SlidePersistAtom works properly
*
* @author Nick Burch (nick at torchbox dot com)
*/
public class TestSlidePersistAtom extends TestCase {
    // A complete 28 byte SlidePersistAtom record, taken from a real file
    private final byte[] data_a = new byte[] { 0, 0, 0xF3-256, 3, 0x14, 0, 0, 0,
        4, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0,
        1, 0, 0, 0, 0, 0, 0 };

    /** The atom built from the sample bytes must report record type 1011. */
    public void testRecordType() throws Exception {
        SlidePersistAtom spa = new SlidePersistAtom(data_a, 0, data_a.length);
        assertEquals(1011L, spa.getRecordType());
    }

    /** The individual fields must be decoded to the values held in the sample bytes. */
    public void testFlags() throws Exception {
        SlidePersistAtom spa = new SlidePersistAtom(data_a, 0, data_a.length);
        assertEquals(4, spa.getRefID() );
        assertEquals(true, spa.getHasShapesOtherThanPlaceholders() );
        assertEquals(2, spa.getNumPlaceholderTexts() );
        assertEquals(256, spa.getSlideIdentifier());
    }

    /** Writing the atom back out must reproduce the bytes it was built from. */
    public void testWrite() throws Exception {
        SlidePersistAtom spa = new SlidePersistAtom(data_a, 0, data_a.length);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        spa.writeOut(baos);
        assertBytesEqual(data_a, baos.toByteArray());
    }

    /**
     * Compares two byte arrays, reporting the offset of the first
     * difference rather than just the two differing values.
     */
    private void assertBytesEqual(byte[] expected, byte[] actual) {
        assertEquals("Number of bytes written", expected.length, actual.length);
        for (int i = 0; i < expected.length; i++) {
            assertEquals("Byte at offset " + i, expected[i], actual[i]);
        }
    }
}

View File

@ -0,0 +1,83 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import junit.framework.TestCase;
import java.io.ByteArrayOutputStream;
/**
* Tests that TextBytesAtom works properly
*
* @author Nick Burch (nick at torchbox dot com)
*/
public class TestTextBytesAtom extends TestCase {
    // From a real file
    // NOTE(review): if bytes 4-7 hold the little-endian payload length (as
    //  alt_data's 0x14 = 20 for its 20 text bytes suggests), then the 0x1c
    //  (28) here is one more than the 27 text bytes that follow - confirm
    //  against the original file before relying on this header
    private final byte[] data = new byte[] { 0, 0, 0xA8-256, 0x0f, 0x1c, 0, 0, 0,
        0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68,
        0x65, 0x20, 0x74, 0x69, 0x74, 0x6C, 0x65, 0x20, 0x6F, 0x6E,
        0x20, 0x70, 0x61, 0x67, 0x65, 0x20, 0x32 };
    private final String data_text = "This is the title on page 2";

    // The record expected once the text has been changed to alt_text
    private final byte[] alt_data = new byte[] { 0, 0, 0xA8-256, 0x0F, 0x14, 0, 0, 0,
        0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20,
        0x74, 0x65, 0x73, 0x74, 0x20, 0x74, 0x69, 0x74, 0x6C, 0x65 };
    private final String alt_text = "This is a test title";

    /** The atom built from the sample bytes must report record type 4008. */
    public void testRecordType() throws Exception {
        TextBytesAtom tba = new TextBytesAtom(data,0,data.length);
        assertEquals(4008L, tba.getRecordType());
    }

    /** The text of the first sample record must be decoded correctly. */
    public void testTextA() throws Exception {
        TextBytesAtom tba = new TextBytesAtom(data,0,data.length);
        assertEquals(data_text, tba.getText());
    }

    /** The text of the second sample record must be decoded correctly. */
    public void testTextB() throws Exception {
        TextBytesAtom tba = new TextBytesAtom(alt_data,0,alt_data.length);
        assertEquals(alt_text, tba.getText());
    }

    /**
     * Replacing the text and writing the atom out must give exactly the
     * bytes of alt_data, header included.
     */
    public void testChangeText() throws Exception {
        TextBytesAtom tba = new TextBytesAtom(data,0,data.length);
        tba.setText(alt_text.getBytes("ISO-8859-1"));

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        tba.writeOut(baos);

        // Compare the header and the text
        assertBytesEqual(alt_data, baos.toByteArray());
    }

    /** Writing an unmodified atom out must reproduce the bytes it was built from. */
    public void testWrite() throws Exception {
        TextBytesAtom tba = new TextBytesAtom(data,0,data.length);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        tba.writeOut(baos);
        assertBytesEqual(data, baos.toByteArray());
    }

    /**
     * Compares two byte arrays, reporting the offset of the first
     * difference rather than just the two differing values.
     */
    private void assertBytesEqual(byte[] expected, byte[] actual) {
        assertEquals("Number of bytes written", expected.length, actual.length);
        for (int i = 0; i < expected.length; i++) {
            assertEquals("Byte at offset " + i, expected[i], actual[i]);
        }
    }
}

View File

@ -0,0 +1,80 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import junit.framework.TestCase;
import java.io.ByteArrayOutputStream;
/**
* Tests that TextCharsAtom works properly
*
* @author Nick Burch (nick at torchbox dot com)
*/
public class TestTextCharsAtom extends TestCase {
    // From a real file
    private final byte[] data = new byte[] { 0, 0, 0xA0-256, 0x0f, 0x08, 0, 0, 0,
        0x54, 0x00, 0x68, 0x00, 0x69, 0x00, 0x73, 0x00 };
    private final String data_text = "This";

    // The record expected once the text has been changed to alt_text;
    //  the final two bytes (0xa3, 0x01) correspond to the \u01A3 character
    private final byte[] alt_data = new byte[] { 0, 0, 0xA0-256, 0x0F, 0x0a, 0, 0, 0,
        0x54, 0x00, 0x68, 0x00, 0x69, 0x00, 0x73, 0x00, 0xa3-256, 0x01 };
    private final String alt_text = "This\u01A3";

    /** The atom built from the sample bytes must report record type 4000. */
    public void testRecordType() throws Exception {
        TextCharsAtom tca = new TextCharsAtom(data,0,data.length);
        assertEquals(4000L, tca.getRecordType());
    }

    /** The text of the first sample record must be decoded correctly. */
    public void testTextA() throws Exception {
        TextCharsAtom tca = new TextCharsAtom(data,0,data.length);
        assertEquals(data_text, tca.getText());
    }

    /** The text of the second sample record must be decoded correctly. */
    public void testTextB() throws Exception {
        TextCharsAtom tca = new TextCharsAtom(alt_data,0,alt_data.length);
        assertEquals(alt_text, tca.getText());
    }

    /**
     * Replacing the text and writing the atom out must give exactly the
     * bytes of alt_data, header included.
     */
    public void testChangeText() throws Exception {
        TextCharsAtom tca = new TextCharsAtom(data,0,data.length);
        tca.setText(alt_text);

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        tca.writeOut(baos);

        // Compare the header and the text
        assertBytesEqual(alt_data, baos.toByteArray());
    }

    /** Writing an unmodified atom out must reproduce the bytes it was built from. */
    public void testWrite() throws Exception {
        TextCharsAtom tca = new TextCharsAtom(data,0,data.length);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        tca.writeOut(baos);
        assertBytesEqual(data, baos.toByteArray());
    }

    /**
     * Compares two byte arrays, reporting the offset of the first
     * difference rather than just the two differing values.
     */
    private void assertBytesEqual(byte[] expected, byte[] actual) {
        assertEquals("Number of bytes written", expected.length, actual.length);
        for (int i = 0; i < expected.length; i++) {
            assertEquals("Byte at offset " + i, expected[i], actual[i]);
        }
    }
}

View File

@ -0,0 +1,61 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import junit.framework.TestCase;
import java.io.ByteArrayOutputStream;
/**
* Tests that TextHeaderAtom works properly
*
* @author Nick Burch (nick at torchbox dot com)
*/
public class TestTextHeaderAtom extends TestCase {
    // From a real file; the three records differ only in their ninth byte
    private final byte[] notes_data = new byte[] { 0, 0, 0x9f-256, 0x0f, 4, 0, 0, 0, 2, 0, 0, 0};
    private final byte[] title_data = new byte[] { 0, 0, 0x9f-256, 0x0f, 4, 0, 0, 0, 0, 0, 0, 0 };
    private final byte[] body_data = new byte[] { 0, 0, 0x9f-256, 0x0f, 4, 0, 0, 0, 1, 0, 0, 0 };

    /** The atom built from the sample bytes must report record type 3999. */
    public void testRecordType() throws Exception {
        TextHeaderAtom tha = new TextHeaderAtom(notes_data,0,notes_data.length);
        assertEquals(3999L, tha.getRecordType());
    }

    /** Each sample record must be decoded to the matching text type constant. */
    public void testTypes() throws Exception {
        // Pass each array's own length rather than a hard-coded 12
        TextHeaderAtom n_tha = new TextHeaderAtom(notes_data,0,notes_data.length);
        TextHeaderAtom t_tha = new TextHeaderAtom(title_data,0,title_data.length);
        TextHeaderAtom b_tha = new TextHeaderAtom(body_data,0,body_data.length);
        assertEquals(TextHeaderAtom.NOTES_TYPE, n_tha.getTextType());
        assertEquals(TextHeaderAtom.TITLE_TYPE, t_tha.getTextType());
        assertEquals(TextHeaderAtom.BODY_TYPE, b_tha.getTextType());
    }

    /** Writing the atom out must reproduce the bytes it was built from. */
    public void testWrite() throws Exception {
        TextHeaderAtom tha = new TextHeaderAtom(notes_data,0,notes_data.length);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        tha.writeOut(baos);
        byte[] b = baos.toByteArray();

        assertEquals("Number of bytes written", notes_data.length, b.length);
        for (int i = 0; i < notes_data.length; i++) {
            // Name the offset so a mismatch can be located in the record
            assertEquals("Byte at offset " + i, notes_data[i], b[i]);
        }
    }
}

View File

@ -0,0 +1,54 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.usermodel;
import junit.framework.TestCase;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*;
/**
* Tests that SlideShow returns the right number of Sheets and MetaSheets
*
* @author Nick Burch (nick at torchbox dot com)
*/
public class TestCounts extends TestCase {
    // SlideShow primed on the test data
    private final SlideShow ss;

    /**
     * Loads basic_test_ppt_file.ppt from the directory named by the
     * HSLF.testdata.path system property.
     *
     * @throws IllegalStateException if HSLF.testdata.path is not set
     * @throws Exception if the slide show cannot be built from the sample file
     */
    public TestCounts() throws Exception {
        String dirname = System.getProperty("HSLF.testdata.path");
        if (dirname == null) {
            // Otherwise the path silently becomes "null/basic_test_ppt_file.ppt"
            throw new IllegalStateException(
                "System property HSLF.testdata.path is not set; "
                + "it must name the directory holding basic_test_ppt_file.ppt");
        }
        String filename = dirname + "/basic_test_ppt_file.ppt";
        HSLFSlideShow hss = new HSLFSlideShow(filename);
        ss = new SlideShow(hss);
    }

    /** The sample file must yield three slides. */
    public void testSheetsCount() throws Exception {
        Slide[] slides = ss.getSlides();
        // Two sheets, plus an extra entry related to the master sheet
        assertEquals(3, slides.length);
    }

    /** The sample file must yield three sets of notes. */
    public void testNotesCount() throws Exception {
        Notes[] notes = ss.getNotes();
        // Two sheets -> two notes, plus the notes on the slide master
        assertEquals(3, notes.length);
    }
}

View File

@ -0,0 +1,61 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.usermodel;
import junit.framework.TestCase;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*;
/**
* Tests that SlideShow returns MetaSheets which have the right text in them
*
* @author Nick Burch (nick at torchbox dot com)
*/
public class TestNotesText extends TestCase {
    // SlideShow primed on the test data
    private final SlideShow ss;

    /**
     * Loads basic_test_ppt_file.ppt from the directory named by the
     * HSLF.testdata.path system property.
     *
     * @throws IllegalStateException if HSLF.testdata.path is not set
     * @throws Exception if the slide show cannot be built from the sample file
     */
    public TestNotesText() throws Exception {
        String dirname = System.getProperty("HSLF.testdata.path");
        if (dirname == null) {
            // Otherwise the path silently becomes "null/basic_test_ppt_file.ppt"
            throw new IllegalStateException(
                "System property HSLF.testdata.path is not set; "
                + "it must name the directory holding basic_test_ppt_file.ppt");
        }
        String filename = dirname + "/basic_test_ppt_file.ppt";
        HSLFSlideShow hss = new HSLFSlideShow(filename);
        ss = new SlideShow(hss);
    }

    /** The notes at index 1 must hold the notes text for page 1. */
    public void testNotesOne() throws Exception {
        // NOTE(review): index 0 is presumably the slide master's notes
        //  (TestCounts expects three notes entries) - confirm
        assertNotesText(1, new String[] {"These are the notes for page 1"});
    }

    /** The notes at index 2 must hold the notes text for page two. */
    public void testNotesTwo() throws Exception {
        assertNotesText(2, new String[] {"These are the notes on page two, again lacking formatting"});
    }

    /**
     * Checks that the notes at the given index hold exactly the expected
     * text runs, in order.
     *
     * @param notesIndex position in the array returned by SlideShow.getNotes()
     * @param expectText the expected text of each run
     */
    private void assertNotesText(int notesIndex, String[] expectText) throws Exception {
        Notes notes = ss.getNotes()[notesIndex];
        assertEquals("Number of text runs in notes " + notesIndex,
            expectText.length, notes.getTextRuns().length);
        for (int i = 0; i < expectText.length; i++) {
            assertEquals("Text of run " + i + " in notes " + notesIndex,
                expectText[i], notes.getTextRuns()[i].getText());
        }
    }
}

View File

@ -0,0 +1,61 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.usermodel;
import junit.framework.TestCase;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*;
/**
* Tests that SlideShow returns Sheets which have the right text in them
*
* @author Nick Burch (nick at torchbox dot com)
*/
public class TestSheetText extends TestCase {
    // SlideShow primed on the test data
    private final SlideShow ss;

    /**
     * Loads basic_test_ppt_file.ppt from the directory named by the
     * HSLF.testdata.path system property.
     *
     * @throws IllegalStateException if HSLF.testdata.path is not set
     * @throws Exception if the slide show cannot be built from the sample file
     */
    public TestSheetText() throws Exception {
        String dirname = System.getProperty("HSLF.testdata.path");
        if (dirname == null) {
            // Otherwise the path silently becomes "null/basic_test_ppt_file.ppt"
            throw new IllegalStateException(
                "System property HSLF.testdata.path is not set; "
                + "it must name the directory holding basic_test_ppt_file.ppt");
        }
        String filename = dirname + "/basic_test_ppt_file.ppt";
        HSLFSlideShow hss = new HSLFSlideShow(filename);
        ss = new SlideShow(hss);
    }

    /** The first slide must hold the title and the body text of page 1. */
    public void testSheetOne() throws Exception {
        Sheet slideOne = ss.getSlides()[0];
        String[] expectText = new String[] {"This is a test title","This is a test subtitle\nThis is on page 1"};
        assertSheetText("slide one", slideOne, expectText);
    }

    /** The second slide must hold the title and the body text of page 2. */
    public void testSheetTwo() throws Exception {
        Sheet slideTwo = ss.getSlides()[1];
        String[] expectText = new String[] {"This is the title on page 2","This is page two\nIt has several blocks of text\nNone of them have formatting"};
        assertSheetText("slide two", slideTwo, expectText);
    }

    /**
     * Checks that a sheet holds exactly the expected text runs, in order.
     *
     * @param label name of the sheet, used only in failure messages
     * @param sheet the sheet whose text runs are checked
     * @param expectText the expected text of each run
     */
    private void assertSheetText(String label, Sheet sheet, String[] expectText) throws Exception {
        assertEquals("Number of text runs on " + label,
            expectText.length, sheet.getTextRuns().length);
        for (int i = 0; i < expectText.length; i++) {
            assertEquals("Text of run " + i + " on " + label,
                expectText[i], sheet.getTextRuns()[i].getText());
        }
    }
}