diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index c97d6a8bf0..9a0cdb5cca 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -50,8 +50,10 @@ import org.apache.poi.hwpf.usermodel.*; public class HWPFDocument extends POIDocument // implements Cloneable { - /** The FIB*/ + /** The FIB */ protected FileInformationBlock _fib; + /** Built from the FIB; works out where each kind of text lies from the CP lengths in the FIB */ + protected CPSplitCalculator _cpSplit; /** main document stream buffer*/ protected byte[] _mainStream; @@ -177,6 +179,7 @@ public class HWPFDocument extends POIDocument // Create our FIB, and check for the doc being encrypted _fib = new FileInformationBlock(_mainStream); + _cpSplit = new CPSplitCalculator(_fib); if(_fib.isFEncrypted()) { throw new EncryptedDocumentException("Cannot process encrypted word files!"); } @@ -290,14 +293,54 @@ public class HWPFDocument extends POIDocument { return _dop; } + + /** + * Returns the range that covers all text in the file, + * including main text, footnotes, headers and comments. + * It runs from 0 to the end of the last text piece. + */ + public Range getOverallRange() { + // hack to get the ending cp of the document, Have to revisit this. + java.util.List text = _tpt.getTextPieces(); + PropertyNode p = (PropertyNode)text.get(text.size() - 1); - public Range getRange() - { - // hack to get the ending cp of the document, Have to revisit this. - java.util.List text = _tpt.getTextPieces(); - PropertyNode p = (PropertyNode)text.get(text.size() - 1); + return new Range(0, p.getEnd(), this); + } - return new Range(0, p.getEnd(), this); + /** + * Returns the range which covers the main document text + * only, excluding footnotes and header stories (headers, footers). + */ + public Range getRange() { + return new Range( + _cpSplit.getMainDocumentStart(), + _cpSplit.getMainDocumentEnd(), + this + ); + } + + /** + * Returns the range which covers all the Footnotes. 
+ */ + public Range getFootnoteRange() { + return new Range( + _cpSplit.getFootnoteStart(), + _cpSplit.getFootnoteEnd(), /* unfinished in CPSplitCalculator: currently always throws IllegalStateException */ + this + ); + } + + /** + * Returns the range which covers all "Header Stories". A header + * story contains a header, footer, end note separators and footnote separators. + * Not usable yet: always throws IllegalStateException, as CPSplitCalculator cannot work out these positions so far. + */ + public Range getHeaderStoryRange() { + return new Range( + _cpSplit.getHeaderStoryStart(), + _cpSplit.getHeaderStoryEnd(), + this + ); } /** diff --git a/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java new file mode 100644 index 0000000000..8d14c3613a --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java @@ -0,0 +1,52 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hwpf.dev; + +import java.io.FileInputStream; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.model.FileInformationBlock; + +/** + * Used by developers to list out key information on a + * HWPF file. End users will probably never need to + * use this program. 
+ */ +public class HWPFLister { + /** The document being reported on */ private HWPFDocument doc; + public HWPFLister(HWPFDocument doc) { + this.doc = doc; + } + + /** Dumps the FIB of the file named by args[0]; with no arguments, prints the usage text and exits with status 1. */ public static void main(String[] args) throws Exception { + if(args.length == 0) { + System.err.println("Use:"); + System.err.println(" HWPFLister <filename>"); + System.exit(1); + } + + HWPFLister l = new HWPFLister( + new HWPFDocument(new FileInputStream(args[0])) + ); + l.dumpFIB(); + } + + /** Prints the document's FileInformationBlock, via its toString(), to standard output. */ public void dumpFIB() throws Exception { + FileInformationBlock fib = doc.getFileInformationBlock(); + System.out.println(fib.toString()); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CPSplitCalculator.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CPSplitCalculator.java new file mode 100644 index 0000000000..774a07b119 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CPSplitCalculator.java @@ -0,0 +1,79 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.hwpf.model; + +import org.apache.poi.hwpf.HWPFDocument; + +/** + * Helper class for {@link HWPFDocument}, which figures out + * where different kinds of text can be found within the + * overall CP splurge. The footnote end and header story positions are unfinished and throw for now. + */ +public class CPSplitCalculator { + private FileInformationBlock fib; + public CPSplitCalculator(FileInformationBlock fib) { + this.fib = fib; + } + + /** + * Where the main document text starts. Always 0. + */ + public int getMainDocumentStart() { + return 0; + } + /** + * Where the main document text ends. + * Given by FibRgLw97.ccpText, read through the FIB's getCcpText() + */ + public int getMainDocumentEnd() { + return fib.getCcpText(); + } + + /** + * Where the Footnotes text starts. + * Follows straight on from the main text, so equals getMainDocumentEnd(). + */ + public int getFootnoteStart() { + return getMainDocumentEnd(); + } + /** + * Where the Footnotes text ends. Not finished: always throws IllegalStateException for now. + * Length is to come from FibRgLw97.ccpFtn + */ + public int getFootnoteEnd() { + throw new IllegalStateException("Not yet finished!"); +// return getFootnoteStart() + +// ???; + } + + /** + * Where the "Header Story" text starts. + * Follows straight on from the footnotes, so it throws too until getFootnoteEnd() is finished. + */ + public int getHeaderStoryStart() { + return getFootnoteEnd(); + } + /** + * Where the "Header Story" text ends. Not finished: always throws IllegalStateException for now. + * Length is to come from FibRgLw97.ccpHdd + */ + public int getHeaderStoryEnd() { + throw new IllegalStateException("Not yet finished!"); +// return getHeaderStoryStart() + +// ???; + } +}