From 40b5fb8af8a468a8f5583a812ccf3e7317eb9d8a Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 9 Apr 2008 12:46:42 +0000 Subject: [PATCH] Provide a common ole2 implementation of POITextExtractor, which gives access to the document metadata git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@646312 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/POIOLE2TextExtractor.java | 53 +++++++++++++++++++ .../poi/hssf/extractor/ExcelExtractor.java | 4 +- .../hdgf/extractor/VisioTextExtractor.java | 4 +- .../hslf/extractor/PowerPointExtractor.java | 4 +- .../poi/hwpf/extractor/WordExtractor.java | 4 +- 5 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 src/java/org/apache/poi/POIOLE2TextExtractor.java diff --git a/src/java/org/apache/poi/POIOLE2TextExtractor.java b/src/java/org/apache/poi/POIOLE2TextExtractor.java new file mode 100644 index 0000000000..f5aee4cc6d --- /dev/null +++ b/src/java/org/apache/poi/POIOLE2TextExtractor.java @@ -0,0 +1,53 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi; + +import org.apache.poi.hpsf.DocumentSummaryInformation; +import org.apache.poi.hpsf.SummaryInformation; + +/** + * Common parent for OLE2 based text extractors + * of POI documents, such as .doc and .xls files. On top of {@link POITextExtractor} it adds accessors for the document's summary metadata. + * You will typically find the implementation of + * a given format's text extractor under + * org.apache.poi.[format].extractor . + * @see org.apache.poi.hssf.extractor.ExcelExtractor + * @see org.apache.poi.hslf.extractor.PowerPointExtractor + * @see org.apache.poi.hdgf.extractor.VisioTextExtractor + * @see org.apache.poi.hwpf.extractor.WordExtractor + */ +public abstract class POIOLE2TextExtractor extends POITextExtractor { + /** + * Creates a new text extractor for the given document; the document is simply handed on to the {@link POITextExtractor} constructor. + */ + public POIOLE2TextExtractor(POIDocument document) { + super(document); + } + + /** + * Returns the document summary information metadata for the document, by delegating to the document's {@code getDocumentSummaryInformation()}. + */ + public DocumentSummaryInformation getDocSummaryInformation() { + return document.getDocumentSummaryInformation(); + } + /** + * Returns the summary information metadata for the document, by delegating to the document's {@code getSummaryInformation()}. + */ + public SummaryInformation getSummaryInformation() { + return document.getSummaryInformation(); + } +} diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java index f45f54dff1..2a9c455cac 100644 --- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java @@ -18,7 +18,7 @@ package org.apache.poi.hssf.extractor; import java.io.IOException; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRow; @@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * the XLS2CSVmra example * @see 
org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra */ -public class ExcelExtractor extends POITextExtractor{ +public class ExcelExtractor extends POIOLE2TextExtractor { private HSSFWorkbook wb; private boolean includeSheetNames = true; private boolean formulasNotResults = false; diff --git a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java index 034714c7bc..9b1307cee3 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hdgf.HDGFDiagram; import org.apache.poi.hdgf.chunks.Chunk; import org.apache.poi.hdgf.chunks.Chunk.Command; @@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * Can opperate on the command line (outputs to stdout), or * can return the text for you (eg for use with Lucene). 
*/ -public class VisioTextExtractor extends POITextExtractor { +public class VisioTextExtractor extends POIOLE2TextExtractor { private HDGFDiagram hdgf; private POIFSFileSystem fs; diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java index f247227007..cd9fa28256 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java @@ -23,7 +23,7 @@ package org.apache.poi.hslf.extractor; import java.io.*; import java.util.HashSet; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.hslf.*; import org.apache.poi.hslf.model.*; @@ -36,7 +36,7 @@ import org.apache.poi.hslf.usermodel.*; * @author Nick Burch */ -public class PowerPointExtractor extends POITextExtractor +public class PowerPointExtractor extends POIOLE2TextExtractor { private HSLFSlideShow _hslfshow; private SlideShow _show; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java index 6f15ee1f9a..85009459d7 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java @@ -22,7 +22,7 @@ import java.io.FileInputStream; import java.io.UnsupportedEncodingException; import java.util.Iterator; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.model.TextPiece; import org.apache.poi.hwpf.usermodel.Paragraph; @@ -37,7 +37,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * * @author Nick Burch (nick at torchbox dot com) */ -public class WordExtractor extends POITextExtractor { +public class WordExtractor 
extends POIOLE2TextExtractor { private POIFSFileSystem fs; private HWPFDocument doc;