Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646313 via svnmerge from

https://svn.apache.org:443/repos/asf/poi/trunk

........
  r646312 | nick | 2008-04-09 13:46:42 +0100 (Wed, 09 Apr 2008) | 1 line
  
  Provide a common ole2 implementation of POITextExtractor, which gives access to the document metadata
........


git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@646818 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-04-10 14:26:36 +00:00
parent 8f63a46255
commit d8a63301aa
7 changed files with 64 additions and 9 deletions

View File

@ -220,6 +220,7 @@ under the License.
<path id="ooxml.classpath"> <path id="ooxml.classpath">
<path refid="main.classpath"/> <path refid="main.classpath"/>
<path refid="scratchpad.classpath"/> <path refid="scratchpad.classpath"/>
<pathelement location="${scratchpad.output.dir}"/>
<fileset dir="${ooxml.lib}"> <fileset dir="${ooxml.lib}">
<include name="*.jar" /> <include name="*.jar" />
</fileset> </fileset>

View File

@ -0,0 +1,53 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.SummaryInformation;
/**
* Common parent for OLE2-based text extractors
* of POI documents, such as .doc and .xls files.
* You will typically find the implementation of
* a given format's text extractor under
* org.apache.poi.[format].extractor .
* @see org.apache.poi.hssf.extractor.ExcelExtractor
* @see org.apache.poi.hslf.extractor.PowerPointExtractor
* @see org.apache.poi.hdgf.extractor.VisioTextExtractor
* @see org.apache.poi.hwpf.extractor.WordExtractor
*/
public abstract class POIOLE2TextExtractor extends POITextExtractor {

    /**
     * Builds an extractor bound to the supplied document.
     *
     * @param document the document handed on to the parent extractor
     */
    public POIOLE2TextExtractor(POIDocument document) {
        super(document);
    }

    /**
     * Looks up the document summary information metadata by
     * delegating to the underlying document.
     *
     * @return the value reported by the document's
     *         getDocumentSummaryInformation()
     */
    public DocumentSummaryInformation getDocSummaryInformation() {
        final DocumentSummaryInformation docSummary =
                document.getDocumentSummaryInformation();
        return docSummary;
    }

    /**
     * Looks up the summary information metadata by delegating
     * to the underlying document.
     *
     * @return the value reported by the document's
     *         getSummaryInformation()
     */
    public SummaryInformation getSummaryInformation() {
        final SummaryInformation summary = document.getSummaryInformation();
        return summary;
    }
}

View File

@ -18,7 +18,7 @@ package org.apache.poi.hssf.extractor;
import java.io.IOException; import java.io.IOException;
import org.apache.poi.POITextExtractor; import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFRow;
@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* the XLS2CSVmra example * the XLS2CSVmra example
* @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra * @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra
*/ */
public class ExcelExtractor extends POITextExtractor{ public class ExcelExtractor extends POIOLE2TextExtractor {
private HSSFWorkbook wb; private HSSFWorkbook wb;
private boolean includeSheetNames = true; private boolean includeSheetNames = true;
private boolean formulasNotResults = false; private boolean formulasNotResults = false;

View File

@ -29,6 +29,7 @@ import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackagePart; import org.openxml4j.opc.PackagePart;
import org.openxml4j.opc.PackageRelationshipCollection; import org.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.POITextExtractor; import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.POIXMLTextExtractor;
@ -104,7 +105,7 @@ public class ExtractorFactory {
throw new IllegalArgumentException("No supported documents found in the OOXML package"); throw new IllegalArgumentException("No supported documents found in the OOXML package");
} }
public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException { public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
// Look for certain entries in the stream, to figure it // Look for certain entries in the stream, to figure it
// out from // out from
for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) { for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import org.apache.poi.POITextExtractor; import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hdgf.HDGFDiagram; import org.apache.poi.hdgf.HDGFDiagram;
import org.apache.poi.hdgf.chunks.Chunk; import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.Chunk.Command; import org.apache.poi.hdgf.chunks.Chunk.Command;
@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* Can operate on the command line (outputs to stdout), or * Can operate on the command line (outputs to stdout), or
* can return the text for you (eg for use with Lucene). * can return the text for you (eg for use with Lucene).
*/ */
public class VisioTextExtractor extends POITextExtractor { public class VisioTextExtractor extends POIOLE2TextExtractor {
private HDGFDiagram hdgf; private HDGFDiagram hdgf;
private POIFSFileSystem fs; private POIFSFileSystem fs;

View File

@ -23,7 +23,7 @@ package org.apache.poi.hslf.extractor;
import java.io.*; import java.io.*;
import java.util.HashSet; import java.util.HashSet;
import org.apache.poi.POITextExtractor; import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hslf.*; import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*; import org.apache.poi.hslf.model.*;
@ -36,7 +36,7 @@ import org.apache.poi.hslf.usermodel.*;
* @author Nick Burch * @author Nick Burch
*/ */
public class PowerPointExtractor extends POITextExtractor public class PowerPointExtractor extends POIOLE2TextExtractor
{ {
private HSLFSlideShow _hslfshow; private HSLFSlideShow _hslfshow;
private SlideShow _show; private SlideShow _show;

View File

@ -22,7 +22,7 @@ import java.io.FileInputStream;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.Iterator; import java.util.Iterator;
import org.apache.poi.POITextExtractor; import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.TextPiece; import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Paragraph;
@ -37,7 +37,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* *
* @author Nick Burch (nick at torchbox dot com) * @author Nick Burch (nick at torchbox dot com)
*/ */
public class WordExtractor extends POITextExtractor { public class WordExtractor extends POIOLE2TextExtractor {
private POIFSFileSystem fs; private POIFSFileSystem fs;
private HWPFDocument doc; private HWPFDocument doc;