mirror of https://github.com/apache/poi.git
Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646313 via svnmerge from
https://svn.apache.org:443/repos/asf/poi/trunk ........ r646312 | nick | 2008-04-09 13:46:42 +0100 (Wed, 09 Apr 2008) | 1 line Provide a common ole2 implementation of POITextExtractor, which gives access to the document metadata ........ git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@646818 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8f63a46255
commit
d8a63301aa
|
@ -220,6 +220,7 @@ under the License.
|
||||||
<path id="ooxml.classpath">
|
<path id="ooxml.classpath">
|
||||||
<path refid="main.classpath"/>
|
<path refid="main.classpath"/>
|
||||||
<path refid="scratchpad.classpath"/>
|
<path refid="scratchpad.classpath"/>
|
||||||
|
<pathelement location="${scratchpad.output.dir}"/>
|
||||||
<fileset dir="${ooxml.lib}">
|
<fileset dir="${ooxml.lib}">
|
||||||
<include name="*.jar" />
|
<include name="*.jar" />
|
||||||
</fileset>
|
</fileset>
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi;
|
||||||
|
|
||||||
|
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
||||||
|
import org.apache.poi.hpsf.SummaryInformation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Common Parent for OLE2 based Text Extractors
|
||||||
|
* of POI Documents, such as .doc, .xls.
|
||||||
|
* You will typically find the implementation of
|
||||||
|
* a given format's text extractor under
|
||||||
|
* org.apache.poi.[format].extractor .
|
||||||
|
* @see org.apache.poi.hssf.extractor.ExcelExtractor
|
||||||
|
* @see org.apache.poi.hslf.extractor.PowerPointExtractor
|
||||||
|
* @see org.apache.poi.hdgf.extractor.VisioTextExtractor
|
||||||
|
* @see org.apache.poi.hwpf.extractor.WordExtractor
|
||||||
|
*/
|
||||||
|
public abstract class POIOLE2TextExtractor extends POITextExtractor {
|
||||||
|
/**
|
||||||
|
* Creates a new text extractor for the given document
|
||||||
|
*/
|
||||||
|
public POIOLE2TextExtractor(POIDocument document) {
|
||||||
|
super(document);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the document summary information metadata for the document
|
||||||
|
*/
|
||||||
|
public DocumentSummaryInformation getDocSummaryInformation() {
|
||||||
|
return document.getDocumentSummaryInformation();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Returns the summary information metadata for the document
|
||||||
|
*/
|
||||||
|
public SummaryInformation getSummaryInformation() {
|
||||||
|
return document.getSummaryInformation();
|
||||||
|
}
|
||||||
|
}
|
|
@ -18,7 +18,7 @@ package org.apache.poi.hssf.extractor;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFCell;
|
import org.apache.poi.hssf.usermodel.HSSFCell;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFRow;
|
import org.apache.poi.hssf.usermodel.HSSFRow;
|
||||||
|
@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
* the XLS2CSVmra example
|
* the XLS2CSVmra example
|
||||||
* @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra
|
* @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra
|
||||||
*/
|
*/
|
||||||
public class ExcelExtractor extends POITextExtractor{
|
public class ExcelExtractor extends POIOLE2TextExtractor {
|
||||||
private HSSFWorkbook wb;
|
private HSSFWorkbook wb;
|
||||||
private boolean includeSheetNames = true;
|
private boolean includeSheetNames = true;
|
||||||
private boolean formulasNotResults = false;
|
private boolean formulasNotResults = false;
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.openxml4j.opc.Package;
|
||||||
import org.openxml4j.opc.PackagePart;
|
import org.openxml4j.opc.PackagePart;
|
||||||
import org.openxml4j.opc.PackageRelationshipCollection;
|
import org.openxml4j.opc.PackageRelationshipCollection;
|
||||||
|
|
||||||
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.POIXMLDocument;
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.apache.poi.POIXMLTextExtractor;
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
|
@ -104,7 +105,7 @@ public class ExtractorFactory {
|
||||||
throw new IllegalArgumentException("No supported documents found in the OOXML package");
|
throw new IllegalArgumentException("No supported documents found in the OOXML package");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
// Look for certain entries in the stream, to figure it
|
// Look for certain entries in the stream, to figure it
|
||||||
// out from
|
// out from
|
||||||
for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
|
for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.hdgf.HDGFDiagram;
|
import org.apache.poi.hdgf.HDGFDiagram;
|
||||||
import org.apache.poi.hdgf.chunks.Chunk;
|
import org.apache.poi.hdgf.chunks.Chunk;
|
||||||
import org.apache.poi.hdgf.chunks.Chunk.Command;
|
import org.apache.poi.hdgf.chunks.Chunk.Command;
|
||||||
|
@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
* Can operate on the command line (outputs to stdout), or
|
* Can operate on the command line (outputs to stdout), or
|
||||||
* can return the text for you (eg for use with Lucene).
|
* can return the text for you (eg for use with Lucene).
|
||||||
*/
|
*/
|
||||||
public class VisioTextExtractor extends POITextExtractor {
|
public class VisioTextExtractor extends POIOLE2TextExtractor {
|
||||||
private HDGFDiagram hdgf;
|
private HDGFDiagram hdgf;
|
||||||
private POIFSFileSystem fs;
|
private POIFSFileSystem fs;
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ package org.apache.poi.hslf.extractor;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.hslf.*;
|
import org.apache.poi.hslf.*;
|
||||||
import org.apache.poi.hslf.model.*;
|
import org.apache.poi.hslf.model.*;
|
||||||
|
@ -36,7 +36,7 @@ import org.apache.poi.hslf.usermodel.*;
|
||||||
* @author Nick Burch
|
* @author Nick Burch
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class PowerPointExtractor extends POITextExtractor
|
public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||||
{
|
{
|
||||||
private HSLFSlideShow _hslfshow;
|
private HSLFSlideShow _hslfshow;
|
||||||
private SlideShow _show;
|
private SlideShow _show;
|
||||||
|
|
|
@ -22,7 +22,7 @@ import java.io.FileInputStream;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.model.TextPiece;
|
import org.apache.poi.hwpf.model.TextPiece;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
|
@ -37,7 +37,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
*
|
*
|
||||||
* @author Nick Burch (nick at torchbox dot com)
|
* @author Nick Burch (nick at torchbox dot com)
|
||||||
*/
|
*/
|
||||||
public class WordExtractor extends POITextExtractor {
|
public class WordExtractor extends POIOLE2TextExtractor {
|
||||||
private POIFSFileSystem fs;
|
private POIFSFileSystem fs;
|
||||||
private HWPFDocument doc;
|
private HWPFDocument doc;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue