mirror of https://github.com/apache/lucene.git
Remove outdated sandbox code
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@165365 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f848854278
commit
acf2b4c60c
|
@ -1,10 +0,0 @@
|
||||||
<customerInfo>
|
|
||||||
<name><![CDATA[Aruna A. Raghavan]]></name>
|
|
||||||
<profession><![CDATA[Software Developer]]></profession>
|
|
||||||
<addressLine1><![CDATA[6801 West 106th Street]]></addressLine1>
|
|
||||||
<addressLine2><![CDATA[#205]]></addressLine2>
|
|
||||||
<city><![CDATA[Eagan]]></city>
|
|
||||||
<state><![CDATA[MN]]></state>
|
|
||||||
<zip><![CDATA[55121]]></zip>
|
|
||||||
<country><![CDATA[USA]]></country>
|
|
||||||
</customerInfo>
|
|
|
@ -1,6 +0,0 @@
|
||||||
This is the README file for XML Indexing Demo contributed by Aruna Raghavan.
|
|
||||||
|
|
||||||
$Id$
|
|
||||||
|
|
||||||
Lucene Indexing Demo illustrates how one can parse and index XML documents
|
|
||||||
using a SAX2 or DOM parser with Lucene.
|
|
Binary file not shown.
|
@ -1,10 +0,0 @@
|
||||||
<?xml version="1.0"?>
|
|
||||||
|
|
||||||
<project name="xml" default="default">
|
|
||||||
|
|
||||||
<description>
|
|
||||||
Example of Lucene XML indexing
|
|
||||||
</description>
|
|
||||||
|
|
||||||
<import file="../common.xml"/>
|
|
||||||
</project>
|
|
|
@ -1,111 +0,0 @@
|
||||||
package org.apache.lucenesandbox.xmlindexingdemo;
|
|
||||||
|
|
||||||
/* ====================================================================
|
|
||||||
* The Apache Software License, Version 1.1
|
|
||||||
*
|
|
||||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in
|
|
||||||
* the documentation and/or other materials provided with the
|
|
||||||
* distribution.
|
|
||||||
*
|
|
||||||
* 3. The end-user documentation included with the redistribution,
|
|
||||||
* if any, must include the following acknowledgment:
|
|
||||||
* "This product includes software developed by the
|
|
||||||
* Apache Software Foundation (http://www.apache.org/)."
|
|
||||||
* Alternately, this acknowledgment may appear in the software itself,
|
|
||||||
* if and wherever such third-party acknowledgments normally appear.
|
|
||||||
*
|
|
||||||
* 4. The names "Apache" and "Apache Software Foundation" and
|
|
||||||
* "Apache Lucene" must not be used to endorse or promote products
|
|
||||||
* derived from this software without prior written permission. For
|
|
||||||
* written permission, please contact apache@apache.org.
|
|
||||||
*
|
|
||||||
* 5. Products derived from this software may not be called "Apache",
|
|
||||||
* "Apache Lucene", nor may "Apache" appear in their name, without
|
|
||||||
* prior written permission of the Apache Software Foundation.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
|
||||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
||||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
|
||||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
||||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
||||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
* ====================================================================
|
|
||||||
*
|
|
||||||
* This software consists of voluntary contributions made by many
|
|
||||||
* individuals on behalf of the Apache Software Foundation. For more
|
|
||||||
* information on the Apache Software Foundation, please see
|
|
||||||
* <http://www.apache.org/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
||||||
import org.apache.lucene.index.IndexWriter;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.util.Date;
|
|
||||||
|
|
||||||
class IndexFiles
|
|
||||||
{
|
|
||||||
public static void main(String[] args)
|
|
||||||
throws Exception
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
|
||||||
Date start = new Date();
|
|
||||||
|
|
||||||
IndexWriter writer = new IndexWriter("index", new StandardAnalyzer(), true);
|
|
||||||
indexDocs(writer, new File(args[0]));
|
|
||||||
|
|
||||||
writer.optimize();
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
Date end = new Date();
|
|
||||||
|
|
||||||
System.out.print(end.getTime() - start.getTime());
|
|
||||||
System.out.println(" total milliseconds");
|
|
||||||
|
|
||||||
}
|
|
||||||
catch (Exception e)
|
|
||||||
{
|
|
||||||
System.out.println(" caught a " + e.getClass() +
|
|
||||||
"\n with message: " + e.getMessage());
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void indexDocs(IndexWriter writer, File file)
|
|
||||||
throws Exception
|
|
||||||
{
|
|
||||||
if (file.isDirectory())
|
|
||||||
{
|
|
||||||
String[] files = file.list();
|
|
||||||
for (int i = 0; i < files.length; i++)
|
|
||||||
indexDocs(writer, new File(file, files[i]));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
System.out.println("adding " + file);
|
|
||||||
XMLDocumentHandlerSAX hdlr = new XMLDocumentHandlerSAX(file);
|
|
||||||
writer.addDocument(hdlr.getDocument());
|
|
||||||
// For DOM, use
|
|
||||||
// XMLDocumentHandlerDOM hdlr = new XMLDocumentHandlerDOM();
|
|
||||||
// writer.addDocument(hdlr.createXMLDocument(file));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,126 +0,0 @@
|
||||||
package org.apache.lucenesandbox.xmlindexingdemo;
|
|
||||||
|
|
||||||
/* ====================================================================
|
|
||||||
* The Apache Software License, Version 1.1
|
|
||||||
*
|
|
||||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in
|
|
||||||
* the documentation and/or other materials provided with the
|
|
||||||
* distribution.
|
|
||||||
*
|
|
||||||
* 3. The end-user documentation included with the redistribution,
|
|
||||||
* if any, must include the following acknowledgment:
|
|
||||||
* "This product includes software developed by the
|
|
||||||
* Apache Software Foundation (http://www.apache.org/)."
|
|
||||||
* Alternately, this acknowledgment may appear in the software itself,
|
|
||||||
* if and wherever such third-party acknowledgments normally appear.
|
|
||||||
*
|
|
||||||
* 4. The names "Apache" and "Apache Software Foundation" and
|
|
||||||
* "Apache Lucene" must not be used to endorse or promote products
|
|
||||||
* derived from this software without prior written permission. For
|
|
||||||
* written permission, please contact apache@apache.org.
|
|
||||||
*
|
|
||||||
* 5. Products derived from this software may not be called "Apache",
|
|
||||||
* "Apache Lucene", nor may "Apache" appear in their name, without
|
|
||||||
* prior written permission of the Apache Software Foundation.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
|
||||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
||||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
|
||||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
||||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
||||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
* ====================================================================
|
|
||||||
*
|
|
||||||
* This software consists of voluntary contributions made by many
|
|
||||||
* individuals on behalf of the Apache Software Foundation. For more
|
|
||||||
* information on the Apache Software Foundation, please see
|
|
||||||
* <http://www.apache.org/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.search.Searcher;
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.Hits;
|
|
||||||
import org.apache.lucene.queryParser.QueryParser;
|
|
||||||
|
|
||||||
class SearchFiles {
|
|
||||||
public static void main(String[] args) {
|
|
||||||
try {
|
|
||||||
Searcher searcher = new IndexSearcher("index");
|
|
||||||
Analyzer analyzer = new StandardAnalyzer();
|
|
||||||
|
|
||||||
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
|
|
||||||
while (true) {
|
|
||||||
System.out.print("Query: ");
|
|
||||||
String line = in.readLine();
|
|
||||||
|
|
||||||
if (line.length() == -1)
|
|
||||||
break;
|
|
||||||
|
|
||||||
Query query = QueryParser.parse(line, "name", analyzer);
|
|
||||||
System.out.println("Searching for: " + query.toString("name"));
|
|
||||||
|
|
||||||
Hits hits = searcher.search(query);
|
|
||||||
System.out.println(hits.length() + " total matching documents");
|
|
||||||
|
|
||||||
final int HITS_PER_PAGE = 10;
|
|
||||||
for (int start = 0; start < hits.length(); start += HITS_PER_PAGE)
|
|
||||||
{
|
|
||||||
int end = Math.min(hits.length(), start + HITS_PER_PAGE);
|
|
||||||
for (int i = start; i < end; i++)
|
|
||||||
{
|
|
||||||
Document doc = hits.doc(i);
|
|
||||||
String name = doc.get("name");
|
|
||||||
System.out.println(name);
|
|
||||||
System.out.println(doc.get("profession"));
|
|
||||||
System.out.println(doc.get("addressLine1"));
|
|
||||||
System.out.println(doc.get("addressLine2"));
|
|
||||||
System.out.print(doc.get("city"));
|
|
||||||
System.out.print(" ");
|
|
||||||
System.out.print(doc.get("state"));
|
|
||||||
System.out.print(" ");
|
|
||||||
System.out.print(doc.get("zip"));
|
|
||||||
System.out.println(doc.get("country"));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hits.length() > end) {
|
|
||||||
System.out.print("more (y/n) ? ");
|
|
||||||
line = in.readLine();
|
|
||||||
if (line.length() == 0 || line.charAt(0) == 'n')
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
searcher.close();
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
System.out.println(" caught a " + e.getClass() +
|
|
||||||
"\n with message: " + e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,131 +0,0 @@
|
||||||
package org.apache.lucenesandbox.xmlindexingdemo;
|
|
||||||
|
|
||||||
import org.w3c.dom.*;
|
|
||||||
import org.w3c.dom.Node;
|
|
||||||
import javax.xml.parsers.*;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class XMLDocumentHandlerDOM {
|
|
||||||
public org.apache.lucene.document.Document createXMLDocument(File f) {
|
|
||||||
org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
|
|
||||||
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
|
||||||
try {
|
|
||||||
DocumentBuilder df = dbf.newDocumentBuilder();
|
|
||||||
org.w3c.dom.Document d = df.parse(f);
|
|
||||||
Node root = d.getDocumentElement();
|
|
||||||
traverseTree(root, document);
|
|
||||||
} catch (Exception e) {
|
|
||||||
System.out.println("error: " + e);
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
return document;
|
|
||||||
}
|
|
||||||
|
|
||||||
static private void traverseTree(Node node, org.apache.lucene.document.Document document) {
|
|
||||||
NodeList nl = node.getChildNodes();
|
|
||||||
if (nl.getLength() == 0) {
|
|
||||||
if (node.getNodeType() == Node.TEXT_NODE) {
|
|
||||||
Node parentNode = node.getParentNode();
|
|
||||||
if (parentNode.getNodeType() == Node.ELEMENT_NODE) {
|
|
||||||
// String parentNodeName = parentNode.getNodeName();
|
|
||||||
// String nodeValue = node.getNodeValue();
|
|
||||||
// if (parentNodeName.equals("name"))
|
|
||||||
// {
|
|
||||||
Node siblingNode = node.getNextSibling();
|
|
||||||
if (siblingNode != null) {
|
|
||||||
if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) {
|
|
||||||
document.add(Field.Text("name", siblingNode.getNodeValue()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// }
|
|
||||||
// else if (parentNodeName.equals("profession"))
|
|
||||||
// {
|
|
||||||
// Node siblingNode = node.getNextSibling();
|
|
||||||
// if (siblingNode != null)
|
|
||||||
// {
|
|
||||||
// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
|
|
||||||
// {
|
|
||||||
// document.add(Field.Text([arentNodeName, siblingNode.getNodeValue()));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// else if (parentNodeName == "addressLine1")
|
|
||||||
// {
|
|
||||||
// Node siblingNode = node.getNextSibling();
|
|
||||||
// if(siblingNode != null)
|
|
||||||
// {
|
|
||||||
// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
|
|
||||||
// {
|
|
||||||
// document.add(Field.Text("addressLine1", siblingNode.getNodeValue()));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// else if (parentNodeName.equals("addressLine2"))
|
|
||||||
// {
|
|
||||||
// Node siblingNode = node.getNextSibling();
|
|
||||||
// if (siblingNode != null)
|
|
||||||
// {
|
|
||||||
// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
|
|
||||||
// {
|
|
||||||
// document.add(Field.Text("addressLine2", siblingNode.getNodeValue()));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// if (parentNodeName.equals("city"))
|
|
||||||
// {
|
|
||||||
// Node siblingNode = node.getNextSibling();
|
|
||||||
// if (siblingNode != null)
|
|
||||||
// {
|
|
||||||
// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
|
|
||||||
// {
|
|
||||||
// document.add(Field.Text("city", siblingNode.getNodeValue()));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// else if (parentNodeName.equals("zip"))
|
|
||||||
// {
|
|
||||||
// Node siblingNode = node.getNextSibling();
|
|
||||||
// if (siblingNode != null)
|
|
||||||
// {
|
|
||||||
// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
|
|
||||||
// {
|
|
||||||
// document.add(Field.Text("zip", siblingNode.getNodeValue()));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// else if (parentNodeName.equals("state"))
|
|
||||||
// {
|
|
||||||
// Node siblingNode = node.getNextSibling();
|
|
||||||
// if (siblingNode != null)
|
|
||||||
// {
|
|
||||||
// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
|
|
||||||
// {
|
|
||||||
// document.add(Field.Text("state", siblingNode.getNodeValue()));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// else if (parentNodeName.equals("country"))
|
|
||||||
// {
|
|
||||||
// Node siblingNode = node.getNextSibling();
|
|
||||||
// if (siblingNode != null)
|
|
||||||
// {
|
|
||||||
// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
|
|
||||||
// {
|
|
||||||
// document.add(Field.Text("country", siblingNode.getNodeValue()));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i = 0; i < nl.getLength(); i++) {
|
|
||||||
traverseTree(nl.item(i), document);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,106 +0,0 @@
|
||||||
package org.apache.lucenesandbox.xmlindexingdemo;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Copyright 2004 The Apache Software Foundation
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
|
||||||
import javax.xml.parsers.SAXParser;
|
|
||||||
import javax.xml.parsers.SAXParserFactory;
|
|
||||||
import org.xml.sax.Attributes;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
|
||||||
|
|
||||||
public class XMLDocumentHandlerSAX extends DefaultHandler {
|
|
||||||
/** A buffer for each XML element */
|
|
||||||
private StringBuffer elementBuffer = new StringBuffer();
|
|
||||||
|
|
||||||
private Document mDocument;
|
|
||||||
|
|
||||||
// constructor
|
|
||||||
public XMLDocumentHandlerSAX(File xmlFile)
|
|
||||||
throws ParserConfigurationException, SAXException, IOException {
|
|
||||||
SAXParserFactory spf = SAXParserFactory.newInstance();
|
|
||||||
|
|
||||||
// use validating parser?
|
|
||||||
//spf.setValidating(false);
|
|
||||||
// make parser name space aware?
|
|
||||||
//spf.setNamespaceAware(true);
|
|
||||||
|
|
||||||
SAXParser parser = spf.newSAXParser();
|
|
||||||
//System.out.println("parser is validating: " + parser.isValidating());
|
|
||||||
try {
|
|
||||||
parser.parse(xmlFile, this);
|
|
||||||
} catch (org.xml.sax.SAXParseException spe) {
|
|
||||||
System.out.println("SAXParser caught SAXParseException at line: " +
|
|
||||||
spe.getLineNumber() + " column " +
|
|
||||||
spe.getColumnNumber());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// call at document start
|
|
||||||
public void startDocument() throws SAXException {
|
|
||||||
mDocument = new Document();
|
|
||||||
}
|
|
||||||
|
|
||||||
// call at element start
|
|
||||||
public void startElement(String namespaceURI, String localName,
|
|
||||||
String qualifiedName, Attributes attrs) throws SAXException {
|
|
||||||
|
|
||||||
String eName = localName;
|
|
||||||
if ("".equals(eName)) {
|
|
||||||
eName = qualifiedName; // namespaceAware = false
|
|
||||||
}
|
|
||||||
// list the attribute(s)
|
|
||||||
if (attrs != null) {
|
|
||||||
for (int i = 0; i < attrs.getLength(); i++) {
|
|
||||||
String aName = attrs.getLocalName(i); // Attr name
|
|
||||||
if ("".equals(aName)) { aName = attrs.getQName(i); }
|
|
||||||
// perform application specific action on attribute(s)
|
|
||||||
// for now just dump out attribute name and value
|
|
||||||
System.out.println("attr " + aName+"="+attrs.getValue(i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
elementBuffer.setLength(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// call when cdata found
|
|
||||||
public void characters(char[] text, int start, int length)
|
|
||||||
throws SAXException {
|
|
||||||
elementBuffer.append(text, start, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
// call at element end
|
|
||||||
public void endElement(String namespaceURI, String simpleName,
|
|
||||||
String qualifiedName) throws SAXException {
|
|
||||||
|
|
||||||
String eName = simpleName;
|
|
||||||
if ("".equals(eName)) {
|
|
||||||
eName = qualifiedName; // namespaceAware = false
|
|
||||||
}
|
|
||||||
|
|
||||||
mDocument.add(Field.Text(eName, elementBuffer.toString()));
|
|
||||||
}
|
|
||||||
|
|
||||||
public Document getDocument() {
|
|
||||||
return mDocument;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,17 +0,0 @@
|
||||||
<?xml version="1.0"?>
|
|
||||||
<document>
|
|
||||||
<properties>
|
|
||||||
<author>Aruna Raghavan</author>
|
|
||||||
<author>Otis Gospodnetic</author>
|
|
||||||
<title>Lucene Indexing Demo</title>
|
|
||||||
</properties>
|
|
||||||
|
|
||||||
<body>
|
|
||||||
|
|
||||||
<section name="Description">
|
|
||||||
<p>Lucene Indexing Demo illustrates how one can parse XML documents
|
|
||||||
using a SAX2 or DOM parser and index them with Lucene.</p>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</document>
|
|
|
@ -1,26 +0,0 @@
|
||||||
<project name="sandbox" default="build-tree">
|
|
||||||
|
|
||||||
<property name="dist.dir" location="dist"/>
|
|
||||||
|
|
||||||
<macrodef name="crawl">
|
|
||||||
<attribute name="target" default=""/>
|
|
||||||
<sequential>
|
|
||||||
<subant target="@{target}" failonerror="false">
|
|
||||||
<property name="dist.dir" location="${dist.dir}"/>
|
|
||||||
|
|
||||||
<fileset dir="."
|
|
||||||
includes="*/build.xml"
|
|
||||||
excludes="taglib/build.xml"
|
|
||||||
/>
|
|
||||||
</subant>
|
|
||||||
</sequential>
|
|
||||||
</macrodef>
|
|
||||||
|
|
||||||
<target name="clean">
|
|
||||||
<crawl target="clean"/>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="build-tree">
|
|
||||||
<crawl/>
|
|
||||||
</target>
|
|
||||||
</project>
|
|
|
@ -1,241 +0,0 @@
|
||||||
<?xml version="1.0"?>
|
|
||||||
|
|
||||||
<project name="common">
|
|
||||||
|
|
||||||
<!-- default values, intended to be overridden-->
|
|
||||||
<property name="version" value="dev"/>
|
|
||||||
<property name="Name" value="${ant.project.name}"/>
|
|
||||||
|
|
||||||
<!-- not intended to be overridden-->
|
|
||||||
<property name="src.dir" location="src/java"/>
|
|
||||||
<property name="build.dir" location="build"/>
|
|
||||||
<property name="build.classes.dir" location="${build.dir}/classes"/>
|
|
||||||
<property name="build.javadoc" value="${build.dir}/docs/api"/>
|
|
||||||
<property name="build.encoding" value="utf-8"/>
|
|
||||||
|
|
||||||
<property name="release.host" value="www.apache.org"/>
|
|
||||||
<property name="release.path" value="/www/cvs.apache.org/dist/jakarta/lucene/sandbox/${ant.project.name}"/>
|
|
||||||
|
|
||||||
<property name="web.host" value="www.apache.org"/>
|
|
||||||
<property name="web.path" value="/www/jakarta.apache.org/lucene/docs/lucene-sandbox/${ant.project.name}"/>
|
|
||||||
|
|
||||||
<property name="javadoc.link.java" value="http://java.sun.com/j2se/1.4.1/docs/api/"/>
|
|
||||||
<property name="javadoc.link.lucene" value="http://jakarta.apache.org/lucene/docs/api/"/>
|
|
||||||
|
|
||||||
<property name="test.src.dir" location="src/test"/>
|
|
||||||
<property name="test.output.dir" location="${build.dir}/test"/>
|
|
||||||
<property name="test.classes.dir" location="${test.output.dir}/classes"/>
|
|
||||||
|
|
||||||
<property name="dist.dir" location="dist"/>
|
|
||||||
<property name="dist.name" value="${ant.project.name}-${version}"/>
|
|
||||||
<property name="package.dir" location="dist/${dist.name}"/>
|
|
||||||
|
|
||||||
<property name="junit.jar" location="${ant.home}/lib/junit.jar"/>
|
|
||||||
<dirname file="${ant.file.common}" property="common.dir"/>
|
|
||||||
<property name="lucene.dir" location="${common.dir}/../../jakarta-lucene"/>
|
|
||||||
|
|
||||||
<property name="build.debug" value="true"/>
|
|
||||||
<property name="junit.fork" value="true"/>
|
|
||||||
|
|
||||||
<property name="jakarta.site2.home" location="../../../jakarta-site2"/>
|
|
||||||
<property name="project.name" value="site"/>
|
|
||||||
<property name="docs.src" location="xdocs"/>
|
|
||||||
<property name="docs.dest" location="docs"/>
|
|
||||||
|
|
||||||
<path id="anakia.classpath">
|
|
||||||
<fileset dir="${jakarta.site2.home}/lib">
|
|
||||||
<include name="*.jar"/>
|
|
||||||
</fileset>
|
|
||||||
</path>
|
|
||||||
|
|
||||||
<!-- ========================================================== -->
|
|
||||||
<!-- Datatype declarations -->
|
|
||||||
<!-- ========================================================== -->
|
|
||||||
<!-- TODO: define ${lucene.jar} for easier overriding -->
|
|
||||||
<path id="compile.classpath">
|
|
||||||
<fileset dir="${lucene.dir}" includes="build/lucene*.jar"/>
|
|
||||||
<pathelement path="${project.classpath}"/>
|
|
||||||
</path>
|
|
||||||
|
|
||||||
<path id="test.classpath">
|
|
||||||
<path refid="compile.classpath"/>
|
|
||||||
<pathelement location="${junit.jar}"/>
|
|
||||||
<pathelement location="${build.classes.dir}"/>
|
|
||||||
<pathelement location="${test.classes.dir}"/>
|
|
||||||
</path>
|
|
||||||
|
|
||||||
|
|
||||||
<target name="init">
|
|
||||||
<echo message="Building ${ant.project.name}"/>
|
|
||||||
<tstamp/>
|
|
||||||
|
|
||||||
<mkdir dir="${build.dir}"/>
|
|
||||||
<mkdir dir="${build.classes.dir}"/>
|
|
||||||
<mkdir dir="${dist.dir}"/>
|
|
||||||
|
|
||||||
<mkdir dir="${test.output.dir}"/>
|
|
||||||
<mkdir dir="${test.classes.dir}"/>
|
|
||||||
|
|
||||||
<available property="has.tests" file="${test.src.dir}" type="dir"/>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="clean"
|
|
||||||
description="Deletes all previous build artifacts">
|
|
||||||
<delete dir="${build.dir}"/>
|
|
||||||
<delete dir="${build.classes.dir}"/>
|
|
||||||
<delete dir="${dist.dir}"/>
|
|
||||||
<delete dir="${package.dir}"/>
|
|
||||||
|
|
||||||
<delete dir="${test.output.dir}"/>
|
|
||||||
<delete dir="${test.classes.dir}"/>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="dist" depends="compile" description="Create JAR">
|
|
||||||
<jar jarfile="${dist.dir}/${dist.name}.jar"
|
|
||||||
basedir="${build.classes.dir}"
|
|
||||||
/>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="compile" depends="init">
|
|
||||||
<javac destdir="${build.classes.dir}"
|
|
||||||
debug="${build.debug}"
|
|
||||||
includeAntRuntime="yes"
|
|
||||||
deprecation="true"
|
|
||||||
srcdir="${src.dir}"
|
|
||||||
classpathref="compile.classpath"
|
|
||||||
encoding="${build.encoding}"
|
|
||||||
/>
|
|
||||||
<copy todir="${build.classes.dir}">
|
|
||||||
<fileset dir="${src.dir}" excludes="**/*.java"/>
|
|
||||||
</copy>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="test-compile" depends="compile" if="has.tests">
|
|
||||||
<javac destdir="${test.classes.dir}"
|
|
||||||
debug="${build.debug}"
|
|
||||||
includeAntRuntime="yes"
|
|
||||||
srcdir="src/test"
|
|
||||||
classpathref="test.classpath"
|
|
||||||
encoding="${build.encoding}"
|
|
||||||
/>
|
|
||||||
|
|
||||||
<copy todir="${test.classes.dir}">
|
|
||||||
<fileset dir="src/test" excludes="**/*.java"/>
|
|
||||||
</copy>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="test" depends="test-compile" if="has.tests">
|
|
||||||
<junit printsummary="no"
|
|
||||||
errorProperty="test.failed"
|
|
||||||
failureProperty="test.failed"
|
|
||||||
fork="${junit.fork}">
|
|
||||||
<classpath refid="test.classpath"/>
|
|
||||||
<sysproperty key="docs.dir" file="${test.classes.dir}"/>
|
|
||||||
<sysproperty key="index.dir" file="${test.output.dir}/index"/>
|
|
||||||
<sysproperty key="dataDir" file="${test.src.dir}"/>
|
|
||||||
<formatter type="brief" usefile="false"/>
|
|
||||||
<test name="${testcase}" if="testcase"/>
|
|
||||||
<batchtest todir="${test.data.dir}" unless="testcase">
|
|
||||||
<fileset dir="${test.classes.dir}"
|
|
||||||
includes="**/*Test.class,**/Test*.class"
|
|
||||||
/>
|
|
||||||
</batchtest>
|
|
||||||
</junit>
|
|
||||||
|
|
||||||
<fail if="test.failed">
|
|
||||||
Unit tests failed. Check log or reports for details
|
|
||||||
</fail>
|
|
||||||
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="default" depends="test,dist"/>
|
|
||||||
|
|
||||||
<!-- ================================================================== -->
|
|
||||||
<!-- Documentation -->
|
|
||||||
<!-- ================================================================== -->
|
|
||||||
<target name="javadoc" depends="compile">
|
|
||||||
<mkdir dir="${build.javadoc}"/>
|
|
||||||
<javadoc
|
|
||||||
sourcepath="${src.dir}"
|
|
||||||
overview="${src.dir}/overview.html"
|
|
||||||
packagenames="*"
|
|
||||||
destdir="${build.javadoc}"
|
|
||||||
author="true"
|
|
||||||
version="true"
|
|
||||||
use="true"
|
|
||||||
windowtitle="${Name} ${version} API"
|
|
||||||
doctitle="${Name} ${version} API"
|
|
||||||
encoding="${build.encoding}"
|
|
||||||
>
|
|
||||||
<link href="${javadoc.link.java}"/>
|
|
||||||
<link href="${javadoc.link.lucene}"/>
|
|
||||||
<tag name="todo" description="To Do:"/>
|
|
||||||
<classpath refid="compile.classpath"/>
|
|
||||||
</javadoc>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<!-- ================================================================== -->
|
|
||||||
<!-- D I S T R I B U T I O N -->
|
|
||||||
<!-- ================================================================== -->
|
|
||||||
<!-- -->
|
|
||||||
<!-- ================================================================== -->
|
|
||||||
<target name="package" depends="dist, javadoc">
|
|
||||||
<mkdir dir="${package.dir}"/>
|
|
||||||
<mkdir dir="${package.dir}/docs"/>
|
|
||||||
<mkdir dir="${package.dir}/docs/api"/>
|
|
||||||
<mkdir dir="${docs.dest}"/>
|
|
||||||
<copy todir="${package.dir}/docs/api">
|
|
||||||
<fileset dir="${build.javadoc}"/>
|
|
||||||
</copy>
|
|
||||||
|
|
||||||
<copy todir="${package.dir}/docs">
|
|
||||||
<fileset dir="${docs.dest}/"/>
|
|
||||||
</copy>
|
|
||||||
|
|
||||||
<copy todir="${package.dir}">
|
|
||||||
<fileset dir=".">
|
|
||||||
<include name="*.txt"/>
|
|
||||||
</fileset>
|
|
||||||
</copy>
|
|
||||||
|
|
||||||
<copy todir="${package.dir}/src">
|
|
||||||
<fileset dir="src"/>
|
|
||||||
</copy>
|
|
||||||
<copy todir="${package.dir}/" file="build.xml"/>
|
|
||||||
<copy todir="${dist.dir}/" file="${common.dir}/common.xml"/>
|
|
||||||
|
|
||||||
<copy file="${dist.dir}/${dist.name}.jar" todir="${package.dir}"/>
|
|
||||||
|
|
||||||
<tar tarfile="${dist.dir}/${dist.name}.tar.gz" basedir="${dist.dir}/"
|
|
||||||
compression="gzip" includes="${dist.name}/**,common.xml"/>
|
|
||||||
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<!-- ================================================================== -->
|
|
||||||
<!-- Copy release to server -->
|
|
||||||
<!-- ================================================================== -->
|
|
||||||
<target name="release" depends="package">
|
|
||||||
<exec executable="ssh">
|
|
||||||
<arg value="${release.host}"/>
|
|
||||||
<arg value="mkdir"/>
|
|
||||||
<arg value="${release.path}/${dist.name}"/>
|
|
||||||
</exec>
|
|
||||||
<exec executable="scp">
|
|
||||||
<arg value="${dist.dir}/${dist.name}.jar"/>
|
|
||||||
<arg value="${dist.dir}/${dist.name}.tar.gz"/>
|
|
||||||
<arg value="${release.host}:${release.path}/${dist.name}"/>
|
|
||||||
</exec>
|
|
||||||
<exec executable="ssh">
|
|
||||||
<arg value="${web.host}"/>
|
|
||||||
<arg value="rm"/>
|
|
||||||
<arg value="-rf"/>
|
|
||||||
<arg value="${web.path}/api"/>
|
|
||||||
</exec>
|
|
||||||
<exec executable="scp">
|
|
||||||
<arg value="-r"/>
|
|
||||||
<arg value="${build.javadoc}"/>
|
|
||||||
<arg value="${web.host}:${web.path}/api"/>
|
|
||||||
</exec>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
</project>
|
|
|
@ -1,19 +0,0 @@
|
||||||
<?xml version="1.0"?>
|
|
||||||
|
|
||||||
<project default="default" name="parsers">

  <description>
Document parsers
  </description>

  <!-- Every file found under lib/ is treated as an additional dependency. -->
  <path id="additional.dependencies">
    <fileset dir="lib"/>
  </path>

  <!-- Flatten that path into the project.classpath property, formatted for
       Unix. Defined before the import below; presumably common.xml reads it
       (confirm against common.xml). -->
  <pathconvert refid="additional.dependencies" property="project.classpath" targetos="unix"/>

  <!-- Pull in the build definitions from the parent directory. -->
  <import file="../common.xml"/>
</project>
|
|
|
@ -1 +0,0 @@
|
||||||
Place pj.jar (from http://www.etymon.com/pub/software/pj/) and the log4j JAR in this directory.
|
|
|
@ -1,172 +0,0 @@
|
||||||
package org.apache.lucene.parsers.pdf;
|
|
||||||
|
|
||||||
/* ====================================================================
|
|
||||||
* The Apache Software License, Version 1.1
|
|
||||||
*
|
|
||||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in
|
|
||||||
* the documentation and/or other materials provided with the
|
|
||||||
* distribution.
|
|
||||||
*
|
|
||||||
* 3. The end-user documentation included with the redistribution,
|
|
||||||
* if any, must include the following acknowledgment:
|
|
||||||
* "This product includes software developed by the
|
|
||||||
* Apache Software Foundation (http://www.apache.org/)."
|
|
||||||
* Alternately, this acknowledgment may appear in the software itself,
|
|
||||||
* if and wherever such third-party acknowledgments normally appear.
|
|
||||||
*
|
|
||||||
* 4. The names "Apache" and "Apache Software Foundation"
|
|
||||||
* must not be used to endorse or promote products
|
|
||||||
* derived from this software without prior written permission. For
|
|
||||||
* written permission, please contact apache@apache.org.
|
|
||||||
*
|
|
||||||
* 5. Products derived from this software may not be called "Apache",
|
|
||||||
* nor may "Apache" appear in their name, without
|
|
||||||
* prior written permission of the Apache Software Foundation.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
|
||||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
||||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
|
||||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
||||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
||||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
* ====================================================================
|
|
||||||
*
|
|
||||||
* This software consists of voluntary contributions made by many
|
|
||||||
* individuals on behalf of the Apache Software Foundation. For more
|
|
||||||
* information on the Apache Software Foundation, please see
|
|
||||||
* <http://www.apache.org/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import com.etymon.pj.Pdf;
|
|
||||||
import com.etymon.pj.exception.InvalidPdfObjectException;
|
|
||||||
import com.etymon.pj.exception.PjException;
|
|
||||||
import com.etymon.pj.object.PjArray;
|
|
||||||
import com.etymon.pj.object.PjObject;
|
|
||||||
import com.etymon.pj.object.PjPage;
|
|
||||||
import com.etymon.pj.object.PjStream;
|
|
||||||
import org.apache.log4j.Category;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Vector;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* <p>
|
|
||||||
* Attempts to extract text from a PDF file.
|
|
||||||
* </p>
|
|
||||||
* <p>
|
|
||||||
* <a href="http://www.mail-archive.com/lucene-user@jakarta.apache.org/msg00280.html">
|
|
||||||
* Known limitations</a>
|
|
||||||
* </p>
|
|
||||||
*
|
|
||||||
* @author <a href="mailto:kelvint@apache.org">Kelvin Tan</a>
|
|
||||||
* @version $Revision$
|
|
||||||
*/
|
|
||||||
public class PdfTextExtractor
|
|
||||||
{
|
|
||||||
private static Category cat = Category.getInstance(PdfTextExtractor.class);
|
|
||||||
|
|
||||||
public static void main(String[] args)
|
|
||||||
{
|
|
||||||
File f = new File("/usr/local/test.pdf");
|
|
||||||
try
|
|
||||||
{
|
|
||||||
Pdf pdf = new Pdf(f.toString());
|
|
||||||
int pagecount = pdf.getPageCount();
|
|
||||||
cat.debug(f.toString() + "has " + pagecount + " pages.");
|
|
||||||
for (int i = 1; i <= pagecount; i++)
|
|
||||||
{
|
|
||||||
System.out.println(getContent(pdf, i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (IOException ioe)
|
|
||||||
{
|
|
||||||
cat.error("IOException parsing PDF file:" + f.toString(), ioe);
|
|
||||||
}
|
|
||||||
catch (PjException pje)
|
|
||||||
{
|
|
||||||
cat.error("PjException parsing PDF file:" + f.toString(), pje);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String getContent(Pdf pdf, int pageNo)
|
|
||||||
{
|
|
||||||
String content = null;
|
|
||||||
PjStream stream = null;
|
|
||||||
StringBuffer strbf = new StringBuffer();
|
|
||||||
try
|
|
||||||
{
|
|
||||||
PjPage page = (PjPage) pdf.getObject(pdf.getPage(pageNo));
|
|
||||||
PjObject pobj = (PjObject) pdf.resolve(page.getContents());
|
|
||||||
if (pobj instanceof PjArray)
|
|
||||||
{
|
|
||||||
PjArray array = (PjArray) pobj;
|
|
||||||
Vector vArray = array.getVector();
|
|
||||||
int size = vArray.size();
|
|
||||||
for (int j = 0; j < size; j++)
|
|
||||||
{
|
|
||||||
stream = (PjStream) pdf.resolve((PjObject) vArray.get(j));
|
|
||||||
strbf.append(getStringFromPjStream(stream));
|
|
||||||
}
|
|
||||||
content = strbf.toString();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
stream = (PjStream) pobj;
|
|
||||||
content = getStringFromPjStream(stream);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (InvalidPdfObjectException pdfe)
|
|
||||||
{
|
|
||||||
cat.error("Invalid PDF Object:" + pdfe, pdfe);
|
|
||||||
}
|
|
||||||
catch (Exception e)
|
|
||||||
{
|
|
||||||
cat.error("Exception in getContent() " + e, e);
|
|
||||||
}
|
|
||||||
return content;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String getStringFromPjStream(PjStream stream)
|
|
||||||
{
|
|
||||||
StringBuffer strbf = new StringBuffer();
|
|
||||||
try
|
|
||||||
{
|
|
||||||
int start,end = 0;
|
|
||||||
stream = stream.flateDecompress();
|
|
||||||
String longString = stream.toString();
|
|
||||||
int strlen = longString.length();
|
|
||||||
int lastIndex = longString.lastIndexOf(')');
|
|
||||||
while (lastIndex != -1 && end != lastIndex)
|
|
||||||
{
|
|
||||||
start = longString.indexOf('(', end);
|
|
||||||
end = longString.indexOf(')', start);
|
|
||||||
String text = longString.substring(start + 1, end);
|
|
||||||
strbf.append(text);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (InvalidPdfObjectException pdfe)
|
|
||||||
{
|
|
||||||
cat.error("InvalidObjectException:" + pdfe.getMessage(), pdfe);
|
|
||||||
}
|
|
||||||
return strbf.toString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue