build and code cleanup

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150920 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erik Hatcher 2004-01-11 14:12:19 +00:00
parent 0686b22b3c
commit da02ffa7b0
3 changed files with 75 additions and 88 deletions

View File

@ -0,0 +1,10 @@
<?xml version="1.0"?>
<project name="xml" default="default">
<description>
Example of Lucene XML indexing
</description>
<import file="../common.xml"/>
</project>

View File

@ -10,49 +10,38 @@ import java.io.File;
/**
*
*/
public class XMLDocumentHandlerDOM
{
public org.apache.lucene.document.Document createXMLDocument(File f)
{
org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try
{
DocumentBuilder df = dbf.newDocumentBuilder();
org.w3c.dom.Document d = df.parse(f);
Node root = d.getDocumentElement();
traverseTree(root, document);
}
catch (Exception e)
{
System.out.println("error: " + e);
e.printStackTrace();
}
return document;
public class XMLDocumentHandlerDOM {
public org.apache.lucene.document.Document createXMLDocument(File f) {
org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder df = dbf.newDocumentBuilder();
org.w3c.dom.Document d = df.parse(f);
Node root = d.getDocumentElement();
traverseTree(root, document);
} catch (Exception e) {
System.out.println("error: " + e);
e.printStackTrace();
}
return document;
}
static private void traverseTree(Node node, org.apache.lucene.document.Document document)
{
NodeList nl = node.getChildNodes();
if (nl.getLength() == 0)
{
if (node.getNodeType() == Node.TEXT_NODE)
{
Node parentNode = node.getParentNode();
if (parentNode.getNodeType() == Node.ELEMENT_NODE)
{
String parentNodeName = parentNode.getNodeName();
static private void traverseTree(Node node, org.apache.lucene.document.Document document) {
NodeList nl = node.getChildNodes();
if (nl.getLength() == 0) {
if (node.getNodeType() == Node.TEXT_NODE) {
Node parentNode = node.getParentNode();
if (parentNode.getNodeType() == Node.ELEMENT_NODE) {
// String parentNodeName = parentNode.getNodeName();
// String nodeValue = node.getNodeValue();
// if (parentNodeName.equals("name"))
// {
Node siblingNode = node.getNextSibling();
if (siblingNode != null)
{
if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
{
document.add(Field.Text("name", siblingNode.getNodeValue()));
}
}
Node siblingNode = node.getNextSibling();
if (siblingNode != null) {
if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) {
document.add(Field.Text("name", siblingNode.getNodeValue()));
}
}
// }
// else if (parentNodeName.equals("profession"))
// {
@ -131,15 +120,12 @@ public class XMLDocumentHandlerDOM
// }
// }
// }
}
}
}
else
{
for(int i=0; i<nl.getLength(); i++)
{
traverseTree(nl.item(i), document);
}
}
}
} else {
for (int i = 0; i < nl.getLength(); i++) {
traverseTree(nl.item(i), document);
}
}
}
}

View File

@ -1,8 +1,6 @@
package org.apache.lucenesandbox.xmlindexingdemo;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import org.xml.sax.AttributeList;
import javax.xml.parsers.*;
import org.apache.lucene.document.Document;
@ -12,51 +10,44 @@ import java.io.File;
import java.io.IOException;
public class XMLDocumentHandlerSAX
extends HandlerBase
{
/** A buffer for each XML element */
private StringBuffer elementBuffer = new StringBuffer();
extends HandlerBase {
/** A buffer for each XML element */
private StringBuffer elementBuffer = new StringBuffer();
private Document mDocument;
private Document mDocument;
// constructor
public XMLDocumentHandlerSAX(File xmlFile)
throws ParserConfigurationException, SAXException, IOException
{
SAXParserFactory spf = SAXParserFactory.newInstance();
// constructor
public XMLDocumentHandlerSAX(File xmlFile)
throws ParserConfigurationException, SAXException, IOException {
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser parser = spf.newSAXParser();
parser.parse(xmlFile, this);
}
SAXParser parser = spf.newSAXParser();
parser.parse(xmlFile, this);
}
// call at document start
public void startDocument()
{
mDocument = new Document();
}
// call at document start
public void startDocument() {
mDocument = new Document();
}
// call at element start
public void startElement(String localName, AttributeList atts)
throws SAXException
{
elementBuffer.setLength(0);
}
// call at element start
public void startElement(String localName, AttributeList atts)
throws SAXException {
elementBuffer.setLength(0);
}
// call when cdata found
public void characters(char[] text, int start, int length)
{
elementBuffer.append(text, start, length);
}
// call when cdata found
public void characters(char[] text, int start, int length) {
elementBuffer.append(text, start, length);
}
// call at element end
public void endElement(String localName)
throws SAXException
{
mDocument.add(Field.Text(localName, elementBuffer.toString()));
}
// call at element end
public void endElement(String localName)
throws SAXException {
mDocument.add(Field.Text(localName, elementBuffer.toString()));
}
public Document getDocument()
{
return mDocument;
}
public Document getDocument() {
return mDocument;
}
}