mirror of https://github.com/apache/lucene.git
build and code cleanup
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150920 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0686b22b3c
commit
da02ffa7b0
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<project name="xml" default="default">
|
||||
|
||||
<description>
|
||||
Example of Lucene XML indexing
|
||||
</description>
|
||||
|
||||
<!--
  All build logic is pulled in from the shared ../common.xml file.
  NOTE(review): the "default" target named on the project element is not
  defined in this file; presumably it is provided by common.xml. Confirm.
-->
<import file="../common.xml"/>
|
||||
</project>
|
|
@ -10,49 +10,38 @@ import java.io.File;
|
|||
/**
|
||||
*
|
||||
*/
|
||||
public class XMLDocumentHandlerDOM
|
||||
{
|
||||
public org.apache.lucene.document.Document createXMLDocument(File f)
|
||||
{
|
||||
org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
|
||||
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
||||
try
|
||||
{
|
||||
DocumentBuilder df = dbf.newDocumentBuilder();
|
||||
org.w3c.dom.Document d = df.parse(f);
|
||||
Node root = d.getDocumentElement();
|
||||
traverseTree(root, document);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
System.out.println("error: " + e);
|
||||
e.printStackTrace();
|
||||
}
|
||||
return document;
|
||||
public class XMLDocumentHandlerDOM {
|
||||
public org.apache.lucene.document.Document createXMLDocument(File f) {
|
||||
org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
|
||||
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
||||
try {
|
||||
DocumentBuilder df = dbf.newDocumentBuilder();
|
||||
org.w3c.dom.Document d = df.parse(f);
|
||||
Node root = d.getDocumentElement();
|
||||
traverseTree(root, document);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: " + e);
|
||||
e.printStackTrace();
|
||||
}
|
||||
return document;
|
||||
}
|
||||
|
||||
static private void traverseTree(Node node, org.apache.lucene.document.Document document)
|
||||
{
|
||||
NodeList nl = node.getChildNodes();
|
||||
if (nl.getLength() == 0)
|
||||
{
|
||||
if (node.getNodeType() == Node.TEXT_NODE)
|
||||
{
|
||||
Node parentNode = node.getParentNode();
|
||||
if (parentNode.getNodeType() == Node.ELEMENT_NODE)
|
||||
{
|
||||
String parentNodeName = parentNode.getNodeName();
|
||||
static private void traverseTree(Node node, org.apache.lucene.document.Document document) {
|
||||
NodeList nl = node.getChildNodes();
|
||||
if (nl.getLength() == 0) {
|
||||
if (node.getNodeType() == Node.TEXT_NODE) {
|
||||
Node parentNode = node.getParentNode();
|
||||
if (parentNode.getNodeType() == Node.ELEMENT_NODE) {
|
||||
// String parentNodeName = parentNode.getNodeName();
|
||||
// String nodeValue = node.getNodeValue();
|
||||
// if (parentNodeName.equals("name"))
|
||||
// {
|
||||
Node siblingNode = node.getNextSibling();
|
||||
if (siblingNode != null)
|
||||
{
|
||||
if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
|
||||
{
|
||||
document.add(Field.Text("name", siblingNode.getNodeValue()));
|
||||
}
|
||||
}
|
||||
Node siblingNode = node.getNextSibling();
|
||||
if (siblingNode != null) {
|
||||
if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) {
|
||||
document.add(Field.Text("name", siblingNode.getNodeValue()));
|
||||
}
|
||||
}
|
||||
// }
|
||||
// else if (parentNodeName.equals("profession"))
|
||||
// {
|
||||
|
@ -131,15 +120,12 @@ public class XMLDocumentHandlerDOM
|
|||
// }
|
||||
// }
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i=0; i<nl.getLength(); i++)
|
||||
{
|
||||
traverseTree(nl.item(i), document);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < nl.getLength(); i++) {
|
||||
traverseTree(nl.item(i), document);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
package org.apache.lucenesandbox.xmlindexingdemo;
|
||||
|
||||
import org.xml.sax.*;
|
||||
import org.xml.sax.helpers.*;
|
||||
import org.xml.sax.AttributeList;
|
||||
import javax.xml.parsers.*;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -12,51 +10,44 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
|
||||
public class XMLDocumentHandlerSAX
|
||||
extends HandlerBase
|
||||
{
|
||||
/** A buffer for each XML element */
|
||||
private StringBuffer elementBuffer = new StringBuffer();
|
||||
extends HandlerBase {
|
||||
/** A buffer for each XML element */
|
||||
private StringBuffer elementBuffer = new StringBuffer();
|
||||
|
||||
private Document mDocument;
|
||||
private Document mDocument;
|
||||
|
||||
// constructor
|
||||
public XMLDocumentHandlerSAX(File xmlFile)
|
||||
throws ParserConfigurationException, SAXException, IOException
|
||||
{
|
||||
SAXParserFactory spf = SAXParserFactory.newInstance();
|
||||
// constructor
|
||||
public XMLDocumentHandlerSAX(File xmlFile)
|
||||
throws ParserConfigurationException, SAXException, IOException {
|
||||
SAXParserFactory spf = SAXParserFactory.newInstance();
|
||||
|
||||
SAXParser parser = spf.newSAXParser();
|
||||
parser.parse(xmlFile, this);
|
||||
}
|
||||
SAXParser parser = spf.newSAXParser();
|
||||
parser.parse(xmlFile, this);
|
||||
}
|
||||
|
||||
// call at document start
|
||||
public void startDocument()
|
||||
{
|
||||
mDocument = new Document();
|
||||
}
|
||||
// call at document start
|
||||
public void startDocument() {
|
||||
mDocument = new Document();
|
||||
}
|
||||
|
||||
// call at element start
|
||||
public void startElement(String localName, AttributeList atts)
|
||||
throws SAXException
|
||||
{
|
||||
elementBuffer.setLength(0);
|
||||
}
|
||||
// call at element start
|
||||
public void startElement(String localName, AttributeList atts)
|
||||
throws SAXException {
|
||||
elementBuffer.setLength(0);
|
||||
}
|
||||
|
||||
// call when cdata found
|
||||
public void characters(char[] text, int start, int length)
|
||||
{
|
||||
elementBuffer.append(text, start, length);
|
||||
}
|
||||
// call when cdata found
|
||||
public void characters(char[] text, int start, int length) {
|
||||
elementBuffer.append(text, start, length);
|
||||
}
|
||||
|
||||
// call at element end
|
||||
public void endElement(String localName)
|
||||
throws SAXException
|
||||
{
|
||||
mDocument.add(Field.Text(localName, elementBuffer.toString()));
|
||||
}
|
||||
// call at element end
|
||||
public void endElement(String localName)
|
||||
throws SAXException {
|
||||
mDocument.add(Field.Text(localName, elementBuffer.toString()));
|
||||
}
|
||||
|
||||
public Document getDocument()
|
||||
{
|
||||
return mDocument;
|
||||
}
|
||||
public Document getDocument() {
|
||||
return mDocument;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue