From 38688edbaaf034f7b831299011fdb8976eef2cbe Mon Sep 17 00:00:00 2001
From: Noble Paul
Date: Mon, 14 Sep 2009 12:39:53 +0000
Subject: [PATCH] javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@814618 13f79535-47bb-0310-9956-ffa450edef68
---
.../handler/dataimport/XPathRecordReader.java | 83 +++++++++++++++++--
1 file changed, 78 insertions(+), 5 deletions(-)
diff --git a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
index 026c7001683..ceb2e2984bc 100644
--- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
+++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
@@ -31,16 +31,33 @@ import java.util.regex.Pattern;
* A streaming xpath parser which uses StAX for XML parsing. It supports only a
* subset of xpath syntax.
*
+ * /a/b/subject[@qualifier='fullTitle']
+ * /a/b/subject/@qualifier
+ * /a/b/c
+ *
+ * Keep in mind that the wild-card syntax '//' is not supported
+ *
*
* This API is experimental and may change in the future.
+ * This class is thread-safe for parsing XML, but adding fields is not thread-safe. The recommended usage is
+ * to addField() in one thread and then share the instance across threads.
*
* @version $Id$
* @since solr 1.3
*/
public class XPathRecordReader {
private Node rootNode = new Node("/", null);
+ /**Use this flag in the addField() method to fetch all the cdata under a specific tag
+ *
+ */
public static final int FLATTEN = 1;
+ /**
+ * @param forEachXpath The XPATH for which a record is emitted. At the start of this xpath tag, it starts collecting the fields and at the close
+ * of the tag, a record is emitted and the fields collected since the tag start are included in the record. If there
+ * are fields collected in the parent tag(s) they also will be included in the record but not cleared after emitting the record.
+ * It can use the ' | ' syntax of XPATH to pass in multiple xpaths.
+ */
public XPathRecordReader(String forEachXpath) {
String[] splits = forEachXpath.split("\\|");
for (String split : splits) {
@@ -58,6 +75,12 @@ public class XPathRecordReader {
return this;
}
+ /**Add a field's XPATH and its name.
+ * @param name . The name by which this field is referred in the emitted record
+ * @param xpath . The xpath to this field
+ * @param multiValued . If this is 'true' , then the emitted record will have a List as value
+ * @param flags . The only supported flag is 'FLATTEN'
+ */
public synchronized XPathRecordReader addField(String name, String xpath, boolean multiValued, int flags) {
if (!xpath.startsWith("/"))
throw new RuntimeException("xpath must start with '/' : " + xpath);
@@ -83,6 +106,10 @@ public class XPathRecordReader {
return results;
}
+ /** Stream records as and when they are collected
+ * @param r The reader
+ * @param handler The callback instance
+ */
public void streamRecords(Reader r, Handler handler) {
try {
XMLStreamReader parser = factory.createXMLStreamReader(r);
@@ -93,13 +120,26 @@ public class XPathRecordReader {
}
}
+ /**For each node/leaf in the tree there is one object of this class
+ */
private class Node {
- String name, fieldName, xpathName, forEachPath;
-
- List attributes, childNodes;
+ /**name of the tag/attribute*/
+ String name;
+ /**The field name as passed in the addField() . This will be used in the record*/
+ String fieldName;
+ /**stores the xpath name such as @attr='xyz' */
+ String xpathName;
+ /**The xpath of the record. if this is a record node */
+ String forEachPath;
+ /**child attribute nodes */
+ List attributes;
+ /**child nodes*/
+ List childNodes;
+ /**if attribs are used in the xpath their names and values*/
List> attribAndValues;
+ /**Parent node of this node */
Node parent;
boolean hasText = false, multiValued = false, isRecord = false;
@@ -117,6 +157,8 @@ public class XPathRecordReader {
this.multiValued = multiValued;
}
+ /**This is the method where all the parsing happens. For each tag/subtag this gets called recursively.
+ */
private void parse(XMLStreamReader parser, Handler handler,
Map values, Stack> stack,
boolean recordStarted) throws IOException, XMLStreamException {
@@ -203,7 +245,8 @@ public class XPathRecordReader {
}
}
} finally {
-
+ /*If a record has ended (tag closed) then clear up all the fields found
+ in this record after this tag started */
Set cleanThis = null;
if (isRecord || !recordStarted) {
cleanThis = stack.pop();
@@ -218,6 +261,9 @@ public class XPathRecordReader {
}
}
+ /**if a new tag is encountered, check if it is of interest or not (if there is a matching child Node).
+ * if yes continue parsing else skip
+ */
private void handleStartElement(XMLStreamReader parser, Set childrenFound,
Handler handler, Map values,
Stack> stack, boolean recordStarted)
@@ -231,6 +277,8 @@ public class XPathRecordReader {
}
}
+ /**check if the current tag is to be parsed or not. if yes return the Node object
+ */
private Node getMatchingChild(XMLStreamReader parser) {
if (childNodes == null)
return null;
@@ -259,6 +307,9 @@ public class XPathRecordReader {
return true;
}
+ /**If there is no value available for a field in a subtag then add a null
+ * TODO : needs better explanation
+ */
private void putNulls(Map values) {
if (attributes != null) {
for (Node n : attributes) {
@@ -274,6 +325,8 @@ public class XPathRecordReader {
}
}
+ /**Handle multivalued fields by adding List
+ */
@SuppressWarnings("unchecked")
private void putText(Map values, String value,
String fieldName, boolean multiValued) {
@@ -289,6 +342,8 @@ public class XPathRecordReader {
}
}
+ /**Skip a tag w/o processing the tag or its subtags
+ */
private void skipTag(XMLStreamReader parser) throws IOException,
XMLStreamException {
int type;
@@ -298,7 +353,14 @@ public class XPathRecordReader {
}
}
- public void build(List paths, String fieldName,
+ /**Build the node structure from the xpath
+ * @param paths the xpaths split by '/'
+ * @param fieldName name of the field
+ * @param multiValued . is multiValued or not
+ * @param record is this xpath a record or a field
+ * @param flags extra flags
+ */
+ private void build(List paths, String fieldName,
boolean multiValued, boolean record, int flags) {
String name = paths.remove(0);
if (paths.isEmpty() && name.startsWith("@")) {
@@ -355,6 +417,8 @@ public class XPathRecordReader {
}
}
+ /**If a field has List then they have to be deep-copied for thread safety
+ */
private Map getDeepCopy(Map values) {
Map result = new HashMap();
for (Map.Entry entry : values.entrySet()) {
@@ -397,7 +461,16 @@ public class XPathRecordReader {
factory.setProperty(XMLInputFactory.SUPPORT_DTD , Boolean.FALSE);
}
+ /**Implement this interface to stream records as and when they are found.
+ *
+ */
public static interface Handler {
+ /**
+ * @param record The record map . The key is the field name as provided in the addField() methods. The value
+ * can be a single String (for single valued) or a List (for multiValued).
+ * If an Exception is thrown from this method the parsing will be aborted
+ * @param xpath . The forEach XPATH for which this record is being emitted
+ */
public void handle(Map record, String xpath);
}