From 38688edbaaf034f7b831299011fdb8976eef2cbe Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Mon, 14 Sep 2009 12:39:53 +0000 Subject: [PATCH] javadocs git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@814618 13f79535-47bb-0310-9956-ffa450edef68 --- .../handler/dataimport/XPathRecordReader.java | 83 +++++++++++++++++-- 1 file changed, 78 insertions(+), 5 deletions(-) diff --git a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java index 026c7001683..ceb2e2984bc 100644 --- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java +++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java @@ -31,16 +31,33 @@ import java.util.regex.Pattern; * A streaming xpath parser which uses StAX for XML parsing. It supports only a * subset of xpath syntax. *

+ * /a/b/subject[@qualifier='fullTitle'] + * /a/b/subject/@qualifier + * /a/b/c + * + * Keep in mind that the wild-card syntax '//' is not supported + * *

* This API is experimental and may change in the future. + * This class is thread-safe for parsing xml . But adding fields is not thread-safe. The recommended usage is + * to addField() in one thread and then share the instance across threads. * * @version $Id$ * @since solr 1.3 */ public class XPathRecordReader { private Node rootNode = new Node("/", null); + /**Use this flag in the addField() method to fetch all the cdata under a specific tag + * + */ public static final int FLATTEN = 1; + /** + * @param forEachXpath The XPATH for which a record is emitted. At the start of this xpath tag, it starts collecting the fields and at the close + * of the tag ,a record is emitted and the fields collected since the tag start is included in the record. If there + * are fields collected in the parent tag(s) they also will be included in the record but not cleared after emitting the record. + * It can use the ' | ' syntax of XPATH to pass in multiple xpaths. + */ public XPathRecordReader(String forEachXpath) { String[] splits = forEachXpath.split("\\|"); for (String split : splits) { @@ -58,6 +75,12 @@ public class XPathRecordReader { return this; } + /**Add a field's XPATH and its name. + * @param name . The name by which this field is referred in the emitted record + * @param xpath . The xpath to this field + * @param multiValued . If this is 'true' , then the emitted record will have a List as value + * @param flags . The only supported flag is 'FLATTEN' + */ public synchronized XPathRecordReader addField(String name, String xpath, boolean multiValued, int flags) { if (!xpath.startsWith("/")) throw new RuntimeException("xpath must start with '/' : " + xpath); @@ -83,6 +106,10 @@ public class XPathRecordReader { return results; } + /** Stream records as and when they are colected + * @param r The reader + * @param handler The callback instance + */ public void streamRecords(Reader r, Handler handler) { try { XMLStreamReader parser = factory.createXMLStreamReader(r); @@ -93,13 +120,26 @@ public class XPathRecordReader { } } + /**For each node/leaf in the tree there is one object of this class + */ private class Node { - String name, fieldName, xpathName, forEachPath; - - List attributes, childNodes; + /**name of the tag/attribute*/ + String name; + /**The field name as passed in the addField() . This will be used in the record*/ + String fieldName; + /**stores the xpath name such as '@attr='xyz'*/ + String xpathName; + /**The xpath of the record. if this is a record node */ + String forEachPath; + /**child attribute nodes */ + List attributes; + /**child nodes*/ + List childNodes; + /**if attribs are used in the xpath their names and values*/ List> attribAndValues; + /**Parent node of this node */ Node parent; boolean hasText = false, multiValued = false, isRecord = false; @@ -117,6 +157,8 @@ public class XPathRecordReader { this.multiValued = multiValued; } + /**This is the method where all the parsing happens. For each tag/subtag this gets called recursively. + */ private void parse(XMLStreamReader parser, Handler handler, Map values, Stack> stack, boolean recordStarted) throws IOException, XMLStreamException { @@ -203,7 +245,8 @@ public class XPathRecordReader { } } } finally { - + /*If a record has ended (tag closed) then clearup all the fields found + in this record after this tag started */ Set cleanThis = null; if (isRecord || !recordStarted) { cleanThis = stack.pop(); @@ -218,6 +261,9 @@ public class XPathRecordReader { } } + /**if a new tag is encountered, check if it is of interest of not (if there is a matching child Node). + * if yes continue parsing else skip + */ private void handleStartElement(XMLStreamReader parser, Set childrenFound, Handler handler, Map values, Stack> stack, boolean recordStarted) @@ -231,6 +277,8 @@ public class XPathRecordReader { } } + /**check if the current tag is to be parsed or not. if yes return the Node object + */ private Node getMatchingChild(XMLStreamReader parser) { if (childNodes == null) return null; @@ -259,6 +307,9 @@ public class XPathRecordReader { return true; } + /**If there is no value available for a field in a subtag then add a null + * TODO : needs better explanation + */ private void putNulls(Map values) { if (attributes != null) { for (Node n : attributes) { @@ -274,6 +325,8 @@ public class XPathRecordReader { } } + /**Handle multivalued fields by adding List + */ @SuppressWarnings("unchecked") private void putText(Map values, String value, String fieldName, boolean multiValued) { @@ -289,6 +342,8 @@ public class XPathRecordReader { } } + /**Skip a tag w/o processing the tag or its subtags + */ private void skipTag(XMLStreamReader parser) throws IOException, XMLStreamException { int type; @@ -298,7 +353,14 @@ public class XPathRecordReader { } } - public void build(List paths, String fieldName, + /**Build the node structure from the xpath + * @param paths the xpaths split by '/' + * @param fieldName name of the field + * @param multiValued . is multiValued or not + * @param record is this xpath a record or a field + * @param flags extra flags + */ + private void build(List paths, String fieldName, boolean multiValued, boolean record, int flags) { String name = paths.remove(0); if (paths.isEmpty() && name.startsWith("@")) { @@ -355,6 +417,8 @@ public class XPathRecordReader { } } + /**If a field has List then they have to be deep-copied for thread safety + */ private Map getDeepCopy(Map values) { Map result = new HashMap(); for (Map.Entry entry : values.entrySet()) { @@ -397,7 +461,16 @@ public class XPathRecordReader { factory.setProperty(XMLInputFactory.SUPPORT_DTD , Boolean.FALSE); } + /**Implement this interface to stream records as and when it is found. + * + */ public static interface Handler { + /** + * @param record The record map . The key is the field name as provided in the addField() methods. The value + * can be a single String (for single valued) or a List (for multiValued) + * if an Exception is thrown from this method the parsing will be aborted + * @param xpath . The forEach XPATH for which this record is being emitted + */ public void handle(Map record, String xpath); }