mirror of https://github.com/apache/lucene.git
SOLR-6304 JsonLoader should be able to flatten an input JSON to multiple docs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1617287 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4c4e56c902
commit
6caeac6c2c
|
@@ -175,6 +175,9 @@ New Features

* SOLR-6283: Add support for Interval Faceting in SolrJ. (Tomás Fernández Löbbe)

* SOLR-6304: JsonLoader should be able to flatten an input JSON to multiple docs (Noble Paul)

Bug Fixes
----------------------
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
@ -28,6 +29,7 @@ import java.util.Map;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.JsonRecordReader;
|
||||
import org.noggit.JSONParser;
|
||||
import org.noggit.ObjectBuilder;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -63,7 +65,7 @@ public class JsonLoader extends ContentStreamLoader {
|
|||
@Override
|
||||
public void load(SolrQueryRequest req, SolrQueryResponse rsp,
|
||||
ContentStream stream, UpdateRequestProcessor processor) throws Exception {
|
||||
new SingleThreadedJsonLoader(req,processor).load(req, rsp, stream, processor);
|
||||
new SingleThreadedJsonLoader(req,rsp,processor).load(req, rsp, stream, processor);
|
||||
}
|
||||
|
||||
|
||||
|
@ -71,14 +73,16 @@ public class JsonLoader extends ContentStreamLoader {
|
|||
|
||||
protected final UpdateRequestProcessor processor;
protected final SolrQueryRequest req;
protected SolrQueryResponse rsp;      // needed so split-mode 'echo' can add parsed docs to the response
protected JSONParser parser;
protected final int commitWithin;     // commitWithin from request params, -1 when unset
protected final boolean overwrite;    // overwrite flag from request params, defaults to true

/**
 * Creates a loader bound to one request/response pair.
 * NOTE: the pasted diff retained both the old (req, processor) and the new
 * (req, rsp, processor) constructor signatures; only the new one is kept.
 */
public SingleThreadedJsonLoader(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor processor) {
  this.processor = processor;
  this.req = req;
  this.rsp = rsp;

  commitWithin = req.getParams().getInt(UpdateParams.COMMIT_WITHIN, -1);
  overwrite = req.getParams().getBool(UpdateParams.OVERWRITE, true);
}
|
||||
|
@ -110,7 +114,13 @@ public class JsonLoader extends ContentStreamLoader {
|
|||
void processUpdate() throws IOException
|
||||
{
|
||||
if("false".equals( req.getParams().get("json.command"))){
|
||||
handleStreamingSingleDocs();
|
||||
|
||||
String split = req.getParams().get("split");
|
||||
if(split != null){
|
||||
handleSplitMode(split);
|
||||
} else {
|
||||
handleStreamingSingleDocs();
|
||||
}
|
||||
return;
|
||||
}
|
||||
int ev = parser.nextEvent();
|
||||
|
@ -182,6 +192,39 @@ public class JsonLoader extends ContentStreamLoader {
|
|||
}
|
||||
}
|
||||
|
||||
private void handleSplitMode(String split) throws IOException {
|
||||
String[] fields = req.getParams().getParams("f");
|
||||
req.getCore().getLatestSchema().getDefaultSearchFieldName();
|
||||
final boolean echo = "true".equals( req.getParams().get("echo"));
|
||||
JsonRecordReader jsonRecordReader = JsonRecordReader.getInst(split, Arrays.asList(fields));
|
||||
jsonRecordReader.streamRecords(parser,new JsonRecordReader.Handler() {
|
||||
ArrayList docs =null;
|
||||
@Override
|
||||
public void handle(Map<String, Object> record, String path) {
|
||||
if(echo){
|
||||
if(docs ==null) {
|
||||
docs = new ArrayList();
|
||||
rsp.add("docs",docs);
|
||||
}
|
||||
docs.add(record);
|
||||
} else {
|
||||
AddUpdateCommand cmd = new AddUpdateCommand(req);
|
||||
cmd.commitWithin = commitWithin;
|
||||
cmd.overwrite = overwrite;
|
||||
cmd.solrDoc = new SolrInputDocument();
|
||||
for (Map.Entry<String, Object> entry : record.entrySet()) {
|
||||
cmd.solrDoc.setField(entry.getKey(),entry.getValue());
|
||||
}
|
||||
try {
|
||||
processor.processAdd(cmd);
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error inserting doc",e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private void handleStreamingSingleDocs() throws IOException
|
||||
{
|
||||
while( true ) {
|
||||
|
|
|
@ -0,0 +1,559 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.common.util;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.*;
|
||||
|
||||
import org.noggit.JSONParser;
|
||||
|
||||
import static org.noggit.JSONParser.*;
|
||||
|
||||
/**A Streaming parser for json to emit one record at a time.
|
||||
*/
|
||||
|
||||
public class JsonRecordReader {
|
||||
|
||||
private Node rootNode = new Node("/", (Node)null);
|
||||
|
||||
public static JsonRecordReader getInst(String split, List<String> fieldMappings) {
|
||||
|
||||
JsonRecordReader jsonRecordReader = new JsonRecordReader(split);
|
||||
for (String s : fieldMappings) {
|
||||
String path = s;
|
||||
int idx = s.indexOf(':');
|
||||
String fieldName = null;
|
||||
if(idx >0) {
|
||||
fieldName = s.substring(0, idx);
|
||||
path = s.substring(idx + 1);
|
||||
}
|
||||
jsonRecordReader.addField(path, fieldName, true, false);
|
||||
}
|
||||
return jsonRecordReader;
|
||||
}
|
||||
|
||||
/**
 * A constructor called with a '|' separated list of path expressions
 * which define sub sections of the JSON stream that are to be emitted as
 * separate records.
 *
 * @param splitPath The PATH for which a record is emitted. Once the
 * path tag is encountered, the streaming code starts collecting wanted
 * fields and at the close of the tag, a record is emitted containing all
 * fields collected since the tag start. Once
 * emitted the collected fields are cleared. Any fields collected in the
 * parent tag or above will also be included in the record, but these are
 * not cleared after emitting the record.
 * <p/>
 * It uses the ' | ' syntax of PATH to pass in multiple paths.
 */
private JsonRecordReader(String splitPath) {
  for (String piece : splitPath.split("\\|")) {
    String split = piece.trim();
    if (split.startsWith("//")) {
      throw new RuntimeException("split cannot start with '//': " + split);
    }
    if (split.length() == 0) {
      continue;
    }
    // The created Node has a name set to the full split attribute path
    addField(split, split, false, true);
  }
}
|
||||
|
||||
/**
|
||||
* Splits the path into a List of segments and calls build() to
|
||||
* construct a tree of Nodes representing path segments. The resulting
|
||||
* tree structure ends up describing all the paths we are interested in.
|
||||
*
|
||||
* @param path The path expression for this field
|
||||
* @param fieldName The name for this field in the emitted record
|
||||
* @param multiValued If 'true' then the emitted record will have values in
|
||||
* a List<String>
|
||||
* @param isRecord Flags that this PATH is from a forEach statement
|
||||
*/
|
||||
private void addField(String path, String fieldName, boolean multiValued, boolean isRecord) {
|
||||
if(!path.startsWith("/")) throw new RuntimeException("All paths must start with '/' "+ path);
|
||||
List<String> paths = splitEscapeQuote(path);
|
||||
if(paths.size() ==0) {
|
||||
if(isRecord) rootNode.isRecord=true;
|
||||
return;//the patrh is "/"
|
||||
}
|
||||
// deal with how split behaves when seperator starts a string!
|
||||
if ("".equals(paths.get(0).trim()))
|
||||
paths.remove(0);
|
||||
rootNode.build(paths, fieldName, multiValued, isRecord, path);
|
||||
// rootNode.buildOptimise(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses {@link #streamRecords streamRecords} to getInst the JSON source but with
|
||||
* a handler that collects all the emitted records into a single List which
|
||||
* is returned upon completion.
|
||||
*
|
||||
* @param r the stream reader
|
||||
* @return results a List of emitted records
|
||||
*/
|
||||
public List<Map<String, Object>> getAllRecords(Reader r) throws IOException {
|
||||
final List<Map<String, Object>> results = new ArrayList<>();
|
||||
streamRecords(r, new Handler() {
|
||||
@Override
|
||||
public void handle(Map<String, Object> record, String path) {
|
||||
results.add(record);
|
||||
}
|
||||
});
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an JSONParser on top of whatever reader has been
|
||||
* configured. Then calls getInst() with a handler which is
|
||||
* invoked forEach record emitted.
|
||||
*
|
||||
* @param r the stream reader
|
||||
* @param handler The callback instance
|
||||
*/
|
||||
public void streamRecords(Reader r, Handler handler) throws IOException {
|
||||
streamRecords(new JSONParser(r), handler);
|
||||
}
|
||||
|
||||
public void streamRecords(JSONParser parser, Handler handler) throws IOException {
|
||||
rootNode.parse(parser, handler,
|
||||
new LinkedHashMap<String, Object>(),
|
||||
new Stack<Set<String>>(), false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For each node/leaf in the Node tree there is one object of this class.
|
||||
* This tree of objects represents all the Paths we are interested in.
|
||||
* For each path segment of interest we create a node. In most cases the
|
||||
* node (branch) is rather basic , but for the final portion (leaf) of any
|
||||
* path we add more information to the Node. When parsing the JSON document
|
||||
* we step though this tree as we stream records from the reader. If the JSON
|
||||
* document departs from this tree we skip start tags till we are back on
|
||||
* the tree.
|
||||
*/
|
||||
private static class Node {
|
||||
String name; // generally: segment of the path represented by this Node
|
||||
String fieldName; // the fieldname in the emitted record (key of the map)
|
||||
String splitPath; // the full path from the forEach entity attribute
|
||||
final LinkedHashMap<String ,Node> childNodes = new LinkedHashMap<>(); // List of immediate child Nodes of this node
|
||||
List<Node> wildCardNodes; // List of '//' style decendants of this Node
|
||||
Node wildAncestor; // ancestor Node containing '//' style decendants
|
||||
Node parent; // parent Node in the tree
|
||||
boolean isLeaf = false; // flag: store/emit streamed text for this node
|
||||
boolean isRecord = false; //flag: this Node starts a new record
|
||||
Node wildCardChild ;
|
||||
Node recursiveWildCardChild;
|
||||
|
||||
|
||||
public Node(String name, Node p) {
|
||||
// Create a basic Node, suitable for the mid portions of any path.
|
||||
// Node.pathName and Node.name are set to same value.
|
||||
this.name = name;
|
||||
parent = p;
|
||||
}
|
||||
|
||||
public Node(String name, String fieldName) {
|
||||
// This is only called from build() when describing an attribute.
|
||||
this.name = name; // a segment from the path
|
||||
this.fieldName = fieldName; // name to store collected values against
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Walk the Node tree propagating any wildDescentant information to
|
||||
* child nodes. This allows us to optimise the performance of the
|
||||
* main getInst method.
|
||||
*/
|
||||
private void buildOptimise(Node wa) {
|
||||
wildAncestor = wa;
|
||||
if (wildCardNodes != null) wa = this;
|
||||
if (childNodes != null)
|
||||
for (Node n : childNodes.values()) n.buildOptimise(wa);
|
||||
}
|
||||
static final String WILDCARD_PATH = "*";
|
||||
static final String RECURSIVE_WILDCARD_PATH = "**";
|
||||
|
||||
/**
|
||||
* Build a Node tree structure representing all paths of intrest to us.
|
||||
* This must be done before parsing of the JSON stream starts. Each node
|
||||
* holds one portion of an path. Taking each path segment in turn this
|
||||
* method walks the Node tree and finds where the new segment should be
|
||||
* inserted. It creates a Node representing a field's name, PATH and
|
||||
* some flags and inserts the Node into the Node tree.
|
||||
*/
|
||||
private void build(
|
||||
List<String> paths, // a List of segments from the split paths
|
||||
String fieldName, // the fieldName assoc with this path
|
||||
boolean multiValued, // flag if this fieldName is multiValued or not
|
||||
boolean record, // is this path a record or a field
|
||||
String path) {
|
||||
// recursively walk the paths Lists adding new Nodes as required
|
||||
String segment = paths.remove(0); // shift out next path segment
|
||||
|
||||
if(segment.length() < 1) throw new RuntimeException("all pieces in path must be non empty "+path);
|
||||
|
||||
// does this "name" already exist as a child node.
|
||||
Node n = getOrAddNode(segment, childNodes);
|
||||
if (paths.isEmpty()) {
|
||||
// We have emptied paths, we are for the moment a leaf of the tree.
|
||||
// When parsing the actual input we have traversed to a position
|
||||
// where we actutally have to do something. getOrAddNode() will
|
||||
// have created and returned a new minimal Node with name and
|
||||
// pathName already populated. We need to add more information.
|
||||
if (record) {
|
||||
//wild cards cannot be used in split
|
||||
assert !WILDCARD_PATH.equals(n.name);
|
||||
assert !RECURSIVE_WILDCARD_PATH.equals(n.name);
|
||||
// split attribute
|
||||
n.isRecord = true; // flag: split attribute, prepare to emit rec
|
||||
n.splitPath = fieldName; // the full split attribute path
|
||||
} else {
|
||||
if (n.name.equals(WILDCARD_PATH)) {
|
||||
wildCardChild = n;
|
||||
}
|
||||
if (n.name.equals(RECURSIVE_WILDCARD_PATH) ) {
|
||||
recursiveWildCardChild = n.recursiveWildCardChild= n;
|
||||
}
|
||||
|
||||
// path with content we want to store and return
|
||||
n.isLeaf = true; // we have to store text found here
|
||||
n.fieldName = fieldName; // name to store collected text against
|
||||
}
|
||||
} else {
|
||||
//wildcards must only come at the end
|
||||
if(WILDCARD_PATH.equals(name) || RECURSIVE_WILDCARD_PATH.equals(name)) throw new RuntimeException("wild cards are allowed only in the end "+path) ;
|
||||
// recurse to handle next paths segment
|
||||
n.build(paths, fieldName, multiValued, record, path);
|
||||
}
|
||||
}
|
||||
|
||||
private Node getOrAddNode(String pathName, Map<String,Node> children) {
|
||||
Node n = children.get(pathName);
|
||||
if(n !=null) return n;
|
||||
// new territory! add a new node for this path bitty
|
||||
children.put(pathName, n = new Node(pathName, this));
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies a supplied Map to a new Map which is returned. Used to copy a
|
||||
* records values. If a fields value is a List then they have to be
|
||||
* deep-copied for thread safety
|
||||
*/
|
||||
private static Map<String, Object> getDeepCopy(Map<String, Object> values) {
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
for (Map.Entry<String, Object> entry : values.entrySet()) {
|
||||
if (entry.getValue() instanceof List) {
|
||||
result.put(entry.getKey(), new ArrayList((List) entry.getValue()));
|
||||
} else {
|
||||
result.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private void parse(JSONParser parser,
|
||||
Handler handler,
|
||||
Map<String, Object> values,
|
||||
Stack<Set<String>> stack, // lists of values to purge
|
||||
boolean recordStarted) throws IOException {
|
||||
|
||||
int event = -1;
|
||||
for (; ; ) {
|
||||
event = parser.nextEvent();
|
||||
if(event == EOF) break;
|
||||
if (event == OBJECT_START) {
|
||||
handleObjectStart(parser, new HashSet<Node>(), handler, values, stack, recordStarted);
|
||||
} else if (event == ARRAY_START) {
|
||||
for (; ; ) {
|
||||
event = parser.nextEvent();
|
||||
if (event == ARRAY_END) break;
|
||||
if (event == OBJECT_START) {
|
||||
handleObjectStart(parser, new HashSet<Node>(), handler, values, stack, recordStarted);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* If a new tag is encountered, check if it is of interest or not by seeing
|
||||
* if it matches against our node tree. If we have deperted from the node
|
||||
* tree then walk back though the tree's ancestor nodes checking to see if
|
||||
* any // expressions exist for the node and compare them against the new
|
||||
* tag. If matched then "jump" to that node, otherwise ignore the tag.
|
||||
* <p/>
|
||||
* Note, the list of // expressions found while walking back up the tree
|
||||
* is chached in the HashMap decends. Then if the new tag is to be skipped,
|
||||
* any inner chil tags are compared against the cache and jumped to if
|
||||
* matched.
|
||||
*/
|
||||
private void handleObjectStart(final JSONParser parser, final Set<Node> childrenFound,
|
||||
final Handler handler, final Map<String, Object> values,
|
||||
final Stack<Set<String>> stack, boolean recordStarted)
|
||||
throws IOException {
|
||||
|
||||
final boolean isRecordStarted = recordStarted || isRecord;
|
||||
Set<String> valuesAddedinThisFrame = null;
|
||||
if (isRecord) {
|
||||
// This Node is a match for an PATH from a forEach attribute,
|
||||
// prepare for the clean up that will occurr when the record
|
||||
// is emitted after its END_ELEMENT is matched
|
||||
valuesAddedinThisFrame = new HashSet<>();
|
||||
stack.push(valuesAddedinThisFrame);
|
||||
} else if (recordStarted) {
|
||||
// This node is a child of some parent which matched against forEach
|
||||
// attribute. Continue to add values to an existing record.
|
||||
valuesAddedinThisFrame = stack.peek();
|
||||
}
|
||||
|
||||
class Wrapper extends MethodFrameWrapper {
|
||||
Wrapper( Node node) {
|
||||
this.node = node;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void walk(int event) throws IOException {
|
||||
if (event == OBJECT_START) {
|
||||
node.handleObjectStart(parser, childrenFound, handler, values, stack, isRecordStarted);
|
||||
} else if (event == ARRAY_START) {
|
||||
for (; ; ) {
|
||||
event = parser.nextEvent();
|
||||
if (event == ARRAY_END) break;
|
||||
if (event == OBJECT_START) {
|
||||
node.handleObjectStart(parser, childrenFound, handler, values, stack, isRecordStarted);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
for (; ; ) {
|
||||
int event = parser.nextEvent();
|
||||
if (event == OBJECT_END) {
|
||||
if (isRecord()) {
|
||||
handler.handle(getDeepCopy(values), splitPath);
|
||||
}
|
||||
return;
|
||||
}
|
||||
assert event == STRING;
|
||||
assert parser.wasKey();
|
||||
String name = parser.getString();
|
||||
|
||||
Node node = childNodes.get(name);
|
||||
if(node == null) node = wildCardChild;
|
||||
if(node == null) node = recursiveWildCardChild;
|
||||
|
||||
if (node != null) {
|
||||
if (node.isLeaf) {//this is a leaf collect data here
|
||||
event = parser.nextEvent();
|
||||
String nameInRecord = node.fieldName == null ? name : node.fieldName;
|
||||
MethodFrameWrapper runnable = null;
|
||||
if(event == OBJECT_START || event == ARRAY_START){
|
||||
if(node.recursiveWildCardChild !=null) runnable = new Wrapper(node);
|
||||
}
|
||||
Object val = parseSingleFieldValue(event, parser, runnable);
|
||||
if(val !=null) {
|
||||
putValue(values, nameInRecord, val);
|
||||
if (isRecordStarted) valuesAddedinThisFrame.add(nameInRecord);
|
||||
}
|
||||
|
||||
} else {
|
||||
event = parser.nextEvent();
|
||||
new Wrapper(node).walk(event);
|
||||
}
|
||||
} else {
|
||||
//this is not something we are interested in . skip it
|
||||
event = parser.nextEvent();
|
||||
if (event == STRING ||
|
||||
event == LONG ||
|
||||
event == BIGNUMBER ||
|
||||
event == BOOLEAN ||
|
||||
event == NULL) {
|
||||
continue;
|
||||
}
|
||||
if (event == ARRAY_START) {
|
||||
consumeTillMatchingEnd(parser, 0, 1);
|
||||
continue;
|
||||
}
|
||||
if (event == OBJECT_START) {
|
||||
consumeTillMatchingEnd(parser, 1, 0);
|
||||
continue;
|
||||
} else throw new RuntimeException("unexpected token " + event);
|
||||
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
if ((isRecord() || !isRecordStarted) && !stack.empty()) {
|
||||
Set<String> cleanThis = stack.pop();
|
||||
if (cleanThis != null) {
|
||||
for (String fld : cleanThis) {
|
||||
values.remove(fld);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isRecord() {
|
||||
return isRecord;
|
||||
}
|
||||
|
||||
|
||||
private void putValue(Map<String, Object> values, String fieldName, Object o) {
|
||||
if(o==null) return;
|
||||
Object val = values.get(fieldName);
|
||||
if (val == null) {
|
||||
values.put(fieldName, o);
|
||||
return;
|
||||
}
|
||||
if (val instanceof List) {
|
||||
List list = (List) val;
|
||||
list.add(o);
|
||||
return;
|
||||
}
|
||||
ArrayList l = new ArrayList();
|
||||
l.add(val);
|
||||
l.add(o);
|
||||
values.put(fieldName, l);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
} // end of class Node
|
||||
|
||||
|
||||
/**
|
||||
* The path is split into segments using the '/' as a seperator. However
|
||||
* this method deals with special cases where there is a slash '/' character
|
||||
* inside the attribute value e.g. x/@html='text/html'. We split by '/' but
|
||||
* then reassemble things were the '/' appears within a quoted sub-string.
|
||||
* <p/>
|
||||
* We have already enforced that the string must begin with a seperator. This
|
||||
* method depends heavily on how split behaves if the string starts with the
|
||||
* seperator or if a sequence of multiple seperator's appear.
|
||||
*/
|
||||
private static List<String> splitEscapeQuote(String str) {
|
||||
List<String> result = new LinkedList<>();
|
||||
String[] ss = str.split("/");
|
||||
for (int i = 0; i < ss.length; i++) { // i=1: skip seperator at start of string
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int quoteCount = 0;
|
||||
while (true) {
|
||||
sb.append(ss[i]);
|
||||
for (int j = 0; j < ss[i].length(); j++)
|
||||
if (ss[i].charAt(j) == '\'') quoteCount++;
|
||||
// have we got a split inside quoted sub-string?
|
||||
if ((quoteCount % 2) == 0) break;
|
||||
// yes!; replace the '/' and loop to concat next token
|
||||
i++;
|
||||
sb.append("/");
|
||||
}
|
||||
result.add(sb.toString());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Implement this interface to stream records as and when each one is found.
 */
public interface Handler {
  // NOTE: the redundant 'static' on this nested interface and 'public' on
  // its method were dropped; both are implicit for interface members.

  /**
   * @param record The record map. The key is the field name as provided in
   *               the addField() methods. The value can be a single String
   *               (for single valued fields) or a List&lt;String&gt; (for
   *               multiValued).
   * @param path   The split path for which this record is being emitted.
   *               If any exception is thrown, all parsing is aborted and
   *               the exception is propagated up.
   */
  void handle(Map<String, Object> record, String path);
}
|
||||
|
||||
public static Object parseSingleFieldValue(int ev, JSONParser parser, MethodFrameWrapper runnable) throws IOException {
|
||||
switch (ev) {
|
||||
case STRING:
|
||||
return parser.getString();
|
||||
case LONG:
|
||||
return parser.getLong();
|
||||
case NUMBER:
|
||||
return parser.getDouble();
|
||||
case BIGNUMBER:
|
||||
return parser.getNumberChars().toString();
|
||||
case BOOLEAN:
|
||||
return parser.getBoolean();
|
||||
case NULL:
|
||||
parser.getNull();
|
||||
return null;
|
||||
case ARRAY_START:
|
||||
return parseArrayFieldValue(ev, parser,runnable);
|
||||
case OBJECT_START:
|
||||
if(runnable !=null) {
|
||||
runnable.walk(OBJECT_START);
|
||||
return null;
|
||||
}
|
||||
consumeTillMatchingEnd(parser,1,0);
|
||||
return null;
|
||||
default:
|
||||
throw new RuntimeException("Error parsing JSON field value. Unexpected " + JSONParser.getEventString(ev));
|
||||
}
|
||||
}
|
||||
static abstract class MethodFrameWrapper {
|
||||
Node node;
|
||||
|
||||
public abstract void walk(int event) throws IOException;
|
||||
}
|
||||
|
||||
public static List<Object> parseArrayFieldValue(int ev, JSONParser parser, MethodFrameWrapper runnable) throws IOException {
|
||||
assert ev == ARRAY_START;
|
||||
|
||||
ArrayList lst = new ArrayList(2);
|
||||
for (; ; ) {
|
||||
ev = parser.nextEvent();
|
||||
if (ev == ARRAY_END) {
|
||||
if(lst.isEmpty()) return null;
|
||||
return lst;
|
||||
}
|
||||
Object val = parseSingleFieldValue(ev, parser, runnable);
|
||||
if(val != null) lst.add(val);
|
||||
}
|
||||
}
|
||||
|
||||
public static void consumeTillMatchingEnd(JSONParser parser, int obj, int arr) throws IOException {
|
||||
for (; ; ) {
|
||||
int event = parser.nextEvent();
|
||||
if (event == OBJECT_START) obj++;
|
||||
if (event == OBJECT_END) obj--;
|
||||
assert obj >= 0;
|
||||
if (event == ARRAY_START) arr++;
|
||||
if (event == ARRAY_END) arr--;
|
||||
assert arr >= 0;
|
||||
if (obj == 0 && arr == 0) break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,159 @@
|
|||
package org.apache.solr.common.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class TestJsonRecordReader extends SolrTestCaseJ4 {
|
||||
public void testOneLevelSplit() throws IOException {
|
||||
String json ="{\n" +
|
||||
" \"a\":\"A\" ,\n" +
|
||||
" \"b\":[\n" +
|
||||
" {\"c\":\"C\",\"d\":\"D\" ,\"e\": {\n" +
|
||||
" \"s\":\"S\",\n" +
|
||||
" \"t\":3}},\n" +
|
||||
" {\"c\":\"C1\",\"d\":\"D1\"},\n" +
|
||||
" {\"c\":\"C2\",\"d\":\"D2\"}\n" +
|
||||
" ]\n" +
|
||||
"}";
|
||||
// System.out.println(json);
|
||||
// All parameters are mapped with field name
|
||||
JsonRecordReader streamer = JsonRecordReader.getInst("/b", Arrays.asList(
|
||||
"a_s:/a",
|
||||
"c_s:/b/c",
|
||||
"d_s:/b/d",
|
||||
"e_s:/b/e/s",
|
||||
"e_i:/b/e/t"
|
||||
));
|
||||
|
||||
List<Map<String, Object>> records = streamer.getAllRecords(new StringReader(json));
|
||||
assertEquals(3, records.size());
|
||||
assertEquals( 3l, ((Map)records.get(0)).get("e_i") );
|
||||
assertEquals( "D2", ((Map)records.get(2)).get("d_s") );
|
||||
assertNull( ((Map)records.get(1)).get("e_s") );
|
||||
assertNull( ((Map)records.get(2)).get("e_s") );
|
||||
assertNull( ((Map)records.get(1)).get("e_i") );
|
||||
assertNull( ((Map)records.get(2)).get("e_i") );
|
||||
|
||||
// All parameters but /b/c is omitted
|
||||
streamer = JsonRecordReader.getInst("/b", Arrays.asList(
|
||||
"a:/a",
|
||||
"d:/b/d",
|
||||
"s:/b/e/s",
|
||||
"t:/b/e/t"
|
||||
));
|
||||
records = streamer.getAllRecords(new StringReader(json));
|
||||
for (Map<String, Object> record : records) {
|
||||
assertNull( record.get("c") );
|
||||
|
||||
}
|
||||
|
||||
//one nested /b/e/* object is completely ignored
|
||||
streamer = JsonRecordReader.getInst("/b", Arrays.asList(
|
||||
"a:/a",
|
||||
"c:/b/c",
|
||||
"d:/b/d"
|
||||
));
|
||||
records = streamer.getAllRecords(new StringReader(json));
|
||||
for (Map<String, Object> record : records) {
|
||||
assertNull( record.get("s") );
|
||||
assertNull( record.get("t") );
|
||||
}
|
||||
|
||||
//nested /b/e/* object is completely ignored even though /b/e is mapped
|
||||
streamer = JsonRecordReader.getInst("/b", Arrays.asList(
|
||||
"a_s:/a",
|
||||
"c_s:/b/c",
|
||||
"d_s:/b/d",
|
||||
"e:/b/e"
|
||||
|
||||
));
|
||||
records = streamer.getAllRecords(new StringReader(json));
|
||||
for (Map<String, Object> record : records) {
|
||||
assertNull( record.get("s") );
|
||||
assertNull( record.get("t") );
|
||||
assertNull( record.get("e") );
|
||||
}
|
||||
|
||||
|
||||
|
||||
streamer = JsonRecordReader.getInst("/b", Arrays.asList(
|
||||
"a_s:/a",
|
||||
"c_s:/b/c",
|
||||
"d_s:/b/d",
|
||||
"/b/e/*"
|
||||
));
|
||||
records = streamer.getAllRecords(new StringReader(json));
|
||||
assertEquals(3, records.size());
|
||||
assertEquals( 3l, ((Map)records.get(0)).get("t") );
|
||||
assertEquals( "S", ((Map)records.get(0)).get("s") );
|
||||
assertNull( ((Map)records.get(1)).get("s") );
|
||||
assertNull( ((Map)records.get(2)).get("s") );
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
public void testRecursiveWildCard() throws IOException {
|
||||
String json ="{\n" +
|
||||
" \"a\":\"A\" ,\n" +
|
||||
" \"b\":[\n" +
|
||||
" {\"c\":\"C\",\"d\":\"D\" ,\"e\": {\n" +
|
||||
" \"s\":\"S\",\n" +
|
||||
" \"t\":3 ,\"u\":{\"v\":3.1234,\"w\":false}}},\n" +
|
||||
" {\"c\":\"C1\",\"d\":\"D1\"},\n" +
|
||||
" {\"c\":\"C2\",\"d\":\"D2\"}\n" +
|
||||
" ]\n" +
|
||||
"}";
|
||||
JsonRecordReader streamer;
|
||||
List<Map<String, Object>> records;
|
||||
|
||||
streamer = JsonRecordReader.getInst("/b", Collections.singletonList("/b/**"));
|
||||
records = streamer.getAllRecords(new StringReader(json));
|
||||
assertEquals(3, records.size());
|
||||
assertEquals("records "+records, 3l, ((Map)records.get(0)).get("t") );
|
||||
assertEquals( "records "+records,"S", ((Map)records.get(0)).get("s") );
|
||||
assertEquals( "records "+records,3.1234, ((Map)records.get(0)).get("v") );
|
||||
assertEquals( "records "+records,false, ((Map)records.get(0)).get("w") );
|
||||
for (Map<String, Object> record : records) {
|
||||
assertNotNull("records "+records,record.get("c"));
|
||||
assertNotNull("records "+records,record.get("d"));
|
||||
}
|
||||
|
||||
streamer = JsonRecordReader.getInst("/", Collections.singletonList("/**"));
|
||||
records = streamer.getAllRecords(new StringReader(json));
|
||||
assertEquals(1, records.size());
|
||||
assertEquals(3, ((List)((Map)records.get(0)).get("c")).size() );
|
||||
assertEquals(3, ((List)((Map)records.get(0)).get("d")).size() );
|
||||
assertEquals("records "+records, 3l, ((Map)records.get(0)).get("t") );
|
||||
assertEquals( "records "+records,"S", ((Map)records.get(0)).get("s") );
|
||||
assertEquals( "records "+records,"A", ((Map)records.get(0)).get("a") );
|
||||
assertEquals( "records "+records,false, ((Map)records.get(0)).get("w") );
|
||||
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue