From 34d9f0a7a32e975435d3b0770155e67565f74735 Mon Sep 17 00:00:00 2001
From: Noble Paul
Date: Wed, 1 Jun 2016 18:33:54 +0530
Subject: [PATCH] SOLR-7123: '/update/json/docs' path supports nested documents

---
 solr/CHANGES.txt                                |   2 +
 .../solr/handler/loader/JsonLoader.java         |  37 +++++--
 .../apache/solr/handler/JsonLoaderTest.java     |  21 ++++
 .../solr/common/util/JsonRecordReader.java      | 100 ++++++++++++++----
 .../common/util/TestJsonRecordReader.java       |  62 ++++++++---
 5 files changed, 177 insertions(+), 45 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 532f0b4047a..d395dfc1d4b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -148,6 +148,8 @@ New Features
 * SOLR-8583: Apply highlighting to hl.alternateField by default for Default and FastVectorHighlighter.
   Turn off with hl.highlightAlternate=false (janhoy, David Smiley)
 
+* SOLR-7123: '/update/json/docs' path supports nested documents (noble)
+
 Bug Fixes
 ----------------------
 
diff --git a/solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java b/solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
index ba800ff3faf..ffbfe972569 100644
--- a/solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
+++ b/solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
@@ -64,7 +64,7 @@ import static org.apache.solr.common.params.CommonParams.PATH;
  */
 public class JsonLoader extends ContentStreamLoader {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-  private static final String CHILD_DOC_KEY = "_childDocuments_";
+  public static final String CHILD_DOC_KEY = "_childDocuments_";
 
   @Override
   public String getDefaultWT() {
@@ -125,8 +125,9 @@ public class JsonLoader extends ContentStreamLoader {
       String path = (String) req.getContext().get(PATH);
       if (UpdateRequestHandler.DOC_PATH.equals(path) || "false".equals(req.getParams().get("json.command"))) {
         String split = req.getParams().get("split");
+        String childSplit = req.getParams().get("child.split");
         String[] f = req.getParams().getParams("f");
-        handleSplitMode(split, f, reader);
+        handleSplitMode(split, childSplit, f, reader);
         return;
       }
       parser = new JSONParser(reader);
@@ -193,7 +194,7 @@ public class JsonLoader extends ContentStreamLoader {
       }
     }
 
-    private void handleSplitMode(String split, String[] fields, final Reader reader) throws IOException {
+    private void handleSplitMode(String split, String childSplit, String[] fields, final Reader reader) throws IOException {
      if (split == null) split = "/";
      if (fields == null || fields.length == 0) fields = new String[]{"$FQN:/**"};
      final boolean echo = "true".equals(req.getParams().get("echo"));
@@ -208,7 +209,7 @@
      }
 
-      JsonRecordReader jsonRecordReader = JsonRecordReader.getInst(split, Arrays.asList(fields));
+      JsonRecordReader jsonRecordReader = JsonRecordReader.getInst(split, childSplit, Arrays.asList(fields));
      jsonRecordReader.streamRecords(parser, new JsonRecordReader.Handler() {
        ArrayList docs = null;
 
@@ -221,15 +222,16 @@
            docs = new ArrayList();
            rsp.add("docs", docs);
          }
+          if (copy.containsKey(null)) {
+            copy.put(CHILD_DOC_KEY, copy.get(null));
+            copy.remove(null);
+          }
          docs.add(copy);
        } else {
          AddUpdateCommand cmd = new AddUpdateCommand(req);
          cmd.commitWithin = commitWithin;
          cmd.overwrite = overwrite;
-          cmd.solrDoc = new SolrInputDocument();
-          for (Map.Entry<String, Object> entry : copy.entrySet()) {
-            cmd.solrDoc.setField(entry.getKey(), entry.getValue());
-          }
+          cmd.solrDoc = buildDoc(copy);
          try {
            processor.processAdd(cmd);
          } catch (IOException e) {
@@ -240,6 +242,25 @@
      });
    }
 
+    private SolrInputDocument buildDoc(Map<String, Object> m) {
+      SolrInputDocument result = new SolrInputDocument();
+      for (Map.Entry<String, Object> e : m.entrySet()) {
+        if (e.getKey() == null) { // special case. JsonRecordReader emits child docs with null key
+          if (e.getValue() instanceof List) {
+            List value = (List) e.getValue();
+            for (Object o : value) {
+              if (o instanceof Map) result.addChildDocument(buildDoc((Map) o));
+            }
+          } else if (e.getValue() instanceof Map) {
+            result.addChildDocument(buildDoc((Map) e.getValue()));
+          }
+        } else {
+          result.setField(e.getKey(), e.getValue());
+        }
+      }
+      return result;
+    }
+
    private Map getDocMap(Map record, JSONParser parser, String srcField, boolean mapUniqueKeyOnly) {
      Map result = record;
      if (srcField != null && parser instanceof RecordingJSONParser) {
diff --git a/solr/core/src/test/org/apache/solr/handler/JsonLoaderTest.java b/solr/core/src/test/org/apache/solr/handler/JsonLoaderTest.java
index e27ca1a0639..a904d9ef02e 100644
--- a/solr/core/src/test/org/apache/solr/handler/JsonLoaderTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/JsonLoaderTest.java
@@ -366,7 +366,28 @@ public class JsonLoaderTest extends SolrTestCaseJ4 {
    obj = (Map) ObjectBuilder.fromJSON(content);
    assertEquals("2", obj.get("id"));
 
+    String json = "{a:{" +
+        "b:[{c:c1, e:e1},{c:c2, e :e2, d:{p:q}}]," +
+        "x:y" +
+        "}}";
+    req = req("split", "/", "child.split", "/a/b");
+    req.getContext().put("path", "/update/json/docs");
+    rsp = new SolrQueryResponse();
+    p = new BufferingRequestProcessor(null);
+    loader = new JsonLoader();
+    loader.load(req, rsp, new ContentStreamBase.StringStream(json), p);
+    assertEquals(1, p.addCommands.size());
+    assertEquals("y", p.addCommands.get(0).solrDoc.getFieldValue("a.x"));
+    List<SolrInputDocument> children = p.addCommands.get(0).solrDoc.getChildDocuments();
+    assertEquals(2, children.size());
+    SolrInputDocument d = children.get(0);
+    assertEquals(d.getFieldValue("c"), "c1");
+    assertEquals(d.getFieldValue("e"), "e1");
+    d = children.get(1);
+    assertEquals(d.getFieldValue("c"), "c2");
+    assertEquals(d.getFieldValue("e"), "e2");
+    assertEquals(d.getFieldValue("d.p"), "q");
  }
 
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JsonRecordReader.java b/solr/solrj/src/java/org/apache/solr/common/util/JsonRecordReader.java
index 5470446bd8d..12c0d833500 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/JsonRecordReader.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/JsonRecordReader.java
@@ -34,9 +34,11 @@
 
  private Node rootNode = new Node("/", (Node) null);
 
-  public static JsonRecordReader getInst(String split, List<String> fieldMappings) {
+  public static JsonRecordReader getInst(String split, String childSplit, List<String> fieldMappings) {
 
-    JsonRecordReader jsonRecordReader = new JsonRecordReader(split);
+    JsonRecordReader jsonRecordReader = new JsonRecordReader();
+    jsonRecordReader.addSplit(split);
+    if (childSplit != null) jsonRecordReader.addSplit(childSplit);
    for (String s : fieldMappings) {
      String path = s;
      int idx = s.indexOf(':');
@@ -50,14 +52,21 @@
    return jsonRecordReader;
  }
 
+  public static JsonRecordReader getInst(String split, List<String> fieldMappings) {
+    return getInst(split, null, fieldMappings);
+  }
+
+  private JsonRecordReader() {
+  }
 
  /**
-   * A constructor called with a '|' separated list of path expressions
+   * a '|' separated list of path expressions
   * which define sub sections of the JSON stream that are to be emitted as
   * separate records.
+   * It is possible to have multiple levels of split: one for the parent and one for the child.
+   * Each child record (or a list of records) will be emitted as part of the parent record, with
+   * null as the key.
   *
-   * @param splitPath The PATH for which a record is emitted. Once the
-   *                  path tag is encountered, the Node.getInst method starts collecting wanted
-   *                  fields and at the close of the tag, a record is emitted containing all
+   * @param splitPath The PATH for which a record is emitted. A record is emitted containing all
   *                  fields collected since the tag start. Once
   *                  emitted the collected fields are cleared. Any fields collected in the
   *                  parent tag or above will also be included in the record, but these are
   *                  not cleared after emitting the record.
   *                  <p>
   *                  It uses the ' | ' syntax of PATH to pass in multiple paths.
   */
-  private JsonRecordReader(String splitPath) {
+
+  void addSplit(String splitPath) {
    String[] splits = splitPath.split("\\|");
    for (String split : splits) {
      split = split.trim();
@@ -93,7 +103,7 @@
    if (!path.startsWith("/")) throw new RuntimeException("All paths must start with '/' " + path);
    List<String> paths = splitEscapeQuote(path);
    if (paths.size() == 0) {
-      if (isRecord) rootNode.isRecord = true;
+      if (isRecord) rootNode.setAsRecord();
      return;//the path is "/"
    }
    // deal with how split behaves when separator starts with an empty string!
@@ -113,11 +123,8 @@
   */
  public List<Map<String, Object>> getAllRecords(Reader r) throws IOException {
    final List<Map<String, Object>> results = new ArrayList<>();
-    streamRecords(r, new Handler() {
-      @Override
-      public void handle(Map<String, Object> record, String path) {
-        results.add(record);
-      }
+    streamRecords(r, (record, path) -> {
+      results.add(record);
    });
    return results;
  }
@@ -158,6 +165,7 @@
    Node parent; // parent Node in the tree
    boolean isLeaf = false; // flag: store/emit streamed text for this node
    boolean isRecord = false; //flag: this Node starts a new record
+    boolean isChildRecord = false;
    Node wildCardChild;
    Node recursiveWildCardChild;
    private boolean useFqn = false;
@@ -176,6 +184,24 @@
      this.fieldName = fieldName; // name to store collected values against
    }
 
+    void setAsRecord() {
+      if (isMyChildARecord()) throw new RuntimeException(name + " has a parent node at my level or lower");
+      isChildRecord = hasParentRecord();
+      isRecord = true;
+    }
+
+    private boolean hasParentRecord() {
+      return isRecord || parent != null && parent.hasParentRecord();
+    }
+
+    private boolean isMyChildARecord() {
+      if (isRecord) return true;
+      for (Node node : childNodes.values()) {
+        if (node.isMyChildARecord()) return true;
+      }
+      return false;
+    }
+
    /**
     * Walk the Node tree propagating any wild Descendant information to
@@ -222,7 +248,7 @@
        assert !WILDCARD_PATH.equals(n.name);
        assert !RECURSIVE_WILDCARD_PATH.equals(n.name);
        // split attribute
-        n.isRecord = true; // flag: split attribute, prepare to emit rec
+        n.setAsRecord(); // flag: split attribute, prepare to emit rec
        n.splitPath = fieldName; // the full split attribute path
      } else {
        if (n.name.equals(WILDCARD_PATH)) {
@@ -340,17 +366,30 @@
    @Override
    public void walk(int event) throws IOException {
      if (event == OBJECT_START) {
-        node.handleObjectStart(parser, handler, values, stack, isRecordStarted, this);
+        walkObject();
      } else if (event == ARRAY_START) {
        for (; ; ) {
          event = parser.nextEvent();
          if (event == ARRAY_END) break;
          if (event == OBJECT_START) {
-            node.handleObjectStart(parser, handler, values, stack, isRecordStarted, this);
+            walkObject();
          }
        }
      }
    }
 
+    void walkObject() throws IOException {
+      if (node.isChildRecord) {
+        node.handleObjectStart(parser,
+            (record, path) -> addChildDoc2ParentDoc(record, values),
+            new LinkedHashMap<>(),
+            new Stack<>(),
+            true,
+            this
+        );
+      } else {
+        node.handleObjectStart(parser, handler, values, stack, isRecordStarted, this);
+      }
+    }
  }
 
@@ -372,7 +411,7 @@
      if (node == null) node = recursiveWildCardChild;
 
      if (node != null) {
-        if (node.isLeaf) {//this is a leaf collect data here
+        if (node.isLeaf) {//this is a leaf. Collect data here
          event = parser.nextEvent();
          String nameInRecord = node.fieldName == null ?
              getNameInRecord(name, frameWrapper, node) : node.fieldName;
          MethodFrameWrapper runnable = null;
@@ -420,10 +459,27 @@
        }
      }
 
+    private void addChildDoc2ParentDoc(Map<String, Object> record, Map<String, Object> values) {
+      Object oldVal = values.get(null);
+      if (oldVal == null) {
+        values.put(null, record);
+      } else if (oldVal instanceof List) {
+        ((List) oldVal).add(record);
+      } else {
+        ArrayList l = new ArrayList();
+        l.add(oldVal);
+        l.add(record);
+        values.put(null, l);
+      }
+    }
+
+    /**
+     * Construct the name as it would appear in the final record
+     */
    private String getNameInRecord(String name, MethodFrameWrapper frameWrapper, Node n) {
-      if (frameWrapper == null || !n.useFqn) return name;
+      if (frameWrapper == null || !n.useFqn || frameWrapper.node.isChildRecord) return name;
      StringBuilder sb = new StringBuilder();
-      frameWrapper.prependName(sb);
+      frameWrapper.addName(sb);
      return sb.append(DELIM).append(name).toString();
    }
 
@@ -539,9 +595,9 @@
    MethodFrameWrapper parent;
    String name;
 
-    void prependName(StringBuilder sb) {
-      if (parent != null) {
-        parent.prependName(sb);
+    void addName(StringBuilder sb) {
+      if (parent != null && !parent.node.isChildRecord) {
+        parent.addName(sb);
        sb.append(DELIM);
      }
      sb.append(name);
diff --git a/solr/solrj/src/test/org/apache/solr/common/util/TestJsonRecordReader.java b/solr/solrj/src/test/org/apache/solr/common/util/TestJsonRecordReader.java
index 328d7b3bf8d..e11cc2953dc 100644
--- a/solr/solrj/src/test/org/apache/solr/common/util/TestJsonRecordReader.java
+++ b/solr/solrj/src/test/org/apache/solr/common/util/TestJsonRecordReader.java
@@ -17,6 +17,7 @@
 package org.apache.solr.common.util;
 
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.handler.loader.JsonLoader;
 import org.apache.solr.util.RecordingJSONParser;
 
 import java.io.IOException;
@@ -202,6 +203,44 @@ public class TestJsonRecordReader extends SolrTestCaseJ4 {
 
  }
 
+  public void testNestedDocs() throws Exception {
+    String json = "{a:{" +
+        "b:{c:d}," +
+        "x:y" +
+        "}}";
+    JsonRecordReader streamer = JsonRecordReader.getInst("/", "/a/b", Arrays.asList("/a/x", "/a/b/*"));
+    streamer.streamRecords(new StringReader(json), (record, path) -> {
+      assertEquals(record.get("x"), "y");
+      assertEquals(((Map) record.get(null)).get("c"), "d");
+    });
+    json = "{a:{" +
+        "b:[{c:c1, e:e1},{c:c2, e :e2, d:{p:q}}]," +
+        "x:y" +
+        "}}";
+    streamer.streamRecords(new StringReader(json), (record, path) -> {
+      assertEquals(record.get("x"), "y");
+      List l = (List) record.get(null);
+      Map m = (Map) l.get(0);
+      assertEquals(m.get("c"), "c1");
+      assertEquals(m.get("e"), "e1");
+      m = (Map) l.get(1);
+      assertEquals(m.get("c"), "c2");
+      assertEquals(m.get("e"), "e2");
+    });
+    streamer = JsonRecordReader.getInst("/", "/a/b", Arrays.asList("$FQN:/**"));
+    streamer.streamRecords(new StringReader(json), (record, path) -> {
+      assertEquals(record.get("a.x"), "y");
+      List l = (List) record.get(null);
+      Map m = (Map) l.get(0);
+      assertEquals(m.get("c"), "c1");
+      assertEquals(m.get("e"), "e1");
+      m = (Map) l.get(1);
+      assertEquals(m.get("c"), "c2");
+      assertEquals(m.get("e"), "e2");
+      assertEquals(m.get("d.p"), "q");
+    });
+  }
+
  public void testNestedJsonWithFloats() throws Exception {
 
    String json = "{\n" +
@@ -264,16 +303,13 @@ public class TestJsonRecordReader extends SolrTestCaseJ4 {
 
    final AtomicReference<WeakReference<String>> ref = new AtomicReference<>();
    streamer = JsonRecordReader.getInst("/", Collections.singletonList("$FQN:/**"));
-    streamer.streamRecords(new StringReader(json), new JsonRecordReader.Handler() {
-      @Override
-      public void handle(Map<String, Object> record, String path) {
-        System.gc();
-        if (ref.get() != null) {
-          assertNull("This reference is still intact :" +ref.get().get() ,ref.get().get());
-        }
-        String fName = record.keySet().iterator().next();
-        ref.set(new WeakReference(fName));
+    streamer.streamRecords(new StringReader(json), (record, path) -> {
+      System.gc();
+      if (ref.get() != null) {
+        assertNull("This reference is still intact :" + ref.get().get(), ref.get().get());
      }
+      String fName = record.keySet().iterator().next();
+      ref.set(new WeakReference<>(fName));
    });
 
@@ -621,12 +657,8 @@
    RecordingJSONParser parser = new RecordingJSONParser(new StringReader(json));
    JsonRecordReader recordReader = JsonRecordReader.getInst("/",Collections.singletonList("/**"));
    try {
-      recordReader.streamRecords(parser, new JsonRecordReader.Handler() {
-        @Override
-        public void handle(Map<String, Object> record, String path) {
-          /*don't care*/
-        }
-      });
+      recordReader.streamRecords(parser, (record, path) -> {
+      }); /*don't care*/
    } catch (RuntimeException e) {
      parser.error("").printStackTrace();
      throw e;
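
Illustrative usage, not part of the patch: the sketch below drives the new child-split support through JsonRecordReader directly, following the same pattern as testNestedDocs above. The parent record is split at "/" and child records at "/a/b"; each child map (or a list of such maps) is surfaced under the null key of the parent record, which JsonLoader.buildDoc later turns into SolrInputDocument children. The getInst and streamRecords calls and the null-key convention come from the patch itself; the sample JSON and the NestedDocsExample class name are invented for this example.

import java.io.StringReader;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.apache.solr.common.util.JsonRecordReader;

// Hypothetical example class; only JsonRecordReader and its new child split come from the patch.
public class NestedDocsExample {
  public static void main(String[] args) throws Exception {
    // Two children under /a/b, plus a plain field a/x on the parent.
    String json = "{\"a\":{\"b\":[{\"c\":\"c1\"},{\"c\":\"c2\"}],\"x\":\"y\"}}";

    // split="/" emits one parent record per document, child.split="/a/b" emits nested records.
    JsonRecordReader reader = JsonRecordReader.getInst("/", "/a/b", Arrays.asList("$FQN:/**"));

    reader.streamRecords(new StringReader(json), (rec, path) -> {
      // $FQN maps parent fields to fully qualified names, e.g. "a.x".
      System.out.println("parent a.x = " + rec.get("a.x"));

      // Child records arrive under the null key: a List of Maps when there is
      // more than one child, a single Map when there is exactly one.
      List<?> children = (List<?>) rec.get(null);
      for (Object child : children) {
        System.out.println("child c = " + ((Map<?, ?>) child).get("c"));
      }
    });
  }
}

Posting the same JSON body to /update/json/docs with split=/ and child.split=/a/b exercises the JsonLoader path instead and yields the child SolrInputDocuments directly, as the new JsonLoaderTest case shows.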