SOLR-7123: '/update/json/docs' path supports nested documents

This commit is contained in:
Noble Paul 2016-06-01 18:33:54 +05:30
parent 18256fc287
commit 34d9f0a7a3
5 changed files with 177 additions and 45 deletions

View File

@ -148,6 +148,8 @@ New Features
* SOLR-8583: Apply highlighting to hl.alternateField by default for Default and FastVectorHighlighter.
Turn off with hl.highlightAlternate=false (janhoy, David Smiley)
* SOLR-7123: '/update/json/docs' path supports nested documents (noble)
Bug Fixes
----------------------

View File

@ -64,7 +64,7 @@ import static org.apache.solr.common.params.CommonParams.PATH;
*/
public class JsonLoader extends ContentStreamLoader {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final String CHILD_DOC_KEY = "_childDocuments_";
public static final String CHILD_DOC_KEY = "_childDocuments_";
@Override
public String getDefaultWT() {
@ -125,8 +125,9 @@ public class JsonLoader extends ContentStreamLoader {
String path = (String) req.getContext().get(PATH);
if (UpdateRequestHandler.DOC_PATH.equals(path) || "false".equals(req.getParams().get("json.command"))) {
String split = req.getParams().get("split");
String childSplit = req.getParams().get("child.split");
String[] f = req.getParams().getParams("f");
handleSplitMode(split, f, reader);
handleSplitMode(split, childSplit, f, reader);
return;
}
parser = new JSONParser(reader);
@ -193,7 +194,7 @@ public class JsonLoader extends ContentStreamLoader {
}
}
private void handleSplitMode(String split, String[] fields, final Reader reader) throws IOException {
private void handleSplitMode(String split, String childSplit, String[] fields, final Reader reader) throws IOException {
if (split == null) split = "/";
if (fields == null || fields.length == 0) fields = new String[]{"$FQN:/**"};
final boolean echo = "true".equals(req.getParams().get("echo"));
@ -208,7 +209,7 @@ public class JsonLoader extends ContentStreamLoader {
}
JsonRecordReader jsonRecordReader = JsonRecordReader.getInst(split, Arrays.asList(fields));
JsonRecordReader jsonRecordReader = JsonRecordReader.getInst(split, childSplit, Arrays.asList(fields));
jsonRecordReader.streamRecords(parser, new JsonRecordReader.Handler() {
ArrayList docs = null;
@ -221,15 +222,16 @@ public class JsonLoader extends ContentStreamLoader {
docs = new ArrayList();
rsp.add("docs", docs);
}
if (copy.containsKey(null)) {
copy.put(CHILD_DOC_KEY, copy.get(null));
copy.remove(null);
}
docs.add(copy);
} else {
AddUpdateCommand cmd = new AddUpdateCommand(req);
cmd.commitWithin = commitWithin;
cmd.overwrite = overwrite;
cmd.solrDoc = new SolrInputDocument();
for (Map.Entry<String, Object> entry : copy.entrySet()) {
cmd.solrDoc.setField(entry.getKey(), entry.getValue());
}
cmd.solrDoc = buildDoc(copy);
try {
processor.processAdd(cmd);
} catch (IOException e) {
@ -240,6 +242,25 @@ public class JsonLoader extends ContentStreamLoader {
});
}
private SolrInputDocument buildDoc(Map<String, Object> m) {
SolrInputDocument result = new SolrInputDocument();
for (Map.Entry<String, Object> e : m.entrySet()) {
if (e.getKey() == null) {// special case. JsonRecordReader emits child docs with null key
if (e.getValue() instanceof List) {
List value = (List) e.getValue();
for (Object o : value) {
if (o instanceof Map) result.addChildDocument(buildDoc((Map) o));
}
} else if (e.getValue() instanceof Map) {
result.addChildDocument(buildDoc((Map) e));
}
} else {
result.setField(e.getKey(), e.getValue());
}
}
return result;
}
private Map<String, Object> getDocMap(Map<String, Object> record, JSONParser parser, String srcField, boolean mapUniqueKeyOnly) {
Map result = record;
if (srcField != null && parser instanceof RecordingJSONParser) {

View File

@ -366,7 +366,28 @@ public class JsonLoaderTest extends SolrTestCaseJ4 {
obj = (Map) ObjectBuilder.fromJSON(content);
assertEquals("2", obj.get("id"));
String json = "{a:{" +
"b:[{c:c1, e:e1},{c:c2, e :e2, d:{p:q}}]," +
"x:y" +
"}}";
req = req("split", "/", "child.split" , "/a/b" );
req.getContext().put("path","/update/json/docs");
rsp = new SolrQueryResponse();
p = new BufferingRequestProcessor(null);
loader = new JsonLoader();
loader.load(req, rsp, new ContentStreamBase.StringStream(json), p);
assertEquals( 1, p.addCommands.size() );
assertEquals("y", p.addCommands.get(0).solrDoc.getFieldValue("a.x"));
List<SolrInputDocument> children = p.addCommands.get(0).solrDoc.getChildDocuments();
assertEquals(2, children.size());
SolrInputDocument d = children.get(0);
assertEquals(d.getFieldValue("c"), "c1");
assertEquals(d.getFieldValue("e"), "e1");
d = children.get(1);
assertEquals(d.getFieldValue("c"), "c2");
assertEquals(d.getFieldValue("e"), "e2");
assertEquals(d.getFieldValue("d.p"), "q");
}

View File

@ -34,9 +34,11 @@ public class JsonRecordReader {
private Node rootNode = new Node("/", (Node) null);
public static JsonRecordReader getInst(String split, List<String> fieldMappings) {
public static JsonRecordReader getInst(String split, String childSplit, List<String> fieldMappings) {
JsonRecordReader jsonRecordReader = new JsonRecordReader(split);
JsonRecordReader jsonRecordReader = new JsonRecordReader();
jsonRecordReader.addSplit(split);
if (childSplit != null) jsonRecordReader.addSplit(childSplit);
for (String s : fieldMappings) {
String path = s;
int idx = s.indexOf(':');
@ -50,14 +52,21 @@ public class JsonRecordReader {
return jsonRecordReader;
}
public static JsonRecordReader getInst(String split, List<String> fieldMappings) {
return getInst(split, null, fieldMappings);
}
private JsonRecordReader() {
}
/**
* A constructor called with a '|' separated list of path expressions
* a '|' separated list of path expressions
* which define sub sections of the JSON stream that are to be emitted as
* separate records.
* It is possible to have multiple levels of split one for parent and one for child
* each child record (or a list of records) will be emitted as a part of the parent record with
* null as the key
*
* @param splitPath The PATH for which a record is emitted. Once the
* path tag is encountered, the Node.getInst method starts collecting wanted
* fields and at the close of the tag, a record is emitted containing all
* @param splitPath The PATH for which a record is emitted. A record is emitted containing all
* fields collected since the tag start. Once
* emitted the collected fields are cleared. Any fields collected in the
* parent tag or above will also be included in the record, but these are
@ -65,7 +74,8 @@ public class JsonRecordReader {
* <p>
* It uses the ' | ' syntax of PATH to pass in multiple paths.
*/
private JsonRecordReader(String splitPath) {
void addSplit(String splitPath) {
String[] splits = splitPath.split("\\|");
for (String split : splits) {
split = split.trim();
@ -93,7 +103,7 @@ public class JsonRecordReader {
if (!path.startsWith("/")) throw new RuntimeException("All paths must start with '/' " + path);
List<String> paths = splitEscapeQuote(path);
if (paths.size() == 0) {
if (isRecord) rootNode.isRecord = true;
if (isRecord) rootNode.setAsRecord();
return;//the path is "/"
}
// deal with how split behaves when separator starts with an empty string!
@ -113,11 +123,8 @@ public class JsonRecordReader {
*/
public List<Map<String, Object>> getAllRecords(Reader r) throws IOException {
final List<Map<String, Object>> results = new ArrayList<>();
streamRecords(r, new Handler() {
@Override
public void handle(Map<String, Object> record, String path) {
streamRecords(r, (record, path) -> {
results.add(record);
}
});
return results;
}
@ -158,6 +165,7 @@ public class JsonRecordReader {
Node parent; // parent Node in the tree
boolean isLeaf = false; // flag: store/emit streamed text for this node
boolean isRecord = false; //flag: this Node starts a new record
boolean isChildRecord = false;
Node wildCardChild;
Node recursiveWildCardChild;
private boolean useFqn = false;
@ -176,6 +184,24 @@ public class JsonRecordReader {
this.fieldName = fieldName; // name to store collected values against
}
void setAsRecord() {
if (isMyChildARecord()) throw new RuntimeException(name + " has a parent node at my level or lower");
isChildRecord = hasParentRecord();
isRecord = true;
}
private boolean hasParentRecord() {
return isRecord || parent != null && parent.hasParentRecord();
}
private boolean isMyChildARecord() {
if (isRecord) return true;
for (Node node : childNodes.values()) {
if (node.isMyChildARecord()) return true;
}
return false;
}
/**
* Walk the Node tree propagating any wild Descendant information to
@ -222,7 +248,7 @@ public class JsonRecordReader {
assert !WILDCARD_PATH.equals(n.name);
assert !RECURSIVE_WILDCARD_PATH.equals(n.name);
// split attribute
n.isRecord = true; // flag: split attribute, prepare to emit rec
n.setAsRecord(); // flag: split attribute, prepare to emit rec
n.splitPath = fieldName; // the full split attribute path
} else {
if (n.name.equals(WILDCARD_PATH)) {
@ -340,17 +366,30 @@ public class JsonRecordReader {
@Override
public void walk(int event) throws IOException {
if (event == OBJECT_START) {
node.handleObjectStart(parser, handler, values, stack, isRecordStarted, this);
walkObject();
} else if (event == ARRAY_START) {
for (; ; ) {
event = parser.nextEvent();
if (event == ARRAY_END) break;
if (event == OBJECT_START) {
node.handleObjectStart(parser, handler, values, stack, isRecordStarted, this);
walkObject();
}
}
}
}
void walkObject() throws IOException {
if (node.isChildRecord) {
node.handleObjectStart(parser,
(record, path) -> addChildDoc2ParentDoc(record, values),
new LinkedHashMap<>(),
new Stack<>(),
true,
this
);
} else {
node.handleObjectStart(parser, handler, values, stack, isRecordStarted, this);
}
}
}
@ -372,7 +411,7 @@ public class JsonRecordReader {
if (node == null) node = recursiveWildCardChild;
if (node != null) {
if (node.isLeaf) {//this is a leaf collect data here
if (node.isLeaf) {//this is a leaf. Collect data here
event = parser.nextEvent();
String nameInRecord = node.fieldName == null ? getNameInRecord(name, frameWrapper, node) : node.fieldName;
MethodFrameWrapper runnable = null;
@ -420,10 +459,27 @@ public class JsonRecordReader {
}
}
private void addChildDoc2ParentDoc(Map<String, Object> record, Map<String, Object> values) {
Object oldVal = values.get(null);
if (oldVal == null) {
values.put(null, record);
} else if (oldVal instanceof List) {
((List) oldVal).add(record);
} else {
ArrayList l = new ArrayList();
l.add(oldVal);
l.add(record);
values.put(null, l);
}
}
/**
* Construct the name as it would appear in the final record
*/
private String getNameInRecord(String name, MethodFrameWrapper frameWrapper, Node n) {
if (frameWrapper == null || !n.useFqn) return name;
if (frameWrapper == null || !n.useFqn || frameWrapper.node.isChildRecord) return name;
StringBuilder sb = new StringBuilder();
frameWrapper.prependName(sb);
frameWrapper.addName(sb);
return sb.append(DELIM).append(name).toString();
}
@ -539,9 +595,9 @@ public class JsonRecordReader {
MethodFrameWrapper parent;
String name;
void prependName(StringBuilder sb) {
if (parent != null) {
parent.prependName(sb);
void addName(StringBuilder sb) {
if (parent != null && !parent.node.isChildRecord) {
parent.addName(sb);
sb.append(DELIM);
}
sb.append(name);

View File

@ -17,6 +17,7 @@
package org.apache.solr.common.util;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.handler.loader.JsonLoader;
import org.apache.solr.util.RecordingJSONParser;
import java.io.IOException;
@ -202,6 +203,44 @@ public class TestJsonRecordReader extends SolrTestCaseJ4 {
}
public void testNestedDocs() throws Exception {
String json = "{a:{" +
"b:{c:d}," +
"x:y" +
"}}";
JsonRecordReader streamer = JsonRecordReader.getInst("/", "/a/b", Arrays.asList("/a/x", "/a/b/*"));
streamer.streamRecords(new StringReader(json), (record, path) -> {
assertEquals(record.get("x"), "y");
assertEquals(((Map) record.get(null)).get("c"), "d");
});
json = "{a:{" +
"b:[{c:c1, e:e1},{c:c2, e :e2, d:{p:q}}]," +
"x:y" +
"}}";
streamer.streamRecords(new StringReader(json), (record, path) -> {
assertEquals(record.get("x"), "y");
List l = (List) record.get(null);
Map m = (Map) l.get(0);
assertEquals(m.get("c"), "c1");
assertEquals(m.get("e"), "e1");
m = (Map) l.get(1);
assertEquals(m.get("c"), "c2");
assertEquals(m.get("e"), "e2");
});
streamer = JsonRecordReader.getInst("/", "/a/b", Arrays.asList("$FQN:/**"));
streamer.streamRecords(new StringReader(json), (record, path) -> {
assertEquals(record.get("a.x"), "y");
List l = (List) record.get(null);
Map m = (Map) l.get(0);
assertEquals(m.get("c"), "c1");
assertEquals(m.get("e"), "e1");
m = (Map) l.get(1);
assertEquals(m.get("c"), "c2");
assertEquals(m.get("e"), "e2");
assertEquals(m.get("d.p"), "q");
});
}
public void testNestedJsonWithFloats() throws Exception {
String json = "{\n" +
@ -264,16 +303,13 @@ public class TestJsonRecordReader extends SolrTestCaseJ4 {
final AtomicReference<WeakReference<String>> ref = new AtomicReference<>();
streamer = JsonRecordReader.getInst("/", Collections.singletonList("$FQN:/**"));
streamer.streamRecords(new StringReader(json), new JsonRecordReader.Handler() {
@Override
public void handle(Map<String, Object> record, String path) {
streamer.streamRecords(new StringReader(json), (record, path) -> {
System.gc();
if (ref.get() != null) {
assertNull("This reference is still intact :" + ref.get().get(), ref.get().get());
}
String fName = record.keySet().iterator().next();
ref.set(new WeakReference<String>(fName));
}
ref.set(new WeakReference<>(fName));
});
@ -621,12 +657,8 @@ public class TestJsonRecordReader extends SolrTestCaseJ4 {
RecordingJSONParser parser = new RecordingJSONParser(new StringReader(json));
JsonRecordReader recordReader = JsonRecordReader.getInst("/",Collections.singletonList("/**"));
try {
recordReader.streamRecords(parser, new JsonRecordReader.Handler() {
@Override
public void handle(Map<String, Object> record, String path) {
/*don't care*/
}
});
recordReader.streamRecords(parser, (record, path) -> {
}); /*don't care*/
} catch (RuntimeException e) {
parser.error("").printStackTrace();
throw e;