From 38bf976cd4b9e324c21664bd7ae3d554df803705 Mon Sep 17 00:00:00 2001 From: Moshe Date: Tue, 7 Aug 2018 13:52:11 -0400 Subject: [PATCH] SOLR-12485: Solr-Update XML format now accepts child documents under a field thus providing a semantic relationship. (like allowed for JSON). Improved XML.java a bit Closes #430 --- solr/CHANGES.txt | 3 + .../apache/solr/handler/loader/XMLLoader.java | 18 ++- .../solr/update/AddBlockUpdateTest.java | 148 ++++++++++++++++++ .../solr/client/solrj/util/ClientUtils.java | 20 ++- .../java/org/apache/solr/common/util/XML.java | 100 +++--------- .../org/apache/solr/util/BaseTestHarness.java | 23 +-- 6 files changed, 216 insertions(+), 96 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 94960bb1d33..3ede98b7c98 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -150,6 +150,9 @@ New Features * SOLR-12592: support #EQUAL function, range operator, decimal and percentage in cores in autoscaling policies (noble) +* SOLR-12485: Uploading docs in XML now supports child documents as field values, thus providing a label to the + relationship instead of the current "anonymous" relationship. (Moshe Bla, David Smiley) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java index a07aff24a20..724a40c129b 100644 --- a/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java +++ b/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java @@ -404,6 +404,7 @@ public class XMLLoader extends ContentStreamLoader { StringBuilder text = new StringBuilder(); String name = null; boolean isNull = false; + boolean isLabeledChildDoc = false; String update = null; Collection subDocs = null; Map> updateMap = null; @@ -453,7 +454,13 @@ public class XMLLoader extends ContentStreamLoader { } break; } - doc.addField(name, v); + if(!isLabeledChildDoc){ + // only add data if this is not a childDoc, since it was added already + doc.addField(name, v); + } else { + // reset so next field is not treated as child doc + isLabeledChildDoc = false; + } // field is over name = null; } @@ -463,6 +470,15 @@ public class XMLLoader extends ContentStreamLoader { text.setLength(0); String localName = parser.getLocalName(); if ("doc".equals(localName)) { + if(name != null) { + // flag to prevent spaces after doc from being added + isLabeledChildDoc = true; + if(!doc.containsKey(name)) { + doc.setField(name, Lists.newArrayList()); + } + doc.addField(name, readDoc(parser)); + break; + } if (subDocs == null) subDocs = Lists.newArrayList(); subDocs.add(readDoc(parser)); diff --git a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java index a302d585305..50c0c39277e 100644 --- a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java +++ b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java @@ -501,6 +501,154 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 { } + @Test + public void testXMLMultiLevelLabeledChildren() throws IOException, XMLStreamException { + String xml_doc1 = + "" + + " 1" + + " " + + " X" + + " " + + " " + + " 2" + + " y" + + " " + + " " + + " 3" + + " z" + + " " + + " " + + ""; + + String xml_doc2 = + "" + + " 4" + + " A" + + " " + + " " + + " 5" + + " b" + + " " + + " " + + " 7" + + " d" + + " " + + " " + + " " + + " " + + " " + + " " + + " 6" + + " c" + + " " + + " " + + ""; + + XMLStreamReader parser = + inputFactory.createXMLStreamReader(new StringReader(xml_doc1)); + parser.next(); // read the START document... + //null for the processor is all right here + XMLLoader loader = new XMLLoader(); + SolrInputDocument document1 = loader.readDoc(parser); + + XMLStreamReader parser2 = + inputFactory.createXMLStreamReader(new StringReader(xml_doc2)); + parser2.next(); // read the START document... + //null for the processor is all right here + //XMLLoader loader = new XMLLoader(); + SolrInputDocument document2 = loader.readDoc(parser2); + + assertFalse(document1.hasChildDocuments()); + assertEquals(document1.toString(), sdoc("id", "1", "empty_s", "", "parent_s", "X", "test", + sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString()); + + assertFalse(document2.hasChildDocuments()); + assertEquals(document2.toString(), sdoc("id", "4", "parent_s", "A", "test", + sdocs(sdoc("id", "5", "child_s", "b", "grandChild", Collections.singleton(sdoc("id", "7", "child_s", "d"))), + sdoc("id", "6", "child_s", "c"))).toString()); + } + + @Test + public void testXMLLabeledChildren() throws IOException, XMLStreamException { + UpdateRequest req = new UpdateRequest(); + + List docs = new ArrayList<>(); + + String xml_doc1 = + "" + + " 1" + + " " + + " X" + + " " + + " " + + " 2" + + " y" + + " "+ + " " + + " 3" + + " z" + + " " + + " " + + ""; + + String xml_doc2 = + "" + + " 4" + + " A" + + " " + + " " + + " 5" + + " b" + + " "+ + " " + + " " + + " " + + " 6" + + " c" + + " " + + " " + + ""; + + XMLStreamReader parser = + inputFactory.createXMLStreamReader( new StringReader( xml_doc1 ) ); + parser.next(); // read the START document... + //null for the processor is all right here + XMLLoader loader = new XMLLoader(); + SolrInputDocument document1 = loader.readDoc( parser ); + + XMLStreamReader parser2 = + inputFactory.createXMLStreamReader( new StringReader( xml_doc2 ) ); + parser2.next(); // read the START document... + //null for the processor is all right here + //XMLLoader loader = new XMLLoader(); + SolrInputDocument document2 = loader.readDoc( parser2 ); + + assertFalse(document1.hasChildDocuments()); + assertEquals(document1.toString(), sdoc("id", "1", "empty_s", "", "parent_s", "X", "test", + sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString()); + + assertFalse(document2.hasChildDocuments()); + assertEquals(document2.toString(), sdoc("id", "4", "parent_s", "A", "test", + sdocs(sdoc("id", "5", "child_s", "b"), sdoc("id", "6", "child_s", "c"))).toString()); + + docs.add(document1); + docs.add(document2); + + Collections.shuffle(docs, random()); + req.add(docs); + + RequestWriter requestWriter = new RequestWriter(); + OutputStream os = new ByteArrayOutputStream(); + requestWriter.write(req, os); + assertBlockU(os.toString()); + assertU(commit()); + + final SolrIndexSearcher searcher = getSearcher(); + assertSingleParentOf(searcher, one("yz"), "X"); + assertSingleParentOf(searcher, one("bc"), "A"); + + } + @Test public void testJavaBinCodecNestedRelation() throws IOException { SolrInputDocument topDocument = new SolrInputDocument(); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java b/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java index 54986db5b0c..26a188d67b2 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java @@ -72,7 +72,9 @@ public class ClientUtils for( Object v : field ) { String update = null; - if (v instanceof Map) { + if(v instanceof SolrInputDocument) { + writeVal(writer, name, v , null); + } else if (v instanceof Map) { // currently only supports a single value for (Entry entry : ((Map)v).entrySet()) { update = entry.getKey().toString(); @@ -112,20 +114,28 @@ public class ClientUtils v = Base64.byteArrayToBase64(bytes.array(), bytes.position(),bytes.limit() - bytes.position()); } + XML.Writable valWriter = null; + if(v instanceof SolrInputDocument) { + final SolrInputDocument solrDoc = (SolrInputDocument) v; + valWriter = (writer1) -> writeXML(solrDoc, writer1); + } else if(v != null) { + final Object val = v; + valWriter = (writer1) -> XML.escapeCharData(val.toString(), writer1); + } + if (update == null) { if (v != null) { - XML.writeXML(writer, "field", v.toString(), "name", name ); + XML.writeXML(writer, "field", valWriter, "name", name); } } else { if (v == null) { - XML.writeXML(writer, "field", null, "name", name, "update", update, "null", true); + XML.writeXML(writer, "field", (XML.Writable) null, "name", name, "update", update, "null", true); } else { - XML.writeXML(writer, "field", v.toString(), "name", name, "update", update); + XML.writeXML(writer, "field", valWriter, "name", name, "update", update); } } } - public static String toXML( SolrInputDocument doc ) { StringWriter str = new StringWriter(); diff --git a/solr/solrj/src/java/org/apache/solr/common/util/XML.java b/solr/solrj/src/java/org/apache/solr/common/util/XML.java index c6e520523e8..9d1b8a8ba3b 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/XML.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/XML.java @@ -16,9 +16,10 @@ */ package org.apache.solr.common.util; -import java.io.Writer; import java.io.IOException; +import java.io.Writer; import java.util.Map; +import java.util.stream.Stream; /** * @@ -37,9 +38,7 @@ public class XML { private static final String[] attribute_escapes= {"#0;","#1;","#2;","#3;","#4;","#5;","#6;","#7;","#8;",null,null,"#11;","#12;",null,"#14;","#15;","#16;","#17;","#18;","#19;","#20;","#21;","#22;","#23;","#24;","#25;","#26;","#27;","#28;","#29;","#30;","#31;",null,null,""",null,null,null,"&",null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,"<"}; - - - /***************************************** + /* #Simple python script used to generate the escape table above. -YCS # #use individual char arrays or one big char array for better efficiency @@ -65,13 +64,9 @@ public class XML { result += val + ',' print result - ****************************************/ + */ -/********* - * - * @throws IOException If there is a low-level I/O error. - */ public static void escapeCharData(String str, Writer out) throws IOException { escape(str, out, chardata_escapes); } @@ -84,53 +79,28 @@ public class XML { escape(chars, start, length, out, attribute_escapes); } - - public final static void writeXML(Writer out, String tag, String val) throws IOException { - out.write('<'); - out.write(tag); - if (val == null) { - out.write('/'); - out.write('>'); - } else { - out.write('>'); - escapeCharData(val,out); - out.write('<'); - out.write('/'); - out.write(tag); - out.write('>'); - } - } - - /** does NOT escape character data in val, must already be valid XML */ + /** does NOT escape character data in val; it must already be valid XML. Attributes are always escaped. */ public final static void writeUnescapedXML(Writer out, String tag, String val, Object... attrs) throws IOException { - out.write('<'); - out.write(tag); - for (int i=0; i'); - } else { - out.write('>'); - out.write(val); - out.write('<'); - out.write('/'); - out.write(tag); - out.write('>'); - } + writeXML(out, tag, (writer1) -> writer1.write(val), attrs); } - /** escapes character data in val */ + /** escapes character data in val and attributes */ public final static void writeXML(Writer out, String tag, String val, Object... attrs) throws IOException { + final Writable writable = val != null ? (writer1) -> XML.escapeCharData(val, writer1) : null; + writeXML(out, tag, writable, attrs); + } + + /** escapes character data in val and attributes */ + public static void writeXML(Writer out, String tag, String val, Map attrs) throws IOException { + writeXML(out, tag, val, attrs.entrySet().stream().flatMap((entry) -> Stream.of(entry.getKey(), entry.getValue())).toArray()); + } + + /** @lucene.internal */ + public final static void writeXML(Writer out, String tag, Writable valWritable, Object... attrs) throws IOException { out.write('<'); out.write(tag); - for (int i=0; i'); } else { out.write('>'); - escapeCharData(val,out); + valWritable.write(out); out.write('<'); out.write('/'); out.write(tag); @@ -151,29 +121,9 @@ public class XML { } } - /** escapes character data in val */ - public static void writeXML(Writer out, String tag, String val, Map attrs) throws IOException { - out.write('<'); - out.write(tag); - for (Map.Entry entry : attrs.entrySet()) { - out.write(' '); - out.write(entry.getKey()); - out.write('='); - out.write('"'); - escapeAttributeValue(entry.getValue(), out); - out.write('"'); - } - if (val == null) { - out.write('/'); - out.write('>'); - } else { - out.write('>'); - escapeCharData(val,out); - out.write('<'); - out.write('/'); - out.write(tag); - out.write('>'); - } + @FunctionalInterface + public interface Writable { + void write(Writer w) throws IOException; } private static void escape(char [] chars, int offset, int length, Writer out, String [] escapes) throws IOException{ diff --git a/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java b/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java index a84d6d17dc2..ad1d38e4ddc 100644 --- a/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java +++ b/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java @@ -15,11 +15,6 @@ * limitations under the License. */ package org.apache.solr.util; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.util.XML; -import org.w3c.dom.Document; -import org.xml.sax.SAXException; - import javax.xml.namespace.QName; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -27,13 +22,17 @@ import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; - import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.XML; +import org.w3c.dom.Document; +import org.xml.sax.SAXException; + abstract public class BaseTestHarness { private static final ThreadLocal builderTL = new ThreadLocal<>(); private static final ThreadLocal xpathTL = new ThreadLocal<>(); @@ -200,15 +199,9 @@ abstract public class BaseTestHarness { public static String simpleTag(String tag, String... args) { try { - StringWriter r = new StringWriter(); - - // this is annoying - if (null == args || 0 == args.length) { - XML.writeXML(r, tag, null); - } else { - XML.writeXML(r, tag, null, (Object[])args); - } - return r.getBuffer().toString(); + StringWriter writer = new StringWriter(); + XML.writeXML(writer, tag, (String) null, (Object[])args); + return writer.getBuffer().toString(); } catch (IOException e) { throw new RuntimeException ("this should never happen with a StringWriter", e);