SOLR-12485: Solr-Update XML format now accepts child documents under a field

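thus providing a semantic (labeled) relationship, as is already allowed for JSON.
Also refactored XML.java so that the writeXML overloads delegate to a single Writable-based implementation.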
Closes #430
This commit is contained in:
Moshe 2018-08-07 13:52:11 -04:00 committed by David Smiley
parent c3887b351e
commit 38bf976cd4
6 changed files with 216 additions and 96 deletions

View File

@ -150,6 +150,9 @@ New Features
* SOLR-12592: support #EQUAL function, range operator, decimal and percentage in cores in autoscaling policies (noble)
* SOLR-12485: Uploading docs in XML now supports child documents as field values, thus providing a label to the
relationship instead of the current "anonymous" relationship. (Moshe Bla, David Smiley)
Bug Fixes
----------------------

View File

@ -404,6 +404,7 @@ public class XMLLoader extends ContentStreamLoader {
StringBuilder text = new StringBuilder();
String name = null;
boolean isNull = false;
boolean isLabeledChildDoc = false;
String update = null;
Collection<SolrInputDocument> subDocs = null;
Map<String, Map<String, Object>> updateMap = null;
@ -453,7 +454,13 @@ public class XMLLoader extends ContentStreamLoader {
}
break;
}
doc.addField(name, v);
if(!isLabeledChildDoc){
// only add data if this is not a childDoc, since it was added already
doc.addField(name, v);
} else {
// reset so next field is not treated as child doc
isLabeledChildDoc = false;
}
// field is over
name = null;
}
@ -463,6 +470,15 @@ public class XMLLoader extends ContentStreamLoader {
text.setLength(0);
String localName = parser.getLocalName();
if ("doc".equals(localName)) {
if(name != null) {
// flag to prevent spaces after doc from being added
isLabeledChildDoc = true;
if(!doc.containsKey(name)) {
doc.setField(name, Lists.newArrayList());
}
doc.addField(name, readDoc(parser));
break;
}
if (subDocs == null)
subDocs = Lists.newArrayList();
subDocs.add(readDoc(parser));

View File

@ -501,6 +501,154 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
}
/**
 * Checks that {@code <doc>} elements nested inside a {@code <field>} are read by
 * {@code XMLLoader.readDoc} as values of that field (labeled children), including a second
 * nesting level, and are not registered as anonymous child documents.
 */
@Test
public void testXMLMultiLevelLabeledChildren() throws IOException, XMLStreamException {
  // Parent with two child docs under the single field "test".
  String xml_doc1 =
      "<doc >" +
          " <field name=\"id\">1</field>" +
          " <field name=\"empty_s\"></field>" +
          " <field name=\"parent_s\">X</field>" +
          " <field name=\"test\">" +
          " <doc> " +
          " <field name=\"id\" >2</field>" +
          " <field name=\"child_s\">y</field>" +
          " </doc>" +
          " <doc> " +
          " <field name=\"id\" >3</field>" +
          " <field name=\"child_s\">z</field>" +
          " </doc>" +
          " </field> " +
          "</doc>";

  // Parent whose "test" label is spread over two <field> elements; the first child
  // itself carries a labeled grandchild.
  String xml_doc2 =
      "<doc >" +
          " <field name=\"id\">4</field>" +
          " <field name=\"parent_s\">A</field>" +
          " <field name=\"test\">" +
          " <doc> " +
          " <field name=\"id\" >5</field>" +
          " <field name=\"child_s\">b</field>" +
          " <field name=\"grandChild\">" +
          " <doc> " +
          " <field name=\"id\" >7</field>" +
          " <field name=\"child_s\">d</field>" +
          " </doc>" +
          " </field>" +
          " </doc>" +
          " </field>" +
          " <field name=\"test\">" +
          " <doc> " +
          " <field name=\"id\" >6</field>" +
          " <field name=\"child_s\">c</field>" +
          " </doc>" +
          " </field> " +
          "</doc>";

  XMLLoader loader = new XMLLoader();

  XMLStreamReader parser1 =
      inputFactory.createXMLStreamReader(new StringReader(xml_doc1));
  parser1.next(); // read the START document...
  SolrInputDocument document1 = loader.readDoc(parser1);

  XMLStreamReader parser2 =
      inputFactory.createXMLStreamReader(new StringReader(xml_doc2));
  parser2.next(); // read the START document...
  SolrInputDocument document2 = loader.readDoc(parser2);

  // Labeled children live in the field value, not in the anonymous child-document list.
  assertFalse(document1.hasChildDocuments());
  // assertEquals takes (expected, actual); keep that order so failure messages read correctly.
  assertEquals(
      sdoc("id", "1", "empty_s", "", "parent_s", "X", "test",
          sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString(),
      document1.toString());

  assertFalse(document2.hasChildDocuments());
  assertEquals(
      sdoc("id", "4", "parent_s", "A", "test",
          sdocs(sdoc("id", "5", "child_s", "b", "grandChild", Collections.singleton(sdoc("id", "7", "child_s", "d"))),
              sdoc("id", "6", "child_s", "c"))).toString(),
      document2.toString());
}
/**
 * Checks that {@code <doc>} elements nested inside a {@code <field>} are read by
 * {@code XMLLoader.readDoc} as values of that field, then serializes the parsed documents
 * through {@code RequestWriter}, indexes them, and verifies each group of children has the
 * expected single parent.
 */
@Test
public void testXMLLabeledChildren() throws IOException, XMLStreamException {
  // Parent with two child docs under the single field "test".
  String xml_doc1 =
      "<doc >" +
          " <field name=\"id\">1</field>" +
          " <field name=\"empty_s\"></field>" +
          " <field name=\"parent_s\">X</field>" +
          " <field name=\"test\">" +
          " <doc> " +
          " <field name=\"id\" >2</field>" +
          " <field name=\"child_s\">y</field>" +
          " </doc>" +
          " <doc> " +
          " <field name=\"id\" >3</field>" +
          " <field name=\"child_s\">z</field>" +
          " </doc>" +
          " </field> " +
          "</doc>";

  // Parent whose "test" label is spread over two separate <field> elements.
  String xml_doc2 =
      "<doc >" +
          " <field name=\"id\">4</field>" +
          " <field name=\"parent_s\">A</field>" +
          " <field name=\"test\">" +
          " <doc> " +
          " <field name=\"id\" >5</field>" +
          " <field name=\"child_s\">b</field>" +
          " </doc>" +
          " </field>" +
          " <field name=\"test\">" +
          " <doc> " +
          " <field name=\"id\" >6</field>" +
          " <field name=\"child_s\">c</field>" +
          " </doc>" +
          " </field> " +
          "</doc>";

  XMLLoader loader = new XMLLoader();

  XMLStreamReader parser1 =
      inputFactory.createXMLStreamReader(new StringReader(xml_doc1));
  parser1.next(); // read the START document...
  SolrInputDocument document1 = loader.readDoc(parser1);

  XMLStreamReader parser2 =
      inputFactory.createXMLStreamReader(new StringReader(xml_doc2));
  parser2.next(); // read the START document...
  SolrInputDocument document2 = loader.readDoc(parser2);

  // Labeled children live in the field value, not in the anonymous child-document list.
  assertFalse(document1.hasChildDocuments());
  // assertEquals takes (expected, actual); keep that order so failure messages read correctly.
  assertEquals(
      sdoc("id", "1", "empty_s", "", "parent_s", "X", "test",
          sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString(),
      document1.toString());

  assertFalse(document2.hasChildDocuments());
  assertEquals(
      sdoc("id", "4", "parent_s", "A", "test",
          sdocs(sdoc("id", "5", "child_s", "b"), sdoc("id", "6", "child_s", "c"))).toString(),
      document2.toString());

  // Round-trip: send both parsed documents, in random order, as one update request.
  List<SolrInputDocument> docs = new ArrayList<>();
  docs.add(document1);
  docs.add(document2);
  Collections.shuffle(docs, random());

  UpdateRequest req = new UpdateRequest();
  req.add(docs);

  RequestWriter requestWriter = new RequestWriter();
  ByteArrayOutputStream os = new ByteArrayOutputStream();
  requestWriter.write(req, os);
  // Decode explicitly instead of relying on the platform default charset.
  // NOTE(review): assumes RequestWriter emits UTF-8 - confirm against SolrJ.
  assertBlockU(os.toString("UTF-8"));
  assertU(commit());

  final SolrIndexSearcher searcher = getSearcher();
  assertSingleParentOf(searcher, one("yz"), "X");
  assertSingleParentOf(searcher, one("bc"), "A");
}
@Test
public void testJavaBinCodecNestedRelation() throws IOException {
SolrInputDocument topDocument = new SolrInputDocument();

View File

@ -72,7 +72,9 @@ public class ClientUtils
for( Object v : field ) {
String update = null;
if (v instanceof Map) {
if(v instanceof SolrInputDocument) {
writeVal(writer, name, v , null);
} else if (v instanceof Map) {
// currently only supports a single value
for (Entry<Object,Object> entry : ((Map<Object,Object>)v).entrySet()) {
update = entry.getKey().toString();
@ -112,20 +114,28 @@ public class ClientUtils
v = Base64.byteArrayToBase64(bytes.array(), bytes.position(),bytes.limit() - bytes.position());
}
XML.Writable valWriter = null;
if(v instanceof SolrInputDocument) {
final SolrInputDocument solrDoc = (SolrInputDocument) v;
valWriter = (writer1) -> writeXML(solrDoc, writer1);
} else if(v != null) {
final Object val = v;
valWriter = (writer1) -> XML.escapeCharData(val.toString(), writer1);
}
if (update == null) {
if (v != null) {
XML.writeXML(writer, "field", v.toString(), "name", name );
XML.writeXML(writer, "field", valWriter, "name", name);
}
} else {
if (v == null) {
XML.writeXML(writer, "field", null, "name", name, "update", update, "null", true);
XML.writeXML(writer, "field", (XML.Writable) null, "name", name, "update", update, "null", true);
} else {
XML.writeXML(writer, "field", v.toString(), "name", name, "update", update);
XML.writeXML(writer, "field", valWriter, "name", name, "update", update);
}
}
}
public static String toXML( SolrInputDocument doc )
{
StringWriter str = new StringWriter();

View File

@ -16,9 +16,10 @@
*/
package org.apache.solr.common.util;
import java.io.Writer;
import java.io.IOException;
import java.io.Writer;
import java.util.Map;
import java.util.stream.Stream;
/**
*
@ -37,9 +38,7 @@ public class XML {
private static final String[] attribute_escapes=
{"#0;","#1;","#2;","#3;","#4;","#5;","#6;","#7;","#8;",null,null,"#11;","#12;",null,"#14;","#15;","#16;","#17;","#18;","#19;","#20;","#21;","#22;","#23;","#24;","#25;","#26;","#27;","#28;","#29;","#30;","#31;",null,null,"&quot;",null,null,null,"&amp;",null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,"&lt;"};
/*****************************************
/*
#Simple python script used to generate the escape table above. -YCS
#
#use individual char arrays or one big char array for better efficiency
@ -65,13 +64,9 @@ public class XML {
result += val + ','
print result
****************************************/
*/
/*********
*
* @throws IOException If there is a low-level I/O error.
*/
public static void escapeCharData(String str, Writer out) throws IOException {
escape(str, out, chardata_escapes);
}
@ -84,53 +79,28 @@ public class XML {
escape(chars, start, length, out, attribute_escapes);
}
public final static void writeXML(Writer out, String tag, String val) throws IOException {
out.write('<');
out.write(tag);
if (val == null) {
out.write('/');
out.write('>');
} else {
out.write('>');
escapeCharData(val,out);
out.write('<');
out.write('/');
out.write(tag);
out.write('>');
}
}
/** does NOT escape character data in val, must already be valid XML */
/** does NOT escape character data in val; it must already be valid XML. Attributes are always escaped. */
public final static void writeUnescapedXML(Writer out, String tag, String val, Object... attrs) throws IOException {
out.write('<');
out.write(tag);
for (int i=0; i<attrs.length; i++) {
out.write(' ');
out.write(attrs[i++].toString());
out.write('=');
out.write('"');
out.write(attrs[i].toString());
out.write('"');
}
if (val == null) {
out.write('/');
out.write('>');
} else {
out.write('>');
out.write(val);
out.write('<');
out.write('/');
out.write(tag);
out.write('>');
}
writeXML(out, tag, (writer1) -> writer1.write(val), attrs);
}
/** escapes character data in val */
/**
 * Writes the element {@code tag} with the given attributes; {@code val} is emitted as
 * escaped character data. A {@code null} val is forwarded as a {@code null} content writer.
 *
 * @param attrs alternating attribute name / value pairs
 * @throws IOException If there is a low-level I/O error.
 */
public final static void writeXML(Writer out, String tag, String val, Object... attrs) throws IOException {
  if (val == null) {
    // no content to escape: hand the Writable overload a null writer
    writeXML(out, tag, (Writable) null, attrs);
    return;
  }
  final Writable escapedContent = (sink) -> XML.escapeCharData(val, sink);
  writeXML(out, tag, escapedContent, attrs);
}
/**
 * Writes the element {@code tag} with {@code val} as escaped character data, taking the
 * attributes from a map instead of a flat name/value array.
 *
 * @param attrs attribute names mapped to their values; flattened in iteration order
 * @throws IOException If there is a low-level I/O error.
 */
public static void writeXML(Writer out, String tag, String val, Map<String, String> attrs) throws IOException {
  // Flatten to [name0, value0, name1, value1, ...] as expected by the varargs overload.
  final Object[] nameValuePairs = new Object[attrs.size() * 2];
  int next = 0;
  for (Map.Entry<String, String> attr : attrs.entrySet()) {
    nameValuePairs[next++] = attr.getKey();
    nameValuePairs[next++] = attr.getValue();
  }
  writeXML(out, tag, val, nameValuePairs);
}
/** @lucene.internal */
public final static void writeXML(Writer out, String tag, Writable valWritable, Object... attrs) throws IOException {
out.write('<');
out.write(tag);
for (int i=0; i<attrs.length; i++) {
final int attrsLen = attrs == null ? 0 : attrs.length;
for (int i = 0; i< attrsLen; i++) {
out.write(' ');
out.write(attrs[i++].toString());
out.write('=');
@ -138,12 +108,12 @@ public class XML {
escapeAttributeValue(attrs[i].toString(), out);
out.write('"');
}
if (val == null) {
if (valWritable == null) {
out.write('/');
out.write('>');
} else {
out.write('>');
escapeCharData(val,out);
valWritable.write(out);
out.write('<');
out.write('/');
out.write(tag);
@ -151,29 +121,9 @@ public class XML {
}
}
/** escapes character data in val */
public static void writeXML(Writer out, String tag, String val, Map<String, String> attrs) throws IOException {
out.write('<');
out.write(tag);
for (Map.Entry<String, String> entry : attrs.entrySet()) {
out.write(' ');
out.write(entry.getKey());
out.write('=');
out.write('"');
escapeAttributeValue(entry.getValue(), out);
out.write('"');
}
if (val == null) {
out.write('/');
out.write('>');
} else {
out.write('>');
escapeCharData(val,out);
out.write('<');
out.write('/');
out.write(tag);
out.write('>');
}
/**
 * Callback that emits content to a {@link Writer}. {@code writeXML} invokes it to produce
 * the body of an element between the opening and closing tags.
 */
@FunctionalInterface
public interface Writable {
  /**
   * Writes this object's content to the given writer.
   *
   * @param sink destination for the content
   * @throws IOException If there is a low-level I/O error.
   */
  void write(Writer sink) throws IOException;
}
private static void escape(char [] chars, int offset, int length, Writer out, String [] escapes) throws IOException{

View File

@ -15,11 +15,6 @@
* limitations under the License.
*/
package org.apache.solr.util;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.XML;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
@ -27,13 +22,17 @@ import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.XML;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
abstract public class BaseTestHarness {
private static final ThreadLocal<DocumentBuilder> builderTL = new ThreadLocal<>();
private static final ThreadLocal<XPath> xpathTL = new ThreadLocal<>();
@ -200,15 +199,9 @@ abstract public class BaseTestHarness {
public static String simpleTag(String tag, String... args) {
try {
StringWriter r = new StringWriter();
// this is annoying
if (null == args || 0 == args.length) {
XML.writeXML(r, tag, null);
} else {
XML.writeXML(r, tag, null, (Object[])args);
}
return r.getBuffer().toString();
StringWriter writer = new StringWriter();
XML.writeXML(writer, tag, (String) null, (Object[])args);
return writer.getBuffer().toString();
} catch (IOException e) {
throw new RuntimeException
("this should never happen with a StringWriter", e);