mirror of https://github.com/apache/lucene.git
SOLR-12485: Solr-Update XML format now accepts child documents under a field,
thus providing a semantic relationship (as is already allowed for JSON). Improved XML.java a bit. Closes #430
This commit is contained in:
parent
c3887b351e
commit
38bf976cd4
|
@ -150,6 +150,9 @@ New Features
|
|||
|
||||
* SOLR-12592: support #EQUAL function, range operator, decimal and percentage in cores in autoscaling policies (noble)
|
||||
|
||||
* SOLR-12485: Uploading docs in XML now supports child documents as field values, thus providing a label to the
|
||||
relationship instead of the current "anonymous" relationship. (Moshe Bla, David Smiley)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -404,6 +404,7 @@ public class XMLLoader extends ContentStreamLoader {
|
|||
StringBuilder text = new StringBuilder();
|
||||
String name = null;
|
||||
boolean isNull = false;
|
||||
boolean isLabeledChildDoc = false;
|
||||
String update = null;
|
||||
Collection<SolrInputDocument> subDocs = null;
|
||||
Map<String, Map<String, Object>> updateMap = null;
|
||||
|
@ -453,7 +454,13 @@ public class XMLLoader extends ContentStreamLoader {
|
|||
}
|
||||
break;
|
||||
}
|
||||
doc.addField(name, v);
|
||||
if(!isLabeledChildDoc){
|
||||
// only add data if this is not a childDoc, since it was added already
|
||||
doc.addField(name, v);
|
||||
} else {
|
||||
// reset so next field is not treated as child doc
|
||||
isLabeledChildDoc = false;
|
||||
}
|
||||
// field is over
|
||||
name = null;
|
||||
}
|
||||
|
@ -463,6 +470,15 @@ public class XMLLoader extends ContentStreamLoader {
|
|||
text.setLength(0);
|
||||
String localName = parser.getLocalName();
|
||||
if ("doc".equals(localName)) {
|
||||
if(name != null) {
|
||||
// flag to prevent spaces after doc from being added
|
||||
isLabeledChildDoc = true;
|
||||
if(!doc.containsKey(name)) {
|
||||
doc.setField(name, Lists.newArrayList());
|
||||
}
|
||||
doc.addField(name, readDoc(parser));
|
||||
break;
|
||||
}
|
||||
if (subDocs == null)
|
||||
subDocs = Lists.newArrayList();
|
||||
subDocs.add(readDoc(parser));
|
||||
|
|
|
@ -501,6 +501,154 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testXMLMultiLevelLabeledChildren() throws IOException, XMLStreamException {
|
||||
String xml_doc1 =
|
||||
"<doc >" +
|
||||
" <field name=\"id\">1</field>" +
|
||||
" <field name=\"empty_s\"></field>" +
|
||||
" <field name=\"parent_s\">X</field>" +
|
||||
" <field name=\"test\">" +
|
||||
" <doc> " +
|
||||
" <field name=\"id\" >2</field>" +
|
||||
" <field name=\"child_s\">y</field>" +
|
||||
" </doc>" +
|
||||
" <doc> " +
|
||||
" <field name=\"id\" >3</field>" +
|
||||
" <field name=\"child_s\">z</field>" +
|
||||
" </doc>" +
|
||||
" </field> " +
|
||||
"</doc>";
|
||||
|
||||
String xml_doc2 =
|
||||
"<doc >" +
|
||||
" <field name=\"id\">4</field>" +
|
||||
" <field name=\"parent_s\">A</field>" +
|
||||
" <field name=\"test\">" +
|
||||
" <doc> " +
|
||||
" <field name=\"id\" >5</field>" +
|
||||
" <field name=\"child_s\">b</field>" +
|
||||
" <field name=\"grandChild\">" +
|
||||
" <doc> " +
|
||||
" <field name=\"id\" >7</field>" +
|
||||
" <field name=\"child_s\">d</field>" +
|
||||
" </doc>" +
|
||||
" </field>" +
|
||||
" </doc>" +
|
||||
" </field>" +
|
||||
" <field name=\"test\">" +
|
||||
" <doc> " +
|
||||
" <field name=\"id\" >6</field>" +
|
||||
" <field name=\"child_s\">c</field>" +
|
||||
" </doc>" +
|
||||
" </field> " +
|
||||
"</doc>";
|
||||
|
||||
XMLStreamReader parser =
|
||||
inputFactory.createXMLStreamReader(new StringReader(xml_doc1));
|
||||
parser.next(); // read the START document...
|
||||
//null for the processor is all right here
|
||||
XMLLoader loader = new XMLLoader();
|
||||
SolrInputDocument document1 = loader.readDoc(parser);
|
||||
|
||||
XMLStreamReader parser2 =
|
||||
inputFactory.createXMLStreamReader(new StringReader(xml_doc2));
|
||||
parser2.next(); // read the START document...
|
||||
//null for the processor is all right here
|
||||
//XMLLoader loader = new XMLLoader();
|
||||
SolrInputDocument document2 = loader.readDoc(parser2);
|
||||
|
||||
assertFalse(document1.hasChildDocuments());
|
||||
assertEquals(document1.toString(), sdoc("id", "1", "empty_s", "", "parent_s", "X", "test",
|
||||
sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString());
|
||||
|
||||
assertFalse(document2.hasChildDocuments());
|
||||
assertEquals(document2.toString(), sdoc("id", "4", "parent_s", "A", "test",
|
||||
sdocs(sdoc("id", "5", "child_s", "b", "grandChild", Collections.singleton(sdoc("id", "7", "child_s", "d"))),
|
||||
sdoc("id", "6", "child_s", "c"))).toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testXMLLabeledChildren() throws IOException, XMLStreamException {
|
||||
UpdateRequest req = new UpdateRequest();
|
||||
|
||||
List<SolrInputDocument> docs = new ArrayList<>();
|
||||
|
||||
String xml_doc1 =
|
||||
"<doc >" +
|
||||
" <field name=\"id\">1</field>" +
|
||||
" <field name=\"empty_s\"></field>" +
|
||||
" <field name=\"parent_s\">X</field>" +
|
||||
" <field name=\"test\">" +
|
||||
" <doc> " +
|
||||
" <field name=\"id\" >2</field>" +
|
||||
" <field name=\"child_s\">y</field>" +
|
||||
" </doc>"+
|
||||
" <doc> " +
|
||||
" <field name=\"id\" >3</field>" +
|
||||
" <field name=\"child_s\">z</field>" +
|
||||
" </doc>" +
|
||||
" </field> " +
|
||||
"</doc>";
|
||||
|
||||
String xml_doc2 =
|
||||
"<doc >" +
|
||||
" <field name=\"id\">4</field>" +
|
||||
" <field name=\"parent_s\">A</field>" +
|
||||
" <field name=\"test\">" +
|
||||
" <doc> " +
|
||||
" <field name=\"id\" >5</field>" +
|
||||
" <field name=\"child_s\">b</field>" +
|
||||
" </doc>"+
|
||||
" </field>" +
|
||||
" <field name=\"test\">" +
|
||||
" <doc> " +
|
||||
" <field name=\"id\" >6</field>" +
|
||||
" <field name=\"child_s\">c</field>" +
|
||||
" </doc>" +
|
||||
" </field> " +
|
||||
"</doc>";
|
||||
|
||||
XMLStreamReader parser =
|
||||
inputFactory.createXMLStreamReader( new StringReader( xml_doc1 ) );
|
||||
parser.next(); // read the START document...
|
||||
//null for the processor is all right here
|
||||
XMLLoader loader = new XMLLoader();
|
||||
SolrInputDocument document1 = loader.readDoc( parser );
|
||||
|
||||
XMLStreamReader parser2 =
|
||||
inputFactory.createXMLStreamReader( new StringReader( xml_doc2 ) );
|
||||
parser2.next(); // read the START document...
|
||||
//null for the processor is all right here
|
||||
//XMLLoader loader = new XMLLoader();
|
||||
SolrInputDocument document2 = loader.readDoc( parser2 );
|
||||
|
||||
assertFalse(document1.hasChildDocuments());
|
||||
assertEquals(document1.toString(), sdoc("id", "1", "empty_s", "", "parent_s", "X", "test",
|
||||
sdocs(sdoc("id", "2", "child_s", "y"), sdoc("id", "3", "child_s", "z"))).toString());
|
||||
|
||||
assertFalse(document2.hasChildDocuments());
|
||||
assertEquals(document2.toString(), sdoc("id", "4", "parent_s", "A", "test",
|
||||
sdocs(sdoc("id", "5", "child_s", "b"), sdoc("id", "6", "child_s", "c"))).toString());
|
||||
|
||||
docs.add(document1);
|
||||
docs.add(document2);
|
||||
|
||||
Collections.shuffle(docs, random());
|
||||
req.add(docs);
|
||||
|
||||
RequestWriter requestWriter = new RequestWriter();
|
||||
OutputStream os = new ByteArrayOutputStream();
|
||||
requestWriter.write(req, os);
|
||||
assertBlockU(os.toString());
|
||||
assertU(commit());
|
||||
|
||||
final SolrIndexSearcher searcher = getSearcher();
|
||||
assertSingleParentOf(searcher, one("yz"), "X");
|
||||
assertSingleParentOf(searcher, one("bc"), "A");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJavaBinCodecNestedRelation() throws IOException {
|
||||
SolrInputDocument topDocument = new SolrInputDocument();
|
||||
|
|
|
@ -72,7 +72,9 @@ public class ClientUtils
|
|||
for( Object v : field ) {
|
||||
String update = null;
|
||||
|
||||
if (v instanceof Map) {
|
||||
if(v instanceof SolrInputDocument) {
|
||||
writeVal(writer, name, v , null);
|
||||
} else if (v instanceof Map) {
|
||||
// currently only supports a single value
|
||||
for (Entry<Object,Object> entry : ((Map<Object,Object>)v).entrySet()) {
|
||||
update = entry.getKey().toString();
|
||||
|
@ -112,20 +114,28 @@ public class ClientUtils
|
|||
v = Base64.byteArrayToBase64(bytes.array(), bytes.position(),bytes.limit() - bytes.position());
|
||||
}
|
||||
|
||||
XML.Writable valWriter = null;
|
||||
if(v instanceof SolrInputDocument) {
|
||||
final SolrInputDocument solrDoc = (SolrInputDocument) v;
|
||||
valWriter = (writer1) -> writeXML(solrDoc, writer1);
|
||||
} else if(v != null) {
|
||||
final Object val = v;
|
||||
valWriter = (writer1) -> XML.escapeCharData(val.toString(), writer1);
|
||||
}
|
||||
|
||||
if (update == null) {
|
||||
if (v != null) {
|
||||
XML.writeXML(writer, "field", v.toString(), "name", name );
|
||||
XML.writeXML(writer, "field", valWriter, "name", name);
|
||||
}
|
||||
} else {
|
||||
if (v == null) {
|
||||
XML.writeXML(writer, "field", null, "name", name, "update", update, "null", true);
|
||||
XML.writeXML(writer, "field", (XML.Writable) null, "name", name, "update", update, "null", true);
|
||||
} else {
|
||||
XML.writeXML(writer, "field", v.toString(), "name", name, "update", update);
|
||||
XML.writeXML(writer, "field", valWriter, "name", name, "update", update);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static String toXML( SolrInputDocument doc )
|
||||
{
|
||||
StringWriter str = new StringWriter();
|
||||
|
|
|
@ -16,9 +16,10 @@
|
|||
*/
|
||||
package org.apache.solr.common.util;
|
||||
|
||||
import java.io.Writer;
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -37,9 +38,7 @@ public class XML {
|
|||
private static final String[] attribute_escapes=
|
||||
{"&#0;","&#1;","&#2;","&#3;","&#4;","&#5;","&#6;","&#7;","&#8;",null,null,"&#11;","&#12;",null,"&#14;","&#15;","&#16;","&#17;","&#18;","&#19;","&#20;","&#21;","&#22;","&#23;","&#24;","&#25;","&#26;","&#27;","&#28;","&#29;","&#30;","&#31;",null,null,"&quot;",null,null,null,"&amp;",null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,"&lt;"};
|
||||
|
||||
|
||||
|
||||
/*****************************************
|
||||
/*
|
||||
#Simple python script used to generate the escape table above. -YCS
|
||||
#
|
||||
#use individual char arrays or one big char array for better efficiency
|
||||
|
@ -65,13 +64,9 @@ public class XML {
|
|||
result += val + ','
|
||||
|
||||
print result
|
||||
****************************************/
|
||||
*/
|
||||
|
||||
|
||||
/*********
|
||||
*
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
*/
|
||||
public static void escapeCharData(String str, Writer out) throws IOException {
|
||||
escape(str, out, chardata_escapes);
|
||||
}
|
||||
|
@ -84,53 +79,28 @@ public class XML {
|
|||
escape(chars, start, length, out, attribute_escapes);
|
||||
}
|
||||
|
||||
|
||||
public final static void writeXML(Writer out, String tag, String val) throws IOException {
|
||||
out.write('<');
|
||||
out.write(tag);
|
||||
if (val == null) {
|
||||
out.write('/');
|
||||
out.write('>');
|
||||
} else {
|
||||
out.write('>');
|
||||
escapeCharData(val,out);
|
||||
out.write('<');
|
||||
out.write('/');
|
||||
out.write(tag);
|
||||
out.write('>');
|
||||
}
|
||||
}
|
||||
|
||||
/** does NOT escape character data in val, must already be valid XML */
|
||||
/** does NOT escape character data in val; it must already be valid XML. Attributes are always escaped. */
|
||||
public final static void writeUnescapedXML(Writer out, String tag, String val, Object... attrs) throws IOException {
|
||||
out.write('<');
|
||||
out.write(tag);
|
||||
for (int i=0; i<attrs.length; i++) {
|
||||
out.write(' ');
|
||||
out.write(attrs[i++].toString());
|
||||
out.write('=');
|
||||
out.write('"');
|
||||
out.write(attrs[i].toString());
|
||||
out.write('"');
|
||||
}
|
||||
if (val == null) {
|
||||
out.write('/');
|
||||
out.write('>');
|
||||
} else {
|
||||
out.write('>');
|
||||
out.write(val);
|
||||
out.write('<');
|
||||
out.write('/');
|
||||
out.write(tag);
|
||||
out.write('>');
|
||||
}
|
||||
writeXML(out, tag, (writer1) -> writer1.write(val), attrs);
|
||||
}
|
||||
|
||||
/** escapes character data in val */
|
||||
/** escapes character data in val and attributes */
|
||||
public final static void writeXML(Writer out, String tag, String val, Object... attrs) throws IOException {
|
||||
final Writable writable = val != null ? (writer1) -> XML.escapeCharData(val, writer1) : null;
|
||||
writeXML(out, tag, writable, attrs);
|
||||
}
|
||||
|
||||
/** escapes character data in val and attributes */
|
||||
public static void writeXML(Writer out, String tag, String val, Map<String, String> attrs) throws IOException {
|
||||
writeXML(out, tag, val, attrs.entrySet().stream().flatMap((entry) -> Stream.of(entry.getKey(), entry.getValue())).toArray());
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public final static void writeXML(Writer out, String tag, Writable valWritable, Object... attrs) throws IOException {
|
||||
out.write('<');
|
||||
out.write(tag);
|
||||
for (int i=0; i<attrs.length; i++) {
|
||||
final int attrsLen = attrs == null ? 0 : attrs.length;
|
||||
for (int i = 0; i< attrsLen; i++) {
|
||||
out.write(' ');
|
||||
out.write(attrs[i++].toString());
|
||||
out.write('=');
|
||||
|
@ -138,12 +108,12 @@ public class XML {
|
|||
escapeAttributeValue(attrs[i].toString(), out);
|
||||
out.write('"');
|
||||
}
|
||||
if (val == null) {
|
||||
if (valWritable == null) {
|
||||
out.write('/');
|
||||
out.write('>');
|
||||
} else {
|
||||
out.write('>');
|
||||
escapeCharData(val,out);
|
||||
valWritable.write(out);
|
||||
out.write('<');
|
||||
out.write('/');
|
||||
out.write(tag);
|
||||
|
@ -151,29 +121,9 @@ public class XML {
|
|||
}
|
||||
}
|
||||
|
||||
/** escapes character data in val */
|
||||
public static void writeXML(Writer out, String tag, String val, Map<String, String> attrs) throws IOException {
|
||||
out.write('<');
|
||||
out.write(tag);
|
||||
for (Map.Entry<String, String> entry : attrs.entrySet()) {
|
||||
out.write(' ');
|
||||
out.write(entry.getKey());
|
||||
out.write('=');
|
||||
out.write('"');
|
||||
escapeAttributeValue(entry.getValue(), out);
|
||||
out.write('"');
|
||||
}
|
||||
if (val == null) {
|
||||
out.write('/');
|
||||
out.write('>');
|
||||
} else {
|
||||
out.write('>');
|
||||
escapeCharData(val,out);
|
||||
out.write('<');
|
||||
out.write('/');
|
||||
out.write(tag);
|
||||
out.write('>');
|
||||
}
|
||||
@FunctionalInterface
|
||||
public interface Writable {
|
||||
void write(Writer w) throws IOException;
|
||||
}
|
||||
|
||||
private static void escape(char [] chars, int offset, int length, Writer out, String [] escapes) throws IOException{
|
||||
|
|
|
@ -15,11 +15,6 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.util;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.XML;
|
||||
import org.w3c.dom.Document;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
@ -27,13 +22,17 @@ import javax.xml.xpath.XPath;
|
|||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.XML;
|
||||
import org.w3c.dom.Document;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
abstract public class BaseTestHarness {
|
||||
private static final ThreadLocal<DocumentBuilder> builderTL = new ThreadLocal<>();
|
||||
private static final ThreadLocal<XPath> xpathTL = new ThreadLocal<>();
|
||||
|
@ -200,15 +199,9 @@ abstract public class BaseTestHarness {
|
|||
|
||||
public static String simpleTag(String tag, String... args) {
|
||||
try {
|
||||
StringWriter r = new StringWriter();
|
||||
|
||||
// this is annoying
|
||||
if (null == args || 0 == args.length) {
|
||||
XML.writeXML(r, tag, null);
|
||||
} else {
|
||||
XML.writeXML(r, tag, null, (Object[])args);
|
||||
}
|
||||
return r.getBuffer().toString();
|
||||
StringWriter writer = new StringWriter();
|
||||
XML.writeXML(writer, tag, (String) null, (Object[])args);
|
||||
return writer.getBuffer().toString();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException
|
||||
("this should never happen with a StringWriter", e);
|
||||
|
|
Loading…
Reference in New Issue