mirror of https://github.com/apache/lucene.git
SOLR-2960: XPathEntityProcessor was adding spurious nulls to multi-valued fields
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1553285 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a0049aa5f9
commit
35b159f14c
|
@ -230,6 +230,9 @@ Optimizations
|
||||||
* SOLR-5576: Improve concurrency when registering and waiting for all
|
* SOLR-5576: Improve concurrency when registering and waiting for all
|
||||||
SolrCores to register a DOWN state. (Christine Poerschke via Mark Miller)
|
SolrCores to register a DOWN state. (Christine Poerschke via Mark Miller)
|
||||||
|
|
||||||
|
* SOLR-2960: fix DIH XPathEntityProcessor to add the correct number of "null"
|
||||||
|
placeholders for multi-valued fields (Michael Watts via James Dyer)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -296,7 +296,7 @@ public class XPathRecordReader {
|
||||||
for (Node n : childNodes) {
|
for (Node n : childNodes) {
|
||||||
// For the multivalue child nodes where we could have, but
|
// For the multivalue child nodes where we could have, but
|
||||||
// didn't, collect text. Push a null string into values.
|
// didn't, collect text. Push a null string into values.
|
||||||
if (!childrenFound.contains(n)) n.putNulls(values);
|
if (!childrenFound.contains(n)) n.putNulls(values, valuesAddedinThisFrame);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
@ -429,18 +429,28 @@ public class XPathRecordReader {
|
||||||
* pushing a null string onto every multiValued fieldName's List of values
|
* pushing a null string onto every multiValued fieldName's List of values
|
||||||
* where a value has not been provided from the stream.
|
* where a value has not been provided from the stream.
|
||||||
*/
|
*/
|
||||||
private void putNulls(Map<String, Object> values) {
|
private void putNulls(Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
|
||||||
if (attributes != null) {
|
if (attributes != null) {
|
||||||
for (Node n : attributes) {
|
for (Node n : attributes) {
|
||||||
if (n.multiValued)
|
if (n.multiValued) {
|
||||||
putText(values, null, n.fieldName, true);
|
putANull(n.fieldName, values, valuesAddedinThisFrame);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (hasText && multiValued)
|
}
|
||||||
putText(values, null, fieldName, true);
|
if (hasText && multiValued) {
|
||||||
|
putANull(fieldName, values, valuesAddedinThisFrame);
|
||||||
|
}
|
||||||
if (childNodes != null) {
|
if (childNodes != null) {
|
||||||
for (Node childNode : childNodes)
|
for (Node childNode : childNodes) {
|
||||||
childNode.putNulls(values);
|
childNode.putNulls(values, valuesAddedinThisFrame);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void putANull(String thisFieldName, Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
|
||||||
|
putText(values, null, thisFieldName, true);
|
||||||
|
if( valuesAddedinThisFrame != null) {
|
||||||
|
valuesAddedinThisFrame.add(thisFieldName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -94,6 +94,128 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase
|
||||||
assertEquals("ü", l.get(2));
|
assertEquals("ü", l.get(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||||
|
@Test
|
||||||
|
public void testMultiValuedWithMultipleDocuments() throws Exception {
|
||||||
|
Map entityAttrs = createMap("name", "e", "url", "testdata.xml", XPathEntityProcessor.FOR_EACH, "/documents/doc");
|
||||||
|
List fields = new ArrayList();
|
||||||
|
fields.add(createMap("column", "id", "xpath", "/documents/doc/id", DataImporter.MULTI_VALUED, "false"));
|
||||||
|
fields.add(createMap("column", "a", "xpath", "/documents/doc/a", DataImporter.MULTI_VALUED, "true"));
|
||||||
|
fields.add(createMap("column", "s1dataA", "xpath", "/documents/doc/sec1/s1dataA", DataImporter.MULTI_VALUED, "true"));
|
||||||
|
fields.add(createMap("column", "s1dataB", "xpath", "/documents/doc/sec1/s1dataB", DataImporter.MULTI_VALUED, "true"));
|
||||||
|
fields.add(createMap("column", "s1dataC", "xpath", "/documents/doc/sec1/s1dataC", DataImporter.MULTI_VALUED, "true"));
|
||||||
|
|
||||||
|
Context c = getContext(null,
|
||||||
|
new VariableResolver(), getDataSource(textMultipleDocuments), Context.FULL_DUMP, fields, entityAttrs);
|
||||||
|
XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
|
||||||
|
xPathEntityProcessor.init(c);
|
||||||
|
List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
|
||||||
|
while (true) {
|
||||||
|
Map<String, Object> row = xPathEntityProcessor.nextRow();
|
||||||
|
if (row == null)
|
||||||
|
break;
|
||||||
|
result.add(row);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
assertEquals("1", result.get(0).get("id"));
|
||||||
|
List a = (List)result.get(0).get("a");
|
||||||
|
List s1dataA = (List)result.get(0).get("s1dataA");
|
||||||
|
List s1dataB = (List)result.get(0).get("s1dataB");
|
||||||
|
List s1dataC = (List)result.get(0).get("s1dataC");
|
||||||
|
assertEquals(2, a.size());
|
||||||
|
assertEquals("id1-a1", a.get(0));
|
||||||
|
assertEquals("id1-a2", a.get(1));
|
||||||
|
assertEquals(3, s1dataA.size());
|
||||||
|
assertEquals("id1-s1dataA-1", s1dataA.get(0));
|
||||||
|
assertNull(s1dataA.get(1));
|
||||||
|
assertEquals("id1-s1dataA-3", s1dataA.get(2));
|
||||||
|
assertEquals(3, s1dataB.size());
|
||||||
|
assertEquals("id1-s1dataB-1", s1dataB.get(0));
|
||||||
|
assertEquals("id1-s1dataB-2", s1dataB.get(1));
|
||||||
|
assertEquals("id1-s1dataB-3", s1dataB.get(2));
|
||||||
|
assertEquals(3, s1dataC.size());
|
||||||
|
assertNull(s1dataC.get(0));
|
||||||
|
assertNull(s1dataC.get(1));
|
||||||
|
assertNull(s1dataC.get(2));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
assertEquals("2", result.get(1).get("id"));
|
||||||
|
List a = (List)result.get(1).get("a");
|
||||||
|
List s1dataA = (List)result.get(1).get("s1dataA");
|
||||||
|
List s1dataB = (List)result.get(1).get("s1dataB");
|
||||||
|
List s1dataC = (List)result.get(1).get("s1dataC");
|
||||||
|
assertTrue(a==null || a.size()==0);
|
||||||
|
assertEquals(1, s1dataA.size());
|
||||||
|
assertNull(s1dataA.get(0));
|
||||||
|
assertEquals(1, s1dataB.size());
|
||||||
|
assertEquals("id2-s1dataB-1", s1dataB.get(0));
|
||||||
|
assertEquals(1, s1dataC.size());
|
||||||
|
assertNull(s1dataC.get(0));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
assertEquals("3", result.get(2).get("id"));
|
||||||
|
List a = (List)result.get(2).get("a");
|
||||||
|
List s1dataA = (List)result.get(2).get("s1dataA");
|
||||||
|
List s1dataB = (List)result.get(2).get("s1dataB");
|
||||||
|
List s1dataC = (List)result.get(2).get("s1dataC");
|
||||||
|
assertTrue(a==null || a.size()==0);
|
||||||
|
assertEquals(1, s1dataA.size());
|
||||||
|
assertEquals("id3-s1dataA-1", s1dataA.get(0));
|
||||||
|
assertEquals(1, s1dataB.size());
|
||||||
|
assertNull(s1dataB.get(0));
|
||||||
|
assertEquals(1, s1dataC.size());
|
||||||
|
assertNull(s1dataC.get(0));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
assertEquals("4", result.get(3).get("id"));
|
||||||
|
List a = (List)result.get(3).get("a");
|
||||||
|
List s1dataA = (List)result.get(3).get("s1dataA");
|
||||||
|
List s1dataB = (List)result.get(3).get("s1dataB");
|
||||||
|
List s1dataC = (List)result.get(3).get("s1dataC");
|
||||||
|
assertTrue(a==null || a.size()==0);
|
||||||
|
assertEquals(1, s1dataA.size());
|
||||||
|
assertEquals("id4-s1dataA-1", s1dataA.get(0));
|
||||||
|
assertEquals(1, s1dataB.size());
|
||||||
|
assertEquals("id4-s1dataB-1", s1dataB.get(0));
|
||||||
|
assertEquals(1, s1dataC.size());
|
||||||
|
assertEquals("id4-s1dataC-1", s1dataC.get(0));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
assertEquals("5", result.get(4).get("id"));
|
||||||
|
List a = (List)result.get(4).get("a");
|
||||||
|
List s1dataA = (List)result.get(4).get("s1dataA");
|
||||||
|
List s1dataB = (List)result.get(4).get("s1dataB");
|
||||||
|
List s1dataC = (List)result.get(4).get("s1dataC");
|
||||||
|
assertTrue(a==null || a.size()==0);
|
||||||
|
assertEquals(1, s1dataA.size());
|
||||||
|
assertNull(s1dataA.get(0));
|
||||||
|
assertEquals(1, s1dataB.size());
|
||||||
|
assertNull(s1dataB.get(0));
|
||||||
|
assertEquals(1, s1dataC.size());
|
||||||
|
assertEquals("id5-s1dataC-1", s1dataC.get(0));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
assertEquals("6", result.get(5).get("id"));
|
||||||
|
List a = (List)result.get(5).get("a");
|
||||||
|
List s1dataA = (List)result.get(5).get("s1dataA");
|
||||||
|
List s1dataB = (List)result.get(5).get("s1dataB");
|
||||||
|
List s1dataC = (List)result.get(5).get("s1dataC");
|
||||||
|
assertTrue(a==null || a.size()==0);
|
||||||
|
assertEquals(3, s1dataA.size());
|
||||||
|
assertEquals("id6-s1dataA-1", s1dataA.get(0));
|
||||||
|
assertEquals("id6-s1dataA-2", s1dataA.get(1));
|
||||||
|
assertNull(s1dataA.get(2));
|
||||||
|
assertEquals(3, s1dataB.size());
|
||||||
|
assertEquals("id6-s1dataB-1", s1dataB.get(0));
|
||||||
|
assertEquals("id6-s1dataB-2", s1dataB.get(1));
|
||||||
|
assertEquals("id6-s1dataB-3", s1dataB.get(2));
|
||||||
|
assertEquals(3, s1dataC.size());
|
||||||
|
assertEquals("id6-s1dataC-1", s1dataC.get(0));
|
||||||
|
assertNull(s1dataC.get(1));
|
||||||
|
assertEquals("id6-s1dataC-3", s1dataC.get(2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMultiValuedFlatten() throws Exception {
|
public void testMultiValuedFlatten() throws Exception {
|
||||||
Map entityAttrs = createMap("name", "e", "url", "testdata.xml",
|
Map entityAttrs = createMap("name", "e", "url", "testdata.xml",
|
||||||
|
@ -305,4 +427,68 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase
|
||||||
private static final String testXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root [\n<!ENTITY uuml \"ü\" >\n]>\n<root><a>1</a><a>2</a><a>ü</a></root>";
|
private static final String testXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root [\n<!ENTITY uuml \"ü\" >\n]>\n<root><a>1</a><a>2</a><a>ü</a></root>";
|
||||||
|
|
||||||
private static final String testXmlFlatten = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root><a>1<b>B</b>2</a></root>";
|
private static final String testXmlFlatten = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root><a>1<b>B</b>2</a></root>";
|
||||||
|
|
||||||
|
private static final String textMultipleDocuments =
|
||||||
|
"<?xml version=\"1.0\" ?>" +
|
||||||
|
"<documents>" +
|
||||||
|
" <doc>" +
|
||||||
|
" <id>1</id>" +
|
||||||
|
" <a>id1-a1</a>" +
|
||||||
|
" <a>id1-a2</a>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataA>id1-s1dataA-1</s1dataA>" +
|
||||||
|
" <s1dataB>id1-s1dataB-1</s1dataB>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataB>id1-s1dataB-2</s1dataB>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataA>id1-s1dataA-3</s1dataA>" +
|
||||||
|
" <s1dataB>id1-s1dataB-3</s1dataB>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" </doc>" +
|
||||||
|
" <doc>" +
|
||||||
|
" <id>2</id>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataB>id2-s1dataB-1</s1dataB>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" </doc>" +
|
||||||
|
" <doc>" +
|
||||||
|
" <id>3</id>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataA>id3-s1dataA-1</s1dataA>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" </doc>" +
|
||||||
|
" <doc>" +
|
||||||
|
" <id>4</id>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataA>id4-s1dataA-1</s1dataA>" +
|
||||||
|
" <s1dataB>id4-s1dataB-1</s1dataB>" +
|
||||||
|
" <s1dataC>id4-s1dataC-1</s1dataC>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" </doc>" +
|
||||||
|
" <doc>" +
|
||||||
|
" <id>5</id>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataC>id5-s1dataC-1</s1dataC>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" </doc>" +
|
||||||
|
" <doc>" +
|
||||||
|
" <id>6</id>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataA>id6-s1dataA-1</s1dataA>" +
|
||||||
|
" <s1dataB>id6-s1dataB-1</s1dataB>" +
|
||||||
|
" <s1dataC>id6-s1dataC-1</s1dataC>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataA>id6-s1dataA-2</s1dataA>" +
|
||||||
|
" <s1dataB>id6-s1dataB-2</s1dataB>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" <sec1>" +
|
||||||
|
" <s1dataB>id6-s1dataB-3</s1dataB>" +
|
||||||
|
" <s1dataC>id6-s1dataC-3</s1dataC>" +
|
||||||
|
" </sec1>" +
|
||||||
|
" </doc>" +
|
||||||
|
"</documents>"
|
||||||
|
;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue