SOLR-2960: XPathEntityProcessor was adding spurious nulls to multi-valued fields

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1553285 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
James Dyer 2013-12-24 15:12:07 +00:00
parent a0049aa5f9
commit 35b159f14c
3 changed files with 207 additions and 8 deletions

View File

@ -230,6 +230,9 @@ Optimizations
* SOLR-5576: Improve concurrency when registering and waiting for all
SolrCore's to register a DOWN state. (Christine Poerschke via Mark Miller)
* SOLR-2960: fix DIH XPathEntityProcessor to add the correct number of "null"
placeholders for multi-valued fields (Michael Watts via James Dyer)
Other Changes
---------------------

View File

@ -296,7 +296,7 @@ public class XPathRecordReader {
for (Node n : childNodes) {
// For the multivalue child nodes where we could have, but
// didnt, collect text. Push a null string into values.
if (!childrenFound.contains(n)) n.putNulls(values);
if (!childrenFound.contains(n)) n.putNulls(values, valuesAddedinThisFrame);
}
}
return;
@ -429,18 +429,28 @@ public class XPathRecordReader {
* pushing a null string onto every multiValued fieldName's List of values
* where a value has not been provided from the stream.
*/
private void putNulls(Map<String, Object> values) {
private void putNulls(Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
if (attributes != null) {
for (Node n : attributes) {
if (n.multiValued)
putText(values, null, n.fieldName, true);
if (n.multiValued) {
putANull(n.fieldName, values, valuesAddedinThisFrame);
}
}
}
if (hasText && multiValued)
putText(values, null, fieldName, true);
if (hasText && multiValued) {
putANull(fieldName, values, valuesAddedinThisFrame);
}
if (childNodes != null) {
for (Node childNode : childNodes)
childNode.putNulls(values);
for (Node childNode : childNodes) {
childNode.putNulls(values, valuesAddedinThisFrame);
}
}
}
private void putANull(String thisFieldName, Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
putText(values, null, thisFieldName, true);
if( valuesAddedinThisFrame != null) {
valuesAddedinThisFrame.add(thisFieldName);
}
}

View File

@ -93,6 +93,128 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase
assertEquals("2", l.get(1));
assertEquals("ü", l.get(2));
}
@SuppressWarnings({"rawtypes", "unchecked"})
@Test
public void testMultiValuedWithMultipleDocuments() throws Exception {
Map entityAttrs = createMap("name", "e", "url", "testdata.xml", XPathEntityProcessor.FOR_EACH, "/documents/doc");
List fields = new ArrayList();
fields.add(createMap("column", "id", "xpath", "/documents/doc/id", DataImporter.MULTI_VALUED, "false"));
fields.add(createMap("column", "a", "xpath", "/documents/doc/a", DataImporter.MULTI_VALUED, "true"));
fields.add(createMap("column", "s1dataA", "xpath", "/documents/doc/sec1/s1dataA", DataImporter.MULTI_VALUED, "true"));
fields.add(createMap("column", "s1dataB", "xpath", "/documents/doc/sec1/s1dataB", DataImporter.MULTI_VALUED, "true"));
fields.add(createMap("column", "s1dataC", "xpath", "/documents/doc/sec1/s1dataC", DataImporter.MULTI_VALUED, "true"));
Context c = getContext(null,
new VariableResolver(), getDataSource(textMultipleDocuments), Context.FULL_DUMP, fields, entityAttrs);
XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
xPathEntityProcessor.init(c);
List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> row = xPathEntityProcessor.nextRow();
if (row == null)
break;
result.add(row);
}
{
assertEquals("1", result.get(0).get("id"));
List a = (List)result.get(0).get("a");
List s1dataA = (List)result.get(0).get("s1dataA");
List s1dataB = (List)result.get(0).get("s1dataB");
List s1dataC = (List)result.get(0).get("s1dataC");
assertEquals(2, a.size());
assertEquals("id1-a1", a.get(0));
assertEquals("id1-a2", a.get(1));
assertEquals(3, s1dataA.size());
assertEquals("id1-s1dataA-1", s1dataA.get(0));
assertNull(s1dataA.get(1));
assertEquals("id1-s1dataA-3", s1dataA.get(2));
assertEquals(3, s1dataB.size());
assertEquals("id1-s1dataB-1", s1dataB.get(0));
assertEquals("id1-s1dataB-2", s1dataB.get(1));
assertEquals("id1-s1dataB-3", s1dataB.get(2));
assertEquals(3, s1dataC.size());
assertNull(s1dataC.get(0));
assertNull(s1dataC.get(1));
assertNull(s1dataC.get(2));
}
{
assertEquals("2", result.get(1).get("id"));
List a = (List)result.get(1).get("a");
List s1dataA = (List)result.get(1).get("s1dataA");
List s1dataB = (List)result.get(1).get("s1dataB");
List s1dataC = (List)result.get(1).get("s1dataC");
assertTrue(a==null || a.size()==0);
assertEquals(1, s1dataA.size());
assertNull(s1dataA.get(0));
assertEquals(1, s1dataB.size());
assertEquals("id2-s1dataB-1", s1dataB.get(0));
assertEquals(1, s1dataC.size());
assertNull(s1dataC.get(0));
}
{
assertEquals("3", result.get(2).get("id"));
List a = (List)result.get(2).get("a");
List s1dataA = (List)result.get(2).get("s1dataA");
List s1dataB = (List)result.get(2).get("s1dataB");
List s1dataC = (List)result.get(2).get("s1dataC");
assertTrue(a==null || a.size()==0);
assertEquals(1, s1dataA.size());
assertEquals("id3-s1dataA-1", s1dataA.get(0));
assertEquals(1, s1dataB.size());
assertNull(s1dataB.get(0));
assertEquals(1, s1dataC.size());
assertNull(s1dataC.get(0));
}
{
assertEquals("4", result.get(3).get("id"));
List a = (List)result.get(3).get("a");
List s1dataA = (List)result.get(3).get("s1dataA");
List s1dataB = (List)result.get(3).get("s1dataB");
List s1dataC = (List)result.get(3).get("s1dataC");
assertTrue(a==null || a.size()==0);
assertEquals(1, s1dataA.size());
assertEquals("id4-s1dataA-1", s1dataA.get(0));
assertEquals(1, s1dataB.size());
assertEquals("id4-s1dataB-1", s1dataB.get(0));
assertEquals(1, s1dataC.size());
assertEquals("id4-s1dataC-1", s1dataC.get(0));
}
{
assertEquals("5", result.get(4).get("id"));
List a = (List)result.get(4).get("a");
List s1dataA = (List)result.get(4).get("s1dataA");
List s1dataB = (List)result.get(4).get("s1dataB");
List s1dataC = (List)result.get(4).get("s1dataC");
assertTrue(a==null || a.size()==0);
assertEquals(1, s1dataA.size());
assertNull(s1dataA.get(0));
assertEquals(1, s1dataB.size());
assertNull(s1dataB.get(0));
assertEquals(1, s1dataC.size());
assertEquals("id5-s1dataC-1", s1dataC.get(0));
}
{
assertEquals("6", result.get(5).get("id"));
List a = (List)result.get(5).get("a");
List s1dataA = (List)result.get(5).get("s1dataA");
List s1dataB = (List)result.get(5).get("s1dataB");
List s1dataC = (List)result.get(5).get("s1dataC");
assertTrue(a==null || a.size()==0);
assertEquals(3, s1dataA.size());
assertEquals("id6-s1dataA-1", s1dataA.get(0));
assertEquals("id6-s1dataA-2", s1dataA.get(1));
assertNull(s1dataA.get(2));
assertEquals(3, s1dataB.size());
assertEquals("id6-s1dataB-1", s1dataB.get(0));
assertEquals("id6-s1dataB-2", s1dataB.get(1));
assertEquals("id6-s1dataB-3", s1dataB.get(2));
assertEquals(3, s1dataC.size());
assertEquals("id6-s1dataC-1", s1dataC.get(0));
assertNull(s1dataC.get(1));
assertEquals("id6-s1dataC-3", s1dataC.get(2));
}
}
@Test
public void testMultiValuedFlatten() throws Exception {
@ -305,4 +427,68 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase
private static final String testXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root [\n<!ENTITY uuml \"&#252;\" >\n]>\n<root><a>1</a><a>2</a><a>&uuml;</a></root>";
private static final String testXmlFlatten = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root><a>1<b>B</b>2</a></root>";
private static final String textMultipleDocuments =
"<?xml version=\"1.0\" ?>" +
"<documents>" +
" <doc>" +
" <id>1</id>" +
" <a>id1-a1</a>" +
" <a>id1-a2</a>" +
" <sec1>" +
" <s1dataA>id1-s1dataA-1</s1dataA>" +
" <s1dataB>id1-s1dataB-1</s1dataB>" +
" </sec1>" +
" <sec1>" +
" <s1dataB>id1-s1dataB-2</s1dataB>" +
" </sec1>" +
" <sec1>" +
" <s1dataA>id1-s1dataA-3</s1dataA>" +
" <s1dataB>id1-s1dataB-3</s1dataB>" +
" </sec1>" +
" </doc>" +
" <doc>" +
" <id>2</id>" +
" <sec1>" +
" <s1dataB>id2-s1dataB-1</s1dataB>" +
" </sec1>" +
" </doc>" +
" <doc>" +
" <id>3</id>" +
" <sec1>" +
" <s1dataA>id3-s1dataA-1</s1dataA>" +
" </sec1>" +
" </doc>" +
" <doc>" +
" <id>4</id>" +
" <sec1>" +
" <s1dataA>id4-s1dataA-1</s1dataA>" +
" <s1dataB>id4-s1dataB-1</s1dataB>" +
" <s1dataC>id4-s1dataC-1</s1dataC>" +
" </sec1>" +
" </doc>" +
" <doc>" +
" <id>5</id>" +
" <sec1>" +
" <s1dataC>id5-s1dataC-1</s1dataC>" +
" </sec1>" +
" </doc>" +
" <doc>" +
" <id>6</id>" +
" <sec1>" +
" <s1dataA>id6-s1dataA-1</s1dataA>" +
" <s1dataB>id6-s1dataB-1</s1dataB>" +
" <s1dataC>id6-s1dataC-1</s1dataC>" +
" </sec1>" +
" <sec1>" +
" <s1dataA>id6-s1dataA-2</s1dataA>" +
" <s1dataB>id6-s1dataB-2</s1dataB>" +
" </sec1>" +
" <sec1>" +
" <s1dataB>id6-s1dataB-3</s1dataB>" +
" <s1dataC>id6-s1dataC-3</s1dataC>" +
" </sec1>" +
" </doc>" +
"</documents>"
;
}