mirror of https://github.com/apache/lucene.git
SOLR-2960: XPathEntityProcessor was adding spurious nulls to multi-valued fields
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1553285 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a0049aa5f9
commit
35b159f14c
|
@ -230,6 +230,9 @@ Optimizations
|
|||
* SOLR-5576: Improve concurrency when registering and waiting for all
|
||||
SolrCore's to register a DOWN state. (Christine Poerschke via Mark Miller)
|
||||
|
||||
* SOLR-2960: fix DIH XPathEntityProcessor to add the correct number of "null"
|
||||
placeholders for multi-valued fields (Michael Watts via James Dyer)
|
||||
|
||||
Other Changes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -296,7 +296,7 @@ public class XPathRecordReader {
|
|||
for (Node n : childNodes) {
|
||||
// For the multivalue child nodes where we could have, but
|
||||
// didn't, collect text. Push a null string into values.
|
||||
if (!childrenFound.contains(n)) n.putNulls(values);
|
||||
if (!childrenFound.contains(n)) n.putNulls(values, valuesAddedinThisFrame);
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
@ -429,18 +429,28 @@ public class XPathRecordReader {
|
|||
* pushing a null string onto every multiValued fieldName's List of values
|
||||
* where a value has not been provided from the stream.
|
||||
*/
|
||||
private void putNulls(Map<String, Object> values) {
|
||||
private void putNulls(Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
|
||||
if (attributes != null) {
|
||||
for (Node n : attributes) {
|
||||
if (n.multiValued)
|
||||
putText(values, null, n.fieldName, true);
|
||||
if (n.multiValued) {
|
||||
putANull(n.fieldName, values, valuesAddedinThisFrame);
|
||||
}
|
||||
}
|
||||
if (hasText && multiValued)
|
||||
putText(values, null, fieldName, true);
|
||||
}
|
||||
if (hasText && multiValued) {
|
||||
putANull(fieldName, values, valuesAddedinThisFrame);
|
||||
}
|
||||
if (childNodes != null) {
|
||||
for (Node childNode : childNodes)
|
||||
childNode.putNulls(values);
|
||||
for (Node childNode : childNodes) {
|
||||
childNode.putNulls(values, valuesAddedinThisFrame);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void putANull(String thisFieldName, Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
|
||||
putText(values, null, thisFieldName, true);
|
||||
if( valuesAddedinThisFrame != null) {
|
||||
valuesAddedinThisFrame.add(thisFieldName);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -94,6 +94,128 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase
|
|||
assertEquals("ü", l.get(2));
|
||||
}
|
||||
|
||||
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||
@Test
|
||||
public void testMultiValuedWithMultipleDocuments() throws Exception {
|
||||
Map entityAttrs = createMap("name", "e", "url", "testdata.xml", XPathEntityProcessor.FOR_EACH, "/documents/doc");
|
||||
List fields = new ArrayList();
|
||||
fields.add(createMap("column", "id", "xpath", "/documents/doc/id", DataImporter.MULTI_VALUED, "false"));
|
||||
fields.add(createMap("column", "a", "xpath", "/documents/doc/a", DataImporter.MULTI_VALUED, "true"));
|
||||
fields.add(createMap("column", "s1dataA", "xpath", "/documents/doc/sec1/s1dataA", DataImporter.MULTI_VALUED, "true"));
|
||||
fields.add(createMap("column", "s1dataB", "xpath", "/documents/doc/sec1/s1dataB", DataImporter.MULTI_VALUED, "true"));
|
||||
fields.add(createMap("column", "s1dataC", "xpath", "/documents/doc/sec1/s1dataC", DataImporter.MULTI_VALUED, "true"));
|
||||
|
||||
Context c = getContext(null,
|
||||
new VariableResolver(), getDataSource(textMultipleDocuments), Context.FULL_DUMP, fields, entityAttrs);
|
||||
XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
|
||||
xPathEntityProcessor.init(c);
|
||||
List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
|
||||
while (true) {
|
||||
Map<String, Object> row = xPathEntityProcessor.nextRow();
|
||||
if (row == null)
|
||||
break;
|
||||
result.add(row);
|
||||
}
|
||||
{
|
||||
assertEquals("1", result.get(0).get("id"));
|
||||
List a = (List)result.get(0).get("a");
|
||||
List s1dataA = (List)result.get(0).get("s1dataA");
|
||||
List s1dataB = (List)result.get(0).get("s1dataB");
|
||||
List s1dataC = (List)result.get(0).get("s1dataC");
|
||||
assertEquals(2, a.size());
|
||||
assertEquals("id1-a1", a.get(0));
|
||||
assertEquals("id1-a2", a.get(1));
|
||||
assertEquals(3, s1dataA.size());
|
||||
assertEquals("id1-s1dataA-1", s1dataA.get(0));
|
||||
assertNull(s1dataA.get(1));
|
||||
assertEquals("id1-s1dataA-3", s1dataA.get(2));
|
||||
assertEquals(3, s1dataB.size());
|
||||
assertEquals("id1-s1dataB-1", s1dataB.get(0));
|
||||
assertEquals("id1-s1dataB-2", s1dataB.get(1));
|
||||
assertEquals("id1-s1dataB-3", s1dataB.get(2));
|
||||
assertEquals(3, s1dataC.size());
|
||||
assertNull(s1dataC.get(0));
|
||||
assertNull(s1dataC.get(1));
|
||||
assertNull(s1dataC.get(2));
|
||||
}
|
||||
{
|
||||
assertEquals("2", result.get(1).get("id"));
|
||||
List a = (List)result.get(1).get("a");
|
||||
List s1dataA = (List)result.get(1).get("s1dataA");
|
||||
List s1dataB = (List)result.get(1).get("s1dataB");
|
||||
List s1dataC = (List)result.get(1).get("s1dataC");
|
||||
assertTrue(a==null || a.size()==0);
|
||||
assertEquals(1, s1dataA.size());
|
||||
assertNull(s1dataA.get(0));
|
||||
assertEquals(1, s1dataB.size());
|
||||
assertEquals("id2-s1dataB-1", s1dataB.get(0));
|
||||
assertEquals(1, s1dataC.size());
|
||||
assertNull(s1dataC.get(0));
|
||||
}
|
||||
{
|
||||
assertEquals("3", result.get(2).get("id"));
|
||||
List a = (List)result.get(2).get("a");
|
||||
List s1dataA = (List)result.get(2).get("s1dataA");
|
||||
List s1dataB = (List)result.get(2).get("s1dataB");
|
||||
List s1dataC = (List)result.get(2).get("s1dataC");
|
||||
assertTrue(a==null || a.size()==0);
|
||||
assertEquals(1, s1dataA.size());
|
||||
assertEquals("id3-s1dataA-1", s1dataA.get(0));
|
||||
assertEquals(1, s1dataB.size());
|
||||
assertNull(s1dataB.get(0));
|
||||
assertEquals(1, s1dataC.size());
|
||||
assertNull(s1dataC.get(0));
|
||||
}
|
||||
{
|
||||
assertEquals("4", result.get(3).get("id"));
|
||||
List a = (List)result.get(3).get("a");
|
||||
List s1dataA = (List)result.get(3).get("s1dataA");
|
||||
List s1dataB = (List)result.get(3).get("s1dataB");
|
||||
List s1dataC = (List)result.get(3).get("s1dataC");
|
||||
assertTrue(a==null || a.size()==0);
|
||||
assertEquals(1, s1dataA.size());
|
||||
assertEquals("id4-s1dataA-1", s1dataA.get(0));
|
||||
assertEquals(1, s1dataB.size());
|
||||
assertEquals("id4-s1dataB-1", s1dataB.get(0));
|
||||
assertEquals(1, s1dataC.size());
|
||||
assertEquals("id4-s1dataC-1", s1dataC.get(0));
|
||||
}
|
||||
{
|
||||
assertEquals("5", result.get(4).get("id"));
|
||||
List a = (List)result.get(4).get("a");
|
||||
List s1dataA = (List)result.get(4).get("s1dataA");
|
||||
List s1dataB = (List)result.get(4).get("s1dataB");
|
||||
List s1dataC = (List)result.get(4).get("s1dataC");
|
||||
assertTrue(a==null || a.size()==0);
|
||||
assertEquals(1, s1dataA.size());
|
||||
assertNull(s1dataA.get(0));
|
||||
assertEquals(1, s1dataB.size());
|
||||
assertNull(s1dataB.get(0));
|
||||
assertEquals(1, s1dataC.size());
|
||||
assertEquals("id5-s1dataC-1", s1dataC.get(0));
|
||||
}
|
||||
{
|
||||
assertEquals("6", result.get(5).get("id"));
|
||||
List a = (List)result.get(5).get("a");
|
||||
List s1dataA = (List)result.get(5).get("s1dataA");
|
||||
List s1dataB = (List)result.get(5).get("s1dataB");
|
||||
List s1dataC = (List)result.get(5).get("s1dataC");
|
||||
assertTrue(a==null || a.size()==0);
|
||||
assertEquals(3, s1dataA.size());
|
||||
assertEquals("id6-s1dataA-1", s1dataA.get(0));
|
||||
assertEquals("id6-s1dataA-2", s1dataA.get(1));
|
||||
assertNull(s1dataA.get(2));
|
||||
assertEquals(3, s1dataB.size());
|
||||
assertEquals("id6-s1dataB-1", s1dataB.get(0));
|
||||
assertEquals("id6-s1dataB-2", s1dataB.get(1));
|
||||
assertEquals("id6-s1dataB-3", s1dataB.get(2));
|
||||
assertEquals(3, s1dataC.size());
|
||||
assertEquals("id6-s1dataC-1", s1dataC.get(0));
|
||||
assertNull(s1dataC.get(1));
|
||||
assertEquals("id6-s1dataC-3", s1dataC.get(2));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiValuedFlatten() throws Exception {
|
||||
Map entityAttrs = createMap("name", "e", "url", "testdata.xml",
|
||||
|
@ -305,4 +427,68 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase
|
|||
private static final String testXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root [\n<!ENTITY uuml \"ü\" >\n]>\n<root><a>1</a><a>2</a><a>ü</a></root>";
|
||||
|
||||
private static final String testXmlFlatten = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root><a>1<b>B</b>2</a></root>";
|
||||
|
||||
/**
 * XML fixture read by testMultiValuedWithMultipleDocuments: a
 * {@code <documents>} root holding six {@code <doc>} elements with ids 1-6.
 * Only doc 1 carries {@code <a>} elements. Docs 1 and 6 each have three
 * {@code <sec1>} sections and docs 2-5 have one; each section contains a
 * different subset of {@code s1dataA}, {@code s1dataB} and {@code s1dataC},
 * so some fields are absent from some sections.
 */
private static final String textMultipleDocuments =
|
||||
"<?xml version=\"1.0\" ?>" +
|
||||
"<documents>" +
|
||||
" <doc>" +
|
||||
" <id>1</id>" +
|
||||
" <a>id1-a1</a>" +
|
||||
" <a>id1-a2</a>" +
|
||||
" <sec1>" +
|
||||
" <s1dataA>id1-s1dataA-1</s1dataA>" +
|
||||
" <s1dataB>id1-s1dataB-1</s1dataB>" +
|
||||
" </sec1>" +
|
||||
" <sec1>" +
|
||||
" <s1dataB>id1-s1dataB-2</s1dataB>" +
|
||||
" </sec1>" +
|
||||
" <sec1>" +
|
||||
" <s1dataA>id1-s1dataA-3</s1dataA>" +
|
||||
" <s1dataB>id1-s1dataB-3</s1dataB>" +
|
||||
" </sec1>" +
|
||||
" </doc>" +
|
||||
" <doc>" +
|
||||
" <id>2</id>" +
|
||||
" <sec1>" +
|
||||
" <s1dataB>id2-s1dataB-1</s1dataB>" +
|
||||
" </sec1>" +
|
||||
" </doc>" +
|
||||
" <doc>" +
|
||||
" <id>3</id>" +
|
||||
" <sec1>" +
|
||||
" <s1dataA>id3-s1dataA-1</s1dataA>" +
|
||||
" </sec1>" +
|
||||
" </doc>" +
|
||||
" <doc>" +
|
||||
" <id>4</id>" +
|
||||
" <sec1>" +
|
||||
" <s1dataA>id4-s1dataA-1</s1dataA>" +
|
||||
" <s1dataB>id4-s1dataB-1</s1dataB>" +
|
||||
" <s1dataC>id4-s1dataC-1</s1dataC>" +
|
||||
" </sec1>" +
|
||||
" </doc>" +
|
||||
" <doc>" +
|
||||
" <id>5</id>" +
|
||||
" <sec1>" +
|
||||
" <s1dataC>id5-s1dataC-1</s1dataC>" +
|
||||
" </sec1>" +
|
||||
" </doc>" +
|
||||
" <doc>" +
|
||||
" <id>6</id>" +
|
||||
" <sec1>" +
|
||||
" <s1dataA>id6-s1dataA-1</s1dataA>" +
|
||||
" <s1dataB>id6-s1dataB-1</s1dataB>" +
|
||||
" <s1dataC>id6-s1dataC-1</s1dataC>" +
|
||||
" </sec1>" +
|
||||
" <sec1>" +
|
||||
" <s1dataA>id6-s1dataA-2</s1dataA>" +
|
||||
" <s1dataB>id6-s1dataB-2</s1dataB>" +
|
||||
" </sec1>" +
|
||||
" <sec1>" +
|
||||
" <s1dataB>id6-s1dataB-3</s1dataB>" +
|
||||
" <s1dataC>id6-s1dataC-3</s1dataC>" +
|
||||
" </sec1>" +
|
||||
" </doc>" +
|
||||
"</documents>"
|
||||
;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue