From 35b159f14c23fea58e0d98a4af24ace50ce1fac4 Mon Sep 17 00:00:00 2001 From: James Dyer Date: Tue, 24 Dec 2013 15:12:07 +0000 Subject: [PATCH] SOLR-2960: XPathEntityProcessor was adding spurious nulls to multi-valued fields git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1553285 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../handler/dataimport/XPathRecordReader.java | 26 ++- .../dataimport/TestXPathEntityProcessor.java | 186 ++++++++++++++++++ 3 files changed, 207 insertions(+), 8 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index efa5cd070ea..875665d2cf4 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -230,6 +230,9 @@ Optimizations * SOLR-5576: Improve concurrency when registering and waiting for all SolrCore's to register a DOWN state. (Christine Poerschke via Mark Miller) +* SOLR-2960: fix DIH XPathEntityProcessor to add the correct number of "null" + placeholders for multi-valued fields (Michael Watts via James Dyer) + Other Changes --------------------- diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java index e5c808ddd15..23f4755fb3d 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java @@ -296,7 +296,7 @@ public class XPathRecordReader { for (Node n : childNodes) { // For the multivalue child nodes where we could have, but // didnt, collect text. Push a null string into values. - if (!childrenFound.contains(n)) n.putNulls(values); + if (!childrenFound.contains(n)) n.putNulls(values, valuesAddedinThisFrame); } } return; @@ -429,18 +429,28 @@ public class XPathRecordReader { * pushing a null string onto every multiValued fieldName's List of values * where a value has not been provided from the stream. */ - private void putNulls(Map values) { + private void putNulls(Map values, Set valuesAddedinThisFrame) { if (attributes != null) { for (Node n : attributes) { - if (n.multiValued) - putText(values, null, n.fieldName, true); + if (n.multiValued) { + putANull(n.fieldName, values, valuesAddedinThisFrame); + } } } - if (hasText && multiValued) - putText(values, null, fieldName, true); + if (hasText && multiValued) { + putANull(fieldName, values, valuesAddedinThisFrame); + } if (childNodes != null) { - for (Node childNode : childNodes) - childNode.putNulls(values); + for (Node childNode : childNodes) { + childNode.putNulls(values, valuesAddedinThisFrame); + } + } + } + + private void putANull(String thisFieldName, Map values, Set valuesAddedinThisFrame) { + putText(values, null, thisFieldName, true); + if( valuesAddedinThisFrame != null) { + valuesAddedinThisFrame.add(thisFieldName); } } diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java index 416dc2077ac..3d105f67327 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java @@ -93,6 +93,128 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase assertEquals("2", l.get(1)); assertEquals("ΓΌ", l.get(2)); } + + @SuppressWarnings({"rawtypes", "unchecked"}) + @Test + public void testMultiValuedWithMultipleDocuments() throws Exception { + Map entityAttrs = createMap("name", "e", "url", "testdata.xml", XPathEntityProcessor.FOR_EACH, "/documents/doc"); + List fields = new ArrayList(); + fields.add(createMap("column", "id", "xpath", "/documents/doc/id", DataImporter.MULTI_VALUED, "false")); + fields.add(createMap("column", "a", "xpath", "/documents/doc/a", DataImporter.MULTI_VALUED, "true")); + fields.add(createMap("column", "s1dataA", "xpath", "/documents/doc/sec1/s1dataA", DataImporter.MULTI_VALUED, "true")); + fields.add(createMap("column", "s1dataB", "xpath", "/documents/doc/sec1/s1dataB", DataImporter.MULTI_VALUED, "true")); + fields.add(createMap("column", "s1dataC", "xpath", "/documents/doc/sec1/s1dataC", DataImporter.MULTI_VALUED, "true")); + + Context c = getContext(null, + new VariableResolver(), getDataSource(textMultipleDocuments), Context.FULL_DUMP, fields, entityAttrs); + XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); + xPathEntityProcessor.init(c); + List> result = new ArrayList>(); + while (true) { + Map row = xPathEntityProcessor.nextRow(); + if (row == null) + break; + result.add(row); + } + { + assertEquals("1", result.get(0).get("id")); + List a = (List)result.get(0).get("a"); + List s1dataA = (List)result.get(0).get("s1dataA"); + List s1dataB = (List)result.get(0).get("s1dataB"); + List s1dataC = (List)result.get(0).get("s1dataC"); + assertEquals(2, a.size()); + assertEquals("id1-a1", a.get(0)); + assertEquals("id1-a2", a.get(1)); + assertEquals(3, s1dataA.size()); + assertEquals("id1-s1dataA-1", s1dataA.get(0)); + assertNull(s1dataA.get(1)); + assertEquals("id1-s1dataA-3", s1dataA.get(2)); + assertEquals(3, s1dataB.size()); + assertEquals("id1-s1dataB-1", s1dataB.get(0)); + assertEquals("id1-s1dataB-2", s1dataB.get(1)); + assertEquals("id1-s1dataB-3", s1dataB.get(2)); + assertEquals(3, s1dataC.size()); + assertNull(s1dataC.get(0)); + assertNull(s1dataC.get(1)); + assertNull(s1dataC.get(2)); + } + { + assertEquals("2", result.get(1).get("id")); + List a = (List)result.get(1).get("a"); + List s1dataA = (List)result.get(1).get("s1dataA"); + List s1dataB = (List)result.get(1).get("s1dataB"); + List s1dataC = (List)result.get(1).get("s1dataC"); + assertTrue(a==null || a.size()==0); + assertEquals(1, s1dataA.size()); + assertNull(s1dataA.get(0)); + assertEquals(1, s1dataB.size()); + assertEquals("id2-s1dataB-1", s1dataB.get(0)); + assertEquals(1, s1dataC.size()); + assertNull(s1dataC.get(0)); + } + { + assertEquals("3", result.get(2).get("id")); + List a = (List)result.get(2).get("a"); + List s1dataA = (List)result.get(2).get("s1dataA"); + List s1dataB = (List)result.get(2).get("s1dataB"); + List s1dataC = (List)result.get(2).get("s1dataC"); + assertTrue(a==null || a.size()==0); + assertEquals(1, s1dataA.size()); + assertEquals("id3-s1dataA-1", s1dataA.get(0)); + assertEquals(1, s1dataB.size()); + assertNull(s1dataB.get(0)); + assertEquals(1, s1dataC.size()); + assertNull(s1dataC.get(0)); + } + { + assertEquals("4", result.get(3).get("id")); + List a = (List)result.get(3).get("a"); + List s1dataA = (List)result.get(3).get("s1dataA"); + List s1dataB = (List)result.get(3).get("s1dataB"); + List s1dataC = (List)result.get(3).get("s1dataC"); + assertTrue(a==null || a.size()==0); + assertEquals(1, s1dataA.size()); + assertEquals("id4-s1dataA-1", s1dataA.get(0)); + assertEquals(1, s1dataB.size()); + assertEquals("id4-s1dataB-1", s1dataB.get(0)); + assertEquals(1, s1dataC.size()); + assertEquals("id4-s1dataC-1", s1dataC.get(0)); + } + { + assertEquals("5", result.get(4).get("id")); + List a = (List)result.get(4).get("a"); + List s1dataA = (List)result.get(4).get("s1dataA"); + List s1dataB = (List)result.get(4).get("s1dataB"); + List s1dataC = (List)result.get(4).get("s1dataC"); + assertTrue(a==null || a.size()==0); + assertEquals(1, s1dataA.size()); + assertNull(s1dataA.get(0)); + assertEquals(1, s1dataB.size()); + assertNull(s1dataB.get(0)); + assertEquals(1, s1dataC.size()); + assertEquals("id5-s1dataC-1", s1dataC.get(0)); + } + { + assertEquals("6", result.get(5).get("id")); + List a = (List)result.get(5).get("a"); + List s1dataA = (List)result.get(5).get("s1dataA"); + List s1dataB = (List)result.get(5).get("s1dataB"); + List s1dataC = (List)result.get(5).get("s1dataC"); + assertTrue(a==null || a.size()==0); + assertEquals(3, s1dataA.size()); + assertEquals("id6-s1dataA-1", s1dataA.get(0)); + assertEquals("id6-s1dataA-2", s1dataA.get(1)); + assertNull(s1dataA.get(2)); + assertEquals(3, s1dataB.size()); + assertEquals("id6-s1dataB-1", s1dataB.get(0)); + assertEquals("id6-s1dataB-2", s1dataB.get(1)); + assertEquals("id6-s1dataB-3", s1dataB.get(2)); + assertEquals(3, s1dataC.size()); + assertEquals("id6-s1dataC-1", s1dataC.get(0)); + assertNull(s1dataC.get(1)); + assertEquals("id6-s1dataC-3", s1dataC.get(2)); + } + } @Test public void testMultiValuedFlatten() throws Exception { @@ -305,4 +427,68 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase private static final String testXml = "\n\n]>\n12ü"; private static final String testXmlFlatten = "1B2"; + + private static final String textMultipleDocuments = + "" + + "" + + " " + + " 1" + + " id1-a1" + + " id1-a2" + + " " + + " id1-s1dataA-1" + + " id1-s1dataB-1" + + " " + + " " + + " id1-s1dataB-2" + + " " + + " " + + " id1-s1dataA-3" + + " id1-s1dataB-3" + + " " + + " " + + " " + + " 2" + + " " + + " id2-s1dataB-1" + + " " + + " " + + " " + + " 3" + + " " + + " id3-s1dataA-1" + + " " + + " " + + " " + + " 4" + + " " + + " id4-s1dataA-1" + + " id4-s1dataB-1" + + " id4-s1dataC-1" + + " " + + " " + + " " + + " 5" + + " " + + " id5-s1dataC-1" + + " " + + " " + + " " + + " 6" + + " " + + " id6-s1dataA-1" + + " id6-s1dataB-1" + + " id6-s1dataC-1" + + " " + + " " + + " id6-s1dataA-2" + + " id6-s1dataB-2" + + " " + + " " + + " id6-s1dataB-3" + + " id6-s1dataC-3" + + " " + + " " + + "" + ; }