diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d35c674cf35..303a0bcc1aa 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -242,6 +242,9 @@ Bug Fixes (Cao Manh Dat, Lance Norskog, Webster Homer, hossman, yonik) * SOLR-9692: blockUnknown property makes inter-node communication impossible (noble) + +* SOLR-2094: XPathEntityProcessor should reinitialize the XPathRecordReader instance if + the 'forEach' or 'xpath' attributes are templates & it is not a root entity (Cao Manh Dat, noble) Optimizations ---------------------- diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java index 637e1ef9844..cc28dc4144e 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java @@ -85,12 +85,14 @@ public class XPathEntityProcessor extends EntityProcessorBase { protected int blockingQueueSize = 1000; protected Thread publisherThread; + + protected boolean reinitXPathReader = true; @Override @SuppressWarnings("unchecked") public void init(Context context) { super.init(context); - if (xpathReader == null) + if (reinitXPathReader) initXpathReader(context.getVariableResolver()); pk = context.getEntityAttribute("pk"); dataSource = context.getDataSource(); @@ -99,6 +101,7 @@ public class XPathEntityProcessor extends EntityProcessorBase { } private void initXpathReader(VariableResolver resolver) { + reinitXPathReader = false; useSolrAddXml = Boolean.parseBoolean(context .getEntityAttribute(USE_SOLR_ADD_SCHEMA)); streamRows = Boolean.parseBoolean(context @@ -147,11 +150,12 @@ public class XPathEntityProcessor extends EntityProcessorBase { xpathReader.addField("name", "/add/doc/field/@name", true); xpathReader.addField("value", "/add/doc/field", true); } else { - String forEachXpath = context.getEntityAttribute(FOR_EACH); + String forEachXpath = context.getResolvedEntityAttribute(FOR_EACH); if (forEachXpath == null) throw new DataImportHandlerException(SEVERE, "Entity : " + context.getEntityAttribute("name") + " must have a 'forEach' attribute"); + if (forEachXpath.equals(context.getEntityAttribute(FOR_EACH))) reinitXPathReader = true; try { xpathReader = new XPathRecordReader(forEachXpath); @@ -164,6 +168,10 @@ public class XPathEntityProcessor extends EntityProcessorBase { } String xpath = field.get(XPATH); xpath = context.replaceTokens(xpath); + //!xpath.equals(field.get(XPATH) means the field xpath has a template + //in that case ensure that the XPathRecordReader is reinitialized + //for each xml + if (!xpath.equals(field.get(XPATH)) && !context.isRootEntity()) reinitXPathReader = true; xpathReader.addField(field.get(DataImporter.COLUMN), xpath, Boolean.parseBoolean(field.get(DataImporter.MULTI_VALUED)), diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockStringDataSource.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockStringDataSource.java new file mode 100644 index 00000000000..7c9a6d17ccc --- /dev/null +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockStringDataSource.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.handler.dataimport; + + +import java.io.Reader; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +public class MockStringDataSource extends DataSource { + + private static Map cache = new HashMap<>(); + + public static void setData(String query, + String data) { + cache.put(query, data); + } + + public static void clearCache() { + cache.clear(); + } + @Override + public void init(Context context, Properties initProps) { + + } + + @Override + public Reader getData(String query) { + return new StringReader(cache.get(query)); + } + + @Override + public void close() { + cache.clear(); + + } +} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder.java index 527dad0ec5e..39dd89151af 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder.java @@ -39,9 +39,10 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase { @After public void tearDown() throws Exception { MockDataSource.clearCache(); + MockStringDataSource.clearCache(); super.tearDown(); } - + @Test public void loadClass() throws Exception { @SuppressWarnings("unchecked") @@ -180,6 +181,52 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase { assertEquals(3, di.getDocBuilder().importStatistics.rowsCount.get()); } + @Test + public void templateXPath() { + DataImporter di = new DataImporter(); + di.loadAndInit(dc_variableXpath); + DIHConfiguration cfg = di.getConfig(); + + RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null); + List> l = new ArrayList<>(); + l.add(createMap("id", 1, "name", "iphone", "manufacturer", "Apple")); + l.add(createMap("id", 2, "name", "ipad", "manufacturer", "Apple")); + l.add(createMap("id", 3, "name", "pixel", "manufacturer", "Google")); + + MockDataSource.setIterator("select * from x", l.iterator()); + + List> nestedData = new ArrayList<>(); + nestedData.add(createMap("founded", "Cupertino, California, U.S", "year", "1976", "year2", "1976")); + nestedData.add(createMap("founded", "Cupertino, California, U.S", "year", "1976", "year2", "1976")); + nestedData.add(createMap("founded", "Menlo Park, California, U.S", "year", "1998", "year2", "1998")); + + MockStringDataSource.setData("companies.xml", xml_attrVariableXpath); + MockStringDataSource.setData("companies2.xml", xml_variableXpath); + MockStringDataSource.setData("companies3.xml", xml_variableForEach); + + SolrWriterImpl swi = new SolrWriterImpl(); + di.runCmd(rp, swi); + assertEquals(Boolean.TRUE, swi.deleteAllCalled); + assertEquals(Boolean.TRUE, swi.commitCalled); + assertEquals(Boolean.TRUE, swi.finishCalled); + assertEquals(3, swi.docs.size()); + for (int i = 0; i < l.size(); i++) { + SolrInputDocument doc = swi.docs.get(i); + + Map map = l.get(i); + for (Map.Entry entry : map.entrySet()) { + assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey())); + } + + map = nestedData.get(i); + for (Map.Entry entry : map.entrySet()) { + assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey())); + } + } + assertEquals(1, di.getDocBuilder().importStatistics.queryCount.get()); + assertEquals(3, di.getDocBuilder().importStatistics.docCount.get()); + } + static class SolrWriterImpl extends SolrWriter { List docs = new ArrayList<>(); @@ -215,21 +262,73 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase { } public static final String dc_singleEntity = "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " " + " \n" - + " \n" + ""; + + "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " " + " \n" + + " \n" + ""; public static final String dc_deltaConfig = "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " " + " \n" - + " \n" + ""; + + "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " " + " \n" + + " \n" + ""; + + public static final String dc_variableXpath = "\n" + + "\n" + + "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " " + + " " + + " " + + " " + + " " + + " " + + " " + + " " + + " " + + " " + + " \n" + + " \n" + ""; + + + public static final String xml_variableForEach = "\n" + + "\t\n" + + "\t\t1976\n" + + "\t\n" + + "\t\n" + + "\t\t1998\n" + + "\t\n" + + ""; + + public static final String xml_variableXpath = "\n" + + "\t\n" + + "\t\t\n" + + "\t\t\tCupertino, California, U.S\n" + + "\t\t\t\t\n" + + "\t\n" + + "\t\n" + + "\t\t\n" + + "\t\t\tMenlo Park, California, U.S\n" + + "\t\t\n" + + "\t\n" + + ""; + + public static final String xml_attrVariableXpath = "\n" + + "\t\n" + + "\t\t1976\n" + + "\t\n" + + "\t\n" + + "\t\t1998\t\t\n" + + "\t\n" + + ""; }