mirror of https://github.com/apache/lucene.git
SOLR-2094: XPathEntityProcessor should reinitialize the XPathRecordReader instance if the 'forEach' or 'xpath' attributes are templates & it is not a root entity
This commit is contained in:
parent
8c020e395b
commit
fa6fbc0834
|
@ -242,6 +242,9 @@ Bug Fixes
|
|||
(Cao Manh Dat, Lance Norskog, Webster Homer, hossman, yonik)
|
||||
|
||||
* SOLR-9692: blockUnknown property makes inter-node communication impossible (noble)
|
||||
|
||||
* SOLR-2094: XPathEntityProcessor should reinitialize the XPathRecordReader instance if
|
||||
the 'forEach' or 'xpath' attributes are templates & it is not a root entity (Cao Manh Dat, noble)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
|
|
@ -85,12 +85,14 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
protected int blockingQueueSize = 1000;
|
||||
|
||||
protected Thread publisherThread;
|
||||
|
||||
protected boolean reinitXPathReader = true;
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public void init(Context context) {
|
||||
super.init(context);
|
||||
if (xpathReader == null)
|
||||
if (reinitXPathReader)
|
||||
initXpathReader(context.getVariableResolver());
|
||||
pk = context.getEntityAttribute("pk");
|
||||
dataSource = context.getDataSource();
|
||||
|
@ -99,6 +101,7 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
}
|
||||
|
||||
private void initXpathReader(VariableResolver resolver) {
|
||||
reinitXPathReader = false;
|
||||
useSolrAddXml = Boolean.parseBoolean(context
|
||||
.getEntityAttribute(USE_SOLR_ADD_SCHEMA));
|
||||
streamRows = Boolean.parseBoolean(context
|
||||
|
@ -147,11 +150,12 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
xpathReader.addField("name", "/add/doc/field/@name", true);
|
||||
xpathReader.addField("value", "/add/doc/field", true);
|
||||
} else {
|
||||
String forEachXpath = context.getEntityAttribute(FOR_EACH);
|
||||
String forEachXpath = context.getResolvedEntityAttribute(FOR_EACH);
|
||||
if (forEachXpath == null)
|
||||
throw new DataImportHandlerException(SEVERE,
|
||||
"Entity : " + context.getEntityAttribute("name")
|
||||
+ " must have a 'forEach' attribute");
|
||||
if (forEachXpath.equals(context.getEntityAttribute(FOR_EACH))) reinitXPathReader = true;
|
||||
|
||||
try {
|
||||
xpathReader = new XPathRecordReader(forEachXpath);
|
||||
|
@ -164,6 +168,10 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
}
|
||||
String xpath = field.get(XPATH);
|
||||
xpath = context.replaceTokens(xpath);
|
||||
//!xpath.equals(field.get(XPATH) means the field xpath has a template
|
||||
//in that case ensure that the XPathRecordReader is reinitialized
|
||||
//for each xml
|
||||
if (!xpath.equals(field.get(XPATH)) && !context.isRootEntity()) reinitXPathReader = true;
|
||||
xpathReader.addField(field.get(DataImporter.COLUMN),
|
||||
xpath,
|
||||
Boolean.parseBoolean(field.get(DataImporter.MULTI_VALUED)),
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
public class MockStringDataSource extends DataSource<Reader> {
|
||||
|
||||
private static Map<String, String> cache = new HashMap<>();
|
||||
|
||||
public static void setData(String query,
|
||||
String data) {
|
||||
cache.put(query, data);
|
||||
}
|
||||
|
||||
public static void clearCache() {
|
||||
cache.clear();
|
||||
}
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader getData(String query) {
|
||||
return new StringReader(cache.get(query));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
cache.clear();
|
||||
|
||||
}
|
||||
}
|
|
@ -39,9 +39,10 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase {
|
|||
@After
|
||||
public void tearDown() throws Exception {
|
||||
MockDataSource.clearCache();
|
||||
MockStringDataSource.clearCache();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void loadClass() throws Exception {
|
||||
@SuppressWarnings("unchecked")
|
||||
|
@ -180,6 +181,52 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase {
|
|||
assertEquals(3, di.getDocBuilder().importStatistics.rowsCount.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void templateXPath() {
|
||||
DataImporter di = new DataImporter();
|
||||
di.loadAndInit(dc_variableXpath);
|
||||
DIHConfiguration cfg = di.getConfig();
|
||||
|
||||
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
|
||||
List<Map<String, Object>> l = new ArrayList<>();
|
||||
l.add(createMap("id", 1, "name", "iphone", "manufacturer", "Apple"));
|
||||
l.add(createMap("id", 2, "name", "ipad", "manufacturer", "Apple"));
|
||||
l.add(createMap("id", 3, "name", "pixel", "manufacturer", "Google"));
|
||||
|
||||
MockDataSource.setIterator("select * from x", l.iterator());
|
||||
|
||||
List<Map<String,Object>> nestedData = new ArrayList<>();
|
||||
nestedData.add(createMap("founded", "Cupertino, California, U.S", "year", "1976", "year2", "1976"));
|
||||
nestedData.add(createMap("founded", "Cupertino, California, U.S", "year", "1976", "year2", "1976"));
|
||||
nestedData.add(createMap("founded", "Menlo Park, California, U.S", "year", "1998", "year2", "1998"));
|
||||
|
||||
MockStringDataSource.setData("companies.xml", xml_attrVariableXpath);
|
||||
MockStringDataSource.setData("companies2.xml", xml_variableXpath);
|
||||
MockStringDataSource.setData("companies3.xml", xml_variableForEach);
|
||||
|
||||
SolrWriterImpl swi = new SolrWriterImpl();
|
||||
di.runCmd(rp, swi);
|
||||
assertEquals(Boolean.TRUE, swi.deleteAllCalled);
|
||||
assertEquals(Boolean.TRUE, swi.commitCalled);
|
||||
assertEquals(Boolean.TRUE, swi.finishCalled);
|
||||
assertEquals(3, swi.docs.size());
|
||||
for (int i = 0; i < l.size(); i++) {
|
||||
SolrInputDocument doc = swi.docs.get(i);
|
||||
|
||||
Map<String, Object> map = l.get(i);
|
||||
for (Map.Entry<String, Object> entry : map.entrySet()) {
|
||||
assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey()));
|
||||
}
|
||||
|
||||
map = nestedData.get(i);
|
||||
for (Map.Entry<String, Object> entry : map.entrySet()) {
|
||||
assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey()));
|
||||
}
|
||||
}
|
||||
assertEquals(1, di.getDocBuilder().importStatistics.queryCount.get());
|
||||
assertEquals(3, di.getDocBuilder().importStatistics.docCount.get());
|
||||
}
|
||||
|
||||
static class SolrWriterImpl extends SolrWriter {
|
||||
List<SolrInputDocument> docs = new ArrayList<>();
|
||||
|
||||
|
@ -215,21 +262,73 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase {
|
|||
}
|
||||
|
||||
public static final String dc_singleEntity = "<dataConfig>\n"
|
||||
+ "<dataSource type=\"MockDataSource\"/>\n"
|
||||
+ " <document name=\"X\" >\n"
|
||||
+ " <entity name=\"x\" query=\"select * from x\">\n"
|
||||
+ " <field column=\"id\"/>\n"
|
||||
+ " <field column=\"desc\"/>\n"
|
||||
+ " <field column=\"desc\" name=\"desc_s\" />" + " </entity>\n"
|
||||
+ " </document>\n" + "</dataConfig>";
|
||||
+ "<dataSource type=\"MockDataSource\"/>\n"
|
||||
+ " <document name=\"X\" >\n"
|
||||
+ " <entity name=\"x\" query=\"select * from x\">\n"
|
||||
+ " <field column=\"id\"/>\n"
|
||||
+ " <field column=\"desc\"/>\n"
|
||||
+ " <field column=\"desc\" name=\"desc_s\" />" + " </entity>\n"
|
||||
+ " </document>\n" + "</dataConfig>";
|
||||
|
||||
public static final String dc_deltaConfig = "<dataConfig>\n"
|
||||
+ "<dataSource type=\"MockDataSource\"/>\n"
|
||||
+ " <document name=\"X\" >\n"
|
||||
+ " <entity name=\"x\" query=\"select * from x\" deltaQuery=\"select id from x\">\n"
|
||||
+ " <field column=\"id\"/>\n"
|
||||
+ " <field column=\"desc\"/>\n"
|
||||
+ " <field column=\"desc\" name=\"desc_s\" />" + " </entity>\n"
|
||||
+ " </document>\n" + "</dataConfig>";
|
||||
+ "<dataSource type=\"MockDataSource\"/>\n"
|
||||
+ " <document name=\"X\" >\n"
|
||||
+ " <entity name=\"x\" query=\"select * from x\" deltaQuery=\"select id from x\">\n"
|
||||
+ " <field column=\"id\"/>\n"
|
||||
+ " <field column=\"desc\"/>\n"
|
||||
+ " <field column=\"desc\" name=\"desc_s\" />" + " </entity>\n"
|
||||
+ " </document>\n" + "</dataConfig>";
|
||||
|
||||
public static final String dc_variableXpath = "<dataConfig>\n"
|
||||
+ "<dataSource type=\"MockDataSource\"/>\n"
|
||||
+ "<dataSource name=\"xml\" type=\"MockStringDataSource\"/>\n"
|
||||
+ " <document name=\"X\" >\n"
|
||||
+ " <entity name=\"x\" query=\"select * from x\">\n"
|
||||
+ " <field column=\"id\"/>\n"
|
||||
+ " <field column=\"name\"/>\n"
|
||||
+ " <field column=\"manufacturer\"/>"
|
||||
+ " <entity name=\"c1\" url=\"companies.xml\" dataSource=\"xml\" forEach=\"/companies/company\" processor=\"XPathEntityProcessor\">"
|
||||
+ " <field column=\"year\" xpath=\"/companies/company/year[@name='p_${x.manufacturer}_s']\" />"
|
||||
+ " </entity>"
|
||||
+ " <entity name=\"c2\" url=\"companies2.xml\" dataSource=\"xml\" forEach=\"/companies/company\" processor=\"XPathEntityProcessor\">"
|
||||
+ " <field column=\"founded\" xpath=\"/companies/company/p_${x.manufacturer}_s/founded\" />"
|
||||
+ " </entity>"
|
||||
+ " <entity name=\"c3\" url=\"companies3.xml\" dataSource=\"xml\" forEach=\"/companies/${x.manufacturer}\" processor=\"XPathEntityProcessor\">"
|
||||
+ " <field column=\"year2\" xpath=\"/companies/${x.manufacturer}/year\" />"
|
||||
+ " </entity>"
|
||||
+ " </entity>\n"
|
||||
+ " </document>\n" + "</dataConfig>";
|
||||
|
||||
|
||||
public static final String xml_variableForEach = "<companies>\n" +
|
||||
"\t<Apple>\n" +
|
||||
"\t\t<year>1976</year>\n" +
|
||||
"\t</Apple>\n" +
|
||||
"\t<Google>\n" +
|
||||
"\t\t<year>1998</year>\n" +
|
||||
"\t</Google>\n" +
|
||||
"</companies>";
|
||||
|
||||
public static final String xml_variableXpath = "<companies>\n" +
|
||||
"\t<company>\n" +
|
||||
"\t\t<p_Apple_s>\n" +
|
||||
"\t\t\t<founded>Cupertino, California, U.S</founded>\n" +
|
||||
"\t\t</p_Apple_s>\t\t\n" +
|
||||
"\t</company>\n" +
|
||||
"\t<company>\n" +
|
||||
"\t\t<p_Google_s>\n" +
|
||||
"\t\t\t<founded>Menlo Park, California, U.S</founded>\n" +
|
||||
"\t\t</p_Google_s>\n" +
|
||||
"\t</company>\n" +
|
||||
"</companies>";
|
||||
|
||||
public static final String xml_attrVariableXpath = "<companies>\n" +
|
||||
"\t<company>\n" +
|
||||
"\t\t<year name='p_Apple_s'>1976</year>\n" +
|
||||
"\t</company>\n" +
|
||||
"\t<company>\n" +
|
||||
"\t\t<year name='p_Google_s'>1998</year>\t\t\n" +
|
||||
"\t</company>\n" +
|
||||
"</companies>";
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue