action) {
- Thread ct = Thread.currentThread();
- ClassLoader prev = ct.getContextClassLoader();
- try {
- ct.setContextClassLoader(loader);
- return action.get();
- } finally {
- ct.setContextClassLoader(prev);
- }
- }
-
-}
diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
deleted file mode 100644
index 78a53fac258..00000000000
--- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.EmptyParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.html.HtmlMapper;
-import org.apache.tika.parser.html.IdentityHtmlMapper;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.ContentHandlerDecorator;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.Attributes;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.TransformerConfigurationException;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.sax.SAXTransformerFactory;
-import javax.xml.transform.sax.TransformerHandler;
-import javax.xml.transform.stream.StreamResult;
-import java.io.File;
-import java.io.InputStream;
-import java.io.StringWriter;
-import java.io.Writer;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-import static org.apache.solr.handler.dataimport.DataImporter.COLUMN;
-import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
-/**
- * An implementation of {@link EntityProcessor} which reads data from rich docs
- * using Apache Tika
- *
- *
To index latitude/longitude data that might
- * be extracted from a file's metadata, identify
- * the geo field for this information with this attribute:
- * spatialMetadataField
- *
- * @since solr 3.1
- */
-public class TikaEntityProcessor extends EntityProcessorBase {
- private static Parser EMPTY_PARSER = new EmptyParser();
- private TikaConfig tikaConfig;
- private String format = "text";
- private boolean done = false;
- private boolean extractEmbedded = false;
- private String parser;
- static final String AUTO_PARSER = "org.apache.tika.parser.AutoDetectParser";
- private String htmlMapper;
- private String spatialMetadataField;
-
- @Override
- public void init(Context context) {
- super.init(context);
- done = false;
- }
-
- @Override
- protected void firstInit(Context context) {
- super.firstInit(context);
- // See similar code in ExtractingRequestHandler.inform
- try {
- String tikaConfigLoc = context.getResolvedEntityAttribute("tikaConfig");
- if (tikaConfigLoc == null) {
- ClassLoader classLoader = context.getSolrCore().getResourceLoader().getClassLoader();
- try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) {
- tikaConfig = new TikaConfig(is);
- }
- } else {
- File configFile = new File(tikaConfigLoc);
- if (configFile.isAbsolute()) {
- tikaConfig = new TikaConfig(configFile);
- } else { // in conf/
- try (InputStream is = context.getSolrCore().getResourceLoader().openResource(tikaConfigLoc)) {
- tikaConfig = new TikaConfig(is);
- }
- }
- }
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e,"Unable to load Tika Config");
- }
-
- String extractEmbeddedString = context.getResolvedEntityAttribute("extractEmbedded");
- if ("true".equals(extractEmbeddedString)) {
- extractEmbedded = true;
- }
- format = context.getResolvedEntityAttribute("format");
- if(format == null)
- format = "text";
- if (!"html".equals(format) && !"xml".equals(format) && !"text".equals(format)&& !"none".equals(format) )
- throw new DataImportHandlerException(SEVERE, "'format' can be one of text|html|xml|none");
-
- htmlMapper = context.getResolvedEntityAttribute("htmlMapper");
- if (htmlMapper == null)
- htmlMapper = "default";
- if (!"default".equals(htmlMapper) && !"identity".equals(htmlMapper))
- throw new DataImportHandlerException(SEVERE, "'htmlMapper', if present, must be 'default' or 'identity'");
-
- parser = context.getResolvedEntityAttribute("parser");
- if(parser == null) {
- parser = AUTO_PARSER;
- }
-
- spatialMetadataField = context.getResolvedEntityAttribute("spatialMetadataField");
- }
-
- @Override
- public Map nextRow() {
- if(done) return null;
- Map row = new HashMap<>();
- @SuppressWarnings({"unchecked"})
- DataSource dataSource = context.getDataSource();
- InputStream is = dataSource.getData(context.getResolvedEntityAttribute(URL));
- ContentHandler contentHandler = null;
- Metadata metadata = new Metadata();
- StringWriter sw = new StringWriter();
- try {
- if ("html".equals(format)) {
- contentHandler = getHtmlHandler(sw);
- } else if ("xml".equals(format)) {
- contentHandler = getXmlContentHandler(sw);
- } else if ("text".equals(format)) {
- contentHandler = getTextContentHandler(sw);
- } else if("none".equals(format)){
- contentHandler = new DefaultHandler();
- }
- } catch (TransformerConfigurationException e) {
- wrapAndThrow(SEVERE, e, "Unable to create content handler");
- }
- Parser tikaParser = null;
- if(parser.equals(AUTO_PARSER)){
- tikaParser = new AutoDetectParser(tikaConfig);
- } else {
- tikaParser = context.getSolrCore().getResourceLoader().newInstance(parser, Parser.class);
- }
- try {
- ParseContext context = new ParseContext();
- if ("identity".equals(htmlMapper)){
- context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
- }
- if (extractEmbedded) {
- context.set(Parser.class, tikaParser);
- } else {
- context.set(Parser.class, EMPTY_PARSER);
- }
- tikaParser.parse(is, contentHandler, metadata , context);
- } catch (Exception e) {
- if(SKIP.equals(onError)) {
- throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
- "Document skipped :" + e.getMessage());
- }
- wrapAndThrow(SEVERE, e, "Unable to read content");
- }
- IOUtils.closeQuietly(is);
- for (Map field : context.getAllEntityFields()) {
- if (!"true".equals(field.get("meta"))) continue;
- String col = field.get(COLUMN);
- String s = metadata.get(col);
- if (s != null) row.put(col, s);
- }
- if(!"none".equals(format) ) row.put("text", sw.toString());
- tryToAddLatLon(metadata, row);
- done = true;
- return row;
- }
-
- private void tryToAddLatLon(Metadata metadata, Map row) {
- if (spatialMetadataField == null) return;
- String latString = metadata.get(Metadata.LATITUDE);
- String lonString = metadata.get(Metadata.LONGITUDE);
- if (latString != null && lonString != null) {
- row.put(spatialMetadataField, String.format(Locale.ROOT, "%s,%s", latString, lonString));
- }
- }
-
- private static ContentHandler getHtmlHandler(Writer writer)
- throws TransformerConfigurationException {
- SAXTransformerFactory factory = (SAXTransformerFactory)
- TransformerFactory.newInstance();
- TransformerHandler handler = factory.newTransformerHandler();
- handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
- handler.setResult(new StreamResult(writer));
- return new ContentHandlerDecorator(handler) {
- @Override
- public void startElement(
- String uri, String localName, String name, Attributes atts)
- throws SAXException {
- if (XHTMLContentHandler.XHTML.equals(uri)) {
- uri = null;
- }
- if (!"head".equals(localName)) {
- super.startElement(uri, localName, name, atts);
- }
- }
-
- @Override
- public void endElement(String uri, String localName, String name)
- throws SAXException {
- if (XHTMLContentHandler.XHTML.equals(uri)) {
- uri = null;
- }
- if (!"head".equals(localName)) {
- super.endElement(uri, localName, name);
- }
- }
-
- @Override
- public void startPrefixMapping(String prefix, String uri) {/*no op*/ }
-
- @Override
- public void endPrefixMapping(String prefix) {/*no op*/ }
- };
- }
-
- private static ContentHandler getTextContentHandler(Writer writer) {
- return new BodyContentHandler(writer);
- }
-
- private static ContentHandler getXmlContentHandler(Writer writer)
- throws TransformerConfigurationException {
- SAXTransformerFactory factory = (SAXTransformerFactory)
- TransformerFactory.newInstance();
- TransformerHandler handler = factory.newTransformerHandler();
- handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
- handler.setResult(new StreamResult(writer));
- return handler;
- }
-
-}
diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/package.html b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/package.html
deleted file mode 100644
index 9a7f6f260e2..00000000000
--- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/package.html
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
-
-
-Plugins for DataImportHandler
that have additional dependencies.
-
-
diff --git a/solr/contrib/dataimporthandler-extras/src/java/overview.html b/solr/contrib/dataimporthandler-extras/src/java/overview.html
deleted file mode 100644
index 5a55432908e..00000000000
--- a/solr/contrib/dataimporthandler-extras/src/java/overview.html
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-Apache Solr Search Server: DataImportHandler Extras contrib. This contrib module is deprecated as of 8.6
-
-
diff --git a/solr/contrib/dataimporthandler-extras/src/resources/solr-default-tika-config.xml b/solr/contrib/dataimporthandler-extras/src/resources/solr-default-tika-config.xml
deleted file mode 100644
index b598d9e867e..00000000000
--- a/solr/contrib/dataimporthandler-extras/src/resources/solr-default-tika-config.xml
+++ /dev/null
@@ -1,20 +0,0 @@
-
-
-
-
-
\ No newline at end of file
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc
deleted file mode 100644
index 5944c24b2cf..00000000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr-word.pdf b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr-word.pdf
deleted file mode 100644
index bd8b865905f..00000000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr-word.pdf and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
deleted file mode 100644
index 793482a4991..00000000000
--- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
+++ /dev/null
@@ -1,205 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-solrconfig.xml b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-solrconfig.xml
deleted file mode 100644
index 344589e340d..00000000000
--- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-solrconfig.xml
+++ /dev/null
@@ -1,277 +0,0 @@
-
-
-
-
- ${tests.luceneMatchVersion:LATEST}
-
- ${useCompoundFile:false}
-
-
-
- ${solr.data.dir:}
-
-
-
-
-
-
-
-
-
-
- 100000
-
-
-
-
-
-
- ${solr.max.booleanClauses:1024}
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
-
-
-
-
- 50
-
-
- 200
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- false
-
-
- 4
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
-
-
-
-
-
-
-
-
-
-
-
- explicit
-
-
-
-
-
-
- *:*
-
-
-
-
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/structured.html b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/structured.html
deleted file mode 100644
index 103748120c1..00000000000
--- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/structured.html
+++ /dev/null
@@ -1,29 +0,0 @@
-
-
-
-
-
- Title in the header
-
-
-H1 Header
-Basic div
-Div with attribute
-
-
-
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg
deleted file mode 100644
index 10d1ebb2d32..00000000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_recursive_embedded.docx b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_recursive_embedded.docx
deleted file mode 100644
index cd562cbb82d..00000000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_recursive_embedded.docx and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_vsdx.vsdx b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_vsdx.vsdx
deleted file mode 100644
index 659ecdd5853..00000000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_vsdx.vsdx and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java
deleted file mode 100644
index 027a8d73279..00000000000
--- a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.SolrInputDocument;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import java.text.ParseException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-// Test mailbox is like this: foldername(mailcount)
-// top1(2) -> child11(6)
-// -> child12(0)
-// top2(2) -> child21(1)
-// -> grandchild211(2)
-// -> grandchild212(1)
-// -> child22(2)
-
-/**
- * Test for MailEntityProcessor. The tests are marked as ignored because we'd need a mail server (real or mocked) for
- * these to work.
- *
- * TODO: Find a way to make the tests actually test code
- *
- *
- * @see org.apache.solr.handler.dataimport.MailEntityProcessor
- * @since solr 1.4
- */
-@Ignore("Needs a Mock Mail Server to work")
-public class TestMailEntityProcessor extends AbstractDataImportHandlerTestCase {
-
- // Credentials
- private static final String user = "user";
- private static final String password = "password";
- private static final String host = "host";
- private static final String protocol = "imaps";
-
- private static Map paramMap = new HashMap<>();
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testConnection() {
- // also tests recurse = false and default settings
- paramMap.put("folders", "top2");
- paramMap.put("recurse", "false");
- paramMap.put("processAttachement", "false");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top1 did not return 2 messages", swi.docs.size(), 2);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testRecursion() {
- paramMap.put("folders", "top2");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its children did not return 8 messages", swi.docs.size(), 8);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testExclude() {
- paramMap.put("folders", "top2");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- paramMap.put("exclude", ".*grandchild.*");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its direct children did not return 5 messages", swi.docs.size(), 5);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testInclude() {
- paramMap.put("folders", "top2");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- paramMap.put("include", ".*grandchild.*");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testIncludeAndExclude() {
- paramMap.put("folders", "top1,top2");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- paramMap.put("exclude", ".*top1.*");
- paramMap.put("include", ".*grandchild.*");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- @SuppressWarnings({"unchecked"})
- public void testFetchTimeSince() throws ParseException {
- paramMap.put("folders", "top1/child11");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- paramMap.put("fetchMailsSince", "2008-12-26 00:00:00");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
- }
-
- private String getConfigFromMap(Map params) {
- String conf =
- "" +
- "" +
- " " +
- " " +
- " ";
- params.put("user", user);
- params.put("password", password);
- params.put("host", host);
- params.put("protocol", protocol);
- StringBuilder attribs = new StringBuilder("");
- for (String key : params.keySet())
- attribs.append(" ").append(key).append("=" + "\"").append(params.get(key)).append("\"");
- attribs.append(" ");
- return conf.replace("someconfig", attribs.toString());
- }
-
- static class SolrWriterImpl extends SolrWriter {
- List docs = new ArrayList<>();
- Boolean deleteAllCalled;
- Boolean commitCalled;
-
- public SolrWriterImpl() {
- super(null, null);
- }
-
- @Override
- public boolean upload(SolrInputDocument doc) {
- return docs.add(doc);
- }
-
-
- @Override
- public void doDeleteAll() {
- deleteAllCalled = Boolean.TRUE;
- }
-
- @Override
- public void commit(boolean b) {
- commitCalled = Boolean.TRUE;
- }
- }
-}
diff --git a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
deleted file mode 100644
index 05acfca90c3..00000000000
--- a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import java.util.Locale;
-
-/**Testcase for TikaEntityProcessor
- *
- * @since solr 3.1
- */
-public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
- private String conf =
- "" +
- " " +
- " " +
- " " +
- " " +
- " " +
- " " +
- " " +
- " " +
- " ";
-
- private String skipOnErrConf =
- "" +
- " " +
- " " +
- " " +
- "" +
- " " +
- " " +
- " " +
- " " +
- " " +
- " ";
-
- private String spatialConf =
- "" +
- " " +
- " " +
- " " +
- " " +
- " " +
- " " +
- " ";
-
- private String vsdxConf =
- "" +
- " " +
- " " +
- " " +
- " " +
- " " +
- " " +
- " ";
-
- private String[] tests = {
- "//*[@numFound='1']"
- ,"//str[@name='author'][.='Grant Ingersoll']"
- ,"//str[@name='title'][.='solr-word']"
- ,"//str[@name='text']"
- };
-
- private String[] testsHTMLDefault = {
- "//*[@numFound='1']"
- , "//str[@name='text'][contains(.,'Basic div')]"
- , "//str[@name='text'][contains(.,'')]"
- , "//str[@name='text'][not(contains(.,''))]" //default mapper lower-cases elements as it maps
- , "//str[@name='text'][not(contains(.,'
'))]"
- };
-
- private String[] testsHTMLIdentity = {
- "//*[@numFound='1']"
- , "//str[@name='text'][contains(.,'Basic div')]"
- , "//str[@name='text'][contains(.,'
')]"
- , "//str[@name='text'][contains(.,'')]"
- , "//str[@name='text'][contains(.,'class=\"classAttribute\"')]" //attributes are lower-cased
- };
-
- private String[] testsSpatial = {
- "//*[@numFound='1']"
- };
-
- private String[] testsEmbedded = {
- "//*[@numFound='1']",
- "//str[@name='text'][contains(.,'When in the Course')]"
- };
-
- private String[] testsIgnoreEmbedded = {
- "//*[@numFound='1']",
- "//str[@name='text'][not(contains(.,'When in the Course'))]"
- };
-
- private String[] testsVSDX = {
- "//*[@numFound='1']",
- "//str[@name='text'][contains(.,'Arrears')]"
- };
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)",
- new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage()));
- initCore("dataimport-solrconfig.xml", "dataimport-schema-no-unique-key.xml", getFile("dihextras/solr").getAbsolutePath());
- }
-
- @Test
- public void testIndexingWithTikaEntityProcessor() throws Exception {
- runFullImport(conf);
- assertQ(req("*:*"), tests );
- }
-
- @Test
- public void testSkip() throws Exception {
- runFullImport(skipOnErrConf);
- assertQ(req("*:*"), "//*[@numFound='1']");
- }
-
- @Test
- public void testVSDX() throws Exception {
- //this ensures that we've included the curvesapi dependency
- //and that the ConnectsType class is bundled with poi-ooxml-schemas.
- runFullImport(vsdxConf);
- assertQ(req("*:*"), testsVSDX);
- }
-
- @Test
- public void testTikaHTMLMapperEmpty() throws Exception {
- runFullImport(getConfigHTML(null));
- assertQ(req("*:*"), testsHTMLDefault);
- }
-
- @Test
- public void testTikaHTMLMapperDefault() throws Exception {
- runFullImport(getConfigHTML("default"));
- assertQ(req("*:*"), testsHTMLDefault);
- }
-
- @Test
- public void testTikaHTMLMapperIdentity() throws Exception {
- runFullImport(getConfigHTML("identity"));
- assertQ(req("*:*"), testsHTMLIdentity);
- }
-
- @Test
- public void testTikaGeoMetadata() throws Exception {
- runFullImport(spatialConf);
- String pt = "38.97,-77.018";
- Double distance = 5.0d;
- assertQ(req("q", "*:* OR foo_i:" + random().nextInt(100), "fq",
- "{!geofilt sfield=\"home\"}\"",
- "pt", pt, "d", String.valueOf(distance)), testsSpatial);
- }
-
- private String getConfigHTML(String htmlMapper) {
- return
- "
" +
- " " +
- " " +
- " " +
- " " +
- " " +
- " " +
- " ";
-
- }
-
- @Test
- public void testEmbeddedDocsLegacy() throws Exception {
- //test legacy behavior: ignore embedded docs
- runFullImport(conf);
- assertQ(req("*:*"), testsIgnoreEmbedded);
- }
-
- @Test
- public void testEmbeddedDocsTrue() throws Exception {
- runFullImport(getConfigEmbedded(true));
- assertQ(req("*:*"), testsEmbedded);
- }
-
- @Test
- public void testEmbeddedDocsFalse() throws Exception {
- runFullImport(getConfigEmbedded(false));
- assertQ(req("*:*"), testsIgnoreEmbedded);
- }
-
- private String getConfigEmbedded(boolean extractEmbedded) {
- return
- "
" +
- " " +
- " " +
- " " +
- " " +
- " " +
- " " +
- " " +
- " " +
- " ";
- }
-}
diff --git a/solr/contrib/dataimporthandler/README.md b/solr/contrib/dataimporthandler/README.md
deleted file mode 100644
index 8dc9391494e..00000000000
--- a/solr/contrib/dataimporthandler/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-Apache Solr - DataImportHandler
-================================
-
-Introduction
-------------
-DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and
-HTTP data sources quick and easy.
-
-Important Note
---------------
-Although Solr strives to be agnostic of the Locale where the server is
-running, some code paths in DataImportHandler are known to depend on the
-System default Locale, Timezone, or Charset. It is recommended that when
-running Solr you set the following system properties:
- -Duser.language=xx -Duser.country=YY -Duser.timezone=ZZZ
-
-where xx, YY, and ZZZ are consistent with any database server's configuration.
-
-Deprecation notice
-------------------
-This contrib module is deprecated as of v8.6, scheduled for removal in Solr 9.0.
-The reason is that DIH is no longer being maintained in a manner we feel is necessary in order to keep it
-healthy and secure. Also it was not designed to work with SolrCloud and does not meet current performance requirements.
-
-The project hopes that the community will take over maintenance of DIH as a 3rd party package (See SOLR-14066 for more details). Please reach out to us at the dev@ mailing list if you want to help.
-
diff --git a/solr/contrib/dataimporthandler/build.gradle b/solr/contrib/dataimporthandler/build.gradle
deleted file mode 100644
index 9286d4317b6..00000000000
--- a/solr/contrib/dataimporthandler/build.gradle
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-apply plugin: 'java-library'
-
-description = 'Data Import Handler'
-
-dependencies {
- implementation project(':solr:core')
-
- testImplementation project(':solr:test-framework')
-
- testImplementation('org.mockito:mockito-core', {
- exclude group: "net.bytebuddy", module: "byte-buddy-agent"
- })
- testImplementation ('org.hsqldb:hsqldb')
- testImplementation ('org.apache.derby:derby')
- testImplementation ('org.objenesis:objenesis')
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java
deleted file mode 100644
index f4b1d7a24bf..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.util.ContentStream;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-
-import java.io.InputStream;
-import java.io.IOException;
-import java.util.Properties;
-/**
- *
- * A data source implementation which can be used to read binary stream from content streams.
- * Refer to http://wiki.apache.org/solr/DataImportHandler for more
- * details.
- *
- * This API is experimental and may change in the future.
- *
- * @since solr 3.1
- */
-
-public class BinContentStreamDataSource extends DataSource<InputStream> {
- private ContextImpl context;
- private ContentStream contentStream;
- private InputStream in;
-
-
- @Override
- public void init(Context context, Properties initProps) {
- this.context = (ContextImpl) context;
- }
-
- @Override
- public InputStream getData(String query) {
- contentStream = context.getDocBuilder().getReqParams().getContentStream();
- if (contentStream == null)
- throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body");
- try {
- return in = contentStream.getStream();
- } catch (IOException e) {
- DataImportHandlerException.wrapAndThrow(SEVERE, e);
- return null;
- }
- }
-
- @Override
- public void close() {
- if (contentStream != null) {
- try {
- if (in == null) in = contentStream.getStream();
- in.close();
- } catch (IOException e) {
- /*no op*/
- }
- }
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinFileDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinFileDataSource.java
deleted file mode 100644
index dc7a0f552e3..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinFileDataSource.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-
-import java.io.InputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.util.Properties;
-/**
- *
- * A DataSource which reads from local files
- *
- *
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- *
- *
- * This API is experimental and may change in the future.
- *
- * @since solr 3.1
- */
-
-public class BinFileDataSource extends DataSource<InputStream>{
- protected String basePath;
- @Override
- public void init(Context context, Properties initProps) {
- basePath = initProps.getProperty(FileDataSource.BASE_PATH);
- }
-
- @Override
- public InputStream getData(String query) {
- File f = FileDataSource.getFile(basePath,query);
- try {
- return new FileInputStream(f);
- } catch (FileNotFoundException e) {
- wrapAndThrow(SEVERE,e,"Unable to open file "+f.getAbsolutePath());
- return null;
- }
- }
-
- @Override
- public void close() {
-
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinURLDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinURLDataSource.java
deleted file mode 100644
index 03a30ab07a9..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinURLDataSource.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
-import static org.apache.solr.handler.dataimport.URLDataSource.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.InputStream;
-import java.lang.invoke.MethodHandles;
-import java.net.URL;
-import java.net.URLConnection;
-import java.util.Properties;
-/**
- * A data source implementation which can be used to read binary streams using HTTP.
- * Refer to http://wiki.apache.org/solr/DataImportHandler for more
- * details.
- *
- * This API is experimental and may change in the future.
- *
- * @since solr 3.1
- */
-public class BinURLDataSource extends DataSource<InputStream>{
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- private String baseUrl;
- private int connectionTimeout = CONNECTION_TIMEOUT;
-
- private int readTimeout = READ_TIMEOUT;
-
- private Context context;
-
- private Properties initProps;
-
- public BinURLDataSource() { }
-
- @Override
- public void init(Context context, Properties initProps) {
- this.context = context;
- this.initProps = initProps;
-
- baseUrl = getInitPropWithReplacements(BASE_URL);
- String cTimeout = getInitPropWithReplacements(CONNECTION_TIMEOUT_FIELD_NAME);
- String rTimeout = getInitPropWithReplacements(READ_TIMEOUT_FIELD_NAME);
- if (cTimeout != null) {
- try {
- connectionTimeout = Integer.parseInt(cTimeout);
- } catch (NumberFormatException e) {
- log.warn("Invalid connection timeout: {}", cTimeout);
- }
- }
- if (rTimeout != null) {
- try {
- readTimeout = Integer.parseInt(rTimeout);
- } catch (NumberFormatException e) {
- log.warn("Invalid read timeout: {}", rTimeout);
- }
- }
- }
-
- @Override
- public InputStream getData(String query) {
- URL url = null;
- try {
- if (URIMETHOD.matcher(query).find()) url = new URL(query);
- else url = new URL(baseUrl + query);
- log.debug("Accessing URL: {}", url);
- URLConnection conn = url.openConnection();
- conn.setConnectTimeout(connectionTimeout);
- conn.setReadTimeout(readTimeout);
- return conn.getInputStream();
- } catch (Exception e) {
- log.error("Exception thrown while getting data", e);
- wrapAndThrow (SEVERE, e, "Exception in invoking url " + url);
- return null;//unreachable
- }
- }
-
- @Override
- public void close() { }
-
- private String getInitPropWithReplacements(String propertyName) {
- final String expr = initProps.getProperty(propertyName);
- if (expr == null) {
- return null;
- }
- return context.replaceTokens(expr);
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/CachePropertyUtil.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/CachePropertyUtil.java
deleted file mode 100644
index 544761f8d88..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/CachePropertyUtil.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-public class CachePropertyUtil {
- public static String getAttributeValueAsString(Context context, String attr) {
- Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY);
- if (o == null) {
- o = context.getResolvedEntityAttribute(attr);
- }
- if (o == null && context.getRequestParameters() != null) {
- o = context.getRequestParameters().get(attr);
- }
- if (o == null) {
- return null;
- }
- return o.toString();
- }
-
- public static Object getAttributeValue(Context context, String attr) {
- Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY);
- if (o == null) {
- o = context.getResolvedEntityAttribute(attr);
- }
- if (o == null && context.getRequestParameters() != null) {
- o = context.getRequestParameters().get(attr);
- }
- if (o == null) {
- return null;
- }
- return o;
- }
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ClobTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ClobTransformer.java
deleted file mode 100644
index 2e9d93a0c1a..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ClobTransformer.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.HTMLStripTransformer.TRUE;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.sql.Clob;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-/**
- * {@link Transformer} instance which converts a {@link Clob} to a {@link String}.
- *
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- *
- * This API is experimental and subject to change
- *
- * @since solr 1.4
- */
-public class ClobTransformer extends Transformer {
- @Override
- public Object transformRow(Map<String, Object> aRow, Context context) {
- for (Map<String, String> map : context.getAllEntityFields()) {
- if (!TRUE.equals(map.get(CLOB))) continue;
- String column = map.get(DataImporter.COLUMN);
- String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
- if (srcCol == null)
- srcCol = column;
- Object o = aRow.get(srcCol);
- if (o instanceof List) {
- @SuppressWarnings({"unchecked"})
- List inputs = (List) o;
- List results = new ArrayList<>();
- for (Object input : inputs) {
- if (input instanceof Clob) {
- Clob clob = (Clob) input;
- results.add(readFromClob(clob));
- }
- }
- aRow.put(column, results);
- } else {
- if (o instanceof Clob) {
- Clob clob = (Clob) o;
- aRow.put(column, readFromClob(clob));
- }
- }
- }
- return aRow;
- }
-
- private String readFromClob(Clob clob) {
- Reader reader = FieldReaderDataSource.readCharStream(clob);
- StringBuilder sb = new StringBuilder();
- char[] buf = new char[1024];
- int len;
- try {
- while ((len = reader.read(buf)) != -1) {
- sb.append(buf, 0, len);
- }
- } catch (IOException e) {
- DataImportHandlerException.wrapAndThrow(DataImportHandlerException.SEVERE, e);
- }
- return sb.toString();
- }
-
- public static final String CLOB = "clob";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ConfigParseUtil.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ConfigParseUtil.java
deleted file mode 100644
index 179df231526..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ConfigParseUtil.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.w3c.dom.Element;
-import org.w3c.dom.NamedNodeMap;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-
-public class ConfigParseUtil {
- public static String getStringAttribute(Element e, String name, String def) {
- String r = e.getAttribute(name);
- if (r == null || "".equals(r.trim()))
- r = def;
- return r;
- }
-
- public static HashMap<String, String> getAllAttributes(Element e) {
- HashMap<String, String> m = new HashMap<>();
- NamedNodeMap nnm = e.getAttributes();
- for (int i = 0; i < nnm.getLength(); i++) {
- m.put(nnm.item(i).getNodeName(), nnm.item(i).getNodeValue());
- }
- return m;
- }
-
- public static String getText(Node elem, StringBuilder buffer) {
- if (elem.getNodeType() != Node.CDATA_SECTION_NODE) {
- NodeList childs = elem.getChildNodes();
- for (int i = 0; i < childs.getLength(); i++) {
- Node child = childs.item(i);
- short childType = child.getNodeType();
- if (childType != Node.COMMENT_NODE
- && childType != Node.PROCESSING_INSTRUCTION_NODE) {
- getText(child, buffer);
- }
- }
- } else {
- buffer.append(elem.getNodeValue());
- }
-
- return buffer.toString();
- }
-
- public static List<Element> getChildNodes(Element e, String byName) {
- List<Element> result = new ArrayList<>();
- NodeList l = e.getChildNodes();
- for (int i = 0; i < l.getLength(); i++) {
- if (e.equals(l.item(i).getParentNode())
- && byName.equals(l.item(i).getNodeName()))
- result.add((Element) l.item(i));
- }
- return result;
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java
deleted file mode 100644
index 4482160c22b..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.util.ContentStream;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Properties;
-
-/**
- * A DataSource implementation which reads from the ContentStream of a POST request
- *
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- *
- * This API is experimental and may change in the future.
- *
- * @since solr 1.4
- */
-public class ContentStreamDataSource extends DataSource<Reader> {
- private ContextImpl context;
- private ContentStream contentStream;
- private Reader reader;
-
- @Override
- public void init(Context context, Properties initProps) {
- this.context = (ContextImpl) context;
- }
-
- @Override
- public Reader getData(String query) {
- contentStream = context.getDocBuilder().getReqParams().getContentStream();
- if (contentStream == null)
- throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body");
- try {
- return reader = contentStream.getReader();
- } catch (IOException e) {
- DataImportHandlerException.wrapAndThrow(SEVERE, e);
- return null;
- }
- }
-
- @Override
- public void close() {
- if (contentStream != null) {
- try {
- if (reader == null) reader = contentStream.getReader();
- reader.close();
- } catch (IOException e) {
- }
- }
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java
deleted file mode 100644
index 70dbbcb6ec6..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.core.SolrCore;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- *
- * This abstract class gives access to all available objects. So any
- * component implemented by a user can have the full power of DataImportHandler
- *
- *
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- *
- *
- * This API is experimental and subject to change
- *
- * @since solr 1.3
- */
-public abstract class Context {
- public static final String FULL_DUMP = "FULL_DUMP", DELTA_DUMP = "DELTA_DUMP", FIND_DELTA = "FIND_DELTA";
-
- /**
- * An object stored in entity scope is valid only for the current entity for the current document only.
- */
- public static final String SCOPE_ENTITY = "entity";
-
- /**
- * An object stored in global scope is available for the current import only but across entities and documents.
- */
- public static final String SCOPE_GLOBAL = "global";
-
- /**
- * An object stored in document scope is available for the current document only but across entities.
- */
- public static final String SCOPE_DOC = "document";
-
- /**
- * An object stored in 'solrcore' scope is available across imports, entities and documents throughout the life of
- * a solr core. A solr core unload or reload will destroy this data.
- */
- public static final String SCOPE_SOLR_CORE = "solrcore";
-
- /**
- * Get the value of any attribute put into this entity
- *
- * @param name name of the attribute eg: 'name'
- * @return value of named attribute in entity
- */
- public abstract String getEntityAttribute(String name);
-
- /**
- * Get the value of any attribute put into this entity after resolving all variables found in the attribute value
- * @param name name of the attribute
- * @return value of the named attribute after resolving all variables
- */
- public abstract String getResolvedEntityAttribute(String name);
-
- /**
- * Returns all the fields put into an entity. each item (which is a map ) in
- * the list corresponds to one field. each if the map contains the attribute
- * names and values in a field
- *
- * @return all fields in an entity
- */
- public abstract List<Map<String, String>> getAllEntityFields();
-
- /**
- * Returns the VariableResolver used in this entity which can be used to
- * resolve the tokens in ${<namespce.name>}
- *
- * @return a VariableResolver instance
- * @see org.apache.solr.handler.dataimport.VariableResolver
- */
-
- public abstract VariableResolver getVariableResolver();
-
- /**
- * Gets the datasource instance defined for this entity. Do not close() this instance.
- * Transformers should use the getDataSource(String name) method.
- *
- * @return a new DataSource instance as configured for the current entity
- * @see org.apache.solr.handler.dataimport.DataSource
- * @see #getDataSource(String)
- */
- @SuppressWarnings({"rawtypes"})
- public abstract DataSource getDataSource();
-
- /**
- * Gets a new DataSource instance with a name. Ensure that you close() this after use
- * because this is created just for this method call.
- *
- * @param name Name of the dataSource as defined in the dataSource tag
- * @return a new DataSource instance
- * @see org.apache.solr.handler.dataimport.DataSource
- */
- @SuppressWarnings({"rawtypes"})
- public abstract DataSource getDataSource(String name);
-
- /**
- * Returns the instance of EntityProcessor used for this entity
- *
- * @return instance of EntityProcessor used for the current entity
- * @see org.apache.solr.handler.dataimport.EntityProcessor
- */
- public abstract EntityProcessor getEntityProcessor();
-
- /**
- * Store values in a certain name and scope (entity, document,global)
- *
- * @param name the key
- * @param val the value
- * @param scope the scope in which the given key, value pair is to be stored
- */
- public abstract void setSessionAttribute(String name, Object val, String scope);
-
- /**
- * get a value by name in the given scope (entity, document,global)
- *
- * @param name the key
- * @param scope the scope from which the value is to be retrieved
- * @return the object stored in the given scope with the given key
- */
- public abstract Object getSessionAttribute(String name, String scope);
-
- /**
- * Get the context instance for the parent entity. works only in the full dump
- * If the current entity is rootmost a null is returned
- *
- * @return parent entity's Context
- */
- public abstract Context getParentContext();
-
- /**
- * The request parameters passed over HTTP for this command the values in the
- * map are either String(for single valued parameters) or List<String> (for
- * multi-valued parameters)
- *
- * @return the request parameters passed in the URL to initiate this process
- */
- public abstract Map<String, Object> getRequestParameters();
-
- /**
- * Returns if the current entity is the root entity
- *
- * @return true if current entity is the root entity, false otherwise
- */
- public abstract boolean isRootEntity();
-
- /**
- * Returns the current process FULL_DUMP, DELTA_DUMP, FIND_DELTA
- *
- * @return the type of the current running process
- */
- public abstract String currentProcess();
-
- /**
- * Exposing the actual SolrCore to the components
- *
- * @return the core
- */
- public abstract SolrCore getSolrCore();
-
- /**
- * Makes available some basic running statistics such as "docCount",
- * "deletedDocCount", "rowCount", "queryCount" and "skipDocCount"
- *
- * @return a Map containing running statistics of the current import
- */
- public abstract Map<String, String> getStats();
-
- /**
- * Returns the text specified in the script tag in the data-config.xml
- */
- public abstract String getScript();
-
- /**
- * Returns the language of the script as specified in the script tag in data-config.xml
- */
- public abstract String getScriptLanguage();
-
- /**delete a document by id
- */
- public abstract void deleteDoc(String id);
-
- /**delete documents by query
- */
- public abstract void deleteDocByQuery(String query);
-
- /**Use this directly to resolve variable
- * @param var the variable name
- * @return the resolved value
- */
- public abstract Object resolve(String var);
-
- /** Resolve variables in a template
- *
- * @return The string w/ variables resolved
- */
- public abstract String replaceTokens(String template);
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java
deleted file mode 100644
index 3d9f3868508..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.dataimport.config.Script;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- *
- * An implementation for the Context
- *
- * This API is experimental and subject to change
- *
- * @since solr 1.3
- */
-public class ContextImpl extends Context {
- protected EntityProcessorWrapper epw;
-
- private ContextImpl parent;
-
- private VariableResolver resolver;
-
- @SuppressWarnings({"rawtypes"})
- private DataSource ds;
-
- private String currProcess;
-
- private Map<String, Object> requestParams;
-
- private DataImporter dataImporter;
-
- private Map<String, Object> entitySession, globalSession;
-
- private Exception lastException = null;
-
- DocBuilder.DocWrapper doc;
-
- DocBuilder docBuilder;
-
-
-
- public ContextImpl(EntityProcessorWrapper epw, VariableResolver resolver,
- @SuppressWarnings({"rawtypes"})DataSource ds, String currProcess,
- Map<String, Object> global, ContextImpl parentContext, DocBuilder docBuilder) {
- this.epw = epw;
- this.docBuilder = docBuilder;
- this.resolver = resolver;
- this.ds = ds;
- this.currProcess = currProcess;
- if (docBuilder != null) {
- this.requestParams = docBuilder.getReqParams().getRawParams();
- dataImporter = docBuilder.dataImporter;
- }
- globalSession = global;
- parent = parentContext;
- }
-
- @Override
- public String getEntityAttribute(String name) {
- return epw==null || epw.getEntity() == null ? null : epw.getEntity().getAllAttributes().get(name);
- }
-
- @Override
- public String getResolvedEntityAttribute(String name) {
- return epw==null || epw.getEntity() == null ? null : resolver.replaceTokens(epw.getEntity().getAllAttributes().get(name));
- }
-
- @Override
- public List<Map<String, String>> getAllEntityFields() {
- return epw==null || epw.getEntity() == null ? Collections.emptyList() : epw.getEntity().getAllFieldsList();
- }
-
- @Override
- public VariableResolver getVariableResolver() {
- return resolver;
- }
-
- @Override
- @SuppressWarnings({"rawtypes"})
- public DataSource getDataSource() {
- if (ds != null) return ds;
- if(epw==null) { return null; }
- if (epw!=null && epw.getDatasource() == null) {
- epw.setDatasource(dataImporter.getDataSourceInstance(epw.getEntity(), epw.getEntity().getDataSourceName(), this));
- }
- if (epw!=null && epw.getDatasource() != null && docBuilder != null && docBuilder.verboseDebug &&
- Context.FULL_DUMP.equals(currentProcess())) {
- //debug is not yet implemented properly for deltas
- epw.setDatasource(docBuilder.getDebugLogger().wrapDs(epw.getDatasource()));
- }
- return epw.getDatasource();
- }
-
- @Override
- @SuppressWarnings({"rawtypes"})
- public DataSource getDataSource(String name) {
- return dataImporter.getDataSourceInstance(epw==null ? null : epw.getEntity(), name, this);
- }
-
- @Override
- public boolean isRootEntity() {
- return epw==null ? false : epw.getEntity().isDocRoot();
- }
-
- @Override
- public String currentProcess() {
- return currProcess;
- }
-
- @Override
- public Map<String, Object> getRequestParameters() {
- return requestParams;
- }
-
- @Override
- public EntityProcessor getEntityProcessor() {
- return epw;
- }
-
- @Override
- public void setSessionAttribute(String name, Object val, String scope) {
- if(name == null) {
- return;
- }
- if (Context.SCOPE_ENTITY.equals(scope)) {
- if (entitySession == null) {
- entitySession = new HashMap<>();
- }
- entitySession.put(name, val);
- } else if (Context.SCOPE_GLOBAL.equals(scope)) {
- if (globalSession != null) {
- globalSession.put(name, val);
- }
- } else if (Context.SCOPE_DOC.equals(scope)) {
- DocBuilder.DocWrapper doc = getDocument();
- if (doc != null) {
- doc.setSessionAttribute(name, val);
- }
- } else if (SCOPE_SOLR_CORE.equals(scope)){
- if(dataImporter != null) {
- dataImporter.putToCoreScopeSession(name, val);
- }
- }
- }
-
- @Override
- public Object getSessionAttribute(String name, String scope) {
- if (Context.SCOPE_ENTITY.equals(scope)) {
- if (entitySession == null)
- return null;
- return entitySession.get(name);
- } else if (Context.SCOPE_GLOBAL.equals(scope)) {
- if (globalSession != null) {
- return globalSession.get(name);
- }
- } else if (Context.SCOPE_DOC.equals(scope)) {
- DocBuilder.DocWrapper doc = getDocument();
- return doc == null ? null: doc.getSessionAttribute(name);
- } else if (SCOPE_SOLR_CORE.equals(scope)){
- return dataImporter == null ? null : dataImporter.getFromCoreScopeSession(name);
- }
- return null;
- }
-
- @Override
- public Context getParentContext() {
- return parent;
- }
-
- private DocBuilder.DocWrapper getDocument() {
- ContextImpl c = this;
- while (true) {
- if (c.doc != null)
- return c.doc;
- if (c.parent != null)
- c = c.parent;
- else
- return null;
- }
- }
-
- void setDoc(DocBuilder.DocWrapper docWrapper) {
- this.doc = docWrapper;
- }
-
-
- @Override
- public SolrCore getSolrCore() {
- return dataImporter == null ? null : dataImporter.getCore();
- }
-
-
- @Override
- public Map<String, String> getStats() {
- return docBuilder != null ? docBuilder.importStatistics.getStatsSnapshot() : Collections.emptyMap();
- }
-
- @Override
- public String getScript() {
- if (dataImporter != null) {
- Script script = dataImporter.getConfig().getScript();
- return script == null ? null : script.getText();
- }
- return null;
- }
-
- @Override
- public String getScriptLanguage() {
- if (dataImporter != null) {
- Script script = dataImporter.getConfig().getScript();
- return script == null ? null : script.getLanguage();
- }
- return null;
- }
-
- @Override
- public void deleteDoc(String id) {
- if(docBuilder != null){
- docBuilder.writer.deleteDoc(id);
- }
- }
-
- @Override
- public void deleteDocByQuery(String query) {
- if(docBuilder != null){
- docBuilder.writer.deleteByQuery(query);
- }
- }
-
- DocBuilder getDocBuilder(){
- return docBuilder;
- }
- @Override
- public Object resolve(String var) {
- return resolver.resolve(var);
- }
-
- @Override
- public String replaceTokens(String template) {
- return resolver.replaceTokens(template);
- }
-
- public Exception getLastException() { return lastException; }
-
- public void setLastException(Exception lastException) {this.lastException = lastException; }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCache.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCache.java
deleted file mode 100644
index a67b3e42963..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCache.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.Iterator;
-import java.util.Map;
-
-/**
- *
- * A cache that allows a DIH entity's data to persist locally prior being joined
- * to other data and/or indexed.
- *
- *
- * @lucene.experimental
- */
-public interface DIHCache extends Iterable> {
-
- /**
- *
- * Opens the cache using the specified properties. The {@link Context}
- * includes any parameters needed by the cache impl. This must be called
- * before any read/write operations are permitted.
- */
- void open(Context context);
-
- /**
- *
- * Releases resources used by this cache, if possible. The cache is flushed
- * but not destroyed.
- *
- */
- void close();
-
- /**
- *
- * Persists any pending data to the cache
- *
- */
- void flush();
-
- /**
- *
- * Closes the cache, if open. Then removes all data, possibly removing the
- * cache entirely from persistent storage.
- *
- */
- public void destroy();
-
- /**
- *
- * Adds a document. If a document already exists with the same key, both
- * documents will exist in the cache, as the cache allows duplicate keys. To
- * update a key's documents, first call delete(Object key).
- *
- */
- void add(Map rec);
-
- /**
- *
- * Returns an iterator, allowing callers to iterate through the entire cache
- * in key, then insertion, order.
- *
- */
- @Override
- Iterator> iterator();
-
- /**
- *
- * Returns an iterator, allowing callers to iterate through all documents that
- * match the given key in insertion order.
- *
- */
- Iterator> iterator(Object key);
-
- /**
- *
- * Delete all documents associated with the given key
- *
- */
- void delete(Object key);
-
- /**
- *
- * Delete all data from the cache,leaving the empty cache intact.
- *
- */
- void deleteAll();
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java
deleted file mode 100644
index 2f3d95743fd..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-
-import java.lang.invoke.MethodHandles;
-import java.lang.reflect.Constructor;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
-import org.apache.solr.common.SolrException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DIHCacheSupport {
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private String cacheForeignKey;
- private String cacheImplName;
- private Map queryVsCache = new HashMap<>();
- private Map>> queryVsCacheIterator;
- private Iterator> dataSourceRowCache;
- private boolean cacheDoKeyLookup;
-
- public DIHCacheSupport(Context context, String cacheImplName) {
- this.cacheImplName = cacheImplName;
-
- Relation r = new Relation(context);
- cacheDoKeyLookup = r.doKeyLookup;
- String cacheKey = r.primaryKey;
- cacheForeignKey = r.foreignKey;
-
- context.setSessionAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY, cacheKey,
- Context.SCOPE_ENTITY);
- context.setSessionAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY, cacheForeignKey,
- Context.SCOPE_ENTITY);
- context.setSessionAttribute(DIHCacheSupport.CACHE_DELETE_PRIOR_DATA,
- "true", Context.SCOPE_ENTITY);
- context.setSessionAttribute(DIHCacheSupport.CACHE_READ_ONLY, "false",
- Context.SCOPE_ENTITY);
- }
-
- static class Relation{
- protected final boolean doKeyLookup;
- protected final String foreignKey;
- protected final String primaryKey;
-
- public Relation(Context context) {
- String where = context.getEntityAttribute("where");
- String cacheKey = context.getEntityAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY);
- String lookupKey = context.getEntityAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY);
- if (cacheKey != null && lookupKey == null) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- "'cacheKey' is specified for the entity "
- + context.getEntityAttribute("name")
- + " but 'cacheLookup' is missing");
-
- }
- if (where == null && cacheKey == null) {
- doKeyLookup = false;
- primaryKey = null;
- foreignKey = null;
- } else {
- if (where != null) {
- String[] splits = where.split("=");
- primaryKey = splits[0];
- foreignKey = splits[1].trim();
- } else {
- primaryKey = cacheKey;
- foreignKey = lookupKey;
- }
- doKeyLookup = true;
- }
- }
-
- @Override
- public String toString() {
- return "Relation "
- + primaryKey + "="+foreignKey ;
- }
-
-
- }
-
- private DIHCache instantiateCache(Context context) {
- DIHCache cache = null;
- try {
- @SuppressWarnings("unchecked")
- Class cacheClass = DocBuilder.loadClass(cacheImplName, context
- .getSolrCore());
- Constructor constr = cacheClass.getConstructor();
- cache = constr.newInstance();
- cache.open(context);
- } catch (Exception e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- "Unable to load Cache implementation:" + cacheImplName, e);
- }
- return cache;
- }
-
- public void initNewParent(Context context) {
- dataSourceRowCache = null;
- queryVsCacheIterator = new HashMap<>();
- for (Map.Entry entry : queryVsCache.entrySet()) {
- queryVsCacheIterator.put(entry.getKey(), entry.getValue().iterator());
- }
- }
-
- public void destroyAll() {
- if (queryVsCache != null) {
- for (DIHCache cache : queryVsCache.values()) {
- cache.destroy();
- }
- }
- queryVsCache = null;
- dataSourceRowCache = null;
- cacheForeignKey = null;
- }
-
- /**
- *
- * Get all the rows from the datasource for the given query and cache them
- *
- */
- public void populateCache(String query,
- Iterator> rowIterator) {
- Map aRow = null;
- DIHCache cache = queryVsCache.get(query);
- while ((aRow = getNextFromCache(query, rowIterator)) != null) {
- cache.add(aRow);
- }
- }
-
- private Map getNextFromCache(String query,
- Iterator> rowIterator) {
- try {
- if (rowIterator == null) return null;
- if (rowIterator.hasNext()) return rowIterator.next();
- return null;
- } catch (Exception e) {
- SolrException.log(log, "getNextFromCache() failed for query '" + query
- + "'", e);
- wrapAndThrow(DataImportHandlerException.WARN, e);
- return null;
- }
- }
-
- public Map getCacheData(Context context, String query,
- Iterator> rowIterator) {
- if (cacheDoKeyLookup) {
- return getIdCacheData(context, query, rowIterator);
- } else {
- return getSimpleCacheData(context, query, rowIterator);
- }
- }
-
- /**
- * If the where clause is present the cache is sql Vs Map of key Vs List of
- * Rows.
- *
- * @param query
- * the query string for which cached data is to be returned
- *
- * @return the cached row corresponding to the given query after all variables
- * have been resolved
- */
- protected Map getIdCacheData(Context context, String query,
- Iterator> rowIterator) {
- Object key = context.resolve(cacheForeignKey);
- if (key == null) {
- throw new DataImportHandlerException(DataImportHandlerException.WARN,
- "The cache lookup value : " + cacheForeignKey
- + " is resolved to be null in the entity :"
- + context.getEntityAttribute("name"));
-
- }
- if (dataSourceRowCache == null) {
- DIHCache cache = queryVsCache.get(query);
-
- if (cache == null) {
- cache = instantiateCache(context);
- queryVsCache.put(query, cache);
- populateCache(query, rowIterator);
- }
- dataSourceRowCache = cache.iterator(key);
- }
- return getFromRowCacheTransformed();
- }
-
- /**
- * If where clause is not present the cache is a Map of query vs List of Rows.
- *
- * @param query
- * string for which cached row is to be returned
- *
- * @return the cached row corresponding to the given query
- */
- protected Map getSimpleCacheData(Context context,
- String query, Iterator> rowIterator) {
- if (dataSourceRowCache == null) {
- DIHCache cache = queryVsCache.get(query);
- if (cache == null) {
- cache = instantiateCache(context);
- queryVsCache.put(query, cache);
- populateCache(query, rowIterator);
- queryVsCacheIterator.put(query, cache.iterator());
- }
- Iterator> cacheIter = queryVsCacheIterator.get(query);
- dataSourceRowCache = cacheIter;
- }
-
- return getFromRowCacheTransformed();
- }
-
- protected Map getFromRowCacheTransformed() {
- if (dataSourceRowCache == null || !dataSourceRowCache.hasNext()) {
- dataSourceRowCache = null;
- return null;
- }
- Map r = dataSourceRowCache.next();
- return r;
- }
-
- /**
- *
- * Specify the class for the cache implementation
- *
- */
- public static final String CACHE_IMPL = "cacheImpl";
-
- /**
- *
- * If the cache supports persistent data, set to "true" to delete any prior
- * persisted data before running the entity.
- *
- */
-
- public static final String CACHE_DELETE_PRIOR_DATA = "cacheDeletePriorData";
- /**
- *
- * Specify the Foreign Key from the parent entity to join on. Use if the cache
- * is on a child entity.
- *
- */
- public static final String CACHE_FOREIGN_KEY = "cacheLookup";
-
- /**
- *
- * Specify the Primary Key field from this Entity to map the input records
- * with
- *
- */
- public static final String CACHE_PRIMARY_KEY = "cacheKey";
- /**
- *
- * If true, a pre-existing cache is re-opened for read-only access.
- *
- */
- public static final String CACHE_READ_ONLY = "cacheReadOnly";
-
-
-
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHLogLevels.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHLogLevels.java
deleted file mode 100644
index 24732d1454f..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHLogLevels.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-public enum DIHLogLevels {
- START_ENTITY, END_ENTITY, TRANSFORMED_ROW, ENTITY_META, PRE_TRANSFORMER_ROW, START_DOC, END_DOC, ENTITY_OUT, ROW_END, TRANSFORMER_EXCEPTION, ENTITY_EXCEPTION, DISABLE_LOGGING, ENABLE_LOGGING, NONE
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java
deleted file mode 100644
index f51ef0713b6..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.Date;
-import java.util.Map;
-
-/**
- * Implementations write out properties about the last data import
- * for use by the next import. ex: to persist the last import timestamp
- * so that future delta imports can know what needs to be updated.
- *
- * @lucene.experimental
- */
-public abstract class DIHProperties {
-
- public abstract void init(DataImporter dataImporter, Map initParams);
-
- public abstract boolean isWritable();
-
- public abstract void persist(Map props);
-
- public abstract Map readIndexerProperties();
-
- public abstract String convertDateToString(Date d);
-
- public Date getCurrentTimestamp() {
- return new Date();
- }
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriter.java
deleted file mode 100644
index bdb988d4836..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriter.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.solr.common.SolrInputDocument;
-
-/**
- * @lucene.experimental
- *
- */
-public interface DIHWriter {
-
- /**
- *
- * If this writer supports transactions or commit points, then commit any changes,
- * optionally optimizing the data for read/write performance
- *
- */
- public void commit(boolean optimize);
-
- /**
- *
- * Release resources used by this writer. After calling close, reads & updates will throw exceptions.
- *
- */
- public void close();
-
- /**
- *
- * If this writer supports transactions or commit points, then roll back any uncommitted changes.
- *
- */
- public void rollback();
-
- /**
- *
- * Delete from the writer's underlying data store based the passed-in writer-specific query. (Optional Operation)
- *
- */
- public void deleteByQuery(String q);
-
- /**
- *
- * Delete everything from the writer's underlying data store
- *
- */
- public void doDeleteAll();
-
- /**
- *
- * Delete from the writer's underlying data store based on the passed-in Primary Key
- *
- */
- public void deleteDoc(Object key);
-
-
-
- /**
- *
- * Add a document to this writer's underlying data store.
- *
- * @return true on success, false on failure
- */
- public boolean upload(SolrInputDocument doc);
-
-
-
- /**
- *
- * Provide context information for this writer. init() should be called before using the writer.
- *
- */
- public void init(Context context) ;
-
-
- /**
- *
- * Specify the keys to be modified by a delta update (required by writers that can store duplicate keys)
- *
- */
- public void setDeltaKeys(Set> deltaKeys) ;
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriterBase.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriterBase.java
deleted file mode 100644
index 43e92c37f79..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriterBase.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-public abstract class DIHWriterBase implements DIHWriter {
- protected String keyFieldName;
- protected Set deltaKeys = null;
-
- @Override
- public void setDeltaKeys(Set> passedInDeltaKeys) {
- deltaKeys = new HashSet<>();
- for (Map aMap : passedInDeltaKeys) {
- if (aMap.size() > 0) {
- Object key = null;
- if (keyFieldName != null) {
- key = aMap.get(keyFieldName);
- } else {
- key = aMap.entrySet().iterator().next();
- }
- if (key != null) {
- deltaKeys.add(key);
- }
- }
- }
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java
deleted file mode 100644
index 278de7dfb5d..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.lang.invoke.MethodHandles;
-import java.lang.reflect.Constructor;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.MapSolrParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.ContentStream;
-import org.apache.solr.common.util.ContentStreamBase;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrResourceLoader;
-import org.apache.solr.handler.RequestHandlerBase;
-import org.apache.solr.metrics.MetricsMap;
-import org.apache.solr.metrics.SolrMetricsContext;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.RawResponseWriter;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.update.processor.UpdateRequestProcessor;
-import org.apache.solr.update.processor.UpdateRequestProcessorChain;
-import org.apache.solr.util.plugin.SolrCoreAware;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static org.apache.solr.handler.dataimport.DataImporter.IMPORT_CMD;
-
-/**
- *
- * Solr Request Handler for data import from databases and REST data sources.
- *
- *
- * It is configured in solrconfig.xml
- *
- *
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- *
- *
- * This API is experimental and subject to change
- *
- * @deprecated since 8.6
- * @since solr 1.3
- */
-@Deprecated(since = "8.6")
-public class DataImportHandler extends RequestHandlerBase implements
- SolrCoreAware {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- private DataImporter importer;
-
- private boolean debugEnabled = true;
-
- private String myName = "dataimport";
-
- private MetricsMap metrics;
-
- private static final String PARAM_WRITER_IMPL = "writerImpl";
- private static final String DEFAULT_WRITER_NAME = "SolrWriter";
- static final String ENABLE_DIH_DATA_CONFIG_PARAM = "enable.dih.dataConfigParam";
-
- final boolean dataConfigParam_enabled = Boolean.getBoolean(ENABLE_DIH_DATA_CONFIG_PARAM);
-
- public DataImporter getImporter() {
- return this.importer;
- }
-
- @Override
-
- public void init(@SuppressWarnings({"rawtypes"})NamedList args) {
- super.init(args);
- Map macro = new HashMap<>();
- macro.put("expandMacros", "false");
- defaults = SolrParams.wrapDefaults(defaults, new MapSolrParams(macro));
- log.warn("Data Import Handler is deprecated as of Solr 8.6. See SOLR-14066 for more details.");
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public void inform(SolrCore core) {
- try {
- String name = getPluginInfo().name;
- if (name.startsWith("/")) {
- myName = name.substring(1);
- }
- // some users may have '/' in the handler name. replace with '_'
- myName = myName.replaceAll("/", "_");
- debugEnabled = StrUtils.parseBool((String)initArgs.get(ENABLE_DEBUG), true);
- importer = new DataImporter(core, myName);
- } catch (Exception e) {
- log.error( DataImporter.MSG.LOAD_EXP, e);
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, DataImporter.MSG.LOAD_EXP, e);
- }
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
- throws Exception {
- rsp.setHttpCaching(false);
-
- //TODO: figure out why just the first one is OK...
- ContentStream contentStream = null;
- Iterable streams = req.getContentStreams();
- if(streams != null){
- for (ContentStream stream : streams) {
- contentStream = stream;
- break;
- }
- }
- SolrParams params = req.getParams();
- @SuppressWarnings({"rawtypes"})
- NamedList defaultParams = (NamedList) initArgs.get("defaults");
- RequestInfo requestParams = new RequestInfo(req, getParamsMap(params), contentStream);
- String command = requestParams.getCommand();
-
- if (DataImporter.SHOW_CONF_CMD.equals(command)) {
- String dataConfigFile = params.get("config");
- String dataConfig = params.get("dataConfig"); // needn't check dataConfigParam_enabled; we don't execute it
- if(dataConfigFile != null) {
- dataConfig = SolrWriter.getResourceAsString(req.getCore().getResourceLoader().openResource(dataConfigFile));
- }
- if(dataConfig==null) {
- rsp.add("status", DataImporter.MSG.NO_CONFIG_FOUND);
- } else {
- // Modify incoming request params to add wt=raw
- ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
- rawParams.set(CommonParams.WT, "raw");
- req.setParams(rawParams);
- ContentStreamBase content = new ContentStreamBase.StringStream(dataConfig);
- rsp.add(RawResponseWriter.CONTENT, content);
- }
- return;
- }
-
- if (params.get("dataConfig") != null && dataConfigParam_enabled == false) {
- throw new SolrException(SolrException.ErrorCode.FORBIDDEN,
- "Use of the dataConfig param (DIH debug mode) requires the system property " +
- ENABLE_DIH_DATA_CONFIG_PARAM + " because it's a security risk.");
- }
-
- rsp.add("initArgs", initArgs);
- String message = "";
-
- if (command != null) {
- rsp.add("command", command);
- }
- // If importer is still null
- if (importer == null) {
- rsp.add("status", DataImporter.MSG.NO_INIT);
- return;
- }
-
- if (command != null && DataImporter.ABORT_CMD.equals(command)) {
- importer.runCmd(requestParams, null);
- } else if (importer.isBusy()) {
- message = DataImporter.MSG.CMD_RUNNING;
- } else if (command != null) {
- if (DataImporter.FULL_IMPORT_CMD.equals(command)
- || DataImporter.DELTA_IMPORT_CMD.equals(command) ||
- IMPORT_CMD.equals(command)) {
- importer.maybeReloadConfiguration(requestParams, defaultParams);
- UpdateRequestProcessorChain processorChain =
- req.getCore().getUpdateProcessorChain(params);
- UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp);
- SolrResourceLoader loader = req.getCore().getResourceLoader();
- DIHWriter sw = getSolrWriter(processor, loader, requestParams, req);
-
- if (requestParams.isDebug()) {
- if (debugEnabled) {
- // Synchronous request for the debug mode
- importer.runCmd(requestParams, sw);
- rsp.add("mode", "debug");
- rsp.add("documents", requestParams.getDebugInfo().debugDocuments);
- if (requestParams.getDebugInfo().debugVerboseOutput != null) {
- rsp.add("verbose-output", requestParams.getDebugInfo().debugVerboseOutput);
- }
- } else {
- message = DataImporter.MSG.DEBUG_NOT_ENABLED;
- }
- } else {
- // Asynchronous request for normal mode
- if(requestParams.getContentStream() == null && !requestParams.isSyncMode()){
- importer.runAsync(requestParams, sw);
- } else {
- importer.runCmd(requestParams, sw);
- }
- }
- } else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
- if(importer.maybeReloadConfiguration(requestParams, defaultParams)) {
- message = DataImporter.MSG.CONFIG_RELOADED;
- } else {
- message = DataImporter.MSG.CONFIG_NOT_RELOADED;
- }
- }
- }
- rsp.add("status", importer.isBusy() ? "busy" : "idle");
- rsp.add("importResponse", message);
- rsp.add("statusMessages", importer.getStatusMessages());
- }
-
- /** The value is converted to a String or {@code List} if multi-valued. */
- private Map getParamsMap(SolrParams params) {
- Map result = new HashMap<>();
- for (Map.Entry pair : params){
- String s = pair.getKey();
- String[] val = pair.getValue();
- if (val == null || val.length < 1)
- continue;
- if (val.length == 1)
- result.put(s, val[0]);
- else
- result.put(s, Arrays.asList(val));
- }
- return result;
- }
-
- private DIHWriter getSolrWriter(final UpdateRequestProcessor processor,
- final SolrResourceLoader loader, final RequestInfo requestParams,
- SolrQueryRequest req) {
- SolrParams reqParams = req.getParams();
- String writerClassStr = null;
- if (reqParams != null && reqParams.get(PARAM_WRITER_IMPL) != null) {
- writerClassStr = reqParams.get(PARAM_WRITER_IMPL);
- }
- DIHWriter writer;
- if (writerClassStr != null
- && !writerClassStr.equals(DEFAULT_WRITER_NAME)
- && !writerClassStr.equals(DocBuilder.class.getPackage().getName() + "."
- + DEFAULT_WRITER_NAME)) {
- try {
- @SuppressWarnings("unchecked")
- Class writerClass = DocBuilder.loadClass(writerClassStr, req.getCore());
- @SuppressWarnings({"rawtypes"})
- Constructor cnstr = writerClass.getConstructor(new Class[] {
- UpdateRequestProcessor.class, SolrQueryRequest.class});
- return cnstr.newInstance((Object) processor, (Object) req);
- } catch (Exception e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- "Unable to load Writer implementation:" + writerClassStr, e);
- }
- } else {
- return new SolrWriter(processor, req) {
- @Override
- public boolean upload(SolrInputDocument document) {
- try {
- return super.upload(document);
- } catch (RuntimeException e) {
- log.error("Exception while adding: {}", document, e);
- return false;
- }
- }
- };
- }
- }
-
- @Override
- public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
- super.initializeMetrics(parentContext, scope);
- metrics = new MetricsMap((detailed, map) -> {
- if (importer != null) {
- DocBuilder.Statistics cumulative = importer.cumulativeStatistics;
-
- map.put("Status", importer.getStatus().toString());
-
- if (importer.docBuilder != null) {
- DocBuilder.Statistics running = importer.docBuilder.importStatistics;
- map.put("Documents Processed", running.docCount);
- map.put("Requests made to DataSource", running.queryCount);
- map.put("Rows Fetched", running.rowsCount);
- map.put("Documents Deleted", running.deletedDocCount);
- map.put("Documents Skipped", running.skipDocCount);
- }
-
- map.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, cumulative.docCount);
- map.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, cumulative.queryCount);
- map.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, cumulative.rowsCount);
- map.put(DataImporter.MSG.TOTAL_DOCS_DELETED, cumulative.deletedDocCount);
- map.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, cumulative.skipDocCount);
- }
- });
- solrMetricsContext.gauge(metrics, true, "importer", getCategory().toString(), scope);
- }
-
- // //////////////////////SolrInfoMBeans methods //////////////////////
-
- @Override
- public String getDescription() {
- return DataImporter.MSG.JMX_DESC;
- }
-
- public static final String ENABLE_DEBUG = "enableDebug";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java
deleted file mode 100644
index e69b3fd9063..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-/**
- * Exception class for all DataImportHandler exceptions
- *
- * This API is experimental and subject to change
- *
- * @since solr 1.3
- */
-public class DataImportHandlerException extends RuntimeException {
- private int errCode;
-
- public boolean debugged = false;
-
- public static final int SEVERE = 500, WARN = 400, SKIP = 300, SKIP_ROW =301;
-
- public DataImportHandlerException(int err) {
- super();
- errCode = err;
- }
-
- public DataImportHandlerException(int err, String message) {
- super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()));
- errCode = err;
- }
-
- public DataImportHandlerException(int err, String message, Throwable cause) {
- super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()), cause);
- errCode = err;
- }
-
- public DataImportHandlerException(int err, Throwable cause) {
- super(cause);
- errCode = err;
- }
-
- public int getErrCode() {
- return errCode;
- }
-
- public static DataImportHandlerException wrapAndThrow(int err, Exception e) {
- if (e instanceof DataImportHandlerException) {
- throw (DataImportHandlerException) e;
- } else {
- throw new DataImportHandlerException(err, e);
- }
- }
-
- public static DataImportHandlerException wrapAndThrow(int err, Exception e, String msg) {
- if (e instanceof DataImportHandlerException) {
- throw (DataImportHandlerException) e;
- } else {
- throw new DataImportHandlerException(err, msg, e);
- }
- }
-
-
- public static final String MSG = " Processing Document # ";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java
deleted file mode 100644
index c5b2f70bf22..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java
+++ /dev/null
@@ -1,628 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.EmptyEntityResolver;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.util.SystemIdResolver;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.XMLErrorLogger;
-import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
-import org.apache.solr.handler.dataimport.config.ConfigParseUtil;
-import org.apache.solr.handler.dataimport.config.DIHConfiguration;
-import org.apache.solr.handler.dataimport.config.Entity;
-import org.apache.solr.handler.dataimport.config.PropertyWriter;
-import org.apache.solr.handler.dataimport.config.Script;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DocBuilder.loadClass;
-import static org.apache.solr.handler.dataimport.config.ConfigNameConstants.CLASS;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
-import org.xml.sax.InputSource;
-import org.apache.commons.io.IOUtils;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.locks.ReentrantLock;
-
-/**
- *
- * Stores all configuration information for pulling and indexing data.
- *
- * This API is experimental and subject to change
- *
- * @since solr 1.3
- */
-public class DataImporter {
-
- public enum Status {
- IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED
- }
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private static final XMLErrorLogger XMLLOG = new XMLErrorLogger(log);
-
- private Status status = Status.IDLE;
- private DIHConfiguration config;
- private Date indexStartTime;
- private Properties store = new Properties();
- private Map> requestLevelDataSourceProps = new HashMap<>();
- private IndexSchema schema;
- public DocBuilder docBuilder;
- public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();
- private SolrCore core;
- private Map coreScopeSession = new ConcurrentHashMap<>();
- private ReentrantLock importLock = new ReentrantLock();
- private boolean isDeltaImportSupported = false;
- private final String handlerName;
-
- /**
- * Only for testing purposes
- */
- DataImporter() {
- this.handlerName = "dataimport" ;
- }
-
- DataImporter(SolrCore core, String handlerName) {
- this.handlerName = handlerName;
- this.core = core;
- this.schema = core.getLatestSchema();
- }
-
-
-
-
- boolean maybeReloadConfiguration(RequestInfo params,
- NamedList> defaultParams) throws IOException {
- if (importLock.tryLock()) {
- boolean success = false;
- try {
- if (null != params.getRequest()) {
- if (schema != params.getRequest().getSchema()) {
- schema = params.getRequest().getSchema();
- }
- }
- String dataConfigText = params.getDataConfig();
- String dataconfigFile = params.getConfigFile();
- InputSource is = null;
- if(dataConfigText!=null && dataConfigText.length()>0) {
- is = new InputSource(new StringReader(dataConfigText));
- } else if(dataconfigFile!=null) {
- is = new InputSource(core.getResourceLoader().openResource(dataconfigFile));
- is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(dataconfigFile));
- log.info("Loading DIH Configuration: {}", dataconfigFile);
- }
- if(is!=null) {
- config = loadDataConfig(is);
- success = true;
- }
-
- Map> dsProps = new HashMap<>();
- if(defaultParams!=null) {
- int position = 0;
- while (position < defaultParams.size()) {
- if (defaultParams.getName(position) == null) {
- break;
- }
- String name = defaultParams.getName(position);
- if (name.equals("datasource")) {
- success = true;
- @SuppressWarnings({"rawtypes"})
- NamedList dsConfig = (NamedList) defaultParams.getVal(position);
- log.info("Getting configuration for Global Datasource...");
- Map props = new HashMap<>();
- for (int i = 0; i < dsConfig.size(); i++) {
- props.put(dsConfig.getName(i), dsConfig.getVal(i).toString());
- }
- log.info("Adding properties to datasource: {}", props);
- dsProps.put((String) dsConfig.get("name"), props);
- }
- position++;
- }
- }
- requestLevelDataSourceProps = Collections.unmodifiableMap(dsProps);
- } catch(IOException ioe) {
- throw ioe;
- } finally {
- importLock.unlock();
- }
- return success;
- } else {
- return false;
- }
- }
-
-
-
- public String getHandlerName() {
- return handlerName;
- }
-
- public IndexSchema getSchema() {
- return schema;
- }
-
- /**
- * Used by tests
- */
- void loadAndInit(String configStr) {
- config = loadDataConfig(new InputSource(new StringReader(configStr)));
- }
-
- void loadAndInit(InputSource configFile) {
- config = loadDataConfig(configFile);
- }
-
- public DIHConfiguration loadDataConfig(InputSource configFile) {
-
- DIHConfiguration dihcfg = null;
- try {
- DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
- dbf.setValidating(false);
-
- // only enable xinclude, if XML is coming from safe source (local file)
- // and a a SolrCore and SystemId is present (makes no sense otherwise):
- if (core != null && configFile.getSystemId() != null) {
- try {
- dbf.setXIncludeAware(true);
- dbf.setNamespaceAware(true);
- } catch( UnsupportedOperationException e ) {
- log.warn( "XML parser doesn't support XInclude option" );
- }
- }
-
- DocumentBuilder builder = dbf.newDocumentBuilder();
- // only enable xinclude / external entities, if XML is coming from
- // safe source (local file) and a a SolrCore and SystemId is present:
- if (core != null && configFile.getSystemId() != null) {
- builder.setEntityResolver(new SystemIdResolver(core.getResourceLoader()));
- } else {
- // Don't allow external entities without having a system ID:
- builder.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE);
- }
- builder.setErrorHandler(XMLLOG);
- Document document;
- try {
- document = builder.parse(configFile);
- } finally {
- // some XML parsers are broken and don't close the byte stream (but they should according to spec)
- IOUtils.closeQuietly(configFile.getByteStream());
- }
-
- dihcfg = readFromXml(document);
- log.info("Data Configuration loaded successfully");
- } catch (Exception e) {
- throw new DataImportHandlerException(SEVERE,
- "Data Config problem: " + e.getMessage(), e);
- }
- for (Entity e : dihcfg.getEntities()) {
- if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) {
- isDeltaImportSupported = true;
- break;
- }
- }
- return dihcfg;
- }
-
- public DIHConfiguration readFromXml(Document xmlDocument) {
- DIHConfiguration config;
- List> functions = new ArrayList<>();
- Script script = null;
- Map> dataSources = new HashMap<>();
-
- NodeList dataConfigTags = xmlDocument.getElementsByTagName("dataConfig");
- if(dataConfigTags == null || dataConfigTags.getLength() == 0) {
- throw new DataImportHandlerException(SEVERE, "the root node '' is missing");
- }
- Element e = (Element) dataConfigTags.item(0);
- List documentTags = ConfigParseUtil.getChildNodes(e, "document");
- if (documentTags.isEmpty()) {
- throw new DataImportHandlerException(SEVERE, "DataImportHandler " +
- "configuration file must have one node.");
- }
-
- List scriptTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.SCRIPT);
- if (!scriptTags.isEmpty()) {
- script = new Script(scriptTags.get(0));
- }
-
- // Add the provided evaluators
- List functionTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.FUNCTION);
- if (!functionTags.isEmpty()) {
- for (Element element : functionTags) {
- String func = ConfigParseUtil.getStringAttribute(element, NAME, null);
- String clz = ConfigParseUtil.getStringAttribute(element, ConfigNameConstants.CLASS, null);
- if (func == null || clz == null){
- throw new DataImportHandlerException(
- SEVERE,
- " must have a 'name' and 'class' attributes");
- } else {
- functions.add(ConfigParseUtil.getAllAttributes(element));
- }
- }
- }
- List dataSourceTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.DATA_SRC);
- if (!dataSourceTags.isEmpty()) {
- for (Element element : dataSourceTags) {
- Map p = new HashMap<>();
- HashMap attrs = ConfigParseUtil.getAllAttributes(element);
- for (Map.Entry entry : attrs.entrySet()) {
- p.put(entry.getKey(), entry.getValue());
- }
- dataSources.put(p.get("name"), p);
- }
- }
- if(dataSources.get(null) == null){
- for (Map properties : dataSources.values()) {
- dataSources.put(null,properties);
- break;
- }
- }
- PropertyWriter pw = null;
- List propertyWriterTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.PROPERTY_WRITER);
- if (propertyWriterTags.isEmpty()) {
- boolean zookeeper = false;
- if (this.core != null
- && this.core.getCoreContainer().isZooKeeperAware()) {
- zookeeper = true;
- }
- pw = new PropertyWriter(zookeeper ? "ZKPropertiesWriter"
- : "SimplePropertiesWriter", Collections. emptyMap());
- } else if (propertyWriterTags.size() > 1) {
- throw new DataImportHandlerException(SEVERE, "Only one "
- + ConfigNameConstants.PROPERTY_WRITER + " can be configured.");
- } else {
- Element pwElement = propertyWriterTags.get(0);
- String type = null;
- Map params = new HashMap<>();
- for (Map.Entry entry : ConfigParseUtil.getAllAttributes(
- pwElement).entrySet()) {
- if (TYPE.equals(entry.getKey())) {
- type = entry.getValue();
- } else {
- params.put(entry.getKey(), entry.getValue());
- }
- }
- if (type == null) {
- throw new DataImportHandlerException(SEVERE, "The "
- + ConfigNameConstants.PROPERTY_WRITER + " element must specify "
- + TYPE);
- }
- pw = new PropertyWriter(type, params);
- }
- return new DIHConfiguration(documentTags.get(0), this, functions, script, dataSources, pw);
- }
-
- @SuppressWarnings("unchecked")
- private DIHProperties createPropertyWriter() {
- DIHProperties propWriter = null;
- PropertyWriter configPw = config.getPropertyWriter();
- try {
- Class writerClass = DocBuilder.loadClass(configPw.getType(), this.core);
- propWriter = writerClass.getConstructor().newInstance();
- propWriter.init(this, configPw.getParameters());
- } catch (Exception e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Unable to PropertyWriter implementation:" + configPw.getType(), e);
- }
- return propWriter;
- }
-
- public DIHConfiguration getConfig() {
- return config;
- }
-
- Date getIndexStartTime() {
- return indexStartTime;
- }
-
- void setIndexStartTime(Date indextStartTime) {
- this.indexStartTime = indextStartTime;
- }
-
- void store(Object key, Object value) {
- store.put(key, value);
- }
-
- Object retrieve(Object key) {
- return store.get(key);
- }
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- public DataSource getDataSourceInstance(Entity key, String name, Context ctx) {
- Map p = requestLevelDataSourceProps.get(name);
- if (p == null)
- p = config.getDataSources().get(name);
- if (p == null)
- p = requestLevelDataSourceProps.get(null);// for default data source
- if (p == null)
- p = config.getDataSources().get(null);
- if (p == null)
- throw new DataImportHandlerException(SEVERE,
- "No dataSource :" + name + " available for entity :" + key.getName());
- String type = p.get(TYPE);
- @SuppressWarnings({"rawtypes"})
- DataSource dataSrc = null;
- if (type == null) {
- dataSrc = new JdbcDataSource();
- } else {
- try {
- dataSrc = (DataSource) DocBuilder.loadClass(type, getCore()).getConstructor().newInstance();
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e, "Invalid type for data source: " + type);
- }
- }
- try {
- Properties copyProps = new Properties();
- copyProps.putAll(p);
- Map map = ctx.getRequestParameters();
- if (map.containsKey("rows")) {
- int rows = Integer.parseInt((String) map.get("rows"));
- if (map.containsKey("start")) {
- rows += Integer.parseInt((String) map.get("start"));
- }
- copyProps.setProperty("maxRows", String.valueOf(rows));
- }
- dataSrc.init(ctx, copyProps);
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e, "Failed to initialize DataSource: " + key.getDataSourceName());
- }
- return dataSrc;
- }
-
- public Status getStatus() {
- return status;
- }
-
- public void setStatus(Status status) {
- this.status = status;
- }
-
- public boolean isBusy() {
- return importLock.isLocked();
- }
-
- public void doFullImport(DIHWriter writer, RequestInfo requestParams) {
- log.info("Starting Full Import");
- setStatus(Status.RUNNING_FULL_DUMP);
- try {
- DIHProperties dihPropWriter = createPropertyWriter();
- setIndexStartTime(dihPropWriter.getCurrentTimestamp());
- docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
- checkWritablePersistFile(writer, dihPropWriter);
- docBuilder.execute();
- if (!requestParams.isDebug())
- cumulativeStatistics.add(docBuilder.importStatistics);
- } catch (Exception e) {
- SolrException.log(log, "Full Import failed", e);
- docBuilder.handleError("Full Import failed", e);
- } finally {
- setStatus(Status.IDLE);
- DocBuilder.INSTANCE.set(null);
- }
-
- }
-
- private void checkWritablePersistFile(DIHWriter writer, DIHProperties dihPropWriter) {
- if (isDeltaImportSupported && !dihPropWriter.isWritable()) {
- throw new DataImportHandlerException(SEVERE,
- "Properties is not writable. Delta imports are supported by data config but will not work.");
- }
- }
-
- public void doDeltaImport(DIHWriter writer, RequestInfo requestParams) {
- log.info("Starting Delta Import");
- setStatus(Status.RUNNING_DELTA_DUMP);
- try {
- DIHProperties dihPropWriter = createPropertyWriter();
- setIndexStartTime(dihPropWriter.getCurrentTimestamp());
- docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
- checkWritablePersistFile(writer, dihPropWriter);
- docBuilder.execute();
- if (!requestParams.isDebug())
- cumulativeStatistics.add(docBuilder.importStatistics);
- } catch (Exception e) {
- log.error("Delta Import Failed", e);
- docBuilder.handleError("Delta Import Failed", e);
- } finally {
- setStatus(Status.IDLE);
- DocBuilder.INSTANCE.set(null);
- }
-
- }
-
- public void runAsync(final RequestInfo reqParams, final DIHWriter sw) {
- new Thread(() -> runCmd(reqParams, sw)).start();
- }
-
- void runCmd(RequestInfo reqParams, DIHWriter sw) {
- String command = reqParams.getCommand();
- if (command.equals(ABORT_CMD)) {
- if (docBuilder != null) {
- docBuilder.abort();
- }
- return;
- }
- if (!importLock.tryLock()){
- log.warn("Import command failed . another import is running");
- return;
- }
- try {
- if (FULL_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) {
- doFullImport(sw, reqParams);
- } else if (command.equals(DELTA_IMPORT_CMD)) {
- doDeltaImport(sw, reqParams);
- }
- } finally {
- importLock.unlock();
- }
- }
-
- @SuppressWarnings("unchecked")
- Map getStatusMessages() {
- //this map object is a Collections.synchronizedMap(new LinkedHashMap()). if we
- // synchronize on the object it must be safe to iterate through the map
- @SuppressWarnings({"rawtypes"})
- Map statusMessages = (Map) retrieve(STATUS_MSGS);
- Map result = new LinkedHashMap<>();
- if (statusMessages != null) {
- synchronized (statusMessages) {
- for (Object o : statusMessages.entrySet()) {
- @SuppressWarnings({"rawtypes"})
- Map.Entry e = (Map.Entry) o;
- //the toString is taken because some of the Objects create the data lazily when toString() is called
- result.put((String) e.getKey(), e.getValue().toString());
- }
- }
- }
- return result;
-
- }
-
- public DocBuilder getDocBuilder() {
- return docBuilder;
- }
-
- public DocBuilder getDocBuilder(DIHWriter writer, RequestInfo requestParams) {
- DIHProperties dihPropWriter = createPropertyWriter();
- return new DocBuilder(this, writer, dihPropWriter, requestParams);
- }
-
- Map getEvaluators() {
- return getEvaluators(config.getFunctions());
- }
-
- /**
- * used by tests.
- */
- @SuppressWarnings({"unchecked"})
- Map getEvaluators(List> fn) {
- Map evaluators = new HashMap<>();
- evaluators.put(Evaluator.DATE_FORMAT_EVALUATOR, new DateFormatEvaluator());
- evaluators.put(Evaluator.SQL_ESCAPE_EVALUATOR, new SqlEscapingEvaluator());
- evaluators.put(Evaluator.URL_ENCODE_EVALUATOR, new UrlEvaluator());
- evaluators.put(Evaluator.ESCAPE_SOLR_QUERY_CHARS, new SolrQueryEscapingEvaluator());
- SolrCore core = docBuilder == null ? null : docBuilder.dataImporter.getCore();
- for (Map map : fn) {
- try {
- evaluators.put(map.get(NAME), (Evaluator) loadClass(map.get(CLASS), core).getConstructor().newInstance());
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e, "Unable to instantiate evaluator: " + map.get(CLASS));
- }
- }
- return evaluators;
- }
-
- static final ThreadLocal QUERY_COUNT = new ThreadLocal() {
- @Override
- protected AtomicLong initialValue() {
- return new AtomicLong();
- }
- };
-
-
-
- static final class MSG {
- public static final String NO_CONFIG_FOUND = "Configuration not found";
-
- public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run";
-
- public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid";
-
- public static final String LOAD_EXP = "Exception while loading DataImporter";
-
- public static final String JMX_DESC = "Manage data import from databases to Solr";
-
- public static final String CMD_RUNNING = "A command is still running...";
-
- public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag true in solrconfig.xml";
-
- public static final String CONFIG_RELOADED = "Configuration Re-loaded sucessfully";
-
- public static final String CONFIG_NOT_RELOADED = "Configuration NOT Re-loaded...Data Importer is busy.";
-
- public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";
-
- public static final String TOTAL_FAILED_DOCS = "Total Documents Failed";
-
- public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource";
-
- public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched";
-
- public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted";
-
- public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped";
- }
-
- public SolrCore getCore() {
- return core;
- }
-
- void putToCoreScopeSession(String key, Object val) {
- coreScopeSession.put(key, val);
- }
- Object getFromCoreScopeSession(String key) {
- return coreScopeSession.get(key);
- }
-
- public static final String COLUMN = "column";
-
- public static final String TYPE = "type";
-
- public static final String DATA_SRC = "dataSource";
-
- public static final String MULTI_VALUED = "multiValued";
-
- public static final String NAME = "name";
-
- public static final String STATUS_MSGS = "status-messages";
-
- public static final String FULL_IMPORT_CMD = "full-import";
-
- public static final String IMPORT_CMD = "import";
-
- public static final String DELTA_IMPORT_CMD = "delta-import";
-
- public static final String ABORT_CMD = "abort";
-
- public static final String DEBUG_MODE = "debug";
-
- public static final String RELOAD_CONF_CMD = "reload-config";
-
- public static final String SHOW_CONF_CMD = "show-config";
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataSource.java
deleted file mode 100644
index aeded279cbe..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataSource.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.io.Closeable;
-import java.util.Properties;
-
-/**
- *
- * Provides data from a source with a given query.
- *
- *
- * Implementation of this abstract class must provide a default no-arg constructor
- *
- *
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- *
- *
- * This API is experimental and may change in the future.
- *
- * @since solr 1.3
- */
-public abstract class DataSource implements Closeable {
-
- /**
- * Initializes the DataSource with the Context and
- * initialization properties.
- *
- * This is invoked by the DataImporter after creating an
- * instance of this class.
- */
- public abstract void init(Context context, Properties initProps);
-
- /**
- * Get records for the given query.The return type depends on the
- * implementation .
- *
- * @param query The query string. It can be a SQL for JdbcDataSource or a URL
- * for HttpDataSource or a file location for FileDataSource or a custom
- * format for your own custom DataSource.
- * @return Depends on the implementation. For instance JdbcDataSource returns
- * an Iterator<Map <String,Object>>
- */
- public abstract T getData(String query);
-
- /**
- * Cleans up resources of this DataSource after use.
- */
- public abstract void close();
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java
deleted file mode 100644
index f4df82080aa..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.IllformedLocaleException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.TimeZone;
-
-import org.apache.solr.common.util.SuppressForbidden;
-import org.apache.solr.handler.dataimport.config.EntityField;
-import org.apache.solr.util.DateMathParser;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-
-/**
- *
- * Formats values using a given date format.
- * Pass three parameters:
- *
- * An {@link EntityField} or a date expression to be parsed with
- * the {@link DateMathParser} class If the value is in a String,
- * then it is assumed to be a datemath expression, otherwise it
- * resolved using a {@link VariableResolver} instance
- * A date format see {@link SimpleDateFormat} for the syntax.
- * The {@link Locale} to parse.
- * (optional. Defaults to the Root Locale)
- *
- */
-public class DateFormatEvaluator extends Evaluator {
-
- public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
- protected Map availableLocales = new HashMap<>();
- protected Set availableTimezones = new HashSet<>();
-
- @SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
- public DateFormatEvaluator() {
- for (Locale locale : Locale.getAvailableLocales()) {
- availableLocales.put(locale.toString(), locale);
- }
- for (String tz : TimeZone.getAvailableIDs()) {
- availableTimezones.add(tz);
- }
- }
-
- private SimpleDateFormat getDateFormat(String pattern, TimeZone timezone, Locale locale) {
- final SimpleDateFormat sdf = new SimpleDateFormat(pattern, locale);
- sdf.setTimeZone(timezone);
- return sdf;
- }
-
- @Override
- public String evaluate(String expression, Context context) {
- List l = parseParams(expression, context.getVariableResolver());
- if (l.size() < 2 || l.size() > 4) {
- throw new DataImportHandlerException(SEVERE, "'formatDate()' must have two, three or four parameters ");
- }
- Object o = l.get(0);
- Object format = l.get(1);
- if (format instanceof VariableWrapper) {
- VariableWrapper wrapper = (VariableWrapper) format;
- o = wrapper.resolve();
- format = o.toString();
- }
- Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
- if(l.size()>2) {
- Object localeObj = l.get(2);
- String localeStr = null;
- if (localeObj instanceof VariableWrapper) {
- localeStr = ((VariableWrapper) localeObj).resolve().toString();
- } else {
- localeStr = localeObj.toString();
- }
- locale = availableLocales.get(localeStr);
- if (locale == null) try {
- locale = new Locale.Builder().setLanguageTag(localeStr).build();
- } catch (IllformedLocaleException ex) {
- throw new DataImportHandlerException(SEVERE, "Malformed / non-existent locale: " + localeStr, ex);
- }
- }
- TimeZone tz = TimeZone.getDefault(); // DWS TODO: is this the right default for us? Deserves explanation if so.
- if(l.size()==4) {
- Object tzObj = l.get(3);
- String tzStr = null;
- if (tzObj instanceof VariableWrapper) {
- tzStr = ((VariableWrapper) tzObj).resolve().toString();
- } else {
- tzStr = tzObj.toString();
- }
- if(availableTimezones.contains(tzStr)) {
- tz = TimeZone.getTimeZone(tzStr);
- } else {
- throw new DataImportHandlerException(SEVERE, "Unsupported Timezone: " + tzStr);
- }
- }
- String dateFmt = format.toString();
- SimpleDateFormat fmt = getDateFormat(dateFmt, tz, locale);
- Date date = null;
- if (o instanceof VariableWrapper) {
- date = evaluateWrapper((VariableWrapper) o, locale, tz);
- } else {
- date = evaluateString(o.toString(), locale, tz);
- }
- return fmt.format(date);
- }
-
- /**
- * NOTE: declared as a method to allow for extensibility
- *
- * @lucene.experimental this API is experimental and subject to change
- * @return the result of evaluating a string
- */
- protected Date evaluateWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) {
- Date date = null;
- Object variableval = resolveWrapper(variableWrapper,locale,tz);
- if (variableval instanceof Date) {
- date = (Date) variableval;
- } else {
- String s = variableval.toString();
- try {
- date = getDateFormat(DEFAULT_DATE_FORMAT, tz, locale).parse(s);
- } catch (ParseException exp) {
- wrapAndThrow(SEVERE, exp, "Invalid expression for date");
- }
- }
- return date;
- }
-
- /**
- * NOTE: declared as a method to allow for extensibility
- * @lucene.experimental
- * @return the result of evaluating a string
- */
- protected Date evaluateString(String datemathfmt, Locale locale, TimeZone tz) {
- // note: DMP does not use the locale but perhaps a subclass might use it, for e.g. parsing a date in a custom
- // string that doesn't necessarily have date math?
- //TODO refactor DateMathParser.parseMath a bit to have a static method for this logic.
- if (datemathfmt.startsWith("NOW")) {
- datemathfmt = datemathfmt.substring("NOW".length());
- }
- try {
- DateMathParser parser = new DateMathParser(tz);
- parser.setNow(new Date());// thus do *not* use SolrRequestInfo
- return parser.parseMath(datemathfmt);
- } catch (ParseException e) {
- throw wrapAndThrow(SEVERE, e, "Invalid expression for date");
- }
- }
-
- /**
- * NOTE: declared as a method to allow for extensibility
- * @lucene.experimental
- * @return the result of resolving the variable wrapper
- */
- protected Object resolveWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) {
- return variableWrapper.resolve();
- }
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java
deleted file mode 100644
index 61edbe61117..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.lang.invoke.MethodHandles;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.*;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- *
- * {@link Transformer} instance which creates {@link Date} instances out of {@link String}s.
- *
- *
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- *
- * This API is experimental and subject to change
- *
- * @since solr 1.3
- */
-public class DateFormatTransformer extends Transformer {
- private Map fmtCache = new HashMap<>();
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- @Override
- @SuppressWarnings("unchecked")
- public Object transformRow(Map aRow, Context context) {
-
- for (Map map : context.getAllEntityFields()) {
- Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
- String customLocale = map.get(LOCALE);
- if (customLocale != null) {
- try {
- locale = new Locale.Builder().setLanguageTag(customLocale).build();
- } catch (IllformedLocaleException e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid Locale specified: " + customLocale, e);
- }
- }
-
- String fmt = map.get(DATE_TIME_FMT);
- if (fmt == null)
- continue;
- VariableResolver resolver = context.getVariableResolver();
- fmt = resolver.replaceTokens(fmt);
- String column = map.get(DataImporter.COLUMN);
- String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
- if (srcCol == null)
- srcCol = column;
- try {
- Object o = aRow.get(srcCol);
- if (o instanceof List) {
- @SuppressWarnings({"rawtypes"})
- List inputs = (List) o;
- List results = new ArrayList<>();
- for (Object input : inputs) {
- results.add(process(input, fmt, locale));
- }
- aRow.put(column, results);
- } else {
- if (o != null) {
- aRow.put(column, process(o, fmt, locale));
- }
- }
- } catch (ParseException e) {
- log.warn("Could not parse a Date field ", e);
- }
- }
- return aRow;
- }
-
- private Date process(Object value, String format, Locale locale) throws ParseException {
- if (value == null) return null;
- String strVal = value.toString().trim();
- if (strVal.length() == 0)
- return null;
- SimpleDateFormat fmt = fmtCache.get(format);
- if (fmt == null) {
- fmt = new SimpleDateFormat(format, locale);
- fmtCache.put(format, fmt);
- }
- return fmt.parse(strVal);
- }
-
- public static final String DATE_TIME_FMT = "dateTimeFormat";
-
- public static final String LOCALE = "locale";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugInfo.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugInfo.java
deleted file mode 100644
index 623832fc9c4..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugInfo.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.AbstractList;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.StrUtils;
-
-public class DebugInfo {
-
- private static final class ChildRollupDocs extends AbstractList {
-
- private List delegate = new ArrayList<>();
-
- @Override
- public SolrInputDocument get(int index) {
- return delegate.get(index);
- }
-
- @Override
- public int size() {
- return delegate.size();
- }
-
- public boolean add(SolrInputDocument e) {
- SolrInputDocument transformed = e.deepCopy();
- if (transformed.hasChildDocuments()) {
- ChildRollupDocs childList = new ChildRollupDocs();
- childList.addAll(transformed.getChildDocuments());
- transformed.addField("_childDocuments_", childList);
- transformed.getChildDocuments().clear();
- }
- return delegate.add(transformed);
- }
- }
-
- public List debugDocuments = new ChildRollupDocs();
-
- public NamedList debugVerboseOutput = null;
- public boolean verbose;
-
- public DebugInfo(Map requestParams) {
- verbose = StrUtils.parseBool((String) requestParams.get("verbose"), false);
- debugVerboseOutput = new NamedList<>();
- }
-}
-
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java
deleted file mode 100644
index 9de42fc6f3a..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-import org.apache.solr.common.util.NamedList;
-
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.text.MessageFormat;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Stack;
-
-/**
- *
- * Implements most of the interactive development functionality
- *
- *
- *
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- *
- *
- * This API is experimental and subject to change
- *
- * @since solr 1.3
- */
-class DebugLogger {
- private Stack debugStack;
-
- @SuppressWarnings({"rawtypes"})
- NamedList output;
-// private final SolrWriter writer1;
-
- private static final String LINE = "---------------------------------------------";
-
- private MessageFormat fmt = new MessageFormat(
- "----------- row #{0}-------------", Locale.ROOT);
-
- boolean enabled = true;
-
- @SuppressWarnings({"rawtypes"})
- public DebugLogger() {
-// writer = solrWriter;
- output = new NamedList();
- debugStack = new Stack() {
-
- @Override
- public DebugInfo pop() {
- if (size() == 1)
- throw new DataImportHandlerException(
- DataImportHandlerException.SEVERE, "Stack is becoming empty");
- return super.pop();
- }
- };
- debugStack.push(new DebugInfo(null, DIHLogLevels.NONE, null));
- output = debugStack.peek().lst;
- }
-
- private DebugInfo peekStack() {
- return debugStack.isEmpty() ? null : debugStack.peek();
- }
-
- @SuppressWarnings({"unchecked"})
- public void log(DIHLogLevels event, String name, Object row) {
- if (event == DIHLogLevels.DISABLE_LOGGING) {
- enabled = false;
- return;
- } else if (event == DIHLogLevels.ENABLE_LOGGING) {
- enabled = true;
- return;
- }
-
- if (!enabled && event != DIHLogLevels.START_ENTITY
- && event != DIHLogLevels.END_ENTITY) {
- return;
- }
-
- if (event == DIHLogLevels.START_DOC) {
- debugStack.push(new DebugInfo(null, DIHLogLevels.START_DOC, peekStack()));
- } else if (DIHLogLevels.START_ENTITY == event) {
- debugStack
- .push(new DebugInfo(name, DIHLogLevels.START_ENTITY, peekStack()));
- } else if (DIHLogLevels.ENTITY_OUT == event
- || DIHLogLevels.PRE_TRANSFORMER_ROW == event) {
- if (debugStack.peek().type == DIHLogLevels.START_ENTITY
- || debugStack.peek().type == DIHLogLevels.START_DOC) {
- debugStack.peek().lst.add(null, fmt.format(new Object[]{++debugStack
- .peek().rowCount}));
- addToNamedList(debugStack.peek().lst, row);
- debugStack.peek().lst.add(null, LINE);
- }
- } else if (event == DIHLogLevels.ROW_END) {
- popAllTransformers();
- } else if (DIHLogLevels.END_ENTITY == event) {
- while (debugStack.pop().type != DIHLogLevels.START_ENTITY)
- ;
- } else if (DIHLogLevels.END_DOC == event) {
- while (debugStack.pop().type != DIHLogLevels.START_DOC)
- ;
- } else if (event == DIHLogLevels.TRANSFORMER_EXCEPTION) {
- debugStack.push(new DebugInfo(name, event, peekStack()));
- debugStack.peek().lst.add("EXCEPTION",
- getStacktraceString((Exception) row));
- } else if (DIHLogLevels.TRANSFORMED_ROW == event) {
- debugStack.push(new DebugInfo(name, event, peekStack()));
- debugStack.peek().lst.add(null, LINE);
- addToNamedList(debugStack.peek().lst, row);
- debugStack.peek().lst.add(null, LINE);
- if (row instanceof DataImportHandlerException) {
- DataImportHandlerException dataImportHandlerException = (DataImportHandlerException) row;
- dataImportHandlerException.debugged = true;
- }
- } else if (DIHLogLevels.ENTITY_META == event) {
- popAllTransformers();
- debugStack.peek().lst.add(name, row);
- } else if (DIHLogLevels.ENTITY_EXCEPTION == event) {
- if (row instanceof DataImportHandlerException) {
- DataImportHandlerException dihe = (DataImportHandlerException) row;
- if (dihe.debugged)
- return;
- dihe.debugged = true;
- }
-
- popAllTransformers();
- debugStack.peek().lst.add("EXCEPTION",
- getStacktraceString((Exception) row));
- }
- }
-
- private void popAllTransformers() {
- while (true) {
- DIHLogLevels type = debugStack.peek().type;
- if (type == DIHLogLevels.START_DOC || type == DIHLogLevels.START_ENTITY)
- break;
- debugStack.pop();
- }
- }
-
- @SuppressWarnings({"unchecked"})
- private void addToNamedList(@SuppressWarnings({"rawtypes"})NamedList nl, Object row) {
- if (row instanceof List) {
- @SuppressWarnings({"rawtypes"})
- List list = (List) row;
- @SuppressWarnings({"rawtypes"})
- NamedList l = new NamedList();
- nl.add(null, l);
- for (Object o : list) {
- Map map = (Map) o;
- for (Map.Entry entry : map.entrySet())
- nl.add(entry.getKey(), entry.getValue());
- }
- } else if (row instanceof Map) {
- Map map = (Map) row;
- for (Map.Entry entry : map.entrySet())
- nl.add(entry.getKey(), entry.getValue());
- }
- }
-
- @SuppressWarnings({"rawtypes"})
- DataSource wrapDs(final DataSource ds) {
- return new DataSource() {
- @Override
- public void init(Context context, Properties initProps) {
- ds.init(context, initProps);
- }
-
- @Override
- public void close() {
- ds.close();
- }
-
- @Override
- public Object getData(String query) {
- log(DIHLogLevels.ENTITY_META, "query", query);
- long start = System.nanoTime();
- try {
- return ds.getData(query);
- } catch (DataImportHandlerException de) {
- log(DIHLogLevels.ENTITY_EXCEPTION,
- null, de);
- throw de;
- } catch (Exception e) {
- log(DIHLogLevels.ENTITY_EXCEPTION,
- null, e);
- DataImportHandlerException de = new DataImportHandlerException(
- DataImportHandlerException.SEVERE, "", e);
- de.debugged = true;
- throw de;
- } finally {
- log(DIHLogLevels.ENTITY_META, "time-taken", DocBuilder
- .getTimeElapsedSince(start));
- }
- }
- };
- }
-
- Transformer wrapTransformer(final Transformer t) {
- return new Transformer() {
- @Override
- public Object transformRow(Map row, Context context) {
- log(DIHLogLevels.PRE_TRANSFORMER_ROW, null, row);
- String tName = getTransformerName(t);
- Object result = null;
- try {
- result = t.transformRow(row, context);
- log(DIHLogLevels.TRANSFORMED_ROW, tName, result);
- } catch (DataImportHandlerException de) {
- log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, de);
- de.debugged = true;
- throw de;
- } catch (Exception e) {
- log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, e);
- DataImportHandlerException de = new DataImportHandlerException(DataImportHandlerException.SEVERE, "", e);
- de.debugged = true;
- throw de;
- }
- return result;
- }
- };
- }
-
- public static String getStacktraceString(Exception e) {
- StringWriter sw = new StringWriter();
- e.printStackTrace(new PrintWriter(sw));
- return sw.toString();
- }
-
- static String getTransformerName(Transformer t) {
- @SuppressWarnings({"rawtypes"})
- Class transClass = t.getClass();
- if (t instanceof EntityProcessorWrapper.ReflectionTransformer) {
- return ((EntityProcessorWrapper.ReflectionTransformer) t).trans;
- }
- if (t instanceof ScriptTransformer) {
- ScriptTransformer scriptTransformer = (ScriptTransformer) t;
- return "script:" + scriptTransformer.getFunctionName();
- }
- if (transClass.getPackage().equals(DebugLogger.class.getPackage())) {
- return transClass.getSimpleName();
- } else {
- return transClass.getName();
- }
- }
-
- private static class DebugInfo {
- String name;
-
- int tCount, rowCount;
-
- @SuppressWarnings({"rawtypes"})
- NamedList lst;
-
- DIHLogLevels type;
-
- DebugInfo parent;
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- public DebugInfo(String name, DIHLogLevels type, DebugInfo parent) {
- this.name = name;
- this.type = type;
- this.parent = parent;
- lst = new NamedList();
- if (parent != null) {
- String displayName = null;
- if (type == DIHLogLevels.START_ENTITY) {
- displayName = "entity:" + name;
- } else if (type == DIHLogLevels.TRANSFORMED_ROW
- || type == DIHLogLevels.TRANSFORMER_EXCEPTION) {
- displayName = "transformer:" + name;
- } else if (type == DIHLogLevels.START_DOC) {
- this.name = displayName = "document#" + SolrWriter.getDocCount();
- }
- parent.lst.add(displayName, lst);
- }
- }
- }
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java
deleted file mode 100644
index 8115695d40a..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java
+++ /dev/null
@@ -1,1020 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
-import org.apache.solr.handler.dataimport.config.DIHConfiguration;
-import org.apache.solr.handler.dataimport.config.Entity;
-import org.apache.solr.handler.dataimport.config.EntityField;
-
-import static org.apache.solr.handler.dataimport.SolrWriter.LAST_INDEX_KEY;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.lang.invoke.MethodHandles;
-import java.text.SimpleDateFormat;
-import java.util.*;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicLong;
-
-/**
- * {@link DocBuilder} is responsible for creating Solr documents out of the given configuration. It also maintains
- * statistics information. It depends on the {@link EntityProcessor} implementations to fetch data.
- *
- * This API is experimental and subject to change
- *
- * @since solr 1.3
- */
-public class DocBuilder {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private static final AtomicBoolean WARNED_ABOUT_INDEX_TIME_BOOSTS = new AtomicBoolean();
-
- private static final Date EPOCH = new Date(0);
- public static final String DELETE_DOC_BY_ID = "$deleteDocById";
- public static final String DELETE_DOC_BY_QUERY = "$deleteDocByQuery";
- public static final String DOC_BOOST = "$docBoost";
- public static final String SKIP_DOC = "$skipDoc";
- public static final String SKIP_ROW = "$skipRow";
-
- DataImporter dataImporter;
-
- private DIHConfiguration config;
-
- private EntityProcessorWrapper currentEntityProcessorWrapper;
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- private Map statusMessages = Collections.synchronizedMap(new LinkedHashMap());
-
- public Statistics importStatistics = new Statistics();
-
- DIHWriter writer;
-
- boolean verboseDebug = false;
-
- Map session = new HashMap<>();
-
- static final ThreadLocal INSTANCE = new ThreadLocal<>();
- private Map persistedProperties;
-
- private DIHProperties propWriter;
- private DebugLogger debugLogger;
- private final RequestInfo reqParams;
-
- public DocBuilder(DataImporter dataImporter, DIHWriter solrWriter, DIHProperties propWriter, RequestInfo reqParams) {
- INSTANCE.set(this);
- this.dataImporter = dataImporter;
- this.reqParams = reqParams;
- this.propWriter = propWriter;
- DataImporter.QUERY_COUNT.set(importStatistics.queryCount);
- verboseDebug = reqParams.isDebug() && reqParams.getDebugInfo().verbose;
- persistedProperties = propWriter.readIndexerProperties();
-
- writer = solrWriter;
- ContextImpl ctx = new ContextImpl(null, null, null, null, reqParams.getRawParams(), null, this);
- if (writer != null) {
- writer.init(ctx);
- }
- }
-
-
- DebugLogger getDebugLogger(){
- if (debugLogger == null) {
- debugLogger = new DebugLogger();
- }
- return debugLogger;
- }
-
- private VariableResolver getVariableResolver() {
- try {
- VariableResolver resolver = null;
- String epoch = propWriter.convertDateToString(EPOCH);
- if(dataImporter != null && dataImporter.getCore() != null
- && dataImporter.getCore().getCoreDescriptor().getSubstitutableProperties() != null){
- resolver = new VariableResolver(dataImporter.getCore().getCoreDescriptor().getSubstitutableProperties());
- } else {
- resolver = new VariableResolver();
- }
- resolver.setEvaluators(dataImporter.getEvaluators());
- Map indexerNamespace = new HashMap<>();
- if (persistedProperties.get(LAST_INDEX_TIME) != null) {
- indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.get(LAST_INDEX_TIME));
- } else {
- // set epoch
- indexerNamespace.put(LAST_INDEX_TIME, epoch);
- }
- indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
- indexerNamespace.put("request", new HashMap<>(reqParams.getRawParams()));
- indexerNamespace.put("handlerName", dataImporter.getHandlerName());
- for (Entity entity : dataImporter.getConfig().getEntities()) {
- Map entityNamespace = new HashMap<>();
- String key = SolrWriter.LAST_INDEX_KEY;
- Object lastIndex = persistedProperties.get(entity.getName() + "." + key);
- if (lastIndex != null) {
- entityNamespace.put(SolrWriter.LAST_INDEX_KEY, lastIndex);
- } else {
- entityNamespace.put(SolrWriter.LAST_INDEX_KEY, epoch);
- }
- indexerNamespace.put(entity.getName(), entityNamespace);
- }
- resolver.addNamespace(ConfigNameConstants.IMPORTER_NS_SHORT, indexerNamespace);
- resolver.addNamespace(ConfigNameConstants.IMPORTER_NS, indexerNamespace);
- return resolver;
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e);
- // unreachable statement
- return null;
- }
- }
-
- private void invokeEventListener(String className) {
- invokeEventListener(className, null);
- }
-
-
- private void invokeEventListener(String className, Exception lastException) {
- try {
- @SuppressWarnings({"unchecked"})
- EventListener listener = (EventListener) loadClass(className, dataImporter.getCore()).getConstructor().newInstance();
- notifyListener(listener, lastException);
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e, "Unable to load class : " + className);
- }
- }
-
- private void notifyListener(EventListener listener, Exception lastException) {
- String currentProcess;
- if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) {
- currentProcess = Context.DELTA_DUMP;
- } else {
- currentProcess = Context.FULL_DUMP;
- }
- ContextImpl ctx = new ContextImpl(null, getVariableResolver(), null, currentProcess, session, null, this);
- ctx.setLastException(lastException);
- listener.onEvent(ctx);
- }
-
- @SuppressWarnings("unchecked")
- public void execute() {
- List epwList = null;
- try {
- dataImporter.store(DataImporter.STATUS_MSGS, statusMessages);
- config = dataImporter.getConfig();
- final AtomicLong startTime = new AtomicLong(System.nanoTime());
- statusMessages.put(TIME_ELAPSED, new Object() {
- @Override
- public String toString() {
- return getTimeElapsedSince(startTime.get());
- }
- });
-
- statusMessages.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED,
- importStatistics.queryCount);
- statusMessages.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED,
- importStatistics.rowsCount);
- statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED,
- importStatistics.docCount);
- statusMessages.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED,
- importStatistics.skipDocCount);
-
- List entities = reqParams.getEntitiesToRun();
-
- // Trigger onImportStart
- if (config.getOnImportStart() != null) {
- invokeEventListener(config.getOnImportStart());
- }
- AtomicBoolean fullCleanDone = new AtomicBoolean(false);
- //we must not do a delete of *:* multiple times if there are multiple root entities to be run
- Map lastIndexTimeProps = new HashMap<>();
- lastIndexTimeProps.put(LAST_INDEX_KEY, dataImporter.getIndexStartTime());
-
- epwList = new ArrayList<>(config.getEntities().size());
- for (Entity e : config.getEntities()) {
- epwList.add(getEntityProcessorWrapper(e));
- }
- for (EntityProcessorWrapper epw : epwList) {
- if (entities != null && !entities.contains(epw.getEntity().getName()))
- continue;
- lastIndexTimeProps.put(epw.getEntity().getName() + "." + LAST_INDEX_KEY, propWriter.getCurrentTimestamp());
- currentEntityProcessorWrapper = epw;
- String delQuery = epw.getEntity().getAllAttributes().get("preImportDeleteQuery");
- if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) {
- cleanByQuery(delQuery, fullCleanDone);
- doDelta();
- delQuery = epw.getEntity().getAllAttributes().get("postImportDeleteQuery");
- if (delQuery != null) {
- fullCleanDone.set(false);
- cleanByQuery(delQuery, fullCleanDone);
- }
- } else {
- cleanByQuery(delQuery, fullCleanDone);
- doFullDump();
- delQuery = epw.getEntity().getAllAttributes().get("postImportDeleteQuery");
- if (delQuery != null) {
- fullCleanDone.set(false);
- cleanByQuery(delQuery, fullCleanDone);
- }
- }
- }
-
- if (stop.get()) {
- // Dont commit if aborted using command=abort
- statusMessages.put("Aborted", new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date()));
- handleError("Aborted", null);
- } else {
- // Do not commit unnecessarily if this is a delta-import and no documents were created or deleted
- if (!reqParams.isClean()) {
- if (importStatistics.docCount.get() > 0 || importStatistics.deletedDocCount.get() > 0) {
- finish(lastIndexTimeProps);
- }
- } else {
- // Finished operation normally, commit now
- finish(lastIndexTimeProps);
- }
-
- if (config.getOnImportEnd() != null) {
- invokeEventListener(config.getOnImportEnd());
- }
- }
-
- statusMessages.remove(TIME_ELAPSED);
- statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, ""+ importStatistics.docCount.get());
- if(importStatistics.failedDocCount.get() > 0)
- statusMessages.put(DataImporter.MSG.TOTAL_FAILED_DOCS, ""+ importStatistics.failedDocCount.get());
-
- statusMessages.put("Time taken", getTimeElapsedSince(startTime.get()));
- if (log.isInfoEnabled()) {
- log.info("Time taken = {}", getTimeElapsedSince(startTime.get()));
- }
- } catch(Exception e)
- {
- throw new RuntimeException(e);
- } finally {
- // Cannot use IOUtils.closeQuietly since DIH relies on exceptions bubbling out of writer.close() to indicate
- // success/failure of the run.
- RuntimeException raisedDuringClose = null;
- try {
- if (writer != null) {
- writer.close();
- }
- } catch (RuntimeException e) {
- if (log.isWarnEnabled()) {
- log.warn("Exception encountered while closing DIHWriter " + writer + "; temporarily suppressing to ensure other DocBuilder elements are closed", e); // logOk
- }
- raisedDuringClose = e;
- }
-
- if (epwList != null) {
- closeEntityProcessorWrappers(epwList);
- }
- if(reqParams.isDebug()) {
- reqParams.getDebugInfo().debugVerboseOutput = getDebugLogger().output;
- }
-
- if (raisedDuringClose != null) {
- throw raisedDuringClose;
- }
- }
- }
- private void closeEntityProcessorWrappers(List epwList) {
- for(EntityProcessorWrapper epw : epwList) {
- IOUtils.closeQuietly(epw);
-
- if(epw.getDatasource() != null) {
- IOUtils.closeQuietly(epw.getDatasource());
- }
- closeEntityProcessorWrappers(epw.getChildren());
- }
- }
-
- @SuppressWarnings("unchecked")
- private void finish(Map lastIndexTimeProps) {
- log.info("Import completed successfully");
- statusMessages.put("", "Indexing completed. Added/Updated: "
- + importStatistics.docCount + " documents. Deleted "
- + importStatistics.deletedDocCount + " documents.");
- if(reqParams.isCommit()) {
- writer.commit(reqParams.isOptimize());
- addStatusMessage("Committed");
- if (reqParams.isOptimize())
- addStatusMessage("Optimized");
- }
- try {
- propWriter.persist(lastIndexTimeProps);
- } catch (Exception e) {
- log.error("Could not write property file", e);
- statusMessages.put("error", "Could not write property file. Delta imports will not work. " +
- "Make sure your conf directory is writable");
- }
- }
-
- @SuppressWarnings({"unchecked"})
- void handleError(String message, Exception e) {
- if (!dataImporter.getCore().getCoreContainer().isZooKeeperAware()) {
- writer.rollback();
- }
-
- statusMessages.put(message, "Indexing error");
- addStatusMessage(message);
- if ((config != null) && (config.getOnError() != null)) {
- invokeEventListener(config.getOnError(), e);
- }
- }
-
- private void doFullDump() {
- addStatusMessage("Full Dump Started");
- buildDocument(getVariableResolver(), null, null, currentEntityProcessorWrapper, true, null);
- }
-
- @SuppressWarnings("unchecked")
- private void doDelta() {
- addStatusMessage("Delta Dump started");
- VariableResolver resolver = getVariableResolver();
-
- if (config.getDeleteQuery() != null) {
- writer.deleteByQuery(config.getDeleteQuery());
- }
-
- addStatusMessage("Identifying Delta");
- log.info("Starting delta collection.");
- Set> deletedKeys = new HashSet<>();
- Set> allPks = collectDelta(currentEntityProcessorWrapper, resolver, deletedKeys);
- if (stop.get())
- return;
- addStatusMessage("Deltas Obtained");
- addStatusMessage("Building documents");
- if (!deletedKeys.isEmpty()) {
- allPks.removeAll(deletedKeys);
- deleteAll(deletedKeys);
- // Make sure that documents are not re-created
- }
- deletedKeys = null;
- writer.setDeltaKeys(allPks);
-
- statusMessages.put("Total Changed Documents", allPks.size());
- VariableResolver vri = getVariableResolver();
- Iterator> pkIter = allPks.iterator();
- while (pkIter.hasNext()) {
- Map map = pkIter.next();
- vri.addNamespace(ConfigNameConstants.IMPORTER_NS_SHORT + ".delta", map);
- buildDocument(vri, null, map, currentEntityProcessorWrapper, true, null);
- pkIter.remove();
- // check for abort
- if (stop.get())
- break;
- }
-
- if (!stop.get()) {
- log.info("Delta Import completed successfully");
- }
- }
-
- private void deleteAll(Set> deletedKeys) {
- log.info("Deleting stale documents ");
- Iterator> iter = deletedKeys.iterator();
- while (iter.hasNext()) {
- Map map = iter.next();
- String keyName = currentEntityProcessorWrapper.getEntity().isDocRoot() ? currentEntityProcessorWrapper.getEntity().getPk() : currentEntityProcessorWrapper.getEntity().getSchemaPk();
- Object key = map.get(keyName);
- if(key == null) {
- keyName = findMatchingPkColumn(keyName, map);
- key = map.get(keyName);
- }
- if(key == null) {
- log.warn("no key was available for deleted pk query. keyName = {}", keyName);
- continue;
- }
- writer.deleteDoc(key);
- importStatistics.deletedDocCount.incrementAndGet();
- iter.remove();
- }
- }
-
- @SuppressWarnings("unchecked")
- public void addStatusMessage(String msg) {
- statusMessages.put(msg, new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date()));
- }
-
- private void resetEntity(EntityProcessorWrapper epw) {
- epw.setInitialized(false);
- for (EntityProcessorWrapper child : epw.getChildren()) {
- resetEntity(child);
- }
-
- }
-
- private void buildDocument(VariableResolver vr, DocWrapper doc,
- Map pk, EntityProcessorWrapper epw, boolean isRoot,
- ContextImpl parentCtx) {
- List entitiesToDestroy = new ArrayList<>();
- try {
- buildDocument(vr, doc, pk, epw, isRoot, parentCtx, entitiesToDestroy);
- } catch (Exception e) {
- throw new RuntimeException(e);
- } finally {
- for (EntityProcessorWrapper entityWrapper : entitiesToDestroy) {
- entityWrapper.destroy();
- }
- resetEntity(epw);
- }
- }
-
- @SuppressWarnings("unchecked")
- private void buildDocument(VariableResolver vr, DocWrapper doc,
- Map pk, EntityProcessorWrapper epw, boolean isRoot,
- ContextImpl parentCtx, List entitiesToDestroy) {
-
- ContextImpl ctx = new ContextImpl(epw, vr, null,
- pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP,
- session, parentCtx, this);
- epw.init(ctx);
- if (!epw.isInitialized()) {
- entitiesToDestroy.add(epw);
- epw.setInitialized(true);
- }
-
- if (reqParams.getStart() > 0) {
- getDebugLogger().log(DIHLogLevels.DISABLE_LOGGING, null, null);
- }
-
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.START_ENTITY, epw.getEntity().getName(), null);
- }
-
- int seenDocCount = 0;
-
- try {
- while (true) {
- if (stop.get())
- return;
- if(importStatistics.docCount.get() > (reqParams.getStart() + reqParams.getRows())) break;
- try {
- seenDocCount++;
-
- if (seenDocCount > reqParams.getStart()) {
- getDebugLogger().log(DIHLogLevels.ENABLE_LOGGING, null, null);
- }
-
- if (verboseDebug && epw.getEntity().isDocRoot()) {
- getDebugLogger().log(DIHLogLevels.START_DOC, epw.getEntity().getName(), null);
- }
- if (doc == null && epw.getEntity().isDocRoot()) {
- doc = new DocWrapper();
- ctx.setDoc(doc);
- Entity e = epw.getEntity();
- while (e.getParentEntity() != null) {
- addFields(e.getParentEntity(), doc, (Map) vr
- .resolve(e.getParentEntity().getName()), vr);
- e = e.getParentEntity();
- }
- }
-
- Map arow = epw.nextRow();
- if (arow == null) {
- break;
- }
-
- // Support for start parameter in debug mode
- if (epw.getEntity().isDocRoot()) {
- if (seenDocCount <= reqParams.getStart())
- continue;
- if (seenDocCount > reqParams.getStart() + reqParams.getRows()) {
- log.info("Indexing stopped at docCount = {}", importStatistics.docCount);
- break;
- }
- }
-
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.ENTITY_OUT, epw.getEntity().getName(), arow);
- }
- importStatistics.rowsCount.incrementAndGet();
-
- DocWrapper childDoc = null;
- if (doc != null) {
- if (epw.getEntity().isChild()) {
- childDoc = new DocWrapper();
- handleSpecialCommands(arow, childDoc);
- addFields(epw.getEntity(), childDoc, arow, vr);
- doc.addChildDocument(childDoc);
- } else {
- handleSpecialCommands(arow, doc);
- vr.addNamespace(epw.getEntity().getName(), arow);
- addFields(epw.getEntity(), doc, arow, vr);
- vr.removeNamespace(epw.getEntity().getName());
- }
- }
- if (epw.getEntity().getChildren() != null) {
- vr.addNamespace(epw.getEntity().getName(), arow);
- for (EntityProcessorWrapper child : epw.getChildren()) {
- if (childDoc != null) {
- buildDocument(vr, childDoc,
- child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy);
- } else {
- buildDocument(vr, doc,
- child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy);
- }
- }
- vr.removeNamespace(epw.getEntity().getName());
- }
- if (epw.getEntity().isDocRoot()) {
- if (stop.get())
- return;
- if (!doc.isEmpty()) {
- boolean result = writer.upload(doc);
- if(reqParams.isDebug()) {
- reqParams.getDebugInfo().debugDocuments.add(doc);
- }
- doc = null;
- if (result){
- importStatistics.docCount.incrementAndGet();
- } else {
- importStatistics.failedDocCount.incrementAndGet();
- }
- }
- }
- } catch (DataImportHandlerException e) {
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), e);
- }
- if(e.getErrCode() == DataImportHandlerException.SKIP_ROW){
- continue;
- }
- if (isRoot) {
- if (e.getErrCode() == DataImportHandlerException.SKIP) {
- importStatistics.skipDocCount.getAndIncrement();
- doc = null;
- } else {
- SolrException.log(log, "Exception while processing: "
- + epw.getEntity().getName() + " document : " + doc, e);
- }
- if (e.getErrCode() == DataImportHandlerException.SEVERE)
- throw e;
- } else
- throw e;
- } catch (Exception t) {
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), t);
- }
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t);
- } finally {
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.ROW_END, epw.getEntity().getName(), null);
- if (epw.getEntity().isDocRoot())
- getDebugLogger().log(DIHLogLevels.END_DOC, null, null);
- }
- }
- }
- } finally {
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.END_ENTITY, null, null);
- }
- }
- }
-
- static class DocWrapper extends SolrInputDocument {
- //final SolrInputDocument solrDocument = new SolrInputDocument();
- Map session;
-
- public void setSessionAttribute(String key, Object val){
- if(session == null) session = new HashMap<>();
- session.put(key, val);
- }
-
- public Object getSessionAttribute(String key) {
- return session == null ? null : session.get(key);
- }
- }
-
- private void handleSpecialCommands(Map arow, DocWrapper doc) {
- Object value = arow.get(DELETE_DOC_BY_ID);
- if (value != null) {
- if (value instanceof Collection) {
- @SuppressWarnings({"rawtypes"})
- Collection collection = (Collection) value;
- for (Object o : collection) {
- writer.deleteDoc(o.toString());
- importStatistics.deletedDocCount.incrementAndGet();
- }
- } else {
- writer.deleteDoc(value);
- importStatistics.deletedDocCount.incrementAndGet();
- }
- }
- value = arow.get(DELETE_DOC_BY_QUERY);
- if (value != null) {
- if (value instanceof Collection) {
- @SuppressWarnings({"rawtypes"})
- Collection collection = (Collection) value;
- for (Object o : collection) {
- writer.deleteByQuery(o.toString());
- importStatistics.deletedDocCount.incrementAndGet();
- }
- } else {
- writer.deleteByQuery(value.toString());
- importStatistics.deletedDocCount.incrementAndGet();
- }
- }
- value = arow.get(DOC_BOOST);
- if (value != null) {
- String message = "Ignoring document boost: " + value + " as index-time boosts are not supported anymore";
- if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
- log.warn(message);
- } else {
- log.debug(message);
- }
- }
-
- value = arow.get(SKIP_DOC);
- if (value != null) {
- if (Boolean.parseBoolean(value.toString())) {
- throw new DataImportHandlerException(DataImportHandlerException.SKIP,
- "Document skipped :" + arow);
- }
- }
-
- value = arow.get(SKIP_ROW);
- if (value != null) {
- if (Boolean.parseBoolean(value.toString())) {
- throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW);
- }
- }
- }
-
- @SuppressWarnings("unchecked")
- private void addFields(Entity entity, DocWrapper doc,
- Map arow, VariableResolver vr) {
- for (Map.Entry entry : arow.entrySet()) {
- String key = entry.getKey();
- Object value = entry.getValue();
- if (value == null) continue;
- if (key.startsWith("$")) continue;
- Set field = entity.getColNameVsField().get(key);
- IndexSchema schema = null == reqParams.getRequest() ? null : reqParams.getRequest().getSchema();
- if (field == null && schema != null) {
- // This can be a dynamic field or a field which does not have an entry in data-config ( an implicit field)
- SchemaField sf = schema.getFieldOrNull(key);
- if (sf == null) {
- sf = config.getSchemaField(key);
- }
- if (sf != null) {
- addFieldToDoc(entry.getValue(), sf.getName(), sf.multiValued(), doc);
- }
- //else do nothing. if we add it it may fail
- } else {
- if (field != null) {
- for (EntityField f : field) {
- String name = f.getName();
- boolean multiValued = f.isMultiValued();
- boolean toWrite = f.isToWrite();
- if(f.isDynamicName()){
- name = vr.replaceTokens(name);
- SchemaField schemaField = config.getSchemaField(name);
- if(schemaField == null) {
- toWrite = false;
- } else {
- multiValued = schemaField.multiValued();
- toWrite = true;
- }
- }
- if (toWrite) {
- addFieldToDoc(entry.getValue(), name, multiValued, doc);
- }
- }
- }
- }
- }
- }
-
- private void addFieldToDoc(Object value, String name, boolean multiValued, DocWrapper doc) {
- if (value instanceof Collection) {
- @SuppressWarnings({"rawtypes"})
- Collection collection = (Collection) value;
- if (multiValued) {
- for (Object o : collection) {
- if (o != null)
- doc.addField(name, o);
- }
- } else {
- if (doc.getField(name) == null)
- for (Object o : collection) {
- if (o != null) {
- doc.addField(name, o);
- break;
- }
- }
- }
- } else if (multiValued) {
- if (value != null) {
- doc.addField(name, value);
- }
- } else {
- if (doc.getField(name) == null && value != null)
- doc.addField(name, value);
- }
- }
-
- @SuppressWarnings({"unchecked"})
- public EntityProcessorWrapper getEntityProcessorWrapper(Entity entity) {
- EntityProcessor entityProcessor = null;
- if (entity.getProcessorName() == null) {
- entityProcessor = new SqlEntityProcessor();
- } else {
- try {
- entityProcessor = (EntityProcessor) loadClass(entity.getProcessorName(), dataImporter.getCore())
- .getConstructor().newInstance();
- } catch (Exception e) {
- wrapAndThrow (SEVERE,e,
- "Unable to load EntityProcessor implementation for entity:" + entity.getName());
- }
- }
- EntityProcessorWrapper epw = new EntityProcessorWrapper(entityProcessor, entity, this);
- for(Entity e1 : entity.getChildren()) {
- epw.getChildren().add(getEntityProcessorWrapper(e1));
- }
-
- return epw;
- }
-
- private String findMatchingPkColumn(String pk, Map row) {
- if (row.containsKey(pk)) {
- throw new IllegalArgumentException(String.format(Locale.ROOT,
- "deltaQuery returned a row with null for primary key %s", pk));
- }
- String resolvedPk = null;
- for (String columnName : row.keySet()) {
- if (columnName.endsWith("." + pk) || pk.endsWith("." + columnName)) {
- if (resolvedPk != null)
- throw new IllegalArgumentException(
- String.format(Locale.ROOT,
- "deltaQuery has more than one column (%s and %s) that might resolve to declared primary key pk='%s'",
- resolvedPk, columnName, pk));
- resolvedPk = columnName;
- }
- }
- if (resolvedPk == null) {
- throw new IllegalArgumentException(
- String
- .format(
- Locale.ROOT,
- "deltaQuery has no column to resolve to declared primary key pk='%s'",
- pk));
- }
- if (log.isInfoEnabled()) {
- log.info(String.format(Locale.ROOT,
- "Resolving deltaQuery column '%s' to match entity's declared pk '%s'",
- resolvedPk, pk));
- }
- return resolvedPk;
- }
-
- /**
- * Collects unique keys of all Solr documents for whom one or more source tables have been changed since the last
- * indexed time.
- * Note: In our definition, unique key of Solr document is the primary key of the top level
- * entity (unless skipped using docRoot=false) in the Solr document in data-config.xml
- *
- * @return an iterator to the list of keys for which Solr documents should be updated.
- */
- @SuppressWarnings({"unchecked", "rawtypes"})
- public Set> collectDelta(EntityProcessorWrapper epw, VariableResolver resolver,
- Set> deletedRows) {
- //someone called abort
- if (stop.get())
- return new HashSet();
-
- ContextImpl context1 = new ContextImpl(epw, resolver, null, Context.FIND_DELTA, session, null, this);
- epw.init(context1);
-
- Set> myModifiedPks = new HashSet<>();
-
-
-
- for (EntityProcessorWrapper childEpw : epw.getChildren()) {
- //this ensures that we start from the leaf nodes
- myModifiedPks.addAll(collectDelta(childEpw, resolver, deletedRows));
- //someone called abort
- if (stop.get())
- return new HashSet();
- }
-
- // identifying the modified rows for this entity
- Map> deltaSet = new HashMap<>();
- if (log.isInfoEnabled()) {
- log.info("Running ModifiedRowKey() for Entity: {}", epw.getEntity().getName());
- }
- //get the modified rows in this entity
- String pk = epw.getEntity().getPk();
- while (true) {
- Map row = epw.nextModifiedRowKey();
-
- if (row == null)
- break;
-
- Object pkValue = row.get(pk);
- if (pkValue == null) {
- pk = findMatchingPkColumn(pk, row);
- pkValue = row.get(pk);
- }
-
- deltaSet.put(pkValue.toString(), row);
- importStatistics.rowsCount.incrementAndGet();
- // check for abort
- if (stop.get())
- return new HashSet();
- }
- //get the deleted rows for this entity
- Set> deletedSet = new HashSet<>();
- while (true) {
- Map row = epw.nextDeletedRowKey();
- if (row == null)
- break;
-
- deletedSet.add(row);
-
- Object pkValue = row.get(pk);
- if (pkValue == null) {
- pk = findMatchingPkColumn(pk, row);
- pkValue = row.get(pk);
- }
-
- // Remove deleted rows from the delta rows
- String deletedRowPk = pkValue.toString();
- if (deltaSet.containsKey(deletedRowPk)) {
- deltaSet.remove(deletedRowPk);
- }
-
- importStatistics.rowsCount.incrementAndGet();
- // check for abort
- if (stop.get())
- return new HashSet();
- }
-
- if (log.isInfoEnabled()) {
- log.info("Completed ModifiedRowKey for Entity: {} rows obtained: {}", epw.getEntity().getName(), deltaSet.size());
- log.info("Completed DeletedRowKey for Entity: {} rows obtained : {}", epw.getEntity().getName(), deletedSet.size()); // logOk
- }
-
- myModifiedPks.addAll(deltaSet.values());
- Set> parentKeyList = new HashSet<>();
- //all that we have captured is useless (in a sub-entity) if no rows in the parent is modified because of these
- //propogate up the changes in the chain
- if (epw.getEntity().getParentEntity() != null) {
- // identifying deleted rows with deltas
-
- for (Map row : myModifiedPks) {
- resolver.addNamespace(epw.getEntity().getName(), row);
- getModifiedParentRows(resolver, epw.getEntity().getName(), epw, parentKeyList);
- // check for abort
- if (stop.get())
- return new HashSet();
- }
- // running the same for deletedrows
- for (Map row : deletedSet) {
- resolver.addNamespace(epw.getEntity().getName(), row);
- getModifiedParentRows(resolver, epw.getEntity().getName(), epw, parentKeyList);
- // check for abort
- if (stop.get())
- return new HashSet();
- }
- }
- if (log.isInfoEnabled()) {
- log.info("Completed parentDeltaQuery for Entity: {}", epw.getEntity().getName());
- }
- if (epw.getEntity().isDocRoot())
- deletedRows.addAll(deletedSet);
-
- // Do not use entity.isDocRoot here because one of descendant entities may set rootEntity="true"
- return epw.getEntity().getParentEntity() == null ?
- myModifiedPks : new HashSet<>(parentKeyList);
- }
-
- private void getModifiedParentRows(VariableResolver resolver,
- String entity, EntityProcessor entityProcessor,
- Set> parentKeyList) {
- try {
- while (true) {
- Map parentRow = entityProcessor
- .nextModifiedParentRowKey();
- if (parentRow == null)
- break;
-
- parentKeyList.add(parentRow);
- importStatistics.rowsCount.incrementAndGet();
- // check for abort
- if (stop.get())
- return;
- }
-
- } finally {
- resolver.removeNamespace(entity);
- }
- }
-
- public void abort() {
- stop.set(true);
- }
-
- private AtomicBoolean stop = new AtomicBoolean(false);
-
- public static final String TIME_ELAPSED = "Time Elapsed";
-
- static String getTimeElapsedSince(long l) {
- l = TimeUnit.MILLISECONDS.convert(System.nanoTime() - l, TimeUnit.NANOSECONDS);
- return (l / (60000 * 60)) + ":" + (l / 60000) % 60 + ":" + (l / 1000)
- % 60 + "." + l % 1000;
- }
-
- public RequestInfo getReqParams() {
- return reqParams;
- }
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- static Class loadClass(String name, SolrCore core) throws ClassNotFoundException {
- try {
- return core != null ?
- core.getResourceLoader().findClass(name, Object.class) :
- Class.forName(name);
- } catch (Exception e) {
- try {
- String n = DocBuilder.class.getPackage().getName() + "." + name;
- return core != null ?
- core.getResourceLoader().findClass(n, Object.class) :
- Class.forName(n);
- } catch (Exception e1) {
- throw new ClassNotFoundException("Unable to load " + name + " or " + DocBuilder.class.getPackage().getName() + "." + name, e);
- }
- }
- }
-
- public static class Statistics {
- public AtomicLong docCount = new AtomicLong();
-
- public AtomicLong deletedDocCount = new AtomicLong();
-
- public AtomicLong failedDocCount = new AtomicLong();
-
- public AtomicLong rowsCount = new AtomicLong();
-
- public AtomicLong queryCount = new AtomicLong();
-
- public AtomicLong skipDocCount = new AtomicLong();
-
- public Statistics add(Statistics stats) {
- this.docCount.addAndGet(stats.docCount.get());
- this.deletedDocCount.addAndGet(stats.deletedDocCount.get());
- this.rowsCount.addAndGet(stats.rowsCount.get());
- this.queryCount.addAndGet(stats.queryCount.get());
-
- return this;
- }
-
- public Map getStatsSnapshot() {
- Map result = new HashMap<>();
- result.put("docCount", docCount.get());
- result.put("deletedDocCount", deletedDocCount.get());
- result.put("rowCount", rowsCount.get());
- result.put("queryCount", rowsCount.get());
- result.put("skipDocCount", skipDocCount.get());
- return result;
- }
-
- }
-
- private void cleanByQuery(String delQuery, AtomicBoolean completeCleanDone) {
- delQuery = getVariableResolver().replaceTokens(delQuery);
- if (reqParams.isClean()) {
- if (delQuery == null && !completeCleanDone.get()) {
- writer.doDeleteAll();
- completeCleanDone.set(true);
- } else if (delQuery != null) {
- writer.deleteByQuery(delQuery);
- }
- }
- }
-
- public static final String LAST_INDEX_TIME = "last_index_time";
- public static final String INDEX_START_TIME = "index_start_time";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessor.java
deleted file mode 100644
index 7ded623486e..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessor.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.io.Closeable;
-import java.util.Map;
-
-/**
- *
- * An instance of entity processor serves an entity. It is reused throughout the
- * import process.
- *
- *
- * Implementations of this abstract class must provide a public no-args constructor.
- *
- *
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- *
- *
- * This API is experimental and may change in the future.
- *
- * @since solr 1.3
- */
-public abstract class EntityProcessor implements Closeable {
-
- /**
- * This method is called when it starts processing an entity. When it comes
- * back to the entity it is called again. So it can reset anything at that point.
- * For a rootmost entity this is called only once for an ingestion. For sub-entities , this
- * is called multiple once for each row from its parent entity
- *
- * @param context The current context
- */
- public abstract void init(Context context);
-
- /**
- * This method helps streaming the data for each row . The implementation
- * would fetch as many rows as needed and gives one 'row' at a time. Only this
- * method is used during a full import
- *
- * @return A 'row'. The 'key' for the map is the column name and the 'value'
- * is the value of that column. If there are no more rows to be
- * returned, return 'null'
- */
- public abstract Map nextRow();
-
- /**
- * This is used for delta-import. It gives the pks of the changed rows in this
- * entity
- *
- * @return the pk vs value of all changed rows
- */
- public abstract Map nextModifiedRowKey();
-
- /**
- * This is used during delta-import. It gives the primary keys of the rows
- * that are deleted from this entity. If this entity is the root entity, solr
- * document is deleted. If this is a sub-entity, the Solr document is
- * considered as 'changed' and will be recreated
- *
- * @return the pk vs value of all changed rows
- */
- public abstract Map nextDeletedRowKey();
-
- /**
- * This is used during delta-import. This gives the primary keys and their
- * values of all the rows changed in a parent entity due to changes in this
- * entity.
- *
- * @return the pk vs value of all changed rows in the parent entity
- */
- public abstract Map nextModifiedParentRowKey();
-
- /**
- * Invoked for each entity at the very end of the import to do any needed cleanup tasks.
- *
- */
- public abstract void destroy();
-
- /**
- * Invoked after the transformers are invoked. EntityProcessors can add, remove or modify values
- * added by Transformers in this method.
- *
- * @param r The transformed row
- * @since solr 1.4
- */
- public void postTransform(Map r) {
- }
-
- /**
- * Invoked when the Entity processor is destroyed towards the end of import.
- *
- * @since solr 1.4
- */
- public void close() {
- //no-op
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
deleted file mode 100644
index 8311f362840..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.SolrException;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.lang.invoke.MethodHandles;
-import java.util.*;
-
-/**
- * Base class for all implementations of {@link EntityProcessor}
- * Most implementations of {@link EntityProcessor}
- * extend this base class which provides common functionality.
- *
- * This API is experimental and subject to change
- *
- * @since solr 1.3
- */
-public class EntityProcessorBase extends EntityProcessor {
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- protected boolean isFirstInit = true;
-
- protected String entityName;
-
- protected Context context;
-
- protected Iterator> rowIterator;
-
- protected String query;
-
- protected String onError = ABORT;
-
- protected DIHCacheSupport cacheSupport = null;
-
- private Zipper zipper;
-
-
- @Override
- public void init(Context context) {
- this.context = context;
- if (isFirstInit) {
- firstInit(context);
- }
- if(zipper!=null){
- zipper.onNewParent(context);
- }else{
- if(cacheSupport!=null) {
- cacheSupport.initNewParent(context);
- }
- }
- }
-
- /**
- * first time init call. do one-time operations here
- * it's necessary to call it from the overridden method,
- * otherwise it throws NPE on accessing zipper from nextRow()
- */
- protected void firstInit(Context context) {
- entityName = context.getEntityAttribute("name");
- String s = context.getEntityAttribute(ON_ERROR);
- if (s != null) onError = s;
-
- zipper = Zipper.createOrNull(context);
-
- if(zipper==null){
- initCache(context);
- }
- isFirstInit = false;
- }
-
- protected void initCache(Context context) {
- String cacheImplName = context
- .getResolvedEntityAttribute(DIHCacheSupport.CACHE_IMPL);
-
- if (cacheImplName != null ) {
- cacheSupport = new DIHCacheSupport(context, cacheImplName);
- }
- }
-
- @Override
- public Map nextModifiedRowKey() {
- return null;
- }
-
- @Override
- public Map nextDeletedRowKey() {
- return null;
- }
-
- @Override
- public Map nextModifiedParentRowKey() {
- return null;
- }
-
- /**
- * For a simple implementation, this is the only method that the sub-class should implement. This is intended to
- * stream rows one-by-one. Return null to signal end of rows
- *
- * @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return
- * null to signal end of rows
- */
- @Override
- public Map nextRow() {
- return null;// do not do anything
- }
-
- protected Map getNext() {
- if(zipper!=null){
- return zipper.supplyNextChild(rowIterator);
- }else{
- if(cacheSupport==null) {
- try {
- if (rowIterator == null)
- return null;
- if (rowIterator.hasNext())
- return rowIterator.next();
- query = null;
- rowIterator = null;
- return null;
- } catch (Exception e) {
- SolrException.log(log, "getNext() failed for query '" + query + "'", e);
- query = null;
- rowIterator = null;
- wrapAndThrow(DataImportHandlerException.WARN, e);
- return null;
- }
- } else {
- return cacheSupport.getCacheData(context, query, rowIterator);
- }
- }
- }
-
-
- @Override
- public void destroy() {
- query = null;
- if(cacheSupport!=null){
- cacheSupport.destroyAll();
- }
- cacheSupport = null;
- }
-
-
-
- public static final String TRANSFORMER = "transformer";
-
- public static final String TRANSFORM_ROW = "transformRow";
-
- public static final String ON_ERROR = "onError";
-
- public static final String ABORT = "abort";
-
- public static final String CONTINUE = "continue";
-
- public static final String SKIP = "skip";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java
deleted file mode 100644
index 6c106bd3617..00000000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
-import org.apache.solr.handler.dataimport.config.Entity;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
-import static org.apache.solr.handler.dataimport.EntityProcessorBase.*;
-import static org.apache.solr.handler.dataimport.EntityProcessorBase.SKIP;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.lang.invoke.MethodHandles;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
-/**
- * A Wrapper over {@link EntityProcessor} instance which performs transforms and handles multi-row outputs correctly.
- *
- * @since solr 1.4
- */
-public class EntityProcessorWrapper extends EntityProcessor {
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- private EntityProcessor delegate;
- private Entity entity;
- @SuppressWarnings({"rawtypes"})
- private DataSource datasource;
- private List children = new ArrayList<>();
- private DocBuilder docBuilder;
- private boolean initialized;
- private String onError;
- private Context context;
- private VariableResolver resolver;
- private String entityName;
-
- protected List transformers;
-
- protected List> rowcache;
-
- public EntityProcessorWrapper(EntityProcessor delegate, Entity entity, DocBuilder docBuilder) {
- this.delegate = delegate;
- this.entity = entity;
- this.docBuilder = docBuilder;
- }
-
- @Override
- public void init(Context context) {
- rowcache = null;
- this.context = context;
- resolver = context.getVariableResolver();
- if (entityName == null) {
- onError = resolver.replaceTokens(context.getEntityAttribute(ON_ERROR));
- if (onError == null) onError = ABORT;
- entityName = context.getEntityAttribute(ConfigNameConstants.NAME);
- }
- delegate.init(context);
-
- }
-
- @SuppressWarnings({"unchecked"})
- void loadTransformers() {
- String transClasses = context.getEntityAttribute(TRANSFORMER);
-
- if (transClasses == null) {
- transformers = Collections.emptyList();
- return;
- }
-
- String[] transArr = transClasses.split(",");
- transformers = new ArrayList() {
- @Override
- public boolean add(Transformer transformer) {
- if (docBuilder != null && docBuilder.verboseDebug) {
- transformer = docBuilder.getDebugLogger().wrapTransformer(transformer);
- }
- return super.add(transformer);
- }
- };
- for (String aTransArr : transArr) {
- String trans = aTransArr.trim();
- if (trans.startsWith("script:")) {
- // The script transformer is a potential vulnerability, esp. when the script is
- // provided from an untrusted source. Check and don't proceed if source is untrusted.
- checkIfTrusted(trans);
- String functionName = trans.substring("script:".length());
- ScriptTransformer scriptTransformer = new ScriptTransformer();
- scriptTransformer.setFunctionName(functionName);
- transformers.add(scriptTransformer);
- continue;
- }
- try {
- @SuppressWarnings({"rawtypes"})
- Class clazz = DocBuilder.loadClass(trans, context.getSolrCore());
- if (Transformer.class.isAssignableFrom(clazz)) {
- transformers.add((Transformer) clazz.getConstructor().newInstance());
- } else {
- Method meth = clazz.getMethod(TRANSFORM_ROW, Map.class);
- transformers.add(new ReflectionTransformer(meth, clazz, trans));
- }
- } catch (NoSuchMethodException nsme){
- String msg = "Transformer :"
- + trans
- + "does not implement Transformer interface or does not have a transformRow(Map m)method";
- log.error(msg);
- wrapAndThrow(SEVERE, nsme,msg);
- } catch (Exception e) {
- log.error("Unable to load Transformer: {}", aTransArr, e);
- wrapAndThrow(SEVERE, e,"Unable to load Transformer: " + trans);
- }
- }
-
- }
-
- private void checkIfTrusted(String trans) {
- if (docBuilder != null) {
- SolrCore core = docBuilder.dataImporter.getCore();
- boolean trusted = (core != null)? core.getCoreDescriptor().isConfigSetTrusted(): true;
- if (!trusted) {
- Exception ex = new SolrException(ErrorCode.UNAUTHORIZED, "The configset for this collection was uploaded "
- + "without any authentication in place,"
- + " and this transformer is not available for collections with untrusted configsets. To use this transformer,"
- + " re-upload the configset after enabling authentication and authorization.");
- String msg = "Transformer: "
- + trans
- + ". " + ex.getMessage();
- log.error(msg);
- wrapAndThrow(SEVERE, ex, msg);
- }
- }
- }
-
- @SuppressWarnings("unchecked")
- static class ReflectionTransformer extends Transformer {
- final Method meth;
-
- @SuppressWarnings({"rawtypes"})
- final Class clazz;
-
- final String trans;
-
- final Object o;
-
- public ReflectionTransformer(Method meth, @SuppressWarnings({"rawtypes"})Class clazz, String trans)
- throws Exception {
- this.meth = meth;
- this.clazz = clazz;
- this.trans = trans;
- o = clazz.getConstructor().newInstance();
- }
-
- @Override
- public Object transformRow(Map aRow, Context context) {
- try {
- return meth.invoke(o, aRow);
- } catch (Exception e) {
- log.warn("method invocation failed on transformer : {}", trans, e);
- throw new DataImportHandlerException(WARN, e);
- }
- }
- }
-
- protected Map getFromRowCache() {
- Map r = rowcache.remove(0);
- if (rowcache.isEmpty())
- rowcache = null;
- return r;
- }
-
- @SuppressWarnings("unchecked")
- protected Map applyTransformer(Map row) {
- if(row == null) return null;
- if (transformers == null)
- loadTransformers();
- if (transformers == Collections.EMPTY_LIST)
- return row;
- Map transformedRow = row;
- List> rows = null;
- boolean stopTransform = checkStopTransform(row);
- VariableResolver resolver = context.getVariableResolver();
- for (Transformer t : transformers) {
- if (stopTransform) break;
- try {
- if (rows != null) {
- List> tmpRows = new ArrayList<>();
- for (Map map : rows) {
- resolver.addNamespace(entityName, map);
- Object o = t.transformRow(map, context);
- if (o == null)
- continue;
- if (o instanceof Map) {
- @SuppressWarnings({"rawtypes"})
- Map oMap = (Map) o;
- stopTransform = checkStopTransform(oMap);
- tmpRows.add((Map) o);
- } else if (o instanceof List) {
- tmpRows.addAll((List) o);
- } else {
- log.error("Transformer must return Map or a List>");
- }
- }
- rows = tmpRows;
- } else {
- resolver.addNamespace(entityName, transformedRow);
- Object o = t.transformRow(transformedRow, context);
- if (o == null)
- return null;
- if (o instanceof Map) {
- @SuppressWarnings({"rawtypes"})
- Map oMap = (Map) o;
- stopTransform = checkStopTransform(oMap);
- transformedRow = (Map) o;
- } else if (o instanceof List) {
- rows = (List) o;
- } else {
- log.error("Transformer must return Map or a List>");
- }
- }
- } catch (Exception e) {
- log.warn("transformer threw error", e);
- if (ABORT.equals(onError)) {
- wrapAndThrow(SEVERE, e);
- } else if (SKIP.equals(onError)) {
- wrapAndThrow(DataImportHandlerException.SKIP, e);
- }
- // onError = continue
- }
- }
- if (rows == null) {
- return transformedRow;
- } else {
- rowcache = rows;
- return getFromRowCache();
- }
-
- }
-
- private boolean checkStopTransform(@SuppressWarnings({"rawtypes"})Map oMap) {
- return oMap.get("$stopTransform") != null
- && Boolean.parseBoolean(oMap.get("$stopTransform").toString());
- }
-
- @Override
- public Map nextRow() {
- if (rowcache != null) {
- return getFromRowCache();
- }
- while (true) {
- Map arow = null;
- try {
- arow = delegate.nextRow();
- } catch (Exception e) {
- if(ABORT.equals(onError)){
- wrapAndThrow(SEVERE, e);
- } else {
- //SKIP is not really possible. If this calls the nextRow() again the Entityprocessor would be in an inconisttent state
- SolrException.log(log, "Exception in entity : "+ entityName, e);
- return null;
- }
- }
- if (arow == null) {
- return null;
- } else {
- arow = applyTransformer(arow);
- if (arow != null) {
- delegate.postTransform(arow);
- return arow;
- }
- }
- }
- }
-
- @Override
- public Map nextModifiedRowKey() {
- Map row = delegate.nextModifiedRowKey();
- row = applyTransformer(row);
- rowcache = null;
- return row;
- }
-
- @Override
- public Map