mirror of https://github.com/apache/lucene.git
SOLR-14783: Remove DIH from 9.0 (#1794)
* Remove DIH example directory * Remove contrib code directories * Remove contrib package related configurations for build tools * Remove mention of DIH example * remove dih as build dependencies and no-longer needed version pins * Remove README references to DIH * Remove dih mention from the script that probably does need to exist at all * More build artifact references * More removed dependencies leftovers (licenses/versions) * No need to smoke exclude DIH anymore * Remove Admin UI's DIH integration * Remove DIH from shortname package list * Remove unused DIH (related? not?) dataset Unclear what is happening here, but there is no reference to that directory anywhere else The other parallel directories ARE referenced in a TestConfigSetsAPI.java * Hidden Idea files references * No DIH to ignore anymore * Remove last Derby DB references * Remove DIH from documentation Add the information in Major Changes document with the link to the external repo * Added/updated a mention to CHANGES * Fix leftover library mentions * Fix Spellings
This commit is contained in:
parent
c11d32faed
commit
a57ba25400
|
@ -1,9 +0,0 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Derby">
|
||||
<CLASSES>
|
||||
<root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/derby-10.9.1.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
</library>
|
||||
</component>
|
|
@ -1,9 +0,0 @@
|
|||
<component name="libraryTable">
|
||||
<library name="HSQLDB">
|
||||
<CLASSES>
|
||||
<root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/hsqldb-2.4.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
</library>
|
||||
</component>
|
|
@ -1,10 +0,0 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Solr DIH core library">
|
||||
<CLASSES>
|
||||
<root url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/lib" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
<jarDirectory url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/lib" recursive="false" />
|
||||
</library>
|
||||
</component>
|
|
@ -1,10 +0,0 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Solr DIH extras library">
|
||||
<CLASSES>
|
||||
<root url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler-extras/lib" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
<jarDirectory url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler-extras/lib" recursive="false" />
|
||||
</library>
|
||||
</component>
|
|
@ -1,10 +0,0 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Solr DIH test library">
|
||||
<CLASSES>
|
||||
<root url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/test-lib" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
<jarDirectory url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/test-lib" recursive="false" />
|
||||
</library>
|
||||
</component>
|
|
@ -53,8 +53,6 @@
|
|||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/analytics/analytics.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/dataimporthandler-extras/dataimporthandler-extras.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/dataimporthandler/dataimporthandler.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/extraction/extraction.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/langid/langid.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/ltr/ltr.iml" />
|
||||
|
|
|
@ -284,22 +284,6 @@
|
|||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
|
||||
</configuration>
|
||||
<configuration default="false" name="Solr dataimporthandler contrib" type="JUnit" factoryName="JUnit">
|
||||
<module name="dataimporthandler" />
|
||||
<option name="TEST_OBJECT" value="pattern" />
|
||||
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/solr/contrib/solr-dataimporthandler" />
|
||||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory" />
|
||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
|
||||
</configuration>
|
||||
<configuration default="false" name="Solr dataimporthandler-extras contrib" type="JUnit" factoryName="JUnit">
|
||||
<module name="dataimporthandler-extras" />
|
||||
<option name="TEST_OBJECT" value="pattern" />
|
||||
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/solr/contrib/solr-dataimporthandler-extras" />
|
||||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory" />
|
||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
|
||||
</configuration>
|
||||
<configuration default="false" name="Solr extraction contrib" type="JUnit" factoryName="JUnit">
|
||||
<module name="extraction" />
|
||||
<option name="TEST_OBJECT" value="pattern" />
|
||||
|
@ -341,7 +325,7 @@
|
|||
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
|
||||
</configuration>
|
||||
|
||||
<list size="42">
|
||||
<list size="39">
|
||||
<item index="0" class="java.lang.String" itemvalue="JUnit.Lucene core" />
|
||||
<item index="1" class="java.lang.String" itemvalue="JUnit.Module analyzers-common" />
|
||||
<item index="2" class="java.lang.String" itemvalue="JUnit.Module analyzers-icu" />
|
||||
|
@ -376,13 +360,11 @@
|
|||
<item index="32" class="java.lang.String" itemvalue="JUnit.Solr analysis-extras contrib" />
|
||||
<item index="33" class="java.lang.String" itemvalue="JUnit.Solr analytics contrib" />
|
||||
<item index="34" class="java.lang.String" itemvalue="JUnit.Solr clustering contrib" />
|
||||
<item index="35" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler contrib" />
|
||||
<item index="36" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler-extras contrib" />
|
||||
<item index="37" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
|
||||
<item index="38" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
|
||||
<item index="39" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
|
||||
<item index="40" class="java.lang.String" itemvalue="JUnit.Solr prometheus-exporter contrib" />
|
||||
<item index="42" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
|
||||
<item index="35" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
|
||||
<item index="36" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
|
||||
<item index="37" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
|
||||
<item index="38" class="java.lang.String" itemvalue="JUnit.Solr prometheus-exporter contrib" />
|
||||
<item index="39" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
|
||||
</list>
|
||||
</component>
|
||||
</project>
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler-extras/classes/java" />
|
||||
<output-test url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler-extras/classes/test" />
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
|
||||
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
|
||||
<orderEntry type="module" scope="TEST" module-name="lucene-core" />
|
||||
<orderEntry type="library" name="Solr core library" level="project" />
|
||||
<orderEntry type="library" name="Solrj library" level="project" />
|
||||
<orderEntry type="library" name="Solr DIH extras library" level="project" />
|
||||
<orderEntry type="library" name="Solr extraction library" level="project" />
|
||||
<orderEntry type="module" module-name="solr-core" />
|
||||
<orderEntry type="module" module-name="solrj" />
|
||||
<orderEntry type="module" module-name="dataimporthandler" />
|
||||
<orderEntry type="module" module-name="analysis-common" />
|
||||
</component>
|
||||
</module>
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler/classes/java" />
|
||||
<output-test url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler/classes/test" />
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/webapp" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="HSQLDB" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Derby" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Solr DIH test library" level="project" />
|
||||
<orderEntry type="library" name="Solr example library" level="project" />
|
||||
<orderEntry type="library" name="Solr core library" level="project" />
|
||||
<orderEntry type="library" name="Solrj library" level="project" />
|
||||
<orderEntry type="library" name="Solr DIH core library" level="project" />
|
||||
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
|
||||
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
|
||||
<orderEntry type="module" module-name="solr-core" />
|
||||
<orderEntry type="module" module-name="solrj" />
|
||||
<orderEntry type="module" module-name="analysis-common" />
|
||||
<orderEntry type="module" module-name="lucene-core" />
|
||||
<orderEntry type="module" scope="TEST" module-name="join" />
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,55 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-parent</artifactId>
|
||||
<version>@version@</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-contrib-aggregator</artifactId>
|
||||
<name>Apache Solr Contrib aggregator POM</name>
|
||||
<packaging>pom</packaging>
|
||||
<modules>
|
||||
<module>analysis-extras</module>
|
||||
<module>analytics</module>
|
||||
<module>clustering</module>
|
||||
<module>extraction</module>
|
||||
<module>jaegertracer-configurator</module>
|
||||
<module>langid</module>
|
||||
<module>ltr</module>
|
||||
<module>prometheus-exporter</module>
|
||||
<module>velocity</module>
|
||||
</modules>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-deploy-plugin</artifactId>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
|
@ -48,33 +48,6 @@ my @moves = (
|
|||
'solr/contrib/clustering/src/main/java'
|
||||
=> 'solr/contrib/clustering/src/java',
|
||||
|
||||
'solr/contrib/dataimporthandler/src/test/java'
|
||||
=> 'solr/contrib/dataimporthandler/src/test',
|
||||
|
||||
'solr/contrib/dataimporthandler/src/test/resources/solr-dih'
|
||||
=> 'solr/contrib/dataimporthandler/src/test-files/dih/solr',
|
||||
|
||||
'solr/contrib/dataimporthandler/src/test/resources'
|
||||
=> 'solr/contrib/dataimporthandler/src/test-files/dih',
|
||||
|
||||
'solr/contrib/dataimporthandler/src/main/java'
|
||||
=> 'solr/contrib/dataimporthandler/src/java',
|
||||
|
||||
'solr/contrib/dataimporthandler/src/main/webapp'
|
||||
=> 'solr/contrib/dataimporthandler/src/webapp',
|
||||
|
||||
'solr/contrib/dataimporthandler/src/extras/test/java'
|
||||
=> 'solr/contrib/dataimporthandler-extras/src/test',
|
||||
|
||||
'solr/contrib/dataimporthandler/src/extras/test/resources/solr-dihextras'
|
||||
=> 'solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr',
|
||||
|
||||
'solr/contrib/dataimporthandler/src/extras/test/resources'
|
||||
=> 'solr/contrib/dataimporthandler-extras/src/test-files/dihextras',
|
||||
|
||||
'solr/contrib/dataimporthandler/src/extras/main/java'
|
||||
=> 'solr/contrib/dataimporthandler-extras/src/java',
|
||||
|
||||
'solr/contrib/extraction/src/test/java'
|
||||
=> 'solr/contrib/extraction/src/test',
|
||||
|
||||
|
|
|
@ -225,8 +225,7 @@ def checkAllJARs(topDir, project, gitRevision, version, tmpDir, baseURL):
|
|||
for file in files:
|
||||
if file.lower().endswith('.jar'):
|
||||
if project == 'solr':
|
||||
if ((normRoot.endswith('/contrib/dataimporthandler-extras/lib') and (file.startswith('javax.mail-') or file.startswith('activation-')))
|
||||
or (normRoot.endswith('/test-framework/lib') and file.startswith('jersey-'))
|
||||
if ((normRoot.endswith('/test-framework/lib') and file.startswith('jersey-'))
|
||||
or (normRoot.endswith('/contrib/extraction/lib') and file.startswith('xml-apis-'))):
|
||||
print(' **WARNING**: skipping check of %s/%s: it has javax.* classes' % (root, file))
|
||||
continue
|
||||
|
|
|
@ -164,10 +164,6 @@ configure(project(":solr:example")) {
|
|||
into "exampledocs/"
|
||||
})
|
||||
|
||||
from(configurations.dih, {
|
||||
into "example-DIH/solr/db/lib"
|
||||
})
|
||||
|
||||
into projectDir
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,8 +20,7 @@
|
|||
configure([project(":lucene:spatial3d"),
|
||||
project(":lucene:analysis:common"),
|
||||
project(":lucene:backward-codecs"),
|
||||
project(":lucene:queryparser"),
|
||||
project(":solr:contrib:dataimporthandler")]) {
|
||||
project(":lucene:queryparser")]) {
|
||||
plugins.withType(JavaPlugin) {
|
||||
configurations {
|
||||
testClassesExported
|
||||
|
@ -56,15 +55,6 @@ configure(project(":solr:contrib:analysis-extras")) {
|
|||
plugins.withType(JavaPlugin) {
|
||||
dependencies {
|
||||
testImplementation project(path: ':lucene:analysis:common', configuration: 'testClassesExported')
|
||||
testImplementation project(path: ':solr:contrib:dataimporthandler', configuration: 'testClassesExported')
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
configure(project(":solr:contrib:dataimporthandler-extras")) {
|
||||
plugins.withType(JavaPlugin) {
|
||||
dependencies {
|
||||
testImplementation project(path: ':solr:contrib:dataimporthandler', configuration: 'testClassesExported')
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,8 +60,6 @@ configure(rootProject) {
|
|||
":solr:core",
|
||||
":solr:solrj",
|
||||
":solr:contrib:analysis-extras",
|
||||
":solr:contrib:dataimporthandler",
|
||||
":solr:contrib:dataimporthandler-extras",
|
||||
":solr:contrib:analytics",
|
||||
":solr:contrib:clustering",
|
||||
":solr:contrib:extraction",
|
||||
|
|
|
@ -108,7 +108,7 @@ grant {
|
|||
// needed by hadoop htrace
|
||||
permission java.net.NetPermission "getNetworkInformation";
|
||||
|
||||
// needed by DIH
|
||||
// needed by DIH - possibly even after DIH is a package
|
||||
permission java.sql.SQLPermission "deregisterDriver";
|
||||
|
||||
permission java.util.logging.LoggingPermission "control";
|
||||
|
|
|
@ -46,30 +46,6 @@
|
|||
<packageUrl regex="true">^pkg:maven/org\.jruby/dirgra@.*$</packageUrl>
|
||||
<cpe>cpe:/a:jruby:jruby</cpe>
|
||||
</suppress>
|
||||
<suppress>
|
||||
<notes><![CDATA[
|
||||
file name: derby-10.9.1.0.jar
|
||||
Only used in tests and dih-example
|
||||
]]></notes>
|
||||
<packageUrl regex="true">^pkg:maven/org\.apache\.derby/derby@.*$</packageUrl>
|
||||
<cpe>cpe:/a:apache:derby</cpe>
|
||||
</suppress>
|
||||
<suppress>
|
||||
<notes><![CDATA[
|
||||
file name: derby-10.9.1.0.jar
|
||||
Only used in tests and dih-example
|
||||
]]></notes>
|
||||
<packageUrl regex="true">^pkg:maven/org\.apache\.derby/derby@.*$</packageUrl>
|
||||
<vulnerabilityName>CVE-2015-1832</vulnerabilityName>
|
||||
</suppress>
|
||||
<suppress>
|
||||
<notes><![CDATA[
|
||||
file name: derby-10.9.1.0.jar
|
||||
Only used in tests and dih-example
|
||||
]]></notes>
|
||||
<packageUrl regex="true">^pkg:maven/org\.apache\.derby/derby@.*$</packageUrl>
|
||||
<vulnerabilityName>CVE-2018-1313</vulnerabilityName>
|
||||
</suppress>
|
||||
<suppress>
|
||||
<notes><![CDATA[
|
||||
file name: carrot2-guava-18.0.jar
|
||||
|
|
|
@ -0,0 +1,327 @@
|
|||
# The /org/name keys in this file must be kept lexically sorted.
|
||||
# Blank lines, comment lines, and keys that aren't in /org/name format are ignored
|
||||
# when the lexical sort check is performed by the ant check-lib-versions target.
|
||||
|
||||
/com.adobe.xmp/xmpcore = 5.1.3
|
||||
|
||||
com.carrotsearch.randomizedtesting.version = 2.7.6
|
||||
/com.carrotsearch.randomizedtesting/junit4-ant = ${com.carrotsearch.randomizedtesting.version}
|
||||
/com.carrotsearch.randomizedtesting/randomizedtesting-runner = ${com.carrotsearch.randomizedtesting.version}
|
||||
|
||||
/com.carrotsearch.thirdparty/simple-xml-safe = 2.7.1
|
||||
|
||||
/com.carrotsearch/hppc = 0.8.2
|
||||
|
||||
/com.cybozu.labs/langdetect = 1.1-20120112
|
||||
/com.drewnoakes/metadata-extractor = 2.11.0
|
||||
|
||||
/com.epam/parso = 2.0.11
|
||||
|
||||
com.fasterxml.jackson.core.version = 2.10.1
|
||||
/com.fasterxml.jackson.core/jackson-annotations = ${com.fasterxml.jackson.core.version}
|
||||
/com.fasterxml.jackson.core/jackson-core = ${com.fasterxml.jackson.core.version}
|
||||
/com.fasterxml.jackson.core/jackson-databind = ${com.fasterxml.jackson.core.version}
|
||||
/com.fasterxml.jackson.dataformat/jackson-dataformat-smile = ${com.fasterxml.jackson.core.version}
|
||||
|
||||
/com.github.ben-manes.caffeine/caffeine = 2.8.4
|
||||
/com.github.virtuald/curvesapi = 1.06
|
||||
|
||||
/com.github.zafarkhaja/java-semver = 0.9.0
|
||||
|
||||
/com.google.guava/guava = 25.1-jre
|
||||
/com.google.protobuf/protobuf-java = 3.11.0
|
||||
/com.google.re2j/re2j = 1.2
|
||||
/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
|
||||
/com.googlecode.mp4parser/isoparser = 1.1.22
|
||||
/com.healthmarketscience.jackcess/jackcess = 3.0.1
|
||||
/com.healthmarketscience.jackcess/jackcess-encrypt = 3.0.0
|
||||
/com.ibm.icu/icu4j = 62.2
|
||||
/com.jayway.jsonpath/json-path = 2.4.0
|
||||
/com.lmax/disruptor = 3.4.2
|
||||
/com.pff/java-libpst = 0.8.1
|
||||
|
||||
com.rometools.version = 1.12.2
|
||||
/com.rometools/rome = ${com.rometools.version}
|
||||
/com.rometools/rome-utils = ${com.rometools.version}
|
||||
|
||||
com.sun.jersey.version = 1.19
|
||||
/com.sun.jersey/jersey-servlet = ${com.sun.jersey.version}
|
||||
|
||||
/com.tdunning/t-digest = 3.1
|
||||
/com.vaadin.external.google/android-json = 0.0.20131108.vaadin1
|
||||
/commons-cli/commons-cli = 1.4
|
||||
/commons-codec/commons-codec = 1.13
|
||||
/commons-collections/commons-collections = 3.2.2
|
||||
/commons-io/commons-io = 2.6
|
||||
# necessary to run test or embedded Zookeeper as of 3.6.1
|
||||
commons.lang.version = 2.6
|
||||
/commons-lang/commons-lang = ${commons.lang.version}
|
||||
/commons-logging/commons-logging = 1.1.3
|
||||
/de.l3s.boilerpipe/boilerpipe = 1.1.0
|
||||
|
||||
io.dropwizard.metrics.version = 4.1.5
|
||||
/io.dropwizard.metrics/metrics-core = ${io.dropwizard.metrics.version}
|
||||
/io.dropwizard.metrics/metrics-graphite = ${io.dropwizard.metrics.version}
|
||||
/io.dropwizard.metrics/metrics-jetty9 = ${io.dropwizard.metrics.version}
|
||||
/io.dropwizard.metrics/metrics-jmx = ${io.dropwizard.metrics.version}
|
||||
/io.dropwizard.metrics/metrics-jvm = ${io.dropwizard.metrics.version}
|
||||
|
||||
io.jaegertracing.version = 1.1.0
|
||||
/io.jaegertracing/jaeger-core = ${io.jaegertracing.version}
|
||||
/io.jaegertracing/jaeger-thrift = ${io.jaegertracing.version}
|
||||
|
||||
io.netty.netty.version = 4.1.50.Final
|
||||
/io.netty/netty-buffer = ${io.netty.netty.version}
|
||||
/io.netty/netty-codec = ${io.netty.netty.version}
|
||||
/io.netty/netty-common = ${io.netty.netty.version}
|
||||
/io.netty/netty-handler = ${io.netty.netty.version}
|
||||
/io.netty/netty-resolver = ${io.netty.netty.version}
|
||||
/io.netty/netty-transport = ${io.netty.netty.version}
|
||||
/io.netty/netty-transport-native-epoll = ${io.netty.netty.version}
|
||||
/io.netty/netty-transport-native-unix-common = ${io.netty.netty.version}
|
||||
|
||||
io.opentracing.version = 0.33.0
|
||||
/io.opentracing/opentracing-api = ${io.opentracing.version}
|
||||
/io.opentracing/opentracing-mock = ${io.opentracing.version}
|
||||
/io.opentracing/opentracing-noop = ${io.opentracing.version}
|
||||
/io.opentracing/opentracing-util = ${io.opentracing.version}
|
||||
|
||||
io.prometheus.version = 0.2.0
|
||||
/io.prometheus/simpleclient = ${io.prometheus.version}
|
||||
/io.prometheus/simpleclient_common = ${io.prometheus.version}
|
||||
/io.prometheus/simpleclient_httpserver = ${io.prometheus.version}
|
||||
|
||||
/io.sgr/s2-geometry-library-java = 1.0.0
|
||||
|
||||
/javax.servlet/javax.servlet-api = 3.1.0
|
||||
/junit/junit = 4.12
|
||||
|
||||
/mecab/mecab-ipadic = 2.7.0-20070801
|
||||
/mecab/mecab-ko-dic = 2.0.3-20170922
|
||||
/mecab/mecab-naist-jdic = 0.6.3b-20111013
|
||||
/net.arnx/jsonic = 1.2.7
|
||||
/net.bytebuddy/byte-buddy = 1.9.3
|
||||
/net.hydromatic/eigenbase-properties = 1.1.5
|
||||
|
||||
net.sourceforge.argparse4j.version = 0.8.1
|
||||
/net.sourceforge.argparse4j/argparse4j = ${net.sourceforge.argparse4j.version}
|
||||
|
||||
/net.sourceforge.nekohtml/nekohtml = 1.9.17
|
||||
|
||||
net.thisptr.version = 0.0.8
|
||||
/net.thisptr/jackson-jq = ${net.thisptr.version}
|
||||
|
||||
/org.antlr/antlr4-runtime = 4.5.1-1
|
||||
|
||||
/org.apache.ant/ant = 1.8.2
|
||||
|
||||
org.apache.calcite.avatica.version = 1.13.0
|
||||
/org.apache.calcite.avatica/avatica-core = ${org.apache.calcite.avatica.version}
|
||||
|
||||
org.apache.calcite.version = 1.18.0
|
||||
/org.apache.calcite/calcite-core = ${org.apache.calcite.version}
|
||||
/org.apache.calcite/calcite-linq4j = ${org.apache.calcite.version}
|
||||
|
||||
org.apache.commons.commons-collections4-rev = 4.4
|
||||
/org.apache.commons/commons-collections4 = ${org.apache.commons.commons-collections4-rev}
|
||||
/org.apache.commons/commons-compress = 1.19
|
||||
/org.apache.commons/commons-configuration2 = 2.1.1
|
||||
/org.apache.commons/commons-csv = 1.7
|
||||
/org.apache.commons/commons-exec = 1.3
|
||||
/org.apache.commons/commons-lang3 = 3.9
|
||||
/org.apache.commons/commons-math3 = 3.6.1
|
||||
/org.apache.commons/commons-text = 1.6
|
||||
|
||||
org.apache.curator.version = 2.13.0
|
||||
/org.apache.curator/curator-client = ${org.apache.curator.version}
|
||||
/org.apache.curator/curator-framework = ${org.apache.curator.version}
|
||||
/org.apache.curator/curator-recipes = ${org.apache.curator.version}
|
||||
|
||||
org.apache.hadoop.version = 3.2.0
|
||||
/org.apache.hadoop/hadoop-annotations = ${org.apache.hadoop.version}
|
||||
/org.apache.hadoop/hadoop-auth = ${org.apache.hadoop.version}
|
||||
/org.apache.hadoop/hadoop-common = ${org.apache.hadoop.version}
|
||||
/org.apache.hadoop/hadoop-hdfs = ${org.apache.hadoop.version}
|
||||
/org.apache.hadoop/hadoop-hdfs-client = ${org.apache.hadoop.version}
|
||||
/org.apache.hadoop/hadoop-minikdc = ${org.apache.hadoop.version}
|
||||
|
||||
/org.apache.htrace/htrace-core4 = 4.1.0-incubating
|
||||
|
||||
# The httpcore version is often different from the httpclient and httpmime versions,
|
||||
# so the httpcore version value should not share the same symbolic name with them.
|
||||
/org.apache.httpcomponents/httpclient = 4.5.10
|
||||
/org.apache.httpcomponents/httpcore = 4.4.12
|
||||
/org.apache.httpcomponents/httpmime = 4.5.10
|
||||
|
||||
/org.apache.ivy/ivy = 2.4.0
|
||||
|
||||
org.apache.james.apache.mime4j.version = 0.8.3
|
||||
/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version}
|
||||
/org.apache.james/apache-mime4j-dom = ${org.apache.james.apache.mime4j.version}
|
||||
|
||||
org.apache.kerby.version = 1.0.1
|
||||
/org.apache.kerby/kerb-admin = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerb-client = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerb-common = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerb-core = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerb-crypto = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerb-identity= ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerb-server = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerb-simplekdc = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerb-util = ${org.apache.kerby.version}
|
||||
|
||||
/org.apache.kerby/kerby-asn1 = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerby-config = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerby-kdc = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerby-pkix = ${org.apache.kerby.version}
|
||||
/org.apache.kerby/kerby-util = ${org.apache.kerby.version}
|
||||
|
||||
org.apache.logging.log4j.version = 2.13.2
|
||||
/org.apache.logging.log4j/log4j-1.2-api = ${org.apache.logging.log4j.version}
|
||||
/org.apache.logging.log4j/log4j-api = ${org.apache.logging.log4j.version}
|
||||
/org.apache.logging.log4j/log4j-core = ${org.apache.logging.log4j.version}
|
||||
/org.apache.logging.log4j/log4j-slf4j-impl = ${org.apache.logging.log4j.version}
|
||||
/org.apache.logging.log4j/log4j-web = ${org.apache.logging.log4j.version}
|
||||
|
||||
/org.apache.opennlp/opennlp-tools = 1.9.1
|
||||
|
||||
org.apache.pdfbox.version = 2.0.17
|
||||
/org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version}
|
||||
/org.apache.pdfbox/jempbox = 1.8.16
|
||||
/org.apache.pdfbox/pdfbox = ${org.apache.pdfbox.version}
|
||||
/org.apache.pdfbox/pdfbox-tools = ${org.apache.pdfbox.version}
|
||||
|
||||
org.apache.poi.version = 4.1.1
|
||||
/org.apache.poi/poi = ${org.apache.poi.version}
|
||||
/org.apache.poi/poi-ooxml = ${org.apache.poi.version}
|
||||
/org.apache.poi/poi-ooxml-schemas = ${org.apache.poi.version}
|
||||
/org.apache.poi/poi-scratchpad = ${org.apache.poi.version}
|
||||
|
||||
org.apache.thrift.version = 0.13.0
|
||||
/org.apache.thrift/libthrift = ${org.apache.thrift.version}
|
||||
|
||||
org.apache.tika.version = 1.24
|
||||
/org.apache.tika/tika-core = ${org.apache.tika.version}
|
||||
/org.apache.tika/tika-java7 = ${org.apache.tika.version}
|
||||
/org.apache.tika/tika-parsers = ${org.apache.tika.version}
|
||||
/org.apache.tika/tika-xmp = ${org.apache.tika.version}
|
||||
|
||||
org.apache.velocity.tools.version = 3.0
|
||||
/org.apache.velocity.tools/velocity-tools-generic = ${org.apache.velocity.tools.version}
|
||||
/org.apache.velocity.tools/velocity-tools-view = ${org.apache.velocity.tools.version}
|
||||
/org.apache.velocity.tools/velocity-tools-view-jsp = ${org.apache.velocity.tools.version}
|
||||
|
||||
/org.apache.velocity/velocity-engine-core = 2.0
|
||||
|
||||
/org.apache.xmlbeans/xmlbeans = 3.1.0
|
||||
|
||||
org.apache.zookeeper.version = 3.6.1
|
||||
/org.apache.zookeeper/zookeeper = ${org.apache.zookeeper.version}
|
||||
/org.apache.zookeeper/zookeeper-jute = ${org.apache.zookeeper.version}
|
||||
|
||||
# v1.6.2 of asciidoctor-ant includes asciidoctorj 1.6.2, which uses
|
||||
# asciidoctor 1.5.8, and asciidoctorj-pdf 1.5.0-alpha.16, which is the same
|
||||
# as asciidoctor-pdf 1.5.0-alpha.16
|
||||
/org.asciidoctor/asciidoctor-ant = 1.6.2
|
||||
|
||||
/org.aspectj/aspectjrt = 1.8.0
|
||||
|
||||
/org.bitbucket.b_c/jose4j = 0.6.5
|
||||
|
||||
org.bouncycastle.version = 1.64
|
||||
/org.bouncycastle/bcmail-jdk15on = ${org.bouncycastle.version}
|
||||
/org.bouncycastle/bcpkix-jdk15on = ${org.bouncycastle.version}
|
||||
/org.bouncycastle/bcprov-jdk15on = ${org.bouncycastle.version}
|
||||
|
||||
/org.brotli/dec = 0.1.2
|
||||
|
||||
/org.carrot2.attributes/attributes-binder = 1.3.3
|
||||
/org.carrot2.shaded/carrot2-guava = 18.0
|
||||
|
||||
/org.carrot2/carrot2-mini = 3.16.2
|
||||
|
||||
org.carrot2.morfologik.version = 2.1.5
|
||||
/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}
|
||||
/org.carrot2/morfologik-polish = ${org.carrot2.morfologik.version}
|
||||
/org.carrot2/morfologik-stemming = ${org.carrot2.morfologik.version}
|
||||
|
||||
/org.ccil.cowan.tagsoup/tagsoup = 1.2.1
|
||||
|
||||
org.codehaus.janino.version = 3.0.9
|
||||
/org.codehaus.janino/commons-compiler = ${org.codehaus.janino.version}
|
||||
/org.codehaus.janino/janino = ${org.codehaus.janino.version}
|
||||
|
||||
/org.codehaus.woodstox/stax2-api = 3.1.4
|
||||
/org.codehaus.woodstox/woodstox-core-asl = 4.4.1
|
||||
|
||||
org.eclipse.jetty.version = 9.4.27.v20200227
|
||||
/org.eclipse.jetty.http2/http2-client = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty.http2/http2-common = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty.http2/http2-hpack = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty.http2/http2-http-client-transport = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty.http2/http2-server = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-alpn-client = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-alpn-java-client = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-alpn-java-server = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-alpn-server = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-client = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-continuation = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-deploy = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-http = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-io = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-jmx = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-rewrite = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-security = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-server = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-servlet = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-servlets = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-start = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-util = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-webapp = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-xml = ${org.eclipse.jetty.version}
|
||||
|
||||
org.gagravarr.vorbis.java.version = 0.8
|
||||
/org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version}
|
||||
/org.gagravarr/vorbis-java-tika = ${org.gagravarr.vorbis.java.version}
|
||||
|
||||
/org.hamcrest/hamcrest = 2.2
|
||||
|
||||
/org.jdom/jdom2 = 2.0.6
|
||||
|
||||
/org.jsoup/jsoup = 1.12.1
|
||||
|
||||
/org.locationtech.jts/jts-core = 1.15.0
|
||||
/org.locationtech.spatial4j/spatial4j = 0.7
|
||||
|
||||
/org.mockito/mockito-core = 2.23.4
|
||||
|
||||
/org.objenesis/objenesis = 2.6
|
||||
|
||||
org.ow2.asm.version = 7.2
|
||||
/org.ow2.asm/asm = ${org.ow2.asm.version}
|
||||
/org.ow2.asm/asm-commons = ${org.ow2.asm.version}
|
||||
|
||||
org.restlet.jee.version = 2.4.3
|
||||
/org.restlet.jee/org.restlet = ${org.restlet.jee.version}
|
||||
/org.restlet.jee/org.restlet.ext.servlet = ${org.restlet.jee.version}
|
||||
|
||||
/org.rrd4j/rrd4j = 3.5
|
||||
|
||||
org.slf4j.version = 1.7.24
|
||||
/org.slf4j/jcl-over-slf4j = ${org.slf4j.version}
|
||||
/org.slf4j/jul-to-slf4j = ${org.slf4j.version}
|
||||
/org.slf4j/slf4j-api = ${org.slf4j.version}
|
||||
/org.slf4j/slf4j-simple = ${org.slf4j.version}
|
||||
|
||||
/org.tallison/jmatio = 1.5
|
||||
/org.tukaani/xz = 1.8
|
||||
|
||||
# required for instantiating a Zookeeper server in tests or embedded
|
||||
org.xerial.snappy.version = 1.1.7.6
|
||||
/org.xerial.snappy/snappy-java = ${org.xerial.snappy.version}
|
||||
|
||||
|
||||
ua.net.nlp.morfologik-ukrainian-search.version = 4.9.1
|
||||
/ua.net.nlp/morfologik-ukrainian-search = ${ua.net.nlp.morfologik-ukrainian-search.version}
|
||||
|
||||
/xerces/xercesImpl = 2.12.0
|
|
@ -53,8 +53,6 @@ include "solr:solrj"
|
|||
include "solr:core"
|
||||
include "solr:server"
|
||||
include "solr:contrib:analysis-extras"
|
||||
include "solr:contrib:dataimporthandler"
|
||||
include "solr:contrib:dataimporthandler-extras"
|
||||
include "solr:contrib:analytics"
|
||||
include "solr:contrib:clustering"
|
||||
include "solr:contrib:extraction"
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
|
||||
/bin/*.pid
|
||||
|
||||
/contrib/dataimporthandler/test-lib/
|
||||
|
||||
/core/test-lib/
|
||||
|
||||
/example/start.jar
|
||||
|
@ -15,9 +13,6 @@
|
|||
/example/solr/zoo_data
|
||||
/example/work/*
|
||||
/example/exampledocs/post.jar
|
||||
/example/example-DIH/**/data
|
||||
/example/example-DIH/**/dataimport.properties
|
||||
/example/example-DIH/solr/mail/lib/*.jar
|
||||
|
||||
/package
|
||||
|
||||
|
|
|
@ -118,6 +118,9 @@ Other Changes
|
|||
|
||||
* LUCENE-9433: Remove Ant support from trunk (Erick Erickson, Uwe Schindler et.al.)
|
||||
|
||||
* SOLR-14783: Remove Data Import Handler (DIH), previously deprecated (Alexandre Rafalovitch)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
* SOLR-14546: Fix for a relatively hard to hit issue in OverseerTaskProcessor that could lead to out of order execution
|
||||
|
|
|
@ -90,15 +90,14 @@ Solr includes a few examples to help you get started. To run a specific example,
|
|||
bin/solr -e <EXAMPLE> where <EXAMPLE> is one of:
|
||||
|
||||
cloud : SolrCloud example
|
||||
dih : Data Import Handler (rdbms, mail, atom, tika)
|
||||
schemaless : Schema-less example (schema is inferred from data during indexing)
|
||||
techproducts : Kitchen sink example providing comprehensive examples of Solr features
|
||||
```
|
||||
|
||||
For instance, if you want to run the Solr Data Import Handler example, do:
|
||||
For instance, if you want to run the SolrCloud example, do:
|
||||
|
||||
```
|
||||
bin/solr -e dih
|
||||
bin/solr -e cloud
|
||||
```
|
||||
|
||||
Indexing Documents
|
||||
|
@ -142,8 +141,7 @@ server/
|
|||
|
||||
example/
|
||||
Contains example documents and an alternative Solr home
|
||||
directory containing examples of how to use the Data Import Handler,
|
||||
see example/example-DIH/README.md for more information.
|
||||
directory containing various examples.
|
||||
|
||||
dist/solr-<component>-XX.jar
|
||||
The Apache Solr libraries. To compile Apache Solr Plugins,
|
||||
|
|
|
@ -386,7 +386,6 @@ function print_usage() {
|
|||
echo " -e <example> Name of the example to run; available examples:"
|
||||
echo " cloud: SolrCloud example"
|
||||
echo " techproducts: Comprehensive example illustrating many of Solr's core capabilities"
|
||||
echo " dih: Data Import Handler"
|
||||
echo " schemaless: Schema-less example"
|
||||
echo ""
|
||||
echo " -a Additional parameters to pass to the JVM when starting Solr, such as to setup"
|
||||
|
|
|
@ -360,7 +360,6 @@ goto done
|
|||
@echo -e example Name of the example to run; available examples:
|
||||
@echo cloud: SolrCloud example
|
||||
@echo techproducts: Comprehensive example illustrating many of Solr's core capabilities
|
||||
@echo dih: Data Import Handler
|
||||
@echo schemaless: Schema-less example
|
||||
@echo.
|
||||
@echo -a opts Additional parameters to pass to the JVM when starting Solr, such as to setup
|
||||
|
|
|
@ -0,0 +1,547 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<project name="common-solr" default="default" xmlns:rsel="antlib:org.apache.tools.ant.types.resources.selectors">
|
||||
<description>
|
||||
This file is designed for importing into a main build file, and not intended
|
||||
for standalone use.
|
||||
</description>
|
||||
|
||||
<dirname file="${ant.file.common-solr}" property="common-solr.dir"/>
|
||||
|
||||
<property name="Name" value="Solr" />
|
||||
|
||||
<!-- solr uses Java 11 -->
|
||||
<property name="javac.release" value="11"/>
|
||||
<property name="javac.args" value="-Xlint:-deprecation"/>
|
||||
<property name="javac.profile.args" value=""/>
|
||||
|
||||
<property name="dest" location="${common-solr.dir}/build" />
|
||||
<property name="build.dir" location="${dest}/${ant.project.name}"/>
|
||||
<property name="jacoco.report.dir" location="${dest}/jacoco"/>
|
||||
<property name="dist" location="${common-solr.dir}/dist"/>
|
||||
<property name="package.dir" location="${common-solr.dir}/package"/>
|
||||
<property name="maven.dist.dir" location="${package.dir}/maven"/>
|
||||
<property name="lucene-libs" location="${dest}/lucene-libs" />
|
||||
<property name="tests.userdir" location="src/test-files"/>
|
||||
<property name="tests.policy" location="${common-solr.dir}/server/etc/security.policy"/>
|
||||
<property name="server.dir" location="${common-solr.dir}/server" />
|
||||
<property name="example" location="${common-solr.dir}/example" />
|
||||
<property name="javadoc.dir" location="${dest}/docs"/>
|
||||
<property name="javadoc-online.dir" location="${dest}/docs-online"/>
|
||||
<property name="tests.cleanthreads.sysprop" value="perClass"/>
|
||||
|
||||
<property name="changes.target.dir" location="${dest}/docs/changes"/>
|
||||
<property name="license.dir" location="${common-solr.dir}/licenses"/>
|
||||
|
||||
<property name="solr.tgz.unpack.dir" location="${common-solr.dir}/build/solr.tgz.unpacked"/>
|
||||
<property name="dist.jar.dir.prefix" value="${solr.tgz.unpack.dir}/solr"/>
|
||||
<property name="dist.jar.dir.suffix" value="dist"/>
|
||||
|
||||
<import file="${common-solr.dir}/../lucene/module-build.xml"/>
|
||||
|
||||
<property name="solr.tgz.file" location="${common-solr.dir}/package/solr-${version}.tgz"/>
|
||||
<available file="${solr.tgz.file}" property="solr.tgz.exists"/>
|
||||
<available type="dir" file="${solr.tgz.unpack.dir}" property="solr.tgz.unpack.dir.exists"/>
|
||||
<target name="-ensure-solr-tgz-exists" unless="solr.tgz.exists">
|
||||
<ant dir="${common-solr.dir}" target="create-package" inheritall="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
<target name="-unpack-solr-tgz" unless="${solr.tgz.unpack.dir.exists}">
|
||||
<antcall target="-ensure-solr-tgz-exists">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</antcall>
|
||||
<mkdir dir="${solr.tgz.unpack.dir}"/>
|
||||
<untar compression="gzip" src="${solr.tgz.file}" dest="${solr.tgz.unpack.dir}">
|
||||
<patternset refid="patternset.lucene.solr.jars"/>
|
||||
</untar>
|
||||
</target>
|
||||
|
||||
<!-- backwards compatibility with existing targets/tasks; TODO: remove this! -->
|
||||
<property name="fullnamever" value="${final.name}"/>
|
||||
|
||||
<path id="additional.dependencies">
|
||||
<fileset dir="${common-solr.dir}/core/lib" excludes="${common.classpath.excludes}"/>
|
||||
<fileset dir="${common-solr.dir}/solrj/lib" excludes="${common.classpath.excludes}"/>
|
||||
<fileset dir="${common-solr.dir}/server/lib" excludes="${common.classpath.excludes}"/>
|
||||
<fileset dir="lib" excludes="${common.classpath.excludes}" erroronmissingdir="false"/>
|
||||
</path>
|
||||
|
||||
<path id="solr.lucene.libs">
|
||||
<!-- List of jars that will be used as the foundation for both
|
||||
the base classpath, as well as copied into the lucene-libs dir
|
||||
in the release.
|
||||
-->
|
||||
<!-- NOTE: lucene-core is explicitly not included because of the
|
||||
base.classpath (compilation & tests are done directly against
|
||||
the class files w/o needing to build the jar)
|
||||
-->
|
||||
<pathelement location="${analyzers-common.jar}"/>
|
||||
<pathelement location="${analyzers-kuromoji.jar}"/>
|
||||
<pathelement location="${analyzers-nori.jar}"/>
|
||||
<pathelement location="${analyzers-phonetic.jar}"/>
|
||||
<pathelement location="${codecs.jar}"/>
|
||||
<pathelement location="${backward-codecs.jar}"/>
|
||||
<pathelement location="${highlighter.jar}"/>
|
||||
<pathelement location="${memory.jar}"/>
|
||||
<pathelement location="${misc.jar}"/>
|
||||
<pathelement location="${spatial-extras.jar}"/>
|
||||
<pathelement location="${spatial3d.jar}"/>
|
||||
<pathelement location="${expressions.jar}"/>
|
||||
<pathelement location="${suggest.jar}"/>
|
||||
<pathelement location="${grouping.jar}"/>
|
||||
<pathelement location="${queries.jar}"/>
|
||||
<pathelement location="${queryparser.jar}"/>
|
||||
<pathelement location="${join.jar}"/>
|
||||
<pathelement location="${sandbox.jar}"/>
|
||||
<pathelement location="${classification.jar}"/>
|
||||
</path>
|
||||
|
||||
<path id="solr.base.classpath">
|
||||
<pathelement location="${common-solr.dir}/build/solr-solrj/classes/java"/>
|
||||
<pathelement location="${common-solr.dir}/build/solr-core/classes/java"/>
|
||||
<path refid="solr.lucene.libs" />
|
||||
<path refid="additional.dependencies"/>
|
||||
<path refid="base.classpath"/>
|
||||
</path>
|
||||
|
||||
<path id="classpath" refid="solr.base.classpath"/>
|
||||
|
||||
<path id="solr.test.base.classpath">
|
||||
<pathelement path="${common-solr.dir}/build/solr-test-framework/classes/java"/>
|
||||
<fileset dir="${common-solr.dir}/test-framework/lib">
|
||||
<include name="*.jar"/>
|
||||
<exclude name="junit-*.jar" />
|
||||
<exclude name="randomizedtesting-runner-*.jar" />
|
||||
<exclude name="ant*.jar" />
|
||||
</fileset>
|
||||
<pathelement path="src/test-files"/>
|
||||
<path refid="test.base.classpath"/>
|
||||
</path>
|
||||
|
||||
<path id="test.classpath" refid="solr.test.base.classpath"/>
|
||||
|
||||
<macrodef name="solr-contrib-uptodate">
|
||||
<attribute name="name"/>
|
||||
<attribute name="property" default="@{name}.uptodate"/>
|
||||
<attribute name="classpath.property" default="@{name}.jar"/>
|
||||
<!-- set jarfile only, if the target jar file has no generic name -->
|
||||
<attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/solr-@{name}-${version}.jar"/>
|
||||
<sequential>
|
||||
<!--<echo message="Checking '@{jarfile}' against source folder '${common.dir}/contrib/@{name}/src/java'"/>-->
|
||||
<property name="@{classpath.property}" location="@{jarfile}"/>
|
||||
<uptodate property="@{property}" targetfile="@{jarfile}">
|
||||
<srcfiles dir="${common-solr.dir}/contrib/@{name}/src/java" includes="**/*.java"/>
|
||||
</uptodate>
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<target name="validate" depends="compile-tools">
|
||||
</target>
|
||||
|
||||
<target name="init-dist" depends="resolve-groovy">
|
||||
<mkdir dir="${build.dir}"/>
|
||||
<mkdir dir="${package.dir}"/>
|
||||
<mkdir dir="${dist}"/>
|
||||
<mkdir dir="${maven.dist.dir}"/>
|
||||
</target>
|
||||
|
||||
<target name="prep-lucene-jars"
|
||||
depends="resolve-groovy,
|
||||
jar-lucene-core, jar-backward-codecs, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-analyzers-nori, jar-codecs,jar-expressions, jar-suggest, jar-highlighter, jar-memory,
|
||||
jar-misc, jar-spatial-extras, jar-spatial3d, jar-grouping, jar-queries, jar-queryparser, jar-join, jar-sandbox, jar-classification">
|
||||
<property name="solr.deps.compiled" value="true"/>
|
||||
</target>
|
||||
|
||||
<target name="lucene-jars-to-solr"
|
||||
depends="-lucene-jars-to-solr-not-for-package,-lucene-jars-to-solr-package"/>
|
||||
|
||||
<target name="-lucene-jars-to-solr-not-for-package" unless="called.from.create-package">
|
||||
<sequential>
|
||||
<antcall target="prep-lucene-jars" inheritall="true"/>
|
||||
<property name="solr.deps.compiled" value="true"/>
|
||||
<copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
|
||||
<path refid="solr.lucene.libs" />
|
||||
<!-- NOTE: lucene-core is not already included in "solr.lucene.libs" because of its use in classpaths. -->
|
||||
<fileset file="${lucene-core.jar}" />
|
||||
</copy>
|
||||
</sequential>
|
||||
</target>
|
||||
|
||||
<target name="-lucene-jars-to-solr-package" if="called.from.create-package">
|
||||
<sequential>
|
||||
<antcall target="-unpack-lucene-tgz" inheritall="true"/>
|
||||
<pathconvert property="relative.solr.lucene.libs" pathsep=",">
|
||||
<path refid="solr.lucene.libs"/>
|
||||
<fileset file="${lucene-core.jar}"/>
|
||||
<globmapper from="${common.build.dir}/*" to="*" handledirsep="true"/>
|
||||
</pathconvert>
|
||||
<mkdir dir="${lucene-libs}"/>
|
||||
<copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
|
||||
<fileset dir="${lucene.tgz.unpack.dir}/lucene-${version}" includes="${relative.solr.lucene.libs}"/>
|
||||
</copy>
|
||||
</sequential>
|
||||
</target>
|
||||
|
||||
<!-- Shared core/solrj/test-framework/contrib targets -->
|
||||
|
||||
<macrodef name="solr-jarify" description="Builds a Solr JAR file">
|
||||
<attribute name="basedir" default="${build.dir}/classes/java"/>
|
||||
<attribute name="destfile" default="${build.dir}/${final.name}.jar"/>
|
||||
<attribute name="title" default="Apache Solr Search Server: ${ant.project.name}"/>
|
||||
<attribute name="excludes" default="**/pom.xml,**/*.iml"/>
|
||||
<attribute name="metainf.source.dir" default="${common-solr.dir}"/>
|
||||
<attribute name="implementation.title" default="org.apache.solr"/>
|
||||
<attribute name="manifest.file" default="${manifest.file}"/>
|
||||
<element name="solr-jarify-filesets" optional="true"/>
|
||||
<element name="solr-jarify-additional-manifest-attributes" optional="true"/>
|
||||
<sequential>
|
||||
<jarify basedir="@{basedir}" destfile="@{destfile}"
|
||||
title="@{title}" excludes="@{excludes}"
|
||||
metainf.source.dir="@{metainf.source.dir}"
|
||||
implementation.title="@{implementation.title}"
|
||||
manifest.file="@{manifest.file}">
|
||||
<filesets>
|
||||
<solr-jarify-filesets />
|
||||
</filesets>
|
||||
<jarify-additional-manifest-attributes>
|
||||
<solr-jarify-additional-manifest-attributes />
|
||||
</jarify-additional-manifest-attributes>
|
||||
</jarify>
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<target name="jar-core" depends="compile-core">
|
||||
<solr-jarify/>
|
||||
</target>
|
||||
|
||||
<target name="compile-core" depends="prep-lucene-jars,resolve-example,resolve-server,common.compile-core"/>
|
||||
<target name="compile-test" depends="compile-solr-test-framework,common.compile-test"/>
|
||||
|
||||
<target name="dist" depends="jar-core">
|
||||
<copy file="${build.dir}/${fullnamever}.jar" todir="${dist}"/>
|
||||
</target>
|
||||
|
||||
<property name="lucenedocs" location="${common.dir}/build/docs"/>
|
||||
|
||||
<!-- dependency to ensure all lucene javadocs are present -->
|
||||
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-nori,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-backward-codecs,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial-extras,javadocs-join,javadocs-test-framework"/>
|
||||
|
||||
<!-- create javadocs for the current module -->
|
||||
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solr-core,check-javadocs-uptodate" unless="javadocs-uptodate-${name}">
|
||||
<sequential>
|
||||
<mkdir dir="${javadoc.dir}/${name}"/>
|
||||
<solr-invoke-javadoc>
|
||||
<solrsources>
|
||||
<packageset dir="${src.dir}"/>
|
||||
</solrsources>
|
||||
<links>
|
||||
<link href="../solr-solrj"/>
|
||||
<link href="../solr-core"/>
|
||||
</links>
|
||||
</solr-invoke-javadoc>
|
||||
<solr-jarify basedir="${javadoc.dir}/${name}" destfile="${build.dir}/${final.name}-javadoc.jar"/>
|
||||
</sequential>
|
||||
</target>
|
||||
|
||||
<target name="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
|
||||
<uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/solr-core-${version}-javadoc.jar">
|
||||
<srcfiles dir="${common-solr.dir}/core/src/java" includes="**/*.java"/>
|
||||
</uptodate>
|
||||
</target>
|
||||
|
||||
<target name="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
|
||||
<uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/solr-solrj-${version}-javadoc.jar">
|
||||
<srcfiles dir="${common-solr.dir}/solrj/src/java" includes="**/*.java"/>
|
||||
</uptodate>
|
||||
</target>
|
||||
|
||||
<target name="javadocs-solr-core" depends="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
|
||||
<ant dir="${common-solr.dir}/core" target="javadocs" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="solr-core-javadocs.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<target name="javadocs-solrj" depends="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
|
||||
<ant dir="${common-solr.dir}/solrj" target="javadocs" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="solrj-javadocs.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<!-- macro to create solr javadocs with links to lucene. make sure calling task depends on lucene-javadocs -->
|
||||
<macrodef name="solr-invoke-javadoc">
|
||||
<element name="solrsources" optional="yes"/>
|
||||
<element name="links" optional="yes"/>
|
||||
<attribute name="destdir" default="${javadoc.dir}/${name}"/>
|
||||
<attribute name="title" default="${Name} ${version} ${name} API"/>
|
||||
<attribute name="overview" default="${src.dir}/overview.html"/>
|
||||
<sequential>
|
||||
<mkdir dir="@{destdir}"/>
|
||||
<invoke-javadoc destdir="@{destdir}" title="@{title}" overview="@{overview}">
|
||||
<sources>
|
||||
<solrsources/>
|
||||
<link offline="true" href="${lucene.javadoc.url}core" packagelistloc="${lucenedocs}/core"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-common" packagelistloc="${lucenedocs}/analyzers-common"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-icu" packagelistloc="${lucenedocs}/analyzers-icu"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-kuromoji" packagelistloc="${lucenedocs}/analyzers-kuromoji"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-nori" packagelistloc="${lucenedocs}/analyzers-nori"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-morfologik" packagelistloc="${lucenedocs}/analyzers-morfologik"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-phonetic" packagelistloc="${lucenedocs}/analyzers-phonetic"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-smartcn" packagelistloc="${lucenedocs}/analyzers-smartcn"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-stempel" packagelistloc="${lucenedocs}/analyzers-stempel"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}backward-codecs" packagelistloc="${lucenedocs}/backward-codecs"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}codecs" packagelistloc="${lucenedocs}/codecs"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}expressions" packagelistloc="${lucenedocs}/expressions"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}suggest" packagelistloc="${lucenedocs}/suggest"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}grouping" packagelistloc="${lucenedocs}/grouping"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}join" packagelistloc="${lucenedocs}/join"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}queries" packagelistloc="${lucenedocs}/queries"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}queryparser" packagelistloc="${lucenedocs}/queryparser"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}highlighter" packagelistloc="${lucenedocs}/highlighter"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}memory" packagelistloc="${lucenedocs}/memory"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}misc" packagelistloc="${lucenedocs}/misc"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}classification" packagelistloc="${lucenedocs}/classification"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}spatial-extras" packagelistloc="${lucenedocs}/spatial-extras"/>
|
||||
<links/>
|
||||
<link href=""/>
|
||||
</sources>
|
||||
</invoke-javadoc>
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<target name="define-lucene-javadoc-url" depends="resolve-groovy" unless="lucene.javadoc.url">
|
||||
<property name="useLocalJavadocUrl" value=""/>
|
||||
<groovy><![CDATA[
|
||||
String url, version = properties['version'];
|
||||
String useLocalJavadocUrl = properties['useLocalJavadocUrl'];
|
||||
if (version != properties['version.base'] || Boolean.parseBoolean(useLocalJavadocUrl)) {
|
||||
url = new File(properties['common.dir'], 'build' + File.separator + 'docs').toURI().toASCIIString();
|
||||
if (!(url =~ /\/$/)) url += '/';
|
||||
} else {
|
||||
version = version.replace('.', '_');
|
||||
url = 'https://lucene.apache.org/core/' + version + '/';
|
||||
}
|
||||
task.log('Using the following URL to refer to Lucene Javadocs: ' + url);
|
||||
properties['lucene.javadoc.url'] = url;
|
||||
]]></groovy>
|
||||
</target>
|
||||
|
||||
<target name="define-solr-javadoc-url" depends="resolve-groovy" unless="solr.javadoc.url">
|
||||
<groovy><![CDATA[
|
||||
String url, version = properties['version'];
|
||||
if (version != properties['version.base']) {
|
||||
url = '';
|
||||
task.log('Disabled Solr Javadocs online URL for packaging (custom build / SNAPSHOT version).');
|
||||
} else {
|
||||
version = version.replace('.', '_');
|
||||
url = 'https://lucene.apache.org/solr/' + version + '/';
|
||||
task.log('Using the following URL to refer to Solr Javadocs: ' + url);
|
||||
}
|
||||
properties['solr.javadoc.url'] = url;
|
||||
]]></groovy>
|
||||
</target>
|
||||
|
||||
<target name="jar-src">
|
||||
<sequential>
|
||||
<mkdir dir="${build.dir}"/>
|
||||
<solr-jarify basedir="${src.dir}" destfile="${build.dir}/${final.name}-src.jar">
|
||||
<solr-jarify-filesets>
|
||||
<fileset dir="${resources.dir}" erroronmissingdir="no"/>
|
||||
</solr-jarify-filesets>
|
||||
</solr-jarify>
|
||||
</sequential>
|
||||
</target>
|
||||
|
||||
<target name="-validate-maven-dependencies" depends="-validate-maven-dependencies.init">
|
||||
<m2-validate-dependencies pom.xml="${maven.pom.xml}" licenseDirectory="${license.dir}">
|
||||
<additional-filters>
|
||||
<replaceregex pattern="jetty([^/]+)$" replace="jetty" flags="gi" />
|
||||
<replaceregex pattern="slf4j-([^/]+)$" replace="slf4j" flags="gi" />
|
||||
<replaceregex pattern="(bcmail|bcprov)-([^/]+)$" replace="\1" flags="gi" />
|
||||
</additional-filters>
|
||||
<excludes>
|
||||
<rsel:or>
|
||||
<rsel:name name="**/lucene-*-${maven.version.glob}.jar" handledirsep="true"/>
|
||||
<rsel:name name="**/solr-*-${maven.version.glob}.jar" handledirsep="true"/>
|
||||
<!-- TODO: figure out what is going on here with servlet-apis -->
|
||||
<rsel:name name="**/*servlet*.jar" handledirsep="true"/>
|
||||
</rsel:or>
|
||||
</excludes>
|
||||
</m2-validate-dependencies>
|
||||
</target>
|
||||
|
||||
<!-- Solr core targets -->
|
||||
<target name="compile-solr-core" description="Compile Solr core." unless="solr.core.compiled">
|
||||
<ant dir="${common-solr.dir}/core" target="compile-core" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="solr.core.compiled" value="true"/>
|
||||
</target>
|
||||
<target name="compile-test-solr-core" description="Compile solr core tests">
|
||||
<ant dir="${common-solr.dir}/core" target="compile-test" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="solr.core.compiled" value="true"/>
|
||||
</target>
|
||||
<target name="dist-core" depends="init-dist"
|
||||
description="Creates the Solr JAR Distribution file.">
|
||||
<ant dir="${common-solr.dir}/core" target="dist" inheritall="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
|
||||
<!-- Solrj targets -->
|
||||
<target name="compile-solrj" description="Compile the java client." unless="solrj.compiled">
|
||||
<ant dir="${common-solr.dir}/solrj" target="compile-core" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="solrj.compiled" value="true"/>
|
||||
</target>
|
||||
<target name="compile-test-solrj" description="Compile java client tests">
|
||||
<ant dir="${common-solr.dir}/solrj" target="compile-test" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="solrj.compiled" value="true"/>
|
||||
</target>
|
||||
<target name="dist-solrj" depends="init-dist"
|
||||
description="Creates the Solr-J JAR Distribution file.">
|
||||
<ant dir="${common-solr.dir}/solrj" target="dist" inheritall="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
<target name="jar-solrj" description="Jar Solr-J">
|
||||
<ant dir="${common-solr.dir}/solrj" target="jar-core" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
|
||||
<!-- Solr test-framework targets -->
|
||||
<target name="compile-solr-test-framework" description="Compile the Solr test-framework" unless="solr.test.framework.compiled">
|
||||
<ant dir="${common-solr.dir}/test-framework" target="compile-core" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="solr.core.compiled" value="true"/>
|
||||
<property name="solr.test.framework.compiled" value="true"/>
|
||||
</target>
|
||||
|
||||
<target name="jar-solr-test-framework" depends="compile-solr-test-framework">
|
||||
<ant dir="${common-solr.dir}/test-framework" target="jar-core" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
|
||||
<!-- resolve dependencies in the example (relied upon by compile/tests) -->
|
||||
<target name="resolve-example" unless="example.libs.uptodate">
|
||||
<property name="example.libs.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<!-- resolve dependencies in the server directory (relied upon by compile/tests) -->
|
||||
<target name="resolve-server" unless="server.libs.uptodate">
|
||||
<ant dir="${common-solr.dir}/server" target="resolve" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="server.libs.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<macrodef name="contrib-crawl">
|
||||
<attribute name="target" default=""/>
|
||||
<attribute name="failonerror" default="true"/>
|
||||
<sequential>
|
||||
<subant target="@{target}" failonerror="@{failonerror}" inheritall="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
<fileset dir="." includes="contrib/*/build.xml"/>
|
||||
</subant>
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<target name="-compile-test-lucene-analysis">
|
||||
<ant dir="${common.dir}/analysis" target="compile-test" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
|
||||
<target name="-compile-test-lucene-queryparser">
|
||||
<ant dir="${common.dir}/queryparser" target="compile-test" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
|
||||
<target name="-compile-test-lucene-backward-codecs">
|
||||
<ant dir="${common.dir}/backward-codecs" target="compile-test" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
|
||||
<!-- Solr contrib targets -->
|
||||
<target name="-compile-analysis-extras">
|
||||
<ant dir="${common-solr.dir}/contrib/analysis-extras" target="compile" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
</target>
|
||||
|
||||
<target name="compile-contrib" description="Compile contrib modules">
|
||||
<contrib-crawl target="compile-core"/>
|
||||
</target>
|
||||
|
||||
<target name="compile-test-contrib" description="Compile contrib modules' tests">
|
||||
<contrib-crawl target="compile-test"/>
|
||||
</target>
|
||||
|
||||
<target name="javadocs-contrib" description="Compile contrib modules">
|
||||
<contrib-crawl target="javadocs"/>
|
||||
</target>
|
||||
|
||||
<target name="jar-contrib" description="Jar contrib modules">
|
||||
<contrib-crawl target="jar-core"/>
|
||||
</target>
|
||||
|
||||
<target name="contribs-add-to-webapp">
|
||||
<mkdir dir="${dest}/web"/>
|
||||
<delete dir="${dest}/web" includes="**/*" failonerror="false"/>
|
||||
<contrib-crawl target="add-to-webapp"/>
|
||||
</target>
|
||||
|
||||
<!-- Forbidden API Task, customizations for Solr -->
|
||||
<target name="-check-forbidden-all" depends="-init-forbidden-apis,compile-core,compile-test">
|
||||
<property prefix="ivyversions" file="${common.dir}/ivy-versions.properties"/><!-- for commons-io version -->
|
||||
<forbidden-apis suppressAnnotation="**.SuppressForbidden" classpathref="forbidden-apis.allclasses.classpath" targetVersion="${javac.release}">
|
||||
<signatures>
|
||||
<bundled name="jdk-unsafe"/>
|
||||
<bundled name="jdk-deprecated"/>
|
||||
<bundled name="jdk-non-portable"/>
|
||||
<bundled name="jdk-reflection"/>
|
||||
<bundled name="commons-io-unsafe-${ivyversions./commons-io/commons-io}"/>
|
||||
<fileset dir="${common.dir}/tools/forbiddenApis">
|
||||
<include name="base.txt" />
|
||||
<include name="servlet-api.txt" />
|
||||
<include name="solr.txt" />
|
||||
</fileset>
|
||||
</signatures>
|
||||
<fileset dir="${build.dir}/classes/java" excludes="${forbidden-base-excludes}"/>
|
||||
<fileset dir="${build.dir}/classes/test" excludes="${forbidden-tests-excludes}" erroronmissingdir="false"/>
|
||||
</forbidden-apis>
|
||||
</target>
|
||||
|
||||
|
||||
<!-- hack for now to disable *all* Solr tests on Jenkins when "tests.disable-solr" property is set -->
|
||||
<target name="test" unless="tests.disable-solr">
|
||||
<antcall target="common.test" inheritrefs="true" inheritall="true"/>
|
||||
</target>
|
||||
</project>
|
|
@ -1,33 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
apply plugin: 'java-library'
|
||||
|
||||
description = 'Data Import Handler Extras'
|
||||
|
||||
dependencies {
|
||||
implementation project(':solr:core')
|
||||
|
||||
implementation project(':solr:contrib:dataimporthandler')
|
||||
implementation project(':solr:contrib:extraction')
|
||||
|
||||
implementation ('javax.activation:activation')
|
||||
implementation ('com.sun.mail:javax.mail')
|
||||
implementation ('com.sun.mail:gimap')
|
||||
|
||||
testImplementation project(':solr:test-framework')
|
||||
}
|
|
@ -1,901 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import com.sun.mail.imap.IMAPMessage;
|
||||
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
|
||||
import org.apache.solr.util.RTimer;
|
||||
import org.apache.tika.Tika;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.mail.*;
|
||||
import javax.mail.internet.AddressException;
|
||||
import javax.mail.internet.ContentType;
|
||||
import javax.mail.internet.InternetAddress;
|
||||
import javax.mail.internet.MimeMessage;
|
||||
import javax.mail.search.*;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.*;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import com.sun.mail.gimap.GmailFolder;
|
||||
import com.sun.mail.gimap.GmailRawSearchTerm;
|
||||
|
||||
/**
|
||||
* An EntityProcessor instance which can index emails along with their
|
||||
* attachments from POP3 or IMAP sources. Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler"
|
||||
* >http://wiki.apache.org/solr/DataImportHandler</a> for more details. <b>This
|
||||
* API is experimental and subject to change</b>
|
||||
*
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class MailEntityProcessor extends EntityProcessorBase {
|
||||
|
||||
private static final SimpleDateFormat sinceDateParser =
|
||||
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT);
|
||||
private static final SimpleDateFormat afterFmt =
|
||||
new SimpleDateFormat("yyyy/MM/dd", Locale.ROOT);
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
public static interface CustomFilter {
|
||||
public SearchTerm getCustomSearch(Folder folder);
|
||||
}
|
||||
|
||||
public void init(Context context) {
|
||||
super.init(context);
|
||||
// set attributes using XXX getXXXFromContext(attribute, defaultValue);
|
||||
// applies variable resolver and return default if value is not found or null
|
||||
// REQUIRED : connection and folder info
|
||||
user = getStringFromContext("user", null);
|
||||
password = getStringFromContext("password", null);
|
||||
host = getStringFromContext("host", null);
|
||||
protocol = getStringFromContext("protocol", null);
|
||||
folderNames = getStringFromContext("folders", null);
|
||||
// validate
|
||||
if (host == null || protocol == null || user == null || password == null
|
||||
|| folderNames == null) throw new DataImportHandlerException(
|
||||
DataImportHandlerException.SEVERE,
|
||||
"'user|password|protocol|host|folders' are required attributes");
|
||||
|
||||
// OPTIONAL : have defaults and are optional
|
||||
recurse = getBoolFromContext("recurse", true);
|
||||
|
||||
exclude.clear();
|
||||
String excludes = getStringFromContext("exclude", "");
|
||||
if (excludes != null && !excludes.trim().equals("")) {
|
||||
exclude = Arrays.asList(excludes.split(","));
|
||||
}
|
||||
|
||||
include.clear();
|
||||
String includes = getStringFromContext("include", "");
|
||||
if (includes != null && !includes.trim().equals("")) {
|
||||
include = Arrays.asList(includes.split(","));
|
||||
}
|
||||
batchSize = getIntFromContext("batchSize", 20);
|
||||
customFilter = getStringFromContext("customFilter", "");
|
||||
if (filters != null) filters.clear();
|
||||
folderIter = null;
|
||||
msgIter = null;
|
||||
|
||||
String lastIndexTime = null;
|
||||
String command =
|
||||
String.valueOf(context.getRequestParameters().get("command"));
|
||||
if (!DataImporter.FULL_IMPORT_CMD.equals(command))
|
||||
throw new IllegalArgumentException(this.getClass().getSimpleName()+
|
||||
" only supports "+DataImporter.FULL_IMPORT_CMD);
|
||||
|
||||
// Read the last_index_time out of the dataimport.properties if available
|
||||
String cname = getStringFromContext("name", "mailimporter");
|
||||
String varName = ConfigNameConstants.IMPORTER_NS_SHORT + "." + cname + "."
|
||||
+ DocBuilder.LAST_INDEX_TIME;
|
||||
Object varValue = context.getVariableResolver().resolve(varName);
|
||||
log.info("{}={}", varName, varValue);
|
||||
|
||||
if (varValue != null && !"".equals(varValue) &&
|
||||
!"".equals(getStringFromContext("fetchMailsSince", ""))) {
|
||||
|
||||
// need to check if varValue is the epoch, which we'll take to mean the
|
||||
// initial value, in which case means we should use fetchMailsSince instead
|
||||
Date tmp = null;
|
||||
try {
|
||||
tmp = sinceDateParser.parse((String)varValue);
|
||||
if (tmp.getTime() == 0) {
|
||||
log.info("Ignoring initial value {} for {} in favor of fetchMailsSince config parameter"
|
||||
, varValue, varName);
|
||||
tmp = null; // don't use this value
|
||||
}
|
||||
} catch (ParseException e) {
|
||||
// probably ok to ignore this since we have other options below
|
||||
// as we're just trying to figure out if the date is 0
|
||||
log.warn("Failed to parse {} from {} due to", varValue, varName, e);
|
||||
}
|
||||
|
||||
if (tmp == null) {
|
||||
// favor fetchMailsSince in this case because the value from
|
||||
// dataimport.properties is the default/init value
|
||||
varValue = getStringFromContext("fetchMailsSince", "");
|
||||
log.info("fetchMailsSince={}", varValue);
|
||||
}
|
||||
}
|
||||
|
||||
if (varValue == null || "".equals(varValue)) {
|
||||
varName = ConfigNameConstants.IMPORTER_NS_SHORT + "."
|
||||
+ DocBuilder.LAST_INDEX_TIME;
|
||||
varValue = context.getVariableResolver().resolve(varName);
|
||||
log.info("{}={}", varName, varValue);
|
||||
}
|
||||
|
||||
if (varValue != null && varValue instanceof String) {
|
||||
lastIndexTime = (String)varValue;
|
||||
if (lastIndexTime != null && lastIndexTime.length() == 0)
|
||||
lastIndexTime = null;
|
||||
}
|
||||
|
||||
if (lastIndexTime == null)
|
||||
lastIndexTime = getStringFromContext("fetchMailsSince", "");
|
||||
|
||||
log.info("Using lastIndexTime {} for mail import", lastIndexTime);
|
||||
|
||||
this.fetchMailsSince = null;
|
||||
if (lastIndexTime != null && lastIndexTime.length() > 0) {
|
||||
try {
|
||||
fetchMailsSince = sinceDateParser.parse(lastIndexTime);
|
||||
log.info("Parsed fetchMailsSince={}", lastIndexTime);
|
||||
} catch (ParseException e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Invalid value for fetchMailSince: " + lastIndexTime, e);
|
||||
}
|
||||
}
|
||||
|
||||
fetchSize = getIntFromContext("fetchSize", 32 * 1024);
|
||||
cTimeout = getIntFromContext("connectTimeout", 30 * 1000);
|
||||
rTimeout = getIntFromContext("readTimeout", 60 * 1000);
|
||||
|
||||
String tmp = context.getEntityAttribute("includeOtherUserFolders");
|
||||
includeOtherUserFolders = (tmp != null && Boolean.valueOf(tmp.trim()));
|
||||
tmp = context.getEntityAttribute("includeSharedFolders");
|
||||
includeSharedFolders = (tmp != null && Boolean.valueOf(tmp.trim()));
|
||||
|
||||
setProcessAttachmentConfig();
|
||||
includeContent = getBoolFromContext("includeContent", true);
|
||||
|
||||
logConfig();
|
||||
}
|
||||
|
||||
private void setProcessAttachmentConfig() {
|
||||
processAttachment = true;
|
||||
String tbval = context.getEntityAttribute("processAttachments");
|
||||
if (tbval == null) {
|
||||
tbval = context.getEntityAttribute("processAttachement");
|
||||
if (tbval != null) processAttachment = Boolean.valueOf(tbval);
|
||||
} else processAttachment = Boolean.valueOf(tbval);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String,Object> nextRow() {
|
||||
Message mail = null;
|
||||
Map<String,Object> row = null;
|
||||
do {
|
||||
// try till there is a valid document or folders get exhausted.
|
||||
// when mail == NULL, it means end of processing
|
||||
mail = getNextMail();
|
||||
|
||||
if (mail != null)
|
||||
row = getDocumentFromMail(mail);
|
||||
|
||||
if (row != null && row.get("folder") == null)
|
||||
row.put("folder", mail.getFolder().getFullName());
|
||||
|
||||
} while (row == null && mail != null);
|
||||
return row;
|
||||
}
|
||||
|
||||
private Message getNextMail() {
|
||||
if (!connected) {
|
||||
// this is needed to load the activation mail stuff correctly
|
||||
// otherwise, the JavaMail multipart support doesn't get configured
|
||||
// correctly, which leads to a class cast exception when processing
|
||||
// multipart messages: IMAPInputStream cannot be cast to
|
||||
// javax.mail.Multipart
|
||||
if (false == withContextClassLoader(getClass().getClassLoader(), this::connectToMailBox)) {
|
||||
return null;
|
||||
}
|
||||
connected = true;
|
||||
}
|
||||
if (folderIter == null) {
|
||||
createFilters();
|
||||
folderIter = new FolderIterator(mailbox);
|
||||
}
|
||||
// get next message from the folder
|
||||
// if folder is exhausted get next folder
|
||||
// loop till a valid mail or all folders exhausted.
|
||||
while (msgIter == null || !msgIter.hasNext()) {
|
||||
Folder next = folderIter.hasNext() ? folderIter.next() : null;
|
||||
if (next == null) return null;
|
||||
|
||||
msgIter = new MessageIterator(next, batchSize);
|
||||
}
|
||||
return msgIter.next();
|
||||
}
|
||||
|
||||
private Map<String,Object> getDocumentFromMail(Message mail) {
|
||||
Map<String,Object> row = new HashMap<>();
|
||||
try {
|
||||
addPartToDocument(mail, row, true);
|
||||
return row;
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to convert message [{}] to document due to: {}"
|
||||
, mail, e, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked"})
|
||||
public void addPartToDocument(Part part, Map<String,Object> row, boolean outerMost) throws Exception {
|
||||
if (part instanceof Message) {
|
||||
addEnvelopeToDocument(part, row);
|
||||
}
|
||||
|
||||
String ct = part.getContentType().toLowerCase(Locale.ROOT);
|
||||
ContentType ctype = new ContentType(ct);
|
||||
if (part.isMimeType("multipart/*")) {
|
||||
Object content = part.getContent();
|
||||
if (content != null && content instanceof Multipart) {
|
||||
Multipart mp = (Multipart) part.getContent();
|
||||
int count = mp.getCount();
|
||||
if (part.isMimeType("multipart/alternative")) count = 1;
|
||||
for (int i = 0; i < count; i++)
|
||||
addPartToDocument(mp.getBodyPart(i), row, false);
|
||||
} else {
|
||||
log.warn("Multipart content is a not an instance of Multipart! Content is: {}"
|
||||
+ ". Typically, this is due to the Java Activation JAR being loaded by the wrong classloader."
|
||||
, (content != null ? content.getClass().getName() : "null"));
|
||||
}
|
||||
} else if (part.isMimeType("message/rfc822")) {
|
||||
addPartToDocument((Part) part.getContent(), row, false);
|
||||
} else {
|
||||
String disp = part.getDisposition();
|
||||
if (includeContent
|
||||
&& !(disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT))) {
|
||||
InputStream is = part.getInputStream();
|
||||
Metadata contentTypeHint = new Metadata();
|
||||
contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType()
|
||||
.toLowerCase(Locale.ENGLISH));
|
||||
String content = (new Tika()).parseToString(is, contentTypeHint);
|
||||
if (row.get(CONTENT) == null) row.put(CONTENT, new ArrayList<String>());
|
||||
List<String> contents = (List<String>) row.get(CONTENT);
|
||||
contents.add(content.trim());
|
||||
row.put(CONTENT, contents);
|
||||
}
|
||||
if (!processAttachment || disp == null
|
||||
|| !disp.equalsIgnoreCase(Part.ATTACHMENT)) return;
|
||||
InputStream is = part.getInputStream();
|
||||
String fileName = part.getFileName();
|
||||
Metadata contentTypeHint = new Metadata();
|
||||
contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType()
|
||||
.toLowerCase(Locale.ENGLISH));
|
||||
String content = (new Tika()).parseToString(is, contentTypeHint);
|
||||
if (content == null || content.trim().length() == 0) return;
|
||||
|
||||
if (row.get(ATTACHMENT) == null) row.put(ATTACHMENT,
|
||||
new ArrayList<String>());
|
||||
List<String> contents = (List<String>) row.get(ATTACHMENT);
|
||||
contents.add(content.trim());
|
||||
row.put(ATTACHMENT, contents);
|
||||
if (row.get(ATTACHMENT_NAMES) == null) row.put(ATTACHMENT_NAMES,
|
||||
new ArrayList<String>());
|
||||
List<String> names = (List<String>) row.get(ATTACHMENT_NAMES);
|
||||
names.add(fileName);
|
||||
row.put(ATTACHMENT_NAMES, names);
|
||||
}
|
||||
}
|
||||
|
||||
private void addEnvelopeToDocument(Part part, Map<String,Object> row)
|
||||
throws MessagingException {
|
||||
MimeMessage mail = (MimeMessage) part;
|
||||
Address[] adresses;
|
||||
if ((adresses = mail.getFrom()) != null && adresses.length > 0) row.put(
|
||||
FROM, adresses[0].toString());
|
||||
|
||||
List<String> to = new ArrayList<>();
|
||||
if ((adresses = mail.getRecipients(Message.RecipientType.TO)) != null) addAddressToList(
|
||||
adresses, to);
|
||||
if ((adresses = mail.getRecipients(Message.RecipientType.CC)) != null) addAddressToList(
|
||||
adresses, to);
|
||||
if ((adresses = mail.getRecipients(Message.RecipientType.BCC)) != null) addAddressToList(
|
||||
adresses, to);
|
||||
if (to.size() > 0) row.put(TO_CC_BCC, to);
|
||||
|
||||
row.put(MESSAGE_ID, mail.getMessageID());
|
||||
row.put(SUBJECT, mail.getSubject());
|
||||
|
||||
Date d = mail.getSentDate();
|
||||
if (d != null) {
|
||||
row.put(SENT_DATE, d);
|
||||
}
|
||||
|
||||
List<String> flags = new ArrayList<>();
|
||||
for (Flags.Flag flag : mail.getFlags().getSystemFlags()) {
|
||||
if (flag == Flags.Flag.ANSWERED) flags.add(FLAG_ANSWERED);
|
||||
else if (flag == Flags.Flag.DELETED) flags.add(FLAG_DELETED);
|
||||
else if (flag == Flags.Flag.DRAFT) flags.add(FLAG_DRAFT);
|
||||
else if (flag == Flags.Flag.FLAGGED) flags.add(FLAG_FLAGGED);
|
||||
else if (flag == Flags.Flag.RECENT) flags.add(FLAG_RECENT);
|
||||
else if (flag == Flags.Flag.SEEN) flags.add(FLAG_SEEN);
|
||||
}
|
||||
flags.addAll(Arrays.asList(mail.getFlags().getUserFlags()));
|
||||
if (flags.size() == 0) flags.add(FLAG_NONE);
|
||||
row.put(FLAGS, flags);
|
||||
|
||||
String[] hdrs = mail.getHeader("X-Mailer");
|
||||
if (hdrs != null) row.put(XMAILER, hdrs[0]);
|
||||
}
|
||||
|
||||
private void addAddressToList(Address[] adresses, List<String> to)
|
||||
throws AddressException {
|
||||
for (Address address : adresses) {
|
||||
to.add(address.toString());
|
||||
InternetAddress ia = (InternetAddress) address;
|
||||
if (ia.isGroup()) {
|
||||
InternetAddress[] group = ia.getGroup(false);
|
||||
for (InternetAddress member : group)
|
||||
to.add(member.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean connectToMailBox() {
|
||||
try {
|
||||
Properties props = new Properties();
|
||||
if (System.getProperty("mail.debug") != null)
|
||||
props.setProperty("mail.debug", System.getProperty("mail.debug"));
|
||||
|
||||
if (("imap".equals(protocol) || "imaps".equals(protocol))
|
||||
&& "imap.gmail.com".equals(host)) {
|
||||
log.info("Consider using 'gimaps' protocol instead of '{}' for enabling GMail specific extensions for {}"
|
||||
, protocol, host);
|
||||
}
|
||||
|
||||
props.setProperty("mail.store.protocol", protocol);
|
||||
|
||||
String imapPropPrefix = protocol.startsWith("gimap") ? "gimap" : "imap";
|
||||
props.setProperty("mail." + imapPropPrefix + ".fetchsize", "" + fetchSize);
|
||||
props.setProperty("mail." + imapPropPrefix + ".timeout", "" + rTimeout);
|
||||
props.setProperty("mail." + imapPropPrefix + ".connectiontimeout", "" + cTimeout);
|
||||
|
||||
int port = -1;
|
||||
int colonAt = host.indexOf(":");
|
||||
if (colonAt != -1) {
|
||||
port = Integer.parseInt(host.substring(colonAt + 1));
|
||||
host = host.substring(0, colonAt);
|
||||
}
|
||||
|
||||
Session session = Session.getDefaultInstance(props, null);
|
||||
mailbox = session.getStore(protocol);
|
||||
if (port != -1) {
|
||||
mailbox.connect(host, port, user, password);
|
||||
} else {
|
||||
mailbox.connect(host, user, password);
|
||||
}
|
||||
log.info("Connected to {}'s mailbox on {}", user, host);
|
||||
|
||||
return true;
|
||||
} catch (MessagingException e) {
|
||||
String errMsg = String.format(Locale.ENGLISH,
|
||||
"Failed to connect to %s server %s as user %s due to: %s", protocol,
|
||||
host, user, e.toString());
|
||||
log.error(errMsg, e);
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
errMsg, e);
|
||||
}
|
||||
}
|
||||
|
||||
private void createFilters() {
|
||||
if (fetchMailsSince != null) {
|
||||
filters.add(new MailsSinceLastCheckFilter(fetchMailsSince));
|
||||
}
|
||||
if (customFilter != null && !customFilter.equals("")) {
|
||||
try {
|
||||
Class<?> cf = Class.forName(customFilter);
|
||||
Object obj = cf.getConstructor().newInstance();
|
||||
if (obj instanceof CustomFilter) {
|
||||
filters.add((CustomFilter) obj);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Custom filter could not be created", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void logConfig() {
|
||||
if (!log.isInfoEnabled()) return;
|
||||
|
||||
String lineSep = System.getProperty("line.separator");
|
||||
|
||||
StringBuffer config = new StringBuffer();
|
||||
config.append("user : ").append(user).append(lineSep);
|
||||
config
|
||||
.append("pwd : ")
|
||||
.append(
|
||||
password != null && password.length() > 0 ? "<non-null>" : "<null>")
|
||||
.append(lineSep);
|
||||
config.append("protocol : ").append(protocol)
|
||||
.append(lineSep);
|
||||
config.append("host : ").append(host)
|
||||
.append(lineSep);
|
||||
config.append("folders : ").append(folderNames)
|
||||
.append(lineSep);
|
||||
config.append("recurse : ").append(recurse)
|
||||
.append(lineSep);
|
||||
config.append("exclude : ").append(exclude.toString())
|
||||
.append(lineSep);
|
||||
config.append("include : ").append(include.toString())
|
||||
.append(lineSep);
|
||||
config.append("batchSize : ").append(batchSize)
|
||||
.append(lineSep);
|
||||
config.append("fetchSize : ").append(fetchSize)
|
||||
.append(lineSep);
|
||||
config.append("read timeout : ").append(rTimeout)
|
||||
.append(lineSep);
|
||||
config.append("conection timeout : ").append(cTimeout)
|
||||
.append(lineSep);
|
||||
config.append("custom filter : ").append(customFilter)
|
||||
.append(lineSep);
|
||||
config.append("fetch mail since : ").append(fetchMailsSince)
|
||||
.append(lineSep);
|
||||
config.append("includeContent : ").append(includeContent)
|
||||
.append(lineSep);
|
||||
config.append("processAttachments : ").append(processAttachment)
|
||||
.append(lineSep);
|
||||
config.append("includeOtherUserFolders : ").append(includeOtherUserFolders)
|
||||
.append(lineSep);
|
||||
config.append("includeSharedFolders : ").append(includeSharedFolders)
|
||||
.append(lineSep);
|
||||
log.info("{}", config);
|
||||
}
|
||||
|
||||
class FolderIterator implements Iterator<Folder> {
|
||||
private Store mailbox;
|
||||
private List<String> topLevelFolders;
|
||||
private List<Folder> folders = null;
|
||||
private Folder lastFolder = null;
|
||||
|
||||
public FolderIterator(Store mailBox) {
|
||||
this.mailbox = mailBox;
|
||||
folders = new ArrayList<>();
|
||||
getTopLevelFolders(mailBox);
|
||||
if (includeOtherUserFolders) getOtherUserFolders();
|
||||
if (includeSharedFolders) getSharedFolders();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return !folders.isEmpty();
|
||||
}
|
||||
|
||||
public Folder next() {
|
||||
try {
|
||||
boolean hasMessages = false;
|
||||
Folder next;
|
||||
do {
|
||||
if (lastFolder != null) {
|
||||
lastFolder.close(false);
|
||||
lastFolder = null;
|
||||
}
|
||||
if (folders.isEmpty()) {
|
||||
mailbox.close();
|
||||
return null;
|
||||
}
|
||||
next = folders.remove(0);
|
||||
if (next != null) {
|
||||
String fullName = next.getFullName();
|
||||
if (!excludeFolder(fullName)) {
|
||||
hasMessages = (next.getType() & Folder.HOLDS_MESSAGES) != 0;
|
||||
next.open(Folder.READ_ONLY);
|
||||
lastFolder = next;
|
||||
log.info("Opened folder : {}", fullName);
|
||||
}
|
||||
if (recurse && ((next.getType() & Folder.HOLDS_FOLDERS) != 0)) {
|
||||
Folder[] children = next.list();
|
||||
log.info("Added its children to list : ");
|
||||
for (int i = children.length - 1; i >= 0; i--) {
|
||||
folders.add(0, children[i]);
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("child name : {}", children[i].getFullName());
|
||||
}
|
||||
}
|
||||
if (children.length == 0) log.info("NO children : ");
|
||||
}
|
||||
}
|
||||
} while (!hasMessages);
|
||||
return next;
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to read folders due to: {}", e);
|
||||
// throw new
|
||||
// DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
// "Folder open failed", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("It's read only mode...");
|
||||
}
|
||||
|
||||
private void getTopLevelFolders(Store mailBox) {
|
||||
if (folderNames != null) topLevelFolders = Arrays.asList(folderNames
|
||||
.split(","));
|
||||
for (int i = 0; topLevelFolders != null && i < topLevelFolders.size(); i++) {
|
||||
try {
|
||||
folders.add(mailbox.getFolder(topLevelFolders.get(i)));
|
||||
} catch (MessagingException e) {
|
||||
// skip bad ones unless it's the last one and still no good folder
|
||||
if (folders.size() == 0 && i == topLevelFolders.size() - 1) throw new DataImportHandlerException(
|
||||
DataImportHandlerException.SEVERE, "Folder retreival failed");
|
||||
}
|
||||
}
|
||||
if (topLevelFolders == null || topLevelFolders.size() == 0) {
|
||||
try {
|
||||
folders.add(mailBox.getDefaultFolder());
|
||||
} catch (MessagingException e) {
|
||||
throw new DataImportHandlerException(
|
||||
DataImportHandlerException.SEVERE, "Folder retreival failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void getOtherUserFolders() {
|
||||
try {
|
||||
Folder[] ufldrs = mailbox.getUserNamespaces(null);
|
||||
if (ufldrs != null) {
|
||||
log.info("Found {} user namespace folders", ufldrs.length);
|
||||
for (Folder ufldr : ufldrs)
|
||||
folders.add(ufldr);
|
||||
}
|
||||
} catch (MessagingException me) {
|
||||
log.warn("Messaging exception retrieving user namespaces: ", me);
|
||||
}
|
||||
}
|
||||
|
||||
private void getSharedFolders() {
|
||||
try {
|
||||
Folder[] sfldrs = mailbox.getSharedNamespaces();
|
||||
if (sfldrs != null) {
|
||||
log.info("Found {} shared namespace folders", sfldrs.length);
|
||||
for (Folder sfldr : sfldrs)
|
||||
folders.add(sfldr);
|
||||
}
|
||||
} catch (MessagingException me) {
|
||||
log.warn("Messaging exception retrieving shared namespaces: ", me);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean excludeFolder(String name) {
|
||||
for (String s : exclude) {
|
||||
if (name.matches(s)) return true;
|
||||
}
|
||||
for (String s : include) {
|
||||
if (name.matches(s)) return false;
|
||||
}
|
||||
return include.size() > 0;
|
||||
}
|
||||
}
|
||||
|
||||
class MessageIterator extends SearchTerm implements Iterator<Message> {
|
||||
private Folder folder;
|
||||
private Message[] messagesInCurBatch = null;
|
||||
private int current = 0;
|
||||
private int currentBatch = 0;
|
||||
private int batchSize = 0;
|
||||
private int totalInFolder = 0;
|
||||
private boolean doBatching = true;
|
||||
|
||||
public MessageIterator(Folder folder, int batchSize) {
|
||||
super();
|
||||
|
||||
try {
|
||||
this.folder = folder;
|
||||
this.batchSize = batchSize;
|
||||
SearchTerm st = getSearchTerm();
|
||||
|
||||
log.info("SearchTerm={}", st);
|
||||
|
||||
if (st != null || folder instanceof GmailFolder) {
|
||||
doBatching = false;
|
||||
// Searching can still take a while even though we're only pulling
|
||||
// envelopes; unless you're using gmail server-side filter, which is
|
||||
// fast
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Searching folder {} for messages", folder.getName());
|
||||
}
|
||||
final RTimer searchTimer = new RTimer();
|
||||
|
||||
// If using GMail, speed up the envelope processing by doing a
|
||||
// server-side
|
||||
// search for messages occurring on or after the fetch date (at
|
||||
// midnight),
|
||||
// which reduces the number of envelopes we need to pull from the
|
||||
// server
|
||||
// to apply the precise DateTerm filter; GMail server-side search has
|
||||
// date
|
||||
// granularity only but the local filters are also applied
|
||||
|
||||
if (folder instanceof GmailFolder && fetchMailsSince != null) {
|
||||
String afterCrit = "after:" + afterFmt.format(fetchMailsSince);
|
||||
log.info("Added server-side gmail filter: {}", afterCrit);
|
||||
Message[] afterMessages = folder.search(new GmailRawSearchTerm(
|
||||
afterCrit));
|
||||
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("GMail server-side filter found {} messages received {} in folder {}"
|
||||
, afterMessages.length, afterCrit, folder.getName());
|
||||
}
|
||||
|
||||
// now pass in the server-side filtered messages to the local filter
|
||||
messagesInCurBatch = folder.search((st != null ? st : this), afterMessages);
|
||||
} else {
|
||||
messagesInCurBatch = folder.search(st);
|
||||
}
|
||||
totalInFolder = messagesInCurBatch.length;
|
||||
folder.fetch(messagesInCurBatch, fp);
|
||||
current = 0;
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Total messages : {}", totalInFolder);
|
||||
log.info("Search criteria applied. Batching disabled. Took {} (ms)", searchTimer.getTime()); // logOk
|
||||
}
|
||||
} else {
|
||||
totalInFolder = folder.getMessageCount();
|
||||
log.info("Total messages : {}", totalInFolder);
|
||||
getNextBatch(batchSize, folder);
|
||||
}
|
||||
} catch (MessagingException e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Message retreival failed", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void getNextBatch(int batchSize, Folder folder)
|
||||
throws MessagingException {
|
||||
// after each batch invalidate cache
|
||||
if (messagesInCurBatch != null) {
|
||||
for (Message m : messagesInCurBatch) {
|
||||
if (m instanceof IMAPMessage) ((IMAPMessage) m).invalidateHeaders();
|
||||
}
|
||||
}
|
||||
int lastMsg = (currentBatch + 1) * batchSize;
|
||||
lastMsg = lastMsg > totalInFolder ? totalInFolder : lastMsg;
|
||||
messagesInCurBatch = folder.getMessages(currentBatch * batchSize + 1,
|
||||
lastMsg);
|
||||
folder.fetch(messagesInCurBatch, fp);
|
||||
current = 0;
|
||||
currentBatch++;
|
||||
log.info("Current Batch : {}", currentBatch);
|
||||
log.info("Messages in this batch : {}", messagesInCurBatch.length);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
boolean hasMore = current < messagesInCurBatch.length;
|
||||
if (!hasMore && doBatching && currentBatch * batchSize < totalInFolder) {
|
||||
// try next batch
|
||||
try {
|
||||
getNextBatch(batchSize, folder);
|
||||
hasMore = current < messagesInCurBatch.length;
|
||||
} catch (MessagingException e) {
|
||||
throw new DataImportHandlerException(
|
||||
DataImportHandlerException.SEVERE, "Message retreival failed", e);
|
||||
}
|
||||
}
|
||||
return hasMore;
|
||||
}
|
||||
|
||||
public Message next() {
|
||||
return hasNext() ? messagesInCurBatch[current++] : null;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("It's read only mode...");
|
||||
}
|
||||
|
||||
private SearchTerm getSearchTerm() {
|
||||
if (filters.size() == 0) return null;
|
||||
if (filters.size() == 1) return filters.get(0).getCustomSearch(folder);
|
||||
SearchTerm last = filters.get(0).getCustomSearch(folder);
|
||||
for (int i = 1; i < filters.size(); i++) {
|
||||
CustomFilter filter = filters.get(i);
|
||||
SearchTerm st = filter.getCustomSearch(folder);
|
||||
if (st != null) {
|
||||
last = new AndTerm(last, st);
|
||||
}
|
||||
}
|
||||
return last;
|
||||
}
|
||||
|
||||
public boolean match(Message message) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static class MailsSinceLastCheckFilter implements CustomFilter {
|
||||
|
||||
private Date since;
|
||||
|
||||
public MailsSinceLastCheckFilter(Date date) {
|
||||
since = date;
|
||||
}
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
public SearchTerm getCustomSearch(final Folder folder) {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Building mail filter for messages in {} that occur after {}"
|
||||
, folder.getName(), sinceDateParser.format(since));
|
||||
}
|
||||
return new DateTerm(ComparisonTerm.GE, since) {
|
||||
private int matched = 0;
|
||||
private int seen = 0;
|
||||
|
||||
@Override
|
||||
public boolean match(Message msg) {
|
||||
boolean isMatch = false;
|
||||
++seen;
|
||||
try {
|
||||
Date msgDate = msg.getReceivedDate();
|
||||
if (msgDate == null) msgDate = msg.getSentDate();
|
||||
|
||||
if (msgDate != null && msgDate.getTime() >= since.getTime()) {
|
||||
++matched;
|
||||
isMatch = true;
|
||||
} else {
|
||||
String msgDateStr = (msgDate != null) ? sinceDateParser.format(msgDate) : "null";
|
||||
String sinceDateStr = (since != null) ? sinceDateParser.format(since) : "null";
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Message {} was received at [{}], since filter is [{}]"
|
||||
, msg.getSubject(), msgDateStr, sinceDateStr);
|
||||
}
|
||||
}
|
||||
} catch (MessagingException e) {
|
||||
log.warn("Failed to process message due to: {}", e, e);
|
||||
}
|
||||
|
||||
if (seen % 100 == 0) {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Matched {} of {} messages since: {}"
|
||||
, matched, seen, sinceDateParser.format(since));
|
||||
}
|
||||
}
|
||||
|
||||
return isMatch;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// user settings stored in member variables

// Mail server connection settings (presumably read from entity attributes
// via the getStringFromContext helpers below — confirm against init code).
private String user;
private String password;
private String host;
private String protocol;

// Folder selection: folder names plus include/exclude lists and whether to
// descend into subfolders.
private String folderNames;
private List<String> exclude = new ArrayList<>();
private List<String> include = new ArrayList<>();
private boolean recurse;

// Tuning knobs: batch/fetch sizes and connect/read timeouts.
private int batchSize;
private int fetchSize;
private int cTimeout;
private int rTimeout;

// Message filtering: date cutoff (see MailsSinceLastCheckFilter) and an
// optional custom filter class name.
private Date fetchMailsSince;
private String customFilter;

// Content-handling switches.
private boolean processAttachment = true;
private boolean includeContent = true;
private boolean includeOtherUserFolders = false;
private boolean includeSharedFolders = false;

// holds the current state
private Store mailbox;
private boolean connected = false;
private FolderIterator folderIter;
private MessageIterator msgIter;
private List<CustomFilter> filters = new ArrayList<>();

// Fetch profile shared by all instances: pre-fetch envelope, flags and the
// X-Mailer header in bulk instead of lazily per message.
private static FetchProfile fp = new FetchProfile();

static {
  fp.add(FetchProfile.Item.ENVELOPE);
  fp.add(FetchProfile.Item.FLAGS);
  fp.add("X-Mailer");
}

// Fields To Index
// single valued
private static final String MESSAGE_ID = "messageId";
private static final String SUBJECT = "subject";
private static final String FROM = "from";
private static final String SENT_DATE = "sentDate";
private static final String XMAILER = "xMailer";
// multi valued
private static final String TO_CC_BCC = "allTo";
private static final String FLAGS = "flags";
private static final String CONTENT = "content";
private static final String ATTACHMENT = "attachment";
private static final String ATTACHMENT_NAMES = "attachmentNames";
// flag values
private static final String FLAG_NONE = "none";
private static final String FLAG_ANSWERED = "answered";
private static final String FLAG_DELETED = "deleted";
private static final String FLAG_DRAFT = "draft";
private static final String FLAG_FLAGGED = "flagged";
private static final String FLAG_RECENT = "recent";
private static final String FLAG_SEEN = "seen";
|
||||
|
||||
private int getIntFromContext(String prop, int ifNull) {
|
||||
int v = ifNull;
|
||||
try {
|
||||
String val = context.getEntityAttribute(prop);
|
||||
if (val != null) {
|
||||
val = context.replaceTokens(val);
|
||||
v = Integer.parseInt(val);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
// do nothing
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
private boolean getBoolFromContext(String prop, boolean ifNull) {
|
||||
boolean v = ifNull;
|
||||
String val = context.getEntityAttribute(prop);
|
||||
if (val != null) {
|
||||
val = context.replaceTokens(val);
|
||||
v = Boolean.valueOf(val);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
private String getStringFromContext(String prop, String ifNull) {
|
||||
String v = ifNull;
|
||||
String val = context.getEntityAttribute(prop);
|
||||
if (val != null) {
|
||||
val = context.replaceTokens(val);
|
||||
v = val;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
@SuppressForbidden(reason = "Uses context class loader as a workaround to inject correct classloader to 3rd party libs")
|
||||
private static <T> T withContextClassLoader(ClassLoader loader, Supplier<T> action) {
|
||||
Thread ct = Thread.currentThread();
|
||||
ClassLoader prev = ct.getContextClassLoader();
|
||||
try {
|
||||
ct.setContextClassLoader(loader);
|
||||
return action.get();
|
||||
} finally {
|
||||
ct.setContextClassLoader(prev);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,253 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.tika.config.TikaConfig;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.parser.AutoDetectParser;
|
||||
import org.apache.tika.parser.EmptyParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.parser.html.HtmlMapper;
|
||||
import org.apache.tika.parser.html.IdentityHtmlMapper;
|
||||
import org.apache.tika.sax.BodyContentHandler;
|
||||
import org.apache.tika.sax.ContentHandlerDecorator;
|
||||
import org.apache.tika.sax.XHTMLContentHandler;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.TransformerConfigurationException;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.sax.SAXTransformerFactory;
|
||||
import javax.xml.transform.sax.TransformerHandler;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
import static org.apache.solr.handler.dataimport.DataImporter.COLUMN;
|
||||
import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
|
||||
/**
|
||||
* <p>An implementation of {@link EntityProcessor} which reads data from rich docs
|
||||
* using <a href="http://tika.apache.org/">Apache Tika</a>
|
||||
*
|
||||
* <p>To index latitude/longitude data that might
|
||||
* be extracted from a file's metadata, identify
|
||||
* the geo field for this information with this attribute:
|
||||
* <code>spatialMetadataField</code>
|
||||
*
|
||||
* @since solr 3.1
|
||||
*/
|
||||
public class TikaEntityProcessor extends EntityProcessorBase {
|
||||
private static Parser EMPTY_PARSER = new EmptyParser();
|
||||
private TikaConfig tikaConfig;
|
||||
private String format = "text";
|
||||
private boolean done = false;
|
||||
private boolean extractEmbedded = false;
|
||||
private String parser;
|
||||
static final String AUTO_PARSER = "org.apache.tika.parser.AutoDetectParser";
|
||||
private String htmlMapper;
|
||||
private String spatialMetadataField;
|
||||
|
||||
@Override
|
||||
public void init(Context context) {
|
||||
super.init(context);
|
||||
done = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void firstInit(Context context) {
|
||||
super.firstInit(context);
|
||||
// See similar code in ExtractingRequestHandler.inform
|
||||
try {
|
||||
String tikaConfigLoc = context.getResolvedEntityAttribute("tikaConfig");
|
||||
if (tikaConfigLoc == null) {
|
||||
ClassLoader classLoader = context.getSolrCore().getResourceLoader().getClassLoader();
|
||||
try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) {
|
||||
tikaConfig = new TikaConfig(is);
|
||||
}
|
||||
} else {
|
||||
File configFile = new File(tikaConfigLoc);
|
||||
if (configFile.isAbsolute()) {
|
||||
tikaConfig = new TikaConfig(configFile);
|
||||
} else { // in conf/
|
||||
try (InputStream is = context.getSolrCore().getResourceLoader().openResource(tikaConfigLoc)) {
|
||||
tikaConfig = new TikaConfig(is);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
wrapAndThrow(SEVERE, e,"Unable to load Tika Config");
|
||||
}
|
||||
|
||||
String extractEmbeddedString = context.getResolvedEntityAttribute("extractEmbedded");
|
||||
if ("true".equals(extractEmbeddedString)) {
|
||||
extractEmbedded = true;
|
||||
}
|
||||
format = context.getResolvedEntityAttribute("format");
|
||||
if(format == null)
|
||||
format = "text";
|
||||
if (!"html".equals(format) && !"xml".equals(format) && !"text".equals(format)&& !"none".equals(format) )
|
||||
throw new DataImportHandlerException(SEVERE, "'format' can be one of text|html|xml|none");
|
||||
|
||||
htmlMapper = context.getResolvedEntityAttribute("htmlMapper");
|
||||
if (htmlMapper == null)
|
||||
htmlMapper = "default";
|
||||
if (!"default".equals(htmlMapper) && !"identity".equals(htmlMapper))
|
||||
throw new DataImportHandlerException(SEVERE, "'htmlMapper', if present, must be 'default' or 'identity'");
|
||||
|
||||
parser = context.getResolvedEntityAttribute("parser");
|
||||
if(parser == null) {
|
||||
parser = AUTO_PARSER;
|
||||
}
|
||||
|
||||
spatialMetadataField = context.getResolvedEntityAttribute("spatialMetadataField");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> nextRow() {
|
||||
if(done) return null;
|
||||
Map<String, Object> row = new HashMap<>();
|
||||
@SuppressWarnings({"unchecked"})
|
||||
DataSource<InputStream> dataSource = context.getDataSource();
|
||||
InputStream is = dataSource.getData(context.getResolvedEntityAttribute(URL));
|
||||
ContentHandler contentHandler = null;
|
||||
Metadata metadata = new Metadata();
|
||||
StringWriter sw = new StringWriter();
|
||||
try {
|
||||
if ("html".equals(format)) {
|
||||
contentHandler = getHtmlHandler(sw);
|
||||
} else if ("xml".equals(format)) {
|
||||
contentHandler = getXmlContentHandler(sw);
|
||||
} else if ("text".equals(format)) {
|
||||
contentHandler = getTextContentHandler(sw);
|
||||
} else if("none".equals(format)){
|
||||
contentHandler = new DefaultHandler();
|
||||
}
|
||||
} catch (TransformerConfigurationException e) {
|
||||
wrapAndThrow(SEVERE, e, "Unable to create content handler");
|
||||
}
|
||||
Parser tikaParser = null;
|
||||
if(parser.equals(AUTO_PARSER)){
|
||||
tikaParser = new AutoDetectParser(tikaConfig);
|
||||
} else {
|
||||
tikaParser = context.getSolrCore().getResourceLoader().newInstance(parser, Parser.class);
|
||||
}
|
||||
try {
|
||||
ParseContext context = new ParseContext();
|
||||
if ("identity".equals(htmlMapper)){
|
||||
context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
|
||||
}
|
||||
if (extractEmbedded) {
|
||||
context.set(Parser.class, tikaParser);
|
||||
} else {
|
||||
context.set(Parser.class, EMPTY_PARSER);
|
||||
}
|
||||
tikaParser.parse(is, contentHandler, metadata , context);
|
||||
} catch (Exception e) {
|
||||
if(SKIP.equals(onError)) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
|
||||
"Document skipped :" + e.getMessage());
|
||||
}
|
||||
wrapAndThrow(SEVERE, e, "Unable to read content");
|
||||
}
|
||||
IOUtils.closeQuietly(is);
|
||||
for (Map<String, String> field : context.getAllEntityFields()) {
|
||||
if (!"true".equals(field.get("meta"))) continue;
|
||||
String col = field.get(COLUMN);
|
||||
String s = metadata.get(col);
|
||||
if (s != null) row.put(col, s);
|
||||
}
|
||||
if(!"none".equals(format) ) row.put("text", sw.toString());
|
||||
tryToAddLatLon(metadata, row);
|
||||
done = true;
|
||||
return row;
|
||||
}
|
||||
|
||||
private void tryToAddLatLon(Metadata metadata, Map<String, Object> row) {
|
||||
if (spatialMetadataField == null) return;
|
||||
String latString = metadata.get(Metadata.LATITUDE);
|
||||
String lonString = metadata.get(Metadata.LONGITUDE);
|
||||
if (latString != null && lonString != null) {
|
||||
row.put(spatialMetadataField, String.format(Locale.ROOT, "%s,%s", latString, lonString));
|
||||
}
|
||||
}
|
||||
|
||||
private static ContentHandler getHtmlHandler(Writer writer)
|
||||
throws TransformerConfigurationException {
|
||||
SAXTransformerFactory factory = (SAXTransformerFactory)
|
||||
TransformerFactory.newInstance();
|
||||
TransformerHandler handler = factory.newTransformerHandler();
|
||||
handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
|
||||
handler.setResult(new StreamResult(writer));
|
||||
return new ContentHandlerDecorator(handler) {
|
||||
@Override
|
||||
public void startElement(
|
||||
String uri, String localName, String name, Attributes atts)
|
||||
throws SAXException {
|
||||
if (XHTMLContentHandler.XHTML.equals(uri)) {
|
||||
uri = null;
|
||||
}
|
||||
if (!"head".equals(localName)) {
|
||||
super.startElement(uri, localName, name, atts);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endElement(String uri, String localName, String name)
|
||||
throws SAXException {
|
||||
if (XHTMLContentHandler.XHTML.equals(uri)) {
|
||||
uri = null;
|
||||
}
|
||||
if (!"head".equals(localName)) {
|
||||
super.endElement(uri, localName, name);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startPrefixMapping(String prefix, String uri) {/*no op*/ }
|
||||
|
||||
@Override
|
||||
public void endPrefixMapping(String prefix) {/*no op*/ }
|
||||
};
|
||||
}
|
||||
|
||||
private static ContentHandler getTextContentHandler(Writer writer) {
|
||||
return new BodyContentHandler(writer);
|
||||
}
|
||||
|
||||
private static ContentHandler getXmlContentHandler(Writer writer)
|
||||
throws TransformerConfigurationException {
|
||||
SAXTransformerFactory factory = (SAXTransformerFactory)
|
||||
TransformerFactory.newInstance();
|
||||
TransformerHandler handler = factory.newTransformerHandler();
|
||||
handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
|
||||
handler.setResult(new StreamResult(writer));
|
||||
return handler;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,23 +0,0 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!-- not a package-info.java, because we already defined this package in core/ -->
|
||||
<html>
|
||||
<body>
|
||||
Plugins for <code>DataImportHandler</code> that have additional dependencies.
|
||||
</body>
|
||||
</html>
|
|
@ -1,21 +0,0 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<body>
|
||||
Apache Solr Search Server: DataImportHandler Extras contrib. <b>This contrib module is deprecated as of 8.6</b>
|
||||
</body>
|
||||
</html>
|
|
@ -1,20 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<properties>
|
||||
<service-loader initializableProblemHandler="ignore"/>
|
||||
</properties>
|
Binary file not shown.
Binary file not shown.
|
@ -1,205 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
This is the Solr schema file. This file should be named "schema.xml" and
|
||||
should be in the conf directory under the solr home
|
||||
(i.e. ./solr/conf/schema.xml by default)
|
||||
or located where the classloader for the Solr webapp can find it.
|
||||
|
||||
This example schema is the recommended starting point for users.
|
||||
It should be kept correct and concise, usable out-of-the-box.
|
||||
|
||||
For more information, on how to customize this file, please see
|
||||
http://wiki.apache.org/solr/SchemaXml
|
||||
-->
|
||||
|
||||
<schema name="test" version="1.2">
|
||||
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
||||
Applications should change this to reflect the nature of the search collection.
|
||||
version="1.1" is Solr's version number for the schema syntax and semantics. It should
|
||||
not normally be changed by applications.
|
||||
1.0: multiValued attribute did not exist, all fields are multiValued by nature
|
||||
1.1: multiValued attribute introduced, false by default -->
|
||||
|
||||
|
||||
<!-- field type definitions. The "name" attribute is
|
||||
just a label to be used by field definitions. The "class"
|
||||
attribute and any other attributes determine the real
|
||||
behavior of the fieldType.
|
||||
Class names starting with "solr" refer to java classes in the
|
||||
org.apache.solr.analysis package.
|
||||
-->
|
||||
|
||||
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
|
||||
- StrField and TextField support an optional compressThreshold which
|
||||
limits compression (if enabled in the derived fields) to values which
|
||||
exceed a certain size (in characters).
|
||||
-->
|
||||
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
<!-- boolean type: "true" or "false" -->
|
||||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
<!-- The optional sortMissingLast and sortMissingFirst attributes are
|
||||
currently supported on types that are sorted internally as strings.
|
||||
- If sortMissingLast="true", then a sort on this field will cause documents
|
||||
without the field to come after documents with the field,
|
||||
regardless of the requested sort order (asc or desc).
|
||||
- If sortMissingFirst="true", then a sort on this field will cause documents
|
||||
without the field to come before documents with the field,
|
||||
regardless of the requested sort order.
|
||||
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
||||
then default lucene sorting will be used which places docs without the
|
||||
field first in an ascending sort and last in a descending sort.
|
||||
-->
|
||||
|
||||
|
||||
<!--
|
||||
Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
|
||||
-->
|
||||
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
|
||||
<fieldType name="float" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
|
||||
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
|
||||
<fieldType name="double" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
|
||||
<fieldType name="latLon" class="solr.LatLonType" subFieldType="double"/>
|
||||
|
||||
|
||||
<!--
|
||||
Numeric field types that index each value at various levels of precision
|
||||
to accelerate range queries when the number of values between the range
|
||||
endpoints is large. See the javadoc for NumericRangeQuery for internal
|
||||
implementation details.
|
||||
|
||||
Smaller precisionStep values (specified in bits) will lead to more tokens
|
||||
indexed per value, slightly larger index size, and faster range queries.
|
||||
A precisionStep of 0 disables indexing at different precision levels.
|
||||
-->
|
||||
<fieldType name="tint" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
|
||||
<fieldType name="tfloat" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
|
||||
<fieldType name="tlong" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
|
||||
<fieldType name="tdouble" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
|
||||
|
||||
|
||||
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
||||
is a more restricted form of the canonical representation of dateTime
|
||||
http://www.w3.org/TR/xmlschema-2/#dateTime
|
||||
The trailing "Z" designates UTC time and is mandatory.
|
||||
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
||||
All other components are mandatory.
|
||||
|
||||
Expressions can also be used to denote calculations that should be
|
||||
performed relative to "NOW" to determine the value, ie...
|
||||
|
||||
NOW/HOUR
|
||||
... Round to the start of the current hour
|
||||
NOW-1DAY
|
||||
... Exactly 1 day prior to now
|
||||
NOW/DAY+6MONTHS+3DAYS
|
||||
... 6 months and 3 days in the future from the start of
|
||||
the current day
|
||||
|
||||
Consult the TrieDateField javadocs for more information.
|
||||
-->
|
||||
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
|
||||
<!-- The "RandomSortField" is not used to store or search any
|
||||
data. You can declare fields of this type it in your schema
|
||||
to generate pseudo-random orderings of your docs for sorting
|
||||
purposes. The ordering is generated based on the field name
|
||||
and the version of the index. As long as the index version
|
||||
remains unchanged, and the same field name is reused,
|
||||
the ordering of the docs will be consistent.
|
||||
If you want different pseudo-random orderings of documents,
|
||||
for the same version of the index, use a dynamicField and
|
||||
change the name
|
||||
-->
|
||||
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>
|
||||
|
||||
<!-- solr.TextField allows the specification of custom text analyzers
|
||||
specified as a tokenizer and a list of token filters. Different
|
||||
analyzers may be specified for indexing and querying.
|
||||
|
||||
The optional positionIncrementGap puts space between multiple fields of
|
||||
this type on the same document, with the purpose of preventing false phrase
|
||||
matching across fields.
|
||||
|
||||
For more info on customizing your analyzer chain, please see
|
||||
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
||||
-->
|
||||
|
||||
<!-- One can also specify an existing Analyzer class that has a
|
||||
default constructor via the class attribute on the analyzer element
|
||||
<fieldType name="text_greek" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
||||
</fieldType>
|
||||
-->
|
||||
|
||||
<!-- A text field that only splits on whitespace for exact matching of words -->
|
||||
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field that uses WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||
Duplicate tokens at the same position (which may result from Stemmed Synonyms or
|
||||
WordDelim parts) are removed.
|
||||
-->
|
||||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!-- in this example, we will only use synonyms at query time
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>-->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>-->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<!-- since fields of this type are by default not stored or indexed, any data added to
|
||||
them will be ignored outright
|
||||
-->
|
||||
<fieldType name="ignored" stored="false" indexed="false" class="solr.StrField"/>
|
||||
|
||||
<field name="title" type="string" indexed="true" stored="true"/>
|
||||
<field name="author" type="string" indexed="true" stored="true"/>
|
||||
<field name="text" type="text" indexed="true" stored="true"/>
|
||||
<field name="foo_i" type="int" indexed="true" stored="false"/>
|
||||
<field name="home" type="latLon" indexed="true" stored="true"/>
|
||||
</schema>
|
|
@ -1,277 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<config>
|
||||
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
|
||||
<indexConfig>
|
||||
<useCompoundFile>${useCompoundFile:false}</useCompoundFile>
|
||||
</indexConfig>
|
||||
|
||||
<!-- Used to specify an alternate directory to hold all index data
|
||||
other than the default ./data under the Solr home.
|
||||
If replication is in use, this should match the replication configuration. -->
|
||||
<dataDir>${solr.data.dir:}</dataDir>
|
||||
|
||||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
|
||||
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
||||
|
||||
<!-- the default high-performance update handler -->
|
||||
<updateHandler class="solr.DirectUpdateHandler2">
|
||||
|
||||
<!-- A prefix of "solr." for class names is an alias that
|
||||
causes solr to search appropriate packages, including
|
||||
org.apache.solr.(search|update|request|core|analysis)
|
||||
-->
|
||||
|
||||
<!-- Limit the number of deletions Solr will buffer during doc updating.
|
||||
|
||||
Setting this lower can help bound memory use during indexing.
|
||||
-->
|
||||
<maxPendingDeletes>100000</maxPendingDeletes>
|
||||
|
||||
</updateHandler>
|
||||
|
||||
|
||||
<query>
|
||||
<!-- Maximum number of clauses in a boolean query... can affect
|
||||
range or prefix queries that expand to big boolean
|
||||
queries. An exception is thrown if exceeded. -->
|
||||
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>
|
||||
|
||||
|
||||
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
|
||||
unordered sets of *all* documents that match a query.
|
||||
When a new searcher is opened, its caches may be prepopulated
|
||||
or "autowarmed" using data from caches in the old searcher.
|
||||
autowarmCount is the number of items to prepopulate. For CaffeineCache,
|
||||
the autowarmed items will be the most recently accessed items.
|
||||
Parameters:
|
||||
class - the SolrCache implementation (currently only CaffeineCache)
|
||||
size - the maximum number of entries in the cache
|
||||
initialSize - the initial capacity (number of entries) of
|
||||
the cache. (seel java.util.HashMap)
|
||||
autowarmCount - the number of entries to prepopulate from
|
||||
and old cache.
|
||||
-->
|
||||
<filterCache
|
||||
class="solr.CaffeineCache"
|
||||
size="512"
|
||||
initialSize="512"
|
||||
autowarmCount="256"/>
|
||||
|
||||
<!-- queryResultCache caches results of searches - ordered lists of
|
||||
document ids (DocList) based on a query, a sort, and the range
|
||||
of documents requested. -->
|
||||
<queryResultCache
|
||||
class="solr.CaffeineCache"
|
||||
size="512"
|
||||
initialSize="512"
|
||||
autowarmCount="256"/>
|
||||
|
||||
<!-- documentCache caches Lucene Document objects (the stored fields for each document).
|
||||
Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
|
||||
<documentCache
|
||||
class="solr.CaffeineCache"
|
||||
size="512"
|
||||
initialSize="512"
|
||||
autowarmCount="0"/>
|
||||
|
||||
<!-- If true, stored fields that are not requested will be loaded lazily.
|
||||
|
||||
This can result in a significant speed improvement if the usual case is to
|
||||
not load all stored fields, especially if the skipped fields are large compressed
|
||||
text fields.
|
||||
-->
|
||||
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
||||
|
||||
<!-- Example of a generic cache. These caches may be accessed by name
|
||||
through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
|
||||
The purpose is to enable easy caching of user/application level data.
|
||||
The regenerator argument should be specified as an implementation
|
||||
of solr.search.CacheRegenerator if autowarming is desired. -->
|
||||
<!--
|
||||
<cache name="myUserCache"
|
||||
class="solr.CaffeineCache"
|
||||
size="4096"
|
||||
initialSize="1024"
|
||||
autowarmCount="1024"
|
||||
regenerator="org.mycompany.mypackage.MyRegenerator"
|
||||
/>
|
||||
-->
|
||||
|
||||
<!-- An optimization that attempts to use a filter to satisfy a search.
|
||||
If the requested sort does not include score, then the filterCache
|
||||
will be checked for a filter matching the query. If found, the filter
|
||||
will be used as the source of document ids, and then the sort will be
|
||||
applied to that.
|
||||
<useFilterForSortedQuery>true</useFilterForSortedQuery>
|
||||
-->
|
||||
|
||||
<!-- An optimization for use with the queryResultCache. When a search
|
||||
is requested, a superset of the requested number of document ids
|
||||
are collected. For example, if a search for a particular query
|
||||
requests matching documents 10 through 19, and queryWindowSize is 50,
|
||||
then documents 0 through 49 will be collected and cached. Any further
|
||||
requests in that range can be satisfied via the cache. -->
|
||||
<queryResultWindowSize>50</queryResultWindowSize>
|
||||
|
||||
<!-- Maximum number of documents to cache for any entry in the
|
||||
queryResultCache. -->
|
||||
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
|
||||
|
||||
<!-- a newSearcher event is fired whenever a new searcher is being prepared
|
||||
and there is a current searcher handling requests (aka registered). -->
|
||||
<!-- QuerySenderListener takes an array of NamedList and executes a
|
||||
local query request for each NamedList in sequence. -->
|
||||
<!--<listener event="newSearcher" class="solr.QuerySenderListener">-->
|
||||
<!--<arr name="queries">-->
|
||||
<!--<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>-->
|
||||
<!--<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>-->
|
||||
<!--<lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>-->
|
||||
<!--</arr>-->
|
||||
<!--</listener>-->
|
||||
|
||||
<!-- a firstSearcher event is fired whenever a new searcher is being
|
||||
prepared but there is no current registered searcher to handle
|
||||
requests or to gain autowarming data from. -->
|
||||
<!--<listener event="firstSearcher" class="solr.QuerySenderListener">-->
|
||||
<!--<arr name="queries">-->
|
||||
<!--</arr>-->
|
||||
<!--</listener>-->
|
||||
|
||||
<!-- If a search request comes in and there is no current registered searcher,
|
||||
then immediately register the still warming searcher and use it. If
|
||||
"false" then all requests will block until the first searcher is done
|
||||
warming. -->
|
||||
<useColdSearcher>false</useColdSearcher>
|
||||
|
||||
<!-- Maximum number of searchers that may be warming in the background
|
||||
concurrently. An error is returned if this limit is exceeded. Recommend
|
||||
1-2 for read-only followers, higher for leaders w/o cache warming. -->
|
||||
<maxWarmingSearchers>4</maxWarmingSearchers>
|
||||
|
||||
</query>
|
||||
|
||||
<requestDispatcher>
|
||||
<!--Make sure your system has some authentication before enabling remote streaming!
|
||||
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="-1" />
|
||||
-->
|
||||
|
||||
<!-- Set HTTP caching related parameters (for proxy caches and clients).
|
||||
|
||||
To get the behaviour of Solr 1.2 (ie: no caching related headers)
|
||||
use the never304="true" option and do not specify a value for
|
||||
<cacheControl>
|
||||
-->
|
||||
<httpCaching never304="true">
|
||||
<!--httpCaching lastModifiedFrom="openTime"
|
||||
etagSeed="Solr"-->
|
||||
<!-- lastModFrom="openTime" is the default, the Last-Modified value
|
||||
(and validation against If-Modified-Since requests) will all be
|
||||
relative to when the current Searcher was opened.
|
||||
You can change it to lastModFrom="dirLastMod" if you want the
|
||||
value to exactly corrispond to when the physical index was last
|
||||
modified.
|
||||
|
||||
etagSeed="..." is an option you can change to force the ETag
|
||||
header (and validation against If-None-Match requests) to be
|
||||
differnet even if the index has not changed (ie: when making
|
||||
significant changes to your config file)
|
||||
|
||||
lastModifiedFrom and etagSeed are both ignored if you use the
|
||||
never304="true" option.
|
||||
-->
|
||||
<!-- If you include a <cacheControl> directive, it will be used to
|
||||
generate a Cache-Control header, as well as an Expires header
|
||||
if the value contains "max-age="
|
||||
|
||||
By default, no Cache-Control header is generated.
|
||||
|
||||
You can use the <cacheControl> option even if you have set
|
||||
never304="true"
|
||||
-->
|
||||
<!-- <cacheControl>max-age=30, public</cacheControl> -->
|
||||
</httpCaching>
|
||||
</requestDispatcher>
|
||||
|
||||
<requestHandler name="/select" class="solr.SearchHandler">
|
||||
<!-- default values for query parameters -->
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
<!--
|
||||
<int name="rows">10</int>
|
||||
<str name="fl">*</str>
|
||||
<str name="version">2.1</str>
|
||||
-->
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
|
||||
</requestHandler>
|
||||
|
||||
<!--
|
||||
|
||||
Search components are registered to SolrCore and used by Search Handlers
|
||||
|
||||
By default, the following components are avaliable:
|
||||
|
||||
<searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
|
||||
<searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
|
||||
<searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
|
||||
<searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
|
||||
<searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
|
||||
|
||||
If you register a searchComponent to one of the standard names, that will be used instead.
|
||||
|
||||
-->
|
||||
|
||||
<requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
</lst>
|
||||
<!--
|
||||
By default, this will register the following components:
|
||||
|
||||
<arr name="components">
|
||||
<str>query</str>
|
||||
<str>facet</str>
|
||||
<str>mlt</str>
|
||||
<str>highlight</str>
|
||||
<str>debug</str>
|
||||
</arr>
|
||||
|
||||
To insert handlers before or after the 'standard' components, use:
|
||||
|
||||
<arr name="first-components">
|
||||
<str>first</str>
|
||||
</arr>
|
||||
|
||||
<arr name="last-components">
|
||||
<str>last</str>
|
||||
</arr>
|
||||
|
||||
-->
|
||||
</requestHandler>
|
||||
|
||||
<!-- config for the admin interface -->
|
||||
<admin>
|
||||
<defaultQuery>*:*</defaultQuery>
|
||||
</admin>
|
||||
|
||||
</config>
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<title>Title in the header</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>H1 Header</h1>
|
||||
<div>Basic div</div>
|
||||
<div class="classAttribute">Div with attribute</div>
|
||||
</body>
|
||||
</html>
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 13 KiB |
Binary file not shown.
Binary file not shown.
|
@ -1,199 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
// Test mailbox is like this: foldername(mailcount)
|
||||
// top1(2) -> child11(6)
|
||||
// -> child12(0)
|
||||
// top2(2) -> child21(1)
|
||||
// -> grandchild211(2)
|
||||
// -> grandchild212(1)
|
||||
// -> child22(2)
|
||||
|
||||
/**
|
||||
* Test for MailEntityProcessor. The tests are marked as ignored because we'd need a mail server (real or mocked) for
|
||||
* these to work.
|
||||
*
|
||||
* TODO: Find a way to make the tests actually test code
|
||||
*
|
||||
*
|
||||
* @see org.apache.solr.handler.dataimport.MailEntityProcessor
|
||||
* @since solr 1.4
|
||||
*/
|
||||
@Ignore("Needs a Mock Mail Server to work")
|
||||
public class TestMailEntityProcessor extends AbstractDataImportHandlerTestCase {
|
||||
|
||||
// Credentials
|
||||
private static final String user = "user";
|
||||
private static final String password = "password";
|
||||
private static final String host = "host";
|
||||
private static final String protocol = "imaps";
|
||||
|
||||
private static Map<String, String> paramMap = new HashMap<>();
|
||||
|
||||
@Test
|
||||
@Ignore("Needs a Mock Mail Server to work")
|
||||
public void testConnection() {
|
||||
// also tests recurse = false and default settings
|
||||
paramMap.put("folders", "top2");
|
||||
paramMap.put("recurse", "false");
|
||||
paramMap.put("processAttachement", "false");
|
||||
DataImporter di = new DataImporter();
|
||||
di.loadAndInit(getConfigFromMap(paramMap));
|
||||
@SuppressWarnings({"unchecked"})
|
||||
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
|
||||
SolrWriterImpl swi = new SolrWriterImpl();
|
||||
di.runCmd(rp, swi);
|
||||
assertEquals("top1 did not return 2 messages", swi.docs.size(), 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore("Needs a Mock Mail Server to work")
|
||||
public void testRecursion() {
|
||||
paramMap.put("folders", "top2");
|
||||
paramMap.put("recurse", "true");
|
||||
paramMap.put("processAttachement", "false");
|
||||
DataImporter di = new DataImporter();
|
||||
di.loadAndInit(getConfigFromMap(paramMap));
|
||||
@SuppressWarnings({"unchecked"})
|
||||
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
|
||||
SolrWriterImpl swi = new SolrWriterImpl();
|
||||
di.runCmd(rp, swi);
|
||||
assertEquals("top2 and its children did not return 8 messages", swi.docs.size(), 8);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore("Needs a Mock Mail Server to work")
|
||||
public void testExclude() {
|
||||
paramMap.put("folders", "top2");
|
||||
paramMap.put("recurse", "true");
|
||||
paramMap.put("processAttachement", "false");
|
||||
paramMap.put("exclude", ".*grandchild.*");
|
||||
DataImporter di = new DataImporter();
|
||||
di.loadAndInit(getConfigFromMap(paramMap));
|
||||
@SuppressWarnings({"unchecked"})
|
||||
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
|
||||
SolrWriterImpl swi = new SolrWriterImpl();
|
||||
di.runCmd(rp, swi);
|
||||
assertEquals("top2 and its direct children did not return 5 messages", swi.docs.size(), 5);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore("Needs a Mock Mail Server to work")
|
||||
public void testInclude() {
|
||||
paramMap.put("folders", "top2");
|
||||
paramMap.put("recurse", "true");
|
||||
paramMap.put("processAttachement", "false");
|
||||
paramMap.put("include", ".*grandchild.*");
|
||||
DataImporter di = new DataImporter();
|
||||
di.loadAndInit(getConfigFromMap(paramMap));
|
||||
@SuppressWarnings({"unchecked"})
|
||||
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
|
||||
SolrWriterImpl swi = new SolrWriterImpl();
|
||||
di.runCmd(rp, swi);
|
||||
assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore("Needs a Mock Mail Server to work")
|
||||
public void testIncludeAndExclude() {
|
||||
paramMap.put("folders", "top1,top2");
|
||||
paramMap.put("recurse", "true");
|
||||
paramMap.put("processAttachement", "false");
|
||||
paramMap.put("exclude", ".*top1.*");
|
||||
paramMap.put("include", ".*grandchild.*");
|
||||
DataImporter di = new DataImporter();
|
||||
di.loadAndInit(getConfigFromMap(paramMap));
|
||||
@SuppressWarnings({"unchecked"})
|
||||
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
|
||||
SolrWriterImpl swi = new SolrWriterImpl();
|
||||
di.runCmd(rp, swi);
|
||||
assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore("Needs a Mock Mail Server to work")
|
||||
@SuppressWarnings({"unchecked"})
|
||||
public void testFetchTimeSince() throws ParseException {
|
||||
paramMap.put("folders", "top1/child11");
|
||||
paramMap.put("recurse", "true");
|
||||
paramMap.put("processAttachement", "false");
|
||||
paramMap.put("fetchMailsSince", "2008-12-26 00:00:00");
|
||||
DataImporter di = new DataImporter();
|
||||
di.loadAndInit(getConfigFromMap(paramMap));
|
||||
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
|
||||
SolrWriterImpl swi = new SolrWriterImpl();
|
||||
di.runCmd(rp, swi);
|
||||
assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
|
||||
}
|
||||
|
||||
private String getConfigFromMap(Map<String, String> params) {
|
||||
String conf =
|
||||
"<dataConfig>" +
|
||||
"<document>" +
|
||||
"<entity processor=\"org.apache.solr.handler.dataimport.MailEntityProcessor\" " +
|
||||
"someconfig" +
|
||||
"/>" +
|
||||
"</document>" +
|
||||
"</dataConfig>";
|
||||
params.put("user", user);
|
||||
params.put("password", password);
|
||||
params.put("host", host);
|
||||
params.put("protocol", protocol);
|
||||
StringBuilder attribs = new StringBuilder("");
|
||||
for (String key : params.keySet())
|
||||
attribs.append(" ").append(key).append("=" + "\"").append(params.get(key)).append("\"");
|
||||
attribs.append(" ");
|
||||
return conf.replace("someconfig", attribs.toString());
|
||||
}
|
||||
|
||||
static class SolrWriterImpl extends SolrWriter {
|
||||
List<SolrInputDocument> docs = new ArrayList<>();
|
||||
Boolean deleteAllCalled;
|
||||
Boolean commitCalled;
|
||||
|
||||
public SolrWriterImpl() {
|
||||
super(null, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean upload(SolrInputDocument doc) {
|
||||
return docs.add(doc);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void doDeleteAll() {
|
||||
deleteAllCalled = Boolean.TRUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit(boolean b) {
|
||||
commitCalled = Boolean.TRUE;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,221 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
/**Testcase for TikaEntityProcessor
|
||||
*
|
||||
* @since solr 3.1
|
||||
*/
|
||||
public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
|
||||
private String conf =
|
||||
"<dataConfig>" +
|
||||
" <dataSource type=\"BinFileDataSource\"/>" +
|
||||
" <document>" +
|
||||
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
|
||||
" <field column=\"Author\" meta=\"true\" name=\"author\"/>" +
|
||||
" <field column=\"title\" meta=\"true\" name=\"title\"/>" +
|
||||
" <field column=\"text\"/>" +
|
||||
" </entity>" +
|
||||
" </document>" +
|
||||
"</dataConfig>";
|
||||
|
||||
private String skipOnErrConf =
|
||||
"<dataConfig>" +
|
||||
" <dataSource type=\"BinFileDataSource\"/>" +
|
||||
" <document>" +
|
||||
" <entity name=\"Tika\" onError=\"skip\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/bad.doc").getAbsolutePath() + "\" >" +
|
||||
"<field column=\"content\" name=\"text\"/>" +
|
||||
" </entity>" +
|
||||
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
|
||||
" <field column=\"text\"/>" +
|
||||
"</entity>" +
|
||||
" </document>" +
|
||||
"</dataConfig>";
|
||||
|
||||
private String spatialConf =
|
||||
"<dataConfig>" +
|
||||
" <dataSource type=\"BinFileDataSource\"/>" +
|
||||
" <document>" +
|
||||
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" +
|
||||
getFile("dihextras/test_jpeg.jpg").getAbsolutePath() + "\" spatialMetadataField=\"home\">" +
|
||||
" <field column=\"text\"/>" +
|
||||
" </entity>" +
|
||||
" </document>" +
|
||||
"</dataConfig>";
|
||||
|
||||
private String vsdxConf =
|
||||
"<dataConfig>" +
|
||||
" <dataSource type=\"BinFileDataSource\"/>" +
|
||||
" <document>" +
|
||||
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/test_vsdx.vsdx").getAbsolutePath() + "\" >" +
|
||||
" <field column=\"text\"/>" +
|
||||
" </entity>" +
|
||||
" </document>" +
|
||||
"</dataConfig>";
|
||||
|
||||
private String[] tests = {
|
||||
"//*[@numFound='1']"
|
||||
,"//str[@name='author'][.='Grant Ingersoll']"
|
||||
,"//str[@name='title'][.='solr-word']"
|
||||
,"//str[@name='text']"
|
||||
};
|
||||
|
||||
private String[] testsHTMLDefault = {
|
||||
"//*[@numFound='1']"
|
||||
, "//str[@name='text'][contains(.,'Basic div')]"
|
||||
, "//str[@name='text'][contains(.,'<h1>')]"
|
||||
, "//str[@name='text'][not(contains(.,'<div>'))]" //default mapper lower-cases elements as it maps
|
||||
, "//str[@name='text'][not(contains(.,'<DIV>'))]"
|
||||
};
|
||||
|
||||
private String[] testsHTMLIdentity = {
|
||||
"//*[@numFound='1']"
|
||||
, "//str[@name='text'][contains(.,'Basic div')]"
|
||||
, "//str[@name='text'][contains(.,'<h1>')]"
|
||||
, "//str[@name='text'][contains(.,'<div>')]"
|
||||
, "//str[@name='text'][contains(.,'class=\"classAttribute\"')]" //attributes are lower-cased
|
||||
};
|
||||
|
||||
private String[] testsSpatial = {
|
||||
"//*[@numFound='1']"
|
||||
};
|
||||
|
||||
private String[] testsEmbedded = {
|
||||
"//*[@numFound='1']",
|
||||
"//str[@name='text'][contains(.,'When in the Course')]"
|
||||
};
|
||||
|
||||
private String[] testsIgnoreEmbedded = {
|
||||
"//*[@numFound='1']",
|
||||
"//str[@name='text'][not(contains(.,'When in the Course'))]"
|
||||
};
|
||||
|
||||
private String[] testsVSDX = {
|
||||
"//*[@numFound='1']",
|
||||
"//str[@name='text'][contains(.,'Arrears')]"
|
||||
};
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)",
|
||||
new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage()));
|
||||
initCore("dataimport-solrconfig.xml", "dataimport-schema-no-unique-key.xml", getFile("dihextras/solr").getAbsolutePath());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIndexingWithTikaEntityProcessor() throws Exception {
|
||||
runFullImport(conf);
|
||||
assertQ(req("*:*"), tests );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkip() throws Exception {
|
||||
runFullImport(skipOnErrConf);
|
||||
assertQ(req("*:*"), "//*[@numFound='1']");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVSDX() throws Exception {
|
||||
//this ensures that we've included the curvesapi dependency
|
||||
//and that the ConnectsType class is bundled with poi-ooxml-schemas.
|
||||
runFullImport(vsdxConf);
|
||||
assertQ(req("*:*"), testsVSDX);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTikaHTMLMapperEmpty() throws Exception {
|
||||
runFullImport(getConfigHTML(null));
|
||||
assertQ(req("*:*"), testsHTMLDefault);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTikaHTMLMapperDefault() throws Exception {
|
||||
runFullImport(getConfigHTML("default"));
|
||||
assertQ(req("*:*"), testsHTMLDefault);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTikaHTMLMapperIdentity() throws Exception {
|
||||
runFullImport(getConfigHTML("identity"));
|
||||
assertQ(req("*:*"), testsHTMLIdentity);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTikaGeoMetadata() throws Exception {
|
||||
runFullImport(spatialConf);
|
||||
String pt = "38.97,-77.018";
|
||||
Double distance = 5.0d;
|
||||
assertQ(req("q", "*:* OR foo_i:" + random().nextInt(100), "fq",
|
||||
"{!geofilt sfield=\"home\"}\"",
|
||||
"pt", pt, "d", String.valueOf(distance)), testsSpatial);
|
||||
}
|
||||
|
||||
private String getConfigHTML(String htmlMapper) {
|
||||
return
|
||||
"<dataConfig>" +
|
||||
" <dataSource type='BinFileDataSource'/>" +
|
||||
" <document>" +
|
||||
" <entity name='Tika' format='xml' processor='TikaEntityProcessor' " +
|
||||
" url='" + getFile("dihextras/structured.html").getAbsolutePath() + "' " +
|
||||
((htmlMapper == null) ? "" : (" htmlMapper='" + htmlMapper + "'")) + ">" +
|
||||
" <field column='text'/>" +
|
||||
" </entity>" +
|
||||
" </document>" +
|
||||
"</dataConfig>";
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmbeddedDocsLegacy() throws Exception {
|
||||
//test legacy behavior: ignore embedded docs
|
||||
runFullImport(conf);
|
||||
assertQ(req("*:*"), testsIgnoreEmbedded);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmbeddedDocsTrue() throws Exception {
|
||||
runFullImport(getConfigEmbedded(true));
|
||||
assertQ(req("*:*"), testsEmbedded);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmbeddedDocsFalse() throws Exception {
|
||||
runFullImport(getConfigEmbedded(false));
|
||||
assertQ(req("*:*"), testsIgnoreEmbedded);
|
||||
}
|
||||
|
||||
private String getConfigEmbedded(boolean extractEmbedded) {
|
||||
return
|
||||
"<dataConfig>" +
|
||||
" <dataSource type=\"BinFileDataSource\"/>" +
|
||||
" <document>" +
|
||||
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" +
|
||||
getFile("dihextras/test_recursive_embedded.docx").getAbsolutePath() + "\" " +
|
||||
" extractEmbedded=\""+extractEmbedded+"\">" +
|
||||
" <field column=\"Author\" meta=\"true\" name=\"author\"/>" +
|
||||
" <field column=\"title\" meta=\"true\" name=\"title\"/>" +
|
||||
" <field column=\"text\"/>" +
|
||||
" </entity>" +
|
||||
" </document>" +
|
||||
"</dataConfig>";
|
||||
}
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
Apache Solr - DataImportHandler
|
||||
================================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and
|
||||
HTTP data sources quick and easy.
|
||||
|
||||
Important Note
|
||||
--------------
|
||||
Although Solr strives to be agnostic of the Locale where the server is
|
||||
running, some code paths in DataImportHandler are known to depend on the
|
||||
System default Locale, Timezone, or Charset. It is recommended that when
|
||||
running Solr you set the following system properties:
|
||||
-Duser.language=xx -Duser.country=YY -Duser.timezone=ZZZ
|
||||
|
||||
where xx, YY, and ZZZ are consistent with any database server's configuration.
|
||||
|
||||
Deprecation notice
|
||||
------------------
|
||||
This contrib module is deprecated as of v8.6, scheduled for removal in Solr 9.0.
|
||||
The reason is that DIH is no longer being maintained in a manner we feel is necessary in order to keep it
|
||||
healthy and secure. Also it was not designed to work with SolrCloud and does not meet current performance requirements.
|
||||
|
||||
The project hopes that the community will take over maintenance of DIH as a 3rd party package (See SOLR-14066 for more details). Please reach out to us at the dev@ mailing list if you want to help.
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
apply plugin: 'java-library'
|
||||
|
||||
description = 'Data Import Handler'
|
||||
|
||||
dependencies {
|
||||
implementation project(':solr:core')
|
||||
|
||||
testImplementation project(':solr:test-framework')
|
||||
|
||||
testImplementation('org.mockito:mockito-core', {
|
||||
exclude group: "net.bytebuddy", module: "byte-buddy-agent"
|
||||
})
|
||||
testImplementation ('org.hsqldb:hsqldb')
|
||||
testImplementation ('org.apache.derby:derby')
|
||||
testImplementation ('org.objenesis:objenesis')
|
||||
}
|
|
@ -1,70 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
/**
|
||||
* <p> A data source implementation which can be used to read binary stream from content streams. </p> <p> Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
|
||||
* details. </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 3.1
|
||||
*/
|
||||
|
||||
public class BinContentStreamDataSource extends DataSource<InputStream> {
|
||||
private ContextImpl context;
|
||||
private ContentStream contentStream;
|
||||
private InputStream in;
|
||||
|
||||
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
this.context = (ContextImpl) context;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream getData(String query) {
|
||||
contentStream = context.getDocBuilder().getReqParams().getContentStream();
|
||||
if (contentStream == null)
|
||||
throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body");
|
||||
try {
|
||||
return in = contentStream.getStream();
|
||||
} catch (IOException e) {
|
||||
DataImportHandlerException.wrapAndThrow(SEVERE, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (contentStream != null) {
|
||||
try {
|
||||
if (in == null) in = contentStream.getStream();
|
||||
in.close();
|
||||
} catch (IOException e) {
|
||||
/*no op*/
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,64 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Properties;
|
||||
/**
|
||||
* <p>
|
||||
* A DataSource which reads from local files
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 3.1
|
||||
*/
|
||||
|
||||
public class BinFileDataSource extends DataSource<InputStream>{
|
||||
protected String basePath;
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
basePath = initProps.getProperty(FileDataSource.BASE_PATH);
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream getData(String query) {
|
||||
File f = FileDataSource.getFile(basePath,query);
|
||||
try {
|
||||
return new FileInputStream(f);
|
||||
} catch (FileNotFoundException e) {
|
||||
wrapAndThrow(SEVERE,e,"Unable to open file "+f.getAbsolutePath());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
|
||||
}
|
||||
}
|
|
@ -1,104 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
|
||||
import static org.apache.solr.handler.dataimport.URLDataSource.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.Properties;
|
||||
/**
|
||||
* <p> A data source implementation which can be used to read binary streams using HTTP. </p> <p> Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
|
||||
* details. </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 3.1
|
||||
*/
|
||||
public class BinURLDataSource extends DataSource<InputStream>{
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private String baseUrl;
|
||||
private int connectionTimeout = CONNECTION_TIMEOUT;
|
||||
|
||||
private int readTimeout = READ_TIMEOUT;
|
||||
|
||||
private Context context;
|
||||
|
||||
private Properties initProps;
|
||||
|
||||
public BinURLDataSource() { }
|
||||
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
this.context = context;
|
||||
this.initProps = initProps;
|
||||
|
||||
baseUrl = getInitPropWithReplacements(BASE_URL);
|
||||
String cTimeout = getInitPropWithReplacements(CONNECTION_TIMEOUT_FIELD_NAME);
|
||||
String rTimeout = getInitPropWithReplacements(READ_TIMEOUT_FIELD_NAME);
|
||||
if (cTimeout != null) {
|
||||
try {
|
||||
connectionTimeout = Integer.parseInt(cTimeout);
|
||||
} catch (NumberFormatException e) {
|
||||
log.warn("Invalid connection timeout: {}", cTimeout);
|
||||
}
|
||||
}
|
||||
if (rTimeout != null) {
|
||||
try {
|
||||
readTimeout = Integer.parseInt(rTimeout);
|
||||
} catch (NumberFormatException e) {
|
||||
log.warn("Invalid read timeout: {}", rTimeout);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream getData(String query) {
|
||||
URL url = null;
|
||||
try {
|
||||
if (URIMETHOD.matcher(query).find()) url = new URL(query);
|
||||
else url = new URL(baseUrl + query);
|
||||
log.debug("Accessing URL: {}", url);
|
||||
URLConnection conn = url.openConnection();
|
||||
conn.setConnectTimeout(connectionTimeout);
|
||||
conn.setReadTimeout(readTimeout);
|
||||
return conn.getInputStream();
|
||||
} catch (Exception e) {
|
||||
log.error("Exception thrown while getting data", e);
|
||||
wrapAndThrow (SEVERE, e, "Exception in invoking url " + url);
|
||||
return null;//unreachable
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() { }
|
||||
|
||||
private String getInitPropWithReplacements(String propertyName) {
|
||||
final String expr = initProps.getProperty(propertyName);
|
||||
if (expr == null) {
|
||||
return null;
|
||||
}
|
||||
return context.replaceTokens(expr);
|
||||
}
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
public class CachePropertyUtil {
|
||||
public static String getAttributeValueAsString(Context context, String attr) {
|
||||
Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY);
|
||||
if (o == null) {
|
||||
o = context.getResolvedEntityAttribute(attr);
|
||||
}
|
||||
if (o == null && context.getRequestParameters() != null) {
|
||||
o = context.getRequestParameters().get(attr);
|
||||
}
|
||||
if (o == null) {
|
||||
return null;
|
||||
}
|
||||
return o.toString();
|
||||
}
|
||||
|
||||
public static Object getAttributeValue(Context context, String attr) {
|
||||
Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY);
|
||||
if (o == null) {
|
||||
o = context.getResolvedEntityAttribute(attr);
|
||||
}
|
||||
if (o == null && context.getRequestParameters() != null) {
|
||||
o = context.getRequestParameters().get(attr);
|
||||
}
|
||||
if (o == null) {
|
||||
return null;
|
||||
}
|
||||
return o;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,85 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.HTMLStripTransformer.TRUE;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.sql.Clob;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* {@link Transformer} instance which converts a {@link Clob} to a {@link String}.
|
||||
* <p>
|
||||
* Refer to <a href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* <p>
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class ClobTransformer extends Transformer {
|
||||
@Override
|
||||
public Object transformRow(Map<String, Object> aRow, Context context) {
|
||||
for (Map<String, String> map : context.getAllEntityFields()) {
|
||||
if (!TRUE.equals(map.get(CLOB))) continue;
|
||||
String column = map.get(DataImporter.COLUMN);
|
||||
String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
|
||||
if (srcCol == null)
|
||||
srcCol = column;
|
||||
Object o = aRow.get(srcCol);
|
||||
if (o instanceof List) {
|
||||
@SuppressWarnings({"unchecked"})
|
||||
List<Clob> inputs = (List<Clob>) o;
|
||||
List<String> results = new ArrayList<>();
|
||||
for (Object input : inputs) {
|
||||
if (input instanceof Clob) {
|
||||
Clob clob = (Clob) input;
|
||||
results.add(readFromClob(clob));
|
||||
}
|
||||
}
|
||||
aRow.put(column, results);
|
||||
} else {
|
||||
if (o instanceof Clob) {
|
||||
Clob clob = (Clob) o;
|
||||
aRow.put(column, readFromClob(clob));
|
||||
}
|
||||
}
|
||||
}
|
||||
return aRow;
|
||||
}
|
||||
|
||||
private String readFromClob(Clob clob) {
|
||||
Reader reader = FieldReaderDataSource.readCharStream(clob);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
char[] buf = new char[1024];
|
||||
int len;
|
||||
try {
|
||||
while ((len = reader.read(buf)) != -1) {
|
||||
sb.append(buf, 0, len);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
DataImportHandlerException.wrapAndThrow(DataImportHandlerException.SEVERE, e);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static final String CLOB = "clob";
|
||||
}
|
|
@ -1,73 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NamedNodeMap;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
/**
 * Small DOM helpers used while parsing the data-config.xml tree:
 * attribute lookup with a default, attribute-map extraction, CDATA text
 * collection, and direct-child filtering by element name.
 */
public class ConfigParseUtil {

  /**
   * Returns the value of attribute {@code name} on {@code e}, or {@code def}
   * when the attribute is missing or blank.
   */
  public static String getStringAttribute(Element e, String name, String def) {
    final String value = e.getAttribute(name);
    return (value == null || value.trim().isEmpty()) ? def : value;
  }

  /** Copies every attribute of {@code e} into a name-to-value map. */
  public static HashMap<String, String> getAllAttributes(Element e) {
    final HashMap<String, String> result = new HashMap<>();
    final NamedNodeMap attrs = e.getAttributes();
    for (int i = 0, n = attrs.getLength(); i < n; i++) {
      final Node attr = attrs.item(i);
      result.put(attr.getNodeName(), attr.getNodeValue());
    }
    return result;
  }

  /**
   * Recursively appends the content of every CDATA section under
   * {@code elem} (comments and processing instructions are skipped) to
   * {@code buffer}, and returns the buffer's full contents.
   */
  public static String getText(Node elem, StringBuilder buffer) {
    if (elem.getNodeType() == Node.CDATA_SECTION_NODE) {
      buffer.append(elem.getNodeValue());
    } else {
      final NodeList children = elem.getChildNodes();
      for (int i = 0; i < children.getLength(); i++) {
        final Node child = children.item(i);
        final short type = child.getNodeType();
        if (type != Node.COMMENT_NODE && type != Node.PROCESSING_INSTRUCTION_NODE) {
          getText(child, buffer);
        }
      }
    }
    return buffer.toString();
  }

  /** Returns the direct children of {@code e} whose node name is {@code byName}. */
  public static List<Element> getChildNodes(Element e, String byName) {
    final List<Element> matches = new ArrayList<>();
    final NodeList children = e.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
      final Node child = children.item(i);
      if (e.equals(child.getParentNode()) && byName.equals(child.getNodeName())) {
        matches.add((Element) child);
      }
    }
    return matches;
  }
}
|
|
@ -1,69 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* A DataSource implementation which reads from the ContentStream of a POST request
|
||||
* <p>
|
||||
* Refer to <a href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class ContentStreamDataSource extends DataSource<Reader> {
|
||||
private ContextImpl context;
|
||||
private ContentStream contentStream;
|
||||
private Reader reader;
|
||||
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
this.context = (ContextImpl) context;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader getData(String query) {
|
||||
contentStream = context.getDocBuilder().getReqParams().getContentStream();
|
||||
if (contentStream == null)
|
||||
throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body");
|
||||
try {
|
||||
return reader = contentStream.getReader();
|
||||
} catch (IOException e) {
|
||||
DataImportHandlerException.wrapAndThrow(SEVERE, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (contentStream != null) {
|
||||
try {
|
||||
if (reader == null) reader = contentStream.getReader();
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,221 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * <p>
 * This abstract class gives access to all available objects. So any
 * component implemented by a user can have the full power of DataImportHandler
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p>
 * <b>This API is experimental and subject to change</b>
 *
 * @since solr 1.3
 */
public abstract class Context {
  // The three import process types reported by currentProcess().
  public static final String FULL_DUMP = "FULL_DUMP", DELTA_DUMP = "DELTA_DUMP", FIND_DELTA = "FIND_DELTA";

  /**
   * An object stored in entity scope is valid only for the current entity for the current document only.
   */
  public static final String SCOPE_ENTITY = "entity";

  /**
   * An object stored in global scope is available for the current import only but across entities and documents.
   */
  public static final String SCOPE_GLOBAL = "global";

  /**
   * An object stored in document scope is available for the current document only but across entities.
   */
  public static final String SCOPE_DOC = "document";

  /**
   * An object stored in 'solrcore' scope is available across imports, entities and documents throughout the life of
   * a solr core. A solr core unload or reload will destroy this data.
   */
  public static final String SCOPE_SOLR_CORE = "solrcore";

  /**
   * Get the value of any attribute put into this entity
   *
   * @param name name of the attribute eg: 'name'
   * @return value of named attribute in entity
   */
  public abstract String getEntityAttribute(String name);

  /**
   * Get the value of any attribute put into this entity after resolving all variables found in the attribute value
   * @param name name of the attribute
   * @return value of the named attribute after resolving all variables
   */
  public abstract String getResolvedEntityAttribute(String name);

  /**
   * Returns all the fields put into an entity. Each item (which is a map) in
   * the list corresponds to one field; each map contains the attribute
   * names and values in a field
   *
   * @return all fields in an entity
   */
  public abstract List<Map<String, String>> getAllEntityFields();

  /**
   * Returns the VariableResolver used in this entity which can be used to
   * resolve the tokens in ${&lt;namespace.name&gt;}
   *
   * @return a VariableResolver instance
   * @see org.apache.solr.handler.dataimport.VariableResolver
   */

  public abstract VariableResolver getVariableResolver();

  /**
   * Gets the datasource instance defined for this entity. Do not close() this instance.
   * Transformers should use the getDataSource(String name) method.
   *
   * @return a new DataSource instance as configured for the current entity
   * @see org.apache.solr.handler.dataimport.DataSource
   * @see #getDataSource(String)
   */
  @SuppressWarnings({"rawtypes"})
  public abstract DataSource getDataSource();

  /**
   * Gets a new DataSource instance with a name. Ensure that you close() this after use
   * because this is created just for this method call.
   *
   * @param name Name of the dataSource as defined in the dataSource tag
   * @return a new DataSource instance
   * @see org.apache.solr.handler.dataimport.DataSource
   */
  @SuppressWarnings({"rawtypes"})
  public abstract DataSource getDataSource(String name);

  /**
   * Returns the instance of EntityProcessor used for this entity
   *
   * @return instance of EntityProcessor used for the current entity
   * @see org.apache.solr.handler.dataimport.EntityProcessor
   */
  public abstract EntityProcessor getEntityProcessor();

  /**
   * Store values in a certain name and scope (entity, document, global)
   *
   * @param name the key
   * @param val the value
   * @param scope the scope in which the given key, value pair is to be stored
   */
  public abstract void setSessionAttribute(String name, Object val, String scope);

  /**
   * Get a value by name in the given scope (entity, document, global)
   *
   * @param name the key
   * @param scope the scope from which the value is to be retrieved
   * @return the object stored in the given scope with the given key
   */
  public abstract Object getSessionAttribute(String name, String scope);

  /**
   * Get the context instance for the parent entity. Works only in the full dump.
   * If the current entity is rootmost a null is returned
   *
   * @return parent entity's Context
   */
  public abstract Context getParentContext();

  /**
   * The request parameters passed over HTTP for this command; the values in the
   * map are either String (for single valued parameters) or List&lt;String&gt; (for
   * multi-valued parameters)
   *
   * @return the request parameters passed in the URL to initiate this process
   */
  public abstract Map<String, Object> getRequestParameters();

  /**
   * Returns if the current entity is the root entity
   *
   * @return true if current entity is the root entity, false otherwise
   */
  public abstract boolean isRootEntity();

  /**
   * Returns the current process: FULL_DUMP, DELTA_DUMP or FIND_DELTA
   *
   * @return the type of the current running process
   */
  public abstract String currentProcess();

  /**
   * Exposing the actual SolrCore to the components
   *
   * @return the core
   */
  public abstract SolrCore getSolrCore();

  /**
   * Makes available some basic running statistics such as "docCount",
   * "deletedDocCount", "rowCount", "queryCount" and "skipDocCount"
   *
   * @return a Map containing running statistics of the current import
   */
  public abstract Map<String, Object> getStats();

  /**
   * Returns the text specified in the script tag in the data-config.xml
   */
  public abstract String getScript();

  /**
   * Returns the language of the script as specified in the script tag in data-config.xml
   */
  public abstract String getScriptLanguage();

  /** Delete a document by id
   */
  public abstract void deleteDoc(String id);

  /** Delete documents by query
   */
  public abstract void deleteDocByQuery(String query);

  /** Use this directly to resolve a variable
   * @param var the variable name
   * @return the resolved value
   */
  public abstract Object resolve(String var);

  /** Resolve variables in a template
   *
   * @return The string w/ variables resolved
   */
  public abstract String replaceTokens(String template);

}
|
|
@ -1,264 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.dataimport.config.Script;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* An implementation for the Context
|
||||
* </p>
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class ContextImpl extends Context {
|
||||
// Wrapper around the current entity's processor; most accessors delegate to it.
protected EntityProcessorWrapper epw;

// Context of the parent entity, or null for the root-most entity.
private ContextImpl parent;

// Resolves ${...} tokens for this entity.
private VariableResolver resolver;

// Explicitly injected data source; when null, getDataSource() lazily pulls
// one from the entity via the DataImporter.
@SuppressWarnings({"rawtypes"})
private DataSource ds;

// One of Context.FULL_DUMP / DELTA_DUMP / FIND_DELTA.
private String currProcess;

// Raw request parameters, taken from the DocBuilder when one is present.
private Map<String, Object> requestParams;

// Owning importer; null when the context is built without a DocBuilder.
private DataImporter dataImporter;

// Backing maps for SCOPE_ENTITY (lazily created) and SCOPE_GLOBAL sessions.
private Map<String, Object> entitySession, globalSession;

private Exception lastException = null;

// Current document wrapper; SCOPE_DOC attributes are stored on it.
DocBuilder.DocWrapper doc;

DocBuilder docBuilder;

/**
 * Builds a context for one entity. When {@code docBuilder} is non-null the
 * request parameters and the DataImporter are taken from it; otherwise both
 * stay null and the dependent accessors return null/no-op.
 */
public ContextImpl(EntityProcessorWrapper epw, VariableResolver resolver,
                   @SuppressWarnings({"rawtypes"})DataSource ds, String currProcess,
                   Map<String, Object> global, ContextImpl parentContext, DocBuilder docBuilder) {
  this.epw = epw;
  this.docBuilder = docBuilder;
  this.resolver = resolver;
  this.ds = ds;
  this.currProcess = currProcess;
  if (docBuilder != null) {
    this.requestParams = docBuilder.getReqParams().getRawParams();
    dataImporter = docBuilder.dataImporter;
  }
  globalSession = global;
  parent = parentContext;
}
|
||||
|
||||
/** Raw attribute value from the current entity's config, or null when there is no entity. */
@Override
public String getEntityAttribute(String name) {
  return epw==null || epw.getEntity() == null ? null : epw.getEntity().getAllAttributes().get(name);
}

/** Attribute value with ${...} tokens substituted, or null when there is no entity. */
@Override
public String getResolvedEntityAttribute(String name) {
  return epw==null || epw.getEntity() == null ? null : resolver.replaceTokens(epw.getEntity().getAllAttributes().get(name));
}

/** All field definitions of the current entity; empty list when there is no entity. */
@Override
public List<Map<String, String>> getAllEntityFields() {
  return epw==null || epw.getEntity() == null ? Collections.emptyList() : epw.getEntity().getAllFieldsList();
}

@Override
public VariableResolver getVariableResolver() {
  return resolver;
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
public DataSource getDataSource() {
|
||||
if (ds != null) return ds;
|
||||
if(epw==null) { return null; }
|
||||
if (epw!=null && epw.getDatasource() == null) {
|
||||
epw.setDatasource(dataImporter.getDataSourceInstance(epw.getEntity(), epw.getEntity().getDataSourceName(), this));
|
||||
}
|
||||
if (epw!=null && epw.getDatasource() != null && docBuilder != null && docBuilder.verboseDebug &&
|
||||
Context.FULL_DUMP.equals(currentProcess())) {
|
||||
//debug is not yet implemented properly for deltas
|
||||
epw.setDatasource(docBuilder.getDebugLogger().wrapDs(epw.getDatasource()));
|
||||
}
|
||||
return epw.getDatasource();
|
||||
}
|
||||
|
||||
/**
 * Creates a brand-new data source by name via the DataImporter; the caller
 * owns it and must close() it.
 */
@Override
@SuppressWarnings({"rawtypes"})
public DataSource getDataSource(String name) {
  return dataImporter.getDataSourceInstance(epw==null ? null : epw.getEntity(), name, this);
}
|
||||
|
||||
/** True when the current entity is flagged as the document root; false when there is no entity. */
@Override
public boolean isRootEntity() {
  return epw==null ? false : epw.getEntity().isDocRoot();
}

/** The running process type (FULL_DUMP / DELTA_DUMP / FIND_DELTA). */
@Override
public String currentProcess() {
  return currProcess;
}

/** Raw request parameters; null when the context was built without a DocBuilder. */
@Override
public Map<String, Object> getRequestParameters() {
  return requestParams;
}

/** The wrapper doubles as the entity's EntityProcessor. */
@Override
public EntityProcessor getEntityProcessor() {
  return epw;
}
|
||||
|
||||
@Override
|
||||
public void setSessionAttribute(String name, Object val, String scope) {
|
||||
if(name == null) {
|
||||
return;
|
||||
}
|
||||
if (Context.SCOPE_ENTITY.equals(scope)) {
|
||||
if (entitySession == null) {
|
||||
entitySession = new HashMap<>();
|
||||
}
|
||||
entitySession.put(name, val);
|
||||
} else if (Context.SCOPE_GLOBAL.equals(scope)) {
|
||||
if (globalSession != null) {
|
||||
globalSession.put(name, val);
|
||||
}
|
||||
} else if (Context.SCOPE_DOC.equals(scope)) {
|
||||
DocBuilder.DocWrapper doc = getDocument();
|
||||
if (doc != null) {
|
||||
doc.setSessionAttribute(name, val);
|
||||
}
|
||||
} else if (SCOPE_SOLR_CORE.equals(scope)){
|
||||
if(dataImporter != null) {
|
||||
dataImporter.putToCoreScopeSession(name, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getSessionAttribute(String name, String scope) {
|
||||
if (Context.SCOPE_ENTITY.equals(scope)) {
|
||||
if (entitySession == null)
|
||||
return null;
|
||||
return entitySession.get(name);
|
||||
} else if (Context.SCOPE_GLOBAL.equals(scope)) {
|
||||
if (globalSession != null) {
|
||||
return globalSession.get(name);
|
||||
}
|
||||
} else if (Context.SCOPE_DOC.equals(scope)) {
|
||||
DocBuilder.DocWrapper doc = getDocument();
|
||||
return doc == null ? null: doc.getSessionAttribute(name);
|
||||
} else if (SCOPE_SOLR_CORE.equals(scope)){
|
||||
return dataImporter == null ? null : dataImporter.getFromCoreScopeSession(name);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Context getParentContext() {
|
||||
return parent;
|
||||
}
|
||||
|
||||
private DocBuilder.DocWrapper getDocument() {
|
||||
ContextImpl c = this;
|
||||
while (true) {
|
||||
if (c.doc != null)
|
||||
return c.doc;
|
||||
if (c.parent != null)
|
||||
c = c.parent;
|
||||
else
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
void setDoc(DocBuilder.DocWrapper docWrapper) {
|
||||
this.doc = docWrapper;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SolrCore getSolrCore() {
|
||||
return dataImporter == null ? null : dataImporter.getCore();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Map<String, Object> getStats() {
|
||||
return docBuilder != null ? docBuilder.importStatistics.getStatsSnapshot() : Collections.<String, Object>emptyMap();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getScript() {
|
||||
if (dataImporter != null) {
|
||||
Script script = dataImporter.getConfig().getScript();
|
||||
return script == null ? null : script.getText();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getScriptLanguage() {
|
||||
if (dataImporter != null) {
|
||||
Script script = dataImporter.getConfig().getScript();
|
||||
return script == null ? null : script.getLanguage();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteDoc(String id) {
|
||||
if(docBuilder != null){
|
||||
docBuilder.writer.deleteDoc(id);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteDocByQuery(String query) {
|
||||
if(docBuilder != null){
|
||||
docBuilder.writer.deleteByQuery(query);
|
||||
}
|
||||
}
|
||||
|
||||
DocBuilder getDocBuilder(){
|
||||
return docBuilder;
|
||||
}
|
||||
@Override
|
||||
public Object resolve(String var) {
|
||||
return resolver.resolve(var);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String replaceTokens(String template) {
|
||||
return resolver.replaceTokens(template);
|
||||
}
|
||||
|
||||
/** @return the most recent exception recorded on this context, or null. */
public Exception getLastException() {
  return lastException;
}
|
||||
|
||||
/** Records the most recent exception for later retrieval via getLastException(). */
public void setLastException(Exception lastException) {
  this.lastException = lastException;
}
|
||||
}
|
|
@ -1,103 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A cache that allows a DIH entity's data to persist locally prior being joined
|
||||
* to other data and/or indexed.
|
||||
* </p>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface DIHCache extends Iterable<Map<String,Object>> {
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Opens the cache using the specified properties. The {@link Context}
|
||||
* includes any parameters needed by the cache impl. This must be called
|
||||
* before any read/write operations are permitted.
|
||||
*/
|
||||
void open(Context context);
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Releases resources used by this cache, if possible. The cache is flushed
|
||||
* but not destroyed.
|
||||
* </p>
|
||||
*/
|
||||
void close();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Persists any pending data to the cache
|
||||
* </p>
|
||||
*/
|
||||
void flush();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Closes the cache, if open. Then removes all data, possibly removing the
|
||||
* cache entirely from persistent storage.
|
||||
* </p>
|
||||
*/
|
||||
public void destroy();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Adds a document. If a document already exists with the same key, both
|
||||
* documents will exist in the cache, as the cache allows duplicate keys. To
|
||||
* update a key's documents, first call delete(Object key).
|
||||
* </p>
|
||||
*/
|
||||
void add(Map<String, Object> rec);
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Returns an iterator, allowing callers to iterate through the entire cache
|
||||
* in key, then insertion, order.
|
||||
* </p>
|
||||
*/
|
||||
@Override
|
||||
Iterator<Map<String,Object>> iterator();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Returns an iterator, allowing callers to iterate through all documents that
|
||||
* match the given key in insertion order.
|
||||
* </p>
|
||||
*/
|
||||
Iterator<Map<String,Object>> iterator(Object key);
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Delete all documents associated with the given key
|
||||
* </p>
|
||||
*/
|
||||
void delete(Object key);
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Delete all data from the cache,leaving the empty cache intact.
|
||||
* </p>
|
||||
*/
|
||||
void deleteAll();
|
||||
|
||||
}
|
|
@ -1,279 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class DIHCacheSupport {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
private String cacheForeignKey;
|
||||
private String cacheImplName;
|
||||
private Map<String,DIHCache> queryVsCache = new HashMap<>();
|
||||
private Map<String,Iterator<Map<String,Object>>> queryVsCacheIterator;
|
||||
private Iterator<Map<String,Object>> dataSourceRowCache;
|
||||
private boolean cacheDoKeyLookup;
|
||||
|
||||
public DIHCacheSupport(Context context, String cacheImplName) {
|
||||
this.cacheImplName = cacheImplName;
|
||||
|
||||
Relation r = new Relation(context);
|
||||
cacheDoKeyLookup = r.doKeyLookup;
|
||||
String cacheKey = r.primaryKey;
|
||||
cacheForeignKey = r.foreignKey;
|
||||
|
||||
context.setSessionAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY, cacheKey,
|
||||
Context.SCOPE_ENTITY);
|
||||
context.setSessionAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY, cacheForeignKey,
|
||||
Context.SCOPE_ENTITY);
|
||||
context.setSessionAttribute(DIHCacheSupport.CACHE_DELETE_PRIOR_DATA,
|
||||
"true", Context.SCOPE_ENTITY);
|
||||
context.setSessionAttribute(DIHCacheSupport.CACHE_READ_ONLY, "false",
|
||||
Context.SCOPE_ENTITY);
|
||||
}
|
||||
|
||||
static class Relation{
|
||||
protected final boolean doKeyLookup;
|
||||
protected final String foreignKey;
|
||||
protected final String primaryKey;
|
||||
|
||||
public Relation(Context context) {
|
||||
String where = context.getEntityAttribute("where");
|
||||
String cacheKey = context.getEntityAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY);
|
||||
String lookupKey = context.getEntityAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY);
|
||||
if (cacheKey != null && lookupKey == null) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"'cacheKey' is specified for the entity "
|
||||
+ context.getEntityAttribute("name")
|
||||
+ " but 'cacheLookup' is missing");
|
||||
|
||||
}
|
||||
if (where == null && cacheKey == null) {
|
||||
doKeyLookup = false;
|
||||
primaryKey = null;
|
||||
foreignKey = null;
|
||||
} else {
|
||||
if (where != null) {
|
||||
String[] splits = where.split("=");
|
||||
primaryKey = splits[0];
|
||||
foreignKey = splits[1].trim();
|
||||
} else {
|
||||
primaryKey = cacheKey;
|
||||
foreignKey = lookupKey;
|
||||
}
|
||||
doKeyLookup = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Relation "
|
||||
+ primaryKey + "="+foreignKey ;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private DIHCache instantiateCache(Context context) {
|
||||
DIHCache cache = null;
|
||||
try {
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<DIHCache> cacheClass = DocBuilder.loadClass(cacheImplName, context
|
||||
.getSolrCore());
|
||||
Constructor<DIHCache> constr = cacheClass.getConstructor();
|
||||
cache = constr.newInstance();
|
||||
cache.open(context);
|
||||
} catch (Exception e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Unable to load Cache implementation:" + cacheImplName, e);
|
||||
}
|
||||
return cache;
|
||||
}
|
||||
|
||||
public void initNewParent(Context context) {
|
||||
dataSourceRowCache = null;
|
||||
queryVsCacheIterator = new HashMap<>();
|
||||
for (Map.Entry<String,DIHCache> entry : queryVsCache.entrySet()) {
|
||||
queryVsCacheIterator.put(entry.getKey(), entry.getValue().iterator());
|
||||
}
|
||||
}
|
||||
|
||||
public void destroyAll() {
|
||||
if (queryVsCache != null) {
|
||||
for (DIHCache cache : queryVsCache.values()) {
|
||||
cache.destroy();
|
||||
}
|
||||
}
|
||||
queryVsCache = null;
|
||||
dataSourceRowCache = null;
|
||||
cacheForeignKey = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Get all the rows from the datasource for the given query and cache them
|
||||
* </p>
|
||||
*/
|
||||
public void populateCache(String query,
|
||||
Iterator<Map<String,Object>> rowIterator) {
|
||||
Map<String,Object> aRow = null;
|
||||
DIHCache cache = queryVsCache.get(query);
|
||||
while ((aRow = getNextFromCache(query, rowIterator)) != null) {
|
||||
cache.add(aRow);
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String,Object> getNextFromCache(String query,
|
||||
Iterator<Map<String,Object>> rowIterator) {
|
||||
try {
|
||||
if (rowIterator == null) return null;
|
||||
if (rowIterator.hasNext()) return rowIterator.next();
|
||||
return null;
|
||||
} catch (Exception e) {
|
||||
SolrException.log(log, "getNextFromCache() failed for query '" + query
|
||||
+ "'", e);
|
||||
wrapAndThrow(DataImportHandlerException.WARN, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Map<String,Object> getCacheData(Context context, String query,
|
||||
Iterator<Map<String,Object>> rowIterator) {
|
||||
if (cacheDoKeyLookup) {
|
||||
return getIdCacheData(context, query, rowIterator);
|
||||
} else {
|
||||
return getSimpleCacheData(context, query, rowIterator);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If the where clause is present the cache is sql Vs Map of key Vs List of
|
||||
* Rows.
|
||||
*
|
||||
* @param query
|
||||
* the query string for which cached data is to be returned
|
||||
*
|
||||
* @return the cached row corresponding to the given query after all variables
|
||||
* have been resolved
|
||||
*/
|
||||
protected Map<String,Object> getIdCacheData(Context context, String query,
|
||||
Iterator<Map<String,Object>> rowIterator) {
|
||||
Object key = context.resolve(cacheForeignKey);
|
||||
if (key == null) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.WARN,
|
||||
"The cache lookup value : " + cacheForeignKey
|
||||
+ " is resolved to be null in the entity :"
|
||||
+ context.getEntityAttribute("name"));
|
||||
|
||||
}
|
||||
if (dataSourceRowCache == null) {
|
||||
DIHCache cache = queryVsCache.get(query);
|
||||
|
||||
if (cache == null) {
|
||||
cache = instantiateCache(context);
|
||||
queryVsCache.put(query, cache);
|
||||
populateCache(query, rowIterator);
|
||||
}
|
||||
dataSourceRowCache = cache.iterator(key);
|
||||
}
|
||||
return getFromRowCacheTransformed();
|
||||
}
|
||||
|
||||
/**
|
||||
* If where clause is not present the cache is a Map of query vs List of Rows.
|
||||
*
|
||||
* @param query
|
||||
* string for which cached row is to be returned
|
||||
*
|
||||
* @return the cached row corresponding to the given query
|
||||
*/
|
||||
protected Map<String,Object> getSimpleCacheData(Context context,
|
||||
String query, Iterator<Map<String,Object>> rowIterator) {
|
||||
if (dataSourceRowCache == null) {
|
||||
DIHCache cache = queryVsCache.get(query);
|
||||
if (cache == null) {
|
||||
cache = instantiateCache(context);
|
||||
queryVsCache.put(query, cache);
|
||||
populateCache(query, rowIterator);
|
||||
queryVsCacheIterator.put(query, cache.iterator());
|
||||
}
|
||||
Iterator<Map<String,Object>> cacheIter = queryVsCacheIterator.get(query);
|
||||
dataSourceRowCache = cacheIter;
|
||||
}
|
||||
|
||||
return getFromRowCacheTransformed();
|
||||
}
|
||||
|
||||
protected Map<String,Object> getFromRowCacheTransformed() {
|
||||
if (dataSourceRowCache == null || !dataSourceRowCache.hasNext()) {
|
||||
dataSourceRowCache = null;
|
||||
return null;
|
||||
}
|
||||
Map<String,Object> r = dataSourceRowCache.next();
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Specify the class for the cache implementation
|
||||
* </p>
|
||||
*/
|
||||
public static final String CACHE_IMPL = "cacheImpl";
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* If the cache supports persistent data, set to "true" to delete any prior
|
||||
* persisted data before running the entity.
|
||||
* </p>
|
||||
*/
|
||||
|
||||
public static final String CACHE_DELETE_PRIOR_DATA = "cacheDeletePriorData";
|
||||
/**
|
||||
* <p>
|
||||
* Specify the Foreign Key from the parent entity to join on. Use if the cache
|
||||
* is on a child entity.
|
||||
* </p>
|
||||
*/
|
||||
public static final String CACHE_FOREIGN_KEY = "cacheLookup";
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Specify the Primary Key field from this Entity to map the input records
|
||||
* with
|
||||
* </p>
|
||||
*/
|
||||
public static final String CACHE_PRIMARY_KEY = "cacheKey";
|
||||
/**
|
||||
* <p>
|
||||
* If true, a pre-existing cache is re-opened for read-only access.
|
||||
* </p>
|
||||
*/
|
||||
public static final String CACHE_READ_ONLY = "cacheReadOnly";
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
/**
 * Event markers used when tracing/logging the progress of a DIH import run.
 */
public enum DIHLogLevels {
  // Declaration order is preserved from the original definition; do not
  // reorder, in case ordinals are relied upon anywhere.
  START_ENTITY,
  END_ENTITY,
  TRANSFORMED_ROW,
  ENTITY_META,
  PRE_TRANSFORMER_ROW,
  START_DOC,
  END_DOC,
  ENTITY_OUT,
  ROW_END,
  TRANSFORMER_EXCEPTION,
  ENTITY_EXCEPTION,
  DISABLE_LOGGING,
  ENABLE_LOGGING,
  NONE
}
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Implementations write out properties about the last data import
|
||||
* for use by the next import. ex: to persist the last import timestamp
|
||||
* so that future delta imports can know what needs to be updated.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class DIHProperties {
|
||||
|
||||
public abstract void init(DataImporter dataImporter, Map<String, String> initParams);
|
||||
|
||||
public abstract boolean isWritable();
|
||||
|
||||
public abstract void persist(Map<String, Object> props);
|
||||
|
||||
public abstract Map<String, Object> readIndexerProperties();
|
||||
|
||||
public abstract String convertDateToString(Date d);
|
||||
|
||||
public Date getCurrentTimestamp() {
|
||||
return new Date();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,99 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
||||
/**
|
||||
* @lucene.experimental
|
||||
*
|
||||
*/
|
||||
public interface DIHWriter {
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* If this writer supports transactions or commit points, then commit any changes,
|
||||
* optionally optimizing the data for read/write performance
|
||||
* </p>
|
||||
*/
|
||||
public void commit(boolean optimize);
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Release resources used by this writer. After calling close, reads & updates will throw exceptions.
|
||||
* </p>
|
||||
*/
|
||||
public void close();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* If this writer supports transactions or commit points, then roll back any uncommitted changes.
|
||||
* </p>
|
||||
*/
|
||||
public void rollback();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Delete from the writer's underlying data store based the passed-in writer-specific query. (Optional Operation)
|
||||
* </p>
|
||||
*/
|
||||
public void deleteByQuery(String q);
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Delete everything from the writer's underlying data store
|
||||
* </p>
|
||||
*/
|
||||
public void doDeleteAll();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Delete from the writer's underlying data store based on the passed-in Primary Key
|
||||
* </p>
|
||||
*/
|
||||
public void deleteDoc(Object key);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Add a document to this writer's underlying data store.
|
||||
* </p>
|
||||
* @return true on success, false on failure
|
||||
*/
|
||||
public boolean upload(SolrInputDocument doc);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Provide context information for this writer. init() should be called before using the writer.
|
||||
* </p>
|
||||
*/
|
||||
public void init(Context context) ;
|
||||
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Specify the keys to be modified by a delta update (required by writers that can store duplicate keys)
|
||||
* </p>
|
||||
*/
|
||||
public void setDeltaKeys(Set<Map<String, Object>> deltaKeys) ;
|
||||
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public abstract class DIHWriterBase implements DIHWriter {
|
||||
protected String keyFieldName;
|
||||
protected Set<Object> deltaKeys = null;
|
||||
|
||||
@Override
|
||||
public void setDeltaKeys(Set<Map<String,Object>> passedInDeltaKeys) {
|
||||
deltaKeys = new HashSet<>();
|
||||
for (Map<String,Object> aMap : passedInDeltaKeys) {
|
||||
if (aMap.size() > 0) {
|
||||
Object key = null;
|
||||
if (keyFieldName != null) {
|
||||
key = aMap.get(keyFieldName);
|
||||
} else {
|
||||
key = aMap.entrySet().iterator().next();
|
||||
}
|
||||
if (key != null) {
|
||||
deltaKeys.add(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,318 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.handler.RequestHandlerBase;
|
||||
import org.apache.solr.metrics.MetricsMap;
|
||||
import org.apache.solr.metrics.SolrMetricsContext;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.RawResponseWriter;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImporter.IMPORT_CMD;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Solr Request Handler for data import from databases and REST data sources.
|
||||
* </p>
|
||||
* <p>
|
||||
* It is configured in solrconfig.xml
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*
|
||||
* @deprecated since 8.6
|
||||
* @since solr 1.3
|
||||
*/
|
||||
@Deprecated(since = "8.6")
|
||||
public class DataImportHandler extends RequestHandlerBase implements
|
||||
SolrCoreAware {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private DataImporter importer;
|
||||
|
||||
private boolean debugEnabled = true;
|
||||
|
||||
private String myName = "dataimport";
|
||||
|
||||
private MetricsMap metrics;
|
||||
|
||||
private static final String PARAM_WRITER_IMPL = "writerImpl";
|
||||
private static final String DEFAULT_WRITER_NAME = "SolrWriter";
|
||||
static final String ENABLE_DIH_DATA_CONFIG_PARAM = "enable.dih.dataConfigParam";
|
||||
|
||||
final boolean dataConfigParam_enabled = Boolean.getBoolean(ENABLE_DIH_DATA_CONFIG_PARAM);
|
||||
|
||||
public DataImporter getImporter() {
|
||||
return this.importer;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
public void init(@SuppressWarnings({"rawtypes"})NamedList args) {
|
||||
super.init(args);
|
||||
Map<String,String> macro = new HashMap<>();
|
||||
macro.put("expandMacros", "false");
|
||||
defaults = SolrParams.wrapDefaults(defaults, new MapSolrParams(macro));
|
||||
log.warn("Data Import Handler is deprecated as of Solr 8.6. See SOLR-14066 for more details.");
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public void inform(SolrCore core) {
|
||||
try {
|
||||
String name = getPluginInfo().name;
|
||||
if (name.startsWith("/")) {
|
||||
myName = name.substring(1);
|
||||
}
|
||||
// some users may have '/' in the handler name. replace with '_'
|
||||
myName = myName.replaceAll("/", "_");
|
||||
debugEnabled = StrUtils.parseBool((String)initArgs.get(ENABLE_DEBUG), true);
|
||||
importer = new DataImporter(core, myName);
|
||||
} catch (Exception e) {
|
||||
log.error( DataImporter.MSG.LOAD_EXP, e);
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, DataImporter.MSG.LOAD_EXP, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
@SuppressWarnings("unchecked")
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
    throws Exception {
  // Import results must never be cached by intermediaries.
  rsp.setHttpCaching(false);

  //TODO: figure out why just the first one is OK...
  // Only the first content stream (if any) is passed on to the importer.
  ContentStream contentStream = null;
  Iterable<ContentStream> streams = req.getContentStreams();
  if(streams != null){
    for (ContentStream stream : streams) {
      contentStream = stream;
      break;
    }
  }
  SolrParams params = req.getParams();
  @SuppressWarnings({"rawtypes"})
  NamedList defaultParams = (NamedList) initArgs.get("defaults");
  RequestInfo requestParams = new RequestInfo(req, getParamsMap(params), contentStream);
  String command = requestParams.getCommand();

  // show-config: return the raw data-config XML and short-circuit.
  if (DataImporter.SHOW_CONF_CMD.equals(command)) {
    String dataConfigFile = params.get("config");
    String dataConfig = params.get("dataConfig"); // needn't check dataConfigParam_enabled; we don't execute it
    if(dataConfigFile != null) {
      dataConfig = SolrWriter.getResourceAsString(req.getCore().getResourceLoader().openResource(dataConfigFile));
    }
    if(dataConfig==null)  {
      rsp.add("status", DataImporter.MSG.NO_CONFIG_FOUND);
    } else {
      // Modify incoming request params to add wt=raw
      ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
      rawParams.set(CommonParams.WT, "raw");
      req.setParams(rawParams);
      ContentStreamBase content = new ContentStreamBase.StringStream(dataConfig);
      rsp.add(RawResponseWriter.CONTENT, content);
    }
    return;
  }

  // Executing a request-supplied config is gated behind a system property
  // because it allows arbitrary config injection.
  if (params.get("dataConfig") != null && dataConfigParam_enabled == false) {
    throw new SolrException(SolrException.ErrorCode.FORBIDDEN,
        "Use of the dataConfig param (DIH debug mode) requires the system property " +
        ENABLE_DIH_DATA_CONFIG_PARAM + " because it's a security risk.");
  }

  rsp.add("initArgs", initArgs);
  String message = "";

  if (command != null) {
    rsp.add("command", command);
  }
  // If importer is still null
  if (importer == null) {
    rsp.add("status", DataImporter.MSG.NO_INIT);
    return;
  }

  // Command dispatch: abort wins; a busy importer rejects everything else;
  // otherwise run import/delta-import/reload-config as requested.
  if (command != null && DataImporter.ABORT_CMD.equals(command)) {
    importer.runCmd(requestParams, null);
  } else if (importer.isBusy()) {
    message = DataImporter.MSG.CMD_RUNNING;
  } else if (command != null) {
    if (DataImporter.FULL_IMPORT_CMD.equals(command)
        || DataImporter.DELTA_IMPORT_CMD.equals(command) ||
        IMPORT_CMD.equals(command)) {
      importer.maybeReloadConfiguration(requestParams, defaultParams);
      UpdateRequestProcessorChain processorChain =
          req.getCore().getUpdateProcessorChain(params);
      UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp);
      SolrResourceLoader loader = req.getCore().getResourceLoader();
      DIHWriter sw = getSolrWriter(processor, loader, requestParams, req);

      if (requestParams.isDebug()) {
        if (debugEnabled) {
          // Synchronous request for the debug mode
          importer.runCmd(requestParams, sw);
          rsp.add("mode", "debug");
          rsp.add("documents", requestParams.getDebugInfo().debugDocuments);
          if (requestParams.getDebugInfo().debugVerboseOutput != null) {
            rsp.add("verbose-output", requestParams.getDebugInfo().debugVerboseOutput);
          }
        } else {
          message = DataImporter.MSG.DEBUG_NOT_ENABLED;
        }
      } else {
        // Asynchronous request for normal mode
        if(requestParams.getContentStream() == null && !requestParams.isSyncMode()){
          importer.runAsync(requestParams, sw);
        } else {
          importer.runCmd(requestParams, sw);
        }
      }
    } else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
      if(importer.maybeReloadConfiguration(requestParams, defaultParams)) {
        message = DataImporter.MSG.CONFIG_RELOADED;
      } else {
        message = DataImporter.MSG.CONFIG_NOT_RELOADED;
      }
    }
  }
  rsp.add("status", importer.isBusy() ? "busy" : "idle");
  rsp.add("importResponse", message);
  rsp.add("statusMessages", importer.getStatusMessages());
}
|
||||
|
||||
/** The value is converted to a String or {@code List<String>} if multi-valued. */
|
||||
private Map<String, Object> getParamsMap(SolrParams params) {
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
for (Map.Entry<String, String[]> pair : params){
|
||||
String s = pair.getKey();
|
||||
String[] val = pair.getValue();
|
||||
if (val == null || val.length < 1)
|
||||
continue;
|
||||
if (val.length == 1)
|
||||
result.put(s, val[0]);
|
||||
else
|
||||
result.put(s, Arrays.asList(val));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private DIHWriter getSolrWriter(final UpdateRequestProcessor processor,
|
||||
final SolrResourceLoader loader, final RequestInfo requestParams,
|
||||
SolrQueryRequest req) {
|
||||
SolrParams reqParams = req.getParams();
|
||||
String writerClassStr = null;
|
||||
if (reqParams != null && reqParams.get(PARAM_WRITER_IMPL) != null) {
|
||||
writerClassStr = reqParams.get(PARAM_WRITER_IMPL);
|
||||
}
|
||||
DIHWriter writer;
|
||||
if (writerClassStr != null
|
||||
&& !writerClassStr.equals(DEFAULT_WRITER_NAME)
|
||||
&& !writerClassStr.equals(DocBuilder.class.getPackage().getName() + "."
|
||||
+ DEFAULT_WRITER_NAME)) {
|
||||
try {
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<DIHWriter> writerClass = DocBuilder.loadClass(writerClassStr, req.getCore());
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
Constructor<DIHWriter> cnstr = writerClass.getConstructor(new Class[] {
|
||||
UpdateRequestProcessor.class, SolrQueryRequest.class});
|
||||
return cnstr.newInstance((Object) processor, (Object) req);
|
||||
} catch (Exception e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Unable to load Writer implementation:" + writerClassStr, e);
|
||||
}
|
||||
} else {
|
||||
return new SolrWriter(processor, req) {
|
||||
@Override
|
||||
public boolean upload(SolrInputDocument document) {
|
||||
try {
|
||||
return super.upload(document);
|
||||
} catch (RuntimeException e) {
|
||||
log.error("Exception while adding: {}", document, e);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
|
||||
super.initializeMetrics(parentContext, scope);
|
||||
metrics = new MetricsMap((detailed, map) -> {
|
||||
if (importer != null) {
|
||||
DocBuilder.Statistics cumulative = importer.cumulativeStatistics;
|
||||
|
||||
map.put("Status", importer.getStatus().toString());
|
||||
|
||||
if (importer.docBuilder != null) {
|
||||
DocBuilder.Statistics running = importer.docBuilder.importStatistics;
|
||||
map.put("Documents Processed", running.docCount);
|
||||
map.put("Requests made to DataSource", running.queryCount);
|
||||
map.put("Rows Fetched", running.rowsCount);
|
||||
map.put("Documents Deleted", running.deletedDocCount);
|
||||
map.put("Documents Skipped", running.skipDocCount);
|
||||
}
|
||||
|
||||
map.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, cumulative.docCount);
|
||||
map.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, cumulative.queryCount);
|
||||
map.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, cumulative.rowsCount);
|
||||
map.put(DataImporter.MSG.TOTAL_DOCS_DELETED, cumulative.deletedDocCount);
|
||||
map.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, cumulative.skipDocCount);
|
||||
}
|
||||
});
|
||||
solrMetricsContext.gauge(metrics, true, "importer", getCategory().toString(), scope);
|
||||
}
|
||||
|
||||
// //////////////////////SolrInfoMBeans methods //////////////////////
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return DataImporter.MSG.JMX_DESC;
|
||||
}
|
||||
|
||||
public static final String ENABLE_DEBUG = "enableDebug";
|
||||
}
|
|
@ -1,75 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
/**
|
||||
* <p> Exception class for all DataImportHandler exceptions </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class DataImportHandlerException extends RuntimeException {
|
||||
private int errCode;
|
||||
|
||||
public boolean debugged = false;
|
||||
|
||||
public static final int SEVERE = 500, WARN = 400, SKIP = 300, SKIP_ROW =301;
|
||||
|
||||
public DataImportHandlerException(int err) {
|
||||
super();
|
||||
errCode = err;
|
||||
}
|
||||
|
||||
public DataImportHandlerException(int err, String message) {
|
||||
super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()));
|
||||
errCode = err;
|
||||
}
|
||||
|
||||
public DataImportHandlerException(int err, String message, Throwable cause) {
|
||||
super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()), cause);
|
||||
errCode = err;
|
||||
}
|
||||
|
||||
public DataImportHandlerException(int err, Throwable cause) {
|
||||
super(cause);
|
||||
errCode = err;
|
||||
}
|
||||
|
||||
public int getErrCode() {
|
||||
return errCode;
|
||||
}
|
||||
|
||||
public static DataImportHandlerException wrapAndThrow(int err, Exception e) {
|
||||
if (e instanceof DataImportHandlerException) {
|
||||
throw (DataImportHandlerException) e;
|
||||
} else {
|
||||
throw new DataImportHandlerException(err, e);
|
||||
}
|
||||
}
|
||||
|
||||
public static DataImportHandlerException wrapAndThrow(int err, Exception e, String msg) {
|
||||
if (e instanceof DataImportHandlerException) {
|
||||
throw (DataImportHandlerException) e;
|
||||
} else {
|
||||
throw new DataImportHandlerException(err, msg, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static final String MSG = " Processing Document # ";
|
||||
}
|
|
@ -1,628 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.common.EmptyEntityResolver;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.util.SystemIdResolver;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.XMLErrorLogger;
|
||||
import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
|
||||
import org.apache.solr.handler.dataimport.config.ConfigParseUtil;
|
||||
import org.apache.solr.handler.dataimport.config.DIHConfiguration;
|
||||
import org.apache.solr.handler.dataimport.config.Entity;
|
||||
import org.apache.solr.handler.dataimport.config.PropertyWriter;
|
||||
import org.apache.solr.handler.dataimport.config.Script;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
import static org.apache.solr.handler.dataimport.DocBuilder.loadClass;
|
||||
import static org.apache.solr.handler.dataimport.config.ConfigNameConstants.CLASS;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
/**
|
||||
* <p> Stores all configuration information for pulling and indexing data. </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class DataImporter {
|
||||
|
||||
public enum Status {
|
||||
IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED
|
||||
}
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
private static final XMLErrorLogger XMLLOG = new XMLErrorLogger(log);
|
||||
|
||||
private Status status = Status.IDLE;
|
||||
private DIHConfiguration config;
|
||||
private Date indexStartTime;
|
||||
private Properties store = new Properties();
|
||||
private Map<String, Map<String,String>> requestLevelDataSourceProps = new HashMap<>();
|
||||
private IndexSchema schema;
|
||||
public DocBuilder docBuilder;
|
||||
public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();
|
||||
private SolrCore core;
|
||||
private Map<String, Object> coreScopeSession = new ConcurrentHashMap<>();
|
||||
private ReentrantLock importLock = new ReentrantLock();
|
||||
private boolean isDeltaImportSupported = false;
|
||||
private final String handlerName;
|
||||
|
||||
/**
|
||||
* Only for testing purposes
|
||||
*/
|
||||
DataImporter() {
|
||||
this.handlerName = "dataimport" ;
|
||||
}
|
||||
|
||||
DataImporter(SolrCore core, String handlerName) {
|
||||
this.handlerName = handlerName;
|
||||
this.core = core;
|
||||
this.schema = core.getLatestSchema();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
boolean maybeReloadConfiguration(RequestInfo params,
|
||||
NamedList<?> defaultParams) throws IOException {
|
||||
if (importLock.tryLock()) {
|
||||
boolean success = false;
|
||||
try {
|
||||
if (null != params.getRequest()) {
|
||||
if (schema != params.getRequest().getSchema()) {
|
||||
schema = params.getRequest().getSchema();
|
||||
}
|
||||
}
|
||||
String dataConfigText = params.getDataConfig();
|
||||
String dataconfigFile = params.getConfigFile();
|
||||
InputSource is = null;
|
||||
if(dataConfigText!=null && dataConfigText.length()>0) {
|
||||
is = new InputSource(new StringReader(dataConfigText));
|
||||
} else if(dataconfigFile!=null) {
|
||||
is = new InputSource(core.getResourceLoader().openResource(dataconfigFile));
|
||||
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(dataconfigFile));
|
||||
log.info("Loading DIH Configuration: {}", dataconfigFile);
|
||||
}
|
||||
if(is!=null) {
|
||||
config = loadDataConfig(is);
|
||||
success = true;
|
||||
}
|
||||
|
||||
Map<String,Map<String,String>> dsProps = new HashMap<>();
|
||||
if(defaultParams!=null) {
|
||||
int position = 0;
|
||||
while (position < defaultParams.size()) {
|
||||
if (defaultParams.getName(position) == null) {
|
||||
break;
|
||||
}
|
||||
String name = defaultParams.getName(position);
|
||||
if (name.equals("datasource")) {
|
||||
success = true;
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
NamedList dsConfig = (NamedList) defaultParams.getVal(position);
|
||||
log.info("Getting configuration for Global Datasource...");
|
||||
Map<String,String> props = new HashMap<>();
|
||||
for (int i = 0; i < dsConfig.size(); i++) {
|
||||
props.put(dsConfig.getName(i), dsConfig.getVal(i).toString());
|
||||
}
|
||||
log.info("Adding properties to datasource: {}", props);
|
||||
dsProps.put((String) dsConfig.get("name"), props);
|
||||
}
|
||||
position++;
|
||||
}
|
||||
}
|
||||
requestLevelDataSourceProps = Collections.unmodifiableMap(dsProps);
|
||||
} catch(IOException ioe) {
|
||||
throw ioe;
|
||||
} finally {
|
||||
importLock.unlock();
|
||||
}
|
||||
return success;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public String getHandlerName() {
|
||||
return handlerName;
|
||||
}
|
||||
|
||||
public IndexSchema getSchema() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by tests
|
||||
*/
|
||||
void loadAndInit(String configStr) {
|
||||
config = loadDataConfig(new InputSource(new StringReader(configStr)));
|
||||
}
|
||||
|
||||
void loadAndInit(InputSource configFile) {
|
||||
config = loadDataConfig(configFile);
|
||||
}
|
||||
|
||||
public DIHConfiguration loadDataConfig(InputSource configFile) {
|
||||
|
||||
DIHConfiguration dihcfg = null;
|
||||
try {
|
||||
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
||||
dbf.setValidating(false);
|
||||
|
||||
// only enable xinclude, if XML is coming from safe source (local file)
|
||||
// and a a SolrCore and SystemId is present (makes no sense otherwise):
|
||||
if (core != null && configFile.getSystemId() != null) {
|
||||
try {
|
||||
dbf.setXIncludeAware(true);
|
||||
dbf.setNamespaceAware(true);
|
||||
} catch( UnsupportedOperationException e ) {
|
||||
log.warn( "XML parser doesn't support XInclude option" );
|
||||
}
|
||||
}
|
||||
|
||||
DocumentBuilder builder = dbf.newDocumentBuilder();
|
||||
// only enable xinclude / external entities, if XML is coming from
|
||||
// safe source (local file) and a a SolrCore and SystemId is present:
|
||||
if (core != null && configFile.getSystemId() != null) {
|
||||
builder.setEntityResolver(new SystemIdResolver(core.getResourceLoader()));
|
||||
} else {
|
||||
// Don't allow external entities without having a system ID:
|
||||
builder.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE);
|
||||
}
|
||||
builder.setErrorHandler(XMLLOG);
|
||||
Document document;
|
||||
try {
|
||||
document = builder.parse(configFile);
|
||||
} finally {
|
||||
// some XML parsers are broken and don't close the byte stream (but they should according to spec)
|
||||
IOUtils.closeQuietly(configFile.getByteStream());
|
||||
}
|
||||
|
||||
dihcfg = readFromXml(document);
|
||||
log.info("Data Configuration loaded successfully");
|
||||
} catch (Exception e) {
|
||||
throw new DataImportHandlerException(SEVERE,
|
||||
"Data Config problem: " + e.getMessage(), e);
|
||||
}
|
||||
for (Entity e : dihcfg.getEntities()) {
|
||||
if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) {
|
||||
isDeltaImportSupported = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return dihcfg;
|
||||
}
|
||||
|
||||
public DIHConfiguration readFromXml(Document xmlDocument) {
|
||||
DIHConfiguration config;
|
||||
List<Map<String, String >> functions = new ArrayList<>();
|
||||
Script script = null;
|
||||
Map<String, Map<String,String>> dataSources = new HashMap<>();
|
||||
|
||||
NodeList dataConfigTags = xmlDocument.getElementsByTagName("dataConfig");
|
||||
if(dataConfigTags == null || dataConfigTags.getLength() == 0) {
|
||||
throw new DataImportHandlerException(SEVERE, "the root node '<dataConfig>' is missing");
|
||||
}
|
||||
Element e = (Element) dataConfigTags.item(0);
|
||||
List<Element> documentTags = ConfigParseUtil.getChildNodes(e, "document");
|
||||
if (documentTags.isEmpty()) {
|
||||
throw new DataImportHandlerException(SEVERE, "DataImportHandler " +
|
||||
"configuration file must have one <document> node.");
|
||||
}
|
||||
|
||||
List<Element> scriptTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.SCRIPT);
|
||||
if (!scriptTags.isEmpty()) {
|
||||
script = new Script(scriptTags.get(0));
|
||||
}
|
||||
|
||||
// Add the provided evaluators
|
||||
List<Element> functionTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.FUNCTION);
|
||||
if (!functionTags.isEmpty()) {
|
||||
for (Element element : functionTags) {
|
||||
String func = ConfigParseUtil.getStringAttribute(element, NAME, null);
|
||||
String clz = ConfigParseUtil.getStringAttribute(element, ConfigNameConstants.CLASS, null);
|
||||
if (func == null || clz == null){
|
||||
throw new DataImportHandlerException(
|
||||
SEVERE,
|
||||
"<function> must have a 'name' and 'class' attributes");
|
||||
} else {
|
||||
functions.add(ConfigParseUtil.getAllAttributes(element));
|
||||
}
|
||||
}
|
||||
}
|
||||
List<Element> dataSourceTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.DATA_SRC);
|
||||
if (!dataSourceTags.isEmpty()) {
|
||||
for (Element element : dataSourceTags) {
|
||||
Map<String,String> p = new HashMap<>();
|
||||
HashMap<String, String> attrs = ConfigParseUtil.getAllAttributes(element);
|
||||
for (Map.Entry<String, String> entry : attrs.entrySet()) {
|
||||
p.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
dataSources.put(p.get("name"), p);
|
||||
}
|
||||
}
|
||||
if(dataSources.get(null) == null){
|
||||
for (Map<String,String> properties : dataSources.values()) {
|
||||
dataSources.put(null,properties);
|
||||
break;
|
||||
}
|
||||
}
|
||||
PropertyWriter pw = null;
|
||||
List<Element> propertyWriterTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.PROPERTY_WRITER);
|
||||
if (propertyWriterTags.isEmpty()) {
|
||||
boolean zookeeper = false;
|
||||
if (this.core != null
|
||||
&& this.core.getCoreContainer().isZooKeeperAware()) {
|
||||
zookeeper = true;
|
||||
}
|
||||
pw = new PropertyWriter(zookeeper ? "ZKPropertiesWriter"
|
||||
: "SimplePropertiesWriter", Collections.<String,String> emptyMap());
|
||||
} else if (propertyWriterTags.size() > 1) {
|
||||
throw new DataImportHandlerException(SEVERE, "Only one "
|
||||
+ ConfigNameConstants.PROPERTY_WRITER + " can be configured.");
|
||||
} else {
|
||||
Element pwElement = propertyWriterTags.get(0);
|
||||
String type = null;
|
||||
Map<String,String> params = new HashMap<>();
|
||||
for (Map.Entry<String,String> entry : ConfigParseUtil.getAllAttributes(
|
||||
pwElement).entrySet()) {
|
||||
if (TYPE.equals(entry.getKey())) {
|
||||
type = entry.getValue();
|
||||
} else {
|
||||
params.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
if (type == null) {
|
||||
throw new DataImportHandlerException(SEVERE, "The "
|
||||
+ ConfigNameConstants.PROPERTY_WRITER + " element must specify "
|
||||
+ TYPE);
|
||||
}
|
||||
pw = new PropertyWriter(type, params);
|
||||
}
|
||||
return new DIHConfiguration(documentTags.get(0), this, functions, script, dataSources, pw);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private DIHProperties createPropertyWriter() {
|
||||
DIHProperties propWriter = null;
|
||||
PropertyWriter configPw = config.getPropertyWriter();
|
||||
try {
|
||||
Class<DIHProperties> writerClass = DocBuilder.loadClass(configPw.getType(), this.core);
|
||||
propWriter = writerClass.getConstructor().newInstance();
|
||||
propWriter.init(this, configPw.getParameters());
|
||||
} catch (Exception e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Unable to PropertyWriter implementation:" + configPw.getType(), e);
|
||||
}
|
||||
return propWriter;
|
||||
}
|
||||
|
||||
public DIHConfiguration getConfig() {
|
||||
return config;
|
||||
}
|
||||
|
||||
Date getIndexStartTime() {
|
||||
return indexStartTime;
|
||||
}
|
||||
|
||||
void setIndexStartTime(Date indextStartTime) {
|
||||
this.indexStartTime = indextStartTime;
|
||||
}
|
||||
|
||||
void store(Object key, Object value) {
|
||||
store.put(key, value);
|
||||
}
|
||||
|
||||
Object retrieve(Object key) {
|
||||
return store.get(key);
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
||||
public DataSource getDataSourceInstance(Entity key, String name, Context ctx) {
|
||||
Map<String,String> p = requestLevelDataSourceProps.get(name);
|
||||
if (p == null)
|
||||
p = config.getDataSources().get(name);
|
||||
if (p == null)
|
||||
p = requestLevelDataSourceProps.get(null);// for default data source
|
||||
if (p == null)
|
||||
p = config.getDataSources().get(null);
|
||||
if (p == null)
|
||||
throw new DataImportHandlerException(SEVERE,
|
||||
"No dataSource :" + name + " available for entity :" + key.getName());
|
||||
String type = p.get(TYPE);
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
DataSource dataSrc = null;
|
||||
if (type == null) {
|
||||
dataSrc = new JdbcDataSource();
|
||||
} else {
|
||||
try {
|
||||
dataSrc = (DataSource) DocBuilder.loadClass(type, getCore()).getConstructor().newInstance();
|
||||
} catch (Exception e) {
|
||||
wrapAndThrow(SEVERE, e, "Invalid type for data source: " + type);
|
||||
}
|
||||
}
|
||||
try {
|
||||
Properties copyProps = new Properties();
|
||||
copyProps.putAll(p);
|
||||
Map<String, Object> map = ctx.getRequestParameters();
|
||||
if (map.containsKey("rows")) {
|
||||
int rows = Integer.parseInt((String) map.get("rows"));
|
||||
if (map.containsKey("start")) {
|
||||
rows += Integer.parseInt((String) map.get("start"));
|
||||
}
|
||||
copyProps.setProperty("maxRows", String.valueOf(rows));
|
||||
}
|
||||
dataSrc.init(ctx, copyProps);
|
||||
} catch (Exception e) {
|
||||
wrapAndThrow(SEVERE, e, "Failed to initialize DataSource: " + key.getDataSourceName());
|
||||
}
|
||||
return dataSrc;
|
||||
}
|
||||
|
||||
public Status getStatus() {
|
||||
return status;
|
||||
}
|
||||
|
||||
public void setStatus(Status status) {
|
||||
this.status = status;
|
||||
}
|
||||
|
||||
public boolean isBusy() {
|
||||
return importLock.isLocked();
|
||||
}
|
||||
|
||||
public void doFullImport(DIHWriter writer, RequestInfo requestParams) {
|
||||
log.info("Starting Full Import");
|
||||
setStatus(Status.RUNNING_FULL_DUMP);
|
||||
try {
|
||||
DIHProperties dihPropWriter = createPropertyWriter();
|
||||
setIndexStartTime(dihPropWriter.getCurrentTimestamp());
|
||||
docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
|
||||
checkWritablePersistFile(writer, dihPropWriter);
|
||||
docBuilder.execute();
|
||||
if (!requestParams.isDebug())
|
||||
cumulativeStatistics.add(docBuilder.importStatistics);
|
||||
} catch (Exception e) {
|
||||
SolrException.log(log, "Full Import failed", e);
|
||||
docBuilder.handleError("Full Import failed", e);
|
||||
} finally {
|
||||
setStatus(Status.IDLE);
|
||||
DocBuilder.INSTANCE.set(null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void checkWritablePersistFile(DIHWriter writer, DIHProperties dihPropWriter) {
|
||||
if (isDeltaImportSupported && !dihPropWriter.isWritable()) {
|
||||
throw new DataImportHandlerException(SEVERE,
|
||||
"Properties is not writable. Delta imports are supported by data config but will not work.");
|
||||
}
|
||||
}
|
||||
|
||||
public void doDeltaImport(DIHWriter writer, RequestInfo requestParams) {
|
||||
log.info("Starting Delta Import");
|
||||
setStatus(Status.RUNNING_DELTA_DUMP);
|
||||
try {
|
||||
DIHProperties dihPropWriter = createPropertyWriter();
|
||||
setIndexStartTime(dihPropWriter.getCurrentTimestamp());
|
||||
docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
|
||||
checkWritablePersistFile(writer, dihPropWriter);
|
||||
docBuilder.execute();
|
||||
if (!requestParams.isDebug())
|
||||
cumulativeStatistics.add(docBuilder.importStatistics);
|
||||
} catch (Exception e) {
|
||||
log.error("Delta Import Failed", e);
|
||||
docBuilder.handleError("Delta Import Failed", e);
|
||||
} finally {
|
||||
setStatus(Status.IDLE);
|
||||
DocBuilder.INSTANCE.set(null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void runAsync(final RequestInfo reqParams, final DIHWriter sw) {
|
||||
new Thread(() -> runCmd(reqParams, sw)).start();
|
||||
}
|
||||
|
||||
void runCmd(RequestInfo reqParams, DIHWriter sw) {
|
||||
String command = reqParams.getCommand();
|
||||
if (command.equals(ABORT_CMD)) {
|
||||
if (docBuilder != null) {
|
||||
docBuilder.abort();
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!importLock.tryLock()){
|
||||
log.warn("Import command failed . another import is running");
|
||||
return;
|
||||
}
|
||||
try {
|
||||
if (FULL_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) {
|
||||
doFullImport(sw, reqParams);
|
||||
} else if (command.equals(DELTA_IMPORT_CMD)) {
|
||||
doDeltaImport(sw, reqParams);
|
||||
}
|
||||
} finally {
|
||||
importLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, String> getStatusMessages() {
|
||||
//this map object is a Collections.synchronizedMap(new LinkedHashMap()). if we
|
||||
// synchronize on the object it must be safe to iterate through the map
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
Map statusMessages = (Map) retrieve(STATUS_MSGS);
|
||||
Map<String, String> result = new LinkedHashMap<>();
|
||||
if (statusMessages != null) {
|
||||
synchronized (statusMessages) {
|
||||
for (Object o : statusMessages.entrySet()) {
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
Map.Entry e = (Map.Entry) o;
|
||||
//the toString is taken because some of the Objects create the data lazily when toString() is called
|
||||
result.put((String) e.getKey(), e.getValue().toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
public DocBuilder getDocBuilder() {
|
||||
return docBuilder;
|
||||
}
|
||||
|
||||
public DocBuilder getDocBuilder(DIHWriter writer, RequestInfo requestParams) {
|
||||
DIHProperties dihPropWriter = createPropertyWriter();
|
||||
return new DocBuilder(this, writer, dihPropWriter, requestParams);
|
||||
}
|
||||
|
||||
Map<String, Evaluator> getEvaluators() {
|
||||
return getEvaluators(config.getFunctions());
|
||||
}
|
||||
|
||||
/**
|
||||
* used by tests.
|
||||
*/
|
||||
@SuppressWarnings({"unchecked"})
|
||||
Map<String, Evaluator> getEvaluators(List<Map<String,String>> fn) {
|
||||
Map<String, Evaluator> evaluators = new HashMap<>();
|
||||
evaluators.put(Evaluator.DATE_FORMAT_EVALUATOR, new DateFormatEvaluator());
|
||||
evaluators.put(Evaluator.SQL_ESCAPE_EVALUATOR, new SqlEscapingEvaluator());
|
||||
evaluators.put(Evaluator.URL_ENCODE_EVALUATOR, new UrlEvaluator());
|
||||
evaluators.put(Evaluator.ESCAPE_SOLR_QUERY_CHARS, new SolrQueryEscapingEvaluator());
|
||||
SolrCore core = docBuilder == null ? null : docBuilder.dataImporter.getCore();
|
||||
for (Map<String, String> map : fn) {
|
||||
try {
|
||||
evaluators.put(map.get(NAME), (Evaluator) loadClass(map.get(CLASS), core).getConstructor().newInstance());
|
||||
} catch (Exception e) {
|
||||
wrapAndThrow(SEVERE, e, "Unable to instantiate evaluator: " + map.get(CLASS));
|
||||
}
|
||||
}
|
||||
return evaluators;
|
||||
}
|
||||
|
||||
static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
|
||||
@Override
|
||||
protected AtomicLong initialValue() {
|
||||
return new AtomicLong();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
static final class MSG {
|
||||
public static final String NO_CONFIG_FOUND = "Configuration not found";
|
||||
|
||||
public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run";
|
||||
|
||||
public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid";
|
||||
|
||||
public static final String LOAD_EXP = "Exception while loading DataImporter";
|
||||
|
||||
public static final String JMX_DESC = "Manage data import from databases to Solr";
|
||||
|
||||
public static final String CMD_RUNNING = "A command is still running...";
|
||||
|
||||
public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag <str name=\"enableDebug\">true</str> in solrconfig.xml";
|
||||
|
||||
public static final String CONFIG_RELOADED = "Configuration Re-loaded sucessfully";
|
||||
|
||||
public static final String CONFIG_NOT_RELOADED = "Configuration NOT Re-loaded...Data Importer is busy.";
|
||||
|
||||
public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";
|
||||
|
||||
public static final String TOTAL_FAILED_DOCS = "Total Documents Failed";
|
||||
|
||||
public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource";
|
||||
|
||||
public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched";
|
||||
|
||||
public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted";
|
||||
|
||||
public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped";
|
||||
}
|
||||
|
||||
public SolrCore getCore() {
|
||||
return core;
|
||||
}
|
||||
|
||||
void putToCoreScopeSession(String key, Object val) {
|
||||
coreScopeSession.put(key, val);
|
||||
}
|
||||
Object getFromCoreScopeSession(String key) {
|
||||
return coreScopeSession.get(key);
|
||||
}
|
||||
|
||||
public static final String COLUMN = "column";
|
||||
|
||||
public static final String TYPE = "type";
|
||||
|
||||
public static final String DATA_SRC = "dataSource";
|
||||
|
||||
public static final String MULTI_VALUED = "multiValued";
|
||||
|
||||
public static final String NAME = "name";
|
||||
|
||||
public static final String STATUS_MSGS = "status-messages";
|
||||
|
||||
public static final String FULL_IMPORT_CMD = "full-import";
|
||||
|
||||
public static final String IMPORT_CMD = "import";
|
||||
|
||||
public static final String DELTA_IMPORT_CMD = "delta-import";
|
||||
|
||||
public static final String ABORT_CMD = "abort";
|
||||
|
||||
public static final String DEBUG_MODE = "debug";
|
||||
|
||||
public static final String RELOAD_CONF_CMD = "reload-config";
|
||||
|
||||
public static final String SHOW_CONF_CMD = "show-config";
|
||||
|
||||
}
|
|
@ -1,66 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Provides data from a source with a given query.
|
||||
* </p>
|
||||
* <p>
|
||||
* Implementation of this abstract class must provide a default no-arg constructor
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public abstract class DataSource<T> implements Closeable {
|
||||
|
||||
/**
|
||||
* Initializes the DataSource with the <code>Context</code> and
|
||||
* initialization properties.
|
||||
* <p>
|
||||
* This is invoked by the <code>DataImporter</code> after creating an
|
||||
* instance of this class.
|
||||
*/
|
||||
public abstract void init(Context context, Properties initProps);
|
||||
|
||||
/**
|
||||
* Get records for the given query.The return type depends on the
|
||||
* implementation .
|
||||
*
|
||||
* @param query The query string. It can be a SQL for JdbcDataSource or a URL
|
||||
* for HttpDataSource or a file location for FileDataSource or a custom
|
||||
* format for your own custom DataSource.
|
||||
* @return Depends on the implementation. For instance JdbcDataSource returns
|
||||
* an Iterator<Map <String,Object>>
|
||||
*/
|
||||
public abstract T getData(String query);
|
||||
|
||||
/**
|
||||
* Cleans up resources of this DataSource after use.
|
||||
*/
|
||||
public abstract void close();
|
||||
}
|
|
@ -1,180 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.IllformedLocaleException;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TimeZone;
|
||||
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.handler.dataimport.config.EntityField;
|
||||
import org.apache.solr.util.DateMathParser;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
|
||||
/**
|
||||
* <p>Formats values using a given date format. </p>
|
||||
* <p>Pass three parameters:
|
||||
* <ul>
|
||||
* <li>An {@link EntityField} or a date expression to be parsed with
|
||||
* the {@link DateMathParser} class If the value is in a String,
|
||||
* then it is assumed to be a datemath expression, otherwise it
|
||||
* resolved using a {@link VariableResolver} instance</li>
|
||||
* <li>A date format see {@link SimpleDateFormat} for the syntax.</li>
|
||||
* <li>The {@link Locale} to parse.
|
||||
* (optional. Defaults to the Root Locale) </li>
|
||||
* </ul>
|
||||
*/
|
||||
public class DateFormatEvaluator extends Evaluator {
|
||||
|
||||
public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
|
||||
protected Map<String, Locale> availableLocales = new HashMap<>();
|
||||
protected Set<String> availableTimezones = new HashSet<>();
|
||||
|
||||
@SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
|
||||
public DateFormatEvaluator() {
|
||||
for (Locale locale : Locale.getAvailableLocales()) {
|
||||
availableLocales.put(locale.toString(), locale);
|
||||
}
|
||||
for (String tz : TimeZone.getAvailableIDs()) {
|
||||
availableTimezones.add(tz);
|
||||
}
|
||||
}
|
||||
|
||||
private SimpleDateFormat getDateFormat(String pattern, TimeZone timezone, Locale locale) {
|
||||
final SimpleDateFormat sdf = new SimpleDateFormat(pattern, locale);
|
||||
sdf.setTimeZone(timezone);
|
||||
return sdf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String evaluate(String expression, Context context) {
|
||||
List<Object> l = parseParams(expression, context.getVariableResolver());
|
||||
if (l.size() < 2 || l.size() > 4) {
|
||||
throw new DataImportHandlerException(SEVERE, "'formatDate()' must have two, three or four parameters ");
|
||||
}
|
||||
Object o = l.get(0);
|
||||
Object format = l.get(1);
|
||||
if (format instanceof VariableWrapper) {
|
||||
VariableWrapper wrapper = (VariableWrapper) format;
|
||||
o = wrapper.resolve();
|
||||
format = o.toString();
|
||||
}
|
||||
Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
|
||||
if(l.size()>2) {
|
||||
Object localeObj = l.get(2);
|
||||
String localeStr = null;
|
||||
if (localeObj instanceof VariableWrapper) {
|
||||
localeStr = ((VariableWrapper) localeObj).resolve().toString();
|
||||
} else {
|
||||
localeStr = localeObj.toString();
|
||||
}
|
||||
locale = availableLocales.get(localeStr);
|
||||
if (locale == null) try {
|
||||
locale = new Locale.Builder().setLanguageTag(localeStr).build();
|
||||
} catch (IllformedLocaleException ex) {
|
||||
throw new DataImportHandlerException(SEVERE, "Malformed / non-existent locale: " + localeStr, ex);
|
||||
}
|
||||
}
|
||||
TimeZone tz = TimeZone.getDefault(); // DWS TODO: is this the right default for us? Deserves explanation if so.
|
||||
if(l.size()==4) {
|
||||
Object tzObj = l.get(3);
|
||||
String tzStr = null;
|
||||
if (tzObj instanceof VariableWrapper) {
|
||||
tzStr = ((VariableWrapper) tzObj).resolve().toString();
|
||||
} else {
|
||||
tzStr = tzObj.toString();
|
||||
}
|
||||
if(availableTimezones.contains(tzStr)) {
|
||||
tz = TimeZone.getTimeZone(tzStr);
|
||||
} else {
|
||||
throw new DataImportHandlerException(SEVERE, "Unsupported Timezone: " + tzStr);
|
||||
}
|
||||
}
|
||||
String dateFmt = format.toString();
|
||||
SimpleDateFormat fmt = getDateFormat(dateFmt, tz, locale);
|
||||
Date date = null;
|
||||
if (o instanceof VariableWrapper) {
|
||||
date = evaluateWrapper((VariableWrapper) o, locale, tz);
|
||||
} else {
|
||||
date = evaluateString(o.toString(), locale, tz);
|
||||
}
|
||||
return fmt.format(date);
|
||||
}
|
||||
|
||||
/**
|
||||
* NOTE: declared as a method to allow for extensibility
|
||||
*
|
||||
* @lucene.experimental this API is experimental and subject to change
|
||||
* @return the result of evaluating a string
|
||||
*/
|
||||
protected Date evaluateWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) {
|
||||
Date date = null;
|
||||
Object variableval = resolveWrapper(variableWrapper,locale,tz);
|
||||
if (variableval instanceof Date) {
|
||||
date = (Date) variableval;
|
||||
} else {
|
||||
String s = variableval.toString();
|
||||
try {
|
||||
date = getDateFormat(DEFAULT_DATE_FORMAT, tz, locale).parse(s);
|
||||
} catch (ParseException exp) {
|
||||
wrapAndThrow(SEVERE, exp, "Invalid expression for date");
|
||||
}
|
||||
}
|
||||
return date;
|
||||
}
|
||||
|
||||
/**
|
||||
* NOTE: declared as a method to allow for extensibility
|
||||
* @lucene.experimental
|
||||
* @return the result of evaluating a string
|
||||
*/
|
||||
protected Date evaluateString(String datemathfmt, Locale locale, TimeZone tz) {
|
||||
// note: DMP does not use the locale but perhaps a subclass might use it, for e.g. parsing a date in a custom
|
||||
// string that doesn't necessarily have date math?
|
||||
//TODO refactor DateMathParser.parseMath a bit to have a static method for this logic.
|
||||
if (datemathfmt.startsWith("NOW")) {
|
||||
datemathfmt = datemathfmt.substring("NOW".length());
|
||||
}
|
||||
try {
|
||||
DateMathParser parser = new DateMathParser(tz);
|
||||
parser.setNow(new Date());// thus do *not* use SolrRequestInfo
|
||||
return parser.parseMath(datemathfmt);
|
||||
} catch (ParseException e) {
|
||||
throw wrapAndThrow(SEVERE, e, "Invalid expression for date");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* NOTE: declared as a method to allow for extensibility
|
||||
* @lucene.experimental
|
||||
* @return the result of resolving the variable wrapper
|
||||
*/
|
||||
protected Object resolveWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) {
|
||||
return variableWrapper.resolve();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,106 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.*;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* {@link Transformer} instance which creates {@link Date} instances out of {@link String}s.
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* <p>
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class DateFormatTransformer extends Transformer {
|
||||
private Map<String, SimpleDateFormat> fmtCache = new HashMap<>();
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Object transformRow(Map<String, Object> aRow, Context context) {
|
||||
|
||||
for (Map<String, String> map : context.getAllEntityFields()) {
|
||||
Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
|
||||
String customLocale = map.get(LOCALE);
|
||||
if (customLocale != null) {
|
||||
try {
|
||||
locale = new Locale.Builder().setLanguageTag(customLocale).build();
|
||||
} catch (IllformedLocaleException e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid Locale specified: " + customLocale, e);
|
||||
}
|
||||
}
|
||||
|
||||
String fmt = map.get(DATE_TIME_FMT);
|
||||
if (fmt == null)
|
||||
continue;
|
||||
VariableResolver resolver = context.getVariableResolver();
|
||||
fmt = resolver.replaceTokens(fmt);
|
||||
String column = map.get(DataImporter.COLUMN);
|
||||
String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
|
||||
if (srcCol == null)
|
||||
srcCol = column;
|
||||
try {
|
||||
Object o = aRow.get(srcCol);
|
||||
if (o instanceof List) {
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
List inputs = (List) o;
|
||||
List<Date> results = new ArrayList<>();
|
||||
for (Object input : inputs) {
|
||||
results.add(process(input, fmt, locale));
|
||||
}
|
||||
aRow.put(column, results);
|
||||
} else {
|
||||
if (o != null) {
|
||||
aRow.put(column, process(o, fmt, locale));
|
||||
}
|
||||
}
|
||||
} catch (ParseException e) {
|
||||
log.warn("Could not parse a Date field ", e);
|
||||
}
|
||||
}
|
||||
return aRow;
|
||||
}
|
||||
|
||||
private Date process(Object value, String format, Locale locale) throws ParseException {
|
||||
if (value == null) return null;
|
||||
String strVal = value.toString().trim();
|
||||
if (strVal.length() == 0)
|
||||
return null;
|
||||
SimpleDateFormat fmt = fmtCache.get(format);
|
||||
if (fmt == null) {
|
||||
fmt = new SimpleDateFormat(format, locale);
|
||||
fmtCache.put(format, fmt);
|
||||
}
|
||||
return fmt.parse(strVal);
|
||||
}
|
||||
|
||||
public static final String DATE_TIME_FMT = "dateTimeFormat";
|
||||
|
||||
public static final String LOCALE = "locale";
|
||||
}
|
|
@ -1,66 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.util.AbstractList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
|
||||
public class DebugInfo {
|
||||
|
||||
private static final class ChildRollupDocs extends AbstractList<SolrInputDocument> {
|
||||
|
||||
private List<SolrInputDocument> delegate = new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public SolrInputDocument get(int index) {
|
||||
return delegate.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return delegate.size();
|
||||
}
|
||||
|
||||
public boolean add(SolrInputDocument e) {
|
||||
SolrInputDocument transformed = e.deepCopy();
|
||||
if (transformed.hasChildDocuments()) {
|
||||
ChildRollupDocs childList = new ChildRollupDocs();
|
||||
childList.addAll(transformed.getChildDocuments());
|
||||
transformed.addField("_childDocuments_", childList);
|
||||
transformed.getChildDocuments().clear();
|
||||
}
|
||||
return delegate.add(transformed);
|
||||
}
|
||||
}
|
||||
|
||||
public List<SolrInputDocument> debugDocuments = new ChildRollupDocs();
|
||||
|
||||
public NamedList<String> debugVerboseOutput = null;
|
||||
public boolean verbose;
|
||||
|
||||
public DebugInfo(Map<String,Object> requestParams) {
|
||||
verbose = StrUtils.parseBool((String) requestParams.get("verbose"), false);
|
||||
debugVerboseOutput = new NamedList<>();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,295 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Stack;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Implements most of the interactive development functionality
|
||||
* </p>
|
||||
* <p/>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p/>
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
class DebugLogger {
|
||||
private Stack<DebugInfo> debugStack;
|
||||
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
NamedList output;
|
||||
// private final SolrWriter writer1;
|
||||
|
||||
private static final String LINE = "---------------------------------------------";
|
||||
|
||||
private MessageFormat fmt = new MessageFormat(
|
||||
"----------- row #{0}-------------", Locale.ROOT);
|
||||
|
||||
boolean enabled = true;
|
||||
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
public DebugLogger() {
|
||||
// writer = solrWriter;
|
||||
output = new NamedList();
|
||||
debugStack = new Stack<DebugInfo>() {
|
||||
|
||||
@Override
|
||||
public DebugInfo pop() {
|
||||
if (size() == 1)
|
||||
throw new DataImportHandlerException(
|
||||
DataImportHandlerException.SEVERE, "Stack is becoming empty");
|
||||
return super.pop();
|
||||
}
|
||||
};
|
||||
debugStack.push(new DebugInfo(null, DIHLogLevels.NONE, null));
|
||||
output = debugStack.peek().lst;
|
||||
}
|
||||
|
||||
private DebugInfo peekStack() {
|
||||
return debugStack.isEmpty() ? null : debugStack.peek();
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked"})
|
||||
public void log(DIHLogLevels event, String name, Object row) {
|
||||
if (event == DIHLogLevels.DISABLE_LOGGING) {
|
||||
enabled = false;
|
||||
return;
|
||||
} else if (event == DIHLogLevels.ENABLE_LOGGING) {
|
||||
enabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!enabled && event != DIHLogLevels.START_ENTITY
|
||||
&& event != DIHLogLevels.END_ENTITY) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (event == DIHLogLevels.START_DOC) {
|
||||
debugStack.push(new DebugInfo(null, DIHLogLevels.START_DOC, peekStack()));
|
||||
} else if (DIHLogLevels.START_ENTITY == event) {
|
||||
debugStack
|
||||
.push(new DebugInfo(name, DIHLogLevels.START_ENTITY, peekStack()));
|
||||
} else if (DIHLogLevels.ENTITY_OUT == event
|
||||
|| DIHLogLevels.PRE_TRANSFORMER_ROW == event) {
|
||||
if (debugStack.peek().type == DIHLogLevels.START_ENTITY
|
||||
|| debugStack.peek().type == DIHLogLevels.START_DOC) {
|
||||
debugStack.peek().lst.add(null, fmt.format(new Object[]{++debugStack
|
||||
.peek().rowCount}));
|
||||
addToNamedList(debugStack.peek().lst, row);
|
||||
debugStack.peek().lst.add(null, LINE);
|
||||
}
|
||||
} else if (event == DIHLogLevels.ROW_END) {
|
||||
popAllTransformers();
|
||||
} else if (DIHLogLevels.END_ENTITY == event) {
|
||||
while (debugStack.pop().type != DIHLogLevels.START_ENTITY)
|
||||
;
|
||||
} else if (DIHLogLevels.END_DOC == event) {
|
||||
while (debugStack.pop().type != DIHLogLevels.START_DOC)
|
||||
;
|
||||
} else if (event == DIHLogLevels.TRANSFORMER_EXCEPTION) {
|
||||
debugStack.push(new DebugInfo(name, event, peekStack()));
|
||||
debugStack.peek().lst.add("EXCEPTION",
|
||||
getStacktraceString((Exception) row));
|
||||
} else if (DIHLogLevels.TRANSFORMED_ROW == event) {
|
||||
debugStack.push(new DebugInfo(name, event, peekStack()));
|
||||
debugStack.peek().lst.add(null, LINE);
|
||||
addToNamedList(debugStack.peek().lst, row);
|
||||
debugStack.peek().lst.add(null, LINE);
|
||||
if (row instanceof DataImportHandlerException) {
|
||||
DataImportHandlerException dataImportHandlerException = (DataImportHandlerException) row;
|
||||
dataImportHandlerException.debugged = true;
|
||||
}
|
||||
} else if (DIHLogLevels.ENTITY_META == event) {
|
||||
popAllTransformers();
|
||||
debugStack.peek().lst.add(name, row);
|
||||
} else if (DIHLogLevels.ENTITY_EXCEPTION == event) {
|
||||
if (row instanceof DataImportHandlerException) {
|
||||
DataImportHandlerException dihe = (DataImportHandlerException) row;
|
||||
if (dihe.debugged)
|
||||
return;
|
||||
dihe.debugged = true;
|
||||
}
|
||||
|
||||
popAllTransformers();
|
||||
debugStack.peek().lst.add("EXCEPTION",
|
||||
getStacktraceString((Exception) row));
|
||||
}
|
||||
}
|
||||
|
||||
private void popAllTransformers() {
|
||||
while (true) {
|
||||
DIHLogLevels type = debugStack.peek().type;
|
||||
if (type == DIHLogLevels.START_DOC || type == DIHLogLevels.START_ENTITY)
|
||||
break;
|
||||
debugStack.pop();
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked"})
|
||||
private void addToNamedList(@SuppressWarnings({"rawtypes"})NamedList nl, Object row) {
|
||||
if (row instanceof List) {
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
List list = (List) row;
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
NamedList l = new NamedList();
|
||||
nl.add(null, l);
|
||||
for (Object o : list) {
|
||||
Map<String, Object> map = (Map<String, Object>) o;
|
||||
for (Map.Entry<String, Object> entry : map.entrySet())
|
||||
nl.add(entry.getKey(), entry.getValue());
|
||||
}
|
||||
} else if (row instanceof Map) {
|
||||
Map<String, Object> map = (Map<String, Object>) row;
|
||||
for (Map.Entry<String, Object> entry : map.entrySet())
|
||||
nl.add(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
DataSource wrapDs(final DataSource ds) {
|
||||
return new DataSource() {
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
ds.init(context, initProps);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
ds.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getData(String query) {
|
||||
log(DIHLogLevels.ENTITY_META, "query", query);
|
||||
long start = System.nanoTime();
|
||||
try {
|
||||
return ds.getData(query);
|
||||
} catch (DataImportHandlerException de) {
|
||||
log(DIHLogLevels.ENTITY_EXCEPTION,
|
||||
null, de);
|
||||
throw de;
|
||||
} catch (Exception e) {
|
||||
log(DIHLogLevels.ENTITY_EXCEPTION,
|
||||
null, e);
|
||||
DataImportHandlerException de = new DataImportHandlerException(
|
||||
DataImportHandlerException.SEVERE, "", e);
|
||||
de.debugged = true;
|
||||
throw de;
|
||||
} finally {
|
||||
log(DIHLogLevels.ENTITY_META, "time-taken", DocBuilder
|
||||
.getTimeElapsedSince(start));
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
Transformer wrapTransformer(final Transformer t) {
|
||||
return new Transformer() {
|
||||
@Override
|
||||
public Object transformRow(Map<String, Object> row, Context context) {
|
||||
log(DIHLogLevels.PRE_TRANSFORMER_ROW, null, row);
|
||||
String tName = getTransformerName(t);
|
||||
Object result = null;
|
||||
try {
|
||||
result = t.transformRow(row, context);
|
||||
log(DIHLogLevels.TRANSFORMED_ROW, tName, result);
|
||||
} catch (DataImportHandlerException de) {
|
||||
log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, de);
|
||||
de.debugged = true;
|
||||
throw de;
|
||||
} catch (Exception e) {
|
||||
log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, e);
|
||||
DataImportHandlerException de = new DataImportHandlerException(DataImportHandlerException.SEVERE, "", e);
|
||||
de.debugged = true;
|
||||
throw de;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static String getStacktraceString(Exception e) {
|
||||
StringWriter sw = new StringWriter();
|
||||
e.printStackTrace(new PrintWriter(sw));
|
||||
return sw.toString();
|
||||
}
|
||||
|
||||
static String getTransformerName(Transformer t) {
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
Class transClass = t.getClass();
|
||||
if (t instanceof EntityProcessorWrapper.ReflectionTransformer) {
|
||||
return ((EntityProcessorWrapper.ReflectionTransformer) t).trans;
|
||||
}
|
||||
if (t instanceof ScriptTransformer) {
|
||||
ScriptTransformer scriptTransformer = (ScriptTransformer) t;
|
||||
return "script:" + scriptTransformer.getFunctionName();
|
||||
}
|
||||
if (transClass.getPackage().equals(DebugLogger.class.getPackage())) {
|
||||
return transClass.getSimpleName();
|
||||
} else {
|
||||
return transClass.getName();
|
||||
}
|
||||
}
|
||||
|
||||
private static class DebugInfo {
|
||||
String name;
|
||||
|
||||
int tCount, rowCount;
|
||||
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
NamedList lst;
|
||||
|
||||
DIHLogLevels type;
|
||||
|
||||
DebugInfo parent;
|
||||
|
||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
||||
public DebugInfo(String name, DIHLogLevels type, DebugInfo parent) {
|
||||
this.name = name;
|
||||
this.type = type;
|
||||
this.parent = parent;
|
||||
lst = new NamedList();
|
||||
if (parent != null) {
|
||||
String displayName = null;
|
||||
if (type == DIHLogLevels.START_ENTITY) {
|
||||
displayName = "entity:" + name;
|
||||
} else if (type == DIHLogLevels.TRANSFORMED_ROW
|
||||
|| type == DIHLogLevels.TRANSFORMER_EXCEPTION) {
|
||||
displayName = "transformer:" + name;
|
||||
} else if (type == DIHLogLevels.START_DOC) {
|
||||
this.name = displayName = "document#" + SolrWriter.getDocCount();
|
||||
}
|
||||
parent.lst.add(displayName, lst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,114 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* An instance of entity processor serves an entity. It is reused throughout the
|
||||
* import process.
|
||||
* </p>
|
||||
* <p>
|
||||
* Implementations of this abstract class must provide a public no-args constructor.
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public abstract class EntityProcessor implements Closeable {
|
||||
|
||||
/**
|
||||
* This method is called when it starts processing an entity. When it comes
|
||||
* back to the entity it is called again. So it can reset anything at that point.
|
||||
* For a rootmost entity this is called only once for an ingestion. For sub-entities , this
|
||||
* is called multiple once for each row from its parent entity
|
||||
*
|
||||
* @param context The current context
|
||||
*/
|
||||
public abstract void init(Context context);
|
||||
|
||||
/**
|
||||
* This method helps streaming the data for each row . The implementation
|
||||
* would fetch as many rows as needed and gives one 'row' at a time. Only this
|
||||
* method is used during a full import
|
||||
*
|
||||
* @return A 'row'. The 'key' for the map is the column name and the 'value'
|
||||
* is the value of that column. If there are no more rows to be
|
||||
* returned, return 'null'
|
||||
*/
|
||||
public abstract Map<String, Object> nextRow();
|
||||
|
||||
/**
|
||||
* This is used for delta-import. It gives the pks of the changed rows in this
|
||||
* entity
|
||||
*
|
||||
* @return the pk vs value of all changed rows
|
||||
*/
|
||||
public abstract Map<String, Object> nextModifiedRowKey();
|
||||
|
||||
/**
|
||||
* This is used during delta-import. It gives the primary keys of the rows
|
||||
* that are deleted from this entity. If this entity is the root entity, solr
|
||||
* document is deleted. If this is a sub-entity, the Solr document is
|
||||
* considered as 'changed' and will be recreated
|
||||
*
|
||||
* @return the pk vs value of all changed rows
|
||||
*/
|
||||
public abstract Map<String, Object> nextDeletedRowKey();
|
||||
|
||||
/**
|
||||
* This is used during delta-import. This gives the primary keys and their
|
||||
* values of all the rows changed in a parent entity due to changes in this
|
||||
* entity.
|
||||
*
|
||||
* @return the pk vs value of all changed rows in the parent entity
|
||||
*/
|
||||
public abstract Map<String, Object> nextModifiedParentRowKey();
|
||||
|
||||
/**
|
||||
* Invoked for each entity at the very end of the import to do any needed cleanup tasks.
|
||||
*
|
||||
*/
|
||||
public abstract void destroy();
|
||||
|
||||
/**
|
||||
* Invoked after the transformers are invoked. EntityProcessors can add, remove or modify values
|
||||
* added by Transformers in this method.
|
||||
*
|
||||
* @param r The transformed row
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public void postTransform(Map<String, Object> r) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Invoked when the Entity processor is destroyed towards the end of import.
|
||||
*
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public void close() {
|
||||
//no-op
|
||||
}
|
||||
}
|
|
@ -1,174 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * <p> Base class for all implementations of {@link EntityProcessor} </p> <p> Most implementations of {@link EntityProcessor}
 * extend this base class which provides common functionality. </p>
 * <p>
 * <b>This API is experimental and subject to change</b>
 *
 * @since solr 1.3
 */
public class EntityProcessorBase extends EntityProcessor {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  // Cleared by firstInit(); guards one-time setup performed on the first init(Context) call.
  protected boolean isFirstInit = true;

  // Value of the entity's "name" attribute, captured in firstInit().
  protected String entityName;

  protected Context context;

  // Source of rows for the current query; set to null once exhausted or on failure (see getNext()).
  protected Iterator<Map<String, Object>> rowIterator;

  // The query whose rows are currently being streamed; cleared together with rowIterator.
  protected String query;

  // Error-handling policy read from the ON_ERROR entity attribute; defaults to ABORT.
  protected String onError = ABORT;

  // Non-null only when a cache implementation is configured for this entity (see initCache()).
  protected DIHCacheSupport cacheSupport = null;

  // Merge-join style row supplier; mutually exclusive with cacheSupport (see firstInit()).
  private Zipper zipper;


  /**
   * Invoked once per parent row. Runs one-time setup on the first call, then notifies
   * either the zipper or (when configured) the cache of the new parent context.
   */
  @Override
  public void init(Context context) {
    this.context = context;
    if (isFirstInit) {
      firstInit(context);
    }
    if(zipper!=null){
      zipper.onNewParent(context);
    }else{
      if(cacheSupport!=null) {
        cacheSupport.initNewParent(context);
      }
    }
  }

  /**
   * First-time init call; do one-time operations here.
   * Subclasses that override this must call it from the overridden method,
   * otherwise nextRow() throws NPE when accessing the uninitialized zipper.
   */
  protected void firstInit(Context context) {
    entityName = context.getEntityAttribute("name");
    String s = context.getEntityAttribute(ON_ERROR);
    if (s != null) onError = s;

    zipper = Zipper.createOrNull(context);

    // A zipper and a cache are alternatives: the cache is set up only when no zipper applies.
    if(zipper==null){
      initCache(context);
    }
    isFirstInit = false;
  }

  /**
   * Sets up {@link DIHCacheSupport} when the entity declares a cache implementation
   * via the {@code cacheImpl} attribute; otherwise leaves cacheSupport null.
   */
  protected void initCache(Context context) {
    String cacheImplName = context
        .getResolvedEntityAttribute(DIHCacheSupport.CACHE_IMPL);

    if (cacheImplName != null ) {
      cacheSupport = new DIHCacheSupport(context, cacheImplName);
    }
  }

  // Delta-import hook; this base class does not support delta imports.
  @Override
  public Map<String, Object> nextModifiedRowKey() {
    return null;
  }

  // Delta-import hook; this base class does not support delta imports.
  @Override
  public Map<String, Object> nextDeletedRowKey() {
    return null;
  }

  // Delta-import hook; this base class does not support delta imports.
  @Override
  public Map<String, Object> nextModifiedParentRowKey() {
    return null;
  }

  /**
   * For a simple implementation, this is the only method that the sub-class should implement. This is intended to
   * stream rows one-by-one. Return null to signal end of rows
   *
   * @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return
   *         null to signal end of rows
   */
  @Override
  public Map<String, Object> nextRow() {
    return null;// do not do anything
  }

  /**
   * Supplies the next row from the zipper, the cache, or the plain rowIterator
   * (in that order of preference). When the iterator is exhausted or throws,
   * both {@code query} and {@code rowIterator} are reset to null; a failure is
   * logged and rethrown via wrapAndThrow with WARN severity.
   */
  protected Map<String, Object> getNext() {
    if(zipper!=null){
      return zipper.supplyNextChild(rowIterator);
    }else{
      if(cacheSupport==null) {
        try {
          if (rowIterator == null)
            return null;
          if (rowIterator.hasNext())
            return rowIterator.next();
          query = null;
          rowIterator = null;
          return null;
        } catch (Exception e) {
          SolrException.log(log, "getNext() failed for query '" + query + "'", e);
          query = null;
          rowIterator = null;
          wrapAndThrow(DataImportHandlerException.WARN, e);
          return null;
        }
      } else {
        return cacheSupport.getCacheData(context, query, rowIterator);
      }
    }
  }


  /**
   * Releases per-import state; destroys the cache (if any) and drops the reference.
   */
  @Override
  public void destroy() {
    query = null;
    if(cacheSupport!=null){
      cacheSupport.destroyAll();
    }
    cacheSupport = null;
  }



  // Entity attribute naming a comma-separated list of transformers.
  public static final String TRANSFORMER = "transformer";

  // Method name looked up reflectively on non-Transformer transformer classes.
  public static final String TRANSFORM_ROW = "transformRow";

  // Entity attribute selecting the error policy; one of ABORT, CONTINUE, SKIP.
  public static final String ON_ERROR = "onError";

  public static final String ABORT = "abort";

  public static final String CONTINUE = "continue";

  public static final String SKIP = "skip";
}
|
|
@ -1,357 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
|
||||
import org.apache.solr.handler.dataimport.config.Entity;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
|
||||
import static org.apache.solr.handler.dataimport.EntityProcessorBase.*;
|
||||
import static org.apache.solr.handler.dataimport.EntityProcessorBase.SKIP;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * A Wrapper over {@link EntityProcessor} instance which performs transforms and handles multi-row outputs correctly.
 *
 * @since solr 1.4
 */
public class EntityProcessorWrapper extends EntityProcessor {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  // The wrapped processor; all row production is delegated to it.
  private EntityProcessor delegate;
  private Entity entity;
  @SuppressWarnings({"rawtypes"})
  private DataSource datasource;
  private List<EntityProcessorWrapper> children = new ArrayList<>();
  private DocBuilder docBuilder;
  private boolean initialized;
  // Error policy resolved from the ON_ERROR attribute; defaults to ABORT.
  private String onError;
  private Context context;
  private VariableResolver resolver;
  private String entityName;

  // Lazily populated by loadTransformers(); null until first applyTransformer() call.
  protected List<Transformer> transformers;

  // Pending rows when a transformer expanded one row into many; drained by getFromRowCache().
  protected List<Map<String, Object>> rowcache;

  public EntityProcessorWrapper(EntityProcessor delegate, Entity entity, DocBuilder docBuilder) {
    this.delegate = delegate;
    this.entity = entity;
    this.docBuilder = docBuilder;
  }

  /**
   * Resets the row cache, resolves the error policy and entity name once,
   * and forwards initialization to the delegate.
   */
  @Override
  public void init(Context context) {
    rowcache = null;
    this.context = context;
    resolver = context.getVariableResolver();
    if (entityName == null) {
      onError = resolver.replaceTokens(context.getEntityAttribute(ON_ERROR));
      if (onError == null) onError = ABORT;
      entityName = context.getEntityAttribute(ConfigNameConstants.NAME);
    }
    delegate.init(context);

  }

  /**
   * Instantiates the transformer chain declared by the TRANSFORMER entity attribute
   * (comma-separated). Supports three forms: "script:functionName" entries,
   * classes implementing {@link Transformer}, and arbitrary classes exposing a
   * transformRow(Map) method (wrapped reflectively). In verbose-debug mode every
   * transformer is wrapped by the debug logger via the overridden add().
   */
  @SuppressWarnings({"unchecked"})
  void loadTransformers() {
    String transClasses = context.getEntityAttribute(TRANSFORMER);

    if (transClasses == null) {
      transformers = Collections.emptyList();
      return;
    }

    String[] transArr = transClasses.split(",");
    transformers = new ArrayList<Transformer>() {
      @Override
      public boolean add(Transformer transformer) {
        if (docBuilder != null && docBuilder.verboseDebug) {
          transformer = docBuilder.getDebugLogger().wrapTransformer(transformer);
        }
        return super.add(transformer);
      }
    };
    for (String aTransArr : transArr) {
      String trans = aTransArr.trim();
      if (trans.startsWith("script:")) {
        // The script transformer is a potential vulnerability, esp. when the script is
        // provided from an untrusted source. Check and don't proceed if source is untrusted.
        checkIfTrusted(trans);
        String functionName = trans.substring("script:".length());
        ScriptTransformer scriptTransformer = new ScriptTransformer();
        scriptTransformer.setFunctionName(functionName);
        transformers.add(scriptTransformer);
        continue;
      }
      try {
        @SuppressWarnings({"rawtypes"})
        Class clazz = DocBuilder.loadClass(trans, context.getSolrCore());
        if (Transformer.class.isAssignableFrom(clazz)) {
          transformers.add((Transformer) clazz.getConstructor().newInstance());
        } else {
          Method meth = clazz.getMethod(TRANSFORM_ROW, Map.class);
          transformers.add(new ReflectionTransformer(meth, clazz, trans));
        }
      } catch (NoSuchMethodException nsme){
        String msg = "Transformer :"
            + trans
            + "does not implement Transformer interface or does not have a transformRow(Map<String.Object> m)method";
        log.error(msg);
        wrapAndThrow(SEVERE, nsme,msg);
      } catch (Exception e) {
        log.error("Unable to load Transformer: {}", aTransArr, e);
        wrapAndThrow(SEVERE, e,"Unable to load Transformer: " + trans);
      }
    }

  }

  /**
   * Rejects script transformers when the collection's configset was uploaded
   * without authentication (untrusted); throws SEVERE via wrapAndThrow in that case.
   */
  private void checkIfTrusted(String trans) {
    if (docBuilder != null) {
      SolrCore core = docBuilder.dataImporter.getCore();
      boolean trusted = (core != null)? core.getCoreDescriptor().isConfigSetTrusted(): true;
      if (!trusted) {
        Exception ex = new SolrException(ErrorCode.UNAUTHORIZED, "The configset for this collection was uploaded "
            + "without any authentication in place,"
            + " and this transformer is not available for collections with untrusted configsets. To use this transformer,"
            + " re-upload the configset after enabling authentication and authorization.");
        String msg = "Transformer: "
            + trans
            + ". " + ex.getMessage();
        log.error(msg);
        wrapAndThrow(SEVERE, ex, msg);
      }
    }
  }

  /**
   * Adapts a plain class with a transformRow(Map) method to the {@link Transformer}
   * contract by invoking the method reflectively on a fresh instance of the class.
   */
  @SuppressWarnings("unchecked")
  static class ReflectionTransformer extends Transformer {
    final Method meth;

    @SuppressWarnings({"rawtypes"})
    final Class clazz;

    // Original transformer spec string, used for error reporting.
    final String trans;

    final Object o;

    public ReflectionTransformer(Method meth, @SuppressWarnings({"rawtypes"})Class clazz, String trans)
        throws Exception {
      this.meth = meth;
      this.clazz = clazz;
      this.trans = trans;
      o = clazz.getConstructor().newInstance();
    }

    @Override
    public Object transformRow(Map<String, Object> aRow, Context context) {
      try {
        return meth.invoke(o, aRow);
      } catch (Exception e) {
        log.warn("method invocation failed on transformer : {}", trans, e);
        throw new DataImportHandlerException(WARN, e);
      }
    }
  }

  /**
   * Pops the next pending row from the cache; clears the cache once empty.
   * Precondition: rowcache is non-null and non-empty.
   */
  protected Map<String, Object> getFromRowCache() {
    Map<String, Object> r = rowcache.remove(0);
    if (rowcache.isEmpty())
      rowcache = null;
    return r;
  }

  /**
   * Runs the row through the transformer chain. A transformer may return a Map
   * (replacing the row), a List of Maps (fanning one row out into many, buffered
   * in rowcache), or null (dropping the row). A row carrying a truthy
   * "$stopTransform" key short-circuits the remaining transformers. Transformer
   * exceptions are handled per the onError policy (ABORT rethrows SEVERE, SKIP
   * rethrows SKIP, CONTINUE ignores).
   */
  @SuppressWarnings("unchecked")
  protected Map<String, Object> applyTransformer(Map<String, Object> row) {
    if(row == null) return null;
    if (transformers == null)
      loadTransformers();
    if (transformers == Collections.EMPTY_LIST)
      return row;
    Map<String, Object> transformedRow = row;
    List<Map<String, Object>> rows = null;
    boolean stopTransform = checkStopTransform(row);
    VariableResolver resolver = context.getVariableResolver();
    for (Transformer t : transformers) {
      if (stopTransform) break;
      try {
        if (rows != null) {
          // A previous transformer fanned the row out; transform each produced row.
          List<Map<String, Object>> tmpRows = new ArrayList<>();
          for (Map<String, Object> map : rows) {
            resolver.addNamespace(entityName, map);
            Object o = t.transformRow(map, context);
            if (o == null)
              continue;
            if (o instanceof Map) {
              @SuppressWarnings({"rawtypes"})
              Map oMap = (Map) o;
              stopTransform = checkStopTransform(oMap);
              tmpRows.add((Map) o);
            } else if (o instanceof List) {
              tmpRows.addAll((List) o);
            } else {
              log.error("Transformer must return Map<String, Object> or a List<Map<String, Object>>");
            }
          }
          rows = tmpRows;
        } else {
          resolver.addNamespace(entityName, transformedRow);
          Object o = t.transformRow(transformedRow, context);
          if (o == null)
            return null;
          if (o instanceof Map) {
            @SuppressWarnings({"rawtypes"})
            Map oMap = (Map) o;
            stopTransform = checkStopTransform(oMap);
            transformedRow = (Map) o;
          } else if (o instanceof List) {
            rows = (List) o;
          } else {
            log.error("Transformer must return Map<String, Object> or a List<Map<String, Object>>");
          }
        }
      } catch (Exception e) {
        log.warn("transformer threw error", e);
        if (ABORT.equals(onError)) {
          wrapAndThrow(SEVERE, e);
        } else if (SKIP.equals(onError)) {
          wrapAndThrow(DataImportHandlerException.SKIP, e);
        }
        // onError = continue
      }
    }
    if (rows == null) {
      return transformedRow;
    } else {
      rowcache = rows;
      return getFromRowCache();
    }

  }

  // True when the row carries "$stopTransform" with a value parsing as boolean true.
  private boolean checkStopTransform(@SuppressWarnings({"rawtypes"})Map oMap) {
    return oMap.get("$stopTransform") != null
        && Boolean.parseBoolean(oMap.get("$stopTransform").toString());
  }

  /**
   * Returns the next transformed row: drains the row cache first, then pulls
   * from the delegate, transforming each row; rows dropped by a transformer
   * (null result) cause the next delegate row to be fetched. Delegate failures
   * follow the onError policy (ABORT rethrows; otherwise the error is logged
   * and the entity ends, since retrying could leave the processor inconsistent).
   */
  @Override
  public Map<String, Object> nextRow() {
    if (rowcache != null) {
      return getFromRowCache();
    }
    while (true) {
      Map<String, Object> arow = null;
      try {
        arow = delegate.nextRow();
      } catch (Exception e) {
        if(ABORT.equals(onError)){
          wrapAndThrow(SEVERE, e);
        } else {
          //SKIP is not really possible. If this calls the nextRow() again the Entityprocessor would be in an inconisttent state
          SolrException.log(log, "Exception in entity : "+ entityName, e);
          return null;
        }
      }
      if (arow == null) {
        return null;
      } else {
        arow = applyTransformer(arow);
        if (arow != null) {
          delegate.postTransform(arow);
          return arow;
        }
      }
    }
  }

  /** Delta-import: delegates, transforms the key row, and resets the row cache. */
  @Override
  public Map<String, Object> nextModifiedRowKey() {
    Map<String, Object> row = delegate.nextModifiedRowKey();
    row = applyTransformer(row);
    rowcache = null;
    return row;
  }

  /** Delta-import: delegates, transforms the key row, and resets the row cache. */
  @Override
  public Map<String, Object> nextDeletedRowKey() {
    Map<String, Object> row = delegate.nextDeletedRowKey();
    row = applyTransformer(row);
    rowcache = null;
    return row;
  }

  @Override
  public Map<String, Object> nextModifiedParentRowKey() {
    return delegate.nextModifiedParentRowKey();
  }

  @Override
  public void destroy() {
    delegate.destroy();
  }

  public VariableResolver getVariableResolver() {
    return context.getVariableResolver();
  }

  public Context getContext() {
    return context;
  }

  @Override
  public void close() {
    delegate.close();
  }

  public Entity getEntity() {
    return entity;
  }

  public List<EntityProcessorWrapper> getChildren() {
    return children;
  }

  @SuppressWarnings({"rawtypes"})
  public DataSource getDatasource() {
    return datasource;
  }

  public void setDatasource(@SuppressWarnings({"rawtypes"})DataSource datasource) {
    this.datasource = datasource;
  }

  public boolean isInitialized() {
    return initialized;
  }

  public void setInitialized(boolean initialized) {
    this.initialized = initialized;
  }
}
|
|
@ -1,140 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * <p>
 * Pluggable functions for resolving variables
 * </p>
 * <p>
 * Implementations of this abstract class must provide a public no-arg constructor.
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @since solr 1.3
 */
public abstract class Evaluator {

  /**
   * Return a String after processing an expression and a {@link VariableResolver}
   *
   * @see VariableResolver
   * @param expression string to be evaluated
   * @param context instance
   * @return the value of the given expression evaluated using the resolver
   */
  public abstract String evaluate(String expression, Context context);

  /**
   * Parses a string of expression into separate params. The values are separated by commas. Each value will be
   * translated into one of the following:
   * <ol>
   * <li>If it is in single quotes the value will be translated to a String</li>
   * <li>If it is not in quotes and is a number it will be translated into a Double</li>
   * <li>else it is a variable which can be resolved and it will be put in as an instance of VariableWrapper</li>
   * </ol>
   *
   * @param expression the expression to be parsed
   * @param vr the VariableResolver instance for resolving variables
   *
   * @return a List of objects which can either be a string, number or a variable wrapper
   */
  protected List<Object> parseParams(String expression, VariableResolver vr) {
    List<Object> result = new ArrayList<>();
    expression = expression.trim();
    String[] ss = expression.split(",");
    for (int i = 0; i < ss.length; i++) {
      ss[i] = ss[i].trim();
      if (ss[i].startsWith("'")) {//a string param has started
        // A quoted string may itself contain commas, which split() broke apart;
        // rejoin segments (advancing i) until the closing quote is found.
        StringBuilder sb = new StringBuilder();
        while (true) {
          sb.append(ss[i]);
          if (ss[i].endsWith("'")) break;
          i++;
          if (i >= ss.length)
            throw new DataImportHandlerException(SEVERE, "invalid string at " + ss[i - 1] + " in function params: " + expression);
          sb.append(",");
        }
        // Strip the surrounding quotes and unescape embedded \' sequences.
        String s = sb.substring(1, sb.length() - 1);
        s = s.replaceAll("\\\\'", "'");
        result.add(s);
      } else {
        if (Character.isDigit(ss[i].charAt(0))) {
          try {
            Double doub = Double.parseDouble(ss[i]);
            result.add(doub);
          } catch (NumberFormatException e) {
            // A digit-leading token that is not a number is only an error when it
            // also fails to resolve as a variable.
            if (vr.resolve(ss[i]) == null) {
              wrapAndThrow(
                  SEVERE, e, "Invalid number :" + ss[i] +
                      "in parameters " + expression);
            }
          }
        } else {
          result.add(getVariableWrapper(ss[i], vr));
        }
      }
    }
    return result;
  }

  // Factory hook so subclasses can supply a specialized wrapper.
  protected VariableWrapper getVariableWrapper(String s, VariableResolver vr) {
    return new VariableWrapper(s,vr);
  }

  /**
   * A deferred variable reference: holds the variable name and resolver so the
   * value is looked up at use time rather than at parse time.
   */
  static protected class VariableWrapper {
    public final String varName;
    public final VariableResolver vr;

    public VariableWrapper(String s, VariableResolver vr) {
      this.varName = s;
      this.vr = vr;
    }

    public Object resolve() {
      return vr.resolve(varName);
    }

    // Returns the resolved value as a string, or null when the variable is unset.
    @Override
    public String toString() {
      Object o = vr.resolve(varName);
      return o == null ? null : o.toString();
    }
  }

  // Matches a value fully enclosed in single quotes.
  static Pattern IN_SINGLE_QUOTES = Pattern.compile("^'(.*?)'$");

  // Well-known evaluator function names.
  public static final String DATE_FORMAT_EVALUATOR = "formatDate";

  public static final String URL_ENCODE_EVALUATOR = "encodeUrl";

  public static final String ESCAPE_SOLR_QUERY_CHARS = "escapeQueryChars";

  public static final String SQL_ESCAPE_EVALUATOR = "escapeSql";
}
|
|
@ -1,35 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
/**
 * Event listener for DataImportHandler
 *
 * <b>This API is experimental and subject to change</b>
 *
 * @since solr 1.4
 */
public interface EventListener {

  /**
   * Event callback
   *
   * @param ctx the Context in which this event was called
   */
  void onEvent(Context ctx);

}
|
|
@ -1,122 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.sql.Blob;
|
||||
import java.sql.Clob;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* This can be useful for users who have a DB field containing xml and wish to use a nested {@link XPathEntityProcessor}
|
||||
* <p>
|
||||
* The datasouce may be configured as follows
|
||||
* <p>
|
||||
* <datasource name="f1" type="FieldReaderDataSource" />
|
||||
* <p>
|
||||
* The entity which uses this datasource must keep the url value as the variable name url="field-name"
|
||||
* <p>
|
||||
* The fieldname must be resolvable from {@link VariableResolver}
|
||||
* <p>
|
||||
* This may be used with any {@link EntityProcessor} which uses a {@link DataSource}<{@link Reader}> eg: {@link XPathEntityProcessor}
|
||||
* <p>
|
||||
* Supports String, BLOB, CLOB data types and there is an extra field (in the entity) 'encoding' for BLOB types
|
||||
*
|
||||
* @since 1.4
|
||||
*/
|
||||
public class FieldReaderDataSource extends DataSource<Reader> {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
protected VariableResolver vr;
|
||||
protected String dataField;
|
||||
private String encoding;
|
||||
private EntityProcessorWrapper entityProcessor;
|
||||
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
dataField = context.getEntityAttribute("dataField");
|
||||
encoding = context.getEntityAttribute("encoding");
|
||||
entityProcessor = (EntityProcessorWrapper) context.getEntityProcessor();
|
||||
/*no op*/
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader getData(String query) {
|
||||
Object o = entityProcessor.getVariableResolver().resolve(dataField);
|
||||
if (o == null) {
|
||||
throw new DataImportHandlerException (SEVERE, "No field available for name : " +dataField);
|
||||
}
|
||||
if (o instanceof String) {
|
||||
return new StringReader((String) o);
|
||||
} else if (o instanceof Clob) {
|
||||
Clob clob = (Clob) o;
|
||||
try {
|
||||
//Most of the JDBC drivers have getCharacterStream defined as public
|
||||
// so let us just check it
|
||||
return readCharStream(clob);
|
||||
} catch (Exception e) {
|
||||
log.info("Unable to get data from CLOB");
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
} else if (o instanceof Blob) {
|
||||
Blob blob = (Blob) o;
|
||||
try {
|
||||
return getReader(blob);
|
||||
} catch (Exception e) {
|
||||
log.info("Unable to get data from BLOB");
|
||||
return null;
|
||||
|
||||
}
|
||||
} else {
|
||||
return new StringReader(o.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static Reader readCharStream(Clob clob) {
|
||||
try {
|
||||
return clob.getCharacterStream();
|
||||
} catch (Exception e) {
|
||||
wrapAndThrow(SEVERE, e,"Unable to get reader from clob");
|
||||
return null;//unreachable
|
||||
}
|
||||
}
|
||||
|
||||
private Reader getReader(Blob blob)
|
||||
throws SQLException, UnsupportedEncodingException {
|
||||
if (encoding == null) {
|
||||
return (new InputStreamReader(blob.getBinaryStream(), StandardCharsets.UTF_8));
|
||||
} else {
|
||||
return (new InputStreamReader(blob.getBinaryStream(), encoding));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
|
||||
}
|
||||
}
|
|
@ -1,85 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.sql.Blob;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
/**
|
||||
* This can be useful for users who have a DB field containing BLOBs which may be Rich documents
|
||||
* <p>
|
||||
* The datasource may be configured as follows
|
||||
* <p>
|
||||
* <dataSource name="f1" type="FieldStreamDataSource" />
|
||||
* <p>
|
||||
* The entity which uses this datasource must keep and attribute dataField
|
||||
* <p>
|
||||
* The fieldname must be resolvable from {@link VariableResolver}
|
||||
* <p>
|
||||
* This may be used with any {@link EntityProcessor} which uses a {@link DataSource}<{@link InputStream}> eg: TikaEntityProcessor
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class FieldStreamDataSource extends DataSource<InputStream> {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
protected VariableResolver vr;
|
||||
protected String dataField;
|
||||
private EntityProcessorWrapper wrapper;
|
||||
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
dataField = context.getEntityAttribute("dataField");
|
||||
wrapper = (EntityProcessorWrapper) context.getEntityProcessor();
|
||||
/*no op*/
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream getData(String query) {
|
||||
Object o = wrapper.getVariableResolver().resolve(dataField);
|
||||
if (o == null) {
|
||||
throw new DataImportHandlerException(SEVERE, "No field available for name : " + dataField);
|
||||
} else if (o instanceof Blob) {
|
||||
Blob blob = (Blob) o;
|
||||
try {
|
||||
return blob.getBinaryStream();
|
||||
} catch (SQLException sqle) {
|
||||
log.info("Unable to get data from BLOB");
|
||||
return null;
|
||||
}
|
||||
} else if (o instanceof byte[]) {
|
||||
byte[] bytes = (byte[]) o;
|
||||
return new ByteArrayInputStream(bytes);
|
||||
} else {
|
||||
throw new RuntimeException("unsupported type : " + o.getClass());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
}
|
|
@ -1,155 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A {@link DataSource} which reads from local files
|
||||
* </p>
|
||||
* <p>
|
||||
* The file is read with the default platform encoding. It can be overriden by
|
||||
* specifying the encoding in solrconfig.xml
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class FileDataSource extends DataSource<Reader> {
|
||||
public static final String BASE_PATH = "basePath";
|
||||
|
||||
/**
|
||||
* The basePath for this data source
|
||||
*/
|
||||
protected String basePath;
|
||||
|
||||
/**
|
||||
* The encoding using which the given file should be read
|
||||
*/
|
||||
protected String encoding = null;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
basePath = initProps.getProperty(BASE_PATH);
|
||||
if (initProps.get(URLDataSource.ENCODING) != null)
|
||||
encoding = initProps.getProperty(URLDataSource.ENCODING);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Returns a reader for the given file.
|
||||
* </p>
|
||||
* <p>
|
||||
* If the given file is not absolute, we try to construct an absolute path
|
||||
* using basePath configuration. If that fails, then the relative path is
|
||||
* tried. If file is not found a RuntimeException is thrown.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>It is the responsibility of the calling method to properly close the
|
||||
* returned Reader</b>
|
||||
* </p>
|
||||
*/
|
||||
@Override
|
||||
public Reader getData(String query) {
|
||||
File f = getFile(basePath,query);
|
||||
try {
|
||||
return openStream(f);
|
||||
} catch (Exception e) {
|
||||
wrapAndThrow(SEVERE,e,"Unable to open File : "+f.getAbsolutePath());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static File getFile(String basePath, String query) {
|
||||
try {
|
||||
File file = new File(query);
|
||||
|
||||
// If it's not an absolute path, try relative from basePath.
|
||||
if (!file.isAbsolute()) {
|
||||
// Resolve and correct basePath.
|
||||
File basePathFile;
|
||||
if (basePath == null) {
|
||||
basePathFile = new File(".").getAbsoluteFile();
|
||||
log.warn("FileDataSource.basePath is empty. Resolving to: {}"
|
||||
, basePathFile.getAbsolutePath());
|
||||
} else {
|
||||
basePathFile = new File(basePath);
|
||||
if (!basePathFile.isAbsolute()) {
|
||||
basePathFile = basePathFile.getAbsoluteFile();
|
||||
log.warn("FileDataSource.basePath is not absolute. Resolving to: {}"
|
||||
, basePathFile.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
|
||||
file = new File(basePathFile, query).getAbsoluteFile();
|
||||
}
|
||||
|
||||
if (file.isFile() && file.canRead()) {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Accessing File: {}", file.getAbsolutePath());
|
||||
}
|
||||
return file;
|
||||
} else {
|
||||
throw new FileNotFoundException("Could not find file: " + query +
|
||||
" (resolved to: " + file.getAbsolutePath());
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Open a {@link java.io.Reader} for the given file name
|
||||
*
|
||||
* @param file a {@link java.io.File} instance
|
||||
* @return a Reader on the given file
|
||||
* @throws FileNotFoundException if the File does not exist
|
||||
* @throws UnsupportedEncodingException if the encoding is unsupported
|
||||
* @since solr 1.4
|
||||
*/
|
||||
protected Reader openStream(File file) throws FileNotFoundException,
|
||||
UnsupportedEncodingException {
|
||||
if (encoding == null) {
|
||||
return new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8);
|
||||
} else {
|
||||
return new InputStreamReader(new FileInputStream(file), encoding);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
|
||||
}
|
||||
}
|
|
@ -1,305 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FilenameFilter;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.TimeZone;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.solr.util.DateMathParser;
|
||||
|
||||
/**
 * <p>
 * An {@link EntityProcessor} instance which can stream file names found in a given base
 * directory matching patterns and returning rows containing file information.
 * </p>
 * <p>
 * It supports querying a given base directory by matching:
 * <ul>
 * <li>regular expressions to file names</li>
 * <li>excluding certain files based on regular expression</li>
 * <li>last modification date (newer or older than a given date or time)</li>
 * <li>size (bigger or smaller than size given in bytes)</li>
 * <li>recursively iterating through sub-directories</li>
 * </ul>
 * Its output can be used along with {@link FileDataSource} to read from files in file
 * systems.
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @since solr 1.3
 * @see Pattern
 */
public class FileListEntityProcessor extends EntityProcessorBase {
  /**
   * A regex pattern to identify files given in data-config.xml after resolving any variables
   */
  protected String fileName;

  /**
   * The baseDir given in data-config.xml after resolving any variables
   */
  protected String baseDir;

  /**
   * A Regex pattern of excluded file names as given in data-config.xml after resolving any variables
   */
  protected String excludes;

  /**
   * The newerThan given in data-config as a {@link java.util.Date}
   * <p>
   * <b>Note: </b> This variable is resolved just-in-time in the {@link #nextRow()} method.
   * </p>
   */
  protected Date newerThan;

  /**
   * The olderThan given in data-config as a {@link java.util.Date}
   * <p>
   * <b>Note: </b> This variable is resolved just-in-time in the {@link #nextRow()} method.
   * </p>
   */
  protected Date olderThan;

  /**
   * The biggerThan given in data-config as a long value
   * <p>
   * <b>Note: </b> This variable is resolved just-in-time in the {@link #nextRow()} method.
   * </p>
   */
  protected long biggerThan = -1;

  /**
   * The smallerThan given in data-config as a long value
   * <p>
   * <b>Note: </b> This variable is resolved just-in-time in the {@link #nextRow()} method.
   * </p>
   */
  protected long smallerThan = -1;

  /**
   * The recursive given in data-config. Default value is false.
   */
  protected boolean recursive = false;

  // Compiled once in init() from fileName/excludes; null when the attribute is absent.
  private Pattern fileNamePattern, excludesPattern;

  /**
   * Reads and validates entity attributes. baseDir is mandatory and must be an
   * existing directory; fileName/excludes are compiled to regex patterns after
   * variable substitution.
   */
  @Override
  public void init(Context context) {
    super.init(context);
    fileName = context.getEntityAttribute(FILE_NAME);
    if (fileName != null) {
      fileName = context.replaceTokens(fileName);
      fileNamePattern = Pattern.compile(fileName);
    }
    baseDir = context.getEntityAttribute(BASE_DIR);
    if (baseDir == null)
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
          "'baseDir' is a required attribute");
    baseDir = context.replaceTokens(baseDir);
    File dir = new File(baseDir);
    if (!dir.isDirectory())
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
          "'baseDir' value: " + baseDir + " is not a directory");

    String r = context.getEntityAttribute(RECURSIVE);
    if (r != null)
      recursive = Boolean.parseBoolean(r);
    excludes = context.getEntityAttribute(EXCLUDES);
    if (excludes != null) {
      excludes = context.replaceTokens(excludes);
      excludesPattern = Pattern.compile(excludes);
    }
  }

  /**
   * Get the Date object corresponding to the given string.
   * <p>
   * Resolution order: a ${...} placeholder (which may already resolve to a Date),
   * then a 'NOW...' date-math expression in single quotes, then the literal
   * format "yyyy-MM-dd HH:mm:ss".
   *
   * @param dateStr the date string. It can be a DateMath string or it may have an evaluator function
   * @return a Date instance corresponding to the input string, or null when dateStr is null
   */
  private Date getDate(String dateStr) {
    if (dateStr == null)
      return null;

    Matcher m = PLACE_HOLDER_PATTERN.matcher(dateStr);
    if (m.find()) {
      Object o = context.resolve(m.group(1));
      if (o instanceof Date) return (Date)o;
      dateStr = (String) o;
    } else {
      dateStr = context.replaceTokens(dateStr);
    }
    m = Evaluator.IN_SINGLE_QUOTES.matcher(dateStr);
    if (m.find()) {
      String expr = m.group(1);
      //TODO refactor DateMathParser.parseMath a bit to have a static method for this logic.
      if (expr.startsWith("NOW")) {
        expr = expr.substring("NOW".length());
      }
      try {
        // DWS TODO: is this TimeZone the right default for us? Deserves explanation if so.
        return new DateMathParser(TimeZone.getDefault()).parseMath(expr);
      } catch (ParseException exp) {
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
            "Invalid expression for date", exp);
      }
    }
    try {
      return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).parse(dateStr);
    } catch (ParseException exp) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
          "Invalid expression for date", exp);
    }
  }

  /**
   * Get the Long value for the given string after resolving any evaluator or variable.
   *
   * @param sizeStr the size as a string
   * @return the Long value corresponding to the given string, or null when sizeStr is null
   */
  private Long getSize(String sizeStr) {
    if (sizeStr == null)
      return null;

    Matcher m = PLACE_HOLDER_PATTERN.matcher(sizeStr);
    if (m.find()) {
      Object o = context.resolve(m.group(1));
      if (o instanceof Number) {
        Number number = (Number) o;
        return number.longValue();
      }
      sizeStr = (String) o;
    } else {
      sizeStr = context.replaceTokens(sizeStr);
    }

    return Long.parseLong(sizeStr);
  }

  /**
   * Returns the next file-info row. On the first call the filter attributes
   * (newerThan/olderThan/biggerThan/smallerThan) are resolved just-in-time,
   * the directory tree is scanned eagerly into a list, and subsequent calls
   * iterate over that snapshot.
   */
  @Override
  public Map<String, Object> nextRow() {
    if (rowIterator != null)
      return getNext();
    List<Map<String, Object>> fileDetails = new ArrayList<>();
    File dir = new File(baseDir);

    String dateStr = context.getEntityAttribute(NEWER_THAN);
    newerThan = getDate(dateStr);
    dateStr = context.getEntityAttribute(OLDER_THAN);
    olderThan = getDate(dateStr);
    String biggerThanStr = context.getEntityAttribute(BIGGER_THAN);
    if (biggerThanStr != null)
      biggerThan = getSize(biggerThanStr);
    String smallerThanStr = context.getEntityAttribute(SMALLER_THAN);
    if (smallerThanStr != null)
      smallerThan = getSize(smallerThanStr);

    getFolderFiles(dir, fileDetails);
    rowIterator = fileDetails.iterator();
    return getNext();
  }

  // Recursively collects matching files under dir into fileDetails.
  private void getFolderFiles(File dir, final List<Map<String, Object>> fileDetails) {
    // Fetch an array of file objects that pass the filter, however the
    // returned array is never populated; accept() always returns false.
    // Rather we make use of the fileDetails array which is populated as
    // a side effect of the accept method.
    dir.list(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        File fileObj = new File(dir, name);
        if (fileObj.isDirectory()) {
          if (recursive) getFolderFiles(fileObj, fileDetails);
        } else if (fileNamePattern == null) {
          addDetails(fileDetails, dir, name);
        } else if (fileNamePattern.matcher(name).find()) {
          if (excludesPattern != null && excludesPattern.matcher(name).find())
            return false;
          addDetails(fileDetails, dir, name);
        }
        return false;
      }
    });
  }

  // Applies the size/date filters to one file and, when it passes, appends a
  // row map with the DIR/FILE/ABSOLUTE_FILE/SIZE/LAST_MODIFIED keys.
  private void addDetails(List<Map<String, Object>> files, File dir, String name) {
    Map<String, Object> details = new HashMap<>();
    File aFile = new File(dir, name);
    if (aFile.isDirectory()) return;
    long sz = aFile.length();
    Date lastModified = new Date(aFile.lastModified());
    if (biggerThan != -1 && sz <= biggerThan)
      return;
    if (smallerThan != -1 && sz >= smallerThan)
      return;
    if (olderThan != null && lastModified.after(olderThan))
      return;
    if (newerThan != null && lastModified.before(newerThan))
      return;
    details.put(DIR, dir.getAbsolutePath());
    details.put(FILE, name);
    details.put(ABSOLUTE_FILE, aFile.getAbsolutePath());
    details.put(SIZE, sz);
    details.put(LAST_MODIFIED, lastModified);
    files.add(details);
  }

  // Matches ${...} variable placeholders; group(1) is the variable name.
  public static final Pattern PLACE_HOLDER_PATTERN = Pattern
      .compile("\\$\\{(.*?)\\}");

  public static final String DIR = "fileDir";

  public static final String FILE = "file";

  public static final String ABSOLUTE_FILE = "fileAbsolutePath";

  public static final String SIZE = "fileSize";

  public static final String LAST_MODIFIED = "fileLastModified";

  public static final String FILE_NAME = "fileName";

  public static final String BASE_DIR = "baseDir";

  public static final String EXCLUDES = "excludes";

  public static final String NEWER_THAN = "newerThan";

  public static final String OLDER_THAN = "olderThan";

  public static final String BIGGER_THAN = "biggerThan";

  public static final String SMALLER_THAN = "smallerThan";

  public static final String RECURSIVE = "recursive";

}
|
|
@ -1,96 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.io.BufferedReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A {@link Transformer} implementation which strip off HTML tags using {@link HTMLStripCharFilter} This is useful
|
||||
* in case you don't need this HTML anyway.
|
||||
*
|
||||
* @see HTMLStripCharFilter
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class HTMLStripTransformer extends Transformer {
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Object transformRow(Map<String, Object> row, Context context) {
|
||||
List<Map<String, String>> fields = context.getAllEntityFields();
|
||||
for (Map<String, String> field : fields) {
|
||||
String col = field.get(DataImporter.COLUMN);
|
||||
String splitHTML = context.replaceTokens(field.get(STRIP_HTML));
|
||||
if (!TRUE.equals(splitHTML))
|
||||
continue;
|
||||
Object tmpVal = row.get(col);
|
||||
if (tmpVal == null)
|
||||
continue;
|
||||
|
||||
if (tmpVal instanceof List) {
|
||||
List<String> inputs = (List<String>) tmpVal;
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
List results = new ArrayList();
|
||||
for (String input : inputs) {
|
||||
if (input == null)
|
||||
continue;
|
||||
Object o = stripHTML(input, col);
|
||||
if (o != null)
|
||||
results.add(o);
|
||||
}
|
||||
row.put(col, results);
|
||||
} else {
|
||||
String value = tmpVal.toString();
|
||||
Object o = stripHTML(value, col);
|
||||
if (o != null)
|
||||
row.put(col, o);
|
||||
}
|
||||
}
|
||||
return row;
|
||||
}
|
||||
|
||||
private Object stripHTML(String value, String column) {
|
||||
StringBuilder out = new StringBuilder();
|
||||
StringReader strReader = new StringReader(value);
|
||||
try {
|
||||
HTMLStripCharFilter html = new HTMLStripCharFilter(strReader.markSupported() ? strReader : new BufferedReader(strReader));
|
||||
char[] cbuf = new char[1024 * 10];
|
||||
while (true) {
|
||||
int count = html.read(cbuf);
|
||||
if (count == -1)
|
||||
break; // end of stream mark is -1
|
||||
if (count > 0)
|
||||
out.append(cbuf, 0, count);
|
||||
}
|
||||
html.close();
|
||||
} catch (IOException e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Failed stripping HTML for column: " + column, e);
|
||||
}
|
||||
return out.toString();
|
||||
}
|
||||
|
||||
public static final String STRIP_HTML = "stripHTML";
|
||||
|
||||
public static final String TRUE = "true";
|
||||
}
|
|
@ -1,583 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.util.CryptoKeys;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.naming.InitialContext;
|
||||
import javax.naming.NamingException;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.sql.*;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* <p> A DataSource implementation which can fetch data using JDBC. </p> <p> Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
|
||||
* details. </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class JdbcDataSource extends
|
||||
DataSource<Iterator<Map<String, Object>>> {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
protected Callable<Connection> factory;
|
||||
|
||||
private long connLastUsed = 0;
|
||||
|
||||
private Connection conn;
|
||||
|
||||
private ResultSetIterator resultSetIterator;
|
||||
|
||||
private Map<String, Integer> fieldNameVsType = new HashMap<>();
|
||||
|
||||
private boolean convertType = false;
|
||||
|
||||
private int batchSize = FETCH_SIZE;
|
||||
|
||||
private int maxRows = 0;
|
||||
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
resolveVariables(context, initProps);
|
||||
initProps = decryptPwd(context, initProps);
|
||||
Object o = initProps.get(CONVERT_TYPE);
|
||||
if (o != null)
|
||||
convertType = Boolean.parseBoolean(o.toString());
|
||||
|
||||
factory = createConnectionFactory(context, initProps);
|
||||
|
||||
String bsz = initProps.getProperty("batchSize");
|
||||
if (bsz != null) {
|
||||
bsz = context.replaceTokens(bsz);
|
||||
try {
|
||||
batchSize = Integer.parseInt(bsz);
|
||||
if (batchSize == -1)
|
||||
batchSize = Integer.MIN_VALUE;
|
||||
} catch (NumberFormatException e) {
|
||||
log.warn("Invalid batch size: {}", bsz);
|
||||
}
|
||||
}
|
||||
|
||||
for (Map<String, String> map : context.getAllEntityFields()) {
|
||||
String n = map.get(DataImporter.COLUMN);
|
||||
String t = map.get(DataImporter.TYPE);
|
||||
if ("sint".equals(t) || "integer".equals(t))
|
||||
fieldNameVsType.put(n, Types.INTEGER);
|
||||
else if ("slong".equals(t) || "long".equals(t))
|
||||
fieldNameVsType.put(n, Types.BIGINT);
|
||||
else if ("float".equals(t) || "sfloat".equals(t))
|
||||
fieldNameVsType.put(n, Types.FLOAT);
|
||||
else if ("double".equals(t) || "sdouble".equals(t))
|
||||
fieldNameVsType.put(n, Types.DOUBLE);
|
||||
else if ("date".equals(t))
|
||||
fieldNameVsType.put(n, Types.DATE);
|
||||
else if ("boolean".equals(t))
|
||||
fieldNameVsType.put(n, Types.BOOLEAN);
|
||||
else if ("binary".equals(t))
|
||||
fieldNameVsType.put(n, Types.BLOB);
|
||||
else
|
||||
fieldNameVsType.put(n, Types.VARCHAR);
|
||||
}
|
||||
}
|
||||
|
||||
private Properties decryptPwd(Context context, Properties initProps) {
|
||||
String encryptionKey = initProps.getProperty("encryptKeyFile");
|
||||
if (initProps.getProperty("password") != null && encryptionKey != null) {
|
||||
// this means the password is encrypted and use the file to decode it
|
||||
try {
|
||||
try (Reader fr = new InputStreamReader(new FileInputStream(encryptionKey), UTF_8)) {
|
||||
char[] chars = new char[100];//max 100 char password
|
||||
int len = fr.read(chars);
|
||||
if (len < 6)
|
||||
throw new DataImportHandlerException(SEVERE, "There should be a password of length 6 atleast " + encryptionKey);
|
||||
Properties props = new Properties();
|
||||
props.putAll(initProps);
|
||||
String password = null;
|
||||
try {
|
||||
password = CryptoKeys.decodeAES(initProps.getProperty("password"), new String(chars, 0, len)).trim();
|
||||
} catch (SolrException se) {
|
||||
throw new DataImportHandlerException(SEVERE, "Error decoding password", se.getCause());
|
||||
}
|
||||
props.put("password", password);
|
||||
initProps = props;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new DataImportHandlerException(SEVERE, "Could not load encryptKeyFile " + encryptionKey);
|
||||
}
|
||||
}
|
||||
return initProps;
|
||||
}
|
||||
|
||||
protected Callable<Connection> createConnectionFactory(final Context context,
|
||||
final Properties initProps) {
|
||||
// final VariableResolver resolver = context.getVariableResolver();
|
||||
final String jndiName = initProps.getProperty(JNDI_NAME);
|
||||
final String url = initProps.getProperty(URL);
|
||||
final String driver = initProps.getProperty(DRIVER);
|
||||
|
||||
if (url == null && jndiName == null)
|
||||
throw new DataImportHandlerException(SEVERE,
|
||||
"JDBC URL or JNDI name has to be specified");
|
||||
|
||||
if (driver != null) {
|
||||
try {
|
||||
DocBuilder.loadClass(driver, context.getSolrCore());
|
||||
} catch (ClassNotFoundException e) {
|
||||
wrapAndThrow(SEVERE, e, "Could not load driver: " + driver);
|
||||
}
|
||||
} else {
|
||||
if(jndiName == null){
|
||||
throw new DataImportHandlerException(SEVERE, "One of driver or jndiName must be specified in the data source");
|
||||
}
|
||||
}
|
||||
|
||||
String s = initProps.getProperty("maxRows");
|
||||
if (s != null) {
|
||||
maxRows = Integer.parseInt(s);
|
||||
}
|
||||
|
||||
return factory = new Callable<Connection>() {
|
||||
@Override
|
||||
public Connection call() throws Exception {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Creating a connection for entity {} with URL: {}"
|
||||
, context.getEntityAttribute(DataImporter.NAME), url);
|
||||
}
|
||||
long start = System.nanoTime();
|
||||
Connection c = null;
|
||||
|
||||
if (jndiName != null) {
|
||||
c = getFromJndi(initProps, jndiName);
|
||||
} else if (url != null) {
|
||||
try {
|
||||
c = DriverManager.getConnection(url, initProps);
|
||||
} catch (SQLException e) {
|
||||
// DriverManager does not allow you to use a driver which is not loaded through
|
||||
// the class loader of the class which is trying to make the connection.
|
||||
// This is a workaround for cases where the user puts the driver jar in the
|
||||
// solr.home/lib or solr.home/core/lib directories.
|
||||
@SuppressWarnings({"unchecked"})
|
||||
Driver d = (Driver) DocBuilder.loadClass(driver, context.getSolrCore()).getConstructor().newInstance();
|
||||
c = d.connect(url, initProps);
|
||||
}
|
||||
}
|
||||
if (c != null) {
|
||||
try {
|
||||
initializeConnection(c, initProps);
|
||||
} catch (SQLException e) {
|
||||
try {
|
||||
c.close();
|
||||
} catch (SQLException e2) {
|
||||
log.warn("Exception closing connection during cleanup", e2);
|
||||
}
|
||||
|
||||
throw new DataImportHandlerException(SEVERE, "Exception initializing SQL connection", e);
|
||||
}
|
||||
}
|
||||
log.info("Time taken for getConnection(): {}"
|
||||
, TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS));
|
||||
return c;
|
||||
}
|
||||
|
||||
private void initializeConnection(Connection c, final Properties initProps)
|
||||
throws SQLException {
|
||||
if (Boolean.parseBoolean(initProps.getProperty("readOnly"))) {
|
||||
c.setReadOnly(true);
|
||||
// Add other sane defaults
|
||||
c.setAutoCommit(true);
|
||||
c.setTransactionIsolation(Connection.TRANSACTION_READ_UNCOMMITTED);
|
||||
c.setHoldability(ResultSet.CLOSE_CURSORS_AT_COMMIT);
|
||||
}
|
||||
if (!Boolean.parseBoolean(initProps.getProperty("autoCommit"))) {
|
||||
c.setAutoCommit(false);
|
||||
}
|
||||
String transactionIsolation = initProps.getProperty("transactionIsolation");
|
||||
if ("TRANSACTION_READ_UNCOMMITTED".equals(transactionIsolation)) {
|
||||
c.setTransactionIsolation(Connection.TRANSACTION_READ_UNCOMMITTED);
|
||||
} else if ("TRANSACTION_READ_COMMITTED".equals(transactionIsolation)) {
|
||||
c.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED);
|
||||
} else if ("TRANSACTION_REPEATABLE_READ".equals(transactionIsolation)) {
|
||||
c.setTransactionIsolation(Connection.TRANSACTION_REPEATABLE_READ);
|
||||
} else if ("TRANSACTION_SERIALIZABLE".equals(transactionIsolation)) {
|
||||
c.setTransactionIsolation(Connection.TRANSACTION_SERIALIZABLE);
|
||||
} else if ("TRANSACTION_NONE".equals(transactionIsolation)) {
|
||||
c.setTransactionIsolation(Connection.TRANSACTION_NONE);
|
||||
}
|
||||
String holdability = initProps.getProperty("holdability");
|
||||
if ("CLOSE_CURSORS_AT_COMMIT".equals(holdability)) {
|
||||
c.setHoldability(ResultSet.CLOSE_CURSORS_AT_COMMIT);
|
||||
} else if ("HOLD_CURSORS_OVER_COMMIT".equals(holdability)) {
|
||||
c.setHoldability(ResultSet.HOLD_CURSORS_OVER_COMMIT);
|
||||
}
|
||||
}
|
||||
|
||||
private Connection getFromJndi(final Properties initProps, final String jndiName) throws NamingException,
|
||||
SQLException {
|
||||
|
||||
Connection c = null;
|
||||
InitialContext ctx = new InitialContext();
|
||||
Object jndival = ctx.lookup(jndiName);
|
||||
if (jndival instanceof javax.sql.DataSource) {
|
||||
javax.sql.DataSource dataSource = (javax.sql.DataSource) jndival;
|
||||
String user = (String) initProps.get("user");
|
||||
String pass = (String) initProps.get("password");
|
||||
if(user == null || user.trim().equals("")){
|
||||
c = dataSource.getConnection();
|
||||
} else {
|
||||
c = dataSource.getConnection(user, pass);
|
||||
}
|
||||
} else {
|
||||
throw new DataImportHandlerException(SEVERE,
|
||||
"the jndi name : '"+jndiName +"' is not a valid javax.sql.DataSource");
|
||||
}
|
||||
return c;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private void resolveVariables(Context ctx, Properties initProps) {
|
||||
for (Map.Entry<Object, Object> entry : initProps.entrySet()) {
|
||||
if (entry.getValue() != null) {
|
||||
entry.setValue(ctx.replaceTokens((String) entry.getValue()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<Map<String, Object>> getData(String query) {
|
||||
if (resultSetIterator != null) {
|
||||
resultSetIterator.close();
|
||||
resultSetIterator = null;
|
||||
}
|
||||
resultSetIterator = createResultSetIterator(query);
|
||||
return resultSetIterator.getIterator();
|
||||
}
|
||||
|
||||
protected ResultSetIterator createResultSetIterator(String query) {
|
||||
return new ResultSetIterator(query);
|
||||
}
|
||||
|
||||
private void logError(String msg, Exception e) {
|
||||
log.warn(msg, e);
|
||||
}
|
||||
|
||||
protected List<String> readFieldNames(ResultSetMetaData metaData)
|
||||
throws SQLException {
|
||||
List<String> colNames = new ArrayList<>();
|
||||
int count = metaData.getColumnCount();
|
||||
for (int i = 0; i < count; i++) {
|
||||
colNames.add(metaData.getColumnLabel(i + 1));
|
||||
}
|
||||
return colNames;
|
||||
}
|
||||
|
||||
protected class ResultSetIterator {
|
||||
private ResultSet resultSet;
|
||||
|
||||
private Statement stmt = null;
|
||||
|
||||
private List<String> colNames;
|
||||
|
||||
private Iterator<Map<String, Object>> rSetIterator;
|
||||
|
||||
public ResultSetIterator(String query) {
|
||||
|
||||
try {
|
||||
Connection c = getConnection();
|
||||
stmt = createStatement(c, batchSize, maxRows);
|
||||
log.debug("Executing SQL: {}", query);
|
||||
long start = System.nanoTime();
|
||||
resultSet = executeStatement(stmt, query);
|
||||
log.trace("Time taken for sql : {}"
|
||||
, TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS));
|
||||
setColNames(resultSet);
|
||||
} catch (Exception e) {
|
||||
close();
|
||||
wrapAndThrow(SEVERE, e, "Unable to execute query: " + query);
|
||||
return;
|
||||
}
|
||||
if (resultSet == null) {
|
||||
close();
|
||||
rSetIterator = new ArrayList<Map<String, Object>>().iterator();
|
||||
return;
|
||||
}
|
||||
|
||||
rSetIterator = createIterator(convertType, fieldNameVsType);
|
||||
}
|
||||
|
||||
|
||||
protected Statement createStatement(final Connection c, final int batchSize, final int maxRows)
|
||||
throws SQLException {
|
||||
Statement statement = c.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
|
||||
statement.setFetchSize(batchSize);
|
||||
statement.setMaxRows(maxRows);
|
||||
return statement;
|
||||
}
|
||||
|
||||
protected ResultSet executeStatement(Statement statement, String query) throws SQLException {
|
||||
boolean resultSetReturned = statement.execute(query);
|
||||
return getNextResultSet(resultSetReturned, statement);
|
||||
}
|
||||
|
||||
protected ResultSet getNextResultSet(final boolean initialResultSetAvailable, final Statement statement) throws SQLException {
|
||||
boolean resultSetAvailable = initialResultSetAvailable;
|
||||
while (!resultSetAvailable && statement.getUpdateCount() != -1) {
|
||||
resultSetAvailable = statement.getMoreResults();
|
||||
}
|
||||
if (resultSetAvailable) {
|
||||
return statement.getResultSet();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
protected void setColNames(final ResultSet resultSet) throws SQLException {
|
||||
if (resultSet != null) {
|
||||
colNames = readFieldNames(resultSet.getMetaData());
|
||||
} else {
|
||||
colNames = Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
||||
protected Iterator<Map<String,Object>> createIterator(final boolean convertType,
|
||||
final Map<String,Integer> fieldNameVsType) {
|
||||
return new Iterator<Map<String,Object>>() {
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return hasnext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String,Object> next() {
|
||||
return getARow(convertType, fieldNameVsType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {/* do nothing */
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected Map<String,Object> getARow(boolean convertType, Map<String,Integer> fieldNameVsType) {
|
||||
if (getResultSet() == null)
|
||||
return null;
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
for (String colName : getColNames()) {
|
||||
try {
|
||||
if (!convertType) {
|
||||
// Use underlying database's type information except for BigDecimal and BigInteger
|
||||
// which cannot be serialized by JavaBin/XML. See SOLR-6165
|
||||
Object value = getResultSet().getObject(colName);
|
||||
if (value instanceof BigDecimal || value instanceof BigInteger) {
|
||||
result.put(colName, value.toString());
|
||||
} else {
|
||||
result.put(colName, value);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
Integer type = fieldNameVsType.get(colName);
|
||||
if (type == null)
|
||||
type = Types.VARCHAR;
|
||||
switch (type) {
|
||||
case Types.INTEGER:
|
||||
result.put(colName, getResultSet().getInt(colName));
|
||||
break;
|
||||
case Types.FLOAT:
|
||||
result.put(colName, getResultSet().getFloat(colName));
|
||||
break;
|
||||
case Types.BIGINT:
|
||||
result.put(colName, getResultSet().getLong(colName));
|
||||
break;
|
||||
case Types.DOUBLE:
|
||||
result.put(colName, getResultSet().getDouble(colName));
|
||||
break;
|
||||
case Types.DATE:
|
||||
result.put(colName, getResultSet().getTimestamp(colName));
|
||||
break;
|
||||
case Types.BOOLEAN:
|
||||
result.put(colName, getResultSet().getBoolean(colName));
|
||||
break;
|
||||
case Types.BLOB:
|
||||
result.put(colName, getResultSet().getBytes(colName));
|
||||
break;
|
||||
default:
|
||||
result.put(colName, getResultSet().getString(colName));
|
||||
break;
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
logError("Error reading data ", e);
|
||||
wrapAndThrow(SEVERE, e, "Error reading data from database");
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
protected boolean hasnext() {
|
||||
if (getResultSet() == null) {
|
||||
close();
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
if (getResultSet().next()) {
|
||||
return true;
|
||||
} else {
|
||||
closeResultSet();
|
||||
setResultSet(getNextResultSet(getStatement().getMoreResults(), getStatement()));
|
||||
setColNames(getResultSet());
|
||||
return hasnext();
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
close();
|
||||
wrapAndThrow(SEVERE,e);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
protected void close() {
|
||||
closeResultSet();
|
||||
try {
|
||||
if (getStatement() != null)
|
||||
getStatement().close();
|
||||
} catch (Exception e) {
|
||||
logError("Exception while closing statement", e);
|
||||
} finally {
|
||||
setStatement(null);
|
||||
}
|
||||
}
|
||||
|
||||
protected void closeResultSet() {
|
||||
try {
|
||||
if (getResultSet() != null) {
|
||||
getResultSet().close();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logError("Exception while closing result set", e);
|
||||
} finally {
|
||||
setResultSet(null);
|
||||
}
|
||||
}
|
||||
|
||||
protected final Iterator<Map<String,Object>> getIterator() {
|
||||
return rSetIterator;
|
||||
}
|
||||
|
||||
|
||||
protected final Statement getStatement() {
|
||||
return stmt;
|
||||
}
|
||||
|
||||
protected final void setStatement(Statement stmt) {
|
||||
this.stmt = stmt;
|
||||
}
|
||||
|
||||
protected final ResultSet getResultSet() {
|
||||
return resultSet;
|
||||
}
|
||||
|
||||
protected final void setResultSet(ResultSet resultSet) {
|
||||
this.resultSet = resultSet;
|
||||
}
|
||||
|
||||
protected final List<String> getColNames() {
|
||||
return colNames;
|
||||
}
|
||||
|
||||
protected final void setColNames(List<String> colNames) {
|
||||
this.colNames = colNames;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected Connection getConnection() throws Exception {
|
||||
long currTime = System.nanoTime();
|
||||
if (currTime - connLastUsed > CONN_TIME_OUT) {
|
||||
synchronized (this) {
|
||||
Connection tmpConn = factory.call();
|
||||
closeConnection();
|
||||
connLastUsed = System.nanoTime();
|
||||
return conn = tmpConn;
|
||||
}
|
||||
|
||||
} else {
|
||||
connLastUsed = currTime;
|
||||
return conn;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isClosed = false;
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (resultSetIterator != null) {
|
||||
resultSetIterator.close();
|
||||
}
|
||||
try {
|
||||
closeConnection();
|
||||
} finally {
|
||||
isClosed = true;
|
||||
}
|
||||
}
|
||||
|
||||
private void closeConnection() {
|
||||
try {
|
||||
if (conn != null) {
|
||||
try {
|
||||
//SOLR-2045
|
||||
conn.commit();
|
||||
} catch(Exception ex) {
|
||||
//ignore.
|
||||
}
|
||||
conn.close();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Ignoring Error when closing connection", e);
|
||||
}
|
||||
}
|
||||
|
||||
private static final long CONN_TIME_OUT = TimeUnit.NANOSECONDS.convert(10, TimeUnit.SECONDS);
|
||||
|
||||
private static final int FETCH_SIZE = 500;
|
||||
|
||||
public static final String URL = "url";
|
||||
|
||||
public static final String JNDI_NAME = "jndiName";
|
||||
|
||||
public static final String DRIVER = "driver";
|
||||
|
||||
public static final String CONVERT_TYPE = "convertType";
|
||||
}
|
|
@ -1,164 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* An {@link EntityProcessor} instance which can stream lines of text read from a
|
||||
* datasource. Options allow lines to be explicitly skipped or included in the index.
|
||||
* </p>
|
||||
* <p>
|
||||
* Attribute summary
|
||||
* <ul>
|
||||
* <li>url is the required location of the input file. If this value is
|
||||
* relative, it assumed to be relative to baseLoc.</li>
|
||||
* <li>acceptLineRegex is an optional attribute that if present discards any
|
||||
* line which does not match the regExp.</li>
|
||||
* <li>skipLineRegex is an optional attribute that is applied after any
|
||||
* acceptLineRegex and discards any line which matches this regExp.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Although envisioned for reading lines from a file or url, LineEntityProcessor may also be useful
|
||||
* for dealing with change lists, where each line contains filenames which can be used by subsequent entities
|
||||
* to parse content from those files.
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.4
|
||||
* @see Pattern
|
||||
*/
|
||||
public class LineEntityProcessor extends EntityProcessorBase {
|
||||
private Pattern acceptLineRegex, skipLineRegex;
|
||||
private String url;
|
||||
private BufferedReader reader;
|
||||
|
||||
/**
|
||||
* Parses each of the entity attributes.
|
||||
*/
|
||||
@Override
|
||||
public void init(Context context) {
|
||||
super.init(context);
|
||||
String s;
|
||||
|
||||
// init a regex to locate files from the input we want to index
|
||||
s = context.getResolvedEntityAttribute(ACCEPT_LINE_REGEX);
|
||||
if (s != null) {
|
||||
acceptLineRegex = Pattern.compile(s);
|
||||
}
|
||||
|
||||
// init a regex to locate files from the input to be skipped
|
||||
s = context.getResolvedEntityAttribute(SKIP_LINE_REGEX);
|
||||
if (s != null) {
|
||||
skipLineRegex = Pattern.compile(s);
|
||||
}
|
||||
|
||||
// the FileName is required.
|
||||
url = context.getResolvedEntityAttribute(URL);
|
||||
if (url == null) throw
|
||||
new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"'"+ URL +"' is a required attribute");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Reads lines from the url till it finds a lines that matches the
|
||||
* optional acceptLineRegex and does not match the optional skipLineRegex.
|
||||
*
|
||||
* @return A row containing a minimum of one field "rawLine" or null to signal
|
||||
* end of file. The rawLine is the as line as returned by readLine()
|
||||
* from the url. However transformers can be used to create as
|
||||
* many other fields as required.
|
||||
*/
|
||||
@Override
|
||||
public Map<String, Object> nextRow() {
|
||||
if (reader == null) {
|
||||
reader = new BufferedReader((Reader) context.getDataSource().getData(url));
|
||||
}
|
||||
|
||||
String line;
|
||||
|
||||
while ( true ) {
|
||||
// read a line from the input file
|
||||
try {
|
||||
line = reader.readLine();
|
||||
}
|
||||
catch (IOException exp) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Problem reading from input", exp);
|
||||
}
|
||||
|
||||
// end of input
|
||||
if (line == null) {
|
||||
closeResources();
|
||||
return null;
|
||||
}
|
||||
|
||||
// First scan whole line to see if we want it
|
||||
if (acceptLineRegex != null && ! acceptLineRegex.matcher(line).find()) continue;
|
||||
if (skipLineRegex != null && skipLineRegex.matcher(line).find()) continue;
|
||||
// Contruct the 'row' of fields
|
||||
Map<String, Object> row = new HashMap<>();
|
||||
row.put("rawLine", line);
|
||||
return row;
|
||||
}
|
||||
}
|
||||
|
||||
public void closeResources() {
|
||||
if (reader != null) {
|
||||
IOUtils.closeQuietly(reader);
|
||||
}
|
||||
reader= null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
closeResources();
|
||||
super.destroy();
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds the name of entity attribute that will be parsed to obtain
|
||||
* the filename containing the changelist.
|
||||
*/
|
||||
public static final String URL = "url";
|
||||
|
||||
/**
|
||||
* Holds the name of entity attribute that will be parsed to obtain
|
||||
* the pattern to be used when checking to see if a line should
|
||||
* be returned.
|
||||
*/
|
||||
public static final String ACCEPT_LINE_REGEX = "acceptLineRegex";
|
||||
|
||||
/**
|
||||
* Holds the name of entity attribute that will be parsed to obtain
|
||||
* the pattern to be used when checking to see if a line should
|
||||
* be ignored.
|
||||
*/
|
||||
public static final String SKIP_LINE_REGEX = "skipLineRegex";
|
||||
}
|
|
@ -1,67 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A {@link Transformer} implementation which logs messages in a given template format.
|
||||
* <p>
|
||||
* Refer to <a href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class LogTransformer extends Transformer {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
@Override
|
||||
public Object transformRow(Map<String, Object> row, Context ctx) {
|
||||
String expr = ctx.getEntityAttribute(LOG_TEMPLATE);
|
||||
String level = ctx.replaceTokens(ctx.getEntityAttribute(LOG_LEVEL));
|
||||
|
||||
if (expr == null || level == null) return row;
|
||||
|
||||
if ("info".equals(level)) {
|
||||
if (log.isInfoEnabled())
|
||||
log.info(ctx.replaceTokens(expr));
|
||||
} else if ("trace".equals(level)) {
|
||||
if (log.isTraceEnabled())
|
||||
log.trace(ctx.replaceTokens(expr));
|
||||
} else if ("warn".equals(level)) {
|
||||
if (log.isWarnEnabled())
|
||||
log.warn(ctx.replaceTokens(expr));
|
||||
} else if ("error".equals(level)) {
|
||||
if (log.isErrorEnabled())
|
||||
log.error(ctx.replaceTokens(expr));
|
||||
} else if ("debug".equals(level)) {
|
||||
if (log.isDebugEnabled())
|
||||
log.debug(ctx.replaceTokens(expr));
|
||||
}
|
||||
|
||||
return row;
|
||||
}
|
||||
|
||||
public static final String LOG_TEMPLATE = "logTemplate";
|
||||
public static final String LOG_LEVEL = "logLevel";
|
||||
}
|
|
@ -1,61 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A mock DataSource implementation which can be used for testing.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class MockDataSource extends
|
||||
DataSource<Iterator<Map<String, Object>>> {
|
||||
|
||||
private static Map<String, Iterator<Map<String, Object>>> cache = new HashMap<>();
|
||||
|
||||
public static void setIterator(String query,
|
||||
Iterator<Map<String, Object>> iter) {
|
||||
cache.put(query, iter);
|
||||
}
|
||||
|
||||
public static void clearCache() {
|
||||
cache.clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(Context context, Properties initProps) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<Map<String, Object>> getData(String query) {
|
||||
return cache.get(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
cache.clear();
|
||||
|
||||
}
|
||||
}
|
|
@ -1,134 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
import java.text.NumberFormat;
|
||||
import java.text.ParseException;
|
||||
import java.text.ParsePosition;
|
||||
import java.util.ArrayList;
|
||||
import java.util.IllformedLocaleException;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A {@link Transformer} instance which can extract numbers out of strings. It uses
|
||||
* {@link NumberFormat} class to parse strings and supports
|
||||
* Number, Integer, Currency and Percent styles as supported by
|
||||
* {@link NumberFormat} with configurable locales.
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class NumberFormatTransformer extends Transformer {
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Object transformRow(Map<String, Object> row, Context context) {
|
||||
for (Map<String, String> fld : context.getAllEntityFields()) {
|
||||
String style = context.replaceTokens(fld.get(FORMAT_STYLE));
|
||||
if (style != null) {
|
||||
String column = fld.get(DataImporter.COLUMN);
|
||||
String srcCol = fld.get(RegexTransformer.SRC_COL_NAME);
|
||||
String localeStr = context.replaceTokens(fld.get(LOCALE));
|
||||
if (srcCol == null)
|
||||
srcCol = column;
|
||||
Locale locale = Locale.ROOT;
|
||||
if (localeStr != null) {
|
||||
try {
|
||||
locale = new Locale.Builder().setLanguageTag(localeStr).build();
|
||||
} catch (IllformedLocaleException e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Invalid Locale '" + localeStr + "' specified for field: " + fld, e);
|
||||
}
|
||||
}
|
||||
|
||||
Object val = row.get(srcCol);
|
||||
String styleSmall = style.toLowerCase(Locale.ROOT);
|
||||
|
||||
if (val instanceof List) {
|
||||
List<String> inputs = (List) val;
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
List results = new ArrayList();
|
||||
for (String input : inputs) {
|
||||
try {
|
||||
results.add(process(input, styleSmall, locale));
|
||||
} catch (ParseException e) {
|
||||
throw new DataImportHandlerException(
|
||||
DataImportHandlerException.SEVERE,
|
||||
"Failed to apply NumberFormat on column: " + column, e);
|
||||
}
|
||||
}
|
||||
row.put(column, results);
|
||||
} else {
|
||||
if (val == null || val.toString().trim().equals(""))
|
||||
continue;
|
||||
try {
|
||||
row.put(column, process(val.toString(), styleSmall, locale));
|
||||
} catch (ParseException e) {
|
||||
throw new DataImportHandlerException(
|
||||
DataImportHandlerException.SEVERE,
|
||||
"Failed to apply NumberFormat on column: " + column, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return row;
|
||||
}
|
||||
|
||||
private Number process(String val, String style, Locale locale) throws ParseException {
|
||||
if (INTEGER.equals(style)) {
|
||||
return parseNumber(val, NumberFormat.getIntegerInstance(locale));
|
||||
} else if (NUMBER.equals(style)) {
|
||||
return parseNumber(val, NumberFormat.getNumberInstance(locale));
|
||||
} else if (CURRENCY.equals(style)) {
|
||||
return parseNumber(val, NumberFormat.getCurrencyInstance(locale));
|
||||
} else if (PERCENT.equals(style)) {
|
||||
return parseNumber(val, NumberFormat.getPercentInstance(locale));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private Number parseNumber(String val, NumberFormat numFormat) throws ParseException {
|
||||
ParsePosition parsePos = new ParsePosition(0);
|
||||
Number num = numFormat.parse(val, parsePos);
|
||||
if (parsePos.getIndex() != val.length()) {
|
||||
throw new ParseException("illegal number format", parsePos.getIndex());
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
public static final String FORMAT_STYLE = "formatStyle";
|
||||
|
||||
public static final String LOCALE = "locale";
|
||||
|
||||
public static final String NUMBER = "number";
|
||||
|
||||
public static final String PERCENT = "percent";
|
||||
|
||||
public static final String INTEGER = "integer";
|
||||
|
||||
public static final String CURRENCY = "currency";
|
||||
}
|
|
@ -1,78 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>An implementation of {@link EntityProcessor} which reads data from a url/file and give out a row which contains one String
|
||||
* value. The name of the field is 'plainText'.
|
||||
*
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class PlainTextEntityProcessor extends EntityProcessorBase {
|
||||
private boolean ended = false;
|
||||
|
||||
@Override
|
||||
public void init(Context context) {
|
||||
super.init(context);
|
||||
ended = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> nextRow() {
|
||||
if (ended) return null;
|
||||
@SuppressWarnings({"unchecked"})
|
||||
DataSource<Reader> ds = context.getDataSource();
|
||||
String url = context.replaceTokens(context.getEntityAttribute(URL));
|
||||
Reader r = null;
|
||||
try {
|
||||
r = ds.getData(url);
|
||||
} catch (Exception e) {
|
||||
wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
|
||||
}
|
||||
StringWriter sw = new StringWriter();
|
||||
char[] buf = new char[1024];
|
||||
while (true) {
|
||||
int len = 0;
|
||||
try {
|
||||
len = r.read(buf);
|
||||
} catch (IOException e) {
|
||||
IOUtils.closeQuietly(r);
|
||||
wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
|
||||
}
|
||||
if (len <= 0) break;
|
||||
sw.append(new String(buf, 0, len));
|
||||
}
|
||||
Map<String, Object> row = new HashMap<>();
|
||||
row.put(PLAIN_TEXT, sw.toString());
|
||||
ended = true;
|
||||
IOUtils.closeQuietly(r);
|
||||
return row;
|
||||
}
|
||||
|
||||
public static final String PLAIN_TEXT = "plainText";
|
||||
}
|
|
@ -1,200 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A {@link Transformer} implementation which uses Regular Expressions to extract, split
|
||||
* and replace data in fields.
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
* @see Pattern
|
||||
*/
|
||||
public class RegexTransformer extends Transformer {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
@Override
|
||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
||||
public Map<String, Object> transformRow(Map<String, Object> row,
|
||||
Context ctx) {
|
||||
List<Map<String, String>> fields = ctx.getAllEntityFields();
|
||||
for (Map<String, String> field : fields) {
|
||||
String col = field.get(DataImporter.COLUMN);
|
||||
String reStr = ctx.replaceTokens(field.get(REGEX));
|
||||
String splitBy = ctx.replaceTokens(field.get(SPLIT_BY));
|
||||
String replaceWith = ctx.replaceTokens(field.get(REPLACE_WITH));
|
||||
String groupNames = ctx.replaceTokens(field.get(GROUP_NAMES));
|
||||
if (reStr != null || splitBy != null) {
|
||||
String srcColName = field.get(SRC_COL_NAME);
|
||||
if (srcColName == null) {
|
||||
srcColName = col;
|
||||
}
|
||||
Object tmpVal = row.get(srcColName);
|
||||
if (tmpVal == null)
|
||||
continue;
|
||||
|
||||
if (tmpVal instanceof List) {
|
||||
List<String> inputs = (List<String>) tmpVal;
|
||||
List results = new ArrayList();
|
||||
Map<String,List> otherVars= null;
|
||||
for (String input : inputs) {
|
||||
Object o = process(col, reStr, splitBy, replaceWith, input, groupNames);
|
||||
if (o != null){
|
||||
if (o instanceof Map) {
|
||||
Map map = (Map) o;
|
||||
for (Object e : map.entrySet()) {
|
||||
Map.Entry<String ,Object> entry = (Map.Entry<String, Object>) e;
|
||||
List l = results;
|
||||
if(!col.equals(entry.getKey())){
|
||||
if(otherVars == null) otherVars = new HashMap<>();
|
||||
l = otherVars.get(entry.getKey());
|
||||
if(l == null){
|
||||
l = new ArrayList();
|
||||
otherVars.put(entry.getKey(), l);
|
||||
}
|
||||
}
|
||||
if (entry.getValue() instanceof Collection) {
|
||||
l.addAll((Collection) entry.getValue());
|
||||
} else {
|
||||
l.add(entry.getValue());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (o instanceof Collection) {
|
||||
results.addAll((Collection) o);
|
||||
} else {
|
||||
results.add(o);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
row.put(col, results);
|
||||
if(otherVars != null) row.putAll(otherVars);
|
||||
} else {
|
||||
String value = tmpVal.toString();
|
||||
Object o = process(col, reStr, splitBy, replaceWith, value, groupNames);
|
||||
if (o != null){
|
||||
if (o instanceof Map) {
|
||||
row.putAll((Map) o);
|
||||
} else{
|
||||
row.put(col, o);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return row;
|
||||
}
|
||||
|
||||
private Object process(String col, String reStr, String splitBy,
|
||||
String replaceWith, String value, String groupNames) {
|
||||
if (splitBy != null) {
|
||||
return readBySplit(splitBy, value);
|
||||
} else if (replaceWith != null) {
|
||||
Pattern p = getPattern(reStr);
|
||||
Matcher m = p.matcher(value);
|
||||
return m.find() ? m.replaceAll(replaceWith) : value;
|
||||
} else {
|
||||
return readfromRegExp(reStr, value, col, groupNames);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private List<String> readBySplit(String splitBy, String value) {
|
||||
String[] vals = value.split(splitBy);
|
||||
List<String> l = new ArrayList<>(Arrays.asList(vals));
|
||||
return l;
|
||||
}
|
||||
|
||||
  /**
   * Extracts capture groups of {@code reStr} from {@code value}.
   * <p>
   * Returns: {@code null} when the pattern does not match or has no groups; the single
   * group's text when there is exactly one group; otherwise either a List of all group
   * values (no group names configured) or a Map of groupName -&gt; groupValue when a
   * comma-separated {@code gNames} list is given. Groups beyond the named list, or with
   * blank names, are silently dropped in the named case.
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  private Object readfromRegExp(String reStr, String value, String columnName, String gNames) {
    String[] groupNames = null;
    if(gNames != null && gNames.trim().length() >0){
      groupNames = gNames.split(",");
    }
    Pattern regexp = getPattern(reStr);
    Matcher m = regexp.matcher(value);
    if (m.find() && m.groupCount() > 0) {
      if (m.groupCount() > 1) {
        // Multiple groups: collect into a list, or a name->value map when names are given.
        List l = null;
        Map<String ,String > map = null;
        if(groupNames == null){
          l = new ArrayList();
        } else {
          map = new HashMap<>();
        }
        for (int i = 1; i <= m.groupCount(); i++) {
          try {
            if(l != null){
              l.add(m.group(i));
            } else if (map != null ){
              // Only groups covered by the configured name list are kept.
              if(i <= groupNames.length){
                String nameOfGroup = groupNames[i-1];
                if(nameOfGroup != null && nameOfGroup.trim().length() >0){
                  map.put(nameOfGroup, m.group(i));
                }
              }
            }
          } catch (Exception e) {
            // Best-effort per group: a failure on one group must not abort the others.
            log.warn("Parsing failed for field : {}", columnName, e);
          }
        }
        return l == null ? map: l;
      } else {
        // Exactly one group: return its text directly.
        return m.group(1);
      }
    }
    
    // No match (or a pattern without capture groups): nothing to extract.
    return null;
  }
|
||||
|
||||
private Pattern getPattern(String reStr) {
|
||||
Pattern result = PATTERN_CACHE.get(reStr);
|
||||
if (result == null) {
|
||||
PATTERN_CACHE.put(reStr, result = Pattern.compile(reStr));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
  // Per-instance cache of compiled regexes keyed by the pattern string.
  // NOTE(review): plain HashMap, not thread-safe — assumes single-threaded use per instance.
  private HashMap<String, Pattern> PATTERN_CACHE = new HashMap<>();

  /** Field attribute: the regular expression to apply. */
  public static final String REGEX = "regex";

  /** Field attribute: replacement text; selects regex-replacement mode. */
  public static final String REPLACE_WITH = "replaceWith";

  /** Field attribute: regex to split the source value on; takes precedence over the other modes. */
  public static final String SPLIT_BY = "splitBy";

  /** Field attribute: column to read the raw value from (defaults to the target column). */
  public static final String SRC_COL_NAME = "sourceColName";

  /** Field attribute: comma-separated target names for the regex capture groups. */
  public static final String GROUP_NAMES = "groupNames";

}
|
|
@ -1,177 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
|
||||
/**
 * Immutable snapshot of a single DataImportHandler request: every field is parsed once
 * from the raw request-parameter map in the constructor and exposed through getters.
 */
public class RequestInfo {
  private final String command;
  private final boolean debug;
  private final boolean syncMode;
  private final boolean commit;
  private final boolean optimize;
  private final int start;
  private final long rows;
  private final boolean clean;
  private final List<String> entitiesToRun;
  private final Map<String,Object> rawParams;
  private final String configFile;
  private final String dataConfig;
  private final SolrQueryRequest request;
  
  //TODO: find a different home for these two...
  private final ContentStream contentStream;
  private final DebugInfo debugInfo;

  /**
   * Parses {@code requestParams} into typed, final fields. Parsing order matters:
   * "debug" is resolved first because the defaults of "clean", "commit" and "rows"
   * all depend on it.
   */
  public RequestInfo(SolrQueryRequest request, Map<String,Object> requestParams, ContentStream stream) {
    this.request = request;
    this.contentStream = stream;
    if (requestParams.containsKey("command")) {
      command = (String) requestParams.get("command");
    } else {
      command = null;
    }
    boolean debugMode = StrUtils.parseBool((String) requestParams.get("debug"), false);
    if (debugMode) {
      debug = true;
      debugInfo = new DebugInfo(requestParams);
    } else {
      debug = false;
      debugInfo = null;
    }
    // "clean" default: false for (delta-)import commands, otherwise true unless debugging.
    if (requestParams.containsKey("clean")) {
      clean = StrUtils.parseBool( (String) requestParams.get("clean"), true);
    } else if (DataImporter.DELTA_IMPORT_CMD.equals(command) || DataImporter.IMPORT_CMD.equals(command)) {
      clean = false;
    } else {
      clean = debug ? false : true;
    }
    // An explicit optimize implies a commit.
    optimize = StrUtils.parseBool((String) requestParams.get("optimize"), false);
    if(optimize) {
      commit = true;
    } else {
      commit = StrUtils.parseBool( (String) requestParams.get("commit"), (debug ? false : true));
    }
    // Debug runs default to a small row cap; normal runs are effectively unlimited.
    if (requestParams.containsKey("rows")) {
      rows = Integer.parseInt((String) requestParams.get("rows"));
    } else {
      rows = debug ? 10 : Long.MAX_VALUE;
    }
    
    if (requestParams.containsKey("start")) {
      start = Integer.parseInt((String) requestParams.get("start"));
    } else {
      start = 0;
    }
    syncMode = StrUtils.parseBool((String) requestParams.get("synchronous"), false);
    
    // "entity" may arrive as a single String or a List of Strings; normalize to an
    // unmodifiable list, or null meaning "run all entities".
    Object o = requestParams.get("entity");
    List<String> modifiableEntities = null;
    if(o != null) {
      if (o instanceof String) {
        modifiableEntities = new ArrayList<>();
        modifiableEntities.add((String) o);
      } else if (o instanceof List<?>) {
        @SuppressWarnings("unchecked")
        List<String> modifiableEntities1 = new ArrayList<>((List<String>) o);
        modifiableEntities = modifiableEntities1;
      }
      // NOTE(review): if "entity" is neither a String nor a List this NPEs — presumably
      // the request API only ever supplies those two shapes; confirm against callers.
      entitiesToRun = Collections.unmodifiableList(modifiableEntities);
    } else {
      entitiesToRun = null;
    }
    String configFileParam = (String) requestParams.get("config");
    configFile = configFileParam;
    String dataConfigParam = (String) requestParams.get("dataConfig");
    if (dataConfigParam != null && dataConfigParam.trim().length() == 0) {
      // Empty data-config param is not valid, change it to null
      dataConfigParam = null;
    }
    dataConfig = dataConfigParam;
    // Defensive copy so later mutation of the caller's map cannot leak in.
    this.rawParams = Collections.unmodifiableMap(new HashMap<>(requestParams));
  }

  /** The DIH command for this request (e.g. full-import/delta-import), or null. */
  public String getCommand() {
    return command;
  }

  /** Whether this request runs in debug mode. */
  public boolean isDebug() {
    return debug;
  }

  /** Whether the import should run synchronously with the request thread. */
  public boolean isSyncMode() {
    return syncMode;
  }

  /** Whether to commit at the end of the import. */
  public boolean isCommit() {
    return commit;
  }

  /** Whether to optimize (which also forces a commit). */
  public boolean isOptimize() {
    return optimize;
  }

  /** Row offset to start from (debug paging). */
  public int getStart() {
    return start;
  }

  /** Maximum number of rows to process. */
  public long getRows() {
    return rows;
  }

  /** Whether to delete existing documents before importing. */
  public boolean isClean() {
    return clean;
  }
  /**
   * Returns null if we are to run all entities, otherwise just run the entities named in the list.
   */
  public List<String> getEntitiesToRun() {
    return entitiesToRun;
  }

  /** Inline data-config XML supplied with the request, or null. */
  public String getDataConfig() {
    return dataConfig;
  }

  /** Unmodifiable copy of the raw request parameters. */
  public Map<String,Object> getRawParams() {
    return rawParams;
  }

  /** Content stream posted with the request, if any. */
  public ContentStream getContentStream() {
    return contentStream;
  }

  /** Debug bookkeeping; non-null only when {@link #isDebug()} is true. */
  public DebugInfo getDebugInfo() {
    return debugInfo;
  }

  /** Name of the config file requested via the "config" parameter, or null. */
  public String getConfigFile() {
    return configFile;
  }

  /** The originating Solr request. */
  public SolrQueryRequest getRequest() {
    return request;
  }
}
|
|
@ -1,131 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
|
||||
import java.security.AccessControlContext;
|
||||
import java.security.AccessController;
|
||||
import java.security.PrivilegedAction;
|
||||
import java.security.PrivilegedActionException;
|
||||
import java.security.PrivilegedExceptionAction;
|
||||
import java.security.ProtectionDomain;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.script.Invocable;
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptEngineManager;
|
||||
import javax.script.ScriptException;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A {@link Transformer} instance capable of executing functions written in scripting
|
||||
* languages as a {@link Transformer} instance.
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class ScriptTransformer extends Transformer {
|
||||
private Invocable engine;
|
||||
private String functionName;
|
||||
|
||||
@Override
|
||||
public Object transformRow(Map<String,Object> row, Context context) {
|
||||
return AccessController.doPrivileged(new PrivilegedAction<Object>() {
|
||||
@Override
|
||||
public Object run() {
|
||||
return transformRowUnsafe(row, context);
|
||||
}
|
||||
}, SCRIPT_SANDBOX);
|
||||
}
|
||||
|
||||
public Object transformRowUnsafe(Map<String, Object> row, Context context) {
|
||||
try {
|
||||
if (engine == null)
|
||||
initEngine(context);
|
||||
if (engine == null)
|
||||
return row;
|
||||
return engine.invokeFunction(functionName, new Object[]{row, context});
|
||||
} catch (DataImportHandlerException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
wrapAndThrow(SEVERE,e, "Error invoking script for entity " + context.getEntityAttribute("name"));
|
||||
}
|
||||
//will not reach here
|
||||
return null;
|
||||
}
|
||||
|
||||
private void initEngine(Context context) {
|
||||
String scriptText = context.getScript();
|
||||
String scriptLang = context.getScriptLanguage();
|
||||
if (scriptText == null) {
|
||||
throw new DataImportHandlerException(SEVERE,
|
||||
"<script> tag is not present under <dataConfig>");
|
||||
}
|
||||
ScriptEngineManager scriptEngineMgr = new ScriptEngineManager();
|
||||
ScriptEngine scriptEngine = scriptEngineMgr.getEngineByName(scriptLang);
|
||||
if (scriptEngine == null) {
|
||||
throw new DataImportHandlerException(SEVERE,
|
||||
"Cannot load Script Engine for language: " + scriptLang);
|
||||
}
|
||||
if (scriptEngine instanceof Invocable) {
|
||||
engine = (Invocable) scriptEngine;
|
||||
} else {
|
||||
throw new DataImportHandlerException(SEVERE,
|
||||
"The installed ScriptEngine for: " + scriptLang
|
||||
+ " does not implement Invocable. Class is "
|
||||
+ scriptEngine.getClass().getName());
|
||||
}
|
||||
try {
|
||||
try {
|
||||
AccessController.doPrivileged(new PrivilegedExceptionAction<Void>() {
|
||||
@Override
|
||||
public Void run() throws ScriptException {
|
||||
scriptEngine.eval(scriptText);
|
||||
return null;
|
||||
}
|
||||
}, SCRIPT_SANDBOX);
|
||||
} catch (PrivilegedActionException e) {
|
||||
throw (ScriptException) e.getException();
|
||||
}
|
||||
} catch (ScriptException e) {
|
||||
wrapAndThrow(SEVERE, e, "'eval' failed with language: " + scriptLang
|
||||
+ " and script: \n" + scriptText);
|
||||
}
|
||||
}
|
||||
|
||||
public void setFunctionName(String methodName) {
|
||||
this.functionName = methodName;
|
||||
}
|
||||
|
||||
public String getFunctionName() {
|
||||
return functionName;
|
||||
}
|
||||
|
||||
// sandbox for script code: zero permissions
|
||||
private static final AccessControlContext SCRIPT_SANDBOX =
|
||||
new AccessControlContext(new ProtectionDomain[] { new ProtectionDomain(null, null) });
|
||||
|
||||
}
|
|
@ -1,247 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.security.AccessControlException;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.IllformedLocaleException;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrPaths;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
/**
|
||||
* <p>
|
||||
* Writes properties using {@link Properties#store} .
|
||||
* The special property "last_index_time" is converted to a formatted date.
|
||||
* Users can configure the location, filename, locale and date format to use.
|
||||
* </p>
|
||||
*/
|
||||
public class SimplePropertiesWriter extends DIHProperties {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
static final String LAST_INDEX_KEY = "last_index_time";
|
||||
|
||||
protected String filename = null;
|
||||
|
||||
protected String configDir = null;
|
||||
|
||||
protected Locale locale = null;
|
||||
|
||||
protected SimpleDateFormat dateFormat = null;
|
||||
|
||||
/**
|
||||
* The locale to use when writing the properties file. Default is {@link Locale#ROOT}
|
||||
*/
|
||||
public static final String LOCALE = "locale";
|
||||
/**
|
||||
* The date format to use when writing values for "last_index_time" to the properties file.
|
||||
* See {@link SimpleDateFormat} for patterns. Default is yyyy-MM-dd HH:mm:ss .
|
||||
*/
|
||||
public static final String DATE_FORMAT = "dateFormat";
|
||||
/**
|
||||
* The directory to save the properties file in. Default is the current core's "config" directory.
|
||||
*/
|
||||
public static final String DIRECTORY = "directory";
|
||||
/**
|
||||
* The filename to save the properties file to. Default is this Handler's name from solrconfig.xml.
|
||||
*/
|
||||
public static final String FILENAME = "filename";
|
||||
|
||||
@Override
|
||||
public void init(DataImporter dataImporter, Map<String, String> params) {
|
||||
if(params.get(FILENAME) != null) {
|
||||
filename = params.get(FILENAME);
|
||||
} else if(dataImporter.getHandlerName()!=null) {
|
||||
filename = dataImporter.getHandlerName() + ".properties";
|
||||
} else {
|
||||
filename = "dataimport.properties";
|
||||
}
|
||||
findDirectory(dataImporter, params);
|
||||
if(params.get(LOCALE) != null) {
|
||||
locale = getLocale(params.get(LOCALE));
|
||||
} else {
|
||||
locale = Locale.ROOT;
|
||||
}
|
||||
if(params.get(DATE_FORMAT) != null) {
|
||||
dateFormat = new SimpleDateFormat(params.get(DATE_FORMAT), locale);
|
||||
} else {
|
||||
dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", locale);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
|
||||
private Locale getLocale(String name) {
|
||||
if (name == null) {
|
||||
return Locale.ROOT;
|
||||
}
|
||||
for (final Locale l : Locale.getAvailableLocales()) {
|
||||
if(name.equals(l.toString()) || name.equals(l.getDisplayName(Locale.ROOT))) {
|
||||
return locale;
|
||||
}
|
||||
}
|
||||
try {
|
||||
return new Locale.Builder().setLanguageTag(name).build();
|
||||
} catch (IllformedLocaleException ex) {
|
||||
throw new DataImportHandlerException(SEVERE, "Unsupported locale for PropertyWriter: " + name);
|
||||
}
|
||||
}
|
||||
|
||||
protected void findDirectory(DataImporter dataImporter, Map<String, String> params) {
|
||||
if(params.get(DIRECTORY) != null) {
|
||||
configDir = params.get(DIRECTORY);
|
||||
} else {
|
||||
SolrCore core = dataImporter.getCore();
|
||||
if (core == null) {
|
||||
configDir = SolrPaths.locateSolrHome().toString();
|
||||
} else {
|
||||
configDir = core.getResourceLoader().getConfigDir();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private File getPersistFile() {
|
||||
final File filePath;
|
||||
if (new File(filename).isAbsolute() || configDir == null) {
|
||||
filePath = new File(filename);
|
||||
} else {
|
||||
filePath = new File(new File(configDir), filename);
|
||||
}
|
||||
return filePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isWritable() {
|
||||
File persistFile = getPersistFile();
|
||||
try {
|
||||
return persistFile.exists()
|
||||
? persistFile.canWrite()
|
||||
: persistFile.getParentFile().canWrite();
|
||||
} catch (AccessControlException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String convertDateToString(Date d) {
|
||||
return dateFormat.format(d);
|
||||
}
|
||||
protected Date convertStringToDate(String s) {
|
||||
try {
|
||||
return dateFormat.parse(s);
|
||||
} catch (ParseException e) {
|
||||
throw new DataImportHandlerException(SEVERE, "Value for "
|
||||
+ LAST_INDEX_KEY + " is invalid for date format "
|
||||
+ dateFormat.toLocalizedPattern() + " : " + s);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* {@link DocBuilder} sends the date as an Object because
|
||||
* this class knows how to convert it to a String
|
||||
*/
|
||||
protected Properties mapToProperties(Map<String,Object> propObjs) {
|
||||
Properties p = new Properties();
|
||||
for(Map.Entry<String,Object> entry : propObjs.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
String val = null;
|
||||
String lastKeyPart = key;
|
||||
int lastDotPos = key.lastIndexOf('.');
|
||||
if(lastDotPos!=-1 && key.length() > lastDotPos+1) {
|
||||
lastKeyPart = key.substring(lastDotPos + 1);
|
||||
}
|
||||
if(LAST_INDEX_KEY.equals(lastKeyPart) && entry.getValue() instanceof Date) {
|
||||
val = convertDateToString((Date) entry.getValue());
|
||||
} else {
|
||||
val = entry.getValue().toString();
|
||||
}
|
||||
p.put(key, val);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
/**
|
||||
* We'll send everything back as Strings as this class has
|
||||
* already converted them.
|
||||
*/
|
||||
protected Map<String,Object> propertiesToMap(Properties p) {
|
||||
Map<String,Object> theMap = new HashMap<>();
|
||||
for(Map.Entry<Object,Object> entry : p.entrySet()) {
|
||||
String key = entry.getKey().toString();
|
||||
Object val = entry.getValue().toString();
|
||||
theMap.put(key, val);
|
||||
}
|
||||
return theMap;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void persist(Map<String, Object> propObjs) {
|
||||
Writer propOutput = null;
|
||||
Properties existingProps = mapToProperties(readIndexerProperties());
|
||||
Properties newProps = mapToProperties(propObjs);
|
||||
try {
|
||||
existingProps.putAll(newProps);
|
||||
propOutput = new OutputStreamWriter(new FileOutputStream(getPersistFile()), StandardCharsets.UTF_8);
|
||||
existingProps.store(propOutput, null);
|
||||
log.info("Wrote last indexed time to {}", filename);
|
||||
} catch (Exception e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Unable to persist Index Start Time", e);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(propOutput);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> readIndexerProperties() {
|
||||
Properties props = new Properties();
|
||||
InputStream propInput = null;
|
||||
try {
|
||||
String filePath = configDir;
|
||||
if (configDir != null && !configDir.endsWith(File.separator)) {
|
||||
filePath += File.separator;
|
||||
}
|
||||
filePath += filename;
|
||||
propInput = new FileInputStream(filePath);
|
||||
props.load(new InputStreamReader(propInput, StandardCharsets.UTF_8));
|
||||
log.info("Read {}", filename);
|
||||
} catch (Exception e) {
|
||||
log.warn("Unable to read: {}", filename);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(propInput);
|
||||
}
|
||||
return propertiesToMap(props);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,321 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.http.client.HttpClient;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.HttpClientUtil;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
|
||||
import org.apache.solr.client.solrj.impl.XMLResponseParser;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.CursorMarkParams;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* An implementation of {@link EntityProcessor} which fetches values from a
|
||||
* separate Solr implementation using the SolrJ client library. Yield a row per
|
||||
* Solr document.
|
||||
* </p>
|
||||
* <p>
|
||||
* Limitations:
|
||||
* All configuration is evaluated at the beginning;
|
||||
* Only one query is walked;
|
||||
* </p>
|
||||
*/
|
||||
public class SolrEntityProcessor extends EntityProcessorBase {
|
||||
|
||||
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  
  /** Entity attribute: base URL of the source Solr instance to import from. */
  public static final String SOLR_SERVER = "url";
  /** Entity attribute: the query to run against the source Solr (required). */
  public static final String QUERY = "query";
  /** Entity attribute: timeout for the source-Solr requests, in seconds. */
  public static final String TIMEOUT = "timeout";
  
  public static final int TIMEOUT_SECS = 5 * 60; // 5 minutes
  /** Default page size when the "rows" attribute is absent. */
  public static final int ROWS_DEFAULT = 50;
  
  // Mutable per-entity state; populated lazily in firstInit() and doQuery().
  private SolrClient solrClient = null;
  private String queryString;
  private int rows = ROWS_DEFAULT;
  private String[] filterQueries;
  private String[] fields;
  private String requestHandler;// 'qt' param
  private int timeout = TIMEOUT_SECS;
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
try {
|
||||
solrClient.close();
|
||||
} catch (IOException e) {
|
||||
|
||||
} finally {
|
||||
HttpClientUtil.close(((HttpSolrClient) solrClient).getHttpClient());
|
||||
}
|
||||
}
|
||||
|
||||
  /**
   * Factory method that returns a {@link HttpClient} instance used for interfacing with a source Solr service.
   * One can override this method to return a differently configured {@link HttpClient} instance.
   * For example configure https and http authentication.
   *
   * @return a {@link HttpClient} instance used for interfacing with a source Solr service
   */
  protected HttpClient getHttpClient() {
    // Default: a client with HttpClientUtil's default configuration (null params).
    return HttpClientUtil.createClient(null);
  }
|
||||
|
||||
@Override
|
||||
protected void firstInit(Context context) {
|
||||
super.firstInit(context);
|
||||
|
||||
try {
|
||||
String serverPath = context.getResolvedEntityAttribute(SOLR_SERVER);
|
||||
if (serverPath == null) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"SolrEntityProcessor: parameter 'url' is required");
|
||||
}
|
||||
|
||||
HttpClient client = getHttpClient();
|
||||
URL url = new URL(serverPath);
|
||||
// (wt="javabin|xml") default is javabin
|
||||
if ("xml".equals(context.getResolvedEntityAttribute(CommonParams.WT))) {
|
||||
// TODO: it doesn't matter for this impl when passing a client currently, but we should close this!
|
||||
solrClient = new Builder(url.toExternalForm())
|
||||
.withHttpClient(client)
|
||||
.withResponseParser(new XMLResponseParser())
|
||||
.build();
|
||||
log.info("using XMLResponseParser");
|
||||
} else {
|
||||
// TODO: it doesn't matter for this impl when passing a client currently, but we should close this!
|
||||
solrClient = new Builder(url.toExternalForm())
|
||||
.withHttpClient(client)
|
||||
.build();
|
||||
log.info("using BinaryResponseParser");
|
||||
}
|
||||
} catch (MalformedURLException e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, e);
|
||||
}
|
||||
}
|
||||
|
||||
  /**
   * Returns the next row, first ensuring the page iterator exists and is advanced
   * to a page that still has documents.
   */
  @Override
  public Map<String,Object> nextRow() {
    buildIterator();
    return getNext();
  }
|
||||
|
||||
  /**
   * The following method changes the rowIterator mutable field. It requires
   * external synchronization. 
   */
  protected void buildIterator() {
    if (rowIterator != null) {
      // Paging in progress: fetch the next page only when the current one is exhausted
      // and the iterator reports more rows remaining on the server.
      SolrDocumentListIterator documentListIterator = (SolrDocumentListIterator) rowIterator;
      if (!documentListIterator.hasNext() && documentListIterator.hasMoreRows()) {
        nextPage();
      }
    } else {
      // First call: pick plain start/rows paging or cursorMark paging per the entity config,
      // seed with an empty document list, then run the first query.
      boolean cursor = Boolean.parseBoolean(context
          .getResolvedEntityAttribute(CursorMarkParams.CURSOR_MARK_PARAM));
      rowIterator = !cursor ? new SolrDocumentListIterator(new SolrDocumentList())
          : new SolrDocumentListCursor(new SolrDocumentList(), CursorMarkParams.CURSOR_MARK_START);
      nextPage();
    }
  }
|
||||
|
||||
  /** Runs the query for the next page through the current iterator, replacing it. */
  protected void nextPage() {
    ((SolrDocumentListIterator)rowIterator).doQuery();
  }
|
||||
|
||||
  /**
   * Paging strategy that advances through the source Solr's results using cursorMark
   * rather than start/rows offsets.
   */
  class SolrDocumentListCursor extends SolrDocumentListIterator {
    
    // Cursor token to send with the next query (CURSOR_MARK_START for the first page).
    private final String cursorMark;

    public SolrDocumentListCursor(SolrDocumentList solrDocumentList, String cursorMark) {
      super(solrDocumentList);
      this.cursorMark = cursorMark;
    }

    @Override
    protected void passNextPage(SolrQuery solrQuery) {
      // Fail fast: the entity-level timeout attribute is not supported with cursor paging.
      String timeoutAsString = context.getResolvedEntityAttribute(TIMEOUT);
      if (timeoutAsString != null) {
        throw new DataImportHandlerException(SEVERE,"cursorMark can't be used with timeout");
      }
      
      solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
    }
    
    @Override
    protected Iterator<Map<String,Object>> createNextPageIterator(QueryResponse response) {
      // Carry the server-provided next cursor into the iterator for the following page.
      return
          new SolrDocumentListCursor(response.getResults(),
              response.getNextCursorMark()) ;
    }
  }
|
||||
|
||||
/**
 * Iterates over one page of Solr results ({@link SolrDocumentList}), exposing
 * each {@link SolrDocument} as a field-name-to-value map.  {@code doQuery()}
 * builds and executes the query for the next page, mutating several fields of
 * the enclosing SolrEntityProcessor as a side effect.
 */
class SolrDocumentListIterator implements Iterator<Map<String,Object>> {

  // Offset of this page within the full result set.
  private final int start;
  // Number of documents in this page.
  private final int size;
  // Total matching documents reported by Solr.
  private final long numFound;
  private final Iterator<SolrDocument> solrDocumentIterator;

  public SolrDocumentListIterator(SolrDocumentList solrDocumentList) {
    this.solrDocumentIterator = solrDocumentList.iterator();
    this.numFound = solrDocumentList.getNumFound();
    // SolrQuery has the start field of type int while SolrDocumentList of
    // type long. We are always querying with an int so we can't receive a
    // long as output. That's the reason why the following cast seems safe
    this.start = (int) solrDocumentList.getStart();
    this.size = solrDocumentList.size();
  }

  /**
   * Re-reads the entity attributes (query, rows, sort, fq, fl, qt) from the
   * context, builds the SolrQuery for the next page, executes it, and — on a
   * non-null response — replaces the enclosing processor's rowIterator with a
   * fresh page iterator.  Returns the raw response (may be null on error when
   * onError is not ABORT).
   */
  protected QueryResponse doQuery() {
    // Attributes are re-resolved per page; these assignments deliberately
    // overwrite the enclosing processor's state.
    SolrEntityProcessor.this.queryString = context.getResolvedEntityAttribute(QUERY);
    if (SolrEntityProcessor.this.queryString == null) {
      throw new DataImportHandlerException(
          DataImportHandlerException.SEVERE,
          "SolrEntityProcessor: parameter 'query' is required"
      );
    }

    String rowsP = context.getResolvedEntityAttribute(CommonParams.ROWS);
    if (rowsP != null) {
      rows = Integer.parseInt(rowsP);
    }

    String sortParam = context.getResolvedEntityAttribute(CommonParams.SORT);

    // Both fq and fl accept comma-separated lists (no escaping of embedded commas).
    String fqAsString = context.getResolvedEntityAttribute(CommonParams.FQ);
    if (fqAsString != null) {
      SolrEntityProcessor.this.filterQueries = fqAsString.split(",");
    }

    String fieldsAsString = context.getResolvedEntityAttribute(CommonParams.FL);
    if (fieldsAsString != null) {
      SolrEntityProcessor.this.fields = fieldsAsString.split(",");
    }
    SolrEntityProcessor.this.requestHandler = context.getResolvedEntityAttribute(CommonParams.QT);


    SolrQuery solrQuery = new SolrQuery(queryString);
    solrQuery.setRows(rows);

    if (sortParam!=null) {
      solrQuery.setParam(CommonParams.SORT, sortParam);
    }

    // Subclass hook: offset paging here, cursorMark in SolrDocumentListCursor.
    passNextPage(solrQuery);

    if (fields != null) {
      for (String field : fields) {
        solrQuery.addField(field);
      }
    }
    solrQuery.setRequestHandler(requestHandler);
    solrQuery.setFilterQueries(filterQueries);


    QueryResponse response = null;
    try {
      response = solrClient.query(solrQuery);
    } catch (SolrServerException | IOException | SolrException e) {
      // onError semantics: ABORT rethrows as SEVERE, SKIP marks the row
      // skipped; any other setting silently continues with a null response.
      if (ABORT.equals(onError)) {
        wrapAndThrow(SEVERE, e);
      } else if (SKIP.equals(onError)) {
        wrapAndThrow(DataImportHandlerException.SKIP_ROW, e);
      }
    }

    if (response != null) {
      SolrEntityProcessor.this.rowIterator = createNextPageIterator(response);
    }
    return response;
  }

  // Overridden by SolrDocumentListCursor to use cursorMark paging.
  protected Iterator<Map<String,Object>> createNextPageIterator(QueryResponse response) {
    return new SolrDocumentListIterator(response.getResults());
  }

  // Offset-based paging: apply timeAllowed (if configured) and advance start.
  protected void passNextPage(SolrQuery solrQuery) {
    String timeoutAsString = context.getResolvedEntityAttribute(TIMEOUT);
    if (timeoutAsString != null) {
      SolrEntityProcessor.this.timeout = Integer.parseInt(timeoutAsString);
    }

    // timeout is configured in seconds; setTimeAllowed expects milliseconds.
    solrQuery.setTimeAllowed(timeout * 1000);

    solrQuery.setStart(getStart() + getSize());
  }

  @Override
  public boolean hasNext() {
    return solrDocumentIterator.hasNext();
  }

  @Override
  public Map<String,Object> next() {
    SolrDocument solrDocument = solrDocumentIterator.next();

    // Flatten the document into a plain field-name -> value map.
    HashMap<String,Object> map = new HashMap<>();
    Collection<String> fields = solrDocument.getFieldNames();
    for (String field : fields) {
      Object fieldValue = solrDocument.getFieldValue(field);
      map.put(field, fieldValue);
    }
    return map;
  }

  public int getStart() {
    return start;
  }

  public int getSize() {
    return size;
  }

  /** True when Solr reported more matches than this page reaches. */
  public boolean hasMoreRows() {
    return numFound > start + size;
  }

  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }
}
||||
|
||||
}
|
|
@ -1,35 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.solr.client.solrj.util.ClientUtils;
|
||||
|
||||
public class SolrQueryEscapingEvaluator extends Evaluator {
|
||||
@Override
|
||||
public String evaluate(String expression, Context context) {
|
||||
List<Object> l = parseParams(expression, context.getVariableResolver());
|
||||
if (l.size() != 1) {
|
||||
throw new DataImportHandlerException(SEVERE, "'escapeQueryChars' must have at least one parameter ");
|
||||
}
|
||||
String s = l.get(0).toString();
|
||||
return ClientUtils.escapeQueryChars(s);
|
||||
}
|
||||
}
|
|
@ -1,175 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
import org.apache.solr.update.CommitUpdateCommand;
|
||||
import org.apache.solr.update.DeleteUpdateCommand;
|
||||
import org.apache.solr.update.RollbackUpdateCommand;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
/**
|
||||
* <p> Writes documents to SOLR. </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class SolrWriter extends DIHWriterBase implements DIHWriter {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
public static final String LAST_INDEX_KEY = "last_index_time";
|
||||
|
||||
private final UpdateRequestProcessor processor;
|
||||
private final int commitWithin;
|
||||
|
||||
SolrQueryRequest req;
|
||||
|
||||
public SolrWriter(UpdateRequestProcessor processor, SolrQueryRequest req) {
|
||||
this.processor = processor;
|
||||
this.req = req;
|
||||
commitWithin = (req != null) ? req.getParams().getInt(UpdateParams.COMMIT_WITHIN, -1): -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
try {
|
||||
processor.finish();
|
||||
} catch (IOException e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Unable to call finish() on UpdateRequestProcessor", e);
|
||||
} finally {
|
||||
deltaKeys = null;
|
||||
try {
|
||||
processor.close();
|
||||
} catch (IOException e) {
|
||||
SolrException.log(log, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public boolean upload(SolrInputDocument d) {
|
||||
try {
|
||||
AddUpdateCommand command = new AddUpdateCommand(req);
|
||||
command.solrDoc = d;
|
||||
command.commitWithin = commitWithin;
|
||||
processor.processAdd(command);
|
||||
} catch (Exception e) {
|
||||
log.warn("Error creating document : {}", d, e);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteDoc(Object id) {
|
||||
try {
|
||||
log.info("Deleting document: {}", id);
|
||||
DeleteUpdateCommand delCmd = new DeleteUpdateCommand(req);
|
||||
delCmd.setId(id.toString());
|
||||
processor.processDelete(delCmd);
|
||||
} catch (IOException e) {
|
||||
log.error("Exception while deleteing: {}", id, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteByQuery(String query) {
|
||||
try {
|
||||
log.info("Deleting documents from Solr with query: {}", query);
|
||||
DeleteUpdateCommand delCmd = new DeleteUpdateCommand(req);
|
||||
delCmd.query = query;
|
||||
processor.processDelete(delCmd);
|
||||
} catch (IOException e) {
|
||||
log.error("Exception while deleting by query: {}", query, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit(boolean optimize) {
|
||||
try {
|
||||
CommitUpdateCommand commit = new CommitUpdateCommand(req,optimize);
|
||||
processor.processCommit(commit);
|
||||
} catch (Exception e) {
|
||||
log.error("Exception while solr commit.", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rollback() {
|
||||
try {
|
||||
RollbackUpdateCommand rollback = new RollbackUpdateCommand(req);
|
||||
processor.processRollback(rollback);
|
||||
} catch (Exception e) {
|
||||
log.error("Exception during rollback command.", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doDeleteAll() {
|
||||
try {
|
||||
DeleteUpdateCommand deleteCommand = new DeleteUpdateCommand(req);
|
||||
deleteCommand.query = "*:*";
|
||||
processor.processDelete(deleteCommand);
|
||||
} catch (IOException e) {
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
|
||||
"Exception in full dump while deleting all documents.", e);
|
||||
}
|
||||
}
|
||||
|
||||
static String getResourceAsString(InputStream in) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
|
||||
byte[] buf = new byte[1024];
|
||||
int sz = 0;
|
||||
try {
|
||||
while ((sz = in.read(buf)) != -1) {
|
||||
baos.write(buf, 0, sz);
|
||||
}
|
||||
} finally {
|
||||
try {
|
||||
in.close();
|
||||
} catch (Exception e) {
|
||||
|
||||
}
|
||||
}
|
||||
return new String(baos.toByteArray(), StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
static String getDocCount() {
|
||||
if (DocBuilder.INSTANCE.get() != null) {
|
||||
return ""
|
||||
+ (DocBuilder.INSTANCE.get().importStatistics.docCount.get() + 1);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public void init(Context context) {
|
||||
/* NO-OP */
|
||||
}
|
||||
}
|
|
@ -1,238 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
 * A {@link DIHCache} backed by an in-memory {@link TreeMap}. Rows are keyed on
 * a single primary-key column; multiple rows may share the same key value.
 * Callers must open() the cache before any add/delete/iterate operation.
 */
public class SortedMapBackedCache implements DIHCache {
  // Backing store: primary-key value -> all rows added under that key.
  private SortedMap<Object,List<Map<String,Object>>> theMap = null;
  private boolean isOpen = false;
  private boolean isReadOnly = false;
  // Name of the key column. Configured via CACHE_PRIMARY_KEY in open(), or
  // defaulted to the first key of the first record added.
  String primaryKeyName = null;

  /**
   * Adds one row. No-op for null/empty rows and for rows whose key value is
   * null. A Collection-valued key must contain exactly one element.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void add(Map<String,Object> rec) {
    checkOpen(true);
    checkReadOnly();

    if (rec == null || rec.size() == 0) {
      return;
    }

    // Lazily pick the key column from the first record when not configured.
    // NOTE(review): iteration order of rec.keySet() determines the choice;
    // presumably callers pass ordered maps — confirm before relying on this.
    if (primaryKeyName == null) {
      primaryKeyName = rec.keySet().iterator().next();
    }

    Object pk = rec.get(primaryKeyName);
    if (pk instanceof Collection<?>) {
      Collection<Object> c = (Collection<Object>) pk;
      if (c.size() != 1) {
        throw new RuntimeException(
            "The primary key must have exactly 1 element.");
      }
      pk = c.iterator().next();
    }
    //Rows with null keys are not added.
    if(pk==null) {
      return;
    }
    List<Map<String,Object>> thisKeysRecs = theMap.get(pk);
    if (thisKeysRecs == null) {
      thisKeysRecs = new ArrayList<>();
      theMap.put(pk, thisKeysRecs);
    }
    thisKeysRecs.add(rec);
  }

  // Asserts the cache is (or is not) open; throws IllegalStateException otherwise.
  private void checkOpen(boolean shouldItBe) {
    if (!isOpen && shouldItBe) {
      throw new IllegalStateException(
          "Must call open() before using this cache.");
    }
    if (isOpen && !shouldItBe) {
      throw new IllegalStateException("The cache is already open.");
    }
  }

  private void checkReadOnly() {
    if (isReadOnly) {
      throw new IllegalStateException("Cache is read-only.");
    }
  }

  @Override
  public void close() {
    isOpen = false;
  }

  /** Removes all rows stored under the given key; no-op for a null key. */
  @Override
  public void delete(Object key) {
    checkOpen(true);
    checkReadOnly();
    if(key==null) {
      return;
    }
    theMap.remove(key);
  }

  @Override
  public void deleteAll() {
    deleteAll(false);
  }

  // readOnlyOk=true skips the read-only check (used by destroy()).
  private void deleteAll(boolean readOnlyOk) {
    if (!readOnlyOk) {
      checkReadOnly();
    }
    if (theMap != null) {
      theMap.clear();
    }
  }

  /** Clears and discards the backing map and marks the cache closed. */
  @Override
  public void destroy() {
    deleteAll(true);
    theMap = null;
    isOpen = false;
  }

  // In-memory cache: nothing to persist, but state checks still apply.
  @Override
  public void flush() {
    checkOpen(true);
    checkReadOnly();
  }

  /**
   * Returns an iterator over the rows stored under the given key, or null when
   * the key is null or has no rows. An Iterable key yields the concatenation
   * of rows for each of its elements.
   */
  @Override
  public Iterator<Map<String,Object>> iterator(Object key) {
    checkOpen(true);
    if(key==null) {
      return null;
    }
    if(key instanceof Iterable<?>) {
      List<Map<String,Object>> vals = new ArrayList<>();
      Iterator<?> iter = ((Iterable<?>) key).iterator();
      while(iter.hasNext()) {
        List<Map<String,Object>> val = theMap.get(iter.next());
        if(val!=null) {
          vals.addAll(val);
        }
      }
      if(vals.size()==0) {
        return null;
      }
      return vals.iterator();
    }
    List<Map<String,Object>> val = theMap.get(key);
    if (val == null) {
      return null;
    }
    return val.iterator();
  }

  /**
   * Iterates every row in key order. NOTE: hasNext() and next() both advance
   * the underlying map iterator when the current key's rows are exhausted, so
   * the advance logic is intentionally duplicated in the two methods.
   */
  @Override
  public Iterator<Map<String,Object>> iterator() {
    return new Iterator<Map<String, Object>>() {
      private Iterator<Map.Entry<Object,List<Map<String,Object>>>> theMapIter;
      private List<Map<String,Object>> currentKeyResult = null;
      private Iterator<Map<String,Object>> currentKeyResultIter = null;

      // Instance initializer: snapshot the entry iterator at creation time.
      {
        theMapIter = theMap.entrySet().iterator();
      }

      @Override
      public boolean hasNext() {
        if (currentKeyResultIter != null) {
          if (currentKeyResultIter.hasNext()) {
            return true;
          } else {
            currentKeyResult = null;
            currentKeyResultIter = null;
          }
        }

        // Advance to the next key's row list, if any.
        Map.Entry<Object,List<Map<String,Object>>> next = null;
        if (theMapIter.hasNext()) {
          next = theMapIter.next();
          currentKeyResult = next.getValue();
          currentKeyResultIter = currentKeyResult.iterator();
          if (currentKeyResultIter.hasNext()) {
            return true;
          }
        }
        return false;
      }

      @Override
      public Map<String,Object> next() {
        if (currentKeyResultIter != null) {
          if (currentKeyResultIter.hasNext()) {
            return currentKeyResultIter.next();
          } else {
            currentKeyResult = null;
            currentKeyResultIter = null;
          }
        }

        // Mirror of hasNext()'s advance; returns null (not NoSuchElementException)
        // when exhausted.
        Map.Entry<Object,List<Map<String,Object>>> next = null;
        if (theMapIter.hasNext()) {
          next = theMapIter.next();
          currentKeyResult = next.getValue();
          currentKeyResultIter = currentKeyResult.iterator();
          if (currentKeyResultIter.hasNext()) {
            return currentKeyResultIter.next();
          }
        }
        return null;
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }
    };
  }

  /**
   * Opens the cache, creating the backing map if needed, and reads the
   * CACHE_PRIMARY_KEY and CACHE_READ_ONLY settings from the context.
   */
  @Override
  public void open(Context context) {
    checkOpen(false);
    isOpen = true;
    if (theMap == null) {
      theMap = new TreeMap<>();
    }

    String pkName = CachePropertyUtil.getAttributeValueAsString(context,
        DIHCacheSupport.CACHE_PRIMARY_KEY);
    if (pkName != null) {
      primaryKeyName = pkName;
    }
    isReadOnly = false;
    String readOnlyStr = CachePropertyUtil.getAttributeValueAsString(context,
        DIHCacheSupport.CACHE_READ_ONLY);
    if ("true".equalsIgnoreCase(readOnlyStr)) {
      isReadOnly = true;
    }
  }

}
|
|
@ -1,173 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* An {@link EntityProcessor} instance which provides support for reading from
|
||||
* databases. It is used in conjunction with {@link JdbcDataSource}. This is the default
|
||||
* {@link EntityProcessor} if none is specified explicitly in data-config.xml
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class SqlEntityProcessor extends EntityProcessorBase {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
protected DataSource<Iterator<Map<String, Object>>> dataSource;
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public void init(Context context) {
|
||||
super.init(context);
|
||||
dataSource = context.getDataSource();
|
||||
}
|
||||
|
||||
protected void initQuery(String q) {
|
||||
try {
|
||||
DataImporter.QUERY_COUNT.get().incrementAndGet();
|
||||
rowIterator = dataSource.getData(q);
|
||||
this.query = q;
|
||||
} catch (DataImportHandlerException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error( "The query failed '{}'", q, e);
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> nextRow() {
|
||||
if (rowIterator == null) {
|
||||
String q = getQuery();
|
||||
initQuery(context.replaceTokens(q));
|
||||
}
|
||||
return getNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> nextModifiedRowKey() {
|
||||
if (rowIterator == null) {
|
||||
String deltaQuery = context.getEntityAttribute(DELTA_QUERY);
|
||||
if (deltaQuery == null)
|
||||
return null;
|
||||
initQuery(context.replaceTokens(deltaQuery));
|
||||
}
|
||||
return getNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> nextDeletedRowKey() {
|
||||
if (rowIterator == null) {
|
||||
String deletedPkQuery = context.getEntityAttribute(DEL_PK_QUERY);
|
||||
if (deletedPkQuery == null)
|
||||
return null;
|
||||
initQuery(context.replaceTokens(deletedPkQuery));
|
||||
}
|
||||
return getNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> nextModifiedParentRowKey() {
|
||||
if (rowIterator == null) {
|
||||
String parentDeltaQuery = context.getEntityAttribute(PARENT_DELTA_QUERY);
|
||||
if (parentDeltaQuery == null)
|
||||
return null;
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Running parentDeltaQuery for Entity: {}"
|
||||
, context.getEntityAttribute("name"));
|
||||
}
|
||||
initQuery(context.replaceTokens(parentDeltaQuery));
|
||||
}
|
||||
return getNext();
|
||||
}
|
||||
|
||||
public String getQuery() {
|
||||
String queryString = context.getEntityAttribute(QUERY);
|
||||
if (Context.FULL_DUMP.equals(context.currentProcess())) {
|
||||
return queryString;
|
||||
}
|
||||
if (Context.DELTA_DUMP.equals(context.currentProcess())) {
|
||||
String deltaImportQuery = context.getEntityAttribute(DELTA_IMPORT_QUERY);
|
||||
if(deltaImportQuery != null) return deltaImportQuery;
|
||||
}
|
||||
log.warn("'deltaImportQuery' attribute is not specified for entity : {}", entityName);
|
||||
return getDeltaImportQuery(queryString);
|
||||
}
|
||||
|
||||
public String getDeltaImportQuery(String queryString) {
|
||||
StringBuilder sb = new StringBuilder(queryString);
|
||||
if (SELECT_WHERE_PATTERN.matcher(queryString).find()) {
|
||||
sb.append(" and ");
|
||||
} else {
|
||||
sb.append(" where ");
|
||||
}
|
||||
boolean first = true;
|
||||
String[] primaryKeys = context.getEntityAttribute("pk").split(",");
|
||||
for (String primaryKey : primaryKeys) {
|
||||
if (!first) {
|
||||
sb.append(" and ");
|
||||
}
|
||||
first = false;
|
||||
Object val = context.resolve("dataimporter.delta." + primaryKey);
|
||||
if (val == null) {
|
||||
Matcher m = DOT_PATTERN.matcher(primaryKey);
|
||||
if (m.find()) {
|
||||
val = context.resolve("dataimporter.delta." + m.group(1));
|
||||
}
|
||||
}
|
||||
sb.append(primaryKey).append(" = ");
|
||||
if (val instanceof Number) {
|
||||
sb.append(val.toString());
|
||||
} else {
|
||||
sb.append("'").append(val.toString()).append("'");
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static Pattern SELECT_WHERE_PATTERN = Pattern.compile(
|
||||
"^\\s*(select\\b.*?\\b)(where).*", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
public static final String QUERY = "query";
|
||||
|
||||
public static final String DELTA_QUERY = "deltaQuery";
|
||||
|
||||
public static final String DELTA_IMPORT_QUERY = "deltaImportQuery";
|
||||
|
||||
public static final String PARENT_DELTA_QUERY = "parentDeltaQuery";
|
||||
|
||||
public static final String DEL_PK_QUERY = "deletedPkQuery";
|
||||
|
||||
public static final Pattern DOT_PATTERN = Pattern.compile(".*?\\.(.*)$");
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* <p> Escapes values in SQL queries. It escapes the value of the given expression
|
||||
* by replacing all occurrences of single-quotes by two single-quotes and similarily
|
||||
* for double-quotes </p>
|
||||
*/
|
||||
public class SqlEscapingEvaluator extends Evaluator {
|
||||
@Override
|
||||
public String evaluate(String expression, Context context) {
|
||||
List<Object> l = parseParams(expression, context.getVariableResolver());
|
||||
if (l.size() != 1) {
|
||||
throw new DataImportHandlerException(SEVERE, "'escapeSql' must have at least one parameter ");
|
||||
}
|
||||
String s = l.get(0).toString();
|
||||
// escape single quote with two single quotes, double quote
|
||||
// with two doule quotes, and backslash with double backslash.
|
||||
// See: http://dev.mysql.com/doc/refman/4.1/en/mysql-real-escape-string.html
|
||||
return s.replaceAll("'", "''").replaceAll("\"", "\"\"").replaceAll("\\\\", "\\\\\\\\");
|
||||
}
|
||||
}
|
|
@ -1,115 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A {@link Transformer} which can put values into a column by resolving an expression
|
||||
* containing other columns
|
||||
* </p>
|
||||
* <p>
|
||||
* For example:<br>
|
||||
* <field column="name" template="${e.lastName}, ${e.firstName}
|
||||
* ${e.middleName}" /> will produce the name by combining values from
|
||||
* lastName, firstName and middleName fields as given in the template attribute.
|
||||
* </p>
|
||||
* <p>
|
||||
* Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
|
||||
* for more details.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class TemplateTransformer extends Transformer {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
private Map<String ,List<String>> templateVsVars = new HashMap<>();
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Object transformRow(Map<String, Object> row, Context context) {
|
||||
|
||||
|
||||
VariableResolver resolver = context.getVariableResolver();
|
||||
// Add current row to the copy of resolver map
|
||||
|
||||
for (Map<String, String> map : context.getAllEntityFields()) {
|
||||
map.entrySet();
|
||||
String expr = map.get(TEMPLATE);
|
||||
if (expr == null)
|
||||
continue;
|
||||
|
||||
String column = map.get(DataImporter.COLUMN);
|
||||
|
||||
// Verify if all variables can be resolved or not
|
||||
boolean resolvable = true;
|
||||
List<String> variables = this.templateVsVars.get(expr);
|
||||
if(variables == null){
|
||||
variables = resolver.getVariables(expr);
|
||||
this.templateVsVars.put(expr, variables);
|
||||
}
|
||||
for (String v : variables) {
|
||||
if (resolver.resolve(v) == null) {
|
||||
log.warn("Unable to resolve variable: {} while parsing expression: {}"
|
||||
,v , expr);
|
||||
resolvable = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!resolvable)
|
||||
continue;
|
||||
if(variables.size() == 1 && expr.startsWith("${") && expr.endsWith("}")){
|
||||
addToRow(column, row, resolver.resolve(variables.get(0)));
|
||||
} else {
|
||||
addToRow(column, row, resolver.replaceTokens(expr));
|
||||
}
|
||||
}
|
||||
|
||||
return row;
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked"})
|
||||
private void addToRow(String key, Map<String, Object> row, Object value) {
|
||||
Object prevVal = row.get(key);
|
||||
if (prevVal != null) {
|
||||
if (prevVal instanceof List) {
|
||||
((List) prevVal).add(value);
|
||||
} else {
|
||||
ArrayList<Object> valList = new ArrayList<Object>();
|
||||
valList.add(prevVal);
|
||||
valList.add(value);
|
||||
row.put(key, valList);
|
||||
}
|
||||
} else {
|
||||
row.put(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
public static final String TEMPLATE = "template";
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.Map;

/**
 * <p>
 * Use this API to implement a custom transformer for any given entity.
 * </p>
 * <p>
 * Implementations of this abstract class must provide a public no-args
 * constructor, because instances are created reflectively by the data import
 * framework.
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @since solr 1.3
 */
public abstract class Transformer {
  /**
   * The input is a row of data and the output has to be a new row.
   *
   * @param context The current context
   * @param row A row of data
   * @return The changed data. It must be a {@code Map<String, Object>} if it
   *         returns only one row, or a {@code List<Map<String, Object>>} if
   *         multiple rows are to be returned.
   */
  public abstract Object transformRow(Map<String, Object> row, Context context);
}
|
|
@ -1,154 +0,0 @@
|
|||
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p> A data source implementation which can be used to read character files using HTTP. </p> <p> Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
 * details. </p>
 * <p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @since solr 1.4
 */
public class URLDataSource extends DataSource<Reader> {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /** Prefix prepended to relative queries in {@link #getData(String)}. */
  private String baseUrl;

  /** Explicit charset from config; if null it is sniffed from Content-Type, falling back to UTF-8. */
  private String encoding;

  private int connectionTimeout = CONNECTION_TIMEOUT;

  private int readTimeout = READ_TIMEOUT;

  private Context context;

  private Properties initProps;

  public URLDataSource() {
  }

  /**
   * Reads {@code baseUrl}, {@code encoding}, {@code connectionTimeout} and
   * {@code readTimeout} from the init properties (after variable-token
   * replacement). Malformed timeout values are logged and ignored, keeping the
   * defaults.
   */
  @Override
  public void init(Context context, Properties initProps) {
    this.context = context;
    this.initProps = initProps;

    baseUrl = getInitPropWithReplacements(BASE_URL);
    // Resolve the encoding property once (the previous code looked it up twice,
    // running token replacement a second time for no reason).
    String configuredEncoding = getInitPropWithReplacements(ENCODING);
    if (configuredEncoding != null) encoding = configuredEncoding;
    String cTimeout = getInitPropWithReplacements(CONNECTION_TIMEOUT_FIELD_NAME);
    String rTimeout = getInitPropWithReplacements(READ_TIMEOUT_FIELD_NAME);
    if (cTimeout != null) {
      try {
        connectionTimeout = Integer.parseInt(cTimeout);
      } catch (NumberFormatException e) {
        // Deliberately non-fatal: keep the default timeout.
        log.warn("Invalid connection timeout: {}", cTimeout);
      }
    }
    if (rTimeout != null) {
      try {
        readTimeout = Integer.parseInt(rTimeout);
      } catch (NumberFormatException e) {
        log.warn("Invalid read timeout: {}", rTimeout);
      }
    }
  }

  /**
   * Opens the given URL and returns a character reader over its content.
   *
   * @param query either an absolute URL (anything matching {@link #URIMETHOD})
   *              or a path appended to {@code baseUrl}
   * @return a reader decoding the stream with the configured/sniffed charset
   * @throws DataImportHandlerException (SEVERE) wrapping any failure
   */
  @Override
  public Reader getData(String query) {
    URL url = null;
    try {
      if (URIMETHOD.matcher(query).find()) url = new URL(query);
      else url = new URL(baseUrl + query);

      log.debug("Accessing URL: {}", url);

      URLConnection conn = url.openConnection();
      conn.setConnectTimeout(connectionTimeout);
      conn.setReadTimeout(readTimeout);
      InputStream in = conn.getInputStream();
      String enc = encoding;
      if (enc == null) {
        // No configured encoding: try the charset parameter of Content-Type.
        String cType = conn.getContentType();
        if (cType != null) {
          Matcher m = CHARSET_PATTERN.matcher(cType);
          if (m.find()) {
            enc = m.group(1);
          }
        }
      }
      if (enc == null)
        enc = UTF_8;
      DataImporter.QUERY_COUNT.get().incrementAndGet();
      return new InputStreamReader(in, enc);
    } catch (Exception e) {
      log.error("Exception thrown while getting data", e);
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
          "Exception in invoking url " + url, e);
    }
  }

  @Override
  public void close() {
  }

  public String getBaseUrl() {
    return baseUrl;
  }

  /** Looks up an init property and runs variable-token replacement on it. */
  private String getInitPropWithReplacements(String propertyName) {
    final String expr = initProps.getProperty(propertyName);
    if (expr == null) {
      return null;
    }
    return context.replaceTokens(expr);
  }

  // Matches queries that already carry a scheme (e.g. "http:/"), i.e. absolute URLs.
  static final Pattern URIMETHOD = Pattern.compile("\\w{3,}:/");

  private static final Pattern CHARSET_PATTERN = Pattern.compile(".*?charset=(.*)$", Pattern.CASE_INSENSITIVE);

  public static final String ENCODING = "encoding";

  public static final String BASE_URL = "baseUrl";

  public static final String UTF_8 = StandardCharsets.UTF_8.name();

  public static final String CONNECTION_TIMEOUT_FIELD_NAME = "connectionTimeout";

  public static final String READ_TIMEOUT_FIELD_NAME = "readTimeout";

  public static final int CONNECTION_TIMEOUT = 5000;

  public static final int READ_TIMEOUT = 10000;
}
|
|
@ -1,46 +0,0 @@
|
|||
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;

import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;

/**
 * <p>Escapes reserved characters in Solr queries by URL-encoding the single
 * argument of an {@code encodeUrl(...)} function call.</p>
 *
 * @see org.apache.solr.client.solrj.util.ClientUtils#escapeQueryChars(String)
 */
public class UrlEvaluator extends Evaluator {
  /**
   * Evaluates {@code encodeUrl(arg)}: resolves the argument against the
   * variable resolver and returns it URL-encoded as UTF-8.
   *
   * @throws DataImportHandlerException (SEVERE) if the call does not have
   *         exactly one parameter, or via {@code wrapAndThrow} if encoding fails
   */
  @Override
  public String evaluate(String expression, Context context) {
    List<Object> l = parseParams(expression, context.getVariableResolver());
    // The check accepts exactly one parameter; the old message incorrectly said
    // "at least one".
    if (l.size() != 1) {
      throw new DataImportHandlerException(SEVERE, "'encodeUrl' must have exactly one parameter ");
    }
    String s = l.get(0).toString();

    try {
      // s is already a String (the original called s.toString() redundantly);
      // use the charset-name constant rather than a bare "UTF-8" literal.
      return URLEncoder.encode(s, StandardCharsets.UTF_8.name());
    } catch (Exception e) {
      wrapAndThrow(SEVERE, e, "Unable to encode expression: " + expression + " with value: " + s);
      return null;
    }
  }
}
|
|
@ -1,211 +0,0 @@
|
|||
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.WeakHashMap;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.solr.common.util.Cache;
import org.apache.solr.common.util.MapBackedCache;
import org.apache.solr.update.processor.TemplateUpdateProcessorFactory;

import static org.apache.solr.update.processor.TemplateUpdateProcessorFactory.Resolved;

/**
 * <p>
 * A set of nested maps that can resolve variables by namespaces. Variables are
 * enclosed with a dollar sign then an opening curly brace, ending with a
 * closing curly brace. Namespaces are delimited with '.' (period).
 * </p>
 * <p>
 * This class also has special logic to resolve evaluator calls by recognizing
 * the reserved function namespace: dataimporter.functions.xxx
 * </p>
 * <p>
 * This class caches strings that have already been resolved from the current
 * dih import.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @since solr 1.3
 */
public class VariableResolver {

  // Splits dotted names like "a.b.c" into namespace path components.
  private static final Pattern DOT_PATTERN = Pattern.compile("[.]");
  // Matches an evaluator call of the form "name(args)": group 1 = name, group 2 = args.
  private static final Pattern EVALUATOR_FORMAT_PATTERN = Pattern
      .compile("^(\\w*?)\\((.*?)\\)$");
  // Root of the nested-map namespace tree; non-leaf values are themselves Maps.
  private Map<String,Object> rootNamespace;
  // Registered evaluators keyed by function name; may be null until setEvaluators().
  private Map<String,Evaluator> evaluators;
  // Weakly-keyed cache of already-parsed templates, shared with TemplateUpdateProcessorFactory.
  private Cache<String,Resolved> cache = new MapBackedCache<>(new WeakHashMap<>());
  // Bound once so replaceTokens can pass resolution as a Function without re-allocating.
  private Function<String,Object> fun = this::resolve;

  public static final String FUNCTIONS_NAMESPACE = "dataimporter.functions.";
  public static final String FUNCTIONS_NAMESPACE_SHORT = "dih.functions.";

  public VariableResolver() {
    rootNamespace = new HashMap<>();
  }

  /** Seeds the root namespace with the given defaults (keys stringified). */
  public VariableResolver(Properties defaults) {
    rootNamespace = new HashMap<>();
    for (Map.Entry<Object,Object> entry : defaults.entrySet()) {
      rootNamespace.put(entry.getKey().toString(), entry.getValue());
    }
  }

  /** Seeds the root namespace with a copy of the given defaults. */
  public VariableResolver(Map<String,Object> defaults) {
    rootNamespace = new HashMap<>(defaults);
  }

  /**
   * Resolves a given value with a name.
   *
   * Lookup order: (1) the nested namespace map addressed by the dotted name;
   * (2) an evaluator call under the "dataimporter.functions." / "dih.functions."
   * namespaces; (3) the remaining dotted suffix as a flat key at the deepest
   * namespace level reached; (4) a JVM system property. Never returns null —
   * unresolvable names resolve to "".
   *
   * @param name
   *          the String to be resolved
   * @return an Object which is the result of evaluation of given name
   */
  public Object resolve(String name) {
    Object r = null;
    if (name != null) {
      String[] nameParts = DOT_PATTERN.split(name);
      CurrentLevel cr = currentLevelMap(nameParts,
          rootNamespace, false);
      Map<String,Object> currentLevel = cr.map;
      r = currentLevel.get(nameParts[nameParts.length - 1]);
      if (r == null && name.startsWith(FUNCTIONS_NAMESPACE)
          && name.length() > FUNCTIONS_NAMESPACE.length()) {
        return resolveEvaluator(FUNCTIONS_NAMESPACE, name);
      }
      if (r == null && name.startsWith(FUNCTIONS_NAMESPACE_SHORT)
          && name.length() > FUNCTIONS_NAMESPACE_SHORT.length()) {
        return resolveEvaluator(FUNCTIONS_NAMESPACE_SHORT, name);
      }
      if (r == null) {
        // The walk may have stopped early (cr.level < nameParts.length - 1);
        // retry the unconsumed suffix, rejoined with dots, as a single flat key.
        StringBuilder sb = new StringBuilder();
        for(int i=cr.level ; i<nameParts.length ; i++) {
          if(sb.length()>0) {
            sb.append(".");
          }
          sb.append(nameParts[i]);
        }
        r = cr.map.get(sb.toString());
      }
      if (r == null) {
        // Last resort: JVM system properties.
        r = System.getProperty(name);
      }
    }
    return r == null ? "" : r;
  }

  /**
   * Parses "namespace.fn(args)" out of {@code name} and dispatches to the
   * registered evaluator; returns "" when no evaluators are set, the name does
   * not look like a call, or the function is unknown.
   */
  private Object resolveEvaluator(String namespace, String name) {
    if (evaluators == null) {
      return "";
    }
    Matcher m = EVALUATOR_FORMAT_PATTERN.matcher(name
        .substring(namespace.length()));
    if (m.find()) {
      String fname = m.group(1);
      Evaluator evaluator = evaluators.get(fname);
      if (evaluator == null) return "";
      // Minimal context: only the resolver is populated.
      ContextImpl ctx = new ContextImpl(null, this, null, null, null, null,
          null);
      String g2 = m.group(2);
      return evaluator.evaluate(g2, ctx);
    } else {
      return "";
    }
  }

  /**
   * Given a String with place holders, replace them with the value tokens.
   *
   * @return the string with the placeholders replaced with their values
   */
  public String replaceTokens(String template) {
    return TemplateUpdateProcessorFactory.replaceTokens(template, cache, fun, TemplateUpdateProcessorFactory.DOLLAR_BRACES_PLACEHOLDER_PATTERN);
  }

  /**
   * Registers {@code newMap} under the dotted namespace {@code name}; when
   * {@code name} is null, each entry of {@code newMap} is merged into the tree
   * individually, treating its key as a dotted path.
   */
  public void addNamespace(String name, Map<String,Object> newMap) {
    if (newMap != null) {
      if (name != null) {
        String[] nameParts = DOT_PATTERN.split(name);
        Map<String,Object> nameResolveLevel = currentLevelMap(nameParts,
            rootNamespace, false).map;
        nameResolveLevel.put(nameParts[nameParts.length - 1], newMap);
      } else {
        for (Map.Entry<String,Object> entry : newMap.entrySet()) {
          String[] keyParts = DOT_PATTERN.split(entry.getKey());
          Map<String,Object> currentLevel = rootNamespace;
          currentLevel = currentLevelMap(keyParts, currentLevel, false).map;
          currentLevel.put(keyParts[keyParts.length - 1], entry.getValue());
        }
      }
    }
  }

  /** Returns the ${...} variable names referenced by {@code expr} (cached). */
  public List<String> getVariables(String expr) {
    return TemplateUpdateProcessorFactory.getVariables(expr, cache, TemplateUpdateProcessorFactory.DOLLAR_BRACES_PLACEHOLDER_PATTERN);
  }

  // Result of a namespace-tree walk: the deepest map reached and how many
  // path components were consumed to get there.
  static class CurrentLevel {
    final Map<String,Object> map;
    final int level;
    CurrentLevel(int level, Map<String,Object> map) {
      this.level = level;
      this.map = map;
    }
  }

  /**
   * Walks the namespace tree along {@code keyParts}. Creates the final level
   * when it is the only one missing; otherwise stops early and reports the
   * depth reached so the caller can treat the remainder as a flat key.
   */
  private CurrentLevel currentLevelMap(String[] keyParts,
      Map<String,Object> currentLevel, boolean includeLastLevel) {
    int j = includeLastLevel ? keyParts.length : keyParts.length - 1;
    for (int i = 0; i < j; i++) {
      Object o = currentLevel.get(keyParts[i]);
      if (o == null) {
        if(i == j-1) {
          // Only the last level is missing: create it.
          Map<String,Object> nextLevel = new HashMap<>();
          currentLevel.put(keyParts[i], nextLevel);
          currentLevel = nextLevel;
        } else {
          // Missing intermediate level: stop and report how far we got.
          return new CurrentLevel(i, currentLevel);
        }
      } else if (o instanceof Map<?,?>) {
        @SuppressWarnings("unchecked")
        Map<String,Object> nextLevel = (Map<String,Object>) o;
        currentLevel = nextLevel;
      } else {
        throw new AssertionError(
            "Non-leaf nodes should be of type java.util.Map");
      }
    }
    return new CurrentLevel(j-1, currentLevel);
  }

  /** Removes a top-level namespace entry (does not descend into dotted paths). */
  public void removeNamespace(String name) {
    rootNamespace.remove(name);
  }

  public void setEvaluators(Map<String,Evaluator> evaluators) {
    this.evaluators = evaluators;
  }
}
|
|
@ -1,555 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.solr.util.SystemIdResolver;
|
||||
import org.apache.solr.common.util.XMLErrorLogger;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import javax.xml.transform.TransformerException;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
import java.io.CharArrayReader;
|
||||
import java.io.CharArrayWriter;
|
||||
import java.io.Reader;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* <p> An implementation of {@link EntityProcessor} which uses a streaming xpath parser to extract values out of XML documents.
|
||||
* It is typically used in conjunction with {@link URLDataSource} or {@link FileDataSource}. </p> <p> Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
|
||||
* details. </p>
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
*
|
||||
* @see XPathRecordReader
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class XPathEntityProcessor extends EntityProcessorBase {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
private static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
|
||||
|
||||
private static final Map<String, Object> END_MARKER = new HashMap<>();
|
||||
|
||||
protected List<String> placeHolderVariables;
|
||||
|
||||
protected List<String> commonFields;
|
||||
|
||||
private String pk;
|
||||
|
||||
private XPathRecordReader xpathReader;
|
||||
|
||||
protected DataSource<Reader> dataSource;
|
||||
|
||||
protected javax.xml.transform.Transformer xslTransformer;
|
||||
|
||||
protected boolean useSolrAddXml = false;
|
||||
|
||||
protected boolean streamRows = false;
|
||||
|
||||
// Amount of time to block reading/writing to queue when streaming
|
||||
protected int blockingQueueTimeOut = 10;
|
||||
|
||||
// Units for pumpTimeOut
|
||||
protected TimeUnit blockingQueueTimeOutUnits = TimeUnit.SECONDS;
|
||||
|
||||
// Number of rows to queue for asynchronous processing
|
||||
protected int blockingQueueSize = 1000;
|
||||
|
||||
protected Thread publisherThread;
|
||||
|
||||
protected boolean reinitXPathReader = true;
|
||||
|
||||
/**
 * Per-run initialization: (re)builds the xpath reader if required, then caches
 * the entity's primary-key attribute and data source and resets the row
 * iterator so the next fetch starts a fresh query.
 */
@Override
@SuppressWarnings("unchecked")
public void init(Context context) {
  super.init(context);
  // Rebuild the reader only when flagged: initially true, and set again by
  // initXpathReader when it detects templated xpaths that vary per document.
  if (reinitXPathReader)
    initXpathReader(context.getVariableResolver());
  pk = context.getEntityAttribute("pk");
  dataSource = context.getDataSource();
  rowIterator = null;

}
|
||||
|
||||
/**
 * Builds the {@link XPathRecordReader} from the entity configuration: either
 * the fixed /add/doc schema (useSolrAddXml) or the configured forEach xpath
 * plus one field registration per entity field carrying an xpath. Also
 * compiles an optional XSL transformer and collects URL placeholder variables
 * and common-field names for later session stashing.
 */
private void initXpathReader(VariableResolver resolver) {
  reinitXPathReader = false;
  useSolrAddXml = Boolean.parseBoolean(context
      .getEntityAttribute(USE_SOLR_ADD_SCHEMA));
  streamRows = Boolean.parseBoolean(context
      .getEntityAttribute(STREAM));
  // NOTE(review): presence is checked on the *resolved* attribute but the *raw*
  // attribute is parsed — a templated batchSize/readTimeOut would throw
  // NumberFormatException here. Confirm intent before relying on templates.
  if (context.getResolvedEntityAttribute("batchSize") != null) {
    blockingQueueSize = Integer.parseInt(context.getEntityAttribute("batchSize"));
  }
  if (context.getResolvedEntityAttribute("readTimeOut") != null) {
    blockingQueueTimeOut = Integer.parseInt(context.getEntityAttribute("readTimeOut"));
  }
  String xslt = context.getEntityAttribute(XSL);
  if (xslt != null) {
    xslt = context.replaceTokens(xslt);
    try {
      // create an instance of TransformerFactory
      TransformerFactory transFact = TransformerFactory.newInstance();
      final SolrCore core = context.getSolrCore();
      final StreamSource xsltSource;
      if (core != null) {
        // Resolve the stylesheet through the core's resource loader so
        // relative includes/imports work.
        final ResourceLoader loader = core.getResourceLoader();
        transFact.setURIResolver(new SystemIdResolver(loader).asURIResolver());
        xsltSource = new StreamSource(loader.openResource(xslt),
            SystemIdResolver.createSystemIdFromResourceName(xslt));
      } else {
        // fallback for tests
        xsltSource = new StreamSource(xslt);
      }
      transFact.setErrorListener(xmllog);
      try {
        xslTransformer = transFact.newTransformer(xsltSource);
      } finally {
        // some XML parsers are broken and don't close the byte stream (but they should according to spec)
        IOUtils.closeQuietly(xsltSource.getInputStream());
      }
      if (log.isInfoEnabled()) {
        log.info("Using xslTransformer: {}", xslTransformer.getClass().getName());
      }
    } catch (Exception e) {
      throw new DataImportHandlerException(SEVERE,
          "Error initializing XSL ", e);
    }
  }

  if (useSolrAddXml) {
    // Support solr add documents
    xpathReader = new XPathRecordReader("/add/doc");
    xpathReader.addField("name", "/add/doc/field/@name", true);
    xpathReader.addField("value", "/add/doc/field", true);
  } else {
    String forEachXpath = context.getResolvedEntityAttribute(FOR_EACH);
    if (forEachXpath == null)
      throw new DataImportHandlerException(SEVERE,
          "Entity : " + context.getEntityAttribute("name")
              + " must have a 'forEach' attribute");
    // NOTE(review): this condition looks inverted relative to the per-field
    // check below (which uses !equals to detect a template). As written it
    // forces re-initialization when forEach has NO template — verify against
    // the original repository history before changing.
    if (forEachXpath.equals(context.getEntityAttribute(FOR_EACH))) reinitXPathReader = true;

    try {
      xpathReader = new XPathRecordReader(forEachXpath);
      for (Map<String, String> field : context.getAllEntityFields()) {
        if (field.get(XPATH) == null)
          continue;
        int flags = 0;
        if ("true".equals(field.get("flatten"))) {
          flags = XPathRecordReader.FLATTEN;
        }
        String xpath = field.get(XPATH);
        xpath = context.replaceTokens(xpath);
        //!xpath.equals(field.get(XPATH) means the field xpath has a template
        //in that case ensure that the XPathRecordReader is reinitialized
        //for each xml
        if (!xpath.equals(field.get(XPATH)) && !context.isRootEntity()) reinitXPathReader = true;
        xpathReader.addField(field.get(DataImporter.COLUMN),
            xpath,
            Boolean.parseBoolean(field.get(DataImporter.MULTI_VALUED)),
            flags);
      }
    } catch (RuntimeException e) {
      throw new DataImportHandlerException(SEVERE,
          "Exception while reading xpaths for fields", e);
    }
  }
  // Collect "entityName.xxx" variables referenced by the URL attribute; their
  // values are stashed per-row by readUsefulVars and republished by addNamespace.
  String url = context.getEntityAttribute(URL);
  List<String> l = url == null ? Collections.emptyList() : resolver.getVariables(url);
  for (String s : l) {
    if (s.startsWith(entityName + ".")) {
      if (placeHolderVariables == null)
        placeHolderVariables = new ArrayList<>();
      placeHolderVariables.add(s.substring(entityName.length() + 1));
    }
  }
  // Record which columns are flagged commonField="true".
  for (Map<String, String> fld : context.getAllEntityFields()) {
    if (fld.get(COMMON_FIELD) != null && "true".equals(fld.get(COMMON_FIELD))) {
      if (commonFields == null)
        commonFields = new ArrayList<>();
      commonFields.add(fld.get(DataImporter.COLUMN));
    }
  }

}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> nextRow() {
|
||||
Map<String, Object> result;
|
||||
|
||||
if (!context.isRootEntity())
|
||||
return fetchNextRow();
|
||||
|
||||
while (true) {
|
||||
result = fetchNextRow();
|
||||
|
||||
if (result == null)
|
||||
return null;
|
||||
|
||||
if (pk == null || result.get(pk) != null)
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Hook run after transformers have processed the row; harvests session
 * variables (HAS_MORE / NEXT_URL, placeholders, common fields) from the
 * transformed row via {@code readUsefulVars}.
 */
@Override
public void postTransform(Map<String, Object> r) {
  readUsefulVars(r);
}
|
||||
|
||||
/**
 * Fetches the next parsed row, lazily running the query on first use. When the
 * current document is exhausted and a previous row signalled HAS_MORE, the
 * follow-up URL (NEXT_URL, falling back to the entity URL) is fetched after
 * republishing session variables as a namespace; both pagination attributes
 * are cleared afterwards regardless of outcome. Common fields are back-filled
 * into every returned row.
 */
@SuppressWarnings("unchecked")
private Map<String, Object> fetchNextRow() {
  Map<String, Object> r = null;
  // Note: this loop never actually iterates — every path through the body
  // returns. Kept as in the original.
  while (true) {
    if (rowIterator == null)
      initQuery(context.replaceTokens(context.getEntityAttribute(URL)));
    r = getNext();
    if (r == null) {
      Object hasMore = context.getSessionAttribute(HAS_MORE, Context.SCOPE_ENTITY);
      try {
        // HAS_MORE may arrive as the string "true" or as Boolean.TRUE.
        if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) {
          String url = (String) context.getSessionAttribute(NEXT_URL, Context.SCOPE_ENTITY);
          if (url == null)
            url = context.getEntityAttribute(URL);
          addNamespace();
          initQuery(context.replaceTokens(url));
          r = getNext();
          if (r == null)
            return null;
        } else {
          return null;
        }
      } finally {
        // Always clear pagination markers so a stale HAS_MORE cannot loop.
        context.setSessionAttribute(HAS_MORE,null,Context.SCOPE_ENTITY);
        context.setSessionAttribute(NEXT_URL,null,Context.SCOPE_ENTITY);
      }
    }
    addCommonFields(r);
    return r;
  }
}
|
||||
|
||||
private void addNamespace() {
|
||||
Map<String, Object> namespace = new HashMap<>();
|
||||
Set<String> allNames = new HashSet<>();
|
||||
if (commonFields != null) allNames.addAll(commonFields);
|
||||
if (placeHolderVariables != null) allNames.addAll(placeHolderVariables);
|
||||
if(allNames.isEmpty()) return;
|
||||
|
||||
for (String name : allNames) {
|
||||
Object val = context.getSessionAttribute(name, Context.SCOPE_ENTITY);
|
||||
if (val != null) namespace.put(name, val);
|
||||
}
|
||||
context.getVariableResolver().addNamespace(entityName, namespace);
|
||||
}
|
||||
|
||||
private void addCommonFields(Map<String, Object> r) {
|
||||
if(commonFields != null){
|
||||
for (String commonField : commonFields) {
|
||||
if(r.get(commonField) == null) {
|
||||
Object val = context.getSessionAttribute(commonField, Context.SCOPE_ENTITY);
|
||||
if(val != null) r.put(commonField, val);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked"})
|
||||
private void initQuery(String s) {
|
||||
Reader data = null;
|
||||
try {
|
||||
final List<Map<String, Object>> rows = new ArrayList<>();
|
||||
try {
|
||||
data = dataSource.getData(s);
|
||||
} catch (Exception e) {
|
||||
if (ABORT.equals(onError)) {
|
||||
wrapAndThrow(SEVERE, e);
|
||||
} else if (SKIP.equals(onError)) {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Skipping url : {}", s, e);
|
||||
}
|
||||
wrapAndThrow(DataImportHandlerException.SKIP, e);
|
||||
} else {
|
||||
log.warn("Failed for url : {}", s, e);
|
||||
rowIterator = Collections.EMPTY_LIST.iterator();
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (xslTransformer != null) {
|
||||
try {
|
||||
SimpleCharArrayReader caw = new SimpleCharArrayReader();
|
||||
xslTransformer.transform(new StreamSource(data),
|
||||
new StreamResult(caw));
|
||||
data = caw.getReader();
|
||||
} catch (TransformerException e) {
|
||||
if (ABORT.equals(onError)) {
|
||||
wrapAndThrow(SEVERE, e, "Exception in applying XSL Transformation");
|
||||
} else if (SKIP.equals(onError)) {
|
||||
wrapAndThrow(DataImportHandlerException.SKIP, e);
|
||||
} else {
|
||||
log.warn("Failed for url : {}", s, e);
|
||||
rowIterator = Collections.EMPTY_LIST.iterator();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (streamRows) {
|
||||
rowIterator = getRowIterator(data, s);
|
||||
} else {
|
||||
try {
|
||||
xpathReader.streamRecords(data, (record, xpath) -> rows.add(readRow(record, xpath)));
|
||||
} catch (Exception e) {
|
||||
String msg = "Parsing failed for xml, url:" + s + " rows processed:" + rows.size();
|
||||
if (rows.size() > 0) msg += " last row: " + rows.get(rows.size() - 1);
|
||||
if (ABORT.equals(onError)) {
|
||||
wrapAndThrow(SEVERE, e, msg);
|
||||
} else if (SKIP.equals(onError)) {
|
||||
log.warn(msg, e);
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put(DocBuilder.SKIP_DOC, Boolean.TRUE);
|
||||
rows.add(map);
|
||||
} else if (CONTINUE.equals(onError)) {
|
||||
log.warn(msg, e);
|
||||
}
|
||||
}
|
||||
rowIterator = rows.iterator();
|
||||
}
|
||||
} finally {
|
||||
if (!streamRows) {
|
||||
closeIt(data);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private void closeIt(Reader data) {
|
||||
try {
|
||||
data.close();
|
||||
} catch (Exception e) { /* Ignore */
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked"})
|
||||
protected Map<String, Object> readRow(Map<String, Object> record, String xpath) {
|
||||
if (useSolrAddXml) {
|
||||
List<String> names = (List<String>) record.get("name");
|
||||
List<String> values = (List<String>) record.get("value");
|
||||
Map<String, Object> row = new HashMap<>();
|
||||
for (int i = 0; i < names.size() && i < values.size(); i++) {
|
||||
if (row.containsKey(names.get(i))) {
|
||||
Object existing = row.get(names.get(i));
|
||||
if (existing instanceof List) {
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
List list = (List) existing;
|
||||
list.add(values.get(i));
|
||||
} else {
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
List list = new ArrayList();
|
||||
list.add(existing);
|
||||
list.add(values.get(i));
|
||||
row.put(names.get(i), list);
|
||||
}
|
||||
} else {
|
||||
row.put(names.get(i), values.get(i));
|
||||
}
|
||||
}
|
||||
return row;
|
||||
} else {
|
||||
record.put(XPATH_FIELD_NAME, xpath);
|
||||
return record;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class SimpleCharArrayReader extends CharArrayWriter {
|
||||
public Reader getReader() {
|
||||
return new CharArrayReader(super.buf, 0, super.count);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private Map<String, Object> readUsefulVars(Map<String, Object> r) {
|
||||
Object val = r.get(HAS_MORE);
|
||||
if (val != null)
|
||||
context.setSessionAttribute(HAS_MORE, val,Context.SCOPE_ENTITY);
|
||||
val = r.get(NEXT_URL);
|
||||
if (val != null)
|
||||
context.setSessionAttribute(NEXT_URL, val,Context.SCOPE_ENTITY);
|
||||
if (placeHolderVariables != null) {
|
||||
for (String s : placeHolderVariables) {
|
||||
val = r.get(s);
|
||||
context.setSessionAttribute(s, val,Context.SCOPE_ENTITY);
|
||||
}
|
||||
}
|
||||
if (commonFields != null) {
|
||||
for (String s : commonFields) {
|
||||
Object commonVal = r.get(s);
|
||||
if (commonVal != null) {
|
||||
context.setSessionAttribute(s, commonVal,Context.SCOPE_ENTITY);
|
||||
}
|
||||
}
|
||||
}
|
||||
return r;
|
||||
|
||||
}
|
||||
|
||||
private Iterator<Map<String, Object>> getRowIterator(final Reader data, final String s) {
|
||||
//nothing atomic about it. I just needed a StongReference
|
||||
final AtomicReference<Exception> exp = new AtomicReference<>();
|
||||
final BlockingQueue<Map<String, Object>> blockingQueue = new ArrayBlockingQueue<>(blockingQueueSize);
|
||||
final AtomicBoolean isEnd = new AtomicBoolean(false);
|
||||
final AtomicBoolean throwExp = new AtomicBoolean(true);
|
||||
publisherThread = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
xpathReader.streamRecords(data, (record, xpath) -> {
|
||||
if (isEnd.get()) {
|
||||
throwExp.set(false);
|
||||
//To end the streaming . otherwise the parsing will go on forever
|
||||
//though consumer has gone away
|
||||
throw new RuntimeException("BREAK");
|
||||
}
|
||||
Map<String, Object> row;
|
||||
try {
|
||||
row = readRow(record, xpath);
|
||||
} catch (Exception e) {
|
||||
isEnd.set(true);
|
||||
return;
|
||||
}
|
||||
offer(row);
|
||||
});
|
||||
} catch (Exception e) {
|
||||
if(throwExp.get()) exp.set(e);
|
||||
} finally {
|
||||
closeIt(data);
|
||||
if (!isEnd.get()) {
|
||||
offer(END_MARKER);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void offer(Map<String, Object> row) {
|
||||
try {
|
||||
while (!blockingQueue.offer(row, blockingQueueTimeOut, blockingQueueTimeOutUnits)) {
|
||||
if (isEnd.get()) return;
|
||||
log.debug("Timeout elapsed writing records. Perhaps buffer size should be increased.");
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
return;
|
||||
} finally {
|
||||
synchronized (this) {
|
||||
notifyAll();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
publisherThread.start();
|
||||
|
||||
return new Iterator<Map<String, Object>>() {
|
||||
private Map<String, Object> lastRow;
|
||||
int count = 0;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return !isEnd.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> next() {
|
||||
Map<String, Object> row;
|
||||
|
||||
do {
|
||||
try {
|
||||
row = blockingQueue.poll(blockingQueueTimeOut, blockingQueueTimeOutUnits);
|
||||
if (row == null) {
|
||||
log.debug("Timeout elapsed reading records.");
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
log.debug("Caught InterruptedException while waiting for row. Aborting.");
|
||||
isEnd.set(true);
|
||||
return null;
|
||||
}
|
||||
} while (row == null);
|
||||
|
||||
if (row == END_MARKER) {
|
||||
isEnd.set(true);
|
||||
if (exp.get() != null) {
|
||||
String msg = "Parsing failed for xml, url:" + s + " rows processed in this xml:" + count;
|
||||
if (lastRow != null) msg += " last row in this xml:" + lastRow;
|
||||
if (ABORT.equals(onError)) {
|
||||
wrapAndThrow(SEVERE, exp.get(), msg);
|
||||
} else if (SKIP.equals(onError)) {
|
||||
wrapAndThrow(DataImportHandlerException.SKIP, exp.get());
|
||||
} else {
|
||||
log.warn(msg, exp.get());
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
count++;
|
||||
return lastRow = row;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
/*no op*/
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
public static final String URL = "url";
|
||||
|
||||
public static final String HAS_MORE = "$hasMore";
|
||||
|
||||
public static final String NEXT_URL = "$nextUrl";
|
||||
|
||||
public static final String XPATH_FIELD_NAME = "$forEach";
|
||||
|
||||
public static final String FOR_EACH = "forEach";
|
||||
|
||||
public static final String XPATH = "xpath";
|
||||
|
||||
public static final String COMMON_FIELD = "commonField";
|
||||
|
||||
public static final String USE_SOLR_ADD_SCHEMA = "useSolrAddSchema";
|
||||
|
||||
public static final String XSL = "xsl";
|
||||
|
||||
public static final String STREAM = "stream";
|
||||
|
||||
}
|
|
@ -1,670 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.common.util.XMLErrorLogger;
|
||||
import org.apache.solr.common.EmptyEntityResolver;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import static javax.xml.stream.XMLStreamConstants.*;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A streaming xpath parser which uses StAX for XML parsing. It supports only
|
||||
* a subset of xpath syntax.
|
||||
* </p><pre>
|
||||
* /a/b/subject[@qualifier='fullTitle']
|
||||
* /a/b/subject[@qualifier=]/subtag
|
||||
* /a/b/subject/@qualifier
|
||||
* //a
|
||||
* //a/b...
|
||||
* /a//b
|
||||
* /a//b...
|
||||
* /a/b/c
|
||||
* </pre>
|
||||
* A record is a Map<String,Object> . The key is the provided name
|
||||
* and the value is a String or a List<String>
|
||||
*
|
||||
* This class is thread-safe for parsing xml. But adding fields is not
|
||||
* thread-safe. The recommended usage is to addField() in one thread and
|
||||
* then share the instance across threads.
|
||||
* <p>
|
||||
* <b>This API is experimental and may change in the future.</b>
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class XPathRecordReader {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
private static final XMLErrorLogger XMLLOG = new XMLErrorLogger(log);
|
||||
|
||||
private Node rootNode = new Node("/", null);
|
||||
|
||||
/**
|
||||
* The FLATTEN flag indicates that all text and cdata under a specific
|
||||
* tag should be recursivly fetched and appended to the current Node's
|
||||
* value.
|
||||
*/
|
||||
public static final int FLATTEN = 1;
|
||||
|
||||
/**
|
||||
* A constructor called with a '|' separated list of Xpath expressions
|
||||
* which define sub sections of the XML stream that are to be emitted as
|
||||
* separate records.
|
||||
*
|
||||
* @param forEachXpath The XPATH for which a record is emitted. Once the
|
||||
* xpath tag is encountered, the Node.parse method starts collecting wanted
|
||||
* fields and at the close of the tag, a record is emitted containing all
|
||||
* fields collected since the tag start. Once
|
||||
* emitted the collected fields are cleared. Any fields collected in the
|
||||
* parent tag or above will also be included in the record, but these are
|
||||
* not cleared after emitting the record.
|
||||
*
|
||||
* It uses the ' | ' syntax of XPATH to pass in multiple xpaths.
|
||||
*/
|
||||
public XPathRecordReader(String forEachXpath) {
|
||||
String[] splits = forEachXpath.split("\\|");
|
||||
for (String split : splits) {
|
||||
split = split.trim();
|
||||
if (split.startsWith("//"))
|
||||
throw new RuntimeException("forEach cannot start with '//': " + split);
|
||||
if (split.length() == 0)
|
||||
continue;
|
||||
// The created Node has a name set to the full forEach attribute xpath
|
||||
addField0(split, split, false, true, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper around <code>addField0</code> to create a series of
|
||||
* Nodes based on the supplied Xpath and a given fieldName. The created
|
||||
* nodes are inserted into a Node tree.
|
||||
*
|
||||
* @param name The name for this field in the emitted record
|
||||
* @param xpath The xpath expression for this field
|
||||
* @param multiValued If 'true' then the emitted record will have values in
|
||||
* a List<String>
|
||||
*/
|
||||
public synchronized XPathRecordReader addField(String name, String xpath, boolean multiValued) {
|
||||
addField0(xpath, name, multiValued, false, 0);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper around <code>addField0</code> to create a series of
|
||||
* Nodes based on the supplied Xpath and a given fieldName. The created
|
||||
* nodes are inserted into a Node tree.
|
||||
*
|
||||
* @param name The name for this field in the emitted record
|
||||
* @param xpath The xpath expression for this field
|
||||
* @param multiValued If 'true' then the emitted record will have values in
|
||||
* a List<String>
|
||||
* @param flags FLATTEN: Recursively combine text from all child XML elements
|
||||
*/
|
||||
public synchronized XPathRecordReader addField(String name, String xpath, boolean multiValued, int flags) {
|
||||
addField0(xpath, name, multiValued, false, flags);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits the XPATH into a List of xpath segments and calls build() to
|
||||
* construct a tree of Nodes representing xpath segments. The resulting
|
||||
* tree structure ends up describing all the Xpaths we are interested in.
|
||||
*
|
||||
* @param xpath The xpath expression for this field
|
||||
* @param name The name for this field in the emitted record
|
||||
* @param multiValued If 'true' then the emitted record will have values in
|
||||
* a List<String>
|
||||
* @param isRecord Flags that this XPATH is from a forEach statement
|
||||
* @param flags The only supported flag is 'FLATTEN'
|
||||
*/
|
||||
private void addField0(String xpath, String name, boolean multiValued,
|
||||
boolean isRecord, int flags) {
|
||||
if (!xpath.startsWith("/"))
|
||||
throw new RuntimeException("xpath must start with '/' : " + xpath);
|
||||
List<String> paths = splitEscapeQuote(xpath);
|
||||
// deal with how split behaves when separator starts a string!
|
||||
if ("".equals(paths.get(0).trim()))
|
||||
paths.remove(0);
|
||||
rootNode.build(paths, name, multiValued, isRecord, flags);
|
||||
rootNode.buildOptimise(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses {@link #streamRecords streamRecords} to parse the XML source but with
|
||||
* a handler that collects all the emitted records into a single List which
|
||||
* is returned upon completion.
|
||||
*
|
||||
* @param r the stream reader
|
||||
* @return results a List of emitted records
|
||||
*/
|
||||
public List<Map<String, Object>> getAllRecords(Reader r) {
|
||||
final List<Map<String, Object>> results = new ArrayList<>();
|
||||
streamRecords(r, (record, s) -> results.add(record));
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an XML stream reader on top of whatever reader has been
|
||||
* configured. Then calls parse() with a handler which is
|
||||
* invoked forEach record emitted.
|
||||
*
|
||||
* @param r the stream reader
|
||||
* @param handler The callback instance
|
||||
*/
|
||||
public void streamRecords(Reader r, Handler handler) {
|
||||
try {
|
||||
XMLStreamReader parser = factory.createXMLStreamReader(r);
|
||||
rootNode.parse(parser, handler, new HashMap<>(),
|
||||
new Stack<>(), false);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For each node/leaf in the Node tree there is one object of this class.
|
||||
* This tree of objects represents all the XPaths we are interested in.
|
||||
* For each Xpath segment of interest we create a node. In most cases the
|
||||
* node (branch) is rather basic , but for the final portion (leaf) of any
|
||||
* Xpath we add more information to the Node. When parsing the XML document
|
||||
* we step though this tree as we stream records from the reader. If the XML
|
||||
* document departs from this tree we skip start tags till we are back on
|
||||
* the tree.
|
||||
*/
|
||||
private static class Node {
|
||||
String name; // genrally: segment of the Xpath represented by this Node
|
||||
String fieldName; // the fieldname in the emitted record (key of the map)
|
||||
String xpathName; // the segment of the Xpath represented by this Node
|
||||
String forEachPath; // the full Xpath from the forEach entity attribute
|
||||
List<Node> attributes; // List of attribute Nodes associated with this Node
|
||||
List<Node> childNodes; // List of immediate child Nodes of this node
|
||||
List<Node> wildCardNodes; // List of '//' style decendants of this Node
|
||||
List<Map.Entry<String, String>> attribAndValues;
|
||||
Node wildAncestor; // ancestor Node containing '//' style decendants
|
||||
Node parent; // parent Node in the tree
|
||||
boolean hasText=false; // flag: store/emit streamed text for this node
|
||||
boolean multiValued=false; //flag: this fields values are returned as a List
|
||||
boolean isRecord=false; //flag: this Node starts a new record
|
||||
private boolean flatten; //flag: child text is also to be emitted
|
||||
|
||||
|
||||
public Node(String name, Node p) {
|
||||
// Create a basic Node, suitable for the mid portions of any Xpath.
|
||||
// Node.xpathName and Node.name are set to same value.
|
||||
xpathName = this.name = name;
|
||||
parent = p;
|
||||
}
|
||||
|
||||
public Node(String name, String fieldName, boolean multiValued) {
|
||||
// This is only called from build() when describing an attribute.
|
||||
this.name = name; // a segment from the Xpath
|
||||
this.fieldName = fieldName; // name to store collected values against
|
||||
this.multiValued = multiValued; // return collected values in a List
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the method where all the XML parsing happens. For each
|
||||
* tag/subtag read from the source, this method is called recursively.
|
||||
*
|
||||
*/
|
||||
private void parse(XMLStreamReader parser,
|
||||
Handler handler,
|
||||
Map<String, Object> values,
|
||||
Stack<Set<String>> stack, // lists of values to purge
|
||||
boolean recordStarted
|
||||
) throws IOException, XMLStreamException {
|
||||
Set<String> valuesAddedinThisFrame = null;
|
||||
if (isRecord) {
|
||||
// This Node is a match for an XPATH from a forEach attribute,
|
||||
// prepare for the clean up that will occurr when the record
|
||||
// is emitted after its END_ELEMENT is matched
|
||||
recordStarted = true;
|
||||
valuesAddedinThisFrame = new HashSet<>();
|
||||
stack.push(valuesAddedinThisFrame);
|
||||
} else if (recordStarted) {
|
||||
// This node is a child of some parent which matched against forEach
|
||||
// attribute. Continue to add values to an existing record.
|
||||
valuesAddedinThisFrame = stack.peek();
|
||||
}
|
||||
|
||||
try {
|
||||
/* The input stream has deposited us at this Node in our tree of
|
||||
* intresting nodes. Depending on how this node is of interest,
|
||||
* process further tokens from the input stream and decide what
|
||||
* we do next
|
||||
*/
|
||||
if (attributes != null) {
|
||||
// we interested in storing attributes from the input stream
|
||||
for (Node node : attributes) {
|
||||
String value = parser.getAttributeValue(null, node.name);
|
||||
if (value != null || (recordStarted && !isRecord)) {
|
||||
putText(values, value, node.fieldName, node.multiValued);
|
||||
valuesAddedinThisFrame.add(node.fieldName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Set<Node> childrenFound = new HashSet<>();
|
||||
int event = -1;
|
||||
int flattenedStarts=0; // our tag depth when flattening elements
|
||||
StringBuilder text = new StringBuilder();
|
||||
|
||||
while (true) {
|
||||
event = parser.next();
|
||||
|
||||
if (event == END_ELEMENT) {
|
||||
if (flattenedStarts > 0) flattenedStarts--;
|
||||
else {
|
||||
if (hasText && valuesAddedinThisFrame != null) {
|
||||
valuesAddedinThisFrame.add(fieldName);
|
||||
putText(values, text.toString(), fieldName, multiValued);
|
||||
}
|
||||
if (isRecord) handler.handle(getDeepCopy(values), forEachPath);
|
||||
if (childNodes != null && recordStarted && !isRecord && !childrenFound.containsAll(childNodes)) {
|
||||
// nonReccord nodes where we have not collected text for ALL
|
||||
// the child nodes.
|
||||
for (Node n : childNodes) {
|
||||
// For the multivalue child nodes where we could have, but
|
||||
// didnt, collect text. Push a null string into values.
|
||||
if (!childrenFound.contains(n)) n.putNulls(values, valuesAddedinThisFrame);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (hasText && (event==CDATA || event==CHARACTERS || event==SPACE)) {
|
||||
text.append(parser.getText());
|
||||
}
|
||||
else if (event == START_ELEMENT) {
|
||||
if ( flatten )
|
||||
flattenedStarts++;
|
||||
else
|
||||
handleStartElement(parser, childrenFound, handler, values, stack, recordStarted);
|
||||
}
|
||||
// END_DOCUMENT is least likely to appear and should be
|
||||
// last in if-then-else skip chain
|
||||
else if (event == END_DOCUMENT) return;
|
||||
}
|
||||
}finally {
|
||||
if ((isRecord || !recordStarted) && !stack.empty()) {
|
||||
Set<String> cleanThis = stack.pop();
|
||||
if (cleanThis != null) {
|
||||
for (String fld : cleanThis) values.remove(fld);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If a new tag is encountered, check if it is of interest or not by seeing
|
||||
* if it matches against our node tree. If we have deperted from the node
|
||||
* tree then walk back though the tree's ancestor nodes checking to see if
|
||||
* any // expressions exist for the node and compare them against the new
|
||||
* tag. If matched then "jump" to that node, otherwise ignore the tag.
|
||||
*
|
||||
* Note, the list of // expressions found while walking back up the tree
|
||||
* is chached in the HashMap decends. Then if the new tag is to be skipped,
|
||||
* any inner chil tags are compared against the cache and jumped to if
|
||||
* matched.
|
||||
*/
|
||||
private void handleStartElement(XMLStreamReader parser, Set<Node> childrenFound,
|
||||
Handler handler, Map<String, Object> values,
|
||||
Stack<Set<String>> stack, boolean recordStarted)
|
||||
throws IOException, XMLStreamException {
|
||||
Node n = getMatchingNode(parser,childNodes);
|
||||
Map<String, Object> decends=new HashMap<>();
|
||||
if (n != null) {
|
||||
childrenFound.add(n);
|
||||
n.parse(parser, handler, values, stack, recordStarted);
|
||||
return;
|
||||
}
|
||||
// The stream has diverged from the tree of interesting elements, but
|
||||
// are there any wildCardNodes ... anywhere in our path from the root?
|
||||
Node dn = this; // checking our Node first!
|
||||
|
||||
do {
|
||||
if (dn.wildCardNodes != null) {
|
||||
// Check to see if the streams tag matches one of the "//" all
|
||||
// decendents type expressions for this node.
|
||||
n = getMatchingNode(parser, dn.wildCardNodes);
|
||||
if (n != null) {
|
||||
childrenFound.add(n);
|
||||
n.parse(parser, handler, values, stack, recordStarted);
|
||||
break;
|
||||
}
|
||||
// add the list of this nodes wild decendents to the cache
|
||||
for (Node nn : dn.wildCardNodes) decends.put(nn.name, nn);
|
||||
}
|
||||
dn = dn.wildAncestor; // leap back along the tree toward root
|
||||
} while (dn != null) ;
|
||||
|
||||
if (n == null) {
|
||||
// we have a START_ELEMENT which is not within the tree of
|
||||
// interesting nodes. Skip over the contents of this element
|
||||
// but recursivly repeat the above for any START_ELEMENTs
|
||||
// found within this element.
|
||||
int count = 1; // we have had our first START_ELEMENT
|
||||
while (count != 0) {
|
||||
int token = parser.next();
|
||||
if (token == START_ELEMENT) {
|
||||
Node nn = (Node) decends.get(parser.getLocalName());
|
||||
if (nn != null) {
|
||||
// We have a //Node which matches the stream's parser.localName
|
||||
childrenFound.add(nn);
|
||||
// Parse the contents of this stream element
|
||||
nn.parse(parser, handler, values, stack, recordStarted);
|
||||
}
|
||||
else count++;
|
||||
}
|
||||
else if (token == END_ELEMENT) count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check if the current tag is to be parsed or not. We step through the
|
||||
* supplied List "searchList" looking for a match. If matched, return the
|
||||
* Node object.
|
||||
*/
|
||||
private Node getMatchingNode(XMLStreamReader parser,List<Node> searchL){
|
||||
if (searchL == null)
|
||||
return null;
|
||||
String localName = parser.getLocalName();
|
||||
for (Node n : searchL) {
|
||||
if (n.name.equals(localName)) {
|
||||
if (n.attribAndValues == null)
|
||||
return n;
|
||||
if (checkForAttributes(parser, n.attribAndValues))
|
||||
return n;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean checkForAttributes(XMLStreamReader parser,
|
||||
List<Map.Entry<String, String>> attrs) {
|
||||
for (Map.Entry<String, String> e : attrs) {
|
||||
String val = parser.getAttributeValue(null, e.getKey());
|
||||
if (val == null)
|
||||
return false;
|
||||
if (e.getValue() != null && !e.getValue().equals(val))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive routine that walks the Node tree from a supplied start
|
||||
* pushing a null string onto every multiValued fieldName's List of values
|
||||
* where a value has not been provided from the stream.
|
||||
*/
|
||||
private void putNulls(Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
|
||||
if (attributes != null) {
|
||||
for (Node n : attributes) {
|
||||
if (n.multiValued) {
|
||||
putANull(n.fieldName, values, valuesAddedinThisFrame);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (hasText && multiValued) {
|
||||
putANull(fieldName, values, valuesAddedinThisFrame);
|
||||
}
|
||||
if (childNodes != null) {
|
||||
for (Node childNode : childNodes) {
|
||||
childNode.putNulls(values, valuesAddedinThisFrame);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void putANull(String thisFieldName, Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
|
||||
putText(values, null, thisFieldName, true);
|
||||
if( valuesAddedinThisFrame != null) {
|
||||
valuesAddedinThisFrame.add(thisFieldName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the field name and text into the values Map. If it is a non
|
||||
* multivalued field, then the text is simply placed in the object
|
||||
* portion of the Map. If it is a multivalued field then the text is
|
||||
* pushed onto a List which is the object portion of the Map.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
private void putText(Map<String, Object> values, String value,
|
||||
String fieldName, boolean multiValued) {
|
||||
if (multiValued) {
|
||||
List<String> v = (List<String>) values.get(fieldName);
|
||||
if (v == null) {
|
||||
v = new ArrayList<>();
|
||||
values.put(fieldName, v);
|
||||
}
|
||||
v.add(value);
|
||||
} else {
|
||||
values.put(fieldName, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Walk the Node tree propagating any wildDescentant information to
|
||||
* child nodes. This allows us to optimise the performance of the
|
||||
* main parse method.
|
||||
*/
|
||||
private void buildOptimise(Node wa) {
|
||||
wildAncestor=wa;
|
||||
if ( wildCardNodes != null ) wa = this;
|
||||
if ( childNodes != null )
|
||||
for ( Node n : childNodes ) n.buildOptimise(wa);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a Node tree structure representing all Xpaths of intrest to us.
|
||||
* This must be done before parsing of the XML stream starts. Each node
|
||||
* holds one portion of an Xpath. Taking each Xpath segment in turn this
|
||||
* method walks the Node tree and finds where the new segment should be
|
||||
* inserted. It creates a Node representing a field's name, XPATH and
|
||||
* some flags and inserts the Node into the Node tree.
|
||||
*/
|
||||
private void build(
|
||||
List<String> paths, // a List of segments from the split xpaths
|
||||
String fieldName, // the fieldName assoc with this Xpath
|
||||
boolean multiValued, // flag if this fieldName is multiValued or not
|
||||
boolean record, // is this xpath a record or a field
|
||||
int flags // are we to flatten matching xpaths
|
||||
) {
|
||||
// recursivly walk the paths Lists adding new Nodes as required
|
||||
String xpseg = paths.remove(0); // shift out next Xpath segment
|
||||
|
||||
if (paths.isEmpty() && xpseg.startsWith("@")) {
|
||||
// we have reached end of element portion of Xpath and can now only
|
||||
// have an element attribute. Add it to this nodes list of attributes
|
||||
if (attributes == null) {
|
||||
attributes = new ArrayList<>();
|
||||
}
|
||||
xpseg = xpseg.substring(1); // strip the '@'
|
||||
attributes.add(new Node(xpseg, fieldName, multiValued));
|
||||
}
|
||||
else if ( xpseg.length() == 0) {
|
||||
// we have a '//' selector for all decendents of the current nodes
|
||||
xpseg = paths.remove(0); // shift out next Xpath segment
|
||||
if (wildCardNodes == null) wildCardNodes = new ArrayList<>();
|
||||
Node n = getOrAddNode(xpseg, wildCardNodes);
|
||||
if (paths.isEmpty()) {
|
||||
// We are current a leaf node.
|
||||
// xpath with content we want to store and return
|
||||
n.hasText = true; // we have to store text found here
|
||||
n.fieldName = fieldName; // name to store collected text against
|
||||
n.multiValued = multiValued; // true: text be stored in a List
|
||||
n.flatten = flags == FLATTEN; // true: store text from child tags
|
||||
}
|
||||
else {
|
||||
// recurse to handle next paths segment
|
||||
n.build(paths, fieldName, multiValued, record, flags);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (childNodes == null)
|
||||
childNodes = new ArrayList<>();
|
||||
// does this "name" already exist as a child node.
|
||||
Node n = getOrAddNode(xpseg,childNodes);
|
||||
if (paths.isEmpty()) {
|
||||
// We have emptied paths, we are for the moment a leaf of the tree.
|
||||
// When parsing the actual input we have traversed to a position
|
||||
// where we actutally have to do something. getOrAddNode() will
|
||||
// have created and returned a new minimal Node with name and
|
||||
// xpathName already populated. We need to add more information.
|
||||
if (record) {
|
||||
// forEach attribute
|
||||
n.isRecord = true; // flag: forEach attribute, prepare to emit rec
|
||||
n.forEachPath = fieldName; // the full forEach attribute xpath
|
||||
} else {
|
||||
// xpath with content we want to store and return
|
||||
n.hasText = true; // we have to store text found here
|
||||
n.fieldName = fieldName; // name to store collected text against
|
||||
n.multiValued = multiValued; // true: text be stored in a List
|
||||
n.flatten = flags == FLATTEN; // true: store text from child tags
|
||||
}
|
||||
} else {
|
||||
// recurse to handle next paths segment
|
||||
n.build(paths, fieldName, multiValued, record, flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Node getOrAddNode(String xpathName, List<Node> searchList ) {
|
||||
for (Node n : searchList)
|
||||
if (n.xpathName.equals(xpathName)) return n;
|
||||
// new territory! add a new node for this Xpath bitty
|
||||
Node n = new Node(xpathName, this); // a minimal Node initialization
|
||||
Matcher m = ATTRIB_PRESENT_WITHVAL.matcher(xpathName);
|
||||
if (m.find()) {
|
||||
n.name = m.group(1);
|
||||
int start = m.start(2);
|
||||
while (true) {
|
||||
HashMap<String, String> attribs = new HashMap<>();
|
||||
if (!m.find(start))
|
||||
break;
|
||||
attribs.put(m.group(3), m.group(5));
|
||||
start = m.end(6);
|
||||
if (n.attribAndValues == null)
|
||||
n.attribAndValues = new ArrayList<>();
|
||||
n.attribAndValues.addAll(attribs.entrySet());
|
||||
}
|
||||
}
|
||||
searchList.add(n);
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies a supplied Map to a new Map which is returned. Used to copy a
|
||||
* records values. If a fields value is a List then they have to be
|
||||
* deep-copied for thread safety
|
||||
*/
|
||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
||||
private static Map<String, Object> getDeepCopy(Map<String, Object> values) {
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
for (Map.Entry<String, Object> entry : values.entrySet()) {
|
||||
if (entry.getValue() instanceof List) {
|
||||
result.put(entry.getKey(), new ArrayList((List) entry.getValue()));
|
||||
} else {
|
||||
result.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
} // end of class Node
|
||||
|
||||
|
||||
/**
|
||||
* The Xpath is split into segments using the '/' as a separator. However
|
||||
* this method deals with special cases where there is a slash '/' character
|
||||
* inside the attribute value e.g. x/@html='text/html'. We split by '/' but
|
||||
* then reassemble things were the '/' appears within a quoted sub-string.
|
||||
*
|
||||
* We have already enforced that the string must begin with a separator. This
|
||||
* method depends heavily on how split behaves if the string starts with the
|
||||
* separator or if a sequence of multiple separator's appear.
|
||||
*/
|
||||
private static List<String> splitEscapeQuote(String str) {
|
||||
List<String> result = new LinkedList<>();
|
||||
String[] ss = str.split("/");
|
||||
for (int i=0; i<ss.length; i++) { // i=1: skip separator at start of string
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int quoteCount = 0;
|
||||
while (true) {
|
||||
sb.append(ss[i]);
|
||||
for (int j=0; j<ss[i].length(); j++)
|
||||
if (ss[i].charAt(j) == '\'') quoteCount++;
|
||||
// have we got a split inside quoted sub-string?
|
||||
if ((quoteCount % 2) == 0) break;
|
||||
// yes!; replace the '/' and loop to concat next token
|
||||
i++;
|
||||
sb.append("/");
|
||||
}
|
||||
result.add(sb.toString());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
static {
|
||||
EmptyEntityResolver.configureXMLInputFactory(factory);
|
||||
factory.setXMLReporter(XMLLOG);
|
||||
try {
|
||||
// The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
|
||||
// XMLInputFactory, as that implementation tries to cache and reuse the
|
||||
// XMLStreamReader. Setting the parser-specific "reuse-instance" property to false
|
||||
// prevents this.
|
||||
// All other known open-source stax parsers (and the bea ref impl)
|
||||
// have thread-safe factories.
|
||||
factory.setProperty("reuse-instance", Boolean.FALSE);
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// Other implementations will likely throw this exception since "reuse-instance"
|
||||
// isimplementation specific.
|
||||
log.debug("Unable to set the 'reuse-instance' property for the input chain: {}", factory);
|
||||
}
|
||||
}
|
||||
|
||||
/**Implement this interface to stream records as and when one is found.
|
||||
*
|
||||
*/
|
||||
public interface Handler {
|
||||
/**
|
||||
* @param record The record map. The key is the field name as provided in
|
||||
* the addField() methods. The value can be a single String (for single
|
||||
* valued fields) or a List<String> (for multiValued).
|
||||
* @param xpath The forEach XPATH for which this record is being emitted
|
||||
* If there is any change all parsing will be aborted and the Exception
|
||||
* is propagated up
|
||||
*/
|
||||
void handle(Map<String, Object> record, String xpath);
|
||||
}
|
||||
|
||||
private static final Pattern ATTRIB_PRESENT_WITHVAL = Pattern
|
||||
.compile("(\\S*?)?(\\[@)(\\S*?)(='(.*?)')?(\\])");
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue