SOLR-14783: Remove DIH from 9.0 (#1794)

* Remove DIH example directory

* Remove contrib code directories

* Remove contrib-package-related configurations from build tools

* Remove mention of DIH example

* Remove DIH from build dependencies and drop no-longer-needed version pins

* Remove README references to DIH

* Remove DIH mention from a script that probably does not need to exist at all

* Remove more build artifact references

* Remove more leftovers of removed dependencies (licenses/versions)

* No need to exclude DIH from the smoke tester anymore

* Remove Admin UI's DIH integration

* Remove DIH from shortname package list

* Remove an unused dataset that may or may not be DIH-related
It is unclear what this directory was for: nothing else references it,
while the other parallel directories ARE referenced in TestConfigSetsAPI.java

* Remove references in hidden IDEA files

* No DIH to ignore anymore

* Remove last Derby DB references

* Remove DIH from documentation
Add the information to the Major Changes document, with a link to the external repo

* Add/update a mention in CHANGES

* Fix leftover library mentions (see the sketch after this list for one way to hunt for stragglers)

* Fix spelling
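
Most of the bullets above amount to hunting down stray references to DIH across the tree. As a hedged illustration only (not part of this commit, and the patterns are merely examples), a quick way to confirm nothing was missed is a case-insensitive repository search; the CHANGES entry and the Major Changes page are expected to keep a mention, so a couple of hits are fine:

```
# Illustrative sanity check, not part of this commit:
# list files that still mention DIH after the removal.
git grep -i -l -e 'dataimporthandler' -e 'example-DIH'
```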
Alexandre Rafalovitch 2020-08-29 10:52:04 -04:00 committed by GitHub
parent c11d32faed
commit a57ba25400
420 changed files with 972 additions and 70374 deletions

View File

@ -1,9 +0,0 @@
<component name="libraryTable">
<library name="Derby">
<CLASSES>
<root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/derby-10.9.1.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

View File

@ -1,9 +0,0 @@
<component name="libraryTable">
<library name="HSQLDB">
<CLASSES>
<root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/hsqldb-2.4.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

View File

@ -1,10 +0,0 @@
<component name="libraryTable">
<library name="Solr DIH core library">
<CLASSES>
<root url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/lib" />
</CLASSES>
<JAVADOC />
<SOURCES />
<jarDirectory url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/lib" recursive="false" />
</library>
</component>

View File

@ -1,10 +0,0 @@
<component name="libraryTable">
<library name="Solr DIH extras library">
<CLASSES>
<root url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler-extras/lib" />
</CLASSES>
<JAVADOC />
<SOURCES />
<jarDirectory url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler-extras/lib" recursive="false" />
</library>
</component>

View File

@ -1,10 +0,0 @@
<component name="libraryTable">
<library name="Solr DIH test library">
<CLASSES>
<root url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/test-lib" />
</CLASSES>
<JAVADOC />
<SOURCES />
<jarDirectory url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/test-lib" recursive="false" />
</library>
</component>

View File

@ -53,8 +53,6 @@
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/analytics/analytics.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/dataimporthandler-extras/dataimporthandler-extras.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/dataimporthandler/dataimporthandler.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/extraction/extraction.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/langid/langid.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/ltr/ltr.iml" />

View File

@ -284,22 +284,6 @@
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
<configuration default="false" name="Solr dataimporthandler contrib" type="JUnit" factoryName="JUnit">
<module name="dataimporthandler" />
<option name="TEST_OBJECT" value="pattern" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/solr/contrib/solr-dataimporthandler" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
<configuration default="false" name="Solr dataimporthandler-extras contrib" type="JUnit" factoryName="JUnit">
<module name="dataimporthandler-extras" />
<option name="TEST_OBJECT" value="pattern" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/solr/contrib/solr-dataimporthandler-extras" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
<configuration default="false" name="Solr extraction contrib" type="JUnit" factoryName="JUnit">
<module name="extraction" />
<option name="TEST_OBJECT" value="pattern" />
@ -341,7 +325,7 @@
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
<list size="42">
<list size="39">
<item index="0" class="java.lang.String" itemvalue="JUnit.Lucene core" />
<item index="1" class="java.lang.String" itemvalue="JUnit.Module analyzers-common" />
<item index="2" class="java.lang.String" itemvalue="JUnit.Module analyzers-icu" />
@ -376,13 +360,11 @@
<item index="32" class="java.lang.String" itemvalue="JUnit.Solr analysis-extras contrib" />
<item index="33" class="java.lang.String" itemvalue="JUnit.Solr analytics contrib" />
<item index="34" class="java.lang.String" itemvalue="JUnit.Solr clustering contrib" />
<item index="35" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler contrib" />
<item index="36" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler-extras contrib" />
<item index="37" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
<item index="38" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
<item index="39" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
<item index="40" class="java.lang.String" itemvalue="JUnit.Solr prometheus-exporter contrib" />
<item index="42" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
<item index="35" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
<item index="36" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
<item index="37" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
<item index="38" class="java.lang.String" itemvalue="JUnit.Solr prometheus-exporter contrib" />
<item index="39" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
</list>
</component>
</project>

View File

@ -1,29 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler-extras/classes/java" />
<output-test url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler-extras/classes/test" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
<orderEntry type="module" scope="TEST" module-name="lucene-core" />
<orderEntry type="library" name="Solr core library" level="project" />
<orderEntry type="library" name="Solrj library" level="project" />
<orderEntry type="library" name="Solr DIH extras library" level="project" />
<orderEntry type="library" name="Solr extraction library" level="project" />
<orderEntry type="module" module-name="solr-core" />
<orderEntry type="module" module-name="solrj" />
<orderEntry type="module" module-name="dataimporthandler" />
<orderEntry type="module" module-name="analysis-common" />
</component>
</module>

View File

@ -1,31 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler/classes/java" />
<output-test url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler/classes/test" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/webapp" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="library" scope="TEST" name="HSQLDB" level="project" />
<orderEntry type="library" scope="TEST" name="Derby" level="project" />
<orderEntry type="library" scope="TEST" name="Solr DIH test library" level="project" />
<orderEntry type="library" name="Solr example library" level="project" />
<orderEntry type="library" name="Solr core library" level="project" />
<orderEntry type="library" name="Solrj library" level="project" />
<orderEntry type="library" name="Solr DIH core library" level="project" />
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
<orderEntry type="module" module-name="solr-core" />
<orderEntry type="module" module-name="solrj" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene-core" />
<orderEntry type="module" scope="TEST" module-name="join" />
</component>
</module>

View File

@ -0,0 +1,55 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-parent</artifactId>
<version>@version@</version>
<relativePath>../pom.xml</relativePath>
</parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-contrib-aggregator</artifactId>
<name>Apache Solr Contrib aggregator POM</name>
<packaging>pom</packaging>
<modules>
<module>analysis-extras</module>
<module>analytics</module>
<module>clustering</module>
<module>extraction</module>
<module>jaegertracer-configurator</module>
<module>langid</module>
<module>ltr</module>
<module>prometheus-exporter</module>
<module>velocity</module>
</modules>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -48,33 +48,6 @@ my @moves = (
'solr/contrib/clustering/src/main/java'
=> 'solr/contrib/clustering/src/java',
'solr/contrib/dataimporthandler/src/test/java'
=> 'solr/contrib/dataimporthandler/src/test',
'solr/contrib/dataimporthandler/src/test/resources/solr-dih'
=> 'solr/contrib/dataimporthandler/src/test-files/dih/solr',
'solr/contrib/dataimporthandler/src/test/resources'
=> 'solr/contrib/dataimporthandler/src/test-files/dih',
'solr/contrib/dataimporthandler/src/main/java'
=> 'solr/contrib/dataimporthandler/src/java',
'solr/contrib/dataimporthandler/src/main/webapp'
=> 'solr/contrib/dataimporthandler/src/webapp',
'solr/contrib/dataimporthandler/src/extras/test/java'
=> 'solr/contrib/dataimporthandler-extras/src/test',
'solr/contrib/dataimporthandler/src/extras/test/resources/solr-dihextras'
=> 'solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr',
'solr/contrib/dataimporthandler/src/extras/test/resources'
=> 'solr/contrib/dataimporthandler-extras/src/test-files/dihextras',
'solr/contrib/dataimporthandler/src/extras/main/java'
=> 'solr/contrib/dataimporthandler-extras/src/java',
'solr/contrib/extraction/src/test/java'
=> 'solr/contrib/extraction/src/test',

View File

@ -225,8 +225,7 @@ def checkAllJARs(topDir, project, gitRevision, version, tmpDir, baseURL):
for file in files:
if file.lower().endswith('.jar'):
if project == 'solr':
if ((normRoot.endswith('/contrib/dataimporthandler-extras/lib') and (file.startswith('javax.mail-') or file.startswith('activation-')))
or (normRoot.endswith('/test-framework/lib') and file.startswith('jersey-'))
if ((normRoot.endswith('/test-framework/lib') and file.startswith('jersey-'))
or (normRoot.endswith('/contrib/extraction/lib') and file.startswith('xml-apis-'))):
print(' **WARNING**: skipping check of %s/%s: it has javax.* classes' % (root, file))
continue

View File

@ -164,10 +164,6 @@ configure(project(":solr:example")) {
into "exampledocs/"
})
from(configurations.dih, {
into "example-DIH/solr/db/lib"
})
into projectDir
}
}

View File

@ -20,8 +20,7 @@
configure([project(":lucene:spatial3d"),
project(":lucene:analysis:common"),
project(":lucene:backward-codecs"),
project(":lucene:queryparser"),
project(":solr:contrib:dataimporthandler")]) {
project(":lucene:queryparser")]) {
plugins.withType(JavaPlugin) {
configurations {
testClassesExported
@ -56,15 +55,6 @@ configure(project(":solr:contrib:analysis-extras")) {
plugins.withType(JavaPlugin) {
dependencies {
testImplementation project(path: ':lucene:analysis:common', configuration: 'testClassesExported')
testImplementation project(path: ':solr:contrib:dataimporthandler', configuration: 'testClassesExported')
}
}
}
configure(project(":solr:contrib:dataimporthandler-extras")) {
plugins.withType(JavaPlugin) {
dependencies {
testImplementation project(path: ':solr:contrib:dataimporthandler', configuration: 'testClassesExported')
}
}
}

View File

@ -60,8 +60,6 @@ configure(rootProject) {
":solr:core",
":solr:solrj",
":solr:contrib:analysis-extras",
":solr:contrib:dataimporthandler",
":solr:contrib:dataimporthandler-extras",
":solr:contrib:analytics",
":solr:contrib:clustering",
":solr:contrib:extraction",

View File

@ -108,7 +108,7 @@ grant {
// needed by hadoop htrace
permission java.net.NetPermission "getNetworkInformation";
// needed by DIH
// needed by DIH - possibly even after DIH is a package
permission java.sql.SQLPermission "deregisterDriver";
permission java.util.logging.LoggingPermission "control";
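
The comment kept above hints that the `java.sql.SQLPermission` grant may still be needed once DIH lives on as a third-party package. As a rough sketch only, assuming a community package repository exists for DIH (the repository URL and package name below are placeholders, not taken from this commit), installing it on Solr 9.x through the package manager could look like:

```
# Placeholders only: the real repository URL and package name come from the
# external DIH project, not from this commit.
bin/solr package add-repo dih-repo https://example.org/dih/repo
bin/solr package install data-import-handler
bin/solr package deploy data-import-handler -collections mycollection
```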

View File

@ -46,30 +46,6 @@
<packageUrl regex="true">^pkg:maven/org\.jruby/dirgra@.*$</packageUrl>
<cpe>cpe:/a:jruby:jruby</cpe>
</suppress>
<suppress>
<notes><![CDATA[
file name: derby-10.9.1.0.jar
Only used in tests and dih-example
]]></notes>
<packageUrl regex="true">^pkg:maven/org\.apache\.derby/derby@.*$</packageUrl>
<cpe>cpe:/a:apache:derby</cpe>
</suppress>
<suppress>
<notes><![CDATA[
file name: derby-10.9.1.0.jar
Only used in tests and dih-example
]]></notes>
<packageUrl regex="true">^pkg:maven/org\.apache\.derby/derby@.*$</packageUrl>
<vulnerabilityName>CVE-2015-1832</vulnerabilityName>
</suppress>
<suppress>
<notes><![CDATA[
file name: derby-10.9.1.0.jar
Only used in tests and dih-example
]]></notes>
<packageUrl regex="true">^pkg:maven/org\.apache\.derby/derby@.*$</packageUrl>
<vulnerabilityName>CVE-2018-1313</vulnerabilityName>
</suppress>
<suppress>
<notes><![CDATA[
file name: carrot2-guava-18.0.jar

View File

@ -0,0 +1,327 @@
# The /org/name keys in this file must be kept lexically sorted.
# Blank lines, comment lines, and keys that aren't in /org/name format are ignored
# when the lexical sort check is performed by the ant check-lib-versions target.
/com.adobe.xmp/xmpcore = 5.1.3
com.carrotsearch.randomizedtesting.version = 2.7.6
/com.carrotsearch.randomizedtesting/junit4-ant = ${com.carrotsearch.randomizedtesting.version}
/com.carrotsearch.randomizedtesting/randomizedtesting-runner = ${com.carrotsearch.randomizedtesting.version}
/com.carrotsearch.thirdparty/simple-xml-safe = 2.7.1
/com.carrotsearch/hppc = 0.8.2
/com.cybozu.labs/langdetect = 1.1-20120112
/com.drewnoakes/metadata-extractor = 2.11.0
/com.epam/parso = 2.0.11
com.fasterxml.jackson.core.version = 2.10.1
/com.fasterxml.jackson.core/jackson-annotations = ${com.fasterxml.jackson.core.version}
/com.fasterxml.jackson.core/jackson-core = ${com.fasterxml.jackson.core.version}
/com.fasterxml.jackson.core/jackson-databind = ${com.fasterxml.jackson.core.version}
/com.fasterxml.jackson.dataformat/jackson-dataformat-smile = ${com.fasterxml.jackson.core.version}
/com.github.ben-manes.caffeine/caffeine = 2.8.4
/com.github.virtuald/curvesapi = 1.06
/com.github.zafarkhaja/java-semver = 0.9.0
/com.google.guava/guava = 25.1-jre
/com.google.protobuf/protobuf-java = 3.11.0
/com.google.re2j/re2j = 1.2
/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
/com.googlecode.mp4parser/isoparser = 1.1.22
/com.healthmarketscience.jackcess/jackcess = 3.0.1
/com.healthmarketscience.jackcess/jackcess-encrypt = 3.0.0
/com.ibm.icu/icu4j = 62.2
/com.jayway.jsonpath/json-path = 2.4.0
/com.lmax/disruptor = 3.4.2
/com.pff/java-libpst = 0.8.1
com.rometools.version = 1.12.2
/com.rometools/rome = ${com.rometools.version}
/com.rometools/rome-utils = ${com.rometools.version}
com.sun.jersey.version = 1.19
/com.sun.jersey/jersey-servlet = ${com.sun.jersey.version}
/com.tdunning/t-digest = 3.1
/com.vaadin.external.google/android-json = 0.0.20131108.vaadin1
/commons-cli/commons-cli = 1.4
/commons-codec/commons-codec = 1.13
/commons-collections/commons-collections = 3.2.2
/commons-io/commons-io = 2.6
# necessary to run test or embedded Zookeeper as of 3.6.1
commons.lang.version = 2.6
/commons-lang/commons-lang = ${commons.lang.version}
/commons-logging/commons-logging = 1.1.3
/de.l3s.boilerpipe/boilerpipe = 1.1.0
io.dropwizard.metrics.version = 4.1.5
/io.dropwizard.metrics/metrics-core = ${io.dropwizard.metrics.version}
/io.dropwizard.metrics/metrics-graphite = ${io.dropwizard.metrics.version}
/io.dropwizard.metrics/metrics-jetty9 = ${io.dropwizard.metrics.version}
/io.dropwizard.metrics/metrics-jmx = ${io.dropwizard.metrics.version}
/io.dropwizard.metrics/metrics-jvm = ${io.dropwizard.metrics.version}
io.jaegertracing.version = 1.1.0
/io.jaegertracing/jaeger-core = ${io.jaegertracing.version}
/io.jaegertracing/jaeger-thrift = ${io.jaegertracing.version}
io.netty.netty.version = 4.1.50.Final
/io.netty/netty-buffer = ${io.netty.netty.version}
/io.netty/netty-codec = ${io.netty.netty.version}
/io.netty/netty-common = ${io.netty.netty.version}
/io.netty/netty-handler = ${io.netty.netty.version}
/io.netty/netty-resolver = ${io.netty.netty.version}
/io.netty/netty-transport = ${io.netty.netty.version}
/io.netty/netty-transport-native-epoll = ${io.netty.netty.version}
/io.netty/netty-transport-native-unix-common = ${io.netty.netty.version}
io.opentracing.version = 0.33.0
/io.opentracing/opentracing-api = ${io.opentracing.version}
/io.opentracing/opentracing-mock = ${io.opentracing.version}
/io.opentracing/opentracing-noop = ${io.opentracing.version}
/io.opentracing/opentracing-util = ${io.opentracing.version}
io.prometheus.version = 0.2.0
/io.prometheus/simpleclient = ${io.prometheus.version}
/io.prometheus/simpleclient_common = ${io.prometheus.version}
/io.prometheus/simpleclient_httpserver = ${io.prometheus.version}
/io.sgr/s2-geometry-library-java = 1.0.0
/javax.servlet/javax.servlet-api = 3.1.0
/junit/junit = 4.12
/mecab/mecab-ipadic = 2.7.0-20070801
/mecab/mecab-ko-dic = 2.0.3-20170922
/mecab/mecab-naist-jdic = 0.6.3b-20111013
/net.arnx/jsonic = 1.2.7
/net.bytebuddy/byte-buddy = 1.9.3
/net.hydromatic/eigenbase-properties = 1.1.5
net.sourceforge.argparse4j.version = 0.8.1
/net.sourceforge.argparse4j/argparse4j = ${net.sourceforge.argparse4j.version}
/net.sourceforge.nekohtml/nekohtml = 1.9.17
net.thisptr.version = 0.0.8
/net.thisptr/jackson-jq = ${net.thisptr.version}
/org.antlr/antlr4-runtime = 4.5.1-1
/org.apache.ant/ant = 1.8.2
org.apache.calcite.avatica.version = 1.13.0
/org.apache.calcite.avatica/avatica-core = ${org.apache.calcite.avatica.version}
org.apache.calcite.version = 1.18.0
/org.apache.calcite/calcite-core = ${org.apache.calcite.version}
/org.apache.calcite/calcite-linq4j = ${org.apache.calcite.version}
org.apache.commons.commons-collections4-rev = 4.4
/org.apache.commons/commons-collections4 = ${org.apache.commons.commons-collections4-rev}
/org.apache.commons/commons-compress = 1.19
/org.apache.commons/commons-configuration2 = 2.1.1
/org.apache.commons/commons-csv = 1.7
/org.apache.commons/commons-exec = 1.3
/org.apache.commons/commons-lang3 = 3.9
/org.apache.commons/commons-math3 = 3.6.1
/org.apache.commons/commons-text = 1.6
org.apache.curator.version = 2.13.0
/org.apache.curator/curator-client = ${org.apache.curator.version}
/org.apache.curator/curator-framework = ${org.apache.curator.version}
/org.apache.curator/curator-recipes = ${org.apache.curator.version}
org.apache.hadoop.version = 3.2.0
/org.apache.hadoop/hadoop-annotations = ${org.apache.hadoop.version}
/org.apache.hadoop/hadoop-auth = ${org.apache.hadoop.version}
/org.apache.hadoop/hadoop-common = ${org.apache.hadoop.version}
/org.apache.hadoop/hadoop-hdfs = ${org.apache.hadoop.version}
/org.apache.hadoop/hadoop-hdfs-client = ${org.apache.hadoop.version}
/org.apache.hadoop/hadoop-minikdc = ${org.apache.hadoop.version}
/org.apache.htrace/htrace-core4 = 4.1.0-incubating
# The httpcore version is often different from the httpclient and httpmime versions,
# so the httpcore version value should not share the same symbolic name with them.
/org.apache.httpcomponents/httpclient = 4.5.10
/org.apache.httpcomponents/httpcore = 4.4.12
/org.apache.httpcomponents/httpmime = 4.5.10
/org.apache.ivy/ivy = 2.4.0
org.apache.james.apache.mime4j.version = 0.8.3
/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version}
/org.apache.james/apache-mime4j-dom = ${org.apache.james.apache.mime4j.version}
org.apache.kerby.version = 1.0.1
/org.apache.kerby/kerb-admin = ${org.apache.kerby.version}
/org.apache.kerby/kerb-client = ${org.apache.kerby.version}
/org.apache.kerby/kerb-common = ${org.apache.kerby.version}
/org.apache.kerby/kerb-core = ${org.apache.kerby.version}
/org.apache.kerby/kerb-crypto = ${org.apache.kerby.version}
/org.apache.kerby/kerb-identity= ${org.apache.kerby.version}
/org.apache.kerby/kerb-server = ${org.apache.kerby.version}
/org.apache.kerby/kerb-simplekdc = ${org.apache.kerby.version}
/org.apache.kerby/kerb-util = ${org.apache.kerby.version}
/org.apache.kerby/kerby-asn1 = ${org.apache.kerby.version}
/org.apache.kerby/kerby-config = ${org.apache.kerby.version}
/org.apache.kerby/kerby-kdc = ${org.apache.kerby.version}
/org.apache.kerby/kerby-pkix = ${org.apache.kerby.version}
/org.apache.kerby/kerby-util = ${org.apache.kerby.version}
org.apache.logging.log4j.version = 2.13.2
/org.apache.logging.log4j/log4j-1.2-api = ${org.apache.logging.log4j.version}
/org.apache.logging.log4j/log4j-api = ${org.apache.logging.log4j.version}
/org.apache.logging.log4j/log4j-core = ${org.apache.logging.log4j.version}
/org.apache.logging.log4j/log4j-slf4j-impl = ${org.apache.logging.log4j.version}
/org.apache.logging.log4j/log4j-web = ${org.apache.logging.log4j.version}
/org.apache.opennlp/opennlp-tools = 1.9.1
org.apache.pdfbox.version = 2.0.17
/org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version}
/org.apache.pdfbox/jempbox = 1.8.16
/org.apache.pdfbox/pdfbox = ${org.apache.pdfbox.version}
/org.apache.pdfbox/pdfbox-tools = ${org.apache.pdfbox.version}
org.apache.poi.version = 4.1.1
/org.apache.poi/poi = ${org.apache.poi.version}
/org.apache.poi/poi-ooxml = ${org.apache.poi.version}
/org.apache.poi/poi-ooxml-schemas = ${org.apache.poi.version}
/org.apache.poi/poi-scratchpad = ${org.apache.poi.version}
org.apache.thrift.version = 0.13.0
/org.apache.thrift/libthrift = ${org.apache.thrift.version}
org.apache.tika.version = 1.24
/org.apache.tika/tika-core = ${org.apache.tika.version}
/org.apache.tika/tika-java7 = ${org.apache.tika.version}
/org.apache.tika/tika-parsers = ${org.apache.tika.version}
/org.apache.tika/tika-xmp = ${org.apache.tika.version}
org.apache.velocity.tools.version = 3.0
/org.apache.velocity.tools/velocity-tools-generic = ${org.apache.velocity.tools.version}
/org.apache.velocity.tools/velocity-tools-view = ${org.apache.velocity.tools.version}
/org.apache.velocity.tools/velocity-tools-view-jsp = ${org.apache.velocity.tools.version}
/org.apache.velocity/velocity-engine-core = 2.0
/org.apache.xmlbeans/xmlbeans = 3.1.0
org.apache.zookeeper.version = 3.6.1
/org.apache.zookeeper/zookeeper = ${org.apache.zookeeper.version}
/org.apache.zookeeper/zookeeper-jute = ${org.apache.zookeeper.version}
# v1.6.2 of asciidoctor-ant includes asciidoctorj 1.6.2, which uses
# asciidoctor 1.5.8, and asciidoctorj-pdf 1.5.0-alpha.16, which is the same
# as asciidoctor-pdf 1.5.0-alpha.16
/org.asciidoctor/asciidoctor-ant = 1.6.2
/org.aspectj/aspectjrt = 1.8.0
/org.bitbucket.b_c/jose4j = 0.6.5
org.bouncycastle.version = 1.64
/org.bouncycastle/bcmail-jdk15on = ${org.bouncycastle.version}
/org.bouncycastle/bcpkix-jdk15on = ${org.bouncycastle.version}
/org.bouncycastle/bcprov-jdk15on = ${org.bouncycastle.version}
/org.brotli/dec = 0.1.2
/org.carrot2.attributes/attributes-binder = 1.3.3
/org.carrot2.shaded/carrot2-guava = 18.0
/org.carrot2/carrot2-mini = 3.16.2
org.carrot2.morfologik.version = 2.1.5
/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}
/org.carrot2/morfologik-polish = ${org.carrot2.morfologik.version}
/org.carrot2/morfologik-stemming = ${org.carrot2.morfologik.version}
/org.ccil.cowan.tagsoup/tagsoup = 1.2.1
org.codehaus.janino.version = 3.0.9
/org.codehaus.janino/commons-compiler = ${org.codehaus.janino.version}
/org.codehaus.janino/janino = ${org.codehaus.janino.version}
/org.codehaus.woodstox/stax2-api = 3.1.4
/org.codehaus.woodstox/woodstox-core-asl = 4.4.1
org.eclipse.jetty.version = 9.4.27.v20200227
/org.eclipse.jetty.http2/http2-client = ${org.eclipse.jetty.version}
/org.eclipse.jetty.http2/http2-common = ${org.eclipse.jetty.version}
/org.eclipse.jetty.http2/http2-hpack = ${org.eclipse.jetty.version}
/org.eclipse.jetty.http2/http2-http-client-transport = ${org.eclipse.jetty.version}
/org.eclipse.jetty.http2/http2-server = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-alpn-client = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-alpn-java-client = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-alpn-java-server = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-alpn-server = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-client = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-continuation = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-deploy = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-http = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-io = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-jmx = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-rewrite = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-security = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-server = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-servlet = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-servlets = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-start = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-util = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-webapp = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-xml = ${org.eclipse.jetty.version}
org.gagravarr.vorbis.java.version = 0.8
/org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version}
/org.gagravarr/vorbis-java-tika = ${org.gagravarr.vorbis.java.version}
/org.hamcrest/hamcrest = 2.2
/org.jdom/jdom2 = 2.0.6
/org.jsoup/jsoup = 1.12.1
/org.locationtech.jts/jts-core = 1.15.0
/org.locationtech.spatial4j/spatial4j = 0.7
/org.mockito/mockito-core = 2.23.4
/org.objenesis/objenesis = 2.6
org.ow2.asm.version = 7.2
/org.ow2.asm/asm = ${org.ow2.asm.version}
/org.ow2.asm/asm-commons = ${org.ow2.asm.version}
org.restlet.jee.version = 2.4.3
/org.restlet.jee/org.restlet = ${org.restlet.jee.version}
/org.restlet.jee/org.restlet.ext.servlet = ${org.restlet.jee.version}
/org.rrd4j/rrd4j = 3.5
org.slf4j.version = 1.7.24
/org.slf4j/jcl-over-slf4j = ${org.slf4j.version}
/org.slf4j/jul-to-slf4j = ${org.slf4j.version}
/org.slf4j/slf4j-api = ${org.slf4j.version}
/org.slf4j/slf4j-simple = ${org.slf4j.version}
/org.tallison/jmatio = 1.5
/org.tukaani/xz = 1.8
# required for instantiating a Zookeeper server in tests or embedded
org.xerial.snappy.version = 1.1.7.6
/org.xerial.snappy/snappy-java = ${org.xerial.snappy.version}
ua.net.nlp.morfologik-ukrainian-search.version = 4.9.1
/ua.net.nlp/morfologik-ukrainian-search = ${ua.net.nlp.morfologik-ukrainian-search.version}
/xerces/xercesImpl = 2.12.0

View File

@ -53,8 +53,6 @@ include "solr:solrj"
include "solr:core"
include "solr:server"
include "solr:contrib:analysis-extras"
include "solr:contrib:dataimporthandler"
include "solr:contrib:dataimporthandler-extras"
include "solr:contrib:analytics"
include "solr:contrib:clustering"
include "solr:contrib:extraction"

5
solr/.gitignore vendored
View File

@ -2,8 +2,6 @@
/bin/*.pid
/contrib/dataimporthandler/test-lib/
/core/test-lib/
/example/start.jar
@ -15,9 +13,6 @@
/example/solr/zoo_data
/example/work/*
/example/exampledocs/post.jar
/example/example-DIH/**/data
/example/example-DIH/**/dataimport.properties
/example/example-DIH/solr/mail/lib/*.jar
/package

View File

@ -118,6 +118,9 @@ Other Changes
* LUCENE-9433: Remove Ant support from trunk (Erick Erickson, Uwe Schindler et.al.)
* SOLR-14783: Remove Data Import Handler (DIH), previously deprecated (Alexandre Rafalovitch)
Bug Fixes
---------------------
* SOLR-14546: Fix for a relatively hard to hit issue in OverseerTaskProcessor that could lead to out of order execution

View File

@ -90,15 +90,14 @@ Solr includes a few examples to help you get started. To run a specific example,
bin/solr -e <EXAMPLE> where <EXAMPLE> is one of:
cloud : SolrCloud example
dih : Data Import Handler (rdbms, mail, atom, tika)
schemaless : Schema-less example (schema is inferred from data during indexing)
techproducts : Kitchen sink example providing comprehensive examples of Solr features
```
For instance, if you want to run the Solr Data Import Handler example, do:
For instance, if you want to run the SolrCloud example, do:
```
bin/solr -e dih
bin/solr -e cloud
```
Indexing Documents
@ -142,8 +141,7 @@ server/
example/
Contains example documents and an alternative Solr home
directory containing examples of how to use the Data Import Handler,
see example/example-DIH/README.md for more information.
directory containing various examples.
dist/solr-<component>-XX.jar
The Apache Solr libraries. To compile Apache Solr Plugins,

View File

@ -386,7 +386,6 @@ function print_usage() {
echo " -e <example> Name of the example to run; available examples:"
echo " cloud: SolrCloud example"
echo " techproducts: Comprehensive example illustrating many of Solr's core capabilities"
echo " dih: Data Import Handler"
echo " schemaless: Schema-less example"
echo ""
echo " -a Additional parameters to pass to the JVM when starting Solr, such as to setup"

View File

@ -360,7 +360,6 @@ goto done
@echo -e example Name of the example to run; available examples:
@echo cloud: SolrCloud example
@echo techproducts: Comprehensive example illustrating many of Solr's core capabilities
@echo dih: Data Import Handler
@echo schemaless: Schema-less example
@echo.
@echo -a opts Additional parameters to pass to the JVM when starting Solr, such as to setup

547
solr/common-build.xml Normal file
View File

@ -0,0 +1,547 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="common-solr" default="default" xmlns:rsel="antlib:org.apache.tools.ant.types.resources.selectors">
<description>
This file is designed for importing into a main build file, and not intended
for standalone use.
</description>
<dirname file="${ant.file.common-solr}" property="common-solr.dir"/>
<property name="Name" value="Solr" />
<!-- solr uses Java 11 -->
<property name="javac.release" value="11"/>
<property name="javac.args" value="-Xlint:-deprecation"/>
<property name="javac.profile.args" value=""/>
<property name="dest" location="${common-solr.dir}/build" />
<property name="build.dir" location="${dest}/${ant.project.name}"/>
<property name="jacoco.report.dir" location="${dest}/jacoco"/>
<property name="dist" location="${common-solr.dir}/dist"/>
<property name="package.dir" location="${common-solr.dir}/package"/>
<property name="maven.dist.dir" location="${package.dir}/maven"/>
<property name="lucene-libs" location="${dest}/lucene-libs" />
<property name="tests.userdir" location="src/test-files"/>
<property name="tests.policy" location="${common-solr.dir}/server/etc/security.policy"/>
<property name="server.dir" location="${common-solr.dir}/server" />
<property name="example" location="${common-solr.dir}/example" />
<property name="javadoc.dir" location="${dest}/docs"/>
<property name="javadoc-online.dir" location="${dest}/docs-online"/>
<property name="tests.cleanthreads.sysprop" value="perClass"/>
<property name="changes.target.dir" location="${dest}/docs/changes"/>
<property name="license.dir" location="${common-solr.dir}/licenses"/>
<property name="solr.tgz.unpack.dir" location="${common-solr.dir}/build/solr.tgz.unpacked"/>
<property name="dist.jar.dir.prefix" value="${solr.tgz.unpack.dir}/solr"/>
<property name="dist.jar.dir.suffix" value="dist"/>
<import file="${common-solr.dir}/../lucene/module-build.xml"/>
<property name="solr.tgz.file" location="${common-solr.dir}/package/solr-${version}.tgz"/>
<available file="${solr.tgz.file}" property="solr.tgz.exists"/>
<available type="dir" file="${solr.tgz.unpack.dir}" property="solr.tgz.unpack.dir.exists"/>
<target name="-ensure-solr-tgz-exists" unless="solr.tgz.exists">
<ant dir="${common-solr.dir}" target="create-package" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<target name="-unpack-solr-tgz" unless="${solr.tgz.unpack.dir.exists}">
<antcall target="-ensure-solr-tgz-exists">
<propertyset refid="uptodate.and.compiled.properties"/>
</antcall>
<mkdir dir="${solr.tgz.unpack.dir}"/>
<untar compression="gzip" src="${solr.tgz.file}" dest="${solr.tgz.unpack.dir}">
<patternset refid="patternset.lucene.solr.jars"/>
</untar>
</target>
<!-- backwards compatibility with existing targets/tasks; TODO: remove this! -->
<property name="fullnamever" value="${final.name}"/>
<path id="additional.dependencies">
<fileset dir="${common-solr.dir}/core/lib" excludes="${common.classpath.excludes}"/>
<fileset dir="${common-solr.dir}/solrj/lib" excludes="${common.classpath.excludes}"/>
<fileset dir="${common-solr.dir}/server/lib" excludes="${common.classpath.excludes}"/>
<fileset dir="lib" excludes="${common.classpath.excludes}" erroronmissingdir="false"/>
</path>
<path id="solr.lucene.libs">
<!-- List of jars that will be used as the foundation for both
the base classpath, as well as copied into the lucene-libs dir
in the release.
-->
<!-- NOTE: lucene-core is explicitly not included because of the
base.classpath (compilation & tests are done directly against
the class files w/o needing to build the jar)
-->
<pathelement location="${analyzers-common.jar}"/>
<pathelement location="${analyzers-kuromoji.jar}"/>
<pathelement location="${analyzers-nori.jar}"/>
<pathelement location="${analyzers-phonetic.jar}"/>
<pathelement location="${codecs.jar}"/>
<pathelement location="${backward-codecs.jar}"/>
<pathelement location="${highlighter.jar}"/>
<pathelement location="${memory.jar}"/>
<pathelement location="${misc.jar}"/>
<pathelement location="${spatial-extras.jar}"/>
<pathelement location="${spatial3d.jar}"/>
<pathelement location="${expressions.jar}"/>
<pathelement location="${suggest.jar}"/>
<pathelement location="${grouping.jar}"/>
<pathelement location="${queries.jar}"/>
<pathelement location="${queryparser.jar}"/>
<pathelement location="${join.jar}"/>
<pathelement location="${sandbox.jar}"/>
<pathelement location="${classification.jar}"/>
</path>
<path id="solr.base.classpath">
<pathelement location="${common-solr.dir}/build/solr-solrj/classes/java"/>
<pathelement location="${common-solr.dir}/build/solr-core/classes/java"/>
<path refid="solr.lucene.libs" />
<path refid="additional.dependencies"/>
<path refid="base.classpath"/>
</path>
<path id="classpath" refid="solr.base.classpath"/>
<path id="solr.test.base.classpath">
<pathelement path="${common-solr.dir}/build/solr-test-framework/classes/java"/>
<fileset dir="${common-solr.dir}/test-framework/lib">
<include name="*.jar"/>
<exclude name="junit-*.jar" />
<exclude name="randomizedtesting-runner-*.jar" />
<exclude name="ant*.jar" />
</fileset>
<pathelement path="src/test-files"/>
<path refid="test.base.classpath"/>
</path>
<path id="test.classpath" refid="solr.test.base.classpath"/>
<macrodef name="solr-contrib-uptodate">
<attribute name="name"/>
<attribute name="property" default="@{name}.uptodate"/>
<attribute name="classpath.property" default="@{name}.jar"/>
<!-- set jarfile only, if the target jar file has no generic name -->
<attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/solr-@{name}-${version}.jar"/>
<sequential>
<!--<echo message="Checking '@{jarfile}' against source folder '${common.dir}/contrib/@{name}/src/java'"/>-->
<property name="@{classpath.property}" location="@{jarfile}"/>
<uptodate property="@{property}" targetfile="@{jarfile}">
<srcfiles dir="${common-solr.dir}/contrib/@{name}/src/java" includes="**/*.java"/>
</uptodate>
</sequential>
</macrodef>
<target name="validate" depends="compile-tools">
</target>
<target name="init-dist" depends="resolve-groovy">
<mkdir dir="${build.dir}"/>
<mkdir dir="${package.dir}"/>
<mkdir dir="${dist}"/>
<mkdir dir="${maven.dist.dir}"/>
</target>
<target name="prep-lucene-jars"
depends="resolve-groovy,
jar-lucene-core, jar-backward-codecs, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-analyzers-nori, jar-codecs,jar-expressions, jar-suggest, jar-highlighter, jar-memory,
jar-misc, jar-spatial-extras, jar-spatial3d, jar-grouping, jar-queries, jar-queryparser, jar-join, jar-sandbox, jar-classification">
<property name="solr.deps.compiled" value="true"/>
</target>
<target name="lucene-jars-to-solr"
depends="-lucene-jars-to-solr-not-for-package,-lucene-jars-to-solr-package"/>
<target name="-lucene-jars-to-solr-not-for-package" unless="called.from.create-package">
<sequential>
<antcall target="prep-lucene-jars" inheritall="true"/>
<property name="solr.deps.compiled" value="true"/>
<copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
<path refid="solr.lucene.libs" />
<!-- NOTE: lucene-core is not already included in "solr.lucene.libs" because of its use in classpaths. -->
<fileset file="${lucene-core.jar}" />
</copy>
</sequential>
</target>
<target name="-lucene-jars-to-solr-package" if="called.from.create-package">
<sequential>
<antcall target="-unpack-lucene-tgz" inheritall="true"/>
<pathconvert property="relative.solr.lucene.libs" pathsep=",">
<path refid="solr.lucene.libs"/>
<fileset file="${lucene-core.jar}"/>
<globmapper from="${common.build.dir}/*" to="*" handledirsep="true"/>
</pathconvert>
<mkdir dir="${lucene-libs}"/>
<copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
<fileset dir="${lucene.tgz.unpack.dir}/lucene-${version}" includes="${relative.solr.lucene.libs}"/>
</copy>
</sequential>
</target>
<!-- Shared core/solrj/test-framework/contrib targets -->
<macrodef name="solr-jarify" description="Builds a Solr JAR file">
<attribute name="basedir" default="${build.dir}/classes/java"/>
<attribute name="destfile" default="${build.dir}/${final.name}.jar"/>
<attribute name="title" default="Apache Solr Search Server: ${ant.project.name}"/>
<attribute name="excludes" default="**/pom.xml,**/*.iml"/>
<attribute name="metainf.source.dir" default="${common-solr.dir}"/>
<attribute name="implementation.title" default="org.apache.solr"/>
<attribute name="manifest.file" default="${manifest.file}"/>
<element name="solr-jarify-filesets" optional="true"/>
<element name="solr-jarify-additional-manifest-attributes" optional="true"/>
<sequential>
<jarify basedir="@{basedir}" destfile="@{destfile}"
title="@{title}" excludes="@{excludes}"
metainf.source.dir="@{metainf.source.dir}"
implementation.title="@{implementation.title}"
manifest.file="@{manifest.file}">
<filesets>
<solr-jarify-filesets />
</filesets>
<jarify-additional-manifest-attributes>
<solr-jarify-additional-manifest-attributes />
</jarify-additional-manifest-attributes>
</jarify>
</sequential>
</macrodef>
<target name="jar-core" depends="compile-core">
<solr-jarify/>
</target>
<target name="compile-core" depends="prep-lucene-jars,resolve-example,resolve-server,common.compile-core"/>
<target name="compile-test" depends="compile-solr-test-framework,common.compile-test"/>
<target name="dist" depends="jar-core">
<copy file="${build.dir}/${fullnamever}.jar" todir="${dist}"/>
</target>
<property name="lucenedocs" location="${common.dir}/build/docs"/>
<!-- dependency to ensure all lucene javadocs are present -->
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-nori,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-backward-codecs,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial-extras,javadocs-join,javadocs-test-framework"/>
<!-- create javadocs for the current module -->
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solr-core,check-javadocs-uptodate" unless="javadocs-uptodate-${name}">
<sequential>
<mkdir dir="${javadoc.dir}/${name}"/>
<solr-invoke-javadoc>
<solrsources>
<packageset dir="${src.dir}"/>
</solrsources>
<links>
<link href="../solr-solrj"/>
<link href="../solr-core"/>
</links>
</solr-invoke-javadoc>
<solr-jarify basedir="${javadoc.dir}/${name}" destfile="${build.dir}/${final.name}-javadoc.jar"/>
</sequential>
</target>
<target name="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
<uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/solr-core-${version}-javadoc.jar">
<srcfiles dir="${common-solr.dir}/core/src/java" includes="**/*.java"/>
</uptodate>
</target>
<target name="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
<uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/solr-solrj-${version}-javadoc.jar">
<srcfiles dir="${common-solr.dir}/solrj/src/java" includes="**/*.java"/>
</uptodate>
</target>
<target name="javadocs-solr-core" depends="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
<ant dir="${common-solr.dir}/core" target="javadocs" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="solr-core-javadocs.uptodate" value="true"/>
</target>
<target name="javadocs-solrj" depends="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
<ant dir="${common-solr.dir}/solrj" target="javadocs" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="solrj-javadocs.uptodate" value="true"/>
</target>
<!-- macro to create solr javadocs with links to lucene. make sure calling task depends on lucene-javadocs -->
<macrodef name="solr-invoke-javadoc">
<element name="solrsources" optional="yes"/>
<element name="links" optional="yes"/>
<attribute name="destdir" default="${javadoc.dir}/${name}"/>
<attribute name="title" default="${Name} ${version} ${name} API"/>
<attribute name="overview" default="${src.dir}/overview.html"/>
<sequential>
<mkdir dir="@{destdir}"/>
<invoke-javadoc destdir="@{destdir}" title="@{title}" overview="@{overview}">
<sources>
<solrsources/>
<link offline="true" href="${lucene.javadoc.url}core" packagelistloc="${lucenedocs}/core"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-common" packagelistloc="${lucenedocs}/analyzers-common"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-icu" packagelistloc="${lucenedocs}/analyzers-icu"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-kuromoji" packagelistloc="${lucenedocs}/analyzers-kuromoji"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-nori" packagelistloc="${lucenedocs}/analyzers-nori"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-morfologik" packagelistloc="${lucenedocs}/analyzers-morfologik"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-phonetic" packagelistloc="${lucenedocs}/analyzers-phonetic"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-smartcn" packagelistloc="${lucenedocs}/analyzers-smartcn"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-stempel" packagelistloc="${lucenedocs}/analyzers-stempel"/>
<link offline="true" href="${lucene.javadoc.url}backward-codecs" packagelistloc="${lucenedocs}/backward-codecs"/>
<link offline="true" href="${lucene.javadoc.url}codecs" packagelistloc="${lucenedocs}/codecs"/>
<link offline="true" href="${lucene.javadoc.url}expressions" packagelistloc="${lucenedocs}/expressions"/>
<link offline="true" href="${lucene.javadoc.url}suggest" packagelistloc="${lucenedocs}/suggest"/>
<link offline="true" href="${lucene.javadoc.url}grouping" packagelistloc="${lucenedocs}/grouping"/>
<link offline="true" href="${lucene.javadoc.url}join" packagelistloc="${lucenedocs}/join"/>
<link offline="true" href="${lucene.javadoc.url}queries" packagelistloc="${lucenedocs}/queries"/>
<link offline="true" href="${lucene.javadoc.url}queryparser" packagelistloc="${lucenedocs}/queryparser"/>
<link offline="true" href="${lucene.javadoc.url}highlighter" packagelistloc="${lucenedocs}/highlighter"/>
<link offline="true" href="${lucene.javadoc.url}memory" packagelistloc="${lucenedocs}/memory"/>
<link offline="true" href="${lucene.javadoc.url}misc" packagelistloc="${lucenedocs}/misc"/>
<link offline="true" href="${lucene.javadoc.url}classification" packagelistloc="${lucenedocs}/classification"/>
<link offline="true" href="${lucene.javadoc.url}spatial-extras" packagelistloc="${lucenedocs}/spatial-extras"/>
<links/>
<link href=""/>
</sources>
</invoke-javadoc>
</sequential>
</macrodef>
<target name="define-lucene-javadoc-url" depends="resolve-groovy" unless="lucene.javadoc.url">
<property name="useLocalJavadocUrl" value=""/>
<groovy><![CDATA[
String url, version = properties['version'];
String useLocalJavadocUrl = properties['useLocalJavadocUrl'];
if (version != properties['version.base'] || Boolean.parseBoolean(useLocalJavadocUrl)) {
url = new File(properties['common.dir'], 'build' + File.separator + 'docs').toURI().toASCIIString();
if (!(url =~ /\/$/)) url += '/';
} else {
version = version.replace('.', '_');
url = 'https://lucene.apache.org/core/' + version + '/';
}
task.log('Using the following URL to refer to Lucene Javadocs: ' + url);
properties['lucene.javadoc.url'] = url;
]]></groovy>
</target>
<target name="define-solr-javadoc-url" depends="resolve-groovy" unless="solr.javadoc.url">
<groovy><![CDATA[
String url, version = properties['version'];
if (version != properties['version.base']) {
url = '';
task.log('Disabled Solr Javadocs online URL for packaging (custom build / SNAPSHOT version).');
} else {
version = version.replace('.', '_');
url = 'https://lucene.apache.org/solr/' + version + '/';
task.log('Using the following URL to refer to Solr Javadocs: ' + url);
}
properties['solr.javadoc.url'] = url;
]]></groovy>
</target>
<target name="jar-src">
<sequential>
<mkdir dir="${build.dir}"/>
<solr-jarify basedir="${src.dir}" destfile="${build.dir}/${final.name}-src.jar">
<solr-jarify-filesets>
<fileset dir="${resources.dir}" erroronmissingdir="no"/>
</solr-jarify-filesets>
</solr-jarify>
</sequential>
</target>
<target name="-validate-maven-dependencies" depends="-validate-maven-dependencies.init">
<m2-validate-dependencies pom.xml="${maven.pom.xml}" licenseDirectory="${license.dir}">
<additional-filters>
<replaceregex pattern="jetty([^/]+)$" replace="jetty" flags="gi" />
<replaceregex pattern="slf4j-([^/]+)$" replace="slf4j" flags="gi" />
<replaceregex pattern="(bcmail|bcprov)-([^/]+)$" replace="\1" flags="gi" />
</additional-filters>
<excludes>
<rsel:or>
<rsel:name name="**/lucene-*-${maven.version.glob}.jar" handledirsep="true"/>
<rsel:name name="**/solr-*-${maven.version.glob}.jar" handledirsep="true"/>
<!-- TODO: figure out what is going on here with servlet-apis -->
<rsel:name name="**/*servlet*.jar" handledirsep="true"/>
</rsel:or>
</excludes>
</m2-validate-dependencies>
</target>
<!-- Solr core targets -->
<target name="compile-solr-core" description="Compile Solr core." unless="solr.core.compiled">
<ant dir="${common-solr.dir}/core" target="compile-core" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="solr.core.compiled" value="true"/>
</target>
<target name="compile-test-solr-core" description="Compile solr core tests">
<ant dir="${common-solr.dir}/core" target="compile-test" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="solr.core.compiled" value="true"/>
</target>
<target name="dist-core" depends="init-dist"
description="Creates the Solr JAR Distribution file.">
<ant dir="${common-solr.dir}/core" target="dist" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<!-- Solrj targets -->
<target name="compile-solrj" description="Compile the java client." unless="solrj.compiled">
<ant dir="${common-solr.dir}/solrj" target="compile-core" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="solrj.compiled" value="true"/>
</target>
<target name="compile-test-solrj" description="Compile java client tests">
<ant dir="${common-solr.dir}/solrj" target="compile-test" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="solrj.compiled" value="true"/>
</target>
<target name="dist-solrj" depends="init-dist"
description="Creates the Solr-J JAR Distribution file.">
<ant dir="${common-solr.dir}/solrj" target="dist" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<target name="jar-solrj" description="Jar Solr-J">
<ant dir="${common-solr.dir}/solrj" target="jar-core" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<!-- Solr test-framework targets -->
<target name="compile-solr-test-framework" description="Compile the Solr test-framework" unless="solr.test.framework.compiled">
<ant dir="${common-solr.dir}/test-framework" target="compile-core" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="solr.core.compiled" value="true"/>
<property name="solr.test.framework.compiled" value="true"/>
</target>
<target name="jar-solr-test-framework" depends="compile-solr-test-framework">
<ant dir="${common-solr.dir}/test-framework" target="jar-core" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<!-- resolve dependencies in the example (relied upon by compile/tests) -->
<target name="resolve-example" unless="example.libs.uptodate">
<property name="example.libs.uptodate" value="true"/>
</target>
<!-- resolve dependencies in the server directory (relied upon by compile/tests) -->
<target name="resolve-server" unless="server.libs.uptodate">
<ant dir="${common-solr.dir}/server" target="resolve" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="server.libs.uptodate" value="true"/>
</target>
<macrodef name="contrib-crawl">
<attribute name="target" default=""/>
<attribute name="failonerror" default="true"/>
<sequential>
<subant target="@{target}" failonerror="@{failonerror}" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
<fileset dir="." includes="contrib/*/build.xml"/>
</subant>
</sequential>
</macrodef>
<target name="-compile-test-lucene-analysis">
<ant dir="${common.dir}/analysis" target="compile-test" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<target name="-compile-test-lucene-queryparser">
<ant dir="${common.dir}/queryparser" target="compile-test" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<target name="-compile-test-lucene-backward-codecs">
<ant dir="${common.dir}/backward-codecs" target="compile-test" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<!-- Solr contrib targets -->
<target name="-compile-analysis-extras">
<ant dir="${common-solr.dir}/contrib/analysis-extras" target="compile" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
</target>
<target name="compile-contrib" description="Compile contrib modules">
<contrib-crawl target="compile-core"/>
</target>
<target name="compile-test-contrib" description="Compile contrib modules' tests">
<contrib-crawl target="compile-test"/>
</target>
<target name="javadocs-contrib" description="Compile contrib modules">
<contrib-crawl target="javadocs"/>
</target>
<target name="jar-contrib" description="Jar contrib modules">
<contrib-crawl target="jar-core"/>
</target>
<target name="contribs-add-to-webapp">
<mkdir dir="${dest}/web"/>
<delete dir="${dest}/web" includes="**/*" failonerror="false"/>
<contrib-crawl target="add-to-webapp"/>
</target>
<!-- Forbidden API Task, customizations for Solr -->
<target name="-check-forbidden-all" depends="-init-forbidden-apis,compile-core,compile-test">
<property prefix="ivyversions" file="${common.dir}/ivy-versions.properties"/><!-- for commons-io version -->
<forbidden-apis suppressAnnotation="**.SuppressForbidden" classpathref="forbidden-apis.allclasses.classpath" targetVersion="${javac.release}">
<signatures>
<bundled name="jdk-unsafe"/>
<bundled name="jdk-deprecated"/>
<bundled name="jdk-non-portable"/>
<bundled name="jdk-reflection"/>
<bundled name="commons-io-unsafe-${ivyversions./commons-io/commons-io}"/>
<fileset dir="${common.dir}/tools/forbiddenApis">
<include name="base.txt" />
<include name="servlet-api.txt" />
<include name="solr.txt" />
</fileset>
</signatures>
<fileset dir="${build.dir}/classes/java" excludes="${forbidden-base-excludes}"/>
<fileset dir="${build.dir}/classes/test" excludes="${forbidden-tests-excludes}" erroronmissingdir="false"/>
</forbidden-apis>
</target>
<!-- hack for now to disable *all* Solr tests on Jenkins when "tests.disable-solr" property is set -->
<target name="test" unless="tests.disable-solr">
<antcall target="common.test" inheritrefs="true" inheritall="true"/>
</target>
</project>


@@ -1,33 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
apply plugin: 'java-library'
description = 'Data Import Handler Extras'
dependencies {
implementation project(':solr:core')
implementation project(':solr:contrib:dataimporthandler')
implementation project(':solr:contrib:extraction')
implementation ('javax.activation:activation')
implementation ('com.sun.mail:javax.mail')
implementation ('com.sun.mail:gimap')
testImplementation project(':solr:test-framework')
}


@@ -1,901 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import com.sun.mail.imap.IMAPMessage;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
import org.apache.solr.util.RTimer;
import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.*;
import javax.mail.internet.AddressException;
import javax.mail.internet.ContentType;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.search.*;
import java.io.InputStream;
import java.lang.invoke.MethodHandles;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.function.Supplier;
import com.sun.mail.gimap.GmailFolder;
import com.sun.mail.gimap.GmailRawSearchTerm;
/**
* An EntityProcessor instance which can index emails along with their
* attachments from POP3 or IMAP sources. Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler"
* >http://wiki.apache.org/solr/DataImportHandler</a> for more details. <b>This
* API is experimental and subject to change</b>
*
* @since solr 1.4
*/
public class MailEntityProcessor extends EntityProcessorBase {
private static final SimpleDateFormat sinceDateParser =
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT);
private static final SimpleDateFormat afterFmt =
new SimpleDateFormat("yyyy/MM/dd", Locale.ROOT);
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static interface CustomFilter {
public SearchTerm getCustomSearch(Folder folder);
}
public void init(Context context) {
super.init(context);
// set attributes using XXX getXXXFromContext(attribute, defaultValue);
// applies variable resolver and return default if value is not found or null
// REQUIRED : connection and folder info
user = getStringFromContext("user", null);
password = getStringFromContext("password", null);
host = getStringFromContext("host", null);
protocol = getStringFromContext("protocol", null);
folderNames = getStringFromContext("folders", null);
// validate
if (host == null || protocol == null || user == null || password == null
|| folderNames == null) throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"'user|password|protocol|host|folders' are required attributes");
// OPTIONAL : have defaults and are optional
recurse = getBoolFromContext("recurse", true);
exclude.clear();
String excludes = getStringFromContext("exclude", "");
if (excludes != null && !excludes.trim().equals("")) {
exclude = Arrays.asList(excludes.split(","));
}
include.clear();
String includes = getStringFromContext("include", "");
if (includes != null && !includes.trim().equals("")) {
include = Arrays.asList(includes.split(","));
}
batchSize = getIntFromContext("batchSize", 20);
customFilter = getStringFromContext("customFilter", "");
if (filters != null) filters.clear();
folderIter = null;
msgIter = null;
String lastIndexTime = null;
String command =
String.valueOf(context.getRequestParameters().get("command"));
if (!DataImporter.FULL_IMPORT_CMD.equals(command))
throw new IllegalArgumentException(this.getClass().getSimpleName()+
" only supports "+DataImporter.FULL_IMPORT_CMD);
// Read the last_index_time out of the dataimport.properties if available
String cname = getStringFromContext("name", "mailimporter");
String varName = ConfigNameConstants.IMPORTER_NS_SHORT + "." + cname + "."
+ DocBuilder.LAST_INDEX_TIME;
Object varValue = context.getVariableResolver().resolve(varName);
log.info("{}={}", varName, varValue);
if (varValue != null && !"".equals(varValue) &&
!"".equals(getStringFromContext("fetchMailsSince", ""))) {
// need to check if varValue is the epoch, which we'll take to mean the
// initial value, in which case we should use fetchMailsSince instead
Date tmp = null;
try {
tmp = sinceDateParser.parse((String)varValue);
if (tmp.getTime() == 0) {
log.info("Ignoring initial value {} for {} in favor of fetchMailsSince config parameter"
, varValue, varName);
tmp = null; // don't use this value
}
} catch (ParseException e) {
// probably ok to ignore this since we have other options below
// as we're just trying to figure out if the date is 0
log.warn("Failed to parse {} from {} due to", varValue, varName, e);
}
if (tmp == null) {
// favor fetchMailsSince in this case because the value from
// dataimport.properties is the default/init value
varValue = getStringFromContext("fetchMailsSince", "");
log.info("fetchMailsSince={}", varValue);
}
}
if (varValue == null || "".equals(varValue)) {
varName = ConfigNameConstants.IMPORTER_NS_SHORT + "."
+ DocBuilder.LAST_INDEX_TIME;
varValue = context.getVariableResolver().resolve(varName);
log.info("{}={}", varName, varValue);
}
if (varValue != null && varValue instanceof String) {
lastIndexTime = (String)varValue;
if (lastIndexTime != null && lastIndexTime.length() == 0)
lastIndexTime = null;
}
if (lastIndexTime == null)
lastIndexTime = getStringFromContext("fetchMailsSince", "");
log.info("Using lastIndexTime {} for mail import", lastIndexTime);
this.fetchMailsSince = null;
if (lastIndexTime != null && lastIndexTime.length() > 0) {
try {
fetchMailsSince = sinceDateParser.parse(lastIndexTime);
log.info("Parsed fetchMailsSince={}", lastIndexTime);
} catch (ParseException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Invalid value for fetchMailSince: " + lastIndexTime, e);
}
}
fetchSize = getIntFromContext("fetchSize", 32 * 1024);
cTimeout = getIntFromContext("connectTimeout", 30 * 1000);
rTimeout = getIntFromContext("readTimeout", 60 * 1000);
String tmp = context.getEntityAttribute("includeOtherUserFolders");
includeOtherUserFolders = (tmp != null && Boolean.valueOf(tmp.trim()));
tmp = context.getEntityAttribute("includeSharedFolders");
includeSharedFolders = (tmp != null && Boolean.valueOf(tmp.trim()));
setProcessAttachmentConfig();
includeContent = getBoolFromContext("includeContent", true);
logConfig();
}
private void setProcessAttachmentConfig() {
processAttachment = true;
String tbval = context.getEntityAttribute("processAttachments");
if (tbval == null) {
tbval = context.getEntityAttribute("processAttachement");
if (tbval != null) processAttachment = Boolean.valueOf(tbval);
} else processAttachment = Boolean.valueOf(tbval);
}
@Override
public Map<String,Object> nextRow() {
Message mail = null;
Map<String,Object> row = null;
do {
// try till there is a valid document or folders get exhausted.
// when mail == NULL, it means end of processing
mail = getNextMail();
if (mail != null)
row = getDocumentFromMail(mail);
if (row != null && row.get("folder") == null)
row.put("folder", mail.getFolder().getFullName());
} while (row == null && mail != null);
return row;
}
private Message getNextMail() {
if (!connected) {
// this is needed to load the activation mail stuff correctly
// otherwise, the JavaMail multipart support doesn't get configured
// correctly, which leads to a class cast exception when processing
// multipart messages: IMAPInputStream cannot be cast to
// javax.mail.Multipart
if (false == withContextClassLoader(getClass().getClassLoader(), this::connectToMailBox)) {
return null;
}
connected = true;
}
if (folderIter == null) {
createFilters();
folderIter = new FolderIterator(mailbox);
}
// get next message from the folder
// if folder is exhausted get next folder
// loop till a valid mail or all folders exhausted.
while (msgIter == null || !msgIter.hasNext()) {
Folder next = folderIter.hasNext() ? folderIter.next() : null;
if (next == null) return null;
msgIter = new MessageIterator(next, batchSize);
}
return msgIter.next();
}
private Map<String,Object> getDocumentFromMail(Message mail) {
Map<String,Object> row = new HashMap<>();
try {
addPartToDocument(mail, row, true);
return row;
} catch (Exception e) {
log.error("Failed to convert message [{}] to document due to: {}"
, mail, e, e);
return null;
}
}
@SuppressWarnings({"unchecked"})
public void addPartToDocument(Part part, Map<String,Object> row, boolean outerMost) throws Exception {
if (part instanceof Message) {
addEnvelopeToDocument(part, row);
}
String ct = part.getContentType().toLowerCase(Locale.ROOT);
ContentType ctype = new ContentType(ct);
if (part.isMimeType("multipart/*")) {
Object content = part.getContent();
if (content != null && content instanceof Multipart) {
Multipart mp = (Multipart) part.getContent();
int count = mp.getCount();
if (part.isMimeType("multipart/alternative")) count = 1;
for (int i = 0; i < count; i++)
addPartToDocument(mp.getBodyPart(i), row, false);
} else {
log.warn("Multipart content is a not an instance of Multipart! Content is: {}"
+ ". Typically, this is due to the Java Activation JAR being loaded by the wrong classloader."
, (content != null ? content.getClass().getName() : "null"));
}
} else if (part.isMimeType("message/rfc822")) {
addPartToDocument((Part) part.getContent(), row, false);
} else {
String disp = part.getDisposition();
if (includeContent
&& !(disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT))) {
InputStream is = part.getInputStream();
Metadata contentTypeHint = new Metadata();
contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType()
.toLowerCase(Locale.ENGLISH));
String content = (new Tika()).parseToString(is, contentTypeHint);
if (row.get(CONTENT) == null) row.put(CONTENT, new ArrayList<String>());
List<String> contents = (List<String>) row.get(CONTENT);
contents.add(content.trim());
row.put(CONTENT, contents);
}
if (!processAttachment || disp == null
|| !disp.equalsIgnoreCase(Part.ATTACHMENT)) return;
InputStream is = part.getInputStream();
String fileName = part.getFileName();
Metadata contentTypeHint = new Metadata();
contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType()
.toLowerCase(Locale.ENGLISH));
String content = (new Tika()).parseToString(is, contentTypeHint);
if (content == null || content.trim().length() == 0) return;
if (row.get(ATTACHMENT) == null) row.put(ATTACHMENT,
new ArrayList<String>());
List<String> contents = (List<String>) row.get(ATTACHMENT);
contents.add(content.trim());
row.put(ATTACHMENT, contents);
if (row.get(ATTACHMENT_NAMES) == null) row.put(ATTACHMENT_NAMES,
new ArrayList<String>());
List<String> names = (List<String>) row.get(ATTACHMENT_NAMES);
names.add(fileName);
row.put(ATTACHMENT_NAMES, names);
}
}
private void addEnvelopeToDocument(Part part, Map<String,Object> row)
throws MessagingException {
MimeMessage mail = (MimeMessage) part;
Address[] adresses;
if ((adresses = mail.getFrom()) != null && adresses.length > 0) row.put(
FROM, adresses[0].toString());
List<String> to = new ArrayList<>();
if ((adresses = mail.getRecipients(Message.RecipientType.TO)) != null) addAddressToList(
adresses, to);
if ((adresses = mail.getRecipients(Message.RecipientType.CC)) != null) addAddressToList(
adresses, to);
if ((adresses = mail.getRecipients(Message.RecipientType.BCC)) != null) addAddressToList(
adresses, to);
if (to.size() > 0) row.put(TO_CC_BCC, to);
row.put(MESSAGE_ID, mail.getMessageID());
row.put(SUBJECT, mail.getSubject());
Date d = mail.getSentDate();
if (d != null) {
row.put(SENT_DATE, d);
}
List<String> flags = new ArrayList<>();
for (Flags.Flag flag : mail.getFlags().getSystemFlags()) {
if (flag == Flags.Flag.ANSWERED) flags.add(FLAG_ANSWERED);
else if (flag == Flags.Flag.DELETED) flags.add(FLAG_DELETED);
else if (flag == Flags.Flag.DRAFT) flags.add(FLAG_DRAFT);
else if (flag == Flags.Flag.FLAGGED) flags.add(FLAG_FLAGGED);
else if (flag == Flags.Flag.RECENT) flags.add(FLAG_RECENT);
else if (flag == Flags.Flag.SEEN) flags.add(FLAG_SEEN);
}
flags.addAll(Arrays.asList(mail.getFlags().getUserFlags()));
if (flags.size() == 0) flags.add(FLAG_NONE);
row.put(FLAGS, flags);
String[] hdrs = mail.getHeader("X-Mailer");
if (hdrs != null) row.put(XMAILER, hdrs[0]);
}
private void addAddressToList(Address[] adresses, List<String> to)
throws AddressException {
for (Address address : adresses) {
to.add(address.toString());
InternetAddress ia = (InternetAddress) address;
if (ia.isGroup()) {
InternetAddress[] group = ia.getGroup(false);
for (InternetAddress member : group)
to.add(member.toString());
}
}
}
private boolean connectToMailBox() {
try {
Properties props = new Properties();
if (System.getProperty("mail.debug") != null)
props.setProperty("mail.debug", System.getProperty("mail.debug"));
if (("imap".equals(protocol) || "imaps".equals(protocol))
&& "imap.gmail.com".equals(host)) {
log.info("Consider using 'gimaps' protocol instead of '{}' for enabling GMail specific extensions for {}"
, protocol, host);
}
props.setProperty("mail.store.protocol", protocol);
String imapPropPrefix = protocol.startsWith("gimap") ? "gimap" : "imap";
props.setProperty("mail." + imapPropPrefix + ".fetchsize", "" + fetchSize);
props.setProperty("mail." + imapPropPrefix + ".timeout", "" + rTimeout);
props.setProperty("mail." + imapPropPrefix + ".connectiontimeout", "" + cTimeout);
int port = -1;
int colonAt = host.indexOf(":");
if (colonAt != -1) {
port = Integer.parseInt(host.substring(colonAt + 1));
host = host.substring(0, colonAt);
}
Session session = Session.getDefaultInstance(props, null);
mailbox = session.getStore(protocol);
if (port != -1) {
mailbox.connect(host, port, user, password);
} else {
mailbox.connect(host, user, password);
}
log.info("Connected to {}'s mailbox on {}", user, host);
return true;
} catch (MessagingException e) {
String errMsg = String.format(Locale.ENGLISH,
"Failed to connect to %s server %s as user %s due to: %s", protocol,
host, user, e.toString());
log.error(errMsg, e);
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
errMsg, e);
}
}
private void createFilters() {
if (fetchMailsSince != null) {
filters.add(new MailsSinceLastCheckFilter(fetchMailsSince));
}
if (customFilter != null && !customFilter.equals("")) {
try {
Class<?> cf = Class.forName(customFilter);
Object obj = cf.getConstructor().newInstance();
if (obj instanceof CustomFilter) {
filters.add((CustomFilter) obj);
}
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Custom filter could not be created", e);
}
}
}
private void logConfig() {
if (!log.isInfoEnabled()) return;
String lineSep = System.getProperty("line.separator");
StringBuffer config = new StringBuffer();
config.append("user : ").append(user).append(lineSep);
config
.append("pwd : ")
.append(
password != null && password.length() > 0 ? "<non-null>" : "<null>")
.append(lineSep);
config.append("protocol : ").append(protocol)
.append(lineSep);
config.append("host : ").append(host)
.append(lineSep);
config.append("folders : ").append(folderNames)
.append(lineSep);
config.append("recurse : ").append(recurse)
.append(lineSep);
config.append("exclude : ").append(exclude.toString())
.append(lineSep);
config.append("include : ").append(include.toString())
.append(lineSep);
config.append("batchSize : ").append(batchSize)
.append(lineSep);
config.append("fetchSize : ").append(fetchSize)
.append(lineSep);
config.append("read timeout : ").append(rTimeout)
.append(lineSep);
config.append("conection timeout : ").append(cTimeout)
.append(lineSep);
config.append("custom filter : ").append(customFilter)
.append(lineSep);
config.append("fetch mail since : ").append(fetchMailsSince)
.append(lineSep);
config.append("includeContent : ").append(includeContent)
.append(lineSep);
config.append("processAttachments : ").append(processAttachment)
.append(lineSep);
config.append("includeOtherUserFolders : ").append(includeOtherUserFolders)
.append(lineSep);
config.append("includeSharedFolders : ").append(includeSharedFolders)
.append(lineSep);
log.info("{}", config);
}
class FolderIterator implements Iterator<Folder> {
private Store mailbox;
private List<String> topLevelFolders;
private List<Folder> folders = null;
private Folder lastFolder = null;
public FolderIterator(Store mailBox) {
this.mailbox = mailBox;
folders = new ArrayList<>();
getTopLevelFolders(mailBox);
if (includeOtherUserFolders) getOtherUserFolders();
if (includeSharedFolders) getSharedFolders();
}
public boolean hasNext() {
return !folders.isEmpty();
}
public Folder next() {
try {
boolean hasMessages = false;
Folder next;
do {
if (lastFolder != null) {
lastFolder.close(false);
lastFolder = null;
}
if (folders.isEmpty()) {
mailbox.close();
return null;
}
next = folders.remove(0);
if (next != null) {
String fullName = next.getFullName();
if (!excludeFolder(fullName)) {
hasMessages = (next.getType() & Folder.HOLDS_MESSAGES) != 0;
next.open(Folder.READ_ONLY);
lastFolder = next;
log.info("Opened folder : {}", fullName);
}
if (recurse && ((next.getType() & Folder.HOLDS_FOLDERS) != 0)) {
Folder[] children = next.list();
log.info("Added its children to list : ");
for (int i = children.length - 1; i >= 0; i--) {
folders.add(0, children[i]);
if (log.isInfoEnabled()) {
log.info("child name : {}", children[i].getFullName());
}
}
if (children.length == 0) log.info("NO children : ");
}
}
} while (!hasMessages);
return next;
} catch (Exception e) {
log.warn("Failed to read folders due to: {}", e);
// throw new
// DataImportHandlerException(DataImportHandlerException.SEVERE,
// "Folder open failed", e);
}
return null;
}
public void remove() {
throw new UnsupportedOperationException("It's read only mode...");
}
private void getTopLevelFolders(Store mailBox) {
if (folderNames != null) topLevelFolders = Arrays.asList(folderNames
.split(","));
for (int i = 0; topLevelFolders != null && i < topLevelFolders.size(); i++) {
try {
folders.add(mailbox.getFolder(topLevelFolders.get(i)));
} catch (MessagingException e) {
// skip bad ones unless it's the last one and still no good folder
if (folders.size() == 0 && i == topLevelFolders.size() - 1) throw new DataImportHandlerException(
DataImportHandlerException.SEVERE, "Folder retrieval failed");
}
}
if (topLevelFolders == null || topLevelFolders.size() == 0) {
try {
folders.add(mailBox.getDefaultFolder());
} catch (MessagingException e) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE, "Folder retrieval failed");
}
}
}
private void getOtherUserFolders() {
try {
Folder[] ufldrs = mailbox.getUserNamespaces(null);
if (ufldrs != null) {
log.info("Found {} user namespace folders", ufldrs.length);
for (Folder ufldr : ufldrs)
folders.add(ufldr);
}
} catch (MessagingException me) {
log.warn("Messaging exception retrieving user namespaces: ", me);
}
}
private void getSharedFolders() {
try {
Folder[] sfldrs = mailbox.getSharedNamespaces();
if (sfldrs != null) {
log.info("Found {} shared namespace folders", sfldrs.length);
for (Folder sfldr : sfldrs)
folders.add(sfldr);
}
} catch (MessagingException me) {
log.warn("Messaging exception retrieving shared namespaces: ", me);
}
}
private boolean excludeFolder(String name) {
for (String s : exclude) {
if (name.matches(s)) return true;
}
for (String s : include) {
if (name.matches(s)) return false;
}
return include.size() > 0;
}
}
class MessageIterator extends SearchTerm implements Iterator<Message> {
private Folder folder;
private Message[] messagesInCurBatch = null;
private int current = 0;
private int currentBatch = 0;
private int batchSize = 0;
private int totalInFolder = 0;
private boolean doBatching = true;
public MessageIterator(Folder folder, int batchSize) {
super();
try {
this.folder = folder;
this.batchSize = batchSize;
SearchTerm st = getSearchTerm();
log.info("SearchTerm={}", st);
if (st != null || folder instanceof GmailFolder) {
doBatching = false;
// Searching can still take a while even though we're only pulling
// envelopes; unless you're using gmail server-side filter, which is
// fast
if (log.isInfoEnabled()) {
log.info("Searching folder {} for messages", folder.getName());
}
final RTimer searchTimer = new RTimer();
// If using GMail, speed up the envelope processing by doing a
// server-side
// search for messages occurring on or after the fetch date (at
// midnight),
// which reduces the number of envelopes we need to pull from the
// server
// to apply the precise DateTerm filter; GMail server-side search has
// date
// granularity only but the local filters are also applied
if (folder instanceof GmailFolder && fetchMailsSince != null) {
String afterCrit = "after:" + afterFmt.format(fetchMailsSince);
log.info("Added server-side gmail filter: {}", afterCrit);
Message[] afterMessages = folder.search(new GmailRawSearchTerm(
afterCrit));
if (log.isInfoEnabled()) {
log.info("GMail server-side filter found {} messages received {} in folder {}"
, afterMessages.length, afterCrit, folder.getName());
}
// now pass in the server-side filtered messages to the local filter
messagesInCurBatch = folder.search((st != null ? st : this), afterMessages);
} else {
messagesInCurBatch = folder.search(st);
}
totalInFolder = messagesInCurBatch.length;
folder.fetch(messagesInCurBatch, fp);
current = 0;
if (log.isInfoEnabled()) {
log.info("Total messages : {}", totalInFolder);
log.info("Search criteria applied. Batching disabled. Took {} (ms)", searchTimer.getTime()); // logOk
}
} else {
totalInFolder = folder.getMessageCount();
log.info("Total messages : {}", totalInFolder);
getNextBatch(batchSize, folder);
}
} catch (MessagingException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Message retreival failed", e);
}
}
private void getNextBatch(int batchSize, Folder folder)
throws MessagingException {
// after each batch invalidate cache
if (messagesInCurBatch != null) {
for (Message m : messagesInCurBatch) {
if (m instanceof IMAPMessage) ((IMAPMessage) m).invalidateHeaders();
}
}
int lastMsg = (currentBatch + 1) * batchSize;
lastMsg = lastMsg > totalInFolder ? totalInFolder : lastMsg;
messagesInCurBatch = folder.getMessages(currentBatch * batchSize + 1,
lastMsg);
folder.fetch(messagesInCurBatch, fp);
current = 0;
currentBatch++;
log.info("Current Batch : {}", currentBatch);
log.info("Messages in this batch : {}", messagesInCurBatch.length);
}
public boolean hasNext() {
boolean hasMore = current < messagesInCurBatch.length;
if (!hasMore && doBatching && currentBatch * batchSize < totalInFolder) {
// try next batch
try {
getNextBatch(batchSize, folder);
hasMore = current < messagesInCurBatch.length;
} catch (MessagingException e) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE, "Message retrieval failed", e);
}
}
return hasMore;
}
public Message next() {
return hasNext() ? messagesInCurBatch[current++] : null;
}
public void remove() {
throw new UnsupportedOperationException("It's read only mode...");
}
private SearchTerm getSearchTerm() {
if (filters.size() == 0) return null;
if (filters.size() == 1) return filters.get(0).getCustomSearch(folder);
SearchTerm last = filters.get(0).getCustomSearch(folder);
for (int i = 1; i < filters.size(); i++) {
CustomFilter filter = filters.get(i);
SearchTerm st = filter.getCustomSearch(folder);
if (st != null) {
last = new AndTerm(last, st);
}
}
return last;
}
public boolean match(Message message) {
return true;
}
}
static class MailsSinceLastCheckFilter implements CustomFilter {
private Date since;
public MailsSinceLastCheckFilter(Date date) {
since = date;
}
@SuppressWarnings("serial")
public SearchTerm getCustomSearch(final Folder folder) {
if (log.isInfoEnabled()) {
log.info("Building mail filter for messages in {} that occur after {}"
, folder.getName(), sinceDateParser.format(since));
}
return new DateTerm(ComparisonTerm.GE, since) {
private int matched = 0;
private int seen = 0;
@Override
public boolean match(Message msg) {
boolean isMatch = false;
++seen;
try {
Date msgDate = msg.getReceivedDate();
if (msgDate == null) msgDate = msg.getSentDate();
if (msgDate != null && msgDate.getTime() >= since.getTime()) {
++matched;
isMatch = true;
} else {
String msgDateStr = (msgDate != null) ? sinceDateParser.format(msgDate) : "null";
String sinceDateStr = (since != null) ? sinceDateParser.format(since) : "null";
if (log.isDebugEnabled()) {
log.debug("Message {} was received at [{}], since filter is [{}]"
, msg.getSubject(), msgDateStr, sinceDateStr);
}
}
} catch (MessagingException e) {
log.warn("Failed to process message due to: {}", e, e);
}
if (seen % 100 == 0) {
if (log.isInfoEnabled()) {
log.info("Matched {} of {} messages since: {}"
, matched, seen, sinceDateParser.format(since));
}
}
return isMatch;
}
};
}
}
// user settings stored in member variables
private String user;
private String password;
private String host;
private String protocol;
private String folderNames;
private List<String> exclude = new ArrayList<>();
private List<String> include = new ArrayList<>();
private boolean recurse;
private int batchSize;
private int fetchSize;
private int cTimeout;
private int rTimeout;
private Date fetchMailsSince;
private String customFilter;
private boolean processAttachment = true;
private boolean includeContent = true;
private boolean includeOtherUserFolders = false;
private boolean includeSharedFolders = false;
// holds the current state
private Store mailbox;
private boolean connected = false;
private FolderIterator folderIter;
private MessageIterator msgIter;
private List<CustomFilter> filters = new ArrayList<>();
private static FetchProfile fp = new FetchProfile();
static {
fp.add(FetchProfile.Item.ENVELOPE);
fp.add(FetchProfile.Item.FLAGS);
fp.add("X-Mailer");
}
// Fields To Index
// single valued
private static final String MESSAGE_ID = "messageId";
private static final String SUBJECT = "subject";
private static final String FROM = "from";
private static final String SENT_DATE = "sentDate";
private static final String XMAILER = "xMailer";
// multi valued
private static final String TO_CC_BCC = "allTo";
private static final String FLAGS = "flags";
private static final String CONTENT = "content";
private static final String ATTACHMENT = "attachment";
private static final String ATTACHMENT_NAMES = "attachmentNames";
// flag values
private static final String FLAG_NONE = "none";
private static final String FLAG_ANSWERED = "answered";
private static final String FLAG_DELETED = "deleted";
private static final String FLAG_DRAFT = "draft";
private static final String FLAG_FLAGGED = "flagged";
private static final String FLAG_RECENT = "recent";
private static final String FLAG_SEEN = "seen";
private int getIntFromContext(String prop, int ifNull) {
int v = ifNull;
try {
String val = context.getEntityAttribute(prop);
if (val != null) {
val = context.replaceTokens(val);
v = Integer.parseInt(val);
}
} catch (NumberFormatException e) {
// do nothing
}
return v;
}
private boolean getBoolFromContext(String prop, boolean ifNull) {
boolean v = ifNull;
String val = context.getEntityAttribute(prop);
if (val != null) {
val = context.replaceTokens(val);
v = Boolean.valueOf(val);
}
return v;
}
private String getStringFromContext(String prop, String ifNull) {
String v = ifNull;
String val = context.getEntityAttribute(prop);
if (val != null) {
val = context.replaceTokens(val);
v = val;
}
return v;
}
@SuppressForbidden(reason = "Uses context class loader as a workaround to inject correct classloader to 3rd party libs")
private static <T> T withContextClassLoader(ClassLoader loader, Supplier<T> action) {
Thread ct = Thread.currentThread();
ClassLoader prev = ct.getContextClassLoader();
try {
ct.setContextClassLoader(loader);
return action.get();
} finally {
ct.setContextClassLoader(prev);
}
}
}
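
For context on how this processor was typically wired up, here is a minimal data-config.xml sketch. The entity attributes (user, password, host, protocol, folders, fetchMailsSince, batchSize, processAttachments, includeContent) are the ones read in init() above; the host, credentials, and date are placeholder assumptions, and the surrounding <dataConfig>/<document>/<entity> layout is the usual DIH configuration shape rather than anything taken from this file.

<dataConfig>
  <document>
    <!-- host, user, password, and the since date below are placeholders -->
    <entity name="mail"
            processor="MailEntityProcessor"
            user="someone@example.com"
            password="secret"
            host="imap.example.com"
            protocol="imaps"
            folders="INBOX"
            fetchMailsSince="2014-06-30 00:00:00"
            batchSize="20"
            processAttachments="true"
            includeContent="true"/>
  </document>
</dataConfig>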


@@ -1,253 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.commons.io.IOUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.parser.html.IdentityHtmlMapper;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import static org.apache.solr.handler.dataimport.DataImporter.COLUMN;
import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
/**
* <p>An implementation of {@link EntityProcessor} which reads data from rich docs
* using <a href="http://tika.apache.org/">Apache Tika</a>
*
* <p>To index latitude/longitude data that might
* be extracted from a file's metadata, identify
* the geo field for this information with this attribute:
* <code>spatialMetadataField</code>
*
* @since solr 3.1
*/
public class TikaEntityProcessor extends EntityProcessorBase {
private static Parser EMPTY_PARSER = new EmptyParser();
private TikaConfig tikaConfig;
private String format = "text";
private boolean done = false;
private boolean extractEmbedded = false;
private String parser;
static final String AUTO_PARSER = "org.apache.tika.parser.AutoDetectParser";
private String htmlMapper;
private String spatialMetadataField;
@Override
public void init(Context context) {
super.init(context);
done = false;
}
@Override
protected void firstInit(Context context) {
super.firstInit(context);
// See similar code in ExtractingRequestHandler.inform
try {
String tikaConfigLoc = context.getResolvedEntityAttribute("tikaConfig");
if (tikaConfigLoc == null) {
ClassLoader classLoader = context.getSolrCore().getResourceLoader().getClassLoader();
try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) {
tikaConfig = new TikaConfig(is);
}
} else {
File configFile = new File(tikaConfigLoc);
if (configFile.isAbsolute()) {
tikaConfig = new TikaConfig(configFile);
} else { // in conf/
try (InputStream is = context.getSolrCore().getResourceLoader().openResource(tikaConfigLoc)) {
tikaConfig = new TikaConfig(is);
}
}
}
} catch (Exception e) {
wrapAndThrow(SEVERE, e,"Unable to load Tika Config");
}
String extractEmbeddedString = context.getResolvedEntityAttribute("extractEmbedded");
if ("true".equals(extractEmbeddedString)) {
extractEmbedded = true;
}
format = context.getResolvedEntityAttribute("format");
if(format == null)
format = "text";
if (!"html".equals(format) && !"xml".equals(format) && !"text".equals(format)&& !"none".equals(format) )
throw new DataImportHandlerException(SEVERE, "'format' can be one of text|html|xml|none");
htmlMapper = context.getResolvedEntityAttribute("htmlMapper");
if (htmlMapper == null)
htmlMapper = "default";
if (!"default".equals(htmlMapper) && !"identity".equals(htmlMapper))
throw new DataImportHandlerException(SEVERE, "'htmlMapper', if present, must be 'default' or 'identity'");
parser = context.getResolvedEntityAttribute("parser");
if(parser == null) {
parser = AUTO_PARSER;
}
spatialMetadataField = context.getResolvedEntityAttribute("spatialMetadataField");
}
@Override
public Map<String, Object> nextRow() {
if(done) return null;
Map<String, Object> row = new HashMap<>();
@SuppressWarnings({"unchecked"})
DataSource<InputStream> dataSource = context.getDataSource();
InputStream is = dataSource.getData(context.getResolvedEntityAttribute(URL));
ContentHandler contentHandler = null;
Metadata metadata = new Metadata();
StringWriter sw = new StringWriter();
try {
if ("html".equals(format)) {
contentHandler = getHtmlHandler(sw);
} else if ("xml".equals(format)) {
contentHandler = getXmlContentHandler(sw);
} else if ("text".equals(format)) {
contentHandler = getTextContentHandler(sw);
} else if("none".equals(format)){
contentHandler = new DefaultHandler();
}
} catch (TransformerConfigurationException e) {
wrapAndThrow(SEVERE, e, "Unable to create content handler");
}
Parser tikaParser = null;
if(parser.equals(AUTO_PARSER)){
tikaParser = new AutoDetectParser(tikaConfig);
} else {
tikaParser = context.getSolrCore().getResourceLoader().newInstance(parser, Parser.class);
}
try {
ParseContext context = new ParseContext();
if ("identity".equals(htmlMapper)){
context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
}
if (extractEmbedded) {
context.set(Parser.class, tikaParser);
} else {
context.set(Parser.class, EMPTY_PARSER);
}
tikaParser.parse(is, contentHandler, metadata , context);
} catch (Exception e) {
if(SKIP.equals(onError)) {
throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
"Document skipped :" + e.getMessage());
}
wrapAndThrow(SEVERE, e, "Unable to read content");
}
IOUtils.closeQuietly(is);
for (Map<String, String> field : context.getAllEntityFields()) {
if (!"true".equals(field.get("meta"))) continue;
String col = field.get(COLUMN);
String s = metadata.get(col);
if (s != null) row.put(col, s);
}
if(!"none".equals(format) ) row.put("text", sw.toString());
tryToAddLatLon(metadata, row);
done = true;
return row;
}
private void tryToAddLatLon(Metadata metadata, Map<String, Object> row) {
if (spatialMetadataField == null) return;
String latString = metadata.get(Metadata.LATITUDE);
String lonString = metadata.get(Metadata.LONGITUDE);
if (latString != null && lonString != null) {
row.put(spatialMetadataField, String.format(Locale.ROOT, "%s,%s", latString, lonString));
}
}
private static ContentHandler getHtmlHandler(Writer writer)
throws TransformerConfigurationException {
SAXTransformerFactory factory = (SAXTransformerFactory)
TransformerFactory.newInstance();
TransformerHandler handler = factory.newTransformerHandler();
handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
handler.setResult(new StreamResult(writer));
return new ContentHandlerDecorator(handler) {
@Override
public void startElement(
String uri, String localName, String name, Attributes atts)
throws SAXException {
if (XHTMLContentHandler.XHTML.equals(uri)) {
uri = null;
}
if (!"head".equals(localName)) {
super.startElement(uri, localName, name, atts);
}
}
@Override
public void endElement(String uri, String localName, String name)
throws SAXException {
if (XHTMLContentHandler.XHTML.equals(uri)) {
uri = null;
}
if (!"head".equals(localName)) {
super.endElement(uri, localName, name);
}
}
@Override
public void startPrefixMapping(String prefix, String uri) {/*no op*/ }
@Override
public void endPrefixMapping(String prefix) {/*no op*/ }
};
}
private static ContentHandler getTextContentHandler(Writer writer) {
return new BodyContentHandler(writer);
}
private static ContentHandler getXmlContentHandler(Writer writer)
throws TransformerConfigurationException {
SAXTransformerFactory factory = (SAXTransformerFactory)
TransformerFactory.newInstance();
TransformerHandler handler = factory.newTransformerHandler();
handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
handler.setResult(new StreamResult(writer));
return handler;
}
}
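
A comparable sketch for this processor, again as an illustration rather than a definitive configuration: the url, format, and spatialMetadataField attributes and the meta="true" field mappings mirror what firstInit() and nextRow() read above, and the target fields (title, author, text, home) line up with the test schema that follows. The BinFileDataSource type, the file path, and the "Author" metadata key are assumptions made for the example.

<dataConfig>
  <!-- data source type and file path are example assumptions -->
  <dataSource type="BinFileDataSource"/>
  <document>
    <entity name="tika" processor="TikaEntityProcessor"
            url="/path/to/sample.pdf"
            format="text"
            spatialMetadataField="home">
      <!-- meta="true" copies the value from Tika's Metadata, as nextRow() does above -->
      <field column="title" name="title" meta="true"/>
      <field column="Author" name="author" meta="true"/>
      <field column="text" name="text"/>
    </entity>
  </document>
</dataConfig>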


@@ -1,23 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- not a package-info.java, because we already defined this package in core/ -->
<html>
<body>
Plugins for <code>DataImportHandler</code> that have additional dependencies.
</body>
</html>


@@ -1,21 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
Apache Solr Search Server: DataImportHandler Extras contrib. <b>This contrib module is deprecated as of 8.6</b>
</body>
</html>


@@ -1,20 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<properties>
<service-loader initializableProblemHandler="ignore"/>
</properties>


@@ -1,205 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This is the Solr schema file. This file should be named "schema.xml" and
should be in the conf directory under the solr home
(i.e. ./solr/conf/schema.xml by default)
or located where the classloader for the Solr webapp can find it.
This example schema is the recommended starting point for users.
It should be kept correct and concise, usable out-of-the-box.
For more information on how to customize this file, please see
http://wiki.apache.org/solr/SchemaXml
-->
<schema name="test" version="1.2">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
Applications should change this to reflect the nature of the search collection.
version="1.1" is Solr's version number for the schema syntax and semantics. It should
not normally be changed by applications.
1.0: multiValued attribute did not exist, all fields are multiValued by nature
1.1: multiValued attribute introduced, false by default -->
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real
behavior of the fieldType.
Class names starting with "solr" refer to java classes in the
org.apache.solr.analysis package.
-->
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
- StrField and TextField support an optional compressThreshold which
limits compression (if enabled in the derived fields) to values which
exceed a certain size (in characters).
-->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<!-- The optional sortMissingLast and sortMissingFirst attributes are
currently supported on types that are sorted internally as strings.
- If sortMissingLast="true", then a sort on this field will cause documents
without the field to come after documents with the field,
regardless of the requested sort order (asc or desc).
- If sortMissingFirst="true", then a sort on this field will cause documents
without the field to come before documents with the field,
regardless of the requested sort order.
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
then default lucene sorting will be used which places docs without the
field first in an ascending sort and last in a descending sort.
-->
<!--
Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
-->
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="latLon" class="solr.LatLonType" subFieldType="double"/>
<!--
Numeric field types that index each value at various levels of precision
to accelerate range queries when the number of values between the range
endpoints is large. See the javadoc for NumericRangeQuery for internal
implementation details.
Smaller precisionStep values (specified in bits) will lead to more tokens
indexed per value, slightly larger index size, and faster range queries.
A precisionStep of 0 disables indexing at different precision levels.
-->
<fieldType name="tint" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
is a more restricted form of the canonical representation of dateTime
http://www.w3.org/TR/xmlschema-2/#dateTime
The trailing "Z" designates UTC time and is mandatory.
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
All other components are mandatory.
Expressions can also be used to denote calculations that should be
performed relative to "NOW" to determine the value, ie...
NOW/HOUR
... Round to the start of the current hour
NOW-1DAY
... Exactly 1 day prior to now
NOW/DAY+6MONTHS+3DAYS
... 6 months and 3 days in the future from the start of
the current day
Consult the TrieDateField javadocs for more information.
-->
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" sortMissingLast="true" omitNorms="true"/>
<!-- The "RandomSortField" is not used to store or search any
data. You can declare fields of this type in your schema
to generate pseudo-random orderings of your docs for sorting
purposes. The ordering is generated based on the field name
and the version of the index. As long as the index version
remains unchanged, and the same field name is reused,
the ordering of the docs will be consistent.
If you want different pseudo-random orderings of documents,
for the same version of the index, use a dynamicField and
change the name
-->
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>
<!-- solr.TextField allows the specification of custom text analyzers
specified as a tokenizer and a list of token filters. Different
analyzers may be specified for indexing and querying.
The optional positionIncrementGap puts space between multiple fields of
this type on the same document, with the purpose of preventing false phrase
matching across fields.
For more info on customizing your analyzer chain, please see
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
-->
<!-- One can also specify an existing Analyzer class that has a
default constructor via the class attribute on the analyzer element
<fieldType name="text_greek" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
</fieldType>
-->
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
<!-- A text field that uses WordDelimiterGraphFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
Synonyms and stopwords are customized by external files, and stemming is enabled.
Duplicate tokens at the same position (which may result from Stemmed Synonyms or
WordDelim parts) are removed.
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>-->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>-->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<!-- since fields of this type are by default not stored or indexed, any data added to
them will be ignored outright
-->
<fieldType name="ignored" stored="false" indexed="false" class="solr.StrField"/>
<field name="title" type="string" indexed="true" stored="true"/>
<field name="author" type="string" indexed="true" stored="true"/>
<field name="text" type="text" indexed="true" stored="true"/>
<field name="foo_i" type="int" indexed="true" stored="false"/>
<field name="home" type="latLon" indexed="true" stored="true"/>
</schema>


@@ -1,277 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<indexConfig>
<useCompoundFile>${useCompoundFile:false}</useCompoundFile>
</indexConfig>
<!-- Used to specify an alternate directory to hold all index data
other than the default ./data under the Solr home.
If replication is in use, this should match the replication configuration. -->
<dataDir>${solr.data.dir:}</dataDir>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<!-- the default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2">
<!-- A prefix of "solr." for class names is an alias that
causes solr to search appropriate packages, including
org.apache.solr.(search|update|request|core|analysis)
-->
<!-- Limit the number of deletions Solr will buffer during doc updating.
Setting this lower can help bound memory use during indexing.
-->
<maxPendingDeletes>100000</maxPendingDeletes>
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or prefix queries that expand to big boolean
queries. An exception is thrown if exceeded. -->
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
unordered sets of *all* documents that match a query.
When a new searcher is opened, its caches may be prepopulated
or "autowarmed" using data from caches in the old searcher.
autowarmCount is the number of items to prepopulate. For CaffeineCache,
the autowarmed items will be the most recently accessed items.
Parameters:
class - the SolrCache implementation (currently only CaffeineCache)
size - the maximum number of entries in the cache
initialSize - the initial capacity (number of entries) of
the cache. (see java.util.HashMap)
autowarmCount - the number of entries to prepopulate from
an old cache.
-->
<filterCache
class="solr.CaffeineCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<!-- queryResultCache caches results of searches - ordered lists of
document ids (DocList) based on a query, a sort, and the range
of documents requested. -->
<queryResultCache
class="solr.CaffeineCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<!-- documentCache caches Lucene Document objects (the stored fields for each document).
Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
<documentCache
class="solr.CaffeineCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- If true, stored fields that are not requested will be loaded lazily.
This can result in a significant speed improvement if the usual case is to
not load all stored fields, especially if the skipped fields are large compressed
text fields.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!-- Example of a generic cache. These caches may be accessed by name
through SolrIndexSearcher.getCache(), cacheLookup(), and cacheInsert().
The purpose is to enable easy caching of user/application level data.
The regenerator argument should be specified as an implementation
of solr.search.CacheRegenerator if autowarming is desired. -->
<!--
<cache name="myUserCache"
class="solr.CaffeineCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="org.mycompany.mypackage.MyRegenerator"
/>
-->
<!-- An optimization that attempts to use a filter to satisfy a search.
If the requested sort does not include score, then the filterCache
will be checked for a filter matching the query. If found, the filter
will be used as the source of document ids, and then the sort will be
applied to that.
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<!-- An optimization for use with the queryResultCache. When a search
is requested, a superset of the requested number of document ids
are collected. For example, if a search for a particular query
requests matching documents 10 through 19, and queryWindowSize is 50,
then documents 0 through 49 will be collected and cached. Any further
requests in that range can be satisfied via the cache. -->
<queryResultWindowSize>50</queryResultWindowSize>
<!-- Maximum number of documents to cache for any entry in the
queryResultCache. -->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<!--<listener event="newSearcher" class="solr.QuerySenderListener">-->
<!--<arr name="queries">-->
<!--<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>-->
<!--<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>-->
<!--<lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>-->
<!--</arr>-->
<!--</listener>-->
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain autowarming data from. -->
<!--<listener event="firstSearcher" class="solr.QuerySenderListener">-->
<!--<arr name="queries">-->
<!--</arr>-->
<!--</listener>-->
<!-- If a search request comes in and there is no current registered searcher,
then immediately register the still warming searcher and use it. If
"false" then all requests will block until the first searcher is done
warming. -->
<useColdSearcher>false</useColdSearcher>
<!-- Maximum number of searchers that may be warming in the background
concurrently. An error is returned if this limit is exceeded. Recommend
1-2 for read-only followers, higher for leaders w/o cache warming. -->
<maxWarmingSearchers>4</maxWarmingSearchers>
</query>
<requestDispatcher>
<!--Make sure your system has some authentication before enabling remote streaming!
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="-1" />
-->
<!-- Set HTTP caching related parameters (for proxy caches and clients).
       To get the behaviour of Solr 1.2 (i.e. no caching-related headers)
use the never304="true" option and do not specify a value for
<cacheControl>
-->
<httpCaching never304="true">
    <!--httpCaching lastModFrom="openTime"
                    etagSeed="Solr"-->
    <!-- lastModFrom="openTime" is the default, the Last-Modified value
         (and validation against If-Modified-Since requests) will all be
         relative to when the current Searcher was opened.
         You can change it to lastModFrom="dirLastMod" if you want the
         value to exactly correspond to when the physical index was last
         modified.
         etagSeed="..." is an option you can change to force the ETag
         header (and validation against If-None-Match requests) to be
         different even if the index has not changed (i.e. when making
         significant changes to your config file)
         lastModFrom and etagSeed are both ignored if you use the
never304="true" option.
-->
<!-- If you include a <cacheControl> directive, it will be used to
generate a Cache-Control header, as well as an Expires header
if the value contains "max-age="
By default, no Cache-Control header is generated.
You can use the <cacheControl> option even if you have set
never304="true"
-->
<!-- <cacheControl>max-age=30, public</cacheControl> -->
</httpCaching>
</requestDispatcher>
<requestHandler name="/select" class="solr.SearchHandler">
<!-- default values for query parameters -->
<lst name="defaults">
<str name="echoParams">explicit</str>
<!--
<int name="rows">10</int>
<str name="fl">*</str>
<str name="version">2.1</str>
-->
</lst>
</requestHandler>
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
</requestHandler>
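  <!-- Editorial sketch, not part of the original file: a /dataimport registration
       typically also pointed the handler at a DIH configuration through a "config"
       default, e.g.

         <lst name="defaults">
           <str name="config">data-config.xml</str>
         </lst>

       The file name above is a placeholder. Here the handler is left without
       defaults, presumably because the accompanying tests supply the configuration
       with each request. -->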
<!--
Search components are registered to SolrCore and used by Search Handlers
  By default, the following components are available:
<searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
<searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
<searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
<searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
<searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
If you register a searchComponent to one of the standard names, that will be used instead.
-->
<requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
<!--
By default, this will register the following components:
<arr name="components">
<str>query</str>
<str>facet</str>
<str>mlt</str>
<str>highlight</str>
<str>debug</str>
</arr>
To insert handlers before or after the 'standard' components, use:
<arr name="first-components">
<str>first</str>
</arr>
<arr name="last-components">
<str>last</str>
</arr>
-->
</requestHandler>
<!-- config for the admin interface -->
<admin>
<defaultQuery>*:*</defaultQuery>
</admin>
</config>

View File

@ -1,29 +0,0 @@
<!DOCTYPE html>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Title in the header</title>
</head>
<body>
<h1>H1 Header</h1>
<div>Basic div</div>
<div class="classAttribute">Div with attribute</div>
</body>
</html>

Binary file not shown.


View File

@ -1,199 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.SolrInputDocument;
import org.junit.Ignore;
import org.junit.Test;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// Test mailbox is like this: foldername(mailcount)
// top1(2) -> child11(6)
// -> child12(0)
// top2(2) -> child21(1)
// -> grandchild211(2)
// -> grandchild212(1)
// -> child22(2)
/**
* Test for MailEntityProcessor. The tests are marked as ignored because we'd need a mail server (real or mocked) for
* these to work.
*
* TODO: Find a way to make the tests actually test code
*
*
* @see org.apache.solr.handler.dataimport.MailEntityProcessor
* @since solr 1.4
*/
@Ignore("Needs a Mock Mail Server to work")
public class TestMailEntityProcessor extends AbstractDataImportHandlerTestCase {
// Credentials
private static final String user = "user";
private static final String password = "password";
private static final String host = "host";
private static final String protocol = "imaps";
private static Map<String, String> paramMap = new HashMap<>();
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testConnection() {
// also tests recurse = false and default settings
paramMap.put("folders", "top2");
paramMap.put("recurse", "false");
paramMap.put("processAttachement", "false");
DataImporter di = new DataImporter();
di.loadAndInit(getConfigFromMap(paramMap));
@SuppressWarnings({"unchecked"})
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
SolrWriterImpl swi = new SolrWriterImpl();
di.runCmd(rp, swi);
assertEquals("top1 did not return 2 messages", swi.docs.size(), 2);
}
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testRecursion() {
paramMap.put("folders", "top2");
paramMap.put("recurse", "true");
paramMap.put("processAttachement", "false");
DataImporter di = new DataImporter();
di.loadAndInit(getConfigFromMap(paramMap));
@SuppressWarnings({"unchecked"})
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
SolrWriterImpl swi = new SolrWriterImpl();
di.runCmd(rp, swi);
assertEquals("top2 and its children did not return 8 messages", swi.docs.size(), 8);
}
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testExclude() {
paramMap.put("folders", "top2");
paramMap.put("recurse", "true");
paramMap.put("processAttachement", "false");
paramMap.put("exclude", ".*grandchild.*");
DataImporter di = new DataImporter();
di.loadAndInit(getConfigFromMap(paramMap));
@SuppressWarnings({"unchecked"})
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
SolrWriterImpl swi = new SolrWriterImpl();
di.runCmd(rp, swi);
assertEquals("top2 and its direct children did not return 5 messages", swi.docs.size(), 5);
}
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testInclude() {
paramMap.put("folders", "top2");
paramMap.put("recurse", "true");
paramMap.put("processAttachement", "false");
paramMap.put("include", ".*grandchild.*");
DataImporter di = new DataImporter();
di.loadAndInit(getConfigFromMap(paramMap));
@SuppressWarnings({"unchecked"})
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
SolrWriterImpl swi = new SolrWriterImpl();
di.runCmd(rp, swi);
assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
}
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testIncludeAndExclude() {
paramMap.put("folders", "top1,top2");
paramMap.put("recurse", "true");
paramMap.put("processAttachement", "false");
paramMap.put("exclude", ".*top1.*");
paramMap.put("include", ".*grandchild.*");
DataImporter di = new DataImporter();
di.loadAndInit(getConfigFromMap(paramMap));
@SuppressWarnings({"unchecked"})
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
SolrWriterImpl swi = new SolrWriterImpl();
di.runCmd(rp, swi);
assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
}
@Test
@Ignore("Needs a Mock Mail Server to work")
@SuppressWarnings({"unchecked"})
public void testFetchTimeSince() throws ParseException {
paramMap.put("folders", "top1/child11");
paramMap.put("recurse", "true");
paramMap.put("processAttachement", "false");
paramMap.put("fetchMailsSince", "2008-12-26 00:00:00");
DataImporter di = new DataImporter();
di.loadAndInit(getConfigFromMap(paramMap));
RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
SolrWriterImpl swi = new SolrWriterImpl();
di.runCmd(rp, swi);
assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
}
private String getConfigFromMap(Map<String, String> params) {
String conf =
"<dataConfig>" +
"<document>" +
"<entity processor=\"org.apache.solr.handler.dataimport.MailEntityProcessor\" " +
"someconfig" +
"/>" +
"</document>" +
"</dataConfig>";
params.put("user", user);
params.put("password", password);
params.put("host", host);
params.put("protocol", protocol);
StringBuilder attribs = new StringBuilder("");
for (String key : params.keySet())
attribs.append(" ").append(key).append("=" + "\"").append(params.get(key)).append("\"");
attribs.append(" ");
return conf.replace("someconfig", attribs.toString());
}
static class SolrWriterImpl extends SolrWriter {
List<SolrInputDocument> docs = new ArrayList<>();
Boolean deleteAllCalled;
Boolean commitCalled;
public SolrWriterImpl() {
super(null, null);
}
@Override
public boolean upload(SolrInputDocument doc) {
return docs.add(doc);
}
@Override
public void doDeleteAll() {
deleteAllCalled = Boolean.TRUE;
}
@Override
public void commit(boolean b) {
commitCalled = Boolean.TRUE;
}
}
}

View File

@ -1,221 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.Locale;
/**Testcase for TikaEntityProcessor
*
* @since solr 3.1
*/
public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
private String conf =
"<dataConfig>" +
" <dataSource type=\"BinFileDataSource\"/>" +
" <document>" +
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
" <field column=\"Author\" meta=\"true\" name=\"author\"/>" +
" <field column=\"title\" meta=\"true\" name=\"title\"/>" +
" <field column=\"text\"/>" +
" </entity>" +
" </document>" +
"</dataConfig>";
private String skipOnErrConf =
"<dataConfig>" +
" <dataSource type=\"BinFileDataSource\"/>" +
" <document>" +
" <entity name=\"Tika\" onError=\"skip\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/bad.doc").getAbsolutePath() + "\" >" +
"<field column=\"content\" name=\"text\"/>" +
" </entity>" +
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
" <field column=\"text\"/>" +
"</entity>" +
" </document>" +
"</dataConfig>";
private String spatialConf =
"<dataConfig>" +
" <dataSource type=\"BinFileDataSource\"/>" +
" <document>" +
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" +
getFile("dihextras/test_jpeg.jpg").getAbsolutePath() + "\" spatialMetadataField=\"home\">" +
" <field column=\"text\"/>" +
" </entity>" +
" </document>" +
"</dataConfig>";
private String vsdxConf =
"<dataConfig>" +
" <dataSource type=\"BinFileDataSource\"/>" +
" <document>" +
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/test_vsdx.vsdx").getAbsolutePath() + "\" >" +
" <field column=\"text\"/>" +
" </entity>" +
" </document>" +
"</dataConfig>";
private String[] tests = {
"//*[@numFound='1']"
,"//str[@name='author'][.='Grant Ingersoll']"
,"//str[@name='title'][.='solr-word']"
,"//str[@name='text']"
};
private String[] testsHTMLDefault = {
"//*[@numFound='1']"
, "//str[@name='text'][contains(.,'Basic div')]"
, "//str[@name='text'][contains(.,'<h1>')]"
, "//str[@name='text'][not(contains(.,'<div>'))]" //default mapper lower-cases elements as it maps
, "//str[@name='text'][not(contains(.,'<DIV>'))]"
};
private String[] testsHTMLIdentity = {
"//*[@numFound='1']"
, "//str[@name='text'][contains(.,'Basic div')]"
, "//str[@name='text'][contains(.,'<h1>')]"
, "//str[@name='text'][contains(.,'<div>')]"
, "//str[@name='text'][contains(.,'class=\"classAttribute\"')]" //attributes are lower-cased
};
private String[] testsSpatial = {
"//*[@numFound='1']"
};
private String[] testsEmbedded = {
"//*[@numFound='1']",
"//str[@name='text'][contains(.,'When in the Course')]"
};
private String[] testsIgnoreEmbedded = {
"//*[@numFound='1']",
"//str[@name='text'][not(contains(.,'When in the Course'))]"
};
private String[] testsVSDX = {
"//*[@numFound='1']",
"//str[@name='text'][contains(.,'Arrears')]"
};
@BeforeClass
public static void beforeClass() throws Exception {
assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)",
new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage()));
initCore("dataimport-solrconfig.xml", "dataimport-schema-no-unique-key.xml", getFile("dihextras/solr").getAbsolutePath());
}
@Test
public void testIndexingWithTikaEntityProcessor() throws Exception {
runFullImport(conf);
assertQ(req("*:*"), tests );
}
@Test
public void testSkip() throws Exception {
runFullImport(skipOnErrConf);
assertQ(req("*:*"), "//*[@numFound='1']");
}
@Test
public void testVSDX() throws Exception {
//this ensures that we've included the curvesapi dependency
//and that the ConnectsType class is bundled with poi-ooxml-schemas.
runFullImport(vsdxConf);
assertQ(req("*:*"), testsVSDX);
}
@Test
public void testTikaHTMLMapperEmpty() throws Exception {
runFullImport(getConfigHTML(null));
assertQ(req("*:*"), testsHTMLDefault);
}
@Test
public void testTikaHTMLMapperDefault() throws Exception {
runFullImport(getConfigHTML("default"));
assertQ(req("*:*"), testsHTMLDefault);
}
@Test
public void testTikaHTMLMapperIdentity() throws Exception {
runFullImport(getConfigHTML("identity"));
assertQ(req("*:*"), testsHTMLIdentity);
}
@Test
public void testTikaGeoMetadata() throws Exception {
runFullImport(spatialConf);
String pt = "38.97,-77.018";
Double distance = 5.0d;
assertQ(req("q", "*:* OR foo_i:" + random().nextInt(100), "fq",
"{!geofilt sfield=\"home\"}\"",
"pt", pt, "d", String.valueOf(distance)), testsSpatial);
}
private String getConfigHTML(String htmlMapper) {
return
"<dataConfig>" +
" <dataSource type='BinFileDataSource'/>" +
" <document>" +
" <entity name='Tika' format='xml' processor='TikaEntityProcessor' " +
" url='" + getFile("dihextras/structured.html").getAbsolutePath() + "' " +
((htmlMapper == null) ? "" : (" htmlMapper='" + htmlMapper + "'")) + ">" +
" <field column='text'/>" +
" </entity>" +
" </document>" +
"</dataConfig>";
}
@Test
public void testEmbeddedDocsLegacy() throws Exception {
//test legacy behavior: ignore embedded docs
runFullImport(conf);
assertQ(req("*:*"), testsIgnoreEmbedded);
}
@Test
public void testEmbeddedDocsTrue() throws Exception {
runFullImport(getConfigEmbedded(true));
assertQ(req("*:*"), testsEmbedded);
}
@Test
public void testEmbeddedDocsFalse() throws Exception {
runFullImport(getConfigEmbedded(false));
assertQ(req("*:*"), testsIgnoreEmbedded);
}
private String getConfigEmbedded(boolean extractEmbedded) {
return
"<dataConfig>" +
" <dataSource type=\"BinFileDataSource\"/>" +
" <document>" +
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" +
getFile("dihextras/test_recursive_embedded.docx").getAbsolutePath() + "\" " +
" extractEmbedded=\""+extractEmbedded+"\">" +
" <field column=\"Author\" meta=\"true\" name=\"author\"/>" +
" <field column=\"title\" meta=\"true\" name=\"title\"/>" +
" <field column=\"text\"/>" +
" </entity>" +
" </document>" +
"</dataConfig>";
}
}

View File

@ -1,26 +0,0 @@
Apache Solr - DataImportHandler
================================
Introduction
------------
DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and
HTTP data sources quick and easy.
Important Note
--------------
Although Solr strives to be agnostic of the Locale where the server is
running, some code paths in DataImportHandler are known to depend on the
System default Locale, Timezone, or Charset. It is recommended that when
running Solr you set the following system properties:
-Duser.language=xx -Duser.country=YY -Duser.timezone=ZZZ
where xx, YY, and ZZZ are consistent with any database server's configuration.
Deprecation notice
------------------
This contrib module is deprecated as of v8.6, scheduled for removal in Solr 9.0.
The reason is that DIH is no longer being maintained in a manner we feel is necessary in order to keep it
healthy and secure. Also, it was not designed to work with SolrCloud and does not meet current performance requirements.
The project hopes that the community will take over maintenance of DIH as a third-party package (see SOLR-14066 for more details). Please reach out to us on the dev@ mailing list if you want to help.

View File

@ -1,34 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
apply plugin: 'java-library'
description = 'Data Import Handler'
dependencies {
implementation project(':solr:core')
testImplementation project(':solr:test-framework')
testImplementation('org.mockito:mockito-core', {
exclude group: "net.bytebuddy", module: "byte-buddy-agent"
})
testImplementation ('org.hsqldb:hsqldb')
testImplementation ('org.apache.derby:derby')
testImplementation ('org.objenesis:objenesis')
}

View File

@ -1,70 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.util.ContentStream;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import java.io.InputStream;
import java.io.IOException;
import java.util.Properties;
/**
 * <p> A data source implementation which can be used to read a binary stream from content streams. </p> <p> Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
 * details. </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 3.1
*/
public class BinContentStreamDataSource extends DataSource<InputStream> {
private ContextImpl context;
private ContentStream contentStream;
private InputStream in;
@Override
public void init(Context context, Properties initProps) {
this.context = (ContextImpl) context;
}
@Override
public InputStream getData(String query) {
contentStream = context.getDocBuilder().getReqParams().getContentStream();
if (contentStream == null)
throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body");
try {
return in = contentStream.getStream();
} catch (IOException e) {
DataImportHandlerException.wrapAndThrow(SEVERE, e);
return null;
}
}
@Override
public void close() {
if (contentStream != null) {
try {
if (in == null) in = contentStream.getStream();
in.close();
} catch (IOException e) {
/*no op*/
}
}
}
}

View File

@ -1,64 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import java.io.InputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.Properties;
/**
* <p>
* A DataSource which reads from local files
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 3.1
*/
public class BinFileDataSource extends DataSource<InputStream>{
protected String basePath;
@Override
public void init(Context context, Properties initProps) {
basePath = initProps.getProperty(FileDataSource.BASE_PATH);
}
@Override
public InputStream getData(String query) {
File f = FileDataSource.getFile(basePath,query);
try {
return new FileInputStream(f);
} catch (FileNotFoundException e) {
wrapAndThrow(SEVERE,e,"Unable to open file "+f.getAbsolutePath());
return null;
}
}
@Override
public void close() {
}
}

View File

@ -1,104 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
import static org.apache.solr.handler.dataimport.URLDataSource.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.InputStream;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.net.URLConnection;
import java.util.Properties;
/**
* <p> A data source implementation which can be used to read binary streams using HTTP. </p> <p> Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
* details. </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 3.1
*/
public class BinURLDataSource extends DataSource<InputStream>{
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private String baseUrl;
private int connectionTimeout = CONNECTION_TIMEOUT;
private int readTimeout = READ_TIMEOUT;
private Context context;
private Properties initProps;
public BinURLDataSource() { }
@Override
public void init(Context context, Properties initProps) {
this.context = context;
this.initProps = initProps;
baseUrl = getInitPropWithReplacements(BASE_URL);
String cTimeout = getInitPropWithReplacements(CONNECTION_TIMEOUT_FIELD_NAME);
String rTimeout = getInitPropWithReplacements(READ_TIMEOUT_FIELD_NAME);
if (cTimeout != null) {
try {
connectionTimeout = Integer.parseInt(cTimeout);
} catch (NumberFormatException e) {
log.warn("Invalid connection timeout: {}", cTimeout);
}
}
if (rTimeout != null) {
try {
readTimeout = Integer.parseInt(rTimeout);
} catch (NumberFormatException e) {
log.warn("Invalid read timeout: {}", rTimeout);
}
}
}
@Override
public InputStream getData(String query) {
URL url = null;
try {
if (URIMETHOD.matcher(query).find()) url = new URL(query);
else url = new URL(baseUrl + query);
log.debug("Accessing URL: {}", url);
URLConnection conn = url.openConnection();
conn.setConnectTimeout(connectionTimeout);
conn.setReadTimeout(readTimeout);
return conn.getInputStream();
} catch (Exception e) {
log.error("Exception thrown while getting data", e);
wrapAndThrow (SEVERE, e, "Exception in invoking url " + url);
return null;//unreachable
}
}
@Override
public void close() { }
private String getInitPropWithReplacements(String propertyName) {
final String expr = initProps.getProperty(propertyName);
if (expr == null) {
return null;
}
return context.replaceTokens(expr);
}
}
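A hedged sketch of how this data source was typically declared in a DIH data-config file. The attribute names follow the init properties read above (baseUrl, connectionTimeout, readTimeout, assuming the conventional URLDataSource constant values); the URL, entity and field details are invented placeholders, not taken from this change:

  <dataConfig>
    <dataSource type="BinURLDataSource" baseUrl="http://example.com/files/"
                connectionTimeout="5000" readTimeout="10000"/>
    <document>
      <!-- A binary stream is usually handed to a processor that can parse it, e.g. Tika -->
      <entity name="remoteDoc" processor="TikaEntityProcessor" url="report.pdf">
        <field column="text"/>
      </entity>
    </document>
  </dataConfig>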

View File

@ -1,48 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
public class CachePropertyUtil {
public static String getAttributeValueAsString(Context context, String attr) {
Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY);
if (o == null) {
o = context.getResolvedEntityAttribute(attr);
}
if (o == null && context.getRequestParameters() != null) {
o = context.getRequestParameters().get(attr);
}
if (o == null) {
return null;
}
return o.toString();
}
public static Object getAttributeValue(Context context, String attr) {
Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY);
if (o == null) {
o = context.getResolvedEntityAttribute(attr);
}
if (o == null && context.getRequestParameters() != null) {
o = context.getRequestParameters().get(attr);
}
if (o == null) {
return null;
}
return o;
}
}
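A minimal usage sketch; the "cacheKey" attribute name follows the entity attribute mentioned in DIHCacheSupport later in this change, and the surrounding component holding the Context is assumed:

  // Falls back from the entity-scoped session attribute to the resolved entity
  // attribute and finally to the request parameters.
  String cacheKey = CachePropertyUtil.getAttributeValueAsString(context, "cacheKey");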

View File

@ -1,85 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.HTMLStripTransformer.TRUE;
import java.io.IOException;
import java.io.Reader;
import java.sql.Clob;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* {@link Transformer} instance which converts a {@link Clob} to a {@link String}.
* <p>
* Refer to <a href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* <p>
* <b>This API is experimental and subject to change</b>
*
* @since solr 1.4
*/
public class ClobTransformer extends Transformer {
@Override
public Object transformRow(Map<String, Object> aRow, Context context) {
for (Map<String, String> map : context.getAllEntityFields()) {
if (!TRUE.equals(map.get(CLOB))) continue;
String column = map.get(DataImporter.COLUMN);
String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
if (srcCol == null)
srcCol = column;
Object o = aRow.get(srcCol);
if (o instanceof List) {
@SuppressWarnings({"unchecked"})
List<Clob> inputs = (List<Clob>) o;
List<String> results = new ArrayList<>();
for (Object input : inputs) {
if (input instanceof Clob) {
Clob clob = (Clob) input;
results.add(readFromClob(clob));
}
}
aRow.put(column, results);
} else {
if (o instanceof Clob) {
Clob clob = (Clob) o;
aRow.put(column, readFromClob(clob));
}
}
}
return aRow;
}
private String readFromClob(Clob clob) {
Reader reader = FieldReaderDataSource.readCharStream(clob);
StringBuilder sb = new StringBuilder();
char[] buf = new char[1024];
int len;
try {
while ((len = reader.read(buf)) != -1) {
sb.append(buf, 0, len);
}
} catch (IOException e) {
DataImportHandlerException.wrapAndThrow(DataImportHandlerException.SEVERE, e);
}
return sb.toString();
}
public static final String CLOB = "clob";
}
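A hedged data-config sketch of how this transformer was typically enabled; the entity name, query and column are invented placeholders:

  <entity name="item" transformer="ClobTransformer"
          query="select id, details from item">
    <!-- clob="true" marks the column for conversion (see the CLOB constant above) -->
    <field column="details" clob="true"/>
  </entity>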

View File

@ -1,73 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class ConfigParseUtil {
public static String getStringAttribute(Element e, String name, String def) {
String r = e.getAttribute(name);
if (r == null || "".equals(r.trim()))
r = def;
return r;
}
public static HashMap<String, String> getAllAttributes(Element e) {
HashMap<String, String> m = new HashMap<>();
NamedNodeMap nnm = e.getAttributes();
for (int i = 0; i < nnm.getLength(); i++) {
m.put(nnm.item(i).getNodeName(), nnm.item(i).getNodeValue());
}
return m;
}
public static String getText(Node elem, StringBuilder buffer) {
if (elem.getNodeType() != Node.CDATA_SECTION_NODE) {
NodeList childs = elem.getChildNodes();
for (int i = 0; i < childs.getLength(); i++) {
Node child = childs.item(i);
short childType = child.getNodeType();
if (childType != Node.COMMENT_NODE
&& childType != Node.PROCESSING_INSTRUCTION_NODE) {
getText(child, buffer);
}
}
} else {
buffer.append(elem.getNodeValue());
}
return buffer.toString();
}
public static List<Element> getChildNodes(Element e, String byName) {
List<Element> result = new ArrayList<>();
NodeList l = e.getChildNodes();
for (int i = 0; i < l.getLength(); i++) {
if (e.equals(l.item(i).getParentNode())
&& byName.equals(l.item(i).getNodeName()))
result.add((Element) l.item(i));
}
return result;
}
}

View File

@ -1,69 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.util.ContentStream;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import java.io.IOException;
import java.io.Reader;
import java.util.Properties;
/**
* A DataSource implementation which reads from the ContentStream of a POST request
* <p>
* Refer to <a href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.4
*/
public class ContentStreamDataSource extends DataSource<Reader> {
private ContextImpl context;
private ContentStream contentStream;
private Reader reader;
@Override
public void init(Context context, Properties initProps) {
this.context = (ContextImpl) context;
}
@Override
public Reader getData(String query) {
contentStream = context.getDocBuilder().getReqParams().getContentStream();
if (contentStream == null)
throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body");
try {
return reader = contentStream.getReader();
} catch (IOException e) {
DataImportHandlerException.wrapAndThrow(SEVERE, e);
return null;
}
}
@Override
public void close() {
if (contentStream != null) {
try {
if (reader == null) reader = contentStream.getReader();
reader.close();
      } catch (IOException e) {
        /* no op */
      }
}
}
}

View File

@ -1,221 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.core.SolrCore;
import java.util.List;
import java.util.Map;
/**
* <p>
* This abstract class gives access to all available objects. So any
* component implemented by a user can have the full power of DataImportHandler
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and subject to change</b>
*
* @since solr 1.3
*/
public abstract class Context {
public static final String FULL_DUMP = "FULL_DUMP", DELTA_DUMP = "DELTA_DUMP", FIND_DELTA = "FIND_DELTA";
/**
* An object stored in entity scope is valid only for the current entity for the current document only.
*/
public static final String SCOPE_ENTITY = "entity";
/**
* An object stored in global scope is available for the current import only but across entities and documents.
*/
public static final String SCOPE_GLOBAL = "global";
/**
* An object stored in document scope is available for the current document only but across entities.
*/
public static final String SCOPE_DOC = "document";
/**
* An object stored in 'solrcore' scope is available across imports, entities and documents throughout the life of
* a solr core. A solr core unload or reload will destroy this data.
*/
public static final String SCOPE_SOLR_CORE = "solrcore";
/**
* Get the value of any attribute put into this entity
*
* @param name name of the attribute eg: 'name'
* @return value of named attribute in entity
*/
public abstract String getEntityAttribute(String name);
/**
* Get the value of any attribute put into this entity after resolving all variables found in the attribute value
* @param name name of the attribute
* @return value of the named attribute after resolving all variables
*/
public abstract String getResolvedEntityAttribute(String name);
/**
   * Returns all the fields put into an entity. Each item (which is a map) in
   * the list corresponds to one field; each map contains the attribute
   * names and values of that field.
*
* @return all fields in an entity
*/
public abstract List<Map<String, String>> getAllEntityFields();
/**
* Returns the VariableResolver used in this entity which can be used to
   * resolve the tokens in ${&lt;namespace.name&gt;}
*
* @return a VariableResolver instance
* @see org.apache.solr.handler.dataimport.VariableResolver
*/
public abstract VariableResolver getVariableResolver();
/**
* Gets the datasource instance defined for this entity. Do not close() this instance.
* Transformers should use the getDataSource(String name) method.
*
* @return a new DataSource instance as configured for the current entity
* @see org.apache.solr.handler.dataimport.DataSource
* @see #getDataSource(String)
*/
@SuppressWarnings({"rawtypes"})
public abstract DataSource getDataSource();
/**
* Gets a new DataSource instance with a name. Ensure that you close() this after use
* because this is created just for this method call.
*
* @param name Name of the dataSource as defined in the dataSource tag
* @return a new DataSource instance
* @see org.apache.solr.handler.dataimport.DataSource
*/
@SuppressWarnings({"rawtypes"})
public abstract DataSource getDataSource(String name);
/**
* Returns the instance of EntityProcessor used for this entity
*
* @return instance of EntityProcessor used for the current entity
* @see org.apache.solr.handler.dataimport.EntityProcessor
*/
public abstract EntityProcessor getEntityProcessor();
/**
   * Store a value under the given name and scope (entity, document, global)
*
* @param name the key
* @param val the value
* @param scope the scope in which the given key, value pair is to be stored
*/
public abstract void setSessionAttribute(String name, Object val, String scope);
/**
   * Get a value by name in the given scope (entity, document, global)
*
* @param name the key
* @param scope the scope from which the value is to be retrieved
* @return the object stored in the given scope with the given key
*/
public abstract Object getSessionAttribute(String name, String scope);
/**
   * Get the context instance for the parent entity. Works only in a full dump.
   * If the current entity is the root entity, null is returned.
*
* @return parent entity's Context
*/
public abstract Context getParentContext();
/**
   * The request parameters passed over HTTP for this command. The values in the
   * map are either String (for single-valued parameters) or List&lt;String&gt; (for
   * multi-valued parameters)
*
* @return the request parameters passed in the URL to initiate this process
*/
public abstract Map<String, Object> getRequestParameters();
/**
   * Returns whether the current entity is the root entity
*
* @return true if current entity is the root entity, false otherwise
*/
public abstract boolean isRootEntity();
/**
   * Returns the current process: FULL_DUMP, DELTA_DUMP, or FIND_DELTA
*
* @return the type of the current running process
*/
public abstract String currentProcess();
/**
* Exposing the actual SolrCore to the components
*
* @return the core
*/
public abstract SolrCore getSolrCore();
/**
* Makes available some basic running statistics such as "docCount",
* "deletedDocCount", "rowCount", "queryCount" and "skipDocCount"
*
* @return a Map containing running statistics of the current import
*/
public abstract Map<String, Object> getStats();
/**
* Returns the text specified in the script tag in the data-config.xml
*/
public abstract String getScript();
/**
* Returns the language of the script as specified in the script tag in data-config.xml
*/
public abstract String getScriptLanguage();
  /** Delete a document by id
   */
  public abstract void deleteDoc(String id);
  /** Delete documents by query
   */
  public abstract void deleteDocByQuery(String query);
  /** Use this directly to resolve a variable
   * @param var the variable name
   * @return the resolved value
   */
  public abstract Object resolve(String var);
  /** Resolve variables in a template
   *
   * @return the string with variables resolved
   */
  public abstract String replaceTokens(String template);
}
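To illustrate how a user-written component consumes this API, a minimal transformer sketch; the class name, entity attribute and variable below are invented for the example, while the transformRow signature and the Context methods mirror ClobTransformer and the abstract class above:

  package org.apache.solr.handler.dataimport;

  import java.util.Map;

  // Illustrative only: appends a suffix, taken from an entity attribute and a
  // resolved variable, to one column of each row.
  public class SuffixTransformer extends Transformer {
    @Override
    public Object transformRow(Map<String, Object> row, Context context) {
      String column = context.getEntityAttribute("suffixColumn");      // invented attribute
      Object suffix = context.resolve("dataimporter.request.suffix");  // invented variable
      if (column != null && suffix != null && row.get(column) != null) {
        row.put(column, row.get(column) + String.valueOf(suffix));
      }
      return row;
    }
  }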

View File

@ -1,264 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.dataimport.config.Script;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* <p>
* An implementation for the Context
* </p>
* <b>This API is experimental and subject to change</b>
*
* @since solr 1.3
*/
public class ContextImpl extends Context {
protected EntityProcessorWrapper epw;
private ContextImpl parent;
private VariableResolver resolver;
@SuppressWarnings({"rawtypes"})
private DataSource ds;
private String currProcess;
private Map<String, Object> requestParams;
private DataImporter dataImporter;
private Map<String, Object> entitySession, globalSession;
private Exception lastException = null;
DocBuilder.DocWrapper doc;
DocBuilder docBuilder;
public ContextImpl(EntityProcessorWrapper epw, VariableResolver resolver,
@SuppressWarnings({"rawtypes"})DataSource ds, String currProcess,
Map<String, Object> global, ContextImpl parentContext, DocBuilder docBuilder) {
this.epw = epw;
this.docBuilder = docBuilder;
this.resolver = resolver;
this.ds = ds;
this.currProcess = currProcess;
if (docBuilder != null) {
this.requestParams = docBuilder.getReqParams().getRawParams();
dataImporter = docBuilder.dataImporter;
}
globalSession = global;
parent = parentContext;
}
@Override
public String getEntityAttribute(String name) {
return epw==null || epw.getEntity() == null ? null : epw.getEntity().getAllAttributes().get(name);
}
@Override
public String getResolvedEntityAttribute(String name) {
return epw==null || epw.getEntity() == null ? null : resolver.replaceTokens(epw.getEntity().getAllAttributes().get(name));
}
@Override
public List<Map<String, String>> getAllEntityFields() {
return epw==null || epw.getEntity() == null ? Collections.emptyList() : epw.getEntity().getAllFieldsList();
}
@Override
public VariableResolver getVariableResolver() {
return resolver;
}
@Override
@SuppressWarnings({"rawtypes"})
public DataSource getDataSource() {
if (ds != null) return ds;
if(epw==null) { return null; }
if (epw!=null && epw.getDatasource() == null) {
epw.setDatasource(dataImporter.getDataSourceInstance(epw.getEntity(), epw.getEntity().getDataSourceName(), this));
}
if (epw!=null && epw.getDatasource() != null && docBuilder != null && docBuilder.verboseDebug &&
Context.FULL_DUMP.equals(currentProcess())) {
//debug is not yet implemented properly for deltas
epw.setDatasource(docBuilder.getDebugLogger().wrapDs(epw.getDatasource()));
}
return epw.getDatasource();
}
@Override
@SuppressWarnings({"rawtypes"})
public DataSource getDataSource(String name) {
return dataImporter.getDataSourceInstance(epw==null ? null : epw.getEntity(), name, this);
}
@Override
public boolean isRootEntity() {
return epw==null ? false : epw.getEntity().isDocRoot();
}
@Override
public String currentProcess() {
return currProcess;
}
@Override
public Map<String, Object> getRequestParameters() {
return requestParams;
}
@Override
public EntityProcessor getEntityProcessor() {
return epw;
}
@Override
public void setSessionAttribute(String name, Object val, String scope) {
if(name == null) {
return;
}
if (Context.SCOPE_ENTITY.equals(scope)) {
if (entitySession == null) {
entitySession = new HashMap<>();
}
entitySession.put(name, val);
} else if (Context.SCOPE_GLOBAL.equals(scope)) {
if (globalSession != null) {
globalSession.put(name, val);
}
} else if (Context.SCOPE_DOC.equals(scope)) {
DocBuilder.DocWrapper doc = getDocument();
if (doc != null) {
doc.setSessionAttribute(name, val);
}
} else if (SCOPE_SOLR_CORE.equals(scope)){
if(dataImporter != null) {
dataImporter.putToCoreScopeSession(name, val);
}
}
}
@Override
public Object getSessionAttribute(String name, String scope) {
if (Context.SCOPE_ENTITY.equals(scope)) {
if (entitySession == null)
return null;
return entitySession.get(name);
} else if (Context.SCOPE_GLOBAL.equals(scope)) {
if (globalSession != null) {
return globalSession.get(name);
}
} else if (Context.SCOPE_DOC.equals(scope)) {
DocBuilder.DocWrapper doc = getDocument();
return doc == null ? null: doc.getSessionAttribute(name);
} else if (SCOPE_SOLR_CORE.equals(scope)){
return dataImporter == null ? null : dataImporter.getFromCoreScopeSession(name);
}
return null;
}
@Override
public Context getParentContext() {
return parent;
}
private DocBuilder.DocWrapper getDocument() {
ContextImpl c = this;
while (true) {
if (c.doc != null)
return c.doc;
if (c.parent != null)
c = c.parent;
else
return null;
}
}
void setDoc(DocBuilder.DocWrapper docWrapper) {
this.doc = docWrapper;
}
@Override
public SolrCore getSolrCore() {
return dataImporter == null ? null : dataImporter.getCore();
}
@Override
public Map<String, Object> getStats() {
return docBuilder != null ? docBuilder.importStatistics.getStatsSnapshot() : Collections.<String, Object>emptyMap();
}
@Override
public String getScript() {
if (dataImporter != null) {
Script script = dataImporter.getConfig().getScript();
return script == null ? null : script.getText();
}
return null;
}
@Override
public String getScriptLanguage() {
if (dataImporter != null) {
Script script = dataImporter.getConfig().getScript();
return script == null ? null : script.getLanguage();
}
return null;
}
@Override
public void deleteDoc(String id) {
if(docBuilder != null){
docBuilder.writer.deleteDoc(id);
}
}
@Override
public void deleteDocByQuery(String query) {
if(docBuilder != null){
docBuilder.writer.deleteByQuery(query);
}
}
DocBuilder getDocBuilder(){
return docBuilder;
}
@Override
public Object resolve(String var) {
return resolver.resolve(var);
}
@Override
public String replaceTokens(String template) {
return resolver.replaceTokens(template);
}
public Exception getLastException() { return lastException; }
public void setLastException(Exception lastException) {this.lastException = lastException; }
}

View File

@ -1,103 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.Iterator;
import java.util.Map;
/**
* <p>
 * A cache that allows a DIH entity's data to persist locally prior to being joined
 * to other data and/or indexed.
* </p>
*
* @lucene.experimental
*/
public interface DIHCache extends Iterable<Map<String,Object>> {
/**
* <p>
* Opens the cache using the specified properties. The {@link Context}
* includes any parameters needed by the cache impl. This must be called
* before any read/write operations are permitted.
*/
void open(Context context);
/**
* <p>
* Releases resources used by this cache, if possible. The cache is flushed
* but not destroyed.
* </p>
*/
void close();
/**
* <p>
* Persists any pending data to the cache
* </p>
*/
void flush();
/**
* <p>
* Closes the cache, if open. Then removes all data, possibly removing the
* cache entirely from persistent storage.
* </p>
*/
public void destroy();
/**
* <p>
* Adds a document. If a document already exists with the same key, both
* documents will exist in the cache, as the cache allows duplicate keys. To
* update a key's documents, first call delete(Object key).
* </p>
*/
void add(Map<String, Object> rec);
/**
* <p>
* Returns an iterator, allowing callers to iterate through the entire cache
* in key, then insertion, order.
* </p>
*/
@Override
Iterator<Map<String,Object>> iterator();
/**
* <p>
* Returns an iterator, allowing callers to iterate through all documents that
* match the given key in insertion order.
* </p>
*/
Iterator<Map<String,Object>> iterator(Object key);
/**
* <p>
* Delete all documents associated with the given key
* </p>
*/
void delete(Object key);
/**
* <p>
   * Delete all data from the cache, leaving the empty cache intact.
* </p>
*/
void deleteAll();
}
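A rough sketch of the call sequence the interface implies; SomeDIHCacheImpl, the key and the row are assumptions (imports of java.util.Iterator and java.util.Map are also assumed), and only the DIHCache methods come from the interface above:

  static void cacheSketch(Context context, Map<String, Object> row, Object key) {
    DIHCache cache = new SomeDIHCacheImpl();  // hypothetical implementation
    cache.open(context);                      // must be called before any read/write
    cache.add(row);                           // duplicate keys are allowed; delete(key) first to replace
    cache.flush();                            // persist pending data
    for (Iterator<Map<String, Object>> it = cache.iterator(key); it.hasNext(); ) {
      Map<String, Object> doc = it.next();    // documents for this key, in insertion order
      // process doc ...
    }
    cache.close();                            // flushes but does not destroy the cache
  }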

View File

@ -1,279 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import java.lang.invoke.MethodHandles;
import java.lang.reflect.Constructor;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.solr.common.SolrException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DIHCacheSupport {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private String cacheForeignKey;
private String cacheImplName;
private Map<String,DIHCache> queryVsCache = new HashMap<>();
private Map<String,Iterator<Map<String,Object>>> queryVsCacheIterator;
private Iterator<Map<String,Object>> dataSourceRowCache;
private boolean cacheDoKeyLookup;
public DIHCacheSupport(Context context, String cacheImplName) {
this.cacheImplName = cacheImplName;
Relation r = new Relation(context);
cacheDoKeyLookup = r.doKeyLookup;
String cacheKey = r.primaryKey;
cacheForeignKey = r.foreignKey;
context.setSessionAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY, cacheKey,
Context.SCOPE_ENTITY);
context.setSessionAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY, cacheForeignKey,
Context.SCOPE_ENTITY);
context.setSessionAttribute(DIHCacheSupport.CACHE_DELETE_PRIOR_DATA,
"true", Context.SCOPE_ENTITY);
context.setSessionAttribute(DIHCacheSupport.CACHE_READ_ONLY, "false",
Context.SCOPE_ENTITY);
}
static class Relation{
protected final boolean doKeyLookup;
protected final String foreignKey;
protected final String primaryKey;
public Relation(Context context) {
String where = context.getEntityAttribute("where");
String cacheKey = context.getEntityAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY);
String lookupKey = context.getEntityAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY);
if (cacheKey != null && lookupKey == null) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"'cacheKey' is specified for the entity "
+ context.getEntityAttribute("name")
+ " but 'cacheLookup' is missing");
}
if (where == null && cacheKey == null) {
doKeyLookup = false;
primaryKey = null;
foreignKey = null;
} else {
if (where != null) {
String[] splits = where.split("=");
primaryKey = splits[0];
foreignKey = splits[1].trim();
} else {
primaryKey = cacheKey;
foreignKey = lookupKey;
}
doKeyLookup = true;
}
}
@Override
public String toString() {
return "Relation "
+ primaryKey + "="+foreignKey ;
}
}
private DIHCache instantiateCache(Context context) {
DIHCache cache = null;
try {
@SuppressWarnings("unchecked")
Class<DIHCache> cacheClass = DocBuilder.loadClass(cacheImplName, context
.getSolrCore());
Constructor<DIHCache> constr = cacheClass.getConstructor();
cache = constr.newInstance();
cache.open(context);
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to load Cache implementation:" + cacheImplName, e);
}
return cache;
}
public void initNewParent(Context context) {
dataSourceRowCache = null;
queryVsCacheIterator = new HashMap<>();
for (Map.Entry<String,DIHCache> entry : queryVsCache.entrySet()) {
queryVsCacheIterator.put(entry.getKey(), entry.getValue().iterator());
}
}
public void destroyAll() {
if (queryVsCache != null) {
for (DIHCache cache : queryVsCache.values()) {
cache.destroy();
}
}
queryVsCache = null;
dataSourceRowCache = null;
cacheForeignKey = null;
}
/**
* <p>
* Get all the rows from the datasource for the given query and cache them
* </p>
*/
public void populateCache(String query,
Iterator<Map<String,Object>> rowIterator) {
Map<String,Object> aRow = null;
DIHCache cache = queryVsCache.get(query);
while ((aRow = getNextFromCache(query, rowIterator)) != null) {
cache.add(aRow);
}
}
private Map<String,Object> getNextFromCache(String query,
Iterator<Map<String,Object>> rowIterator) {
try {
if (rowIterator == null) return null;
if (rowIterator.hasNext()) return rowIterator.next();
return null;
} catch (Exception e) {
SolrException.log(log, "getNextFromCache() failed for query '" + query
+ "'", e);
wrapAndThrow(DataImportHandlerException.WARN, e);
return null;
}
}
public Map<String,Object> getCacheData(Context context, String query,
Iterator<Map<String,Object>> rowIterator) {
if (cacheDoKeyLookup) {
return getIdCacheData(context, query, rowIterator);
} else {
return getSimpleCacheData(context, query, rowIterator);
}
}
/**
* If the where clause is present, the cache maps each SQL query to a Map of
* key vs. List of rows.
*
* @param query
* the query string for which cached data is to be returned
*
* @return the cached row corresponding to the given query after all variables
* have been resolved
*/
protected Map<String,Object> getIdCacheData(Context context, String query,
Iterator<Map<String,Object>> rowIterator) {
Object key = context.resolve(cacheForeignKey);
if (key == null) {
throw new DataImportHandlerException(DataImportHandlerException.WARN,
"The cache lookup value : " + cacheForeignKey
+ " is resolved to be null in the entity :"
+ context.getEntityAttribute("name"));
}
if (dataSourceRowCache == null) {
DIHCache cache = queryVsCache.get(query);
if (cache == null) {
cache = instantiateCache(context);
queryVsCache.put(query, cache);
populateCache(query, rowIterator);
}
dataSourceRowCache = cache.iterator(key);
}
return getFromRowCacheTransformed();
}
/**
* If the where clause is not present, the cache is a Map of query vs. List of rows.
*
* @param query
* string for which cached row is to be returned
*
* @return the cached row corresponding to the given query
*/
protected Map<String,Object> getSimpleCacheData(Context context,
String query, Iterator<Map<String,Object>> rowIterator) {
if (dataSourceRowCache == null) {
DIHCache cache = queryVsCache.get(query);
if (cache == null) {
cache = instantiateCache(context);
queryVsCache.put(query, cache);
populateCache(query, rowIterator);
queryVsCacheIterator.put(query, cache.iterator());
}
Iterator<Map<String,Object>> cacheIter = queryVsCacheIterator.get(query);
dataSourceRowCache = cacheIter;
}
return getFromRowCacheTransformed();
}
protected Map<String,Object> getFromRowCacheTransformed() {
if (dataSourceRowCache == null || !dataSourceRowCache.hasNext()) {
dataSourceRowCache = null;
return null;
}
Map<String,Object> r = dataSourceRowCache.next();
return r;
}
/**
* <p>
* Specify the class for the cache implementation
* </p>
*/
public static final String CACHE_IMPL = "cacheImpl";
/**
* <p>
* If the cache supports persistent data, set to "true" to delete any prior
* persisted data before running the entity.
* </p>
*/
public static final String CACHE_DELETE_PRIOR_DATA = "cacheDeletePriorData";
/**
* <p>
* Specify the Foreign Key from the parent entity to join on. Use if the cache
* is on a child entity.
* </p>
*/
public static final String CACHE_FOREIGN_KEY = "cacheLookup";
/**
* <p>
* Specify the Primary Key field from this Entity to map the input records
* with
* </p>
*/
public static final String CACHE_PRIMARY_KEY = "cacheKey";
/**
* <p>
* If true, a pre-existing cache is re-opened for read-only access.
* </p>
*/
public static final String CACHE_READ_ONLY = "cacheReadOnly";
}

View File

@ -1,21 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
public enum DIHLogLevels {
START_ENTITY, END_ENTITY, TRANSFORMED_ROW, ENTITY_META, PRE_TRANSFORMER_ROW, START_DOC, END_DOC, ENTITY_OUT, ROW_END, TRANSFORMER_EXCEPTION, ENTITY_EXCEPTION, DISABLE_LOGGING, ENABLE_LOGGING, NONE
}

View File

@ -1,45 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.Date;
import java.util.Map;
/**
* Implementations write out properties about the last data import
* for use by the next import. ex: to persist the last import timestamp
* so that future delta imports can know what needs to be updated.
*
* @lucene.experimental
*/
public abstract class DIHProperties {
public abstract void init(DataImporter dataImporter, Map<String, String> initParams);
public abstract boolean isWritable();
public abstract void persist(Map<String, Object> props);
public abstract Map<String, Object> readIndexerProperties();
public abstract String convertDateToString(Date d);
public Date getCurrentTimestamp() {
return new Date();
}
}
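As a concrete illustration of this contract, the sketch below shows a minimal in-memory implementation. It is not part of the removed sources (DIH shipped SimplePropertiesWriter and ZKPropertiesWriter instead) and only demonstrates the persist / readIndexerProperties round trip that delta imports rely on to remember the last index time.

package org.apache.solr.handler.dataimport;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
// Hypothetical in-memory DIHProperties (illustration only; it forgets everything on restart).
public class InMemoryPropertiesSketch extends DIHProperties {
  private final Map<String, Object> stored = new HashMap<>();
  @Override
  public void init(DataImporter dataImporter, Map<String, String> initParams) {
    // nothing to configure for an in-memory store
  }
  @Override
  public boolean isWritable() {
    return true; // delta imports require a writable property store, see checkWritablePersistFile()
  }
  @Override
  public void persist(Map<String, Object> props) {
    stored.putAll(props);
  }
  @Override
  public Map<String, Object> readIndexerProperties() {
    return new HashMap<>(stored);
  }
  @Override
  public String convertDateToString(Date d) {
    return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(d);
  }
}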

View File

@ -1,99 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.Map;
import java.util.Set;
import org.apache.solr.common.SolrInputDocument;
/**
* @lucene.experimental
*
*/
public interface DIHWriter {
/**
* <p>
* If this writer supports transactions or commit points, then commit any changes,
* optionally optimizing the data for read/write performance
* </p>
*/
public void commit(boolean optimize);
/**
* <p>
* Release resources used by this writer. After calling close, reads &amp; updates will throw exceptions.
* </p>
*/
public void close();
/**
* <p>
* If this writer supports transactions or commit points, then roll back any uncommitted changes.
* </p>
*/
public void rollback();
/**
* <p>
* Delete from the writer's underlying data store based on the passed-in writer-specific query. (Optional Operation)
* </p>
*/
public void deleteByQuery(String q);
/**
* <p>
* Delete everything from the writer's underlying data store
* </p>
*/
public void doDeleteAll();
/**
* <p>
* Delete from the writer's underlying data store based on the passed-in Primary Key
* </p>
*/
public void deleteDoc(Object key);
/**
* <p>
* Add a document to this writer's underlying data store.
* </p>
* @return true on success, false on failure
*/
public boolean upload(SolrInputDocument doc);
/**
* <p>
* Provide context information for this writer. init() should be called before using the writer.
* </p>
*/
public void init(Context context) ;
/**
* <p>
* Specify the keys to be modified by a delta update (required by writers that can store duplicate keys)
* </p>
*/
public void setDeltaKeys(Set<Map<String, Object>> deltaKeys) ;
}

View File

@ -1,44 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
public abstract class DIHWriterBase implements DIHWriter {
protected String keyFieldName;
protected Set<Object> deltaKeys = null;
@Override
public void setDeltaKeys(Set<Map<String,Object>> passedInDeltaKeys) {
deltaKeys = new HashSet<>();
for (Map<String,Object> aMap : passedInDeltaKeys) {
if (aMap.size() > 0) {
Object key = null;
if (keyFieldName != null) {
key = aMap.get(keyFieldName);
} else {
key = aMap.entrySet().iterator().next();
}
if (key != null) {
deltaKeys.add(key);
}
}
}
}
}
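To make the writer contract concrete, here is an illustrative sketch (not part of the removed sources; the class name is hypothetical) that extends DIHWriterBase and merely logs each operation. A custom writer selected at request time through the writerImpl parameter would additionally need an (UpdateRequestProcessor, SolrQueryRequest) constructor, as getSolrWriter() in DataImportHandler below shows.

package org.apache.solr.handler.dataimport;
import java.lang.invoke.MethodHandles;
import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
// Hypothetical DIHWriter that only logs what it is asked to do (illustration only).
public class LoggingWriterSketch extends DIHWriterBase {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  @Override
  public void init(Context context) {
    keyFieldName = "id"; // assumed unique key; a real writer derives this from the schema
  }
  @Override
  public boolean upload(SolrInputDocument doc) {
    log.info("would index {}", doc);
    return true;
  }
  @Override
  public void deleteDoc(Object key) {
    log.info("would delete document with key {}", key);
  }
  @Override
  public void deleteByQuery(String q) {
    log.info("would delete by query {}", q);
  }
  @Override
  public void doDeleteAll() {
    log.info("would delete all documents");
  }
  @Override
  public void commit(boolean optimize) {
    log.info("commit (optimize={})", optimize);
  }
  @Override
  public void rollback() {
    log.info("rollback");
  }
  @Override
  public void close() {
    // no resources to release
  }
}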

View File

@ -1,318 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.lang.invoke.MethodHandles;
import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.metrics.MetricsMap;
import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.RawResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.handler.dataimport.DataImporter.IMPORT_CMD;
/**
* <p>
* Solr Request Handler for data import from databases and REST data sources.
* </p>
* <p>
* It is configured in solrconfig.xml
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and subject to change</b>
*
* @deprecated since 8.6
* @since solr 1.3
*/
@Deprecated(since = "8.6")
public class DataImportHandler extends RequestHandlerBase implements
SolrCoreAware {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private DataImporter importer;
private boolean debugEnabled = true;
private String myName = "dataimport";
private MetricsMap metrics;
private static final String PARAM_WRITER_IMPL = "writerImpl";
private static final String DEFAULT_WRITER_NAME = "SolrWriter";
static final String ENABLE_DIH_DATA_CONFIG_PARAM = "enable.dih.dataConfigParam";
final boolean dataConfigParam_enabled = Boolean.getBoolean(ENABLE_DIH_DATA_CONFIG_PARAM);
public DataImporter getImporter() {
return this.importer;
}
@Override
public void init(@SuppressWarnings({"rawtypes"})NamedList args) {
super.init(args);
Map<String,String> macro = new HashMap<>();
macro.put("expandMacros", "false");
defaults = SolrParams.wrapDefaults(defaults, new MapSolrParams(macro));
log.warn("Data Import Handler is deprecated as of Solr 8.6. See SOLR-14066 for more details.");
}
@Override
@SuppressWarnings("unchecked")
public void inform(SolrCore core) {
try {
String name = getPluginInfo().name;
if (name.startsWith("/")) {
myName = name.substring(1);
}
// some users may have '/' in the handler name. replace with '_'
myName = myName.replaceAll("/", "_");
debugEnabled = StrUtils.parseBool((String)initArgs.get(ENABLE_DEBUG), true);
importer = new DataImporter(core, myName);
} catch (Exception e) {
log.error( DataImporter.MSG.LOAD_EXP, e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, DataImporter.MSG.LOAD_EXP, e);
}
}
@Override
@SuppressWarnings("unchecked")
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
throws Exception {
rsp.setHttpCaching(false);
//TODO: figure out why just the first one is OK...
ContentStream contentStream = null;
Iterable<ContentStream> streams = req.getContentStreams();
if(streams != null){
for (ContentStream stream : streams) {
contentStream = stream;
break;
}
}
SolrParams params = req.getParams();
@SuppressWarnings({"rawtypes"})
NamedList defaultParams = (NamedList) initArgs.get("defaults");
RequestInfo requestParams = new RequestInfo(req, getParamsMap(params), contentStream);
String command = requestParams.getCommand();
if (DataImporter.SHOW_CONF_CMD.equals(command)) {
String dataConfigFile = params.get("config");
String dataConfig = params.get("dataConfig"); // needn't check dataConfigParam_enabled; we don't execute it
if(dataConfigFile != null) {
dataConfig = SolrWriter.getResourceAsString(req.getCore().getResourceLoader().openResource(dataConfigFile));
}
if(dataConfig==null) {
rsp.add("status", DataImporter.MSG.NO_CONFIG_FOUND);
} else {
// Modify incoming request params to add wt=raw
ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
rawParams.set(CommonParams.WT, "raw");
req.setParams(rawParams);
ContentStreamBase content = new ContentStreamBase.StringStream(dataConfig);
rsp.add(RawResponseWriter.CONTENT, content);
}
return;
}
if (params.get("dataConfig") != null && dataConfigParam_enabled == false) {
throw new SolrException(SolrException.ErrorCode.FORBIDDEN,
"Use of the dataConfig param (DIH debug mode) requires the system property " +
ENABLE_DIH_DATA_CONFIG_PARAM + " because it's a security risk.");
}
rsp.add("initArgs", initArgs);
String message = "";
if (command != null) {
rsp.add("command", command);
}
// If importer is still null
if (importer == null) {
rsp.add("status", DataImporter.MSG.NO_INIT);
return;
}
if (command != null && DataImporter.ABORT_CMD.equals(command)) {
importer.runCmd(requestParams, null);
} else if (importer.isBusy()) {
message = DataImporter.MSG.CMD_RUNNING;
} else if (command != null) {
if (DataImporter.FULL_IMPORT_CMD.equals(command)
|| DataImporter.DELTA_IMPORT_CMD.equals(command) ||
IMPORT_CMD.equals(command)) {
importer.maybeReloadConfiguration(requestParams, defaultParams);
UpdateRequestProcessorChain processorChain =
req.getCore().getUpdateProcessorChain(params);
UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp);
SolrResourceLoader loader = req.getCore().getResourceLoader();
DIHWriter sw = getSolrWriter(processor, loader, requestParams, req);
if (requestParams.isDebug()) {
if (debugEnabled) {
// Synchronous request for the debug mode
importer.runCmd(requestParams, sw);
rsp.add("mode", "debug");
rsp.add("documents", requestParams.getDebugInfo().debugDocuments);
if (requestParams.getDebugInfo().debugVerboseOutput != null) {
rsp.add("verbose-output", requestParams.getDebugInfo().debugVerboseOutput);
}
} else {
message = DataImporter.MSG.DEBUG_NOT_ENABLED;
}
} else {
// Asynchronous request for normal mode
if(requestParams.getContentStream() == null && !requestParams.isSyncMode()){
importer.runAsync(requestParams, sw);
} else {
importer.runCmd(requestParams, sw);
}
}
} else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
if(importer.maybeReloadConfiguration(requestParams, defaultParams)) {
message = DataImporter.MSG.CONFIG_RELOADED;
} else {
message = DataImporter.MSG.CONFIG_NOT_RELOADED;
}
}
}
rsp.add("status", importer.isBusy() ? "busy" : "idle");
rsp.add("importResponse", message);
rsp.add("statusMessages", importer.getStatusMessages());
}
/** The value is converted to a String or {@code List<String>} if multi-valued. */
private Map<String, Object> getParamsMap(SolrParams params) {
Map<String, Object> result = new HashMap<>();
for (Map.Entry<String, String[]> pair : params){
String s = pair.getKey();
String[] val = pair.getValue();
if (val == null || val.length < 1)
continue;
if (val.length == 1)
result.put(s, val[0]);
else
result.put(s, Arrays.asList(val));
}
return result;
}
private DIHWriter getSolrWriter(final UpdateRequestProcessor processor,
final SolrResourceLoader loader, final RequestInfo requestParams,
SolrQueryRequest req) {
SolrParams reqParams = req.getParams();
String writerClassStr = null;
if (reqParams != null && reqParams.get(PARAM_WRITER_IMPL) != null) {
writerClassStr = reqParams.get(PARAM_WRITER_IMPL);
}
DIHWriter writer;
if (writerClassStr != null
&& !writerClassStr.equals(DEFAULT_WRITER_NAME)
&& !writerClassStr.equals(DocBuilder.class.getPackage().getName() + "."
+ DEFAULT_WRITER_NAME)) {
try {
@SuppressWarnings("unchecked")
Class<DIHWriter> writerClass = DocBuilder.loadClass(writerClassStr, req.getCore());
@SuppressWarnings({"rawtypes"})
Constructor<DIHWriter> cnstr = writerClass.getConstructor(new Class[] {
UpdateRequestProcessor.class, SolrQueryRequest.class});
return cnstr.newInstance((Object) processor, (Object) req);
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to load Writer implementation:" + writerClassStr, e);
}
} else {
return new SolrWriter(processor, req) {
@Override
public boolean upload(SolrInputDocument document) {
try {
return super.upload(document);
} catch (RuntimeException e) {
log.error("Exception while adding: {}", document, e);
return false;
}
}
};
}
}
@Override
public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
super.initializeMetrics(parentContext, scope);
metrics = new MetricsMap((detailed, map) -> {
if (importer != null) {
DocBuilder.Statistics cumulative = importer.cumulativeStatistics;
map.put("Status", importer.getStatus().toString());
if (importer.docBuilder != null) {
DocBuilder.Statistics running = importer.docBuilder.importStatistics;
map.put("Documents Processed", running.docCount);
map.put("Requests made to DataSource", running.queryCount);
map.put("Rows Fetched", running.rowsCount);
map.put("Documents Deleted", running.deletedDocCount);
map.put("Documents Skipped", running.skipDocCount);
}
map.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, cumulative.docCount);
map.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, cumulative.queryCount);
map.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, cumulative.rowsCount);
map.put(DataImporter.MSG.TOTAL_DOCS_DELETED, cumulative.deletedDocCount);
map.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, cumulative.skipDocCount);
}
});
solrMetricsContext.gauge(metrics, true, "importer", getCategory().toString(), scope);
}
// //////////////////////SolrInfoMBeans methods //////////////////////
@Override
public String getDescription() {
return DataImporter.MSG.JMX_DESC;
}
public static final String ENABLE_DEBUG = "enableDebug";
}

View File

@ -1,75 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
/**
* <p> Exception class for all DataImportHandler exceptions </p>
* <p>
* <b>This API is experimental and subject to change</b>
*
* @since solr 1.3
*/
public class DataImportHandlerException extends RuntimeException {
private int errCode;
public boolean debugged = false;
public static final int SEVERE = 500, WARN = 400, SKIP = 300, SKIP_ROW = 301;
public DataImportHandlerException(int err) {
super();
errCode = err;
}
public DataImportHandlerException(int err, String message) {
super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()));
errCode = err;
}
public DataImportHandlerException(int err, String message, Throwable cause) {
super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()), cause);
errCode = err;
}
public DataImportHandlerException(int err, Throwable cause) {
super(cause);
errCode = err;
}
public int getErrCode() {
return errCode;
}
public static DataImportHandlerException wrapAndThrow(int err, Exception e) {
if (e instanceof DataImportHandlerException) {
throw (DataImportHandlerException) e;
} else {
throw new DataImportHandlerException(err, e);
}
}
public static DataImportHandlerException wrapAndThrow(int err, Exception e, String msg) {
if (e instanceof DataImportHandlerException) {
throw (DataImportHandlerException) e;
} else {
throw new DataImportHandlerException(err, msg, e);
}
}
public static final String MSG = " Processing Document # ";
}
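The wrapAndThrow helpers above were used throughout DIH in the idiom sketched below; the class and method here are hypothetical and only illustrate the pattern.

package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
// Hypothetical example of the wrapAndThrow idiom: an existing DataImportHandlerException
// is rethrown untouched, anything else is wrapped with an error code and message.
class WrapAndThrowSketch {
  static int parseRowValue(String raw) {
    try {
      return Integer.parseInt(raw);
    } catch (Exception e) {
      // By convention SEVERE (500) aborts the import, while SKIP/SKIP_ROW skip the offending document or row.
      throw wrapAndThrow(SEVERE, e, "Could not parse row value: " + raw);
    }
  }
}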

View File

@ -1,628 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.EmptyEntityResolver;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.SystemIdResolver;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XMLErrorLogger;
import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
import org.apache.solr.handler.dataimport.config.ConfigParseUtil;
import org.apache.solr.handler.dataimport.config.DIHConfiguration;
import org.apache.solr.handler.dataimport.config.Entity;
import org.apache.solr.handler.dataimport.config.PropertyWriter;
import org.apache.solr.handler.dataimport.config.Script;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DocBuilder.loadClass;
import static org.apache.solr.handler.dataimport.config.ConfigNameConstants.CLASS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.apache.commons.io.IOUtils;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.IOException;
import java.io.StringReader;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
/**
* <p> Stores all configuration information for pulling and indexing data. </p>
* <p>
* <b>This API is experimental and subject to change</b>
*
* @since solr 1.3
*/
public class DataImporter {
public enum Status {
IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED
}
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final XMLErrorLogger XMLLOG = new XMLErrorLogger(log);
private Status status = Status.IDLE;
private DIHConfiguration config;
private Date indexStartTime;
private Properties store = new Properties();
private Map<String, Map<String,String>> requestLevelDataSourceProps = new HashMap<>();
private IndexSchema schema;
public DocBuilder docBuilder;
public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();
private SolrCore core;
private Map<String, Object> coreScopeSession = new ConcurrentHashMap<>();
private ReentrantLock importLock = new ReentrantLock();
private boolean isDeltaImportSupported = false;
private final String handlerName;
/**
* Only for testing purposes
*/
DataImporter() {
this.handlerName = "dataimport" ;
}
DataImporter(SolrCore core, String handlerName) {
this.handlerName = handlerName;
this.core = core;
this.schema = core.getLatestSchema();
}
boolean maybeReloadConfiguration(RequestInfo params,
NamedList<?> defaultParams) throws IOException {
if (importLock.tryLock()) {
boolean success = false;
try {
if (null != params.getRequest()) {
if (schema != params.getRequest().getSchema()) {
schema = params.getRequest().getSchema();
}
}
String dataConfigText = params.getDataConfig();
String dataconfigFile = params.getConfigFile();
InputSource is = null;
if(dataConfigText!=null && dataConfigText.length()>0) {
is = new InputSource(new StringReader(dataConfigText));
} else if(dataconfigFile!=null) {
is = new InputSource(core.getResourceLoader().openResource(dataconfigFile));
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(dataconfigFile));
log.info("Loading DIH Configuration: {}", dataconfigFile);
}
if(is!=null) {
config = loadDataConfig(is);
success = true;
}
Map<String,Map<String,String>> dsProps = new HashMap<>();
if(defaultParams!=null) {
int position = 0;
while (position < defaultParams.size()) {
if (defaultParams.getName(position) == null) {
break;
}
String name = defaultParams.getName(position);
if (name.equals("datasource")) {
success = true;
@SuppressWarnings({"rawtypes"})
NamedList dsConfig = (NamedList) defaultParams.getVal(position);
log.info("Getting configuration for Global Datasource...");
Map<String,String> props = new HashMap<>();
for (int i = 0; i < dsConfig.size(); i++) {
props.put(dsConfig.getName(i), dsConfig.getVal(i).toString());
}
log.info("Adding properties to datasource: {}", props);
dsProps.put((String) dsConfig.get("name"), props);
}
position++;
}
}
requestLevelDataSourceProps = Collections.unmodifiableMap(dsProps);
} catch(IOException ioe) {
throw ioe;
} finally {
importLock.unlock();
}
return success;
} else {
return false;
}
}
public String getHandlerName() {
return handlerName;
}
public IndexSchema getSchema() {
return schema;
}
/**
* Used by tests
*/
void loadAndInit(String configStr) {
config = loadDataConfig(new InputSource(new StringReader(configStr)));
}
void loadAndInit(InputSource configFile) {
config = loadDataConfig(configFile);
}
public DIHConfiguration loadDataConfig(InputSource configFile) {
DIHConfiguration dihcfg = null;
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setValidating(false);
// only enable xinclude, if XML is coming from safe source (local file)
// and a SolrCore and SystemId is present (makes no sense otherwise):
if (core != null && configFile.getSystemId() != null) {
try {
dbf.setXIncludeAware(true);
dbf.setNamespaceAware(true);
} catch( UnsupportedOperationException e ) {
log.warn( "XML parser doesn't support XInclude option" );
}
}
DocumentBuilder builder = dbf.newDocumentBuilder();
// only enable xinclude / external entities, if XML is coming from
// safe source (local file) and a SolrCore and SystemId is present:
if (core != null && configFile.getSystemId() != null) {
builder.setEntityResolver(new SystemIdResolver(core.getResourceLoader()));
} else {
// Don't allow external entities without having a system ID:
builder.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE);
}
builder.setErrorHandler(XMLLOG);
Document document;
try {
document = builder.parse(configFile);
} finally {
// some XML parsers are broken and don't close the byte stream (but they should according to spec)
IOUtils.closeQuietly(configFile.getByteStream());
}
dihcfg = readFromXml(document);
log.info("Data Configuration loaded successfully");
} catch (Exception e) {
throw new DataImportHandlerException(SEVERE,
"Data Config problem: " + e.getMessage(), e);
}
for (Entity e : dihcfg.getEntities()) {
if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) {
isDeltaImportSupported = true;
break;
}
}
return dihcfg;
}
public DIHConfiguration readFromXml(Document xmlDocument) {
DIHConfiguration config;
List<Map<String, String >> functions = new ArrayList<>();
Script script = null;
Map<String, Map<String,String>> dataSources = new HashMap<>();
NodeList dataConfigTags = xmlDocument.getElementsByTagName("dataConfig");
if(dataConfigTags == null || dataConfigTags.getLength() == 0) {
throw new DataImportHandlerException(SEVERE, "the root node '<dataConfig>' is missing");
}
Element e = (Element) dataConfigTags.item(0);
List<Element> documentTags = ConfigParseUtil.getChildNodes(e, "document");
if (documentTags.isEmpty()) {
throw new DataImportHandlerException(SEVERE, "DataImportHandler " +
"configuration file must have one <document> node.");
}
List<Element> scriptTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.SCRIPT);
if (!scriptTags.isEmpty()) {
script = new Script(scriptTags.get(0));
}
// Add the provided evaluators
List<Element> functionTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.FUNCTION);
if (!functionTags.isEmpty()) {
for (Element element : functionTags) {
String func = ConfigParseUtil.getStringAttribute(element, NAME, null);
String clz = ConfigParseUtil.getStringAttribute(element, ConfigNameConstants.CLASS, null);
if (func == null || clz == null){
throw new DataImportHandlerException(
SEVERE,
"<function> must have a 'name' and 'class' attributes");
} else {
functions.add(ConfigParseUtil.getAllAttributes(element));
}
}
}
List<Element> dataSourceTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.DATA_SRC);
if (!dataSourceTags.isEmpty()) {
for (Element element : dataSourceTags) {
Map<String,String> p = new HashMap<>();
HashMap<String, String> attrs = ConfigParseUtil.getAllAttributes(element);
for (Map.Entry<String, String> entry : attrs.entrySet()) {
p.put(entry.getKey(), entry.getValue());
}
dataSources.put(p.get("name"), p);
}
}
if(dataSources.get(null) == null){
for (Map<String,String> properties : dataSources.values()) {
dataSources.put(null,properties);
break;
}
}
PropertyWriter pw = null;
List<Element> propertyWriterTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.PROPERTY_WRITER);
if (propertyWriterTags.isEmpty()) {
boolean zookeeper = false;
if (this.core != null
&& this.core.getCoreContainer().isZooKeeperAware()) {
zookeeper = true;
}
pw = new PropertyWriter(zookeeper ? "ZKPropertiesWriter"
: "SimplePropertiesWriter", Collections.<String,String> emptyMap());
} else if (propertyWriterTags.size() > 1) {
throw new DataImportHandlerException(SEVERE, "Only one "
+ ConfigNameConstants.PROPERTY_WRITER + " can be configured.");
} else {
Element pwElement = propertyWriterTags.get(0);
String type = null;
Map<String,String> params = new HashMap<>();
for (Map.Entry<String,String> entry : ConfigParseUtil.getAllAttributes(
pwElement).entrySet()) {
if (TYPE.equals(entry.getKey())) {
type = entry.getValue();
} else {
params.put(entry.getKey(), entry.getValue());
}
}
if (type == null) {
throw new DataImportHandlerException(SEVERE, "The "
+ ConfigNameConstants.PROPERTY_WRITER + " element must specify "
+ TYPE);
}
pw = new PropertyWriter(type, params);
}
return new DIHConfiguration(documentTags.get(0), this, functions, script, dataSources, pw);
}
@SuppressWarnings("unchecked")
private DIHProperties createPropertyWriter() {
DIHProperties propWriter = null;
PropertyWriter configPw = config.getPropertyWriter();
try {
Class<DIHProperties> writerClass = DocBuilder.loadClass(configPw.getType(), this.core);
propWriter = writerClass.getConstructor().newInstance();
propWriter.init(this, configPw.getParameters());
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Unable to PropertyWriter implementation:" + configPw.getType(), e);
}
return propWriter;
}
public DIHConfiguration getConfig() {
return config;
}
Date getIndexStartTime() {
return indexStartTime;
}
void setIndexStartTime(Date indexStartTime) {
this.indexStartTime = indexStartTime;
}
void store(Object key, Object value) {
store.put(key, value);
}
Object retrieve(Object key) {
return store.get(key);
}
@SuppressWarnings({"unchecked", "rawtypes"})
public DataSource getDataSourceInstance(Entity key, String name, Context ctx) {
Map<String,String> p = requestLevelDataSourceProps.get(name);
if (p == null)
p = config.getDataSources().get(name);
if (p == null)
p = requestLevelDataSourceProps.get(null);// for default data source
if (p == null)
p = config.getDataSources().get(null);
if (p == null)
throw new DataImportHandlerException(SEVERE,
"No dataSource :" + name + " available for entity :" + key.getName());
String type = p.get(TYPE);
@SuppressWarnings({"rawtypes"})
DataSource dataSrc = null;
if (type == null) {
dataSrc = new JdbcDataSource();
} else {
try {
dataSrc = (DataSource) DocBuilder.loadClass(type, getCore()).getConstructor().newInstance();
} catch (Exception e) {
wrapAndThrow(SEVERE, e, "Invalid type for data source: " + type);
}
}
try {
Properties copyProps = new Properties();
copyProps.putAll(p);
Map<String, Object> map = ctx.getRequestParameters();
if (map.containsKey("rows")) {
int rows = Integer.parseInt((String) map.get("rows"));
if (map.containsKey("start")) {
rows += Integer.parseInt((String) map.get("start"));
}
copyProps.setProperty("maxRows", String.valueOf(rows));
}
dataSrc.init(ctx, copyProps);
} catch (Exception e) {
wrapAndThrow(SEVERE, e, "Failed to initialize DataSource: " + key.getDataSourceName());
}
return dataSrc;
}
public Status getStatus() {
return status;
}
public void setStatus(Status status) {
this.status = status;
}
public boolean isBusy() {
return importLock.isLocked();
}
public void doFullImport(DIHWriter writer, RequestInfo requestParams) {
log.info("Starting Full Import");
setStatus(Status.RUNNING_FULL_DUMP);
try {
DIHProperties dihPropWriter = createPropertyWriter();
setIndexStartTime(dihPropWriter.getCurrentTimestamp());
docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
checkWritablePersistFile(writer, dihPropWriter);
docBuilder.execute();
if (!requestParams.isDebug())
cumulativeStatistics.add(docBuilder.importStatistics);
} catch (Exception e) {
SolrException.log(log, "Full Import failed", e);
docBuilder.handleError("Full Import failed", e);
} finally {
setStatus(Status.IDLE);
DocBuilder.INSTANCE.set(null);
}
}
private void checkWritablePersistFile(DIHWriter writer, DIHProperties dihPropWriter) {
if (isDeltaImportSupported && !dihPropWriter.isWritable()) {
throw new DataImportHandlerException(SEVERE,
"Properties is not writable. Delta imports are supported by data config but will not work.");
}
}
public void doDeltaImport(DIHWriter writer, RequestInfo requestParams) {
log.info("Starting Delta Import");
setStatus(Status.RUNNING_DELTA_DUMP);
try {
DIHProperties dihPropWriter = createPropertyWriter();
setIndexStartTime(dihPropWriter.getCurrentTimestamp());
docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
checkWritablePersistFile(writer, dihPropWriter);
docBuilder.execute();
if (!requestParams.isDebug())
cumulativeStatistics.add(docBuilder.importStatistics);
} catch (Exception e) {
log.error("Delta Import Failed", e);
docBuilder.handleError("Delta Import Failed", e);
} finally {
setStatus(Status.IDLE);
DocBuilder.INSTANCE.set(null);
}
}
public void runAsync(final RequestInfo reqParams, final DIHWriter sw) {
new Thread(() -> runCmd(reqParams, sw)).start();
}
void runCmd(RequestInfo reqParams, DIHWriter sw) {
String command = reqParams.getCommand();
if (command.equals(ABORT_CMD)) {
if (docBuilder != null) {
docBuilder.abort();
}
return;
}
if (!importLock.tryLock()){
log.warn("Import command failed . another import is running");
return;
}
try {
if (FULL_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) {
doFullImport(sw, reqParams);
} else if (command.equals(DELTA_IMPORT_CMD)) {
doDeltaImport(sw, reqParams);
}
} finally {
importLock.unlock();
}
}
@SuppressWarnings("unchecked")
Map<String, String> getStatusMessages() {
// this map object is a Collections.synchronizedMap(new LinkedHashMap()). If we
// synchronize on the object, it is safe to iterate through the map
@SuppressWarnings({"rawtypes"})
Map statusMessages = (Map) retrieve(STATUS_MSGS);
Map<String, String> result = new LinkedHashMap<>();
if (statusMessages != null) {
synchronized (statusMessages) {
for (Object o : statusMessages.entrySet()) {
@SuppressWarnings({"rawtypes"})
Map.Entry e = (Map.Entry) o;
//the toString is taken because some of the Objects create the data lazily when toString() is called
result.put((String) e.getKey(), e.getValue().toString());
}
}
}
return result;
}
public DocBuilder getDocBuilder() {
return docBuilder;
}
public DocBuilder getDocBuilder(DIHWriter writer, RequestInfo requestParams) {
DIHProperties dihPropWriter = createPropertyWriter();
return new DocBuilder(this, writer, dihPropWriter, requestParams);
}
Map<String, Evaluator> getEvaluators() {
return getEvaluators(config.getFunctions());
}
/**
* used by tests.
*/
@SuppressWarnings({"unchecked"})
Map<String, Evaluator> getEvaluators(List<Map<String,String>> fn) {
Map<String, Evaluator> evaluators = new HashMap<>();
evaluators.put(Evaluator.DATE_FORMAT_EVALUATOR, new DateFormatEvaluator());
evaluators.put(Evaluator.SQL_ESCAPE_EVALUATOR, new SqlEscapingEvaluator());
evaluators.put(Evaluator.URL_ENCODE_EVALUATOR, new UrlEvaluator());
evaluators.put(Evaluator.ESCAPE_SOLR_QUERY_CHARS, new SolrQueryEscapingEvaluator());
SolrCore core = docBuilder == null ? null : docBuilder.dataImporter.getCore();
for (Map<String, String> map : fn) {
try {
evaluators.put(map.get(NAME), (Evaluator) loadClass(map.get(CLASS), core).getConstructor().newInstance());
} catch (Exception e) {
wrapAndThrow(SEVERE, e, "Unable to instantiate evaluator: " + map.get(CLASS));
}
}
return evaluators;
}
static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
@Override
protected AtomicLong initialValue() {
return new AtomicLong();
}
};
static final class MSG {
public static final String NO_CONFIG_FOUND = "Configuration not found";
public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run";
public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid";
public static final String LOAD_EXP = "Exception while loading DataImporter";
public static final String JMX_DESC = "Manage data import from databases to Solr";
public static final String CMD_RUNNING = "A command is still running...";
public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag <str name=\"enableDebug\">true</str> in solrconfig.xml";
public static final String CONFIG_RELOADED = "Configuration Re-loaded successfully";
public static final String CONFIG_NOT_RELOADED = "Configuration NOT Re-loaded...Data Importer is busy.";
public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";
public static final String TOTAL_FAILED_DOCS = "Total Documents Failed";
public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource";
public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched";
public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted";
public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped";
}
public SolrCore getCore() {
return core;
}
void putToCoreScopeSession(String key, Object val) {
coreScopeSession.put(key, val);
}
Object getFromCoreScopeSession(String key) {
return coreScopeSession.get(key);
}
public static final String COLUMN = "column";
public static final String TYPE = "type";
public static final String DATA_SRC = "dataSource";
public static final String MULTI_VALUED = "multiValued";
public static final String NAME = "name";
public static final String STATUS_MSGS = "status-messages";
public static final String FULL_IMPORT_CMD = "full-import";
public static final String IMPORT_CMD = "import";
public static final String DELTA_IMPORT_CMD = "delta-import";
public static final String ABORT_CMD = "abort";
public static final String DEBUG_MODE = "debug";
public static final String RELOAD_CONF_CMD = "reload-config";
public static final String SHOW_CONF_CMD = "show-config";
}

View File

@ -1,66 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.io.Closeable;
import java.util.Properties;
/**
* <p>
* Provides data from a source with a given query.
* </p>
* <p>
* Implementation of this abstract class must provide a default no-arg constructor
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public abstract class DataSource<T> implements Closeable {
/**
* Initializes the DataSource with the <code>Context</code> and
* initialization properties.
* <p>
* This is invoked by the <code>DataImporter</code> after creating an
* instance of this class.
*/
public abstract void init(Context context, Properties initProps);
/**
* Get records for the given query. The return type depends on the
* implementation.
*
* @param query The query string. It can be a SQL query for JdbcDataSource or a URL
* for HttpDataSource or a file location for FileDataSource or a custom
* format for your own custom DataSource.
* @return Depends on the implementation. For instance JdbcDataSource returns
* an Iterator&lt;Map &lt;String,Object&gt;&gt;
*/
public abstract T getData(String query);
/**
* Cleans up resources of this DataSource after use.
*/
public abstract void close();
}
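For illustration, the smallest possible DataSource could look like the sketch below (not part of the removed sources; the class name is hypothetical). It ignores its query and always returns a single hard-coded row, whereas real implementations such as JdbcDataSource or FileDataSource resolve the query against an external system.

package org.apache.solr.handler.dataimport;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
// Hypothetical, minimal DataSource returning one fixed row per query (illustration only).
public class SingleRowDataSourceSketch extends DataSource<Iterator<Map<String, Object>>> {
  @Override
  public void init(Context context, Properties initProps) {
    // connection settings from the <dataSource .../> attributes would be read here
  }
  @Override
  public Iterator<Map<String, Object>> getData(String query) {
    Map<String, Object> row = Map.of("id", "1", "echoedQuery", query);
    return Collections.singletonList(row).iterator();
  }
  @Override
  public void close() {
    // nothing to release
  }
}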

View File

@ -1,180 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.handler.dataimport.config.EntityField;
import org.apache.solr.util.DateMathParser;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
/**
* <p>Formats values using a given date format. </p>
* <p>Pass two to four parameters:
* <ul>
* <li>An {@link EntityField} or a date expression to be parsed with
* the {@link DateMathParser} class. If the value is a String,
* then it is assumed to be a datemath expression; otherwise it is
* resolved using a {@link VariableResolver} instance</li>
* <li>A date format see {@link SimpleDateFormat} for the syntax.</li>
* <li>The {@link Locale} to use when parsing
* (optional; defaults to {@link Locale#ENGLISH}) </li>
* <li>The {@link TimeZone} to use (optional; defaults to the JVM default time zone) </li>
* </ul>
*/
public class DateFormatEvaluator extends Evaluator {
public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
protected Map<String, Locale> availableLocales = new HashMap<>();
protected Set<String> availableTimezones = new HashSet<>();
@SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
public DateFormatEvaluator() {
for (Locale locale : Locale.getAvailableLocales()) {
availableLocales.put(locale.toString(), locale);
}
for (String tz : TimeZone.getAvailableIDs()) {
availableTimezones.add(tz);
}
}
private SimpleDateFormat getDateFormat(String pattern, TimeZone timezone, Locale locale) {
final SimpleDateFormat sdf = new SimpleDateFormat(pattern, locale);
sdf.setTimeZone(timezone);
return sdf;
}
@Override
public String evaluate(String expression, Context context) {
List<Object> l = parseParams(expression, context.getVariableResolver());
if (l.size() < 2 || l.size() > 4) {
throw new DataImportHandlerException(SEVERE, "'formatDate()' must have two, three or four parameters ");
}
Object o = l.get(0);
Object format = l.get(1);
if (format instanceof VariableWrapper) {
VariableWrapper wrapper = (VariableWrapper) format;
o = wrapper.resolve();
format = o.toString();
}
Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
if(l.size()>2) {
Object localeObj = l.get(2);
String localeStr = null;
if (localeObj instanceof VariableWrapper) {
localeStr = ((VariableWrapper) localeObj).resolve().toString();
} else {
localeStr = localeObj.toString();
}
locale = availableLocales.get(localeStr);
if (locale == null) try {
locale = new Locale.Builder().setLanguageTag(localeStr).build();
} catch (IllformedLocaleException ex) {
throw new DataImportHandlerException(SEVERE, "Malformed / non-existent locale: " + localeStr, ex);
}
}
TimeZone tz = TimeZone.getDefault(); // DWS TODO: is this the right default for us? Deserves explanation if so.
if(l.size()==4) {
Object tzObj = l.get(3);
String tzStr = null;
if (tzObj instanceof VariableWrapper) {
tzStr = ((VariableWrapper) tzObj).resolve().toString();
} else {
tzStr = tzObj.toString();
}
if(availableTimezones.contains(tzStr)) {
tz = TimeZone.getTimeZone(tzStr);
} else {
throw new DataImportHandlerException(SEVERE, "Unsupported Timezone: " + tzStr);
}
}
String dateFmt = format.toString();
SimpleDateFormat fmt = getDateFormat(dateFmt, tz, locale);
Date date = null;
if (o instanceof VariableWrapper) {
date = evaluateWrapper((VariableWrapper) o, locale, tz);
} else {
date = evaluateString(o.toString(), locale, tz);
}
return fmt.format(date);
}
/**
* NOTE: declared as a method to allow for extensibility
*
* @lucene.experimental this API is experimental and subject to change
* @return the result of evaluating the variable wrapper
*/
protected Date evaluateWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) {
Date date = null;
Object variableval = resolveWrapper(variableWrapper,locale,tz);
if (variableval instanceof Date) {
date = (Date) variableval;
} else {
String s = variableval.toString();
try {
date = getDateFormat(DEFAULT_DATE_FORMAT, tz, locale).parse(s);
} catch (ParseException exp) {
wrapAndThrow(SEVERE, exp, "Invalid expression for date");
}
}
return date;
}
/**
* NOTE: declared as a method to allow for extensibility
* @lucene.experimental
* @return the result of evaluating a string
*/
protected Date evaluateString(String datemathfmt, Locale locale, TimeZone tz) {
// note: DMP does not use the locale but perhaps a subclass might use it, for e.g. parsing a date in a custom
// string that doesn't necessarily have date math?
//TODO refactor DateMathParser.parseMath a bit to have a static method for this logic.
if (datemathfmt.startsWith("NOW")) {
datemathfmt = datemathfmt.substring("NOW".length());
}
try {
DateMathParser parser = new DateMathParser(tz);
parser.setNow(new Date());// thus do *not* use SolrRequestInfo
return parser.parseMath(datemathfmt);
} catch (ParseException e) {
throw wrapAndThrow(SEVERE, e, "Invalid expression for date");
}
}
/**
* NOTE: declared as a method to allow for extensibility
* @lucene.experimental
* @return the result of resolving the variable wrapper
*/
protected Object resolveWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) {
return variableWrapper.resolve();
}
}

View File

@ -1,106 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.lang.invoke.MethodHandles;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* {@link Transformer} instance which creates {@link Date} instances out of {@link String}s.
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* <p>
* <b>This API is experimental and subject to change</b>
*
* @since solr 1.3
*/
public class DateFormatTransformer extends Transformer {
private Map<String, SimpleDateFormat> fmtCache = new HashMap<>();
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@Override
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> aRow, Context context) {
for (Map<String, String> map : context.getAllEntityFields()) {
Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
String customLocale = map.get(LOCALE);
if (customLocale != null) {
try {
locale = new Locale.Builder().setLanguageTag(customLocale).build();
} catch (IllformedLocaleException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid Locale specified: " + customLocale, e);
}
}
String fmt = map.get(DATE_TIME_FMT);
if (fmt == null)
continue;
VariableResolver resolver = context.getVariableResolver();
fmt = resolver.replaceTokens(fmt);
String column = map.get(DataImporter.COLUMN);
String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
if (srcCol == null)
srcCol = column;
try {
Object o = aRow.get(srcCol);
if (o instanceof List) {
@SuppressWarnings({"rawtypes"})
List inputs = (List) o;
List<Date> results = new ArrayList<>();
for (Object input : inputs) {
results.add(process(input, fmt, locale));
}
aRow.put(column, results);
} else {
if (o != null) {
aRow.put(column, process(o, fmt, locale));
}
}
} catch (ParseException e) {
log.warn("Could not parse a Date field ", e);
}
}
return aRow;
}
private Date process(Object value, String format, Locale locale) throws ParseException {
if (value == null) return null;
String strVal = value.toString().trim();
if (strVal.length() == 0)
return null;
SimpleDateFormat fmt = fmtCache.get(format);
if (fmt == null) {
fmt = new SimpleDateFormat(format, locale);
fmtCache.put(format, fmt);
}
return fmt.parse(strVal);
}
public static final String DATE_TIME_FMT = "dateTimeFormat";
public static final String LOCALE = "locale";
}

View File

@ -1,66 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
public class DebugInfo {
private static final class ChildRollupDocs extends AbstractList<SolrInputDocument> {
private List<SolrInputDocument> delegate = new ArrayList<>();
@Override
public SolrInputDocument get(int index) {
return delegate.get(index);
}
@Override
public int size() {
return delegate.size();
}
public boolean add(SolrInputDocument e) {
SolrInputDocument transformed = e.deepCopy();
if (transformed.hasChildDocuments()) {
ChildRollupDocs childList = new ChildRollupDocs();
childList.addAll(transformed.getChildDocuments());
transformed.addField("_childDocuments_", childList);
transformed.getChildDocuments().clear();
}
return delegate.add(transformed);
}
}
public List<SolrInputDocument> debugDocuments = new ChildRollupDocs();
public NamedList<String> debugVerboseOutput = null;
public boolean verbose;
public DebugInfo(Map<String,Object> requestParams) {
verbose = StrUtils.parseBool((String) requestParams.get("verbose"), false);
debugVerboseOutput = new NamedList<>();
}
}

View File

@ -1,295 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.util.NamedList;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.MessageFormat;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Stack;
/**
* <p>
* Implements most of the interactive development functionality
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @since solr 1.3
*/
class DebugLogger {
private Stack<DebugInfo> debugStack;
@SuppressWarnings({"rawtypes"})
NamedList output;
// private final SolrWriter writer1;
private static final String LINE = "---------------------------------------------";
private MessageFormat fmt = new MessageFormat(
"----------- row #{0}-------------", Locale.ROOT);
boolean enabled = true;
@SuppressWarnings({"rawtypes"})
public DebugLogger() {
// writer = solrWriter;
output = new NamedList();
debugStack = new Stack<DebugInfo>() {
@Override
public DebugInfo pop() {
if (size() == 1)
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE, "Stack is becoming empty");
return super.pop();
}
};
debugStack.push(new DebugInfo(null, DIHLogLevels.NONE, null));
output = debugStack.peek().lst;
}
private DebugInfo peekStack() {
return debugStack.isEmpty() ? null : debugStack.peek();
}
@SuppressWarnings({"unchecked"})
public void log(DIHLogLevels event, String name, Object row) {
if (event == DIHLogLevels.DISABLE_LOGGING) {
enabled = false;
return;
} else if (event == DIHLogLevels.ENABLE_LOGGING) {
enabled = true;
return;
}
if (!enabled && event != DIHLogLevels.START_ENTITY
&& event != DIHLogLevels.END_ENTITY) {
return;
}
if (event == DIHLogLevels.START_DOC) {
debugStack.push(new DebugInfo(null, DIHLogLevels.START_DOC, peekStack()));
} else if (DIHLogLevels.START_ENTITY == event) {
debugStack
.push(new DebugInfo(name, DIHLogLevels.START_ENTITY, peekStack()));
} else if (DIHLogLevels.ENTITY_OUT == event
|| DIHLogLevels.PRE_TRANSFORMER_ROW == event) {
if (debugStack.peek().type == DIHLogLevels.START_ENTITY
|| debugStack.peek().type == DIHLogLevels.START_DOC) {
debugStack.peek().lst.add(null, fmt.format(new Object[]{++debugStack
.peek().rowCount}));
addToNamedList(debugStack.peek().lst, row);
debugStack.peek().lst.add(null, LINE);
}
} else if (event == DIHLogLevels.ROW_END) {
popAllTransformers();
} else if (DIHLogLevels.END_ENTITY == event) {
while (debugStack.pop().type != DIHLogLevels.START_ENTITY)
;
} else if (DIHLogLevels.END_DOC == event) {
while (debugStack.pop().type != DIHLogLevels.START_DOC)
;
} else if (event == DIHLogLevels.TRANSFORMER_EXCEPTION) {
debugStack.push(new DebugInfo(name, event, peekStack()));
debugStack.peek().lst.add("EXCEPTION",
getStacktraceString((Exception) row));
} else if (DIHLogLevels.TRANSFORMED_ROW == event) {
debugStack.push(new DebugInfo(name, event, peekStack()));
debugStack.peek().lst.add(null, LINE);
addToNamedList(debugStack.peek().lst, row);
debugStack.peek().lst.add(null, LINE);
if (row instanceof DataImportHandlerException) {
DataImportHandlerException dataImportHandlerException = (DataImportHandlerException) row;
dataImportHandlerException.debugged = true;
}
} else if (DIHLogLevels.ENTITY_META == event) {
popAllTransformers();
debugStack.peek().lst.add(name, row);
} else if (DIHLogLevels.ENTITY_EXCEPTION == event) {
if (row instanceof DataImportHandlerException) {
DataImportHandlerException dihe = (DataImportHandlerException) row;
if (dihe.debugged)
return;
dihe.debugged = true;
}
popAllTransformers();
debugStack.peek().lst.add("EXCEPTION",
getStacktraceString((Exception) row));
}
}
private void popAllTransformers() {
while (true) {
DIHLogLevels type = debugStack.peek().type;
if (type == DIHLogLevels.START_DOC || type == DIHLogLevels.START_ENTITY)
break;
debugStack.pop();
}
}
@SuppressWarnings({"unchecked"})
private void addToNamedList(@SuppressWarnings({"rawtypes"})NamedList nl, Object row) {
if (row instanceof List) {
@SuppressWarnings({"rawtypes"})
List list = (List) row;
@SuppressWarnings({"rawtypes"})
NamedList l = new NamedList();
nl.add(null, l);
for (Object o : list) {
Map<String, Object> map = (Map<String, Object>) o;
for (Map.Entry<String, Object> entry : map.entrySet())
nl.add(entry.getKey(), entry.getValue());
}
} else if (row instanceof Map) {
Map<String, Object> map = (Map<String, Object>) row;
for (Map.Entry<String, Object> entry : map.entrySet())
nl.add(entry.getKey(), entry.getValue());
}
}
@SuppressWarnings({"rawtypes"})
DataSource wrapDs(final DataSource ds) {
return new DataSource() {
@Override
public void init(Context context, Properties initProps) {
ds.init(context, initProps);
}
@Override
public void close() {
ds.close();
}
@Override
public Object getData(String query) {
log(DIHLogLevels.ENTITY_META, "query", query);
long start = System.nanoTime();
try {
return ds.getData(query);
} catch (DataImportHandlerException de) {
log(DIHLogLevels.ENTITY_EXCEPTION,
null, de);
throw de;
} catch (Exception e) {
log(DIHLogLevels.ENTITY_EXCEPTION,
null, e);
DataImportHandlerException de = new DataImportHandlerException(
DataImportHandlerException.SEVERE, "", e);
de.debugged = true;
throw de;
} finally {
log(DIHLogLevels.ENTITY_META, "time-taken", DocBuilder
.getTimeElapsedSince(start));
}
}
};
}
Transformer wrapTransformer(final Transformer t) {
return new Transformer() {
@Override
public Object transformRow(Map<String, Object> row, Context context) {
log(DIHLogLevels.PRE_TRANSFORMER_ROW, null, row);
String tName = getTransformerName(t);
Object result = null;
try {
result = t.transformRow(row, context);
log(DIHLogLevels.TRANSFORMED_ROW, tName, result);
} catch (DataImportHandlerException de) {
log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, de);
de.debugged = true;
throw de;
} catch (Exception e) {
log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, e);
DataImportHandlerException de = new DataImportHandlerException(DataImportHandlerException.SEVERE, "", e);
de.debugged = true;
throw de;
}
return result;
}
};
}
public static String getStacktraceString(Exception e) {
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
return sw.toString();
}
static String getTransformerName(Transformer t) {
@SuppressWarnings({"rawtypes"})
Class transClass = t.getClass();
if (t instanceof EntityProcessorWrapper.ReflectionTransformer) {
return ((EntityProcessorWrapper.ReflectionTransformer) t).trans;
}
if (t instanceof ScriptTransformer) {
ScriptTransformer scriptTransformer = (ScriptTransformer) t;
return "script:" + scriptTransformer.getFunctionName();
}
if (transClass.getPackage().equals(DebugLogger.class.getPackage())) {
return transClass.getSimpleName();
} else {
return transClass.getName();
}
}
private static class DebugInfo {
String name;
int tCount, rowCount;
@SuppressWarnings({"rawtypes"})
NamedList lst;
DIHLogLevels type;
DebugInfo parent;
@SuppressWarnings({"unchecked", "rawtypes"})
public DebugInfo(String name, DIHLogLevels type, DebugInfo parent) {
this.name = name;
this.type = type;
this.parent = parent;
lst = new NamedList();
if (parent != null) {
String displayName = null;
if (type == DIHLogLevels.START_ENTITY) {
displayName = "entity:" + name;
} else if (type == DIHLogLevels.TRANSFORMED_ROW
|| type == DIHLogLevels.TRANSFORMER_EXCEPTION) {
displayName = "transformer:" + name;
} else if (type == DIHLogLevels.START_DOC) {
this.name = displayName = "document#" + SolrWriter.getDocCount();
}
parent.lst.add(displayName, lst);
}
}
}
}

View File

@ -1,114 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.io.Closeable;
import java.util.Map;
/**
* <p>
* An instance of entity processor serves an entity. It is reused throughout the
* import process.
* </p>
* <p>
* Implementations of this abstract class must provide a public no-args constructor.
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public abstract class EntityProcessor implements Closeable {
/**
* This method is called when the processor starts processing an entity. When processing
* comes back to the entity it is called again, so any state can be reset at that point.
* For the root-most entity this is called only once per ingestion. For sub-entities, this
* is called once for each row returned by its parent entity.
*
* @param context The current context
*/
public abstract void init(Context context);
/**
* This method streams the data, one row at a time. The implementation
* may fetch as many rows as needed internally and return one 'row' per call. Only this
* method is used during a full import.
*
* @return A 'row'. The 'key' for the map is the column name and the 'value'
* is the value of that column. If there are no more rows to be
* returned, return 'null'
*/
public abstract Map<String, Object> nextRow();
/**
* This is used for delta-import. It gives the pks of the changed rows in this
* entity
*
* @return the pk vs value of all changed rows
*/
public abstract Map<String, Object> nextModifiedRowKey();
/**
* This is used during delta-import. It gives the primary keys of the rows
* that are deleted from this entity. If this entity is the root entity, the Solr
* document is deleted. If this is a sub-entity, the Solr document is
* considered 'changed' and will be recreated.
*
* @return the pk vs value of all changed rows
*/
public abstract Map<String, Object> nextDeletedRowKey();
/**
* This is used during delta-import. This gives the primary keys and their
* values of all the rows changed in a parent entity due to changes in this
* entity.
*
* @return the pk vs value of all changed rows in the parent entity
*/
public abstract Map<String, Object> nextModifiedParentRowKey();
/**
* Invoked for each entity at the very end of the import to do any needed cleanup tasks.
*
*/
public abstract void destroy();
/**
* Invoked after the transformers are invoked. EntityProcessors can add, remove or modify values
* added by Transformers in this method.
*
* @param r The transformed row
* @since solr 1.4
*/
public void postTransform(Map<String, Object> r) {
}
/**
* Invoked when the Entity processor is destroyed towards the end of import.
*
* @since solr 1.4
*/
public void close() {
//no-op
}
}

View File

@ -1,174 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.SolrException;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.util.*;
/**
* <p> Base class for all implementations of {@link EntityProcessor} </p> <p> Most implementations of {@link EntityProcessor}
* extend this base class which provides common functionality. </p>
* <p>
* <b>This API is experimental and subject to change</b>
*
* @since solr 1.3
*/
public class EntityProcessorBase extends EntityProcessor {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected boolean isFirstInit = true;
protected String entityName;
protected Context context;
protected Iterator<Map<String, Object>> rowIterator;
protected String query;
protected String onError = ABORT;
protected DIHCacheSupport cacheSupport = null;
private Zipper zipper;
@Override
public void init(Context context) {
this.context = context;
if (isFirstInit) {
firstInit(context);
}
if(zipper!=null){
zipper.onNewParent(context);
}else{
if(cacheSupport!=null) {
cacheSupport.initNewParent(context);
}
}
}
/**
* First-time init call; do one-time operations here.
* It is necessary to call this from any overriding method,
* otherwise an NPE is thrown when nextRow() accesses zipper.
*/
protected void firstInit(Context context) {
entityName = context.getEntityAttribute("name");
String s = context.getEntityAttribute(ON_ERROR);
if (s != null) onError = s;
zipper = Zipper.createOrNull(context);
if(zipper==null){
initCache(context);
}
isFirstInit = false;
}
protected void initCache(Context context) {
String cacheImplName = context
.getResolvedEntityAttribute(DIHCacheSupport.CACHE_IMPL);
if (cacheImplName != null ) {
cacheSupport = new DIHCacheSupport(context, cacheImplName);
}
}
@Override
public Map<String, Object> nextModifiedRowKey() {
return null;
}
@Override
public Map<String, Object> nextDeletedRowKey() {
return null;
}
@Override
public Map<String, Object> nextModifiedParentRowKey() {
return null;
}
/**
* For a simple implementation, this is the only method that the sub-class should implement. This is intended to
* stream rows one-by-one. Return null to signal end of rows
*
* @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return
* null to signal end of rows
*/
@Override
public Map<String, Object> nextRow() {
return null;// do not do anything
}
protected Map<String, Object> getNext() {
if(zipper!=null){
return zipper.supplyNextChild(rowIterator);
}else{
if(cacheSupport==null) {
try {
if (rowIterator == null)
return null;
if (rowIterator.hasNext())
return rowIterator.next();
query = null;
rowIterator = null;
return null;
} catch (Exception e) {
SolrException.log(log, "getNext() failed for query '" + query + "'", e);
query = null;
rowIterator = null;
wrapAndThrow(DataImportHandlerException.WARN, e);
return null;
}
} else {
return cacheSupport.getCacheData(context, query, rowIterator);
}
}
}
@Override
public void destroy() {
query = null;
if(cacheSupport!=null){
cacheSupport.destroyAll();
}
cacheSupport = null;
}
public static final String TRANSFORMER = "transformer";
public static final String TRANSFORM_ROW = "transformRow";
public static final String ON_ERROR = "onError";
public static final String ABORT = "abort";
public static final String CONTINUE = "continue";
public static final String SKIP = "skip";
}
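For reference, a hedged data-config.xml sketch of the entity-level attributes this base class reads: onError (abort, continue, or skip) and the cache implementation handed to DIHCacheSupport. The entity name, processor, query, and the cacheImpl attribute/value shown here are assumptions, not taken from this file:
<entity name="orders" processor="SqlEntityProcessor" query="select * from orders"
        onError="continue" cacheImpl="SortedMapBackedCache">
  <field column="id"/>
</entity>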

View File

@ -1,357 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
import org.apache.solr.handler.dataimport.config.Entity;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
import static org.apache.solr.handler.dataimport.EntityProcessorBase.*;
import static org.apache.solr.handler.dataimport.EntityProcessorBase.SKIP;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
* A Wrapper over {@link EntityProcessor} instance which performs transforms and handles multi-row outputs correctly.
*
* @since solr 1.4
*/
public class EntityProcessorWrapper extends EntityProcessor {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private EntityProcessor delegate;
private Entity entity;
@SuppressWarnings({"rawtypes"})
private DataSource datasource;
private List<EntityProcessorWrapper> children = new ArrayList<>();
private DocBuilder docBuilder;
private boolean initialized;
private String onError;
private Context context;
private VariableResolver resolver;
private String entityName;
protected List<Transformer> transformers;
protected List<Map<String, Object>> rowcache;
public EntityProcessorWrapper(EntityProcessor delegate, Entity entity, DocBuilder docBuilder) {
this.delegate = delegate;
this.entity = entity;
this.docBuilder = docBuilder;
}
@Override
public void init(Context context) {
rowcache = null;
this.context = context;
resolver = context.getVariableResolver();
if (entityName == null) {
onError = resolver.replaceTokens(context.getEntityAttribute(ON_ERROR));
if (onError == null) onError = ABORT;
entityName = context.getEntityAttribute(ConfigNameConstants.NAME);
}
delegate.init(context);
}
@SuppressWarnings({"unchecked"})
void loadTransformers() {
String transClasses = context.getEntityAttribute(TRANSFORMER);
if (transClasses == null) {
transformers = Collections.emptyList();
return;
}
String[] transArr = transClasses.split(",");
transformers = new ArrayList<Transformer>() {
@Override
public boolean add(Transformer transformer) {
if (docBuilder != null && docBuilder.verboseDebug) {
transformer = docBuilder.getDebugLogger().wrapTransformer(transformer);
}
return super.add(transformer);
}
};
for (String aTransArr : transArr) {
String trans = aTransArr.trim();
if (trans.startsWith("script:")) {
// The script transformer is a potential vulnerability, esp. when the script is
// provided from an untrusted source. Check and don't proceed if source is untrusted.
checkIfTrusted(trans);
String functionName = trans.substring("script:".length());
ScriptTransformer scriptTransformer = new ScriptTransformer();
scriptTransformer.setFunctionName(functionName);
transformers.add(scriptTransformer);
continue;
}
try {
@SuppressWarnings({"rawtypes"})
Class clazz = DocBuilder.loadClass(trans, context.getSolrCore());
if (Transformer.class.isAssignableFrom(clazz)) {
transformers.add((Transformer) clazz.getConstructor().newInstance());
} else {
Method meth = clazz.getMethod(TRANSFORM_ROW, Map.class);
transformers.add(new ReflectionTransformer(meth, clazz, trans));
}
} catch (NoSuchMethodException nsme){
String msg = "Transformer :"
+ trans
+ "does not implement Transformer interface or does not have a transformRow(Map<String.Object> m)method";
log.error(msg);
wrapAndThrow(SEVERE, nsme,msg);
} catch (Exception e) {
log.error("Unable to load Transformer: {}", aTransArr, e);
wrapAndThrow(SEVERE, e,"Unable to load Transformer: " + trans);
}
}
}
private void checkIfTrusted(String trans) {
if (docBuilder != null) {
SolrCore core = docBuilder.dataImporter.getCore();
boolean trusted = (core != null)? core.getCoreDescriptor().isConfigSetTrusted(): true;
if (!trusted) {
Exception ex = new SolrException(ErrorCode.UNAUTHORIZED, "The configset for this collection was uploaded "
+ "without any authentication in place,"
+ " and this transformer is not available for collections with untrusted configsets. To use this transformer,"
+ " re-upload the configset after enabling authentication and authorization.");
String msg = "Transformer: "
+ trans
+ ". " + ex.getMessage();
log.error(msg);
wrapAndThrow(SEVERE, ex, msg);
}
}
}
@SuppressWarnings("unchecked")
static class ReflectionTransformer extends Transformer {
final Method meth;
@SuppressWarnings({"rawtypes"})
final Class clazz;
final String trans;
final Object o;
public ReflectionTransformer(Method meth, @SuppressWarnings({"rawtypes"})Class clazz, String trans)
throws Exception {
this.meth = meth;
this.clazz = clazz;
this.trans = trans;
o = clazz.getConstructor().newInstance();
}
@Override
public Object transformRow(Map<String, Object> aRow, Context context) {
try {
return meth.invoke(o, aRow);
} catch (Exception e) {
log.warn("method invocation failed on transformer : {}", trans, e);
throw new DataImportHandlerException(WARN, e);
}
}
}
protected Map<String, Object> getFromRowCache() {
Map<String, Object> r = rowcache.remove(0);
if (rowcache.isEmpty())
rowcache = null;
return r;
}
@SuppressWarnings("unchecked")
protected Map<String, Object> applyTransformer(Map<String, Object> row) {
if(row == null) return null;
if (transformers == null)
loadTransformers();
if (transformers == Collections.EMPTY_LIST)
return row;
Map<String, Object> transformedRow = row;
List<Map<String, Object>> rows = null;
boolean stopTransform = checkStopTransform(row);
VariableResolver resolver = context.getVariableResolver();
for (Transformer t : transformers) {
if (stopTransform) break;
try {
if (rows != null) {
List<Map<String, Object>> tmpRows = new ArrayList<>();
for (Map<String, Object> map : rows) {
resolver.addNamespace(entityName, map);
Object o = t.transformRow(map, context);
if (o == null)
continue;
if (o instanceof Map) {
@SuppressWarnings({"rawtypes"})
Map oMap = (Map) o;
stopTransform = checkStopTransform(oMap);
tmpRows.add((Map) o);
} else if (o instanceof List) {
tmpRows.addAll((List) o);
} else {
log.error("Transformer must return Map<String, Object> or a List<Map<String, Object>>");
}
}
rows = tmpRows;
} else {
resolver.addNamespace(entityName, transformedRow);
Object o = t.transformRow(transformedRow, context);
if (o == null)
return null;
if (o instanceof Map) {
@SuppressWarnings({"rawtypes"})
Map oMap = (Map) o;
stopTransform = checkStopTransform(oMap);
transformedRow = (Map) o;
} else if (o instanceof List) {
rows = (List) o;
} else {
log.error("Transformer must return Map<String, Object> or a List<Map<String, Object>>");
}
}
} catch (Exception e) {
log.warn("transformer threw error", e);
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, e);
} else if (SKIP.equals(onError)) {
wrapAndThrow(DataImportHandlerException.SKIP, e);
}
// onError = continue
}
}
if (rows == null) {
return transformedRow;
} else {
rowcache = rows;
return getFromRowCache();
}
}
private boolean checkStopTransform(@SuppressWarnings({"rawtypes"})Map oMap) {
return oMap.get("$stopTransform") != null
&& Boolean.parseBoolean(oMap.get("$stopTransform").toString());
}
@Override
public Map<String, Object> nextRow() {
if (rowcache != null) {
return getFromRowCache();
}
while (true) {
Map<String, Object> arow = null;
try {
arow = delegate.nextRow();
} catch (Exception e) {
if(ABORT.equals(onError)){
wrapAndThrow(SEVERE, e);
} else {
//SKIP is not really possible. If this called nextRow() again the EntityProcessor would be in an inconsistent state
SolrException.log(log, "Exception in entity : "+ entityName, e);
return null;
}
}
if (arow == null) {
return null;
} else {
arow = applyTransformer(arow);
if (arow != null) {
delegate.postTransform(arow);
return arow;
}
}
}
}
@Override
public Map<String, Object> nextModifiedRowKey() {
Map<String, Object> row = delegate.nextModifiedRowKey();
row = applyTransformer(row);
rowcache = null;
return row;
}
@Override
public Map<String, Object> nextDeletedRowKey() {
Map<String, Object> row = delegate.nextDeletedRowKey();
row = applyTransformer(row);
rowcache = null;
return row;
}
@Override
public Map<String, Object> nextModifiedParentRowKey() {
return delegate.nextModifiedParentRowKey();
}
@Override
public void destroy() {
delegate.destroy();
}
public VariableResolver getVariableResolver() {
return context.getVariableResolver();
}
public Context getContext() {
return context;
}
@Override
public void close() {
delegate.close();
}
public Entity getEntity() {
return entity;
}
public List<EntityProcessorWrapper> getChildren() {
return children;
}
@SuppressWarnings({"rawtypes"})
public DataSource getDatasource() {
return datasource;
}
public void setDatasource(@SuppressWarnings({"rawtypes"})DataSource datasource) {
this.datasource = datasource;
}
public boolean isInitialized() {
return initialized;
}
public void setInitialized(boolean initialized) {
this.initialized = initialized;
}
}
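For reference, a sketch of the transformer attribute that loadTransformers() above parses: a comma-separated list of Transformer classes, with the script: prefix resolving to a ScriptTransformer function. The entity, query, column, and script function names are hypothetical:
<entity name="page" query="select id, title, updated from page"
        transformer="RegexTransformer,DateFormatTransformer,script:normalizeTitle">
  <field column="updated" dateTimeFormat="yyyy-MM-dd"/>
</entity>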

View File

@ -1,140 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
* <p>
* Pluggable functions for resolving variables
* </p>
* <p>
* Implementations of this abstract class must provide a public no-arg constructor.
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public abstract class Evaluator {
/**
* Return a String after processing an expression and a {@link VariableResolver}
*
* @see VariableResolver
* @param expression string to be evaluated
* @param context instance
* @return the value of the given expression evaluated using the resolver
*/
public abstract String evaluate(String expression, Context context);
/**
* Parses an expression string into separate params. The values are separated by commas. Each value will be
* translated into one of the following:
* &lt;ol&gt;
* &lt;li&gt;If it is in single quotes the value will be translated to a String&lt;/li&gt;
* &lt;li&gt;If it is not in quotes and is a number it will be translated into a Double&lt;/li&gt;
* &lt;li&gt;Otherwise it is a variable which can be resolved, and it will be put in as an instance of VariableWrapper&lt;/li&gt;
* &lt;/ol&gt;
*
* @param expression the expression to be parsed
* @param vr the VariableResolver instance for resolving variables
*
* @return a List of objects which can either be a string, number or a variable wrapper
*/
protected List<Object> parseParams(String expression, VariableResolver vr) {
List<Object> result = new ArrayList<>();
expression = expression.trim();
String[] ss = expression.split(",");
for (int i = 0; i < ss.length; i++) {
ss[i] = ss[i].trim();
if (ss[i].startsWith("'")) {//a string param has started
StringBuilder sb = new StringBuilder();
while (true) {
sb.append(ss[i]);
if (ss[i].endsWith("'")) break;
i++;
if (i >= ss.length)
throw new DataImportHandlerException(SEVERE, "invalid string at " + ss[i - 1] + " in function params: " + expression);
sb.append(",");
}
String s = sb.substring(1, sb.length() - 1);
s = s.replaceAll("\\\\'", "'");
result.add(s);
} else {
if (Character.isDigit(ss[i].charAt(0))) {
try {
Double doub = Double.parseDouble(ss[i]);
result.add(doub);
} catch (NumberFormatException e) {
if (vr.resolve(ss[i]) == null) {
wrapAndThrow(
SEVERE, e, "Invalid number :" + ss[i] +
"in parameters " + expression);
}
}
} else {
result.add(getVariableWrapper(ss[i], vr));
}
}
}
return result;
}
protected VariableWrapper getVariableWrapper(String s, VariableResolver vr) {
return new VariableWrapper(s,vr);
}
static protected class VariableWrapper {
public final String varName;
public final VariableResolver vr;
public VariableWrapper(String s, VariableResolver vr) {
this.varName = s;
this.vr = vr;
}
public Object resolve() {
return vr.resolve(varName);
}
@Override
public String toString() {
Object o = vr.resolve(varName);
return o == null ? null : o.toString();
}
}
static Pattern IN_SINGLE_QUOTES = Pattern.compile("^'(.*?)'$");
public static final String DATE_FORMAT_EVALUATOR = "formatDate";
public static final String URL_ENCODE_EVALUATOR = "encodeUrl";
public static final String ESCAPE_SOLR_QUERY_CHARS = "escapeQueryChars";
public static final String SQL_ESCAPE_EVALUATOR = "escapeSql";
}
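For reference, a rough sketch of how the named evaluators above (formatDate, encodeUrl, escapeQueryChars, escapeSql) were invoked from data-config expressions. The ${dataimporter.functions...} prefix and all entity details are assumptions based on the DIH documentation rather than this file; parent.title stands for a field of a hypothetical enclosing entity:
<entity name="lookup" processor="XPathEntityProcessor" forEach="/response/doc"
        url="http://example.com/api?q=${dataimporter.functions.encodeUrl(parent.title)}">
  <field column="title" xpath="/response/doc/str[@name='title']"/>
</entity>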

View File

@ -1,35 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
/**
* Event listener for DataImportHandler
*
* <b>This API is experimental and subject to change</b>
*
* @since solr 1.4
*/
public interface EventListener {
/**
* Event callback
*
* @param ctx the Context in which this event was called
*/
void onEvent(Context ctx);
}
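For reference, listeners implementing this interface were registered on the <document> element of data-config.xml; the listener class below is hypothetical and the onImportStart/onImportEnd attribute names are recalled from the DIH documentation, so treat this as a sketch to verify:
<document onImportStart="com.example.dih.AuditListener" onImportEnd="com.example.dih.AuditListener">
  <entity name="item" query="select * from item"/>
</document>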

View File

@ -1,122 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.SQLException;
import java.util.Properties;
/**
* This can be useful for users who have a DB field containing xml and wish to use a nested {@link XPathEntityProcessor}
* <p>
* The datasource may be configured as follows
* <p>
* &lt;datasource name="f1" type="FieldReaderDataSource" /&gt;
* <p>
* The entity which uses this datasource must set its url attribute to the name of the field, e.g. url="field-name"
* <p>
* The fieldname must be resolvable from {@link VariableResolver}
* <p>
* This may be used with any {@link EntityProcessor} which uses a {@link DataSource}&lt;{@link Reader}&gt; eg: {@link XPathEntityProcessor}
* <p>
* Supports String, BLOB and CLOB data types; an extra entity attribute 'encoding' applies to BLOB types
*
* @since 1.4
*/
public class FieldReaderDataSource extends DataSource<Reader> {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected VariableResolver vr;
protected String dataField;
private String encoding;
private EntityProcessorWrapper entityProcessor;
@Override
public void init(Context context, Properties initProps) {
dataField = context.getEntityAttribute("dataField");
encoding = context.getEntityAttribute("encoding");
entityProcessor = (EntityProcessorWrapper) context.getEntityProcessor();
/*no op*/
}
@Override
public Reader getData(String query) {
Object o = entityProcessor.getVariableResolver().resolve(dataField);
if (o == null) {
throw new DataImportHandlerException (SEVERE, "No field available for name : " +dataField);
}
if (o instanceof String) {
return new StringReader((String) o);
} else if (o instanceof Clob) {
Clob clob = (Clob) o;
try {
//Most of the JDBC drivers have getCharacterStream defined as public
// so let us just check it
return readCharStream(clob);
} catch (Exception e) {
log.info("Unable to get data from CLOB");
return null;
}
} else if (o instanceof Blob) {
Blob blob = (Blob) o;
try {
return getReader(blob);
} catch (Exception e) {
log.info("Unable to get data from BLOB");
return null;
}
} else {
return new StringReader(o.toString());
}
}
static Reader readCharStream(Clob clob) {
try {
return clob.getCharacterStream();
} catch (Exception e) {
wrapAndThrow(SEVERE, e,"Unable to get reader from clob");
return null;//unreachable
}
}
private Reader getReader(Blob blob)
throws SQLException, UnsupportedEncodingException {
if (encoding == null) {
return (new InputStreamReader(blob.getBinaryStream(), StandardCharsets.UTF_8));
} else {
return (new InputStreamReader(blob.getBinaryStream(), encoding));
}
}
@Override
public void close() {
}
}
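Expanding on the snippet in the javadoc above, a hedged sketch pairing this data source with a nested XPathEntityProcessor; all names are illustrative, and dataField is the attribute read in init() above:
<dataSource name="fieldReader" type="FieldReaderDataSource"/>
<entity name="dbRow" dataSource="jdbc" query="select id, xml_payload from docs">
  <entity name="payload" dataSource="fieldReader" processor="XPathEntityProcessor"
          dataField="dbRow.xml_payload" forEach="/record">
    <field column="title" xpath="/record/title"/>
  </entity>
</entity>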

View File

@ -1,85 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.lang.invoke.MethodHandles;
import java.sql.Blob;
import java.sql.SQLException;
import java.util.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This can be useful for users who have a DB field containing BLOBs which may be Rich documents
* <p>
* The datasource may be configured as follows
* <p>
* &lt;dataSource name="f1" type="FieldStreamDataSource" /&gt;
* <p>
* The entity which uses this datasource must have an attribute 'dataField'
* <p>
* The fieldname must be resolvable from {@link VariableResolver}
* <p>
* This may be used with any {@link EntityProcessor} which uses a {@link DataSource}&lt;{@link InputStream}&gt; eg: TikaEntityProcessor
*
* @since 3.1
*/
public class FieldStreamDataSource extends DataSource<InputStream> {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected VariableResolver vr;
protected String dataField;
private EntityProcessorWrapper wrapper;
@Override
public void init(Context context, Properties initProps) {
dataField = context.getEntityAttribute("dataField");
wrapper = (EntityProcessorWrapper) context.getEntityProcessor();
/*no op*/
}
@Override
public InputStream getData(String query) {
Object o = wrapper.getVariableResolver().resolve(dataField);
if (o == null) {
throw new DataImportHandlerException(SEVERE, "No field available for name : " + dataField);
} else if (o instanceof Blob) {
Blob blob = (Blob) o;
try {
return blob.getBinaryStream();
} catch (SQLException sqle) {
log.info("Unable to get data from BLOB");
return null;
}
} else if (o instanceof byte[]) {
byte[] bytes = (byte[]) o;
return new ByteArrayInputStream(bytes);
} else {
throw new RuntimeException("unsupported type : " + o.getClass());
}
}
@Override
public void close() {
}
}
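A similar hedged sketch for the BLOB case, feeding a TikaEntityProcessor as the javadoc suggests; the names and the format attribute are assumptions:
<dataSource name="fieldStream" type="FieldStreamDataSource"/>
<entity name="attachment" dataSource="jdbc" query="select id, pdf_blob from attachments">
  <entity name="tika" dataSource="fieldStream" processor="TikaEntityProcessor"
          dataField="attachment.pdf_blob" format="text">
    <field column="text" name="content"/>
  </entity>
</entity>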

View File

@ -1,155 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.io.*;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
/**
* <p>
* A {@link DataSource} which reads from local files
* </p>
* <p>
* The file is read as UTF-8 by default. The encoding can be overridden by
* specifying it in solrconfig.xml
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public class FileDataSource extends DataSource<Reader> {
public static final String BASE_PATH = "basePath";
/**
* The basePath for this data source
*/
protected String basePath;
/**
* The encoding using which the given file should be read
*/
protected String encoding = null;
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@Override
public void init(Context context, Properties initProps) {
basePath = initProps.getProperty(BASE_PATH);
if (initProps.get(URLDataSource.ENCODING) != null)
encoding = initProps.getProperty(URLDataSource.ENCODING);
}
/**
* <p>
* Returns a reader for the given file.
* </p>
* <p>
* If the given file is not absolute, we try to construct an absolute path
* using basePath configuration. If that fails, then the relative path is
* tried. If the file is not found, a RuntimeException is thrown.
* </p>
* <p>
* <b>It is the responsibility of the calling method to properly close the
* returned Reader</b>
* </p>
*/
@Override
public Reader getData(String query) {
File f = getFile(basePath,query);
try {
return openStream(f);
} catch (Exception e) {
wrapAndThrow(SEVERE,e,"Unable to open File : "+f.getAbsolutePath());
return null;
}
}
static File getFile(String basePath, String query) {
try {
File file = new File(query);
// If it's not an absolute path, try relative from basePath.
if (!file.isAbsolute()) {
// Resolve and correct basePath.
File basePathFile;
if (basePath == null) {
basePathFile = new File(".").getAbsoluteFile();
log.warn("FileDataSource.basePath is empty. Resolving to: {}"
, basePathFile.getAbsolutePath());
} else {
basePathFile = new File(basePath);
if (!basePathFile.isAbsolute()) {
basePathFile = basePathFile.getAbsoluteFile();
log.warn("FileDataSource.basePath is not absolute. Resolving to: {}"
, basePathFile.getAbsolutePath());
}
}
file = new File(basePathFile, query).getAbsoluteFile();
}
if (file.isFile() && file.canRead()) {
if (log.isDebugEnabled()) {
log.debug("Accessing File: {}", file.getAbsolutePath());
}
return file;
} else {
throw new FileNotFoundException("Could not find file: " + query +
" (resolved to: " + file.getAbsolutePath() + ")");
}
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
}
/**
* Open a {@link java.io.Reader} for the given file name
*
* @param file a {@link java.io.File} instance
* @return a Reader on the given file
* @throws FileNotFoundException if the File does not exist
* @throws UnsupportedEncodingException if the encoding is unsupported
* @since solr 1.4
*/
protected Reader openStream(File file) throws FileNotFoundException,
UnsupportedEncodingException {
if (encoding == null) {
return new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8);
} else {
return new InputStreamReader(new FileInputStream(file), encoding);
}
}
@Override
public void close() {
}
}
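For reference, a minimal sketch of configuring this data source; the path is hypothetical, while basePath and encoding are the property names read in init() above:
<dataSource name="files" type="FileDataSource" basePath="/var/data/docs" encoding="UTF-8"/>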

View File

@ -1,305 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.io.File;
import java.io.FilenameFilter;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.solr.util.DateMathParser;
/**
* <p>
* An {@link EntityProcessor} instance which can stream file names found in a given base
* directory matching patterns and returning rows containing file information.
* </p>
* <p>
* It supports querying a given base directory by matching:
* <ul>
* <li>regular expressions to file names</li>
* <li>excluding certain files based on regular expression</li>
* <li>last modification date (newer or older than a given date or time)</li>
* <li>size (bigger or smaller than size given in bytes)</li>
* <li>recursively iterating through sub-directories</li>
* </ul>
* Its output can be used along with {@link FileDataSource} to read from files in file
* systems.
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
* @see Pattern
*/
public class FileListEntityProcessor extends EntityProcessorBase {
/**
* A regex pattern to identify files given in data-config.xml after resolving any variables
*/
protected String fileName;
/**
* The baseDir given in data-config.xml after resolving any variables
*/
protected String baseDir;
/**
* A Regex pattern of excluded file names as given in data-config.xml after resolving any variables
*/
protected String excludes;
/**
* The newerThan given in data-config as a {@link java.util.Date}
* <p>
* <b>Note: </b> This variable is resolved just-in-time in the {@link #nextRow()} method.
* </p>
*/
protected Date newerThan;
/**
* The olderThan given in data-config as a {@link java.util.Date}
*/
protected Date olderThan;
/**
* The biggerThan given in data-config as a long value
* <p>
* <b>Note: </b> This variable is resolved just-in-time in the {@link #nextRow()} method.
* </p>
*/
protected long biggerThan = -1;
/**
* The smallerThan given in data-config as a long value
* <p>
* <b>Note: </b> This variable is resolved just-in-time in the {@link #nextRow()} method.
* </p>
*/
protected long smallerThan = -1;
/**
* The recursive given in data-config. Default value is false.
*/
protected boolean recursive = false;
private Pattern fileNamePattern, excludesPattern;
@Override
public void init(Context context) {
super.init(context);
fileName = context.getEntityAttribute(FILE_NAME);
if (fileName != null) {
fileName = context.replaceTokens(fileName);
fileNamePattern = Pattern.compile(fileName);
}
baseDir = context.getEntityAttribute(BASE_DIR);
if (baseDir == null)
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"'baseDir' is a required attribute");
baseDir = context.replaceTokens(baseDir);
File dir = new File(baseDir);
if (!dir.isDirectory())
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"'baseDir' value: " + baseDir + " is not a directory");
String r = context.getEntityAttribute(RECURSIVE);
if (r != null)
recursive = Boolean.parseBoolean(r);
excludes = context.getEntityAttribute(EXCLUDES);
if (excludes != null) {
excludes = context.replaceTokens(excludes);
excludesPattern = Pattern.compile(excludes);
}
}
/**
* Get the Date object corresponding to the given string.
*
* @param dateStr the date string. It can be a DateMath string or it may have an evaluator function
* @return a Date instance corresponding to the input string
*/
private Date getDate(String dateStr) {
if (dateStr == null)
return null;
Matcher m = PLACE_HOLDER_PATTERN.matcher(dateStr);
if (m.find()) {
Object o = context.resolve(m.group(1));
if (o instanceof Date) return (Date)o;
dateStr = (String) o;
} else {
dateStr = context.replaceTokens(dateStr);
}
m = Evaluator.IN_SINGLE_QUOTES.matcher(dateStr);
if (m.find()) {
String expr = m.group(1);
//TODO refactor DateMathParser.parseMath a bit to have a static method for this logic.
if (expr.startsWith("NOW")) {
expr = expr.substring("NOW".length());
}
try {
// DWS TODO: is this TimeZone the right default for us? Deserves explanation if so.
return new DateMathParser(TimeZone.getDefault()).parseMath(expr);
} catch (ParseException exp) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Invalid expression for date", exp);
}
}
try {
return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).parse(dateStr);
} catch (ParseException exp) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Invalid expression for date", exp);
}
}
/**
* Get the Long value for the given string after resolving any evaluator or variable.
*
* @param sizeStr the size as a string
* @return the Long value corresponding to the given string
*/
private Long getSize(String sizeStr) {
if (sizeStr == null)
return null;
Matcher m = PLACE_HOLDER_PATTERN.matcher(sizeStr);
if (m.find()) {
Object o = context.resolve(m.group(1));
if (o instanceof Number) {
Number number = (Number) o;
return number.longValue();
}
sizeStr = (String) o;
} else {
sizeStr = context.replaceTokens(sizeStr);
}
return Long.parseLong(sizeStr);
}
@Override
public Map<String, Object> nextRow() {
if (rowIterator != null)
return getNext();
List<Map<String, Object>> fileDetails = new ArrayList<>();
File dir = new File(baseDir);
String dateStr = context.getEntityAttribute(NEWER_THAN);
newerThan = getDate(dateStr);
dateStr = context.getEntityAttribute(OLDER_THAN);
olderThan = getDate(dateStr);
String biggerThanStr = context.getEntityAttribute(BIGGER_THAN);
if (biggerThanStr != null)
biggerThan = getSize(biggerThanStr);
String smallerThanStr = context.getEntityAttribute(SMALLER_THAN);
if (smallerThanStr != null)
smallerThan = getSize(smallerThanStr);
getFolderFiles(dir, fileDetails);
rowIterator = fileDetails.iterator();
return getNext();
}
private void getFolderFiles(File dir, final List<Map<String, Object>> fileDetails) {
// Fetch an array of file objects that pass the filter; however, the
// returned array is never populated, because accept() always returns false.
// Instead we make use of the fileDetails list, which is populated as
// a side effect of the accept method.
dir.list(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
File fileObj = new File(dir, name);
if (fileObj.isDirectory()) {
if (recursive) getFolderFiles(fileObj, fileDetails);
} else if (fileNamePattern == null) {
addDetails(fileDetails, dir, name);
} else if (fileNamePattern.matcher(name).find()) {
if (excludesPattern != null && excludesPattern.matcher(name).find())
return false;
addDetails(fileDetails, dir, name);
}
return false;
}
});
}
private void addDetails(List<Map<String, Object>> files, File dir, String name) {
Map<String, Object> details = new HashMap<>();
File aFile = new File(dir, name);
if (aFile.isDirectory()) return;
long sz = aFile.length();
Date lastModified = new Date(aFile.lastModified());
if (biggerThan != -1 && sz <= biggerThan)
return;
if (smallerThan != -1 && sz >= smallerThan)
return;
if (olderThan != null && lastModified.after(olderThan))
return;
if (newerThan != null && lastModified.before(newerThan))
return;
details.put(DIR, dir.getAbsolutePath());
details.put(FILE, name);
details.put(ABSOLUTE_FILE, aFile.getAbsolutePath());
details.put(SIZE, sz);
details.put(LAST_MODIFIED, lastModified);
files.add(details);
}
public static final Pattern PLACE_HOLDER_PATTERN = Pattern
.compile("\\$\\{(.*?)\\}");
public static final String DIR = "fileDir";
public static final String FILE = "file";
public static final String ABSOLUTE_FILE = "fileAbsolutePath";
public static final String SIZE = "fileSize";
public static final String LAST_MODIFIED = "fileLastModified";
public static final String FILE_NAME = "fileName";
public static final String BASE_DIR = "baseDir";
public static final String EXCLUDES = "excludes";
public static final String NEWER_THAN = "newerThan";
public static final String OLDER_THAN = "olderThan";
public static final String BIGGER_THAN = "biggerThan";
public static final String SMALLER_THAN = "smallerThan";
public static final String RECURSIVE = "recursive";
}
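For reference, a hedged sketch combining this processor with a FileDataSource (assumed to be declared as name="files", as sketched earlier in this section) and a nested XPathEntityProcessor, using the attribute and output-column names defined above; the paths, patterns, and the rootEntity attribute are illustrative assumptions:
<entity name="fileList" processor="FileListEntityProcessor" rootEntity="false"
        baseDir="/var/data/docs" fileName=".*\.xml$" excludes=".*draft.*"
        recursive="true" newerThan="'NOW-7DAYS'">
  <entity name="doc" processor="XPathEntityProcessor" dataSource="files"
          url="${fileList.fileAbsolutePath}" forEach="/doc">
    <field column="id" xpath="/doc/id"/>
  </entity>
</entity>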

View File

@ -1,96 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import java.io.IOException;
import java.io.StringReader;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* A {@link Transformer} implementation which strips off HTML tags using {@link HTMLStripCharFilter}. This is useful
* when you do not need the HTML markup.
*
* @see HTMLStripCharFilter
* @since solr 1.4
*/
public class HTMLStripTransformer extends Transformer {
@Override
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) {
List<Map<String, String>> fields = context.getAllEntityFields();
for (Map<String, String> field : fields) {
String col = field.get(DataImporter.COLUMN);
String splitHTML = context.replaceTokens(field.get(STRIP_HTML));
if (!TRUE.equals(splitHTML))
continue;
Object tmpVal = row.get(col);
if (tmpVal == null)
continue;
if (tmpVal instanceof List) {
List<String> inputs = (List<String>) tmpVal;
@SuppressWarnings({"rawtypes"})
List results = new ArrayList();
for (String input : inputs) {
if (input == null)
continue;
Object o = stripHTML(input, col);
if (o != null)
results.add(o);
}
row.put(col, results);
} else {
String value = tmpVal.toString();
Object o = stripHTML(value, col);
if (o != null)
row.put(col, o);
}
}
return row;
}
private Object stripHTML(String value, String column) {
StringBuilder out = new StringBuilder();
StringReader strReader = new StringReader(value);
try {
HTMLStripCharFilter html = new HTMLStripCharFilter(strReader.markSupported() ? strReader : new BufferedReader(strReader));
char[] cbuf = new char[1024 * 10];
while (true) {
int count = html.read(cbuf);
if (count == -1)
break; // end of stream mark is -1
if (count > 0)
out.append(cbuf, 0, count);
}
html.close();
} catch (IOException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Failed stripping HTML for column: " + column, e);
}
return out.toString();
}
public static final String STRIP_HTML = "stripHTML";
public static final String TRUE = "true";
}
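For reference, a minimal sketch of enabling this transformer on a column; the entity and column names are hypothetical, while stripHTML="true" is the flag checked above:
<entity name="page" transformer="HTMLStripTransformer" query="select id, body_html from pages">
  <field column="body_html" stripHTML="true"/>
</entity>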

View File

@ -1,583 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import org.apache.solr.common.SolrException;
import org.apache.solr.util.CryptoKeys;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.naming.InitialContext;
import javax.naming.NamingException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.sql.*;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
/**
* <p> A DataSource implementation which can fetch data using JDBC. </p> <p> Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
* details. </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public class JdbcDataSource extends
DataSource<Iterator<Map<String, Object>>> {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected Callable<Connection> factory;
private long connLastUsed = 0;
private Connection conn;
private ResultSetIterator resultSetIterator;
private Map<String, Integer> fieldNameVsType = new HashMap<>();
private boolean convertType = false;
private int batchSize = FETCH_SIZE;
private int maxRows = 0;
@Override
public void init(Context context, Properties initProps) {
resolveVariables(context, initProps);
initProps = decryptPwd(context, initProps);
Object o = initProps.get(CONVERT_TYPE);
if (o != null)
convertType = Boolean.parseBoolean(o.toString());
factory = createConnectionFactory(context, initProps);
String bsz = initProps.getProperty("batchSize");
if (bsz != null) {
bsz = context.replaceTokens(bsz);
try {
batchSize = Integer.parseInt(bsz);
if (batchSize == -1)
batchSize = Integer.MIN_VALUE;
} catch (NumberFormatException e) {
log.warn("Invalid batch size: {}", bsz);
}
}
for (Map<String, String> map : context.getAllEntityFields()) {
String n = map.get(DataImporter.COLUMN);
String t = map.get(DataImporter.TYPE);
if ("sint".equals(t) || "integer".equals(t))
fieldNameVsType.put(n, Types.INTEGER);
else if ("slong".equals(t) || "long".equals(t))
fieldNameVsType.put(n, Types.BIGINT);
else if ("float".equals(t) || "sfloat".equals(t))
fieldNameVsType.put(n, Types.FLOAT);
else if ("double".equals(t) || "sdouble".equals(t))
fieldNameVsType.put(n, Types.DOUBLE);
else if ("date".equals(t))
fieldNameVsType.put(n, Types.DATE);
else if ("boolean".equals(t))
fieldNameVsType.put(n, Types.BOOLEAN);
else if ("binary".equals(t))
fieldNameVsType.put(n, Types.BLOB);
else
fieldNameVsType.put(n, Types.VARCHAR);
}
}
private Properties decryptPwd(Context context, Properties initProps) {
String encryptionKey = initProps.getProperty("encryptKeyFile");
if (initProps.getProperty("password") != null && encryptionKey != null) {
// this means the password is encrypted; use the key file to decode it
try {
try (Reader fr = new InputStreamReader(new FileInputStream(encryptionKey), UTF_8)) {
char[] chars = new char[100];//max 100 char password
int len = fr.read(chars);
if (len < 6)
throw new DataImportHandlerException(SEVERE, "There should be a password of length 6 atleast " + encryptionKey);
Properties props = new Properties();
props.putAll(initProps);
String password = null;
try {
password = CryptoKeys.decodeAES(initProps.getProperty("password"), new String(chars, 0, len)).trim();
} catch (SolrException se) {
throw new DataImportHandlerException(SEVERE, "Error decoding password", se.getCause());
}
props.put("password", password);
initProps = props;
}
} catch (IOException e) {
throw new DataImportHandlerException(SEVERE, "Could not load encryptKeyFile " + encryptionKey);
}
}
return initProps;
}
protected Callable<Connection> createConnectionFactory(final Context context,
final Properties initProps) {
// final VariableResolver resolver = context.getVariableResolver();
final String jndiName = initProps.getProperty(JNDI_NAME);
final String url = initProps.getProperty(URL);
final String driver = initProps.getProperty(DRIVER);
if (url == null && jndiName == null)
throw new DataImportHandlerException(SEVERE,
"JDBC URL or JNDI name has to be specified");
if (driver != null) {
try {
DocBuilder.loadClass(driver, context.getSolrCore());
} catch (ClassNotFoundException e) {
wrapAndThrow(SEVERE, e, "Could not load driver: " + driver);
}
} else {
if(jndiName == null){
throw new DataImportHandlerException(SEVERE, "One of driver or jndiName must be specified in the data source");
}
}
String s = initProps.getProperty("maxRows");
if (s != null) {
maxRows = Integer.parseInt(s);
}
return factory = new Callable<Connection>() {
@Override
public Connection call() throws Exception {
if (log.isInfoEnabled()) {
log.info("Creating a connection for entity {} with URL: {}"
, context.getEntityAttribute(DataImporter.NAME), url);
}
long start = System.nanoTime();
Connection c = null;
if (jndiName != null) {
c = getFromJndi(initProps, jndiName);
} else if (url != null) {
try {
c = DriverManager.getConnection(url, initProps);
} catch (SQLException e) {
// DriverManager does not allow you to use a driver which is not loaded through
// the class loader of the class which is trying to make the connection.
// This is a workaround for cases where the user puts the driver jar in the
// solr.home/lib or solr.home/core/lib directories.
@SuppressWarnings({"unchecked"})
Driver d = (Driver) DocBuilder.loadClass(driver, context.getSolrCore()).getConstructor().newInstance();
c = d.connect(url, initProps);
}
}
if (c != null) {
try {
initializeConnection(c, initProps);
} catch (SQLException e) {
try {
c.close();
} catch (SQLException e2) {
log.warn("Exception closing connection during cleanup", e2);
}
throw new DataImportHandlerException(SEVERE, "Exception initializing SQL connection", e);
}
}
log.info("Time taken for getConnection(): {}"
, TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS));
return c;
}
private void initializeConnection(Connection c, final Properties initProps)
throws SQLException {
if (Boolean.parseBoolean(initProps.getProperty("readOnly"))) {
c.setReadOnly(true);
// Add other sane defaults
c.setAutoCommit(true);
c.setTransactionIsolation(Connection.TRANSACTION_READ_UNCOMMITTED);
c.setHoldability(ResultSet.CLOSE_CURSORS_AT_COMMIT);
}
if (!Boolean.parseBoolean(initProps.getProperty("autoCommit"))) {
c.setAutoCommit(false);
}
String transactionIsolation = initProps.getProperty("transactionIsolation");
if ("TRANSACTION_READ_UNCOMMITTED".equals(transactionIsolation)) {
c.setTransactionIsolation(Connection.TRANSACTION_READ_UNCOMMITTED);
} else if ("TRANSACTION_READ_COMMITTED".equals(transactionIsolation)) {
c.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED);
} else if ("TRANSACTION_REPEATABLE_READ".equals(transactionIsolation)) {
c.setTransactionIsolation(Connection.TRANSACTION_REPEATABLE_READ);
} else if ("TRANSACTION_SERIALIZABLE".equals(transactionIsolation)) {
c.setTransactionIsolation(Connection.TRANSACTION_SERIALIZABLE);
} else if ("TRANSACTION_NONE".equals(transactionIsolation)) {
c.setTransactionIsolation(Connection.TRANSACTION_NONE);
}
String holdability = initProps.getProperty("holdability");
if ("CLOSE_CURSORS_AT_COMMIT".equals(holdability)) {
c.setHoldability(ResultSet.CLOSE_CURSORS_AT_COMMIT);
} else if ("HOLD_CURSORS_OVER_COMMIT".equals(holdability)) {
c.setHoldability(ResultSet.HOLD_CURSORS_OVER_COMMIT);
}
}
private Connection getFromJndi(final Properties initProps, final String jndiName) throws NamingException,
SQLException {
Connection c = null;
InitialContext ctx = new InitialContext();
Object jndival = ctx.lookup(jndiName);
if (jndival instanceof javax.sql.DataSource) {
javax.sql.DataSource dataSource = (javax.sql.DataSource) jndival;
String user = (String) initProps.get("user");
String pass = (String) initProps.get("password");
if(user == null || user.trim().equals("")){
c = dataSource.getConnection();
} else {
c = dataSource.getConnection(user, pass);
}
} else {
throw new DataImportHandlerException(SEVERE,
"the jndi name : '"+jndiName +"' is not a valid javax.sql.DataSource");
}
return c;
}
};
}
private void resolveVariables(Context ctx, Properties initProps) {
for (Map.Entry<Object, Object> entry : initProps.entrySet()) {
if (entry.getValue() != null) {
entry.setValue(ctx.replaceTokens((String) entry.getValue()));
}
}
}
@Override
public Iterator<Map<String, Object>> getData(String query) {
if (resultSetIterator != null) {
resultSetIterator.close();
resultSetIterator = null;
}
resultSetIterator = createResultSetIterator(query);
return resultSetIterator.getIterator();
}
protected ResultSetIterator createResultSetIterator(String query) {
return new ResultSetIterator(query);
}
private void logError(String msg, Exception e) {
log.warn(msg, e);
}
protected List<String> readFieldNames(ResultSetMetaData metaData)
throws SQLException {
List<String> colNames = new ArrayList<>();
int count = metaData.getColumnCount();
for (int i = 0; i < count; i++) {
colNames.add(metaData.getColumnLabel(i + 1));
}
return colNames;
}
protected class ResultSetIterator {
private ResultSet resultSet;
private Statement stmt = null;
private List<String> colNames;
private Iterator<Map<String, Object>> rSetIterator;
public ResultSetIterator(String query) {
try {
Connection c = getConnection();
stmt = createStatement(c, batchSize, maxRows);
log.debug("Executing SQL: {}", query);
long start = System.nanoTime();
resultSet = executeStatement(stmt, query);
log.trace("Time taken for sql : {}"
, TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS));
setColNames(resultSet);
} catch (Exception e) {
close();
wrapAndThrow(SEVERE, e, "Unable to execute query: " + query);
return;
}
if (resultSet == null) {
close();
rSetIterator = new ArrayList<Map<String, Object>>().iterator();
return;
}
rSetIterator = createIterator(convertType, fieldNameVsType);
}
protected Statement createStatement(final Connection c, final int batchSize, final int maxRows)
throws SQLException {
Statement statement = c.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
statement.setFetchSize(batchSize);
statement.setMaxRows(maxRows);
return statement;
}
protected ResultSet executeStatement(Statement statement, String query) throws SQLException {
boolean resultSetReturned = statement.execute(query);
return getNextResultSet(resultSetReturned, statement);
}
protected ResultSet getNextResultSet(final boolean initialResultSetAvailable, final Statement statement) throws SQLException {
boolean resultSetAvailable = initialResultSetAvailable;
while (!resultSetAvailable && statement.getUpdateCount() != -1) {
resultSetAvailable = statement.getMoreResults();
}
if (resultSetAvailable) {
return statement.getResultSet();
}
return null;
}
protected void setColNames(final ResultSet resultSet) throws SQLException {
if (resultSet != null) {
colNames = readFieldNames(resultSet.getMetaData());
} else {
colNames = Collections.emptyList();
}
}
protected Iterator<Map<String,Object>> createIterator(final boolean convertType,
final Map<String,Integer> fieldNameVsType) {
return new Iterator<Map<String,Object>>() {
@Override
public boolean hasNext() {
return hasnext();
}
@Override
public Map<String,Object> next() {
return getARow(convertType, fieldNameVsType);
}
@Override
public void remove() {/* do nothing */
}
};
}
protected Map<String,Object> getARow(boolean convertType, Map<String,Integer> fieldNameVsType) {
if (getResultSet() == null)
return null;
Map<String, Object> result = new HashMap<>();
for (String colName : getColNames()) {
try {
if (!convertType) {
// Use underlying database's type information except for BigDecimal and BigInteger
// which cannot be serialized by JavaBin/XML. See SOLR-6165
Object value = getResultSet().getObject(colName);
if (value instanceof BigDecimal || value instanceof BigInteger) {
result.put(colName, value.toString());
} else {
result.put(colName, value);
}
continue;
}
Integer type = fieldNameVsType.get(colName);
if (type == null)
type = Types.VARCHAR;
switch (type) {
case Types.INTEGER:
result.put(colName, getResultSet().getInt(colName));
break;
case Types.FLOAT:
result.put(colName, getResultSet().getFloat(colName));
break;
case Types.BIGINT:
result.put(colName, getResultSet().getLong(colName));
break;
case Types.DOUBLE:
result.put(colName, getResultSet().getDouble(colName));
break;
case Types.DATE:
result.put(colName, getResultSet().getTimestamp(colName));
break;
case Types.BOOLEAN:
result.put(colName, getResultSet().getBoolean(colName));
break;
case Types.BLOB:
result.put(colName, getResultSet().getBytes(colName));
break;
default:
result.put(colName, getResultSet().getString(colName));
break;
}
} catch (SQLException e) {
logError("Error reading data ", e);
wrapAndThrow(SEVERE, e, "Error reading data from database");
}
}
return result;
}
protected boolean hasnext() {
if (getResultSet() == null) {
close();
return false;
}
try {
if (getResultSet().next()) {
return true;
} else {
closeResultSet();
setResultSet(getNextResultSet(getStatement().getMoreResults(), getStatement()));
setColNames(getResultSet());
return hasnext();
}
} catch (SQLException e) {
close();
wrapAndThrow(SEVERE,e);
return false;
}
}
protected void close() {
closeResultSet();
try {
if (getStatement() != null)
getStatement().close();
} catch (Exception e) {
logError("Exception while closing statement", e);
} finally {
setStatement(null);
}
}
protected void closeResultSet() {
try {
if (getResultSet() != null) {
getResultSet().close();
}
} catch (Exception e) {
logError("Exception while closing result set", e);
} finally {
setResultSet(null);
}
}
protected final Iterator<Map<String,Object>> getIterator() {
return rSetIterator;
}
protected final Statement getStatement() {
return stmt;
}
protected final void setStatement(Statement stmt) {
this.stmt = stmt;
}
protected final ResultSet getResultSet() {
return resultSet;
}
protected final void setResultSet(ResultSet resultSet) {
this.resultSet = resultSet;
}
protected final List<String> getColNames() {
return colNames;
}
protected final void setColNames(List<String> colNames) {
this.colNames = colNames;
}
}
protected Connection getConnection() throws Exception {
long currTime = System.nanoTime();
if (currTime - connLastUsed > CONN_TIME_OUT) {
synchronized (this) {
Connection tmpConn = factory.call();
closeConnection();
connLastUsed = System.nanoTime();
return conn = tmpConn;
}
} else {
connLastUsed = currTime;
return conn;
}
}
private boolean isClosed = false;
@Override
public void close() {
if (resultSetIterator != null) {
resultSetIterator.close();
}
try {
closeConnection();
} finally {
isClosed = true;
}
}
private void closeConnection() {
try {
if (conn != null) {
try {
//SOLR-2045
conn.commit();
} catch(Exception ex) {
//ignore.
}
conn.close();
}
} catch (Exception e) {
log.error("Ignoring Error when closing connection", e);
}
}
private static final long CONN_TIME_OUT = TimeUnit.NANOSECONDS.convert(10, TimeUnit.SECONDS);
private static final int FETCH_SIZE = 500;
public static final String URL = "url";
public static final String JNDI_NAME = "jndiName";
public static final String DRIVER = "driver";
public static final String CONVERT_TYPE = "convertType";
}
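For context, the init properties handled above (driver, url, user, password, batchSize, readOnly, and optionally encryptKeyFile) are supplied on the dataSource element of data-config.xml. A hedged sketch with placeholder driver class, JDBC URL and credentials:

<dataConfig>
  <dataSource type="JdbcDataSource" driver="org.hsqldb.jdbc.JDBCDriver"
              url="jdbc:hsqldb:/tmp/example" user="sa" password=""
              batchSize="500" readOnly="true" />
  <document>
    <entity name="item" query="select * from item" />
  </document>
</dataConfig>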

View File

@ -1,164 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.io.*;
import java.util.*;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
/**
* <p>
* An {@link EntityProcessor} instance which can stream lines of text read from a
* datasource. Options allow lines to be explicitly skipped or included in the index.
* </p>
* <p>
* Attribute summary
* <ul>
* <li>url is the required location of the input file. If this value is
* relative, it assumed to be relative to baseLoc.</li>
* <li>acceptLineRegex is an optional attribute that if present discards any
* line which does not match the regExp.</li>
* <li>skipLineRegex is an optional attribute that is applied after any
* acceptLineRegex and discards any line which matches this regExp.</li>
* </ul>
* <p>
* Although envisioned for reading lines from a file or url, LineEntityProcessor may also be useful
* for dealing with change lists, where each line contains filenames which can be used by subsequent entities
* to parse content from those files.
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.4
* @see Pattern
*/
public class LineEntityProcessor extends EntityProcessorBase {
private Pattern acceptLineRegex, skipLineRegex;
private String url;
private BufferedReader reader;
/**
* Parses each of the entity attributes.
*/
@Override
public void init(Context context) {
super.init(context);
String s;
// init a regex to locate files from the input we want to index
s = context.getResolvedEntityAttribute(ACCEPT_LINE_REGEX);
if (s != null) {
acceptLineRegex = Pattern.compile(s);
}
// init a regex to locate files from the input to be skipped
s = context.getResolvedEntityAttribute(SKIP_LINE_REGEX);
if (s != null) {
skipLineRegex = Pattern.compile(s);
}
// the FileName is required.
url = context.getResolvedEntityAttribute(URL);
if (url == null) throw
new DataImportHandlerException(DataImportHandlerException.SEVERE,
"'"+ URL +"' is a required attribute");
}
/**
 * Reads lines from the url until it finds a line that matches the
 * optional acceptLineRegex and does not match the optional skipLineRegex.
 *
 * @return A row containing a minimum of one field "rawLine" or null to signal
 * end of file. The rawLine is the line exactly as returned by readLine()
 * from the url. However, transformers can be used to create as
* many other fields as required.
*/
@Override
public Map<String, Object> nextRow() {
if (reader == null) {
reader = new BufferedReader((Reader) context.getDataSource().getData(url));
}
String line;
while ( true ) {
// read a line from the input file
try {
line = reader.readLine();
}
catch (IOException exp) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Problem reading from input", exp);
}
// end of input
if (line == null) {
closeResources();
return null;
}
// First scan whole line to see if we want it
if (acceptLineRegex != null && ! acceptLineRegex.matcher(line).find()) continue;
if (skipLineRegex != null && skipLineRegex.matcher(line).find()) continue;
// Construct the 'row' of fields
Map<String, Object> row = new HashMap<>();
row.put("rawLine", line);
return row;
}
}
public void closeResources() {
if (reader != null) {
IOUtils.closeQuietly(reader);
}
reader= null;
}
@Override
public void destroy() {
closeResources();
super.destroy();
}
/**
 * Holds the name of the entity attribute that will be parsed to obtain
* the filename containing the changelist.
*/
public static final String URL = "url";
/**
 * Holds the name of the entity attribute that will be parsed to obtain
* the pattern to be used when checking to see if a line should
* be returned.
*/
public static final String ACCEPT_LINE_REGEX = "acceptLineRegex";
/**
 * Holds the name of the entity attribute that will be parsed to obtain
* the pattern to be used when checking to see if a line should
* be ignored.
*/
public static final String SKIP_LINE_REGEX = "skipLineRegex";
}
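A hedged example of the entity configuration described in the class javadoc; the file path and regular expressions are hypothetical, and the referenced data source would typically be a FileDataSource or URLDataSource returning a Reader. Each emitted row carries the matched line in the rawLine field:

<entity name="changeList" processor="LineEntityProcessor"
        url="/tmp/changelist.txt"
        acceptLineRegex=".*\.xml$"
        skipLineRegex=".*deleted.*"
        rootEntity="false" />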

View File

@ -1,67 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.util.Map;
/**
* A {@link Transformer} implementation which logs messages in a given template format.
* <p>
* Refer to <a href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.4
*/
public class LogTransformer extends Transformer {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@Override
public Object transformRow(Map<String, Object> row, Context ctx) {
String expr = ctx.getEntityAttribute(LOG_TEMPLATE);
String level = ctx.replaceTokens(ctx.getEntityAttribute(LOG_LEVEL));
if (expr == null || level == null) return row;
if ("info".equals(level)) {
if (log.isInfoEnabled())
log.info(ctx.replaceTokens(expr));
} else if ("trace".equals(level)) {
if (log.isTraceEnabled())
log.trace(ctx.replaceTokens(expr));
} else if ("warn".equals(level)) {
if (log.isWarnEnabled())
log.warn(ctx.replaceTokens(expr));
} else if ("error".equals(level)) {
if (log.isErrorEnabled())
log.error(ctx.replaceTokens(expr));
} else if ("debug".equals(level)) {
if (log.isDebugEnabled())
log.debug(ctx.replaceTokens(expr));
}
return row;
}
public static final String LOG_TEMPLATE = "logTemplate";
public static final String LOG_LEVEL = "logLevel";
}
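The logTemplate and logLevel attributes read above are entity-level attributes in data-config.xml. A hedged sketch; the entity name and template text are hypothetical, and ${item.id} uses DIH's usual variable syntax:

<entity name="item" query="select * from item"
        transformer="LogTransformer"
        logTemplate="Processed item with id ${item.id}" logLevel="info" />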

View File

@ -1,61 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
/**
* <p>
* A mock DataSource implementation which can be used for testing.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public class MockDataSource extends
DataSource<Iterator<Map<String, Object>>> {
private static Map<String, Iterator<Map<String, Object>>> cache = new HashMap<>();
public static void setIterator(String query,
Iterator<Map<String, Object>> iter) {
cache.put(query, iter);
}
public static void clearCache() {
cache.clear();
}
@Override
public void init(Context context, Properties initProps) {
}
@Override
public Iterator<Map<String, Object>> getData(String query) {
return cache.get(query);
}
@Override
public void close() {
cache.clear();
}
}

View File

@ -1,134 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.text.NumberFormat;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
/**
* <p>
* A {@link Transformer} instance which can extract numbers out of strings. It uses
 * the {@link NumberFormat} class to parse strings and supports
* Number, Integer, Currency and Percent styles as supported by
* {@link NumberFormat} with configurable locales.
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public class NumberFormatTransformer extends Transformer {
@Override
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) {
for (Map<String, String> fld : context.getAllEntityFields()) {
String style = context.replaceTokens(fld.get(FORMAT_STYLE));
if (style != null) {
String column = fld.get(DataImporter.COLUMN);
String srcCol = fld.get(RegexTransformer.SRC_COL_NAME);
String localeStr = context.replaceTokens(fld.get(LOCALE));
if (srcCol == null)
srcCol = column;
Locale locale = Locale.ROOT;
if (localeStr != null) {
try {
locale = new Locale.Builder().setLanguageTag(localeStr).build();
} catch (IllformedLocaleException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Invalid Locale '" + localeStr + "' specified for field: " + fld, e);
}
}
Object val = row.get(srcCol);
String styleSmall = style.toLowerCase(Locale.ROOT);
if (val instanceof List) {
List<String> inputs = (List) val;
@SuppressWarnings({"rawtypes"})
List results = new ArrayList();
for (String input : inputs) {
try {
results.add(process(input, styleSmall, locale));
} catch (ParseException e) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Failed to apply NumberFormat on column: " + column, e);
}
}
row.put(column, results);
} else {
if (val == null || val.toString().trim().equals(""))
continue;
try {
row.put(column, process(val.toString(), styleSmall, locale));
} catch (ParseException e) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Failed to apply NumberFormat on column: " + column, e);
}
}
}
}
return row;
}
private Number process(String val, String style, Locale locale) throws ParseException {
if (INTEGER.equals(style)) {
return parseNumber(val, NumberFormat.getIntegerInstance(locale));
} else if (NUMBER.equals(style)) {
return parseNumber(val, NumberFormat.getNumberInstance(locale));
} else if (CURRENCY.equals(style)) {
return parseNumber(val, NumberFormat.getCurrencyInstance(locale));
} else if (PERCENT.equals(style)) {
return parseNumber(val, NumberFormat.getPercentInstance(locale));
}
return null;
}
private Number parseNumber(String val, NumberFormat numFormat) throws ParseException {
ParsePosition parsePos = new ParsePosition(0);
Number num = numFormat.parse(val, parsePos);
if (parsePos.getIndex() != val.length()) {
throw new ParseException("illegal number format", parsePos.getIndex());
}
return num;
}
public static final String FORMAT_STYLE = "formatStyle";
public static final String LOCALE = "locale";
public static final String NUMBER = "number";
public static final String PERCENT = "percent";
public static final String INTEGER = "integer";
public static final String CURRENCY = "currency";
}
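formatStyle and locale are field-level attributes in data-config.xml. A hedged sketch with hypothetical column names:

<entity name="item" query="select * from item"
        transformer="NumberFormatTransformer">
  <field column="price" formatStyle="number" locale="en-US" />
  <field column="discount" formatStyle="percent" />
</entity>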

View File

@ -1,78 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
import org.apache.commons.io.IOUtils;
import java.io.IOException;
import java.io.Reader;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.Map;
/**
* <p>An implementation of {@link EntityProcessor} which reads data from a url/file and give out a row which contains one String
* value. The name of the field is 'plainText'.
*
* @since solr 1.4
*/
public class PlainTextEntityProcessor extends EntityProcessorBase {
private boolean ended = false;
@Override
public void init(Context context) {
super.init(context);
ended = false;
}
@Override
public Map<String, Object> nextRow() {
if (ended) return null;
@SuppressWarnings({"unchecked"})
DataSource<Reader> ds = context.getDataSource();
String url = context.replaceTokens(context.getEntityAttribute(URL));
Reader r = null;
try {
r = ds.getData(url);
} catch (Exception e) {
wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
}
StringWriter sw = new StringWriter();
char[] buf = new char[1024];
while (true) {
int len = 0;
try {
len = r.read(buf);
} catch (IOException e) {
IOUtils.closeQuietly(r);
wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
}
if (len <= 0) break;
sw.append(new String(buf, 0, len));
}
Map<String, Object> row = new HashMap<>();
row.put(PLAIN_TEXT, sw.toString());
ended = true;
IOUtils.closeQuietly(r);
return row;
}
public static final String PLAIN_TEXT = "plainText";
}
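A hedged usage sketch; the URL and names are hypothetical and the data source is assumed to be a URLDataSource returning a Reader. The whole response body lands in the plainText column:

<dataSource type="URLDataSource" name="web" />
<entity name="page" processor="PlainTextEntityProcessor"
        url="http://example.com/page.txt" dataSource="web">
  <field column="plainText" name="content" />
</entity>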

View File

@ -1,200 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.lang.invoke.MethodHandles;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
* A {@link Transformer} implementation which uses Regular Expressions to extract, split
* and replace data in fields.
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
* @see Pattern
*/
public class RegexTransformer extends Transformer {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@Override
@SuppressWarnings({"unchecked", "rawtypes"})
public Map<String, Object> transformRow(Map<String, Object> row,
Context ctx) {
List<Map<String, String>> fields = ctx.getAllEntityFields();
for (Map<String, String> field : fields) {
String col = field.get(DataImporter.COLUMN);
String reStr = ctx.replaceTokens(field.get(REGEX));
String splitBy = ctx.replaceTokens(field.get(SPLIT_BY));
String replaceWith = ctx.replaceTokens(field.get(REPLACE_WITH));
String groupNames = ctx.replaceTokens(field.get(GROUP_NAMES));
if (reStr != null || splitBy != null) {
String srcColName = field.get(SRC_COL_NAME);
if (srcColName == null) {
srcColName = col;
}
Object tmpVal = row.get(srcColName);
if (tmpVal == null)
continue;
if (tmpVal instanceof List) {
List<String> inputs = (List<String>) tmpVal;
List results = new ArrayList();
Map<String,List> otherVars= null;
for (String input : inputs) {
Object o = process(col, reStr, splitBy, replaceWith, input, groupNames);
if (o != null){
if (o instanceof Map) {
Map map = (Map) o;
for (Object e : map.entrySet()) {
Map.Entry<String ,Object> entry = (Map.Entry<String, Object>) e;
List l = results;
if(!col.equals(entry.getKey())){
if(otherVars == null) otherVars = new HashMap<>();
l = otherVars.get(entry.getKey());
if(l == null){
l = new ArrayList();
otherVars.put(entry.getKey(), l);
}
}
if (entry.getValue() instanceof Collection) {
l.addAll((Collection) entry.getValue());
} else {
l.add(entry.getValue());
}
}
} else {
if (o instanceof Collection) {
results.addAll((Collection) o);
} else {
results.add(o);
}
}
}
}
row.put(col, results);
if(otherVars != null) row.putAll(otherVars);
} else {
String value = tmpVal.toString();
Object o = process(col, reStr, splitBy, replaceWith, value, groupNames);
if (o != null){
if (o instanceof Map) {
row.putAll((Map) o);
} else{
row.put(col, o);
}
}
}
}
}
return row;
}
private Object process(String col, String reStr, String splitBy,
String replaceWith, String value, String groupNames) {
if (splitBy != null) {
return readBySplit(splitBy, value);
} else if (replaceWith != null) {
Pattern p = getPattern(reStr);
Matcher m = p.matcher(value);
return m.find() ? m.replaceAll(replaceWith) : value;
} else {
return readfromRegExp(reStr, value, col, groupNames);
}
}
@SuppressWarnings("unchecked")
private List<String> readBySplit(String splitBy, String value) {
String[] vals = value.split(splitBy);
List<String> l = new ArrayList<>(Arrays.asList(vals));
return l;
}
@SuppressWarnings({"unchecked", "rawtypes"})
private Object readfromRegExp(String reStr, String value, String columnName, String gNames) {
String[] groupNames = null;
if(gNames != null && gNames.trim().length() >0){
groupNames = gNames.split(",");
}
Pattern regexp = getPattern(reStr);
Matcher m = regexp.matcher(value);
if (m.find() && m.groupCount() > 0) {
if (m.groupCount() > 1) {
List l = null;
Map<String ,String > map = null;
if(groupNames == null){
l = new ArrayList();
} else {
map = new HashMap<>();
}
for (int i = 1; i <= m.groupCount(); i++) {
try {
if(l != null){
l.add(m.group(i));
} else if (map != null ){
if(i <= groupNames.length){
String nameOfGroup = groupNames[i-1];
if(nameOfGroup != null && nameOfGroup.trim().length() >0){
map.put(nameOfGroup, m.group(i));
}
}
}
} catch (Exception e) {
log.warn("Parsing failed for field : {}", columnName, e);
}
}
return l == null ? map: l;
} else {
return m.group(1);
}
}
return null;
}
private Pattern getPattern(String reStr) {
Pattern result = PATTERN_CACHE.get(reStr);
if (result == null) {
PATTERN_CACHE.put(reStr, result = Pattern.compile(reStr));
}
return result;
}
private HashMap<String, Pattern> PATTERN_CACHE = new HashMap<>();
public static final String REGEX = "regex";
public static final String REPLACE_WITH = "replaceWith";
public static final String SPLIT_BY = "splitBy";
public static final String SRC_COL_NAME = "sourceColName";
public static final String GROUP_NAMES = "groupNames";
}
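A hedged sketch of the field-level attributes handled above (regex, splitBy, replaceWith, sourceColName, groupNames); column names and patterns are hypothetical:

<entity name="person" query="select * from person"
        transformer="RegexTransformer">
  <field column="mail_ids" splitBy="," sourceColName="emailids" />
  <field column="phone" regex="-" replaceWith=" " sourceColName="raw_phone" />
  <field column="full_name" regex="Mr(\w*)\b(.*)" groupNames="first_name,last_name" />
</entity>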

View File

@ -1,177 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SolrQueryRequest;
public class RequestInfo {
private final String command;
private final boolean debug;
private final boolean syncMode;
private final boolean commit;
private final boolean optimize;
private final int start;
private final long rows;
private final boolean clean;
private final List<String> entitiesToRun;
private final Map<String,Object> rawParams;
private final String configFile;
private final String dataConfig;
private final SolrQueryRequest request;
//TODO: find a different home for these two...
private final ContentStream contentStream;
private final DebugInfo debugInfo;
public RequestInfo(SolrQueryRequest request, Map<String,Object> requestParams, ContentStream stream) {
this.request = request;
this.contentStream = stream;
if (requestParams.containsKey("command")) {
command = (String) requestParams.get("command");
} else {
command = null;
}
boolean debugMode = StrUtils.parseBool((String) requestParams.get("debug"), false);
if (debugMode) {
debug = true;
debugInfo = new DebugInfo(requestParams);
} else {
debug = false;
debugInfo = null;
}
if (requestParams.containsKey("clean")) {
clean = StrUtils.parseBool( (String) requestParams.get("clean"), true);
} else if (DataImporter.DELTA_IMPORT_CMD.equals(command) || DataImporter.IMPORT_CMD.equals(command)) {
clean = false;
} else {
clean = debug ? false : true;
}
optimize = StrUtils.parseBool((String) requestParams.get("optimize"), false);
if(optimize) {
commit = true;
} else {
commit = StrUtils.parseBool( (String) requestParams.get("commit"), (debug ? false : true));
}
if (requestParams.containsKey("rows")) {
rows = Integer.parseInt((String) requestParams.get("rows"));
} else {
rows = debug ? 10 : Long.MAX_VALUE;
}
if (requestParams.containsKey("start")) {
start = Integer.parseInt((String) requestParams.get("start"));
} else {
start = 0;
}
syncMode = StrUtils.parseBool((String) requestParams.get("synchronous"), false);
Object o = requestParams.get("entity");
List<String> modifiableEntities = null;
if(o != null) {
if (o instanceof String) {
modifiableEntities = new ArrayList<>();
modifiableEntities.add((String) o);
} else if (o instanceof List<?>) {
@SuppressWarnings("unchecked")
List<String> modifiableEntities1 = new ArrayList<>((List<String>) o);
modifiableEntities = modifiableEntities1;
}
entitiesToRun = Collections.unmodifiableList(modifiableEntities);
} else {
entitiesToRun = null;
}
String configFileParam = (String) requestParams.get("config");
configFile = configFileParam;
String dataConfigParam = (String) requestParams.get("dataConfig");
if (dataConfigParam != null && dataConfigParam.trim().length() == 0) {
// Empty data-config param is not valid, change it to null
dataConfigParam = null;
}
dataConfig = dataConfigParam;
this.rawParams = Collections.unmodifiableMap(new HashMap<>(requestParams));
}
public String getCommand() {
return command;
}
public boolean isDebug() {
return debug;
}
public boolean isSyncMode() {
return syncMode;
}
public boolean isCommit() {
return commit;
}
public boolean isOptimize() {
return optimize;
}
public int getStart() {
return start;
}
public long getRows() {
return rows;
}
public boolean isClean() {
return clean;
}
/**
* Returns null if we are to run all entities, otherwise just run the entities named in the list.
*/
public List<String> getEntitiesToRun() {
return entitiesToRun;
}
public String getDataConfig() {
return dataConfig;
}
public Map<String,Object> getRawParams() {
return rawParams;
}
public ContentStream getContentStream() {
return contentStream;
}
public DebugInfo getDebugInfo() {
return debugInfo;
}
public String getConfigFile() {
return configFile;
}
public SolrQueryRequest getRequest() {
return request;
}
}
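The request parameter map consumed here comes straight from a /dataimport request; a typical full-import invocation might look like the following (host, core and handler path are hypothetical):

http://localhost:8983/solr/my_core/dataimport?command=full-import&clean=true&commit=true&debug=false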

View File

@ -1,131 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import java.security.AccessControlContext;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import java.security.ProtectionDomain;
import java.util.Map;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
/**
* <p>
* A {@link Transformer} instance capable of executing functions written in scripting
* languages as a {@link Transformer} instance.
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public class ScriptTransformer extends Transformer {
private Invocable engine;
private String functionName;
@Override
public Object transformRow(Map<String,Object> row, Context context) {
return AccessController.doPrivileged(new PrivilegedAction<Object>() {
@Override
public Object run() {
return transformRowUnsafe(row, context);
}
}, SCRIPT_SANDBOX);
}
public Object transformRowUnsafe(Map<String, Object> row, Context context) {
try {
if (engine == null)
initEngine(context);
if (engine == null)
return row;
return engine.invokeFunction(functionName, new Object[]{row, context});
} catch (DataImportHandlerException e) {
throw e;
} catch (Exception e) {
wrapAndThrow(SEVERE,e, "Error invoking script for entity " + context.getEntityAttribute("name"));
}
//will not reach here
return null;
}
private void initEngine(Context context) {
String scriptText = context.getScript();
String scriptLang = context.getScriptLanguage();
if (scriptText == null) {
throw new DataImportHandlerException(SEVERE,
"<script> tag is not present under <dataConfig>");
}
ScriptEngineManager scriptEngineMgr = new ScriptEngineManager();
ScriptEngine scriptEngine = scriptEngineMgr.getEngineByName(scriptLang);
if (scriptEngine == null) {
throw new DataImportHandlerException(SEVERE,
"Cannot load Script Engine for language: " + scriptLang);
}
if (scriptEngine instanceof Invocable) {
engine = (Invocable) scriptEngine;
} else {
throw new DataImportHandlerException(SEVERE,
"The installed ScriptEngine for: " + scriptLang
+ " does not implement Invocable. Class is "
+ scriptEngine.getClass().getName());
}
try {
try {
AccessController.doPrivileged(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws ScriptException {
scriptEngine.eval(scriptText);
return null;
}
}, SCRIPT_SANDBOX);
} catch (PrivilegedActionException e) {
throw (ScriptException) e.getException();
}
} catch (ScriptException e) {
wrapAndThrow(SEVERE, e, "'eval' failed with language: " + scriptLang
+ " and script: \n" + scriptText);
}
}
public void setFunctionName(String methodName) {
this.functionName = methodName;
}
public String getFunctionName() {
return functionName;
}
// sandbox for script code: zero permissions
private static final AccessControlContext SCRIPT_SANDBOX =
new AccessControlContext(new ProtectionDomain[] { new ProtectionDomain(null, null) });
}
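For reference, the <script> element and script:functionName transformer syntax this class expects, as a hedged sketch; the function body and entity are hypothetical, and a language attribute on <script> selects the engine, JavaScript being the usual choice:

<dataConfig>
  <script><![CDATA[
    function addCategory(row) {
      row.put('category', 'imported');
      return row;
    }
  ]]></script>
  <document>
    <entity name="item" query="select * from item"
            transformer="script:addCategory" />
  </document>
</dataConfig>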

View File

@ -1,247 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.security.AccessControlException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.IllformedLocaleException;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrPaths;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
/**
* <p>
* Writes properties using {@link Properties#store} .
* The special property "last_index_time" is converted to a formatted date.
* Users can configure the location, filename, locale and date format to use.
* </p>
*/
public class SimplePropertiesWriter extends DIHProperties {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
static final String LAST_INDEX_KEY = "last_index_time";
protected String filename = null;
protected String configDir = null;
protected Locale locale = null;
protected SimpleDateFormat dateFormat = null;
/**
* The locale to use when writing the properties file. Default is {@link Locale#ROOT}
*/
public static final String LOCALE = "locale";
/**
* The date format to use when writing values for "last_index_time" to the properties file.
* See {@link SimpleDateFormat} for patterns. Default is yyyy-MM-dd HH:mm:ss .
*/
public static final String DATE_FORMAT = "dateFormat";
/**
* The directory to save the properties file in. Default is the current core's "config" directory.
*/
public static final String DIRECTORY = "directory";
/**
* The filename to save the properties file to. Default is this Handler's name from solrconfig.xml.
*/
public static final String FILENAME = "filename";
@Override
public void init(DataImporter dataImporter, Map<String, String> params) {
if(params.get(FILENAME) != null) {
filename = params.get(FILENAME);
} else if(dataImporter.getHandlerName()!=null) {
filename = dataImporter.getHandlerName() + ".properties";
} else {
filename = "dataimport.properties";
}
findDirectory(dataImporter, params);
if(params.get(LOCALE) != null) {
locale = getLocale(params.get(LOCALE));
} else {
locale = Locale.ROOT;
}
if(params.get(DATE_FORMAT) != null) {
dateFormat = new SimpleDateFormat(params.get(DATE_FORMAT), locale);
} else {
dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", locale);
}
}
@SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
private Locale getLocale(String name) {
if (name == null) {
return Locale.ROOT;
}
for (final Locale l : Locale.getAvailableLocales()) {
if(name.equals(l.toString()) || name.equals(l.getDisplayName(Locale.ROOT))) {
return l; // found a matching available locale
}
}
try {
return new Locale.Builder().setLanguageTag(name).build();
} catch (IllformedLocaleException ex) {
throw new DataImportHandlerException(SEVERE, "Unsupported locale for PropertyWriter: " + name);
}
}
protected void findDirectory(DataImporter dataImporter, Map<String, String> params) {
if(params.get(DIRECTORY) != null) {
configDir = params.get(DIRECTORY);
} else {
SolrCore core = dataImporter.getCore();
if (core == null) {
configDir = SolrPaths.locateSolrHome().toString();
} else {
configDir = core.getResourceLoader().getConfigDir();
}
}
}
private File getPersistFile() {
final File filePath;
if (new File(filename).isAbsolute() || configDir == null) {
filePath = new File(filename);
} else {
filePath = new File(new File(configDir), filename);
}
return filePath;
}
@Override
public boolean isWritable() {
File persistFile = getPersistFile();
try {
return persistFile.exists()
? persistFile.canWrite()
: persistFile.getParentFile().canWrite();
} catch (AccessControlException e) {
return false;
}
}
@Override
public String convertDateToString(Date d) {
return dateFormat.format(d);
}
protected Date convertStringToDate(String s) {
try {
return dateFormat.parse(s);
} catch (ParseException e) {
throw new DataImportHandlerException(SEVERE, "Value for "
+ LAST_INDEX_KEY + " is invalid for date format "
+ dateFormat.toLocalizedPattern() + " : " + s);
}
}
/**
* {@link DocBuilder} sends the date as an Object because
* this class knows how to convert it to a String
*/
protected Properties mapToProperties(Map<String,Object> propObjs) {
Properties p = new Properties();
for(Map.Entry<String,Object> entry : propObjs.entrySet()) {
String key = entry.getKey();
String val = null;
String lastKeyPart = key;
int lastDotPos = key.lastIndexOf('.');
if(lastDotPos!=-1 && key.length() > lastDotPos+1) {
lastKeyPart = key.substring(lastDotPos + 1);
}
if(LAST_INDEX_KEY.equals(lastKeyPart) && entry.getValue() instanceof Date) {
val = convertDateToString((Date) entry.getValue());
} else {
val = entry.getValue().toString();
}
p.put(key, val);
}
return p;
}
/**
* We'll send everything back as Strings as this class has
* already converted them.
*/
protected Map<String,Object> propertiesToMap(Properties p) {
Map<String,Object> theMap = new HashMap<>();
for(Map.Entry<Object,Object> entry : p.entrySet()) {
String key = entry.getKey().toString();
Object val = entry.getValue().toString();
theMap.put(key, val);
}
return theMap;
}
@Override
public void persist(Map<String, Object> propObjs) {
Writer propOutput = null;
Properties existingProps = mapToProperties(readIndexerProperties());
Properties newProps = mapToProperties(propObjs);
try {
existingProps.putAll(newProps);
propOutput = new OutputStreamWriter(new FileOutputStream(getPersistFile()), StandardCharsets.UTF_8);
existingProps.store(propOutput, null);
log.info("Wrote last indexed time to {}", filename);
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to persist Index Start Time", e);
} finally {
IOUtils.closeWhileHandlingException(propOutput);
}
}
@Override
public Map<String, Object> readIndexerProperties() {
Properties props = new Properties();
InputStream propInput = null;
try {
String filePath = configDir;
if (configDir != null && !configDir.endsWith(File.separator)) {
filePath += File.separator;
}
filePath += filename;
propInput = new FileInputStream(filePath);
props.load(new InputStreamReader(propInput, StandardCharsets.UTF_8));
log.info("Read {}", filename);
} catch (Exception e) {
log.warn("Unable to read: {}", filename);
} finally {
IOUtils.closeWhileHandlingException(propInput);
}
return propertiesToMap(props);
}
}
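The directory, filename, locale and dateFormat parameters documented above are passed through the propertyWriter element of data-config.xml. A hedged sketch with hypothetical values:

<dataConfig>
  <propertyWriter type="SimplePropertiesWriter" directory="conf"
                  filename="dataimport.properties"
                  locale="en-US" dateFormat="yyyy-MM-dd HH:mm:ss" />
  <!-- dataSource and document definitions follow here -->
</dataConfig>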

View File

@ -1,321 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.http.client.HttpClient;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CursorMarkParams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* An implementation of {@link EntityProcessor} which fetches values from a
 * separate Solr implementation using the SolrJ client library. Yields a row per
 * Solr document.
* </p>
* <p>
* Limitations:
* All configuration is evaluated at the beginning;
* Only one query is walked;
* </p>
*/
public class SolrEntityProcessor extends EntityProcessorBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String SOLR_SERVER = "url";
public static final String QUERY = "query";
public static final String TIMEOUT = "timeout";
public static final int TIMEOUT_SECS = 5 * 60; // 5 minutes
public static final int ROWS_DEFAULT = 50;
private SolrClient solrClient = null;
private String queryString;
private int rows = ROWS_DEFAULT;
private String[] filterQueries;
private String[] fields;
private String requestHandler;// 'qt' param
private int timeout = TIMEOUT_SECS;
@Override
public void destroy() {
try {
solrClient.close();
} catch (IOException e) {
} finally {
HttpClientUtil.close(((HttpSolrClient) solrClient).getHttpClient());
}
}
/**
* Factory method that returns a {@link HttpClient} instance used for interfacing with a source Solr service.
* One can override this method to return a differently configured {@link HttpClient} instance.
* For example, to configure HTTPS or HTTP authentication.
*
* @return a {@link HttpClient} instance used for interfacing with a source Solr service
*/
protected HttpClient getHttpClient() {
return HttpClientUtil.createClient(null);
}
@Override
protected void firstInit(Context context) {
super.firstInit(context);
try {
String serverPath = context.getResolvedEntityAttribute(SOLR_SERVER);
if (serverPath == null) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"SolrEntityProcessor: parameter 'url' is required");
}
HttpClient client = getHttpClient();
URL url = new URL(serverPath);
// (wt="javabin|xml") default is javabin
if ("xml".equals(context.getResolvedEntityAttribute(CommonParams.WT))) {
// TODO: it doesn't matter for this impl when passing a client currently, but we should close this!
solrClient = new Builder(url.toExternalForm())
.withHttpClient(client)
.withResponseParser(new XMLResponseParser())
.build();
log.info("using XMLResponseParser");
} else {
// TODO: it doesn't matter for this impl when passing a client currently, but we should close this!
solrClient = new Builder(url.toExternalForm())
.withHttpClient(client)
.build();
log.info("using BinaryResponseParser");
}
} catch (MalformedURLException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, e);
}
}
@Override
public Map<String,Object> nextRow() {
buildIterator();
return getNext();
}
/**
* The following method changes the rowIterator mutable field. It requires
* external synchronization.
*/
protected void buildIterator() {
if (rowIterator != null) {
SolrDocumentListIterator documentListIterator = (SolrDocumentListIterator) rowIterator;
if (!documentListIterator.hasNext() && documentListIterator.hasMoreRows()) {
nextPage();
}
} else {
boolean cursor = Boolean.parseBoolean(context
.getResolvedEntityAttribute(CursorMarkParams.CURSOR_MARK_PARAM));
rowIterator = !cursor ? new SolrDocumentListIterator(new SolrDocumentList())
: new SolrDocumentListCursor(new SolrDocumentList(), CursorMarkParams.CURSOR_MARK_START);
nextPage();
}
}
protected void nextPage() {
((SolrDocumentListIterator)rowIterator).doQuery();
}
class SolrDocumentListCursor extends SolrDocumentListIterator {
private final String cursorMark;
public SolrDocumentListCursor(SolrDocumentList solrDocumentList, String cursorMark) {
super(solrDocumentList);
this.cursorMark = cursorMark;
}
@Override
protected void passNextPage(SolrQuery solrQuery) {
String timeoutAsString = context.getResolvedEntityAttribute(TIMEOUT);
if (timeoutAsString != null) {
throw new DataImportHandlerException(SEVERE,"cursorMark can't be used with timeout");
}
solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
}
@Override
protected Iterator<Map<String,Object>> createNextPageIterator(QueryResponse response) {
return
new SolrDocumentListCursor(response.getResults(),
response.getNextCursorMark()) ;
}
}
class SolrDocumentListIterator implements Iterator<Map<String,Object>> {
private final int start;
private final int size;
private final long numFound;
private final Iterator<SolrDocument> solrDocumentIterator;
public SolrDocumentListIterator(SolrDocumentList solrDocumentList) {
this.solrDocumentIterator = solrDocumentList.iterator();
this.numFound = solrDocumentList.getNumFound();
// SolrQuery has the start field of type int while SolrDocumentList of
// type long. We are always querying with an int so we can't receive a
// long as output. That's the reason why the following cast seems safe
this.start = (int) solrDocumentList.getStart();
this.size = solrDocumentList.size();
}
protected QueryResponse doQuery() {
SolrEntityProcessor.this.queryString = context.getResolvedEntityAttribute(QUERY);
if (SolrEntityProcessor.this.queryString == null) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"SolrEntityProcessor: parameter 'query' is required"
);
}
String rowsP = context.getResolvedEntityAttribute(CommonParams.ROWS);
if (rowsP != null) {
rows = Integer.parseInt(rowsP);
}
String sortParam = context.getResolvedEntityAttribute(CommonParams.SORT);
String fqAsString = context.getResolvedEntityAttribute(CommonParams.FQ);
if (fqAsString != null) {
SolrEntityProcessor.this.filterQueries = fqAsString.split(",");
}
String fieldsAsString = context.getResolvedEntityAttribute(CommonParams.FL);
if (fieldsAsString != null) {
SolrEntityProcessor.this.fields = fieldsAsString.split(",");
}
SolrEntityProcessor.this.requestHandler = context.getResolvedEntityAttribute(CommonParams.QT);
SolrQuery solrQuery = new SolrQuery(queryString);
solrQuery.setRows(rows);
if (sortParam!=null) {
solrQuery.setParam(CommonParams.SORT, sortParam);
}
passNextPage(solrQuery);
if (fields != null) {
for (String field : fields) {
solrQuery.addField(field);
}
}
solrQuery.setRequestHandler(requestHandler);
solrQuery.setFilterQueries(filterQueries);
QueryResponse response = null;
try {
response = solrClient.query(solrQuery);
} catch (SolrServerException | IOException | SolrException e) {
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, e);
} else if (SKIP.equals(onError)) {
wrapAndThrow(DataImportHandlerException.SKIP_ROW, e);
}
}
if (response != null) {
SolrEntityProcessor.this.rowIterator = createNextPageIterator(response);
}
return response;
}
protected Iterator<Map<String,Object>> createNextPageIterator(QueryResponse response) {
return new SolrDocumentListIterator(response.getResults());
}
protected void passNextPage(SolrQuery solrQuery) {
String timeoutAsString = context.getResolvedEntityAttribute(TIMEOUT);
if (timeoutAsString != null) {
SolrEntityProcessor.this.timeout = Integer.parseInt(timeoutAsString);
}
solrQuery.setTimeAllowed(timeout * 1000);
solrQuery.setStart(getStart() + getSize());
}
@Override
public boolean hasNext() {
return solrDocumentIterator.hasNext();
}
@Override
public Map<String,Object> next() {
SolrDocument solrDocument = solrDocumentIterator.next();
HashMap<String,Object> map = new HashMap<>();
Collection<String> fields = solrDocument.getFieldNames();
for (String field : fields) {
Object fieldValue = solrDocument.getFieldValue(field);
map.put(field, fieldValue);
}
return map;
}
public int getStart() {
return start;
}
public int getSize() {
return size;
}
public boolean hasMoreRows() {
return numFound > start + size;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
}
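
A minimal, self-contained sketch of the start/rows paging arithmetic that SolrDocumentListIterator relies on above: hasMoreRows() keeps paging while numFound > start + size, and passNextPage() requests the page beginning at getStart() + getSize(). The class name and sample numbers below are illustrative only, and the sketch assumes every page comes back full.

import java.util.ArrayList;
import java.util.List;

public class PagingSketch {

  // Start offsets a paged export would request for numFound matches fetched rows at a time.
  static List<Integer> pageStarts(long numFound, int rows) {
    List<Integer> starts = new ArrayList<>();
    int start = 0;
    while (numFound > start) {   // more documents remain past the pages requested so far
      starts.add(start);
      start += rows;             // next page begins where the previous one ended
    }
    return starts;
  }

  public static void main(String[] args) {
    System.out.println(pageStarts(120, 50)); // prints [0, 50, 100]
  }
}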

View File

@ -1,35 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import java.util.List;
import org.apache.solr.client.solrj.util.ClientUtils;
public class SolrQueryEscapingEvaluator extends Evaluator {
@Override
public String evaluate(String expression, Context context) {
List<Object> l = parseParams(expression, context.getVariableResolver());
if (l.size() != 1) {
throw new DataImportHandlerException(SEVERE, "'escapeQueryChars' must have at least one parameter ");
}
String s = l.get(0).toString();
return ClientUtils.escapeQueryChars(s);
}
}

View File

@ -1,175 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.DeleteUpdateCommand;
import org.apache.solr.update.RollbackUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
/**
* <p> Writes documents to Solr. </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public class SolrWriter extends DIHWriterBase implements DIHWriter {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String LAST_INDEX_KEY = "last_index_time";
private final UpdateRequestProcessor processor;
private final int commitWithin;
SolrQueryRequest req;
public SolrWriter(UpdateRequestProcessor processor, SolrQueryRequest req) {
this.processor = processor;
this.req = req;
commitWithin = (req != null) ? req.getParams().getInt(UpdateParams.COMMIT_WITHIN, -1): -1;
}
@Override
public void close() {
try {
processor.finish();
} catch (IOException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to call finish() on UpdateRequestProcessor", e);
} finally {
deltaKeys = null;
try {
processor.close();
} catch (IOException e) {
SolrException.log(log, e);
}
}
}
@Override
public boolean upload(SolrInputDocument d) {
try {
AddUpdateCommand command = new AddUpdateCommand(req);
command.solrDoc = d;
command.commitWithin = commitWithin;
processor.processAdd(command);
} catch (Exception e) {
log.warn("Error creating document : {}", d, e);
return false;
}
return true;
}
@Override
public void deleteDoc(Object id) {
try {
log.info("Deleting document: {}", id);
DeleteUpdateCommand delCmd = new DeleteUpdateCommand(req);
delCmd.setId(id.toString());
processor.processDelete(delCmd);
} catch (IOException e) {
log.error("Exception while deleteing: {}", id, e);
}
}
@Override
public void deleteByQuery(String query) {
try {
log.info("Deleting documents from Solr with query: {}", query);
DeleteUpdateCommand delCmd = new DeleteUpdateCommand(req);
delCmd.query = query;
processor.processDelete(delCmd);
} catch (IOException e) {
log.error("Exception while deleting by query: {}", query, e);
}
}
@Override
public void commit(boolean optimize) {
try {
CommitUpdateCommand commit = new CommitUpdateCommand(req,optimize);
processor.processCommit(commit);
} catch (Exception e) {
log.error("Exception while solr commit.", e);
}
}
@Override
public void rollback() {
try {
RollbackUpdateCommand rollback = new RollbackUpdateCommand(req);
processor.processRollback(rollback);
} catch (Exception e) {
log.error("Exception during rollback command.", e);
}
}
@Override
public void doDeleteAll() {
try {
DeleteUpdateCommand deleteCommand = new DeleteUpdateCommand(req);
deleteCommand.query = "*:*";
processor.processDelete(deleteCommand);
} catch (IOException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Exception in full dump while deleting all documents.", e);
}
}
static String getResourceAsString(InputStream in) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
byte[] buf = new byte[1024];
int sz = 0;
try {
while ((sz = in.read(buf)) != -1) {
baos.write(buf, 0, sz);
}
} finally {
try {
in.close();
} catch (Exception e) {
}
}
return new String(baos.toByteArray(), StandardCharsets.UTF_8);
}
static String getDocCount() {
if (DocBuilder.INSTANCE.get() != null) {
return ""
+ (DocBuilder.INSTANCE.get().importStatistics.docCount.get() + 1);
} else {
return null;
}
}
@Override
public void init(Context context) {
/* NO-OP */
}
}

View File

@ -1,238 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
public class SortedMapBackedCache implements DIHCache {
private SortedMap<Object,List<Map<String,Object>>> theMap = null;
private boolean isOpen = false;
private boolean isReadOnly = false;
String primaryKeyName = null;
@SuppressWarnings("unchecked")
@Override
public void add(Map<String,Object> rec) {
checkOpen(true);
checkReadOnly();
if (rec == null || rec.size() == 0) {
return;
}
if (primaryKeyName == null) {
primaryKeyName = rec.keySet().iterator().next();
}
Object pk = rec.get(primaryKeyName);
if (pk instanceof Collection<?>) {
Collection<Object> c = (Collection<Object>) pk;
if (c.size() != 1) {
throw new RuntimeException(
"The primary key must have exactly 1 element.");
}
pk = c.iterator().next();
}
//Rows with null keys are not added.
if(pk==null) {
return;
}
List<Map<String,Object>> thisKeysRecs = theMap.get(pk);
if (thisKeysRecs == null) {
thisKeysRecs = new ArrayList<>();
theMap.put(pk, thisKeysRecs);
}
thisKeysRecs.add(rec);
}
private void checkOpen(boolean shouldItBe) {
if (!isOpen && shouldItBe) {
throw new IllegalStateException(
"Must call open() before using this cache.");
}
if (isOpen && !shouldItBe) {
throw new IllegalStateException("The cache is already open.");
}
}
private void checkReadOnly() {
if (isReadOnly) {
throw new IllegalStateException("Cache is read-only.");
}
}
@Override
public void close() {
isOpen = false;
}
@Override
public void delete(Object key) {
checkOpen(true);
checkReadOnly();
if(key==null) {
return;
}
theMap.remove(key);
}
@Override
public void deleteAll() {
deleteAll(false);
}
private void deleteAll(boolean readOnlyOk) {
if (!readOnlyOk) {
checkReadOnly();
}
if (theMap != null) {
theMap.clear();
}
}
@Override
public void destroy() {
deleteAll(true);
theMap = null;
isOpen = false;
}
@Override
public void flush() {
checkOpen(true);
checkReadOnly();
}
@Override
public Iterator<Map<String,Object>> iterator(Object key) {
checkOpen(true);
if(key==null) {
return null;
}
if(key instanceof Iterable<?>) {
List<Map<String,Object>> vals = new ArrayList<>();
Iterator<?> iter = ((Iterable<?>) key).iterator();
while(iter.hasNext()) {
List<Map<String,Object>> val = theMap.get(iter.next());
if(val!=null) {
vals.addAll(val);
}
}
if(vals.size()==0) {
return null;
}
return vals.iterator();
}
List<Map<String,Object>> val = theMap.get(key);
if (val == null) {
return null;
}
return val.iterator();
}
@Override
public Iterator<Map<String,Object>> iterator() {
return new Iterator<Map<String, Object>>() {
private Iterator<Map.Entry<Object,List<Map<String,Object>>>> theMapIter;
private List<Map<String,Object>> currentKeyResult = null;
private Iterator<Map<String,Object>> currentKeyResultIter = null;
{
theMapIter = theMap.entrySet().iterator();
}
@Override
public boolean hasNext() {
if (currentKeyResultIter != null) {
if (currentKeyResultIter.hasNext()) {
return true;
} else {
currentKeyResult = null;
currentKeyResultIter = null;
}
}
Map.Entry<Object,List<Map<String,Object>>> next = null;
if (theMapIter.hasNext()) {
next = theMapIter.next();
currentKeyResult = next.getValue();
currentKeyResultIter = currentKeyResult.iterator();
if (currentKeyResultIter.hasNext()) {
return true;
}
}
return false;
}
@Override
public Map<String,Object> next() {
if (currentKeyResultIter != null) {
if (currentKeyResultIter.hasNext()) {
return currentKeyResultIter.next();
} else {
currentKeyResult = null;
currentKeyResultIter = null;
}
}
Map.Entry<Object,List<Map<String,Object>>> next = null;
if (theMapIter.hasNext()) {
next = theMapIter.next();
currentKeyResult = next.getValue();
currentKeyResultIter = currentKeyResult.iterator();
if (currentKeyResultIter.hasNext()) {
return currentKeyResultIter.next();
}
}
return null;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
@Override
public void open(Context context) {
checkOpen(false);
isOpen = true;
if (theMap == null) {
theMap = new TreeMap<>();
}
String pkName = CachePropertyUtil.getAttributeValueAsString(context,
DIHCacheSupport.CACHE_PRIMARY_KEY);
if (pkName != null) {
primaryKeyName = pkName;
}
isReadOnly = false;
String readOnlyStr = CachePropertyUtil.getAttributeValueAsString(context,
DIHCacheSupport.CACHE_READ_ONLY);
if ("true".equalsIgnoreCase(readOnlyStr)) {
isReadOnly = true;
}
}
}
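
A small sketch of the data structure SortedMapBackedCache wraps: a sorted map from the primary-key value to every row stored under that key. The class name, column names, and values below are illustrative only.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

public class PkCacheSketch {
  public static void main(String[] args) {
    // the same backing shape the cache uses: primary key -> list of rows sharing that key
    SortedMap<Object, List<Map<String, Object>>> cache = new TreeMap<>();

    Map<String, Object> row = new HashMap<>();
    row.put("ID", 1);
    row.put("NAME", "widget");

    // add(): group the row under the value of its primary-key column
    cache.computeIfAbsent(row.get("ID"), k -> new ArrayList<>()).add(row);

    // iterator(key): fetch every row stored for a given key
    System.out.println(cache.get(1)); // prints the single row stored under key 1
  }
}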

View File

@ -1,173 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.lang.invoke.MethodHandles;
import java.util.Iterator;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
* An {@link EntityProcessor} instance which provides support for reading from
* databases. It is used in conjunction with {@link JdbcDataSource}. This is the default
* {@link EntityProcessor} if none is specified explicitly in data-config.xml
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
*
* @since solr 1.3
*/
public class SqlEntityProcessor extends EntityProcessorBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected DataSource<Iterator<Map<String, Object>>> dataSource;
@Override
@SuppressWarnings("unchecked")
public void init(Context context) {
super.init(context);
dataSource = context.getDataSource();
}
protected void initQuery(String q) {
try {
DataImporter.QUERY_COUNT.get().incrementAndGet();
rowIterator = dataSource.getData(q);
this.query = q;
} catch (DataImportHandlerException e) {
throw e;
} catch (Exception e) {
log.error( "The query failed '{}'", q, e);
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, e);
}
}
@Override
public Map<String, Object> nextRow() {
if (rowIterator == null) {
String q = getQuery();
initQuery(context.replaceTokens(q));
}
return getNext();
}
@Override
public Map<String, Object> nextModifiedRowKey() {
if (rowIterator == null) {
String deltaQuery = context.getEntityAttribute(DELTA_QUERY);
if (deltaQuery == null)
return null;
initQuery(context.replaceTokens(deltaQuery));
}
return getNext();
}
@Override
public Map<String, Object> nextDeletedRowKey() {
if (rowIterator == null) {
String deletedPkQuery = context.getEntityAttribute(DEL_PK_QUERY);
if (deletedPkQuery == null)
return null;
initQuery(context.replaceTokens(deletedPkQuery));
}
return getNext();
}
@Override
public Map<String, Object> nextModifiedParentRowKey() {
if (rowIterator == null) {
String parentDeltaQuery = context.getEntityAttribute(PARENT_DELTA_QUERY);
if (parentDeltaQuery == null)
return null;
if (log.isInfoEnabled()) {
log.info("Running parentDeltaQuery for Entity: {}"
, context.getEntityAttribute("name"));
}
initQuery(context.replaceTokens(parentDeltaQuery));
}
return getNext();
}
public String getQuery() {
String queryString = context.getEntityAttribute(QUERY);
if (Context.FULL_DUMP.equals(context.currentProcess())) {
return queryString;
}
if (Context.DELTA_DUMP.equals(context.currentProcess())) {
String deltaImportQuery = context.getEntityAttribute(DELTA_IMPORT_QUERY);
if(deltaImportQuery != null) return deltaImportQuery;
}
log.warn("'deltaImportQuery' attribute is not specified for entity : {}", entityName);
return getDeltaImportQuery(queryString);
}
public String getDeltaImportQuery(String queryString) {
StringBuilder sb = new StringBuilder(queryString);
if (SELECT_WHERE_PATTERN.matcher(queryString).find()) {
sb.append(" and ");
} else {
sb.append(" where ");
}
boolean first = true;
String[] primaryKeys = context.getEntityAttribute("pk").split(",");
for (String primaryKey : primaryKeys) {
if (!first) {
sb.append(" and ");
}
first = false;
Object val = context.resolve("dataimporter.delta." + primaryKey);
if (val == null) {
Matcher m = DOT_PATTERN.matcher(primaryKey);
if (m.find()) {
val = context.resolve("dataimporter.delta." + m.group(1));
}
}
sb.append(primaryKey).append(" = ");
if (val instanceof Number) {
sb.append(val.toString());
} else {
sb.append("'").append(val.toString()).append("'");
}
}
return sb.toString();
}
private static Pattern SELECT_WHERE_PATTERN = Pattern.compile(
"^\\s*(select\\b.*?\\b)(where).*", Pattern.CASE_INSENSITIVE);
public static final String QUERY = "query";
public static final String DELTA_QUERY = "deltaQuery";
public static final String DELTA_IMPORT_QUERY = "deltaImportQuery";
public static final String PARENT_DELTA_QUERY = "parentDeltaQuery";
public static final String DEL_PK_QUERY = "deletedPkQuery";
public static final Pattern DOT_PATTERN = Pattern.compile(".*?\\.(.*)$");
}
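
A simplified, single-key sketch of the string building done by getDeltaImportQuery() above (the removed method also handles composite keys and dotted pk names). The class name and sample query are illustrative only.

import java.util.regex.Pattern;

public class DeltaQuerySketch {

  private static final Pattern SELECT_WHERE_PATTERN =
      Pattern.compile("^\\s*(select\\b.*?\\b)(where).*", Pattern.CASE_INSENSITIVE);

  // Appends a primary-key predicate to the main query, extending an existing WHERE clause if present.
  static String deltaImportQuery(String query, String pkColumn, Object pkValue) {
    StringBuilder sb = new StringBuilder(query);
    sb.append(SELECT_WHERE_PATTERN.matcher(query).find() ? " and " : " where ");
    sb.append(pkColumn).append(" = ");
    if (pkValue instanceof Number) {
      sb.append(pkValue);
    } else {
      sb.append("'").append(pkValue).append("'");
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    // prints: select * from item where ID = '42'
    System.out.println(deltaImportQuery("select * from item", "ID", "42"));
  }
}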

View File

@ -1,41 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import java.util.List;
/**
* <p> Escapes values in SQL queries. It escapes the value of the given expression
* by replacing all occurrences of single quotes with two single quotes, and similarly
* for double quotes; backslashes are likewise doubled. </p>
*/
public class SqlEscapingEvaluator extends Evaluator {
@Override
public String evaluate(String expression, Context context) {
List<Object> l = parseParams(expression, context.getVariableResolver());
if (l.size() != 1) {
throw new DataImportHandlerException(SEVERE, "'escapeSql' must have at least one parameter ");
}
String s = l.get(0).toString();
// escape single quote with two single quotes, double quote
// with two double quotes, and backslash with double backslash.
// See: http://dev.mysql.com/doc/refman/4.1/en/mysql-real-escape-string.html
return s.replaceAll("'", "''").replaceAll("\"", "\"\"").replaceAll("\\\\", "\\\\\\\\");
}
}
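
The same replacement chain as evaluate() above, pulled out as a runnable sketch; the class name and sample input are illustrative only.

public class SqlEscapeSketch {

  // ' -> '', " -> "", \ -> \\ (identical to the replaceAll chain in SqlEscapingEvaluator)
  static String escapeSql(String s) {
    return s.replaceAll("'", "''").replaceAll("\"", "\"\"").replaceAll("\\\\", "\\\\\\\\");
  }

  public static void main(String[] args) {
    System.out.println(escapeSql("O'Reilly")); // prints O''Reilly
  }
}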

View File

@ -1,115 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.lang.invoke.MethodHandles;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* A {@link Transformer} which can put values into a column by resolving an expression
* containing other columns
* </p>
* <p>
* For example:<br>
* &lt;field column="name" template="${e.lastName}, ${e.firstName}
* ${e.middleName}" /&gt; will produce the name by combining values from
* lastName, firstName and middleName fields as given in the template attribute.
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
*
* @since solr 1.3
*/
public class TemplateTransformer extends Transformer {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private Map<String ,List<String>> templateVsVars = new HashMap<>();
@Override
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) {
VariableResolver resolver = context.getVariableResolver();
// Add current row to the copy of resolver map
for (Map<String, String> map : context.getAllEntityFields()) {
map.entrySet();
String expr = map.get(TEMPLATE);
if (expr == null)
continue;
String column = map.get(DataImporter.COLUMN);
// Verify if all variables can be resolved or not
boolean resolvable = true;
List<String> variables = this.templateVsVars.get(expr);
if(variables == null){
variables = resolver.getVariables(expr);
this.templateVsVars.put(expr, variables);
}
for (String v : variables) {
if (resolver.resolve(v) == null) {
log.warn("Unable to resolve variable: {} while parsing expression: {}"
,v , expr);
resolvable = false;
}
}
if (!resolvable)
continue;
if(variables.size() == 1 && expr.startsWith("${") && expr.endsWith("}")){
addToRow(column, row, resolver.resolve(variables.get(0)));
} else {
addToRow(column, row, resolver.replaceTokens(expr));
}
}
return row;
}
@SuppressWarnings({"unchecked"})
private void addToRow(String key, Map<String, Object> row, Object value) {
Object prevVal = row.get(key);
if (prevVal != null) {
if (prevVal instanceof List) {
((List) prevVal).add(value);
} else {
ArrayList<Object> valList = new ArrayList<Object>();
valList.add(prevVal);
valList.add(value);
row.put(key, valList);
}
} else {
row.put(key, value);
}
}
public static final String TEMPLATE = "template";
}
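
A runnable sketch of the collation rule implemented by addToRow() above: writing a second value into the same column turns the scalar into a List. The class name, column, and values are illustrative only.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class AddToRowSketch {

  @SuppressWarnings("unchecked")
  static void addToRow(String key, Map<String, Object> row, Object value) {
    Object prev = row.get(key);
    if (prev == null) {
      row.put(key, value);                 // first value stays a scalar
    } else if (prev instanceof List) {
      ((List<Object>) prev).add(value);    // further values are appended
    } else {
      List<Object> vals = new ArrayList<>();
      vals.add(prev);
      vals.add(value);
      row.put(key, vals);                  // second value promotes the column to a List
    }
  }

  public static void main(String[] args) {
    Map<String, Object> row = new HashMap<>();
    addToRow("name", row, "Doe, John");
    addToRow("name", row, "Doe, Jane");
    System.out.println(row); // {name=[Doe, John, Doe, Jane]}
  }
}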

View File

@ -1,50 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.Map;
/**
* <p>
* Use this API to implement a custom transformer for any given entity
* </p>
* <p>
* Implementations of this abstract class must provide a public no-args constructor.
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
*
* @since solr 1.3
*/
public abstract class Transformer {
/**
* The input is a row of data and the output has to be a new row.
*
* @param context The current context
* @param row A row of data
* @return The changed data. It must be a {@link Map}&lt;{@link String}, {@link Object}&gt; if it returns
* only one row or if there are multiple rows to be returned it must
* be a {@link java.util.List}&lt;{@link Map}&lt;{@link String}, {@link Object}&gt;&gt;
*/
public abstract Object transformRow(Map<String, Object> row, Context context);
}
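
A minimal sketch of a custom transformer written against the contract above. It assumes it is compiled in (or imports) this removed module's org.apache.solr.handler.dataimport package and is referenced from the transformer attribute of an entity in data-config.xml; the class name and the "name" column are illustrative only.

package org.apache.solr.handler.dataimport;

import java.util.Locale;
import java.util.Map;

// Illustrative only: upper-cases the "name" column of every row it sees.
public class UpperCaseNameTransformer extends Transformer {
  @Override
  public Object transformRow(Map<String, Object> row, Context context) {
    Object name = row.get("name");
    if (name != null) {
      row.put("name", name.toString().toUpperCase(Locale.ROOT));
    }
    return row; // returning a single Map means a single output row
  }
}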

View File

@ -1,154 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p> A data source implementation which can be used to read character files using HTTP. </p> <p> Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
* details. </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
*
* @since solr 1.4
*/
public class URLDataSource extends DataSource<Reader> {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private String baseUrl;
private String encoding;
private int connectionTimeout = CONNECTION_TIMEOUT;
private int readTimeout = READ_TIMEOUT;
private Context context;
private Properties initProps;
public URLDataSource() {
}
@Override
public void init(Context context, Properties initProps) {
this.context = context;
this.initProps = initProps;
baseUrl = getInitPropWithReplacements(BASE_URL);
if (getInitPropWithReplacements(ENCODING) != null)
encoding = getInitPropWithReplacements(ENCODING);
String cTimeout = getInitPropWithReplacements(CONNECTION_TIMEOUT_FIELD_NAME);
String rTimeout = getInitPropWithReplacements(READ_TIMEOUT_FIELD_NAME);
if (cTimeout != null) {
try {
connectionTimeout = Integer.parseInt(cTimeout);
} catch (NumberFormatException e) {
log.warn("Invalid connection timeout: {}", cTimeout);
}
}
if (rTimeout != null) {
try {
readTimeout = Integer.parseInt(rTimeout);
} catch (NumberFormatException e) {
log.warn("Invalid read timeout: {}", rTimeout);
}
}
}
@Override
public Reader getData(String query) {
URL url = null;
try {
if (URIMETHOD.matcher(query).find()) url = new URL(query);
else url = new URL(baseUrl + query);
log.debug("Accessing URL: {}", url);
URLConnection conn = url.openConnection();
conn.setConnectTimeout(connectionTimeout);
conn.setReadTimeout(readTimeout);
InputStream in = conn.getInputStream();
String enc = encoding;
if (enc == null) {
String cType = conn.getContentType();
if (cType != null) {
Matcher m = CHARSET_PATTERN.matcher(cType);
if (m.find()) {
enc = m.group(1);
}
}
}
if (enc == null)
enc = UTF_8;
DataImporter.QUERY_COUNT.get().incrementAndGet();
return new InputStreamReader(in, enc);
} catch (Exception e) {
log.error("Exception thrown while getting data", e);
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Exception in invoking url " + url, e);
}
}
@Override
public void close() {
}
public String getBaseUrl() {
return baseUrl;
}
private String getInitPropWithReplacements(String propertyName) {
final String expr = initProps.getProperty(propertyName);
if (expr == null) {
return null;
}
return context.replaceTokens(expr);
}
static final Pattern URIMETHOD = Pattern.compile("\\w{3,}:/");
private static final Pattern CHARSET_PATTERN = Pattern.compile(".*?charset=(.*)$", Pattern.CASE_INSENSITIVE);
public static final String ENCODING = "encoding";
public static final String BASE_URL = "baseUrl";
public static final String UTF_8 = StandardCharsets.UTF_8.name();
public static final String CONNECTION_TIMEOUT_FIELD_NAME = "connectionTimeout";
public static final String READ_TIMEOUT_FIELD_NAME = "readTimeout";
public static final int CONNECTION_TIMEOUT = 5000;
public static final int READ_TIMEOUT = 10000;
}
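
A runnable sketch of the encoding fallback in getData() above: an explicitly configured encoding wins, otherwise the charset from the Content-Type header is used, otherwise UTF-8. The class name and sample header are illustrative only.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class CharsetSketch {

  private static final Pattern CHARSET_PATTERN =
      Pattern.compile(".*?charset=(.*)$", Pattern.CASE_INSENSITIVE);

  static String pickEncoding(String configuredEncoding, String contentType) {
    if (configuredEncoding != null) {
      return configuredEncoding;             // explicit "encoding" property wins
    }
    if (contentType != null) {
      Matcher m = CHARSET_PATTERN.matcher(contentType);
      if (m.find()) {
        return m.group(1);                   // charset taken from the Content-Type header
      }
    }
    return "UTF-8";                          // final fallback
  }

  public static void main(String[] args) {
    System.out.println(pickEncoding(null, "text/xml; charset=ISO-8859-1")); // prints ISO-8859-1
  }
}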

View File

@ -1,46 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import java.net.URLEncoder;
import java.util.List;
/**
* <p>URL-encodes the value of the given expression (the 'encodeUrl' function)</p>
*
* @see java.net.URLEncoder#encode(String, String)
*/
public class UrlEvaluator extends Evaluator {
@Override
public String evaluate(String expression, Context context) {
List<Object> l = parseParams(expression, context.getVariableResolver());
if (l.size() != 1) {
throw new DataImportHandlerException(SEVERE, "'encodeUrl' must have at least one parameter ");
}
String s = l.get(0).toString();
try {
return URLEncoder.encode(s.toString(), "UTF-8");
} catch (Exception e) {
wrapAndThrow(SEVERE, e, "Unable to encode expression: " + expression + " with value: " + s);
return null;
}
}
}
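
The single library call the evaluator makes, shown as a runnable sketch; the class name and sample string are illustrative only.

import java.net.URLEncoder;

public class EncodeUrlSketch {
  public static void main(String[] args) throws Exception {
    // same call as UrlEvaluator.evaluate(): spaces and reserved characters are encoded
    System.out.println(URLEncoder.encode("a b&c", "UTF-8")); // prints a+b%26c
  }
}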

View File

@ -1,211 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.WeakHashMap;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.solr.common.util.Cache;
import org.apache.solr.common.util.MapBackedCache;
import org.apache.solr.update.processor.TemplateUpdateProcessorFactory;
import static org.apache.solr.update.processor.TemplateUpdateProcessorFactory.Resolved;
/**
* <p>
* A set of nested maps that can resolve variables by namespaces. Variables are
* enclosed with a dollar sign then an opening curly brace, ending with a
* closing curly brace. Namespaces are delimited with '.' (period).
* </p>
* <p>
* This class also has special logic to resolve evaluator calls by recognizing
* the reserved function namespace: dataimporter.functions.xxx
* </p>
* <p>
* This class caches strings that have already been resolved from the current
* dih import.
* </p>
* <b>This API is experimental and may change in the future.</b>
*
*
* @since solr 1.3
*/
public class VariableResolver {
private static final Pattern DOT_PATTERN = Pattern.compile("[.]");
private static final Pattern EVALUATOR_FORMAT_PATTERN = Pattern
.compile("^(\\w*?)\\((.*?)\\)$");
private Map<String,Object> rootNamespace;
private Map<String,Evaluator> evaluators;
private Cache<String,Resolved> cache = new MapBackedCache<>(new WeakHashMap<>());
private Function<String,Object> fun = this::resolve;
public static final String FUNCTIONS_NAMESPACE = "dataimporter.functions.";
public static final String FUNCTIONS_NAMESPACE_SHORT = "dih.functions.";
public VariableResolver() {
rootNamespace = new HashMap<>();
}
public VariableResolver(Properties defaults) {
rootNamespace = new HashMap<>();
for (Map.Entry<Object,Object> entry : defaults.entrySet()) {
rootNamespace.put(entry.getKey().toString(), entry.getValue());
}
}
public VariableResolver(Map<String,Object> defaults) {
rootNamespace = new HashMap<>(defaults);
}
/**
* Resolves a given value with a name
*
* @param name
* the String to be resolved
* @return an Object which is the result of evaluation of given name
*/
public Object resolve(String name) {
Object r = null;
if (name != null) {
String[] nameParts = DOT_PATTERN.split(name);
CurrentLevel cr = currentLevelMap(nameParts,
rootNamespace, false);
Map<String,Object> currentLevel = cr.map;
r = currentLevel.get(nameParts[nameParts.length - 1]);
if (r == null && name.startsWith(FUNCTIONS_NAMESPACE)
&& name.length() > FUNCTIONS_NAMESPACE.length()) {
return resolveEvaluator(FUNCTIONS_NAMESPACE, name);
}
if (r == null && name.startsWith(FUNCTIONS_NAMESPACE_SHORT)
&& name.length() > FUNCTIONS_NAMESPACE_SHORT.length()) {
return resolveEvaluator(FUNCTIONS_NAMESPACE_SHORT, name);
}
if (r == null) {
StringBuilder sb = new StringBuilder();
for(int i=cr.level ; i<nameParts.length ; i++) {
if(sb.length()>0) {
sb.append(".");
}
sb.append(nameParts[i]);
}
r = cr.map.get(sb.toString());
}
if (r == null) {
r = System.getProperty(name);
}
}
return r == null ? "" : r;
}
private Object resolveEvaluator(String namespace, String name) {
if (evaluators == null) {
return "";
}
Matcher m = EVALUATOR_FORMAT_PATTERN.matcher(name
.substring(namespace.length()));
if (m.find()) {
String fname = m.group(1);
Evaluator evaluator = evaluators.get(fname);
if (evaluator == null) return "";
ContextImpl ctx = new ContextImpl(null, this, null, null, null, null,
null);
String g2 = m.group(2);
return evaluator.evaluate(g2, ctx);
} else {
return "";
}
}
/**
* Given a String with place holders, replace them with the value tokens.
*
* @return the string with the placeholders replaced with their values
*/
public String replaceTokens(String template) {
return TemplateUpdateProcessorFactory.replaceTokens(template, cache, fun, TemplateUpdateProcessorFactory.DOLLAR_BRACES_PLACEHOLDER_PATTERN);
}
public void addNamespace(String name, Map<String,Object> newMap) {
if (newMap != null) {
if (name != null) {
String[] nameParts = DOT_PATTERN.split(name);
Map<String,Object> nameResolveLevel = currentLevelMap(nameParts,
rootNamespace, false).map;
nameResolveLevel.put(nameParts[nameParts.length - 1], newMap);
} else {
for (Map.Entry<String,Object> entry : newMap.entrySet()) {
String[] keyParts = DOT_PATTERN.split(entry.getKey());
Map<String,Object> currentLevel = rootNamespace;
currentLevel = currentLevelMap(keyParts, currentLevel, false).map;
currentLevel.put(keyParts[keyParts.length - 1], entry.getValue());
}
}
}
}
public List<String> getVariables(String expr) {
return TemplateUpdateProcessorFactory.getVariables(expr, cache, TemplateUpdateProcessorFactory.DOLLAR_BRACES_PLACEHOLDER_PATTERN);
}
static class CurrentLevel {
final Map<String,Object> map;
final int level;
CurrentLevel(int level, Map<String,Object> map) {
this.level = level;
this.map = map;
}
}
private CurrentLevel currentLevelMap(String[] keyParts,
Map<String,Object> currentLevel, boolean includeLastLevel) {
int j = includeLastLevel ? keyParts.length : keyParts.length - 1;
for (int i = 0; i < j; i++) {
Object o = currentLevel.get(keyParts[i]);
if (o == null) {
if(i == j-1) {
Map<String,Object> nextLevel = new HashMap<>();
currentLevel.put(keyParts[i], nextLevel);
currentLevel = nextLevel;
} else {
return new CurrentLevel(i, currentLevel);
}
} else if (o instanceof Map<?,?>) {
@SuppressWarnings("unchecked")
Map<String,Object> nextLevel = (Map<String,Object>) o;
currentLevel = nextLevel;
} else {
throw new AssertionError(
"Non-leaf nodes should be of type java.util.Map");
}
}
return new CurrentLevel(j-1, currentLevel);
}
public void removeNamespace(String name) {
rootNamespace.remove(name);
}
public void setEvaluators(Map<String,Evaluator> evaluators) {
this.evaluators = evaluators;
}
}
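
A toy sketch (plain maps, not the removed class) of the dot-delimited namespace lookup described in the javadoc above; it omits the evaluator-function and system-property fallbacks. The class name and sample values are illustrative only.

import java.util.HashMap;
import java.util.Map;

public class NamespaceSketch {

  // Walks nested maps one dot-separated segment at a time.
  @SuppressWarnings("unchecked")
  static Object resolve(Map<String, Object> root, String name) {
    Object current = root;
    for (String part : name.split("\\.")) {
      if (!(current instanceof Map)) {
        return null;
      }
      current = ((Map<String, Object>) current).get(part);
    }
    return current;
  }

  public static void main(String[] args) {
    Map<String, Object> dataimporter = new HashMap<>();
    dataimporter.put("last_index_time", "2020-08-29 10:52:04");
    Map<String, Object> root = new HashMap<>();
    root.put("dataimporter", dataimporter);

    // the value a placeholder like ${dataimporter.last_index_time} would resolve to
    System.out.println(resolve(root, "dataimporter.last_index_time"));
  }
}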

View File

@ -1,555 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import org.apache.solr.core.SolrCore;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.solr.util.SystemIdResolver;
import org.apache.solr.common.util.XMLErrorLogger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.io.IOUtils;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.CharArrayReader;
import java.io.CharArrayWriter;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.*;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
/**
* <p> An implementation of {@link EntityProcessor} which uses a streaming xpath parser to extract values out of XML documents.
* It is typically used in conjunction with {@link URLDataSource} or {@link FileDataSource}. </p> <p> Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
* details. </p>
* <p>
* <b>This API is experimental and may change in the future.</b>
*
*
* @see XPathRecordReader
* @since solr 1.3
*/
public class XPathEntityProcessor extends EntityProcessorBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
private static final Map<String, Object> END_MARKER = new HashMap<>();
protected List<String> placeHolderVariables;
protected List<String> commonFields;
private String pk;
private XPathRecordReader xpathReader;
protected DataSource<Reader> dataSource;
protected javax.xml.transform.Transformer xslTransformer;
protected boolean useSolrAddXml = false;
protected boolean streamRows = false;
// Amount of time to block reading/writing to queue when streaming
protected int blockingQueueTimeOut = 10;
// Units for blockingQueueTimeOut
protected TimeUnit blockingQueueTimeOutUnits = TimeUnit.SECONDS;
// Number of rows to queue for asynchronous processing
protected int blockingQueueSize = 1000;
protected Thread publisherThread;
protected boolean reinitXPathReader = true;
@Override
@SuppressWarnings("unchecked")
public void init(Context context) {
super.init(context);
if (reinitXPathReader)
initXpathReader(context.getVariableResolver());
pk = context.getEntityAttribute("pk");
dataSource = context.getDataSource();
rowIterator = null;
}
private void initXpathReader(VariableResolver resolver) {
reinitXPathReader = false;
useSolrAddXml = Boolean.parseBoolean(context
.getEntityAttribute(USE_SOLR_ADD_SCHEMA));
streamRows = Boolean.parseBoolean(context
.getEntityAttribute(STREAM));
if (context.getResolvedEntityAttribute("batchSize") != null) {
blockingQueueSize = Integer.parseInt(context.getEntityAttribute("batchSize"));
}
if (context.getResolvedEntityAttribute("readTimeOut") != null) {
blockingQueueTimeOut = Integer.parseInt(context.getEntityAttribute("readTimeOut"));
}
String xslt = context.getEntityAttribute(XSL);
if (xslt != null) {
xslt = context.replaceTokens(xslt);
try {
// create an instance of TransformerFactory
TransformerFactory transFact = TransformerFactory.newInstance();
final SolrCore core = context.getSolrCore();
final StreamSource xsltSource;
if (core != null) {
final ResourceLoader loader = core.getResourceLoader();
transFact.setURIResolver(new SystemIdResolver(loader).asURIResolver());
xsltSource = new StreamSource(loader.openResource(xslt),
SystemIdResolver.createSystemIdFromResourceName(xslt));
} else {
// fallback for tests
xsltSource = new StreamSource(xslt);
}
transFact.setErrorListener(xmllog);
try {
xslTransformer = transFact.newTransformer(xsltSource);
} finally {
// some XML parsers are broken and don't close the byte stream (but they should according to spec)
IOUtils.closeQuietly(xsltSource.getInputStream());
}
if (log.isInfoEnabled()) {
log.info("Using xslTransformer: {}", xslTransformer.getClass().getName());
}
} catch (Exception e) {
throw new DataImportHandlerException(SEVERE,
"Error initializing XSL ", e);
}
}
if (useSolrAddXml) {
// Support solr add documents
xpathReader = new XPathRecordReader("/add/doc");
xpathReader.addField("name", "/add/doc/field/@name", true);
xpathReader.addField("value", "/add/doc/field", true);
} else {
String forEachXpath = context.getResolvedEntityAttribute(FOR_EACH);
if (forEachXpath == null)
throw new DataImportHandlerException(SEVERE,
"Entity : " + context.getEntityAttribute("name")
+ " must have a 'forEach' attribute");
if (forEachXpath.equals(context.getEntityAttribute(FOR_EACH))) reinitXPathReader = true;
try {
xpathReader = new XPathRecordReader(forEachXpath);
for (Map<String, String> field : context.getAllEntityFields()) {
if (field.get(XPATH) == null)
continue;
int flags = 0;
if ("true".equals(field.get("flatten"))) {
flags = XPathRecordReader.FLATTEN;
}
String xpath = field.get(XPATH);
xpath = context.replaceTokens(xpath);
// !xpath.equals(field.get(XPATH)) means the field xpath has a template;
//in that case ensure that the XPathRecordReader is reinitialized
//for each xml
if (!xpath.equals(field.get(XPATH)) && !context.isRootEntity()) reinitXPathReader = true;
xpathReader.addField(field.get(DataImporter.COLUMN),
xpath,
Boolean.parseBoolean(field.get(DataImporter.MULTI_VALUED)),
flags);
}
} catch (RuntimeException e) {
throw new DataImportHandlerException(SEVERE,
"Exception while reading xpaths for fields", e);
}
}
String url = context.getEntityAttribute(URL);
List<String> l = url == null ? Collections.emptyList() : resolver.getVariables(url);
for (String s : l) {
if (s.startsWith(entityName + ".")) {
if (placeHolderVariables == null)
placeHolderVariables = new ArrayList<>();
placeHolderVariables.add(s.substring(entityName.length() + 1));
}
}
for (Map<String, String> fld : context.getAllEntityFields()) {
if (fld.get(COMMON_FIELD) != null && "true".equals(fld.get(COMMON_FIELD))) {
if (commonFields == null)
commonFields = new ArrayList<>();
commonFields.add(fld.get(DataImporter.COLUMN));
}
}
}
@Override
public Map<String, Object> nextRow() {
Map<String, Object> result;
if (!context.isRootEntity())
return fetchNextRow();
while (true) {
result = fetchNextRow();
if (result == null)
return null;
if (pk == null || result.get(pk) != null)
return result;
}
}
@Override
public void postTransform(Map<String, Object> r) {
readUsefulVars(r);
}
@SuppressWarnings("unchecked")
private Map<String, Object> fetchNextRow() {
Map<String, Object> r = null;
while (true) {
if (rowIterator == null)
initQuery(context.replaceTokens(context.getEntityAttribute(URL)));
r = getNext();
if (r == null) {
Object hasMore = context.getSessionAttribute(HAS_MORE, Context.SCOPE_ENTITY);
try {
if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) {
String url = (String) context.getSessionAttribute(NEXT_URL, Context.SCOPE_ENTITY);
if (url == null)
url = context.getEntityAttribute(URL);
addNamespace();
initQuery(context.replaceTokens(url));
r = getNext();
if (r == null)
return null;
} else {
return null;
}
} finally {
context.setSessionAttribute(HAS_MORE,null,Context.SCOPE_ENTITY);
context.setSessionAttribute(NEXT_URL,null,Context.SCOPE_ENTITY);
}
}
addCommonFields(r);
return r;
}
}
private void addNamespace() {
Map<String, Object> namespace = new HashMap<>();
Set<String> allNames = new HashSet<>();
if (commonFields != null) allNames.addAll(commonFields);
if (placeHolderVariables != null) allNames.addAll(placeHolderVariables);
if(allNames.isEmpty()) return;
for (String name : allNames) {
Object val = context.getSessionAttribute(name, Context.SCOPE_ENTITY);
if (val != null) namespace.put(name, val);
}
context.getVariableResolver().addNamespace(entityName, namespace);
}
private void addCommonFields(Map<String, Object> r) {
if(commonFields != null){
for (String commonField : commonFields) {
if(r.get(commonField) == null) {
Object val = context.getSessionAttribute(commonField, Context.SCOPE_ENTITY);
if(val != null) r.put(commonField, val);
}
}
}
}
@SuppressWarnings({"unchecked"})
private void initQuery(String s) {
Reader data = null;
try {
final List<Map<String, Object>> rows = new ArrayList<>();
try {
data = dataSource.getData(s);
} catch (Exception e) {
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, e);
} else if (SKIP.equals(onError)) {
if (log.isDebugEnabled()) {
log.debug("Skipping url : {}", s, e);
}
wrapAndThrow(DataImportHandlerException.SKIP, e);
} else {
log.warn("Failed for url : {}", s, e);
rowIterator = Collections.EMPTY_LIST.iterator();
return;
}
}
if (xslTransformer != null) {
try {
SimpleCharArrayReader caw = new SimpleCharArrayReader();
xslTransformer.transform(new StreamSource(data),
new StreamResult(caw));
data = caw.getReader();
} catch (TransformerException e) {
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, e, "Exception in applying XSL Transformation");
} else if (SKIP.equals(onError)) {
wrapAndThrow(DataImportHandlerException.SKIP, e);
} else {
log.warn("Failed for url : {}", s, e);
rowIterator = Collections.EMPTY_LIST.iterator();
return;
}
}
}
if (streamRows) {
rowIterator = getRowIterator(data, s);
} else {
try {
xpathReader.streamRecords(data, (record, xpath) -> rows.add(readRow(record, xpath)));
} catch (Exception e) {
String msg = "Parsing failed for xml, url:" + s + " rows processed:" + rows.size();
if (rows.size() > 0) msg += " last row: " + rows.get(rows.size() - 1);
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, e, msg);
} else if (SKIP.equals(onError)) {
log.warn(msg, e);
Map<String, Object> map = new HashMap<>();
map.put(DocBuilder.SKIP_DOC, Boolean.TRUE);
rows.add(map);
} else if (CONTINUE.equals(onError)) {
log.warn(msg, e);
}
}
rowIterator = rows.iterator();
}
} finally {
if (!streamRows) {
closeIt(data);
}
}
}
private void closeIt(Reader data) {
try {
data.close();
} catch (Exception e) { /* Ignore */
}
}
@SuppressWarnings({"unchecked"})
protected Map<String, Object> readRow(Map<String, Object> record, String xpath) {
if (useSolrAddXml) {
List<String> names = (List<String>) record.get("name");
List<String> values = (List<String>) record.get("value");
Map<String, Object> row = new HashMap<>();
for (int i = 0; i < names.size() && i < values.size(); i++) {
if (row.containsKey(names.get(i))) {
Object existing = row.get(names.get(i));
if (existing instanceof List) {
@SuppressWarnings({"rawtypes"})
List list = (List) existing;
list.add(values.get(i));
} else {
@SuppressWarnings({"rawtypes"})
List list = new ArrayList();
list.add(existing);
list.add(values.get(i));
row.put(names.get(i), list);
}
} else {
row.put(names.get(i), values.get(i));
}
}
return row;
} else {
record.put(XPATH_FIELD_NAME, xpath);
return record;
}
}
private static class SimpleCharArrayReader extends CharArrayWriter {
public Reader getReader() {
return new CharArrayReader(super.buf, 0, super.count);
}
}
@SuppressWarnings("unchecked")
private Map<String, Object> readUsefulVars(Map<String, Object> r) {
Object val = r.get(HAS_MORE);
if (val != null)
context.setSessionAttribute(HAS_MORE, val,Context.SCOPE_ENTITY);
val = r.get(NEXT_URL);
if (val != null)
context.setSessionAttribute(NEXT_URL, val,Context.SCOPE_ENTITY);
if (placeHolderVariables != null) {
for (String s : placeHolderVariables) {
val = r.get(s);
context.setSessionAttribute(s, val,Context.SCOPE_ENTITY);
}
}
if (commonFields != null) {
for (String s : commonFields) {
Object commonVal = r.get(s);
if (commonVal != null) {
context.setSessionAttribute(s, commonVal,Context.SCOPE_ENTITY);
}
}
}
return r;
}
private Iterator<Map<String, Object>> getRowIterator(final Reader data, final String s) {
//nothing atomic about it. I just needed a strong reference
final AtomicReference<Exception> exp = new AtomicReference<>();
final BlockingQueue<Map<String, Object>> blockingQueue = new ArrayBlockingQueue<>(blockingQueueSize);
final AtomicBoolean isEnd = new AtomicBoolean(false);
final AtomicBoolean throwExp = new AtomicBoolean(true);
publisherThread = new Thread() {
@Override
public void run() {
try {
xpathReader.streamRecords(data, (record, xpath) -> {
if (isEnd.get()) {
throwExp.set(false);
//To end the streaming; otherwise the parsing will go on forever
//even though the consumer has gone away
throw new RuntimeException("BREAK");
}
Map<String, Object> row;
try {
row = readRow(record, xpath);
} catch (Exception e) {
isEnd.set(true);
return;
}
offer(row);
});
} catch (Exception e) {
if(throwExp.get()) exp.set(e);
} finally {
closeIt(data);
if (!isEnd.get()) {
offer(END_MARKER);
}
}
}
private void offer(Map<String, Object> row) {
try {
while (!blockingQueue.offer(row, blockingQueueTimeOut, blockingQueueTimeOutUnits)) {
if (isEnd.get()) return;
log.debug("Timeout elapsed writing records. Perhaps buffer size should be increased.");
}
} catch (InterruptedException e) {
return;
} finally {
synchronized (this) {
notifyAll();
}
}
}
};
publisherThread.start();
return new Iterator<Map<String, Object>>() {
private Map<String, Object> lastRow;
int count = 0;
@Override
public boolean hasNext() {
return !isEnd.get();
}
@Override
public Map<String, Object> next() {
Map<String, Object> row;
do {
try {
row = blockingQueue.poll(blockingQueueTimeOut, blockingQueueTimeOutUnits);
if (row == null) {
log.debug("Timeout elapsed reading records.");
}
} catch (InterruptedException e) {
log.debug("Caught InterruptedException while waiting for row. Aborting.");
isEnd.set(true);
return null;
}
} while (row == null);
if (row == END_MARKER) {
isEnd.set(true);
if (exp.get() != null) {
String msg = "Parsing failed for xml, url:" + s + " rows processed in this xml:" + count;
if (lastRow != null) msg += " last row in this xml:" + lastRow;
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, exp.get(), msg);
} else if (SKIP.equals(onError)) {
wrapAndThrow(DataImportHandlerException.SKIP, exp.get());
} else {
log.warn(msg, exp.get());
}
}
return null;
}
count++;
return lastRow = row;
}
@Override
public void remove() {
/*no op*/
}
};
}
public static final String URL = "url";
public static final String HAS_MORE = "$hasMore";
public static final String NEXT_URL = "$nextUrl";
public static final String XPATH_FIELD_NAME = "$forEach";
public static final String FOR_EACH = "forEach";
public static final String XPATH = "xpath";
public static final String COMMON_FIELD = "commonField";
public static final String USE_SOLR_ADD_SCHEMA = "useSolrAddSchema";
public static final String XSL = "xsl";
public static final String STREAM = "stream";
}
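// A minimal, self-contained sketch of the producer/consumer pattern used by getRowIterator()
// above: a publisher thread feeds a bounded BlockingQueue and a sentinel row marks the end of
// the stream. Class and variable names below are illustrative, not taken from the code above.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

class StreamingRowIteratorSketch {
  private static final Map<String, Object> END_MARKER = new HashMap<>();

  static Iterator<Map<String, Object>> stream(List<Map<String, Object>> source) {
    BlockingQueue<Map<String, Object>> queue = new ArrayBlockingQueue<>(10);
    Thread publisher = new Thread(() -> {
      try {
        for (Map<String, Object> row : source) {
          queue.put(row);        // blocks when the consumer falls behind
        }
        queue.put(END_MARKER);   // signal that no more rows will arrive
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
    });
    publisher.start();
    return new Iterator<Map<String, Object>>() {
      private Map<String, Object> next = fetch();

      private Map<String, Object> fetch() {
        try {
          Map<String, Object> row = queue.take();
          return row == END_MARKER ? null : row;
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          return null;
        }
      }

      @Override public boolean hasNext() { return next != null; }

      @Override public Map<String, Object> next() {
        Map<String, Object> row = next;
        next = fetch();
        return row;
      }
    };
  }

  public static void main(String[] args) {
    List<Map<String, Object>> rows = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
      Map<String, Object> row = new HashMap<>();
      row.put("id", i);
      rows.add(row);
    }
    stream(rows).forEachRemaining(System.out::println); // prints {id=0}, {id=1}, {id=2}
  }
}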

View File

@ -1,670 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.util.XMLErrorLogger;
import org.apache.solr.common.EmptyEntityResolver;
import javax.xml.stream.XMLInputFactory;
import static javax.xml.stream.XMLStreamConstants.*;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* A streaming xpath parser which uses StAX for XML parsing. It supports only
* a subset of xpath syntax.
* </p><pre>
* /a/b/subject[@qualifier='fullTitle']
* /a/b/subject[@qualifier=]/subtag
* /a/b/subject/@qualifier
* //a
* //a/b...
* /a//b
* /a//b...
* /a/b/c
* </pre>
* A record is a Map&lt;String,Object&gt; . The key is the provided name
* and the value is a String or a List&lt;String&gt;
*
* This class is thread-safe for parsing xml. But adding fields is not
* thread-safe. The recommended usage is to addField() in one thread and
* then share the instance across threads.
* <p>
* <b>This API is experimental and may change in the future.</b>
*
* @since solr 1.3
*/
public class XPathRecordReader {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final XMLErrorLogger XMLLOG = new XMLErrorLogger(log);
private Node rootNode = new Node("/", null);
/**
* The FLATTEN flag indicates that all text and cdata under a specific
* tag should be recursively fetched and appended to the current Node's
* value.
*/
public static final int FLATTEN = 1;
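// For example (illustrative): with FLATTEN set, an element such as
// <title>Good <em>and</em> Evil</title> yields the single value "Good and Evil",
// because text inside nested child tags is appended to the enclosing field's value.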
/**
* A constructor called with a '|' separated list of Xpath expressions
* which define sub sections of the XML stream that are to be emitted as
* separate records.
*
* @param forEachXpath The XPATH for which a record is emitted. Once the
* xpath tag is encountered, the Node.parse method starts collecting wanted
* fields and at the close of the tag, a record is emitted containing all
* fields collected since the tag start. Once
* emitted the collected fields are cleared. Any fields collected in the
* parent tag or above will also be included in the record, but these are
* not cleared after emitting the record.
*
* It uses the ' | ' syntax of XPATH to pass in multiple xpaths.
*/
public XPathRecordReader(String forEachXpath) {
String[] splits = forEachXpath.split("\\|");
for (String split : splits) {
split = split.trim();
if (split.startsWith("//"))
throw new RuntimeException("forEach cannot start with '//': " + split);
if (split.length() == 0)
continue;
// The created Node has a name set to the full forEach attribute xpath
addField0(split, split, false, true, 0);
}
}
/**
* A wrapper around <code>addField0</code> to create a series of
* Nodes based on the supplied Xpath and a given fieldName. The created
* nodes are inserted into a Node tree.
*
* @param name The name for this field in the emitted record
* @param xpath The xpath expression for this field
* @param multiValued If 'true' then the emitted record will have values in
* a List&lt;String&gt;
*/
public synchronized XPathRecordReader addField(String name, String xpath, boolean multiValued) {
addField0(xpath, name, multiValued, false, 0);
return this;
}
/**
* A wrapper around <code>addField0</code> to create a series of
* Nodes based on the supplied Xpath and a given fieldName. The created
* nodes are inserted into a Node tree.
*
* @param name The name for this field in the emitted record
* @param xpath The xpath expression for this field
* @param multiValued If 'true' then the emitted record will have values in
* a List&lt;String&gt;
* @param flags FLATTEN: Recursively combine text from all child XML elements
*/
public synchronized XPathRecordReader addField(String name, String xpath, boolean multiValued, int flags) {
addField0(xpath, name, multiValued, false, flags);
return this;
}
/**
* Splits the XPATH into a List of xpath segments and calls build() to
* construct a tree of Nodes representing xpath segments. The resulting
* tree structure ends up describing all the Xpaths we are interested in.
*
* @param xpath The xpath expression for this field
* @param name The name for this field in the emitted record
* @param multiValued If 'true' then the emitted record will have values in
* a List&lt;String&gt;
* @param isRecord Flags that this XPATH is from a forEach statement
* @param flags The only supported flag is 'FLATTEN'
*/
private void addField0(String xpath, String name, boolean multiValued,
boolean isRecord, int flags) {
if (!xpath.startsWith("/"))
throw new RuntimeException("xpath must start with '/' : " + xpath);
List<String> paths = splitEscapeQuote(xpath);
// deal with how split behaves when separator starts a string!
if ("".equals(paths.get(0).trim()))
paths.remove(0);
rootNode.build(paths, name, multiValued, isRecord, flags);
rootNode.buildOptimise(null);
}
/**
* Uses {@link #streamRecords streamRecords} to parse the XML source but with
* a handler that collects all the emitted records into a single List which
* is returned upon completion.
*
* @param r the stream reader
* @return results a List of emitted records
*/
public List<Map<String, Object>> getAllRecords(Reader r) {
final List<Map<String, Object>> results = new ArrayList<>();
streamRecords(r, (record, s) -> results.add(record));
return results;
}
/**
* Creates an XML stream reader on top of whatever reader has been
* configured. Then calls parse() with a handler which is
* invoked forEach record emitted.
*
* @param r the stream reader
* @param handler The callback instance
*/
public void streamRecords(Reader r, Handler handler) {
try {
XMLStreamReader parser = factory.createXMLStreamReader(r);
rootNode.parse(parser, handler, new HashMap<>(),
new Stack<>(), false);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* For each node/leaf in the Node tree there is one object of this class.
* This tree of objects represents all the XPaths we are interested in.
* For each Xpath segment of interest we create a node. In most cases the
* node (branch) is rather basic, but for the final portion (leaf) of any
* Xpath we add more information to the Node. When parsing the XML document
* we step through this tree as we stream records from the reader. If the XML
* document departs from this tree we skip start tags till we are back on
* the tree.
*/
private static class Node {
String name; // generally: segment of the Xpath represented by this Node
String fieldName; // the fieldname in the emitted record (key of the map)
String xpathName; // the segment of the Xpath represented by this Node
String forEachPath; // the full Xpath from the forEach entity attribute
List<Node> attributes; // List of attribute Nodes associated with this Node
List<Node> childNodes; // List of immediate child Nodes of this node
List<Node> wildCardNodes; // List of '//' style descendants of this Node
List<Map.Entry<String, String>> attribAndValues;
Node wildAncestor; // ancestor Node containing '//' style descendants
Node parent; // parent Node in the tree
boolean hasText=false; // flag: store/emit streamed text for this node
boolean multiValued=false; //flag: this field's values are returned as a List
boolean isRecord=false; //flag: this Node starts a new record
private boolean flatten; //flag: child text is also to be emitted
public Node(String name, Node p) {
// Create a basic Node, suitable for the mid portions of any Xpath.
// Node.xpathName and Node.name are set to same value.
xpathName = this.name = name;
parent = p;
}
public Node(String name, String fieldName, boolean multiValued) {
// This is only called from build() when describing an attribute.
this.name = name; // a segment from the Xpath
this.fieldName = fieldName; // name to store collected values against
this.multiValued = multiValued; // return collected values in a List
}
/**
* This is the method where all the XML parsing happens. For each
* tag/subtag read from the source, this method is called recursively.
*
*/
private void parse(XMLStreamReader parser,
Handler handler,
Map<String, Object> values,
Stack<Set<String>> stack, // lists of values to purge
boolean recordStarted
) throws IOException, XMLStreamException {
Set<String> valuesAddedinThisFrame = null;
if (isRecord) {
// This Node is a match for an XPATH from a forEach attribute,
// prepare for the clean up that will occur when the record
// is emitted after its END_ELEMENT is matched
recordStarted = true;
valuesAddedinThisFrame = new HashSet<>();
stack.push(valuesAddedinThisFrame);
} else if (recordStarted) {
// This node is a child of some parent which matched against forEach
// attribute. Continue to add values to an existing record.
valuesAddedinThisFrame = stack.peek();
}
try {
/* The input stream has deposited us at this Node in our tree of
* interesting nodes. Depending on how this node is of interest,
* process further tokens from the input stream and decide what
* we do next
*/
if (attributes != null) {
// we are interested in storing attributes from the input stream
for (Node node : attributes) {
String value = parser.getAttributeValue(null, node.name);
if (value != null || (recordStarted && !isRecord)) {
putText(values, value, node.fieldName, node.multiValued);
valuesAddedinThisFrame.add(node.fieldName);
}
}
}
Set<Node> childrenFound = new HashSet<>();
int event = -1;
int flattenedStarts=0; // our tag depth when flattening elements
StringBuilder text = new StringBuilder();
while (true) {
event = parser.next();
if (event == END_ELEMENT) {
if (flattenedStarts > 0) flattenedStarts--;
else {
if (hasText && valuesAddedinThisFrame != null) {
valuesAddedinThisFrame.add(fieldName);
putText(values, text.toString(), fieldName, multiValued);
}
if (isRecord) handler.handle(getDeepCopy(values), forEachPath);
if (childNodes != null && recordStarted && !isRecord && !childrenFound.containsAll(childNodes)) {
// non-record nodes where we have not collected text for ALL
// the child nodes.
for (Node n : childNodes) {
// For the multivalue child nodes where we could have, but
// didn't, collect text. Push a null string into values.
if (!childrenFound.contains(n)) n.putNulls(values, valuesAddedinThisFrame);
}
}
return;
}
}
else if (hasText && (event==CDATA || event==CHARACTERS || event==SPACE)) {
text.append(parser.getText());
}
else if (event == START_ELEMENT) {
if ( flatten )
flattenedStarts++;
else
handleStartElement(parser, childrenFound, handler, values, stack, recordStarted);
}
// END_DOCUMENT is least likely to appear and should be
// last in if-then-else skip chain
else if (event == END_DOCUMENT) return;
}
}finally {
if ((isRecord || !recordStarted) && !stack.empty()) {
Set<String> cleanThis = stack.pop();
if (cleanThis != null) {
for (String fld : cleanThis) values.remove(fld);
}
}
}
}
/**
* If a new tag is encountered, check if it is of interest or not by seeing
* if it matches against our node tree. If we have departed from the node
* tree then walk back through the tree's ancestor nodes checking to see if
* any // expressions exist for the node and compare them against the new
* tag. If matched then "jump" to that node, otherwise ignore the tag.
*
* Note, the list of // expressions found while walking back up the tree
* is cached in the HashMap decends. Then if the new tag is to be skipped,
* any inner child tags are compared against the cache and jumped to if
* matched.
*/
private void handleStartElement(XMLStreamReader parser, Set<Node> childrenFound,
Handler handler, Map<String, Object> values,
Stack<Set<String>> stack, boolean recordStarted)
throws IOException, XMLStreamException {
Node n = getMatchingNode(parser,childNodes);
Map<String, Object> decends=new HashMap<>();
if (n != null) {
childrenFound.add(n);
n.parse(parser, handler, values, stack, recordStarted);
return;
}
// The stream has diverged from the tree of interesting elements, but
// are there any wildCardNodes ... anywhere in our path from the root?
Node dn = this; // checking our Node first!
do {
if (dn.wildCardNodes != null) {
// Check to see if the stream's tag matches one of the "//" all
// descendants type expressions for this node.
n = getMatchingNode(parser, dn.wildCardNodes);
if (n != null) {
childrenFound.add(n);
n.parse(parser, handler, values, stack, recordStarted);
break;
}
// add the list of this node's wild descendants to the cache
for (Node nn : dn.wildCardNodes) decends.put(nn.name, nn);
}
dn = dn.wildAncestor; // leap back along the tree toward root
} while (dn != null) ;
if (n == null) {
// we have a START_ELEMENT which is not within the tree of
// interesting nodes. Skip over the contents of this element
// but recursively repeat the above for any START_ELEMENTs
// found within this element.
int count = 1; // we have had our first START_ELEMENT
while (count != 0) {
int token = parser.next();
if (token == START_ELEMENT) {
Node nn = (Node) decends.get(parser.getLocalName());
if (nn != null) {
// We have a //Node which matches the stream's parser.localName
childrenFound.add(nn);
// Parse the contents of this stream element
nn.parse(parser, handler, values, stack, recordStarted);
}
else count++;
}
else if (token == END_ELEMENT) count--;
}
}
}
/**
* Check if the current tag is to be parsed or not. We step through the
* supplied List "searchList" looking for a match. If matched, return the
* Node object.
*/
private Node getMatchingNode(XMLStreamReader parser,List<Node> searchL){
if (searchL == null)
return null;
String localName = parser.getLocalName();
for (Node n : searchL) {
if (n.name.equals(localName)) {
if (n.attribAndValues == null)
return n;
if (checkForAttributes(parser, n.attribAndValues))
return n;
}
}
return null;
}
private boolean checkForAttributes(XMLStreamReader parser,
List<Map.Entry<String, String>> attrs) {
for (Map.Entry<String, String> e : attrs) {
String val = parser.getAttributeValue(null, e.getKey());
if (val == null)
return false;
if (e.getValue() != null && !e.getValue().equals(val))
return false;
}
return true;
}
/**
* A recursive routine that walks the Node tree from a supplied start
* pushing a null string onto every multiValued fieldName's List of values
* where a value has not been provided from the stream.
*/
private void putNulls(Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
if (attributes != null) {
for (Node n : attributes) {
if (n.multiValued) {
putANull(n.fieldName, values, valuesAddedinThisFrame);
}
}
}
if (hasText && multiValued) {
putANull(fieldName, values, valuesAddedinThisFrame);
}
if (childNodes != null) {
for (Node childNode : childNodes) {
childNode.putNulls(values, valuesAddedinThisFrame);
}
}
}
private void putANull(String thisFieldName, Map<String, Object> values, Set<String> valuesAddedinThisFrame) {
putText(values, null, thisFieldName, true);
if( valuesAddedinThisFrame != null) {
valuesAddedinThisFrame.add(thisFieldName);
}
}
/**
* Add the field name and text into the values Map. If it is a non
* multivalued field, then the text is simply placed in the object
* portion of the Map. If it is a multivalued field then the text is
* pushed onto a List which is the object portion of the Map.
*/
@SuppressWarnings("unchecked")
private void putText(Map<String, Object> values, String value,
String fieldName, boolean multiValued) {
if (multiValued) {
List<String> v = (List<String>) values.get(fieldName);
if (v == null) {
v = new ArrayList<>();
values.put(fieldName, v);
}
v.add(value);
} else {
values.put(fieldName, value);
}
}
/**
* Walk the Node tree propagating any wild descendant information to
* child nodes. This allows us to optimise the performance of the
* main parse method.
*/
private void buildOptimise(Node wa) {
wildAncestor=wa;
if ( wildCardNodes != null ) wa = this;
if ( childNodes != null )
for ( Node n : childNodes ) n.buildOptimise(wa);
}
/**
* Build a Node tree structure representing all Xpaths of interest to us.
* This must be done before parsing of the XML stream starts. Each node
* holds one portion of an Xpath. Taking each Xpath segment in turn this
* method walks the Node tree and finds where the new segment should be
* inserted. It creates a Node representing a field's name, XPATH and
* some flags and inserts the Node into the Node tree.
*/
private void build(
List<String> paths, // a List of segments from the split xpaths
String fieldName, // the fieldName assoc with this Xpath
boolean multiValued, // flag if this fieldName is multiValued or not
boolean record, // is this xpath a record or a field
int flags // are we to flatten matching xpaths
) {
// recursively walk the paths Lists adding new Nodes as required
String xpseg = paths.remove(0); // shift out next Xpath segment
if (paths.isEmpty() && xpseg.startsWith("@")) {
// we have reached end of element portion of Xpath and can now only
// have an element attribute. Add it to this node's list of attributes
if (attributes == null) {
attributes = new ArrayList<>();
}
xpseg = xpseg.substring(1); // strip the '@'
attributes.add(new Node(xpseg, fieldName, multiValued));
}
else if ( xpseg.length() == 0) {
// we have a '//' selector for all descendants of the current node
xpseg = paths.remove(0); // shift out next Xpath segment
if (wildCardNodes == null) wildCardNodes = new ArrayList<>();
Node n = getOrAddNode(xpseg, wildCardNodes);
if (paths.isEmpty()) {
// We are currently a leaf node.
// xpath with content we want to store and return
n.hasText = true; // we have to store text found here
n.fieldName = fieldName; // name to store collected text against
n.multiValued = multiValued; // true: text to be stored in a List
n.flatten = flags == FLATTEN; // true: store text from child tags
}
else {
// recurse to handle next paths segment
n.build(paths, fieldName, multiValued, record, flags);
}
}
else {
if (childNodes == null)
childNodes = new ArrayList<>();
// does this "name" already exist as a child node.
Node n = getOrAddNode(xpseg,childNodes);
if (paths.isEmpty()) {
// We have emptied paths, we are for the moment a leaf of the tree.
// When parsing the actual input we have traversed to a position
// where we actually have to do something. getOrAddNode() will
// have created and returned a new minimal Node with name and
// xpathName already populated. We need to add more information.
if (record) {
// forEach attribute
n.isRecord = true; // flag: forEach attribute, prepare to emit rec
n.forEachPath = fieldName; // the full forEach attribute xpath
} else {
// xpath with content we want to store and return
n.hasText = true; // we have to store text found here
n.fieldName = fieldName; // name to store collected text against
n.multiValued = multiValued; // true: text to be stored in a List
n.flatten = flags == FLATTEN; // true: store text from child tags
}
} else {
// recurse to handle next paths segment
n.build(paths, fieldName, multiValued, record, flags);
}
}
}
private Node getOrAddNode(String xpathName, List<Node> searchList ) {
for (Node n : searchList)
if (n.xpathName.equals(xpathName)) return n;
// new territory! add a new node for this Xpath segment
Node n = new Node(xpathName, this); // a minimal Node initialization
Matcher m = ATTRIB_PRESENT_WITHVAL.matcher(xpathName);
if (m.find()) {
n.name = m.group(1);
int start = m.start(2);
while (true) {
HashMap<String, String> attribs = new HashMap<>();
if (!m.find(start))
break;
attribs.put(m.group(3), m.group(5));
start = m.end(6);
if (n.attribAndValues == null)
n.attribAndValues = new ArrayList<>();
n.attribAndValues.addAll(attribs.entrySet());
}
}
searchList.add(n);
return n;
}
/**
* Copies a supplied Map to a new Map which is returned. Used to copy a
* record's values. If a field's value is a List then it has to be
* deep-copied for thread safety
*/
@SuppressWarnings({"unchecked", "rawtypes"})
private static Map<String, Object> getDeepCopy(Map<String, Object> values) {
Map<String, Object> result = new HashMap<>();
for (Map.Entry<String, Object> entry : values.entrySet()) {
if (entry.getValue() instanceof List) {
result.put(entry.getKey(), new ArrayList((List) entry.getValue()));
} else {
result.put(entry.getKey(), entry.getValue());
}
}
return result;
}
} // end of class Node
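// Illustrative example (field names are hypothetical): given forEach="/catalog/book" plus the
// field xpaths "/catalog/book/title" and "/catalog/book/@id", build() produces the tree
// rootNode -> "catalog" -> "book", where the 'book' Node has isRecord=true, one child Node
// 'title' with hasText=true, and one attribute Node 'id'.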
/**
* The Xpath is split into segments using the '/' as a separator. However
* this method deals with special cases where there is a slash '/' character
* inside the attribute value e.g. x/@html='text/html'. We split by '/' but
* then reassemble things where the '/' appears within a quoted sub-string.
*
* We have already enforced that the string must begin with a separator. This
* method depends heavily on how split behaves if the string starts with the
* separator or if a sequence of multiple separators appears.
*/
private static List<String> splitEscapeQuote(String str) {
List<String> result = new LinkedList<>();
String[] ss = str.split("/");
for (int i=0; i<ss.length; i++) { // ss[0] is the empty segment before the leading '/'; the caller strips it
StringBuilder sb = new StringBuilder();
int quoteCount = 0;
while (true) {
sb.append(ss[i]);
for (int j=0; j<ss[i].length(); j++)
if (ss[i].charAt(j) == '\'') quoteCount++;
// have we got a split inside quoted sub-string?
if ((quoteCount % 2) == 0) break;
// yes!; replace the '/' and loop to concat next token
i++;
sb.append("/");
}
result.add(sb.toString());
}
return result;
}
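// Worked example (illustrative input): splitEscapeQuote("/a/b[@type='text/html']/c") first
// splits on '/' into ["", "a", "b[@type='text", "html']", "c"], then rejoins the piece that
// was split inside the quoted value, returning ["", "a", "b[@type='text/html']", "c"].
// The leading empty segment is later removed by the caller, addField0().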
static XMLInputFactory factory = XMLInputFactory.newInstance();
static {
EmptyEntityResolver.configureXMLInputFactory(factory);
factory.setXMLReporter(XMLLOG);
try {
// The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
// XMLInputFactory, as that implementation tries to cache and reuse the
// XMLStreamReader. Setting the parser-specific "reuse-instance" property to false
// prevents this.
// All other known open-source stax parsers (and the bea ref impl)
// have thread-safe factories.
factory.setProperty("reuse-instance", Boolean.FALSE);
} catch (IllegalArgumentException ex) {
// Other implementations will likely throw this exception since "reuse-instance"
// is implementation specific.
log.debug("Unable to set the 'reuse-instance' property for the input chain: {}", factory);
}
}
/**Implement this interface to stream records as and when one is found.
*
*/
public interface Handler {
/**
* @param record The record map. The key is the field name as provided in
* the addField() methods. The value can be a single String (for single
* valued fields) or a List&lt;String&gt; (for multiValued).
* @param xpath The forEach XPATH for which this record is being emitted
* If the handler throws an exception, all parsing will be aborted and the Exception
* is propagated up
*/
void handle(Map<String, Object> record, String xpath);
}
private static final Pattern ATTRIB_PRESENT_WITHVAL = Pattern
.compile("(\\S*?)?(\\[@)(\\S*?)(='(.*?)')?(\\])");
}
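// A minimal usage sketch of the reader above, assumed to live in the same package as
// XPathRecordReader; the sample XML and field names are illustrative. One record is emitted
// per forEach match, with single-valued fields as String and multi-valued fields as List<String>.
import java.io.StringReader;
import java.util.List;
import java.util.Map;

class XPathRecordReaderUsageSketch {
  public static void main(String[] args) {
    String xml =
        "<catalog>"
      + "  <book id='1'><title>Good Omens</title><author>Gaiman</author><author>Pratchett</author></book>"
      + "  <book id='2'><title>Small Gods</title><author>Pratchett</author></book>"
      + "</catalog>";

    XPathRecordReader reader = new XPathRecordReader("/catalog/book");
    reader.addField("id", "/catalog/book/@id", false);
    reader.addField("title", "/catalog/book/title", false);
    reader.addField("author", "/catalog/book/author", true); // multiValued -> List<String>

    List<Map<String, Object>> records = reader.getAllRecords(new StringReader(xml));
    // Expected shape: [{id=1, title=Good Omens, author=[Gaiman, Pratchett]},
    //                  {id=2, title=Small Gods, author=[Pratchett]}]
    records.forEach(System.out::println);
  }
}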

Some files were not shown because too many files have changed in this diff.