mirror of https://github.com/apache/lucene.git
SOLR-11694: Remove outdated UIMA module
This commit is contained in:
parent
6d6e67140b
commit
b7d14c50fb
|
@ -264,7 +264,7 @@
|
|||
<!-- TODO: find a better way to exclude duplicate JAR files & fix the servlet-api mess! -->
|
||||
<pathconvert property="netbeans.path.libs" pathsep=":" dirsep="/">
|
||||
<fileset dir="${basedir}/lucene" includes="**/lib/*.jar"
|
||||
excludes="**/*servlet-api*.jar, analysis/uima/**, tools/**, build/**"/>
|
||||
excludes="**/*servlet-api*.jar, tools/**, build/**"/>
|
||||
<fileset dir="${basedir}/solr" includes="**/test-lib/*.jar,**/lib/*.jar"
|
||||
excludes="core/test-lib/*servlet-api*.jar, contrib/analysis-extras/**, test-framework/lib/junit*, test-framework/lib/ant*, test-framework/lib/randomizedtesting*, build/**, dist/**, package/**, server/solr-webapp/**" />
|
||||
<map from="${basedir}/" to=""/>
|
||||
|
@ -311,7 +311,7 @@
|
|||
</pathconvert>
|
||||
<!-- TODO: find a better way to exclude duplicate JAR files & fix the servlet-api mess! -->
|
||||
<pathconvert property="eclipse.fileset.libs" pathsep="|" dirsep="/">
|
||||
<fileset dir="${basedir}/lucene" includes="**/lib/*.jar" excludes="**/*servlet-api*.jar, analysis/uima/**, tools/**, build/**"/>
|
||||
<fileset dir="${basedir}/lucene" includes="**/lib/*.jar" excludes="**/*servlet-api*.jar, tools/**, build/**"/>
|
||||
<fileset dir="${basedir}/solr" includes="**/test-lib/*.jar,**/lib/*.jar" excludes="core/test-lib/*servlet-api*.jar, contrib/analysis-extras/**, test-framework/lib/junit*, test-framework/lib/ant*, test-framework/lib/randomizedtesting*, build/**, dist/**, package/**" />
|
||||
<map from="${basedir}/" to=""/>
|
||||
</pathconvert>
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
<buildFile url="file://$PROJECT_DIR$/lucene/analysis/phonetic/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/lucene/analysis/smartcn/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/lucene/analysis/stempel/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/lucene/analysis/uima/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/lucene/benchmark/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/lucene/classification/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/lucene/codecs/build.xml" />
|
||||
|
@ -48,7 +47,6 @@
|
|||
<buildFile url="file://$PROJECT_DIR$/solr/contrib/extraction/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/solr/contrib/langid/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/solr/contrib/prometheus-exporter/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/solr/contrib/uima/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/solr/contrib/velocity/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/solr/solrj/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/solr/test-framework/build.xml" />
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
<module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/phonetic/phonetic.iml" />
|
||||
<module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/smartcn/smartcn.iml" />
|
||||
<module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/stempel/stempel.iml" />
|
||||
<module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/uima/analysis-uima.iml" />
|
||||
|
||||
<module group="Lucene/Other" filepath="$PROJECT_DIR$/lucene/benchmark/src/benchmark.iml" />
|
||||
<module group="Lucene/Other" filepath="$PROJECT_DIR$/lucene/benchmark/conf/benchmark-conf.iml" />
|
||||
|
@ -59,7 +58,6 @@
|
|||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/langid/langid.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/ltr/ltr.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/prometheus-exporter/prometheus-exporter.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/uima/uima.iml" />
|
||||
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/velocity/velocity.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
|
|
|
@ -76,14 +76,6 @@
|
|||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
|
||||
</configuration>
|
||||
<configuration default="false" name="Module analyzers-uima" type="JUnit" factoryName="JUnit">
|
||||
<module name="analysis-uima" />
|
||||
<option name="TEST_OBJECT" value="pattern" />
|
||||
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/lucene/analysis/uima" />
|
||||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
|
||||
</configuration>
|
||||
<configuration default="false" name="Module backward-codecs" type="JUnit" factoryName="JUnit">
|
||||
<module name="backward-codecs" />
|
||||
<option name="TEST_OBJECT" value="pattern" />
|
||||
|
@ -332,14 +324,6 @@
|
|||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
|
||||
</configuration>
|
||||
<configuration default="false" name="Solr uima contrib" type="JUnit" factoryName="JUnit">
|
||||
<module name="uima" />
|
||||
<option name="TEST_OBJECT" value="pattern" />
|
||||
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/solr/contrib/solr-uima" />
|
||||
<option name="VM_PARAMETERS" value="-ea -Dtests.luceneMatchVersion=@version.base@ -DtempDir=temp -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory" />
|
||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
|
||||
</configuration>
|
||||
<configuration default="false" name="Solr velocity contrib" type="JUnit" factoryName="JUnit">
|
||||
<module name="velocity" />
|
||||
<option name="TEST_OBJECT" value="pattern" />
|
||||
|
@ -359,7 +343,6 @@
|
|||
<item index="6" class="java.lang.String" itemvalue="JUnit.Module analyzers-phonetic" />
|
||||
<item index="7" class="java.lang.String" itemvalue="JUnit.Module analyzers-smartcn" />
|
||||
<item index="8" class="java.lang.String" itemvalue="JUnit.Module analyzers-stempel" />
|
||||
<item index="9" class="java.lang.String" itemvalue="JUnit.Module analyzers-uima" />
|
||||
<item index="10" class="java.lang.String" itemvalue="JUnit.Module backward-codecs" />
|
||||
<item index="11" class="java.lang.String" itemvalue="JUnit.Module benchmark" />
|
||||
<item index="12" class="java.lang.String" itemvalue="JUnit.Module classification" />
|
||||
|
@ -391,7 +374,6 @@
|
|||
<item index="38" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
|
||||
<item index="39" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
|
||||
<item index="40" class="java.lang.String" itemvalue="JUnit.Solr prometheus-exporter contrib" />
|
||||
<item index="41" class="java.lang.String" itemvalue="JUnit.Solr uima contrib" />
|
||||
<item index="42" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
|
||||
</list>
|
||||
</component>
|
||||
|
|
|
@ -1,30 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/uima/classes/java" />
|
||||
<output-test url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/uima/classes/test" />
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="module-library">
|
||||
<library>
|
||||
<CLASSES>
|
||||
<root url="file://$MODULE_DIR$/lib" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
<jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
|
||||
</library>
|
||||
</orderEntry>
|
||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
|
||||
<orderEntry type="module" module-name="analysis-common" />
|
||||
<orderEntry type="module" module-name="lucene-core" />
|
||||
</component>
|
||||
</module>
|
|
@ -1,36 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-uima/classes/java" />
|
||||
<output-test url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-uima/classes/test" />
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||
<orderEntry type="library" name="Solr core library" level="project" />
|
||||
<orderEntry type="library" name="Solrj library" level="project" />
|
||||
<orderEntry type="module-library">
|
||||
<library>
|
||||
<CLASSES>
|
||||
<root url="file://$MODULE_DIR$/lib" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
<jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
|
||||
</library>
|
||||
</orderEntry>
|
||||
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
|
||||
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
|
||||
<orderEntry type="module" module-name="solr-core" />
|
||||
<orderEntry type="module" module-name="solrj" />
|
||||
<orderEntry type="module" module-name="lucene-core" />
|
||||
<orderEntry type="module" module-name="analysis-uima" />
|
||||
<orderEntry type="module" module-name="analysis-common" />
|
||||
</component>
|
||||
</module>
|
|
@ -40,7 +40,6 @@
|
|||
<module>phonetic</module>
|
||||
<module>smartcn</module>
|
||||
<module>stempel</module>
|
||||
<module>uima</module>
|
||||
</modules>
|
||||
<build>
|
||||
<plugins>
|
||||
|
|
|
@ -1,74 +0,0 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-parent</artifactId>
|
||||
<version>@version@</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-analyzers-uima</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Lucene UIMA Analysis Components</name>
|
||||
<description>
|
||||
Lucene Integration with UIMA for extracting metadata from arbitrary (text)
|
||||
fields and enriching documents with features extracted from UIMA types
|
||||
(language, sentences, concepts, named entities, etc.)
|
||||
</description>
|
||||
<properties>
|
||||
<module-directory>lucene/analysis/uima</module-directory>
|
||||
<relative-top-level>../../../..</relative-top-level>
|
||||
<module-path>${relative-top-level}/${module-directory}</module-path>
|
||||
</properties>
|
||||
<scm>
|
||||
<connection>scm:git:${vc-anonymous-base-url}</connection>
|
||||
<developerConnection>scm:git:${vc-dev-base-url}</developerConnection>
|
||||
<url>${vc-browse-base-url};f=${module-directory}</url>
|
||||
</scm>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<!-- lucene-test-framework dependency must be declared before lucene-core -->
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-test-framework</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
@lucene-analyzers-uima.internal.dependencies@
|
||||
@lucene-analyzers-uima.external.dependencies@
|
||||
@lucene-analyzers-uima.internal.test.dependencies@
|
||||
@lucene-analyzers-uima.external.test.dependencies@
|
||||
</dependencies>
|
||||
<build>
|
||||
<sourceDirectory>${module-path}/src/java</sourceDirectory>
|
||||
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>${module-path}/src/resources</directory>
|
||||
</resource>
|
||||
</resources>
|
||||
<testResources>
|
||||
<testResource>
|
||||
<directory>${module-path}/src/test-files</directory>
|
||||
</testResource>
|
||||
</testResources>
|
||||
</build>
|
||||
</project>
|
|
@ -40,7 +40,6 @@
|
|||
<module>langid</module>
|
||||
<module>ltr</module>
|
||||
<module>prometheus-exporter</module>
|
||||
<module>uima</module>
|
||||
<module>velocity</module>
|
||||
</modules>
|
||||
<build>
|
||||
|
|
|
@ -1,83 +0,0 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-parent</artifactId>
|
||||
<version>@version@</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-uima</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Apache Solr UIMA integration</name>
|
||||
<description>Apache Solr - UIMA integration</description>
|
||||
<properties>
|
||||
<module-directory>solr/contrib/uima</module-directory>
|
||||
<relative-top-level>../../../..</relative-top-level>
|
||||
<module-path>${relative-top-level}/${module-directory}</module-path>
|
||||
</properties>
|
||||
<scm>
|
||||
<connection>scm:git:${vc-anonymous-base-url}</connection>
|
||||
<developerConnection>scm:git:${vc-dev-base-url}</developerConnection>
|
||||
<url>${vc-browse-base-url};f=${module-directory}</url>
|
||||
</scm>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<!-- lucene-test-framework dependency must be declared before lucene-core -->
|
||||
<!-- This dependency cannot be put into solr-parent, because local -->
|
||||
<!-- dependencies are always ordered before inherited dependencies. -->
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-test-framework</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-test-framework</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
@solr-uima.internal.dependencies@
|
||||
@solr-uima.external.dependencies@
|
||||
@solr-uima.internal.test.dependencies@
|
||||
@solr-uima.external.test.dependencies@
|
||||
</dependencies>
|
||||
<build>
|
||||
<sourceDirectory>${module-path}/src/java</sourceDirectory>
|
||||
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>${module-path}/src/resources</directory>
|
||||
</resource>
|
||||
</resources>
|
||||
<testResources>
|
||||
<testResource>
|
||||
<directory>${module-path}/src/test-files</directory>
|
||||
</testResource>
|
||||
<testResource>
|
||||
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||
<includes>
|
||||
<include>maven.testlogging.properties</include>
|
||||
</includes>
|
||||
</testResource>
|
||||
</testResources>
|
||||
</build>
|
||||
</project>
|
|
@ -87,21 +87,6 @@ my @moves = (
|
|||
'solr/contrib/extraction/src/main/java'
|
||||
=> 'solr/contrib/extraction/src/java',
|
||||
|
||||
'solr/contrib/uima/src/test/java'
|
||||
=> 'solr/contrib/uima/src/test',
|
||||
|
||||
'solr/contrib/uima/src/test/resources/solr-uima'
|
||||
=> 'solr/contrib/uima/src/test-files/uima/solr',
|
||||
|
||||
'solr/contrib/uima/src/test/resources'
|
||||
=> 'solr/contrib/uima/src/test-files/uima',
|
||||
|
||||
'solr/contrib/uima/src/main/java'
|
||||
=> 'solr/contrib/uima/src/java',
|
||||
|
||||
'solr/contrib/uima/src/main/resources'
|
||||
=> 'solr/contrib/uima/src/resources',
|
||||
|
||||
'solr/src/test-files/books.csv'
|
||||
=> 'solr/solrj/src/test-files/solrj/books.csv',
|
||||
|
||||
|
|
|
@ -201,7 +201,6 @@ def get_solr_init_changes():
|
|||
Apache Tika %(org.apache.tika.version)s
|
||||
Carrot2 %(/org.carrot2/carrot2-mini)s
|
||||
Velocity %(/org.apache.velocity/velocity)s and Velocity Tools %(/org.apache.velocity/velocity-tools)s
|
||||
Apache UIMA %(org.apache.uima.version)s
|
||||
Apache ZooKeeper %(/org.apache.zookeeper/zookeeper)s
|
||||
Jetty %(org.eclipse.jetty.version)s
|
||||
|
||||
|
|
|
@ -47,10 +47,6 @@ lucene-analyzers-stempel-XX.jar
|
|||
An add-on analysis library that contains a universal algorithmic stemmer,
|
||||
including tables for the Polish language.
|
||||
|
||||
lucene-analyzers-uima-XX.jar
|
||||
An add-on analysis library that contains tokenizers/analyzers using
|
||||
Apache UIMA extracted annotations to identify tokens/types/etc.
|
||||
|
||||
common/src/java
|
||||
icu/src/java
|
||||
kuromoji/src/java
|
||||
|
@ -60,7 +56,6 @@ opennlp/src/java
|
|||
phonetic/src/java
|
||||
smartcn/src/java
|
||||
stempel/src/java
|
||||
uima/src/java
|
||||
The source code for the libraries.
|
||||
|
||||
common/src/test
|
||||
|
@ -72,5 +67,4 @@ opennlp/src/test
|
|||
phonetic/src/test
|
||||
smartcn/src/test
|
||||
stempel/src/test
|
||||
uima/src/test
|
||||
Unit tests for the libraries.
|
||||
|
|
|
@ -28,7 +28,6 @@
|
|||
- nori: Korean Morphological Analyzer
|
||||
- smartcn: Smart Analyzer for Simplified Chinese Text
|
||||
- stempel: Algorithmic Stemmer for Polish
|
||||
- uima: UIMA Analysis module
|
||||
</description>
|
||||
|
||||
<dirname file="${ant.file.analyzers}" property="analyzers.dir"/>
|
||||
|
@ -86,12 +85,8 @@
|
|||
<ant dir="stempel" />
|
||||
</target>
|
||||
|
||||
<target name="uima">
|
||||
<ant dir="uima" />
|
||||
</target>
|
||||
|
||||
<target name="default" depends="compile"/>
|
||||
<target name="compile" depends="common,icu,kuromoji,morfologik,nori,opennlp,phonetic,smartcn,stempel,uima" />
|
||||
<target name="compile" depends="common,icu,kuromoji,morfologik,nori,opennlp,phonetic,smartcn,stempel" />
|
||||
|
||||
<target name="clean">
|
||||
<forall-analyzers target="clean"/>
|
||||
|
|
|
@ -1,50 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<project name="analyzers-uima" default="default">
|
||||
|
||||
<description>
|
||||
Analysis integration with Apache UIMA
|
||||
</description>
|
||||
|
||||
<property name="tests.userdir" value="src/test-files"/>
|
||||
<!-- TODO: why is this limited to one JVM? -->
|
||||
<property name="tests.jvms.override" value="1" />
|
||||
<!-- TODO: go fix this in uima, it's stupid -->
|
||||
<property name="tests.policy" location="../../tools/junit4/solr-tests.policy"/>
|
||||
|
||||
<path id="uimajars">
|
||||
<fileset dir="lib"/>
|
||||
</path>
|
||||
|
||||
<import file="../analysis-module-build.xml"/>
|
||||
|
||||
<path id="classpath">
|
||||
<pathelement path="${analyzers-common.jar}"/>
|
||||
<path refid="uimajars"/>
|
||||
<path refid="base.classpath"/>
|
||||
</path>
|
||||
|
||||
<path id="test.classpath">
|
||||
<path refid="test.base.classpath"/>
|
||||
<pathelement path="${tests.userdir}"/>
|
||||
</path>
|
||||
|
||||
<target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
|
||||
</project>
|
|
@ -1,30 +0,0 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<ivy-module version="2.0">
|
||||
<info organisation="org.apache.lucene" module="analyzers-uima"/>
|
||||
<configurations defaultconfmapping="compile->master">
|
||||
<conf name="compile" transitive="false"/>
|
||||
</configurations>
|
||||
<dependencies>
|
||||
<dependency org="org.apache.uima" name="Tagger" rev="${/org.apache.uima/Tagger}" conf="compile"/>
|
||||
<dependency org="org.apache.uima" name="WhitespaceTokenizer" rev="${/org.apache.uima/WhitespaceTokenizer}" conf="compile"/>
|
||||
<dependency org="org.apache.uima" name="uimaj-core" rev="${/org.apache.uima/uimaj-core}" conf="compile"/>
|
||||
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
|
||||
</dependencies>
|
||||
</ivy-module>
|
|
@ -1,96 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.CAS;
|
||||
import org.apache.uima.cas.FSIterator;
|
||||
import org.apache.uima.cas.text.AnnotationFS;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Abstract base implementation of a {@link Tokenizer} which is able to analyze the given input with a
|
||||
* UIMA {@link AnalysisEngine}
|
||||
*/
|
||||
public abstract class BaseUIMATokenizer extends Tokenizer {
|
||||
|
||||
protected FSIterator<AnnotationFS> iterator;
|
||||
|
||||
private final String descriptorPath;
|
||||
private final Map<String, Object> configurationParameters;
|
||||
|
||||
protected AnalysisEngine ae;
|
||||
protected CAS cas;
|
||||
|
||||
protected BaseUIMATokenizer
|
||||
(AttributeFactory factory, String descriptorPath, Map<String, Object> configurationParameters) {
|
||||
super(factory);
|
||||
this.descriptorPath = descriptorPath;
|
||||
this.configurationParameters = configurationParameters;
|
||||
}
|
||||
|
||||
/**
|
||||
* analyzes the tokenizer input using the given analysis engine
|
||||
* <p>
|
||||
* {@link #cas} will be filled with extracted metadata (UIMA annotations, feature structures)
|
||||
*
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
*/
|
||||
protected void analyzeInput() throws ResourceInitializationException, AnalysisEngineProcessException, IOException {
|
||||
if (ae == null) {
|
||||
ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE();
|
||||
}
|
||||
if (cas == null) {
|
||||
cas = ae.newCAS();
|
||||
} else {
|
||||
cas.reset();
|
||||
}
|
||||
cas.setDocumentText(toString(input));
|
||||
ae.process(cas);
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize the FSIterator which is used to build tokens at each incrementToken() method call
|
||||
*
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
*/
|
||||
protected abstract void initializeIterator() throws IOException;
|
||||
|
||||
private String toString(Reader reader) throws IOException {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
int ch;
|
||||
while ((ch = reader.read()) > -1) {
|
||||
stringBuilder.append((char) ch);
|
||||
}
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
iterator = null;
|
||||
}
|
||||
}
|
|
@ -1,90 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.cas.text.AnnotationFS;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* a {@link Tokenizer} which creates tokens from UIMA Annotations
|
||||
*/
|
||||
public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||
|
||||
private final CharTermAttribute termAttr;
|
||||
|
||||
private final OffsetAttribute offsetAttr;
|
||||
|
||||
private final String tokenTypeString;
|
||||
|
||||
private int finalOffset = 0;
|
||||
|
||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
|
||||
this(descriptorPath, tokenType, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
|
||||
}
|
||||
|
||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,
|
||||
AttributeFactory factory) {
|
||||
super(factory, descriptorPath, configurationParameters);
|
||||
this.tokenTypeString = tokenType;
|
||||
this.termAttr = addAttribute(CharTermAttribute.class);
|
||||
this.offsetAttr = addAttribute(OffsetAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void initializeIterator() throws IOException {
|
||||
try {
|
||||
analyzeInput();
|
||||
} catch (AnalysisEngineProcessException | ResourceInitializationException e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
finalOffset = correctOffset(cas.getDocumentText().length());
|
||||
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
|
||||
iterator = cas.getAnnotationIndex(tokenType).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (iterator == null) {
|
||||
initializeIterator();
|
||||
}
|
||||
if (iterator.hasNext()) {
|
||||
clearAttributes();
|
||||
AnnotationFS next = iterator.next();
|
||||
termAttr.append(next.getCoveredText());
|
||||
offsetAttr.setOffset(correctOffset(next.getBegin()), correctOffset(next.getEnd()));
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() throws IOException {
|
||||
super.end();
|
||||
offsetAttr.setOffset(finalOffset, finalOffset);
|
||||
}
|
||||
}
|
|
@ -1,47 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* {@link org.apache.lucene.analysis.util.TokenizerFactory} for {@link UIMAAnnotationsTokenizer}
|
||||
*/
|
||||
public class UIMAAnnotationsTokenizerFactory extends TokenizerFactory {
|
||||
|
||||
private String descriptorPath;
|
||||
private String tokenType;
|
||||
private final Map<String,Object> configurationParameters = new HashMap<>();
|
||||
|
||||
/** Creates a new UIMAAnnotationsTokenizerFactory */
|
||||
public UIMAAnnotationsTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
tokenType = require(args, "tokenType");
|
||||
descriptorPath = require(args, "descriptorPath");
|
||||
configurationParameters.putAll(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public UIMAAnnotationsTokenizer create(AttributeFactory factory) {
|
||||
return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, factory);
|
||||
}
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* An {@link Analyzer} which use the {@link UIMAAnnotationsTokenizer} for creating tokens
|
||||
*/
|
||||
public final class UIMABaseAnalyzer extends Analyzer {
|
||||
|
||||
private final String descriptorPath;
|
||||
private final String tokenType;
|
||||
private final Map<String, Object> configurationParameters;
|
||||
|
||||
public UIMABaseAnalyzer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
|
||||
this.descriptorPath = descriptorPath;
|
||||
this.tokenType = tokenType;
|
||||
this.configurationParameters = configurationParameters;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters));
|
||||
}
|
||||
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} which uses the {@link UIMATypeAwareAnnotationsTokenizer} for the tokenization phase
|
||||
*/
|
||||
public final class UIMATypeAwareAnalyzer extends Analyzer {
|
||||
private final String descriptorPath;
|
||||
private final String tokenType;
|
||||
private final String featurePath;
|
||||
private final Map<String, Object> configurationParameters;
|
||||
|
||||
public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath, Map<String, Object> configurationParameters) {
|
||||
this.descriptorPath = descriptorPath;
|
||||
this.tokenType = tokenType;
|
||||
this.featurePath = featurePath;
|
||||
this.configurationParameters = configurationParameters;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters));
|
||||
}
|
||||
}
|
|
@ -1,113 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.CASException;
|
||||
import org.apache.uima.cas.FeaturePath;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.cas.text.AnnotationFS;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A {@link Tokenizer} which creates token from UIMA Annotations filling also their {@link TypeAttribute} according to
|
||||
* {@link org.apache.uima.cas.FeaturePath}s specified
|
||||
*/
|
||||
public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||
|
||||
private final TypeAttribute typeAttr;
|
||||
|
||||
private final CharTermAttribute termAttr;
|
||||
|
||||
private final OffsetAttribute offsetAttr;
|
||||
|
||||
private final String tokenTypeString;
|
||||
|
||||
private final String typeAttributeFeaturePath;
|
||||
|
||||
private FeaturePath featurePath;
|
||||
|
||||
private int finalOffset = 0;
|
||||
|
||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters) {
|
||||
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
|
||||
}
|
||||
|
||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,
|
||||
Map<String, Object> configurationParameters, AttributeFactory factory) {
|
||||
super(factory, descriptorPath, configurationParameters);
|
||||
this.tokenTypeString = tokenType;
|
||||
this.termAttr = addAttribute(CharTermAttribute.class);
|
||||
this.typeAttr = addAttribute(TypeAttribute.class);
|
||||
this.offsetAttr = addAttribute(OffsetAttribute.class);
|
||||
this.typeAttributeFeaturePath = typeAttributeFeaturePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void initializeIterator() throws IOException {
|
||||
try {
|
||||
analyzeInput();
|
||||
} catch (AnalysisEngineProcessException | ResourceInitializationException e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
featurePath = cas.createFeaturePath();
|
||||
try {
|
||||
featurePath.initialize(typeAttributeFeaturePath);
|
||||
} catch (CASException e) {
|
||||
featurePath = null;
|
||||
throw new IOException(e);
|
||||
}
|
||||
finalOffset = correctOffset(cas.getDocumentText().length());
|
||||
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
|
||||
iterator = cas.getAnnotationIndex(tokenType).iterator();
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (iterator == null) {
|
||||
initializeIterator();
|
||||
}
|
||||
if (iterator.hasNext()) {
|
||||
clearAttributes();
|
||||
AnnotationFS next = iterator.next();
|
||||
termAttr.append(next.getCoveredText());
|
||||
offsetAttr.setOffset(correctOffset(next.getBegin()), correctOffset(next.getEnd()));
|
||||
typeAttr.setType(featurePath.getValueAsString(next));
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() throws IOException {
|
||||
super.end();
|
||||
offsetAttr.setOffset(finalOffset, finalOffset);
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* {@link org.apache.lucene.analysis.util.TokenizerFactory} for {@link UIMATypeAwareAnnotationsTokenizer}
|
||||
*/
|
||||
public class UIMATypeAwareAnnotationsTokenizerFactory extends TokenizerFactory {
|
||||
|
||||
private String descriptorPath;
|
||||
private String tokenType;
|
||||
private String featurePath;
|
||||
private final Map<String,Object> configurationParameters = new HashMap<>();
|
||||
|
||||
/** Creates a new UIMATypeAwareAnnotationsTokenizerFactory */
|
||||
public UIMATypeAwareAnnotationsTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
featurePath = require(args, "featurePath");
|
||||
tokenType = require(args, "tokenType");
|
||||
descriptorPath = require(args, "descriptorPath");
|
||||
configurationParameters.putAll(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public UIMATypeAwareAnnotationsTokenizer create(AttributeFactory factory) {
|
||||
return new UIMATypeAwareAnnotationsTokenizer
|
||||
(descriptorPath, tokenType, featurePath, configurationParameters, factory);
|
||||
}
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
|
||||
/**
|
||||
* provide an Apache UIMA {@link AnalysisEngine}
|
||||
*
|
||||
*/
|
||||
public interface AEProvider {
|
||||
|
||||
/**
|
||||
* Returns the AnalysisEngine
|
||||
*/
|
||||
public AnalysisEngine getAE() throws ResourceInitializationException;
|
||||
|
||||
}
|
|
@ -1,76 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Singleton factory class responsible of {@link AEProvider}s' creation
|
||||
*/
|
||||
public class AEProviderFactory {
|
||||
|
||||
private static final AEProviderFactory instance = new AEProviderFactory();
|
||||
|
||||
private final Map<String, AEProvider> providerCache = new HashMap<>();
|
||||
|
||||
private AEProviderFactory() {
|
||||
// Singleton
|
||||
}
|
||||
|
||||
public static AEProviderFactory getInstance() {
|
||||
return instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param keyPrefix a prefix of the key used to cache the AEProvider
|
||||
* @param aePath the AnalysisEngine descriptor path
|
||||
* @param runtimeParameters map of runtime parameters to configure inside the AnalysisEngine
|
||||
* @return AEProvider
|
||||
*/
|
||||
public synchronized AEProvider getAEProvider(String keyPrefix, String aePath, Map<String, Object> runtimeParameters) {
|
||||
String key = new StringBuilder(keyPrefix != null ? keyPrefix : "").append(aePath).append(runtimeParameters != null ?
|
||||
runtimeParameters.toString() : "").toString();
|
||||
if (providerCache.get(key) == null) {
|
||||
AEProvider aeProvider;
|
||||
if (runtimeParameters != null)
|
||||
aeProvider = new OverridingParamsAEProvider(aePath, runtimeParameters);
|
||||
else
|
||||
aeProvider = new BasicAEProvider(aePath);
|
||||
providerCache.put(key, aeProvider);
|
||||
}
|
||||
return providerCache.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param aePath the AnalysisEngine descriptor path
|
||||
* @return AEProvider
|
||||
*/
|
||||
public synchronized AEProvider getAEProvider(String aePath) {
|
||||
return getAEProvider(null, aePath, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param aePath the AnalysisEngine descriptor path
|
||||
* @param runtimeParameters map of runtime parameters to configure inside the AnalysisEngine
|
||||
* @return AEProvider
|
||||
*/
|
||||
public synchronized AEProvider getAEProvider(String aePath, Map<String, Object> runtimeParameters) {
|
||||
return getAEProvider(null, aePath, runtimeParameters);
|
||||
}
|
||||
}
|
|
@ -1,87 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.uima.UIMAFramework;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.apache.uima.util.XMLInputSource;
|
||||
|
||||
/**
|
||||
* Basic {@link AEProvider} which just instantiates a UIMA {@link AnalysisEngine} with no additional metadata,
|
||||
* parameters or resources
|
||||
*/
|
||||
public class BasicAEProvider implements AEProvider {
|
||||
|
||||
private final String aePath;
|
||||
private AnalysisEngineDescription cachedDescription;
|
||||
|
||||
public BasicAEProvider(String aePath) {
|
||||
this.aePath = aePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AnalysisEngine getAE() throws ResourceInitializationException {
|
||||
synchronized(this) {
|
||||
if (cachedDescription == null) {
|
||||
XMLInputSource in = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
// get Resource Specifier from XML file
|
||||
in = getInputSource();
|
||||
|
||||
// get AE description
|
||||
cachedDescription = UIMAFramework.getXMLParser()
|
||||
.parseAnalysisEngineDescription(in);
|
||||
configureDescription(cachedDescription);
|
||||
success = true;
|
||||
} catch (Exception e) {
|
||||
throw new ResourceInitializationException(e);
|
||||
} finally {
|
||||
if (success) {
|
||||
try {
|
||||
IOUtils.close(in.getInputStream());
|
||||
} catch (IOException e) {
|
||||
throw new ResourceInitializationException(e);
|
||||
}
|
||||
} else if (in != null) {
|
||||
IOUtils.closeWhileHandlingException(in.getInputStream());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return UIMAFramework.produceAnalysisEngine(cachedDescription);
|
||||
}
|
||||
|
||||
protected void configureDescription(AnalysisEngineDescription description) {
|
||||
// no configuration
|
||||
}
|
||||
|
||||
private XMLInputSource getInputSource() throws IOException {
|
||||
try {
|
||||
return new XMLInputSource(aePath);
|
||||
} catch (Exception e) {
|
||||
return new XMLInputSource(getClass().getResource(aePath));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,69 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* {@link AEProvider} implementation that creates an Aggregate AE from the given path, also
|
||||
* injecting runtime parameters defined in the solrconfig.xml Solr configuration file and assigning
|
||||
* them as overriding parameters in the aggregate AE
|
||||
*/
|
||||
public class OverridingParamsAEProvider extends BasicAEProvider {
|
||||
|
||||
private final Map<String, Object> runtimeParameters;
|
||||
|
||||
public OverridingParamsAEProvider(String aePath, Map<String, Object> runtimeParameters) {
|
||||
super(aePath);
|
||||
this.runtimeParameters = runtimeParameters;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void configureDescription(AnalysisEngineDescription description) {
|
||||
for (String attributeName : runtimeParameters.keySet()) {
|
||||
Object val = getRuntimeValue(description, attributeName);
|
||||
description.getAnalysisEngineMetaData().getConfigurationParameterSettings().setParameterValue(
|
||||
attributeName, val);
|
||||
}
|
||||
}
|
||||
|
||||
/* create the value to inject in the runtime parameter depending on its declared type */
|
||||
private Object getRuntimeValue(AnalysisEngineDescription desc, String attributeName) {
|
||||
String type = desc.getAnalysisEngineMetaData().getConfigurationParameterDeclarations().
|
||||
getConfigurationParameter(null, attributeName).getType();
|
||||
// TODO : do it via reflection ? i.e. Class paramType = Class.forName(type)...
|
||||
Object val = null;
|
||||
Object runtimeValue = runtimeParameters.get(attributeName);
|
||||
if (runtimeValue != null) {
|
||||
if ("String".equals(type)) {
|
||||
val = String.valueOf(runtimeValue);
|
||||
} else if ("Integer".equals(type)) {
|
||||
val = Integer.valueOf(runtimeValue.toString());
|
||||
} else if ("Boolean".equals(type)) {
|
||||
val = Boolean.valueOf(runtimeValue.toString());
|
||||
} else if ("Float".equals(type)) {
|
||||
val = Float.valueOf(runtimeValue.toString());
|
||||
}
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Integration with UIMA's AnalysisEngine.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.ae;
|
|
@ -1,21 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Classes that integrate UIMA with Lucene's analysis API.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
|
@ -1,29 +0,0 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<head>
|
||||
<title>
|
||||
analyzers-uima
|
||||
</title>
|
||||
</head>
|
||||
<body>
|
||||
Analysis integration with <a href="http://uima.apache.org/">Apache UIMA</a>.
|
||||
<p>
|
||||
For an introduction to Lucene's analysis API, see the {@link org.apache.lucene.analysis} package documentation.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
|
@ -1,17 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizerFactory
|
||||
org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizerFactory
|
|
@ -1,70 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <!-- Aggregate engine: the work is done by the delegates listed below. -->
  <primitive>false</primitive>
  <delegateAnalysisEngineSpecifiers>
    <!-- Both delegates are imported by name. -->
    <delegateAnalysisEngine key="WhitespaceTokenizer">
      <import name="WhitespaceTokenizer"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="HmmTagger">
      <import name="HmmTagger"/>
    </delegateAnalysisEngine>
  </delegateAnalysisEngineSpecifiers>
  <analysisEngineMetaData>
    <name>AggregateSentenceAE</name>
    <description/>
    <version>1.0</version>
    <vendor/>
    <configurationParameters>
      <!-- Optional, single-valued integer that overrides the HmmTagger delegate's NGRAM_SIZE. -->
      <configurationParameter>
        <name>ngramsize</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
        <overrides>
          <parameter>HmmTagger/NGRAM_SIZE</parameter>
        </overrides>
      </configurationParameter>
    </configurationParameters>
    <!-- No values are set here. -->
    <configurationParameterSettings/>
    <flowConstraints>
      <!-- Fixed order: tokenize first, then tag. -->
      <fixedFlow>
        <node>WhitespaceTokenizer</node>
        <node>HmmTagger</node>
      </fixedFlow>
    </flowConstraints>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
          <type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>
|
|
@ -1,55 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <!-- Aggregate (non-primitive) test engine: chains the two delegates imported below. -->
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>false</primitive>

  <!-- Delegates are imported by location; the keys are the names used in the fixed flow. -->
  <delegateAnalysisEngineSpecifiers>
    <delegateAnalysisEngine key="WhitespaceTokenizer">
      <import location="TestWSTokenizerAE.xml"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="PoSTagger">
      <import location="TestPoSTaggerAE.xml"/>
    </delegateAnalysisEngine>
  </delegateAnalysisEngineSpecifiers>

  <analysisEngineMetaData>
    <name>TestAggregateSentenceAE</name>
    <description/>
    <version>1.0</version>
    <vendor>ASF</vendor>

    <!-- Fixed flow: the tokenizer runs first, then the PoS tagger. -->
    <flowConstraints>
      <fixedFlow>
        <node>WhitespaceTokenizer</node>
        <node>PoSTagger</node>
      </fixedFlow>
    </flowConstraints>

    <!-- No declared inputs; sentence and token annotations are declared as outputs. -->
    <capabilities>
      <capability>
        <inputs/>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.SentenceAnnotation</type>
          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
        </outputs>
      </capability>
    </capabilities>

    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
</analysisEngineDescription>
|
|
@ -1,66 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <!-- Primitive test engine backed by the SampleEntityAnnotator class. -->
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleEntityAnnotator</annotatorImplementationName>

  <analysisEngineMetaData>
    <name>EntityAnnotator</name>
    <description/>
    <version>1.0</version>
    <vendor>ASF</vendor>

    <!--
      Type system: a single annotation type carrying two string features, "name" and "entity".
      Code that looks this type or its features up by name has to use exactly these identifiers.
    -->
    <typeSystemDescription>
      <types>
        <typeDescription>
          <name>org.apache.lucene.uima.ts.EntityAnnotation</name>
          <description/>
          <supertypeName>uima.tcas.Annotation</supertypeName>
          <features>
            <featureDescription>
              <name>name</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>entity</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>

    <!-- Declared contract: token annotations in, entity annotations out. -->
    <capabilities>
      <capability>
        <inputs>
          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
        </inputs>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.EntityAnnotation</type>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>

    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>

  <resourceManagerConfiguration/>
</analysisEngineDescription>
|
|
@ -1,44 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <!-- Primitive test engine backed by the SamplePoSTagger class. -->
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SamplePoSTagger</annotatorImplementationName>

  <analysisEngineMetaData>
    <name>DummyPoSTagger</name>
    <description/>
    <version>1.0</version>
    <vendor>ASF</vendor>

    <!-- The same token annotation type is declared as both input and output. -->
    <capabilities>
      <capability>
        <inputs>
          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
        </inputs>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>

    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
</analysisEngineDescription>
|
|
@ -1,78 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <!-- Primitive test engine backed by the SampleWSTokenizerAnnotator class. -->
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator</annotatorImplementationName>

  <analysisEngineMetaData>
    <name>WSTokenizer</name>
    <version>1.0</version>
    <vendor>ASF</vendor>

    <!-- One optional, single-valued string parameter: "line-end". -->
    <configurationParameters>
      <configurationParameter>
        <name>line-end</name>
        <description>
the string used as line end
</description>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!--
      Default for "line-end". XML has no backslash escapes, so the value below is the
      two characters backslash and 'n', not a newline character.
    -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>line-end</name>
        <value>
          <string>\n</string>
        </value>
      </nameValuePair>
    </configurationParameterSettings>

    <!-- Type system: a token annotation with a string "pos" feature, and a sentence annotation. -->
    <typeSystemDescription>
      <types>
        <typeDescription>
          <name>org.apache.lucene.uima.ts.TokenAnnotation</name>
          <supertypeName>uima.tcas.Annotation</supertypeName>
          <features>
            <featureDescription>
              <name>pos</name>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
        <typeDescription>
          <name>org.apache.lucene.uima.ts.SentenceAnnotation</name>
          <supertypeName>uima.tcas.Annotation</supertypeName>
        </typeDescription>
      </types>
    </typeSystemDescription>

    <!-- No declared inputs; token and sentence annotations are declared as outputs. -->
    <capabilities>
      <capability>
        <inputs/>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
          <type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.SentenceAnnotation</type>
        </outputs>
      </capability>
    </capabilities>

    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
</analysisEngineDescription>
|
|
@ -1,137 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Testcase for {@link UIMABaseAnalyzer}
|
||||
*/
|
||||
public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
|
||||
|
||||
private UIMABaseAnalyzer analyzer;
|
||||
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation", null);
|
||||
}
|
||||
|
||||
@Override
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
analyzer.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void baseUIMAAnalyzerStreamTest() throws Exception {
|
||||
TokenStream ts = analyzer.tokenStream("text", "the big brown fox jumped on the wood");
|
||||
assertTokenStreamContents(ts, new String[]{"the", "big", "brown", "fox", "jumped", "on", "the", "wood"});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void baseUIMAAnalyzerIntegrationTest() throws Exception {
|
||||
Directory dir = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
|
||||
// add the first doc
|
||||
Document doc = new Document();
|
||||
String dummyTitle = "this is a dummy title ";
|
||||
doc.add(new TextField("title", dummyTitle, Field.Store.YES));
|
||||
String dummyContent = "there is some content written here";
|
||||
doc.add(new TextField("contents", dummyContent, Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
writer.commit();
|
||||
|
||||
// try the search over the first doc
|
||||
DirectoryReader directoryReader = DirectoryReader.open(dir);
|
||||
IndexSearcher indexSearcher = newSearcher(directoryReader);
|
||||
TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 1);
|
||||
assertTrue(result.totalHits > 0);
|
||||
Document d = indexSearcher.doc(result.scoreDocs[0].doc);
|
||||
assertNotNull(d);
|
||||
assertNotNull(d.getField("title"));
|
||||
assertEquals(dummyTitle, d.getField("title").stringValue());
|
||||
assertNotNull(d.getField("contents"));
|
||||
assertEquals(dummyContent, d.getField("contents").stringValue());
|
||||
|
||||
// add a second doc
|
||||
doc = new Document();
|
||||
String dogmasTitle = "dogmas";
|
||||
doc.add(new TextField("title", dogmasTitle, Field.Store.YES));
|
||||
String dogmasContents = "white men can't jump";
|
||||
doc.add(new TextField("contents", dogmasContents, Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
writer.commit();
|
||||
|
||||
directoryReader.close();
|
||||
directoryReader = DirectoryReader.open(dir);
|
||||
indexSearcher = newSearcher(directoryReader);
|
||||
result = indexSearcher.search(new MatchAllDocsQuery(), 2);
|
||||
Document d1 = indexSearcher.doc(result.scoreDocs[1].doc);
|
||||
assertNotNull(d1);
|
||||
assertNotNull(d1.getField("title"));
|
||||
assertEquals(dogmasTitle, d1.getField("title").stringValue());
|
||||
assertNotNull(d1.getField("contents"));
|
||||
assertEquals(dogmasContents, d1.getField("contents").stringValue());
|
||||
|
||||
// do a matchalldocs query to retrieve both docs
|
||||
result = indexSearcher.search(new MatchAllDocsQuery(), 2);
|
||||
assertEquals(2, result.totalHits);
|
||||
writer.close();
|
||||
indexSearcher.getIndexReader().close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
@Test @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-3869")
|
||||
public void testRandomStrings() throws Exception {
|
||||
Analyzer analyzer = new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", null);
|
||||
checkRandomData(random(), analyzer, 100 * RANDOM_MULTIPLIER);
|
||||
analyzer.close();
|
||||
}
|
||||
|
||||
@Test @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-3869")
|
||||
public void testRandomStringsWithConfigurationParameters() throws Exception {
|
||||
Map<String, Object> cp = new HashMap<>();
|
||||
cp.put("line-end", "\r");
|
||||
Analyzer analyzer = new UIMABaseAnalyzer("/uima/TestWSTokenizerAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", cp);
|
||||
checkRandomData(random(), analyzer, 100 * RANDOM_MULTIPLIER);
|
||||
analyzer.close();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,70 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Testcase for {@link UIMATypeAwareAnalyzer}
|
||||
*/
|
||||
public class UIMATypeAwareAnalyzerTest extends BaseTokenStreamTestCase {
|
||||
|
||||
private UIMATypeAwareAnalyzer analyzer;
|
||||
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
analyzer = new UIMATypeAwareAnalyzer("/uima/AggregateSentenceAE.xml",
|
||||
"org.apache.uima.TokenAnnotation", "posTag", null);
|
||||
}
|
||||
|
||||
@Override
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
analyzer.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void baseUIMATypeAwareAnalyzerStreamTest() throws Exception {
|
||||
|
||||
// create a token stream
|
||||
TokenStream ts = analyzer.tokenStream("text", "the big brown fox jumped on the wood");
|
||||
|
||||
// check that 'the big brown fox jumped on the wood' tokens have the expected PoS types
|
||||
assertTokenStreamContents(ts,
|
||||
new String[]{"the", "big", "brown", "fox", "jumped", "on", "the", "wood"},
|
||||
new String[]{"at", "jj", "jj", "nn", "vbd", "in", "at", "nn"});
|
||||
|
||||
}
|
||||
|
||||
@Test @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-3869")
|
||||
public void testRandomStrings() throws Exception {
|
||||
Analyzer analyzer = new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml",
|
||||
"org.apache.lucene.uima.ts.TokenAnnotation", "pos", null);
|
||||
checkRandomData(random(), analyzer, 100 * RANDOM_MULTIPLIER);
|
||||
analyzer.close();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
* Testcase for {@link AEProviderFactory}
|
||||
*/
|
||||
public class AEProviderFactoryTest {
|
||||
|
||||
@Test
|
||||
public void testCorrectCaching() throws Exception {
|
||||
AEProvider aeProvider = AEProviderFactory.getInstance().getAEProvider("/uima/TestAggregateSentenceAE.xml");
|
||||
assertTrue(aeProvider == AEProviderFactory.getInstance().getAEProvider("/uima/TestAggregateSentenceAE.xml"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCorrectCachingWithParameters() throws Exception {
|
||||
AEProvider aeProvider = AEProviderFactory.getInstance().getAEProvider("prefix", "/uima/TestAggregateSentenceAE.xml",
|
||||
new HashMap<String, Object>());
|
||||
assertTrue(aeProvider == AEProviderFactory.getInstance().getAEProvider("prefix", "/uima/TestAggregateSentenceAE.xml",
|
||||
new HashMap<String, Object>()));
|
||||
}
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
|
||||
/**
|
||||
* TestCase for {@link BasicAEProvider}
|
||||
*/
|
||||
public class BasicAEProviderTest {
|
||||
|
||||
@Test
|
||||
public void testBasicInitialization() throws Exception {
|
||||
AEProvider basicAEProvider = new BasicAEProvider("/uima/TestEntityAnnotatorAE.xml");
|
||||
AnalysisEngine analysisEngine = basicAEProvider.getAE();
|
||||
assertNotNull(analysisEngine);
|
||||
}
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* TestCase for {@link OverridingParamsAEProvider}
|
||||
*/
|
||||
public class OverridingParamsAEProviderTest extends LuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void testNullMapInitialization() throws Exception {
|
||||
expectThrows(ResourceInitializationException.class, () -> {
|
||||
AEProvider aeProvider = new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", null);
|
||||
aeProvider.getAE();
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmptyMapInitialization() throws Exception {
|
||||
AEProvider aeProvider = new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", new HashMap<String, Object>());
|
||||
AnalysisEngine analysisEngine = aeProvider.getAE();
|
||||
assertNotNull(analysisEngine);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOverridingParamsInitialization() throws Exception {
|
||||
Map<String, Object> runtimeParameters = new HashMap<>();
|
||||
runtimeParameters.put("ngramsize", "3");
|
||||
AEProvider aeProvider = new OverridingParamsAEProvider("/uima/AggregateSentenceAE.xml", runtimeParameters);
|
||||
AnalysisEngine analysisEngine = aeProvider.getAE();
|
||||
assertNotNull(analysisEngine);
|
||||
Object parameterValue = analysisEngine.getConfigParameterValue("ngramsize");
|
||||
assertNotNull(parameterValue);
|
||||
assertEquals(Integer.valueOf(3), Integer.valueOf(parameterValue.toString()));
|
||||
}
|
||||
}
|
|
@ -1,64 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.an;
|
||||
|
||||
|
||||
import org.apache.uima.TokenAnnotation;
|
||||
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.Feature;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.cas.text.AnnotationFS;
|
||||
import org.apache.uima.jcas.JCas;
|
||||
import org.apache.uima.jcas.tcas.Annotation;
|
||||
|
||||
/**
|
||||
* Dummy implementation of an entity annotator to tag tokens as certain types of entities
|
||||
*/
|
||||
public class SampleEntityAnnotator extends JCasAnnotator_ImplBase {
|
||||
|
||||
private static final String NP = "np";
|
||||
private static final String NPS = "nps";
|
||||
private static final String TYPE_NAME = "org.apache.lucene.analysis.uima.ts.EntityAnnotation";
|
||||
private static final String ENTITY_FEATURE = "entity";
|
||||
private static final String NAME_FEATURE = "entity";
|
||||
|
||||
@Override
|
||||
public void process(JCas jcas) throws AnalysisEngineProcessException {
|
||||
Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
|
||||
Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE);
|
||||
Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE);
|
||||
|
||||
for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
|
||||
String tokenPOS = ((TokenAnnotation) annotation).getPosTag();
|
||||
|
||||
if (NP.equals(tokenPOS) || NPS.equals(tokenPOS)) {
|
||||
AnnotationFS entityAnnotation = jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd());
|
||||
|
||||
entityAnnotation.setStringValue(entityFeature, annotation.getCoveredText());
|
||||
|
||||
String name = "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
|
||||
if (annotation.getCoveredText().equals("Apache"))
|
||||
name = "ORGANIZATION";
|
||||
entityAnnotation.setStringValue(nameFeature, name);
|
||||
|
||||
jcas.addFsToIndexes(entityAnnotation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,57 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.an;
|
||||
|
||||
|
||||
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.Feature;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.jcas.JCas;
|
||||
import org.apache.uima.jcas.tcas.Annotation;
|
||||
|
||||
/**
|
||||
* Dummy implementation of a PoS tagger to add part of speech as token types
|
||||
*/
|
||||
public class SamplePoSTagger extends JCasAnnotator_ImplBase {
|
||||
|
||||
private static final String NUM = "NUM";
|
||||
private static final String WORD = "WORD";
|
||||
private static final String TYPE_NAME = "org.apache.lucene.uima.ts.TokenAnnotation";
|
||||
private static final String FEATURE_NAME = "pos";
|
||||
|
||||
@Override
|
||||
public void process(JCas jcas) throws AnalysisEngineProcessException {
|
||||
Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
|
||||
Feature posFeature = type.getFeatureByBaseName(FEATURE_NAME);
|
||||
|
||||
for (Annotation annotation : jcas.getAnnotationIndex(type)) {
|
||||
String text = annotation.getCoveredText();
|
||||
String pos = extractPoS(text);
|
||||
annotation.setStringValue(posFeature, pos);
|
||||
}
|
||||
}
|
||||
|
||||
private String extractPoS(String text) {
|
||||
try {
|
||||
Double.valueOf(text);
|
||||
return NUM;
|
||||
} catch (Exception e) {
|
||||
return WORD;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,66 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.uima.an;
|
||||
|
||||
|
||||
import org.apache.uima.UimaContext;
|
||||
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.cas.text.AnnotationFS;
|
||||
import org.apache.uima.jcas.JCas;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
|
||||
/**
|
||||
* Dummy implementation of a UIMA based whitespace tokenizer
|
||||
*/
|
||||
public class SampleWSTokenizerAnnotator extends JCasAnnotator_ImplBase {
|
||||
|
||||
private final static String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation";
|
||||
private final static String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation";
|
||||
private String lineEnd;
|
||||
private static final String WHITESPACE = " ";
|
||||
|
||||
@Override
|
||||
public void initialize(UimaContext aContext) throws ResourceInitializationException {
|
||||
super.initialize(aContext);
|
||||
lineEnd = String.valueOf(aContext.getConfigParameterValue("line-end"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void process(JCas jCas) throws AnalysisEngineProcessException {
|
||||
Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
|
||||
Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);
|
||||
int i = 0;
|
||||
for (String sentenceString : jCas.getDocumentText().split(lineEnd)) {
|
||||
// add the sentence
|
||||
AnnotationFS sentenceAnnotation = jCas.getCas().createAnnotation(sentenceType, i, sentenceString.length());
|
||||
jCas.addFsToIndexes(sentenceAnnotation);
|
||||
i += sentenceString.length();
|
||||
}
|
||||
|
||||
// get tokens
|
||||
int j = 0;
|
||||
for (String tokenString : jCas.getDocumentText().split(WHITESPACE)) {
|
||||
int tokenLength = tokenString.length();
|
||||
AnnotationFS tokenAnnotation = jCas.getCas().createAnnotation(tokenType, j, j + tokenLength);
|
||||
jCas.addFsToIndexes(tokenAnnotation);
|
||||
j += tokenLength;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -167,7 +167,6 @@
|
|||
<check-missing-javadocs dir="build/docs/analyzers-phonetic" level="method"/>
|
||||
<!-- analyzers-smartcn: problems -->
|
||||
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="method"/>
|
||||
<!-- analyzers-uima: problems -->
|
||||
<!-- benchmark: problems -->
|
||||
<check-missing-javadocs dir="build/docs/classification" level="method"/>
|
||||
<!-- codecs: problems -->
|
||||
|
|
|
@ -1993,8 +1993,6 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
|
|||
<pattern substring="Produced by GNUPLOT"/>
|
||||
<!-- snowball stemmers generated by snowball compiler -->
|
||||
<pattern substring="This file was generated automatically by the Snowball to Java compiler"/>
|
||||
<!-- uima tests generated by JCasGen -->
|
||||
<pattern substring="First created by JCasGen"/>
|
||||
<!-- parsers generated by antlr -->
|
||||
<pattern substring="ANTLR GENERATED CODE"/>
|
||||
</rat:substringMatcher>
|
||||
|
|
|
@ -77,8 +77,6 @@ import org.apache.lucene.util.Version;
|
|||
* Analyzer for Simplified Chinese, which indexes words.
|
||||
* <li><a href="{@docRoot}/../analyzers-stempel/overview-summary.html">Stempel</a>:
|
||||
* Algorithmic Stemmer for the Polish Language.
|
||||
* <li><a href="{@docRoot}/../analyzers-uima/overview-summary.html">UIMA</a>:
|
||||
* Analysis integration with Apache UIMA.
|
||||
* </ul>
|
||||
*/
|
||||
public abstract class Analyzer implements Closeable {
|
||||
|
|
|
@ -51,7 +51,6 @@ com.sun.jersey.version = 1.9
|
|||
/commons-codec/commons-codec = 1.10
|
||||
/commons-collections/commons-collections = 3.2.2
|
||||
/commons-configuration/commons-configuration = 1.6
|
||||
/commons-digester/commons-digester = 2.1
|
||||
/commons-fileupload/commons-fileupload = 1.3.3
|
||||
/commons-io/commons-io = 2.5
|
||||
/commons-lang/commons-lang = 2.6
|
||||
|
@ -205,13 +204,6 @@ org.apache.tika.version = 1.17
|
|||
/org.apache.tika/tika-parsers = ${org.apache.tika.version}
|
||||
/org.apache.tika/tika-xmp = ${org.apache.tika.version}
|
||||
|
||||
org.apache.uima.version = 2.3.1
|
||||
/org.apache.uima/AlchemyAPIAnnotator = ${org.apache.uima.version}
|
||||
/org.apache.uima/OpenCalaisAnnotator = ${org.apache.uima.version}
|
||||
/org.apache.uima/Tagger = ${org.apache.uima.version}
|
||||
/org.apache.uima/WhitespaceTokenizer = ${org.apache.uima.version}
|
||||
/org.apache.uima/uimaj-core = ${org.apache.uima.version}
|
||||
|
||||
/org.apache.velocity/velocity = 1.7
|
||||
/org.apache.velocity/velocity-tools = 2.0
|
||||
/org.apache.xmlbeans/xmlbeans = 2.6.0
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
cd02db9e8d54decb14cbe303d001d13735237290
|
|
@ -1,202 +0,0 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -1,7 +0,0 @@
|
|||
|
||||
UIMA Annotator: Tagger
|
||||
Copyright 2006-2010 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
|
@ -1 +0,0 @@
|
|||
d7b0fd616c4289376c1f59e2a68edfb4cfd6730d
|
|
@ -1,202 +0,0 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -1,7 +0,0 @@
|
|||
|
||||
UIMA Annotator: WhitespaceTokenizer
|
||||
Copyright 2006-2010 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
|
@ -1 +0,0 @@
|
|||
99bf8d75b71410e4d5f2051ae79942721b3a2f60
|
|
@ -1,202 +0,0 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -1,13 +0,0 @@
|
|||
|
||||
UIMA Base: uimaj-core
|
||||
Copyright 2006-2010 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
||||
Portions of Apache UIMA were originally developed by
|
||||
International Business Machines Corporation and are
|
||||
licensed to the Apache Software Foundation under the
|
||||
"Software Grant License Agreement", informally known as the
|
||||
"IBM UIMA License Agreement".
|
||||
Copyright (c) 2003, 2006 IBM Corporation.
|
|
@ -403,28 +403,6 @@
|
|||
<property name="analyzers-kuromoji-javadocs.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<property name="analyzers-uima.jar" value="${common.dir}/build/analysis/uima/lucene-analyzers-uima-${version}.jar"/>
|
||||
<target name="check-analyzers-uima-uptodate" unless="analyzers-uima.uptodate">
|
||||
<module-uptodate name="analysis/uima" jarfile="${analyzers-uima.jar}" property="analyzers-uima.uptodate"/>
|
||||
</target>
|
||||
<target name="jar-analyzers-uima" unless="analyzers-uima.uptodate" depends="check-analyzers-uima-uptodate">
|
||||
<ant dir="${common.dir}/analysis/uima" target="jar-core" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="analyzers-uima.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<property name="analyzers-uima-javadoc.jar" value="${common.dir}/build/analysis/uima/lucene-analyzers-uima-${version}-javadoc.jar"/>
|
||||
<target name="check-analyzers-uima-javadocs-uptodate" unless="analyzers-uima-javadocs.uptodate">
|
||||
<module-uptodate name="analysis/uima" jarfile="${analyzers-uima-javadoc.jar}" property="analyzers-uima-javadocs.uptodate"/>
|
||||
</target>
|
||||
<target name="javadocs-analyzers-uima" unless="analyzers-uima-javadocs.uptodate" depends="check-analyzers-uima-javadocs-uptodate">
|
||||
<ant dir="${common.dir}/analysis/uima" target="javadocs" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="analyzers-uima-javadocs.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<property name="analyzers-morfologik.jar" value="${common.dir}/build/analysis/morfologik/lucene-analyzers-morfologik-${version}.jar"/>
|
||||
<fileset id="analyzers-morfologik.fileset" dir="${common.dir}">
|
||||
<include name="build/analysis/morfologik/lucene-analyzers-morfologik-${version}.jar" />
|
||||
|
|
|
@ -58,8 +58,7 @@ grant {
|
|||
permission java.lang.RuntimePermission "fileSystemProvider";
|
||||
// needed for test of IOUtils.spins (maybe it can be avoided)
|
||||
permission java.lang.RuntimePermission "getFileStoreAttributes";
|
||||
// analyzers/uima: needed by UIMA message localization... (?)
|
||||
permission java.lang.RuntimePermission "createSecurityManager";
|
||||
// analyzers/uima: needed by lucene expressions' JavascriptCompiler
|
||||
permission java.lang.RuntimePermission "createClassLoader";
|
||||
// needed to test unmap hack on platforms that support it
|
||||
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
|
||||
|
|
|
@ -25,7 +25,6 @@ Versions of Major Components
|
|||
Apache Tika 1.17
|
||||
Carrot2 3.16.0
|
||||
Velocity 1.7 and Velocity Tools 2.0
|
||||
Apache UIMA 2.3.1
|
||||
Apache ZooKeeper 3.4.11
|
||||
Jetty 9.4.11.v20180605
|
||||
|
||||
|
@ -54,7 +53,6 @@ Versions of Major Components
|
|||
Apache Tika 1.17
|
||||
Carrot2 3.16.0
|
||||
Velocity 1.7 and Velocity Tools 2.0
|
||||
Apache UIMA 2.3.1
|
||||
Apache ZooKeeper 3.4.11
|
||||
Jetty 9.4.11.v20180605
|
||||
|
||||
|
@ -69,6 +67,8 @@ Upgrade Notes
|
|||
|
||||
* SOLR-12395: SignificantTermsQParserPlugin's name is now 'significantTerms' and its old name 'sigificantTerms' is deprecated.
|
||||
|
||||
* SOLR-11694: Extremely outdated UIMA contrib module has been removed
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -248,7 +248,7 @@
|
|||
<property name="lucenedocs" location="${common.dir}/build/docs"/>
|
||||
|
||||
<!-- dependency to ensure all lucene javadocs are present -->
|
||||
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-backward-codecs,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial-extras,javadocs-join,javadocs-test-framework"/>
|
||||
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-backward-codecs,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial-extras,javadocs-join,javadocs-test-framework"/>
|
||||
|
||||
<!-- create javadocs for the current module -->
|
||||
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solr-core,check-javadocs-uptodate" unless="javadocs-uptodate-${name}">
|
||||
|
@ -313,7 +313,6 @@
|
|||
<link offline="true" href="${lucene.javadoc.url}analyzers-phonetic" packagelistloc="${lucenedocs}/analyzers-phonetic"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-smartcn" packagelistloc="${lucenedocs}/analyzers-smartcn"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-stempel" packagelistloc="${lucenedocs}/analyzers-stempel"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-uima" packagelistloc="${lucenedocs}/analyzers-uima"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}backward-codecs" packagelistloc="${lucenedocs}/backward-codecs"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}codecs" packagelistloc="${lucenedocs}/codecs"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}expressions" packagelistloc="${lucenedocs}/expressions"/>
|
||||
|
|
|
@ -1,109 +0,0 @@
|
|||
Apache Solr UIMA Metadata Extraction Library
|
||||
|
||||
Introduction
|
||||
------------
|
||||
This module is intended to be used both as an UpdateRequestProcessor while indexing documents and as a set of tokenizer/filters
|
||||
to be configured inside the schema.xml for use during analysis phase.
|
||||
The purpose of UIMAUpdateRequestProcessor is to add automatically generated fields to the Solr index on the fly.
|
||||
Such fields could be language, concepts, keywords, sentences, named entities, etc.
|
||||
UIMA based tokenizers/filters can be used either inside plain Lucene or as index/query analyzers to be defined
|
||||
inside the schema.xml of a Solr core to create/filter tokens using specific UIMA annotations.
|
||||
|
||||
|
||||
Getting Started
|
||||
---------------
|
||||
To start using Solr UIMA Metadata Extraction Library you should go through the following configuration steps:
|
||||
|
||||
1. copy the generated solr-uima jar and its libs (under contrib/uima/lib) into a Solr libraries directory,
|
||||
or set <lib/> tags in solrconfig.xml appropriately to point to those jar files.
|
||||
|
||||
<lib dir="../../contrib/uima/lib" />
|
||||
<lib dir="../../contrib/uima/lucene-libs" />
|
||||
<lib dir="../../dist/" regex="solr-uima-\d.*\.jar" />
|
||||
|
||||
2. modify your schema.xml adding the fields you want to hold metadata, specifying proper values for the type, indexed, stored and multiValued options:
|
||||
|
||||
for example you could specify the following
|
||||
|
||||
<field name="language" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="concept" type="string" indexed="true" stored="true" multiValued="true" required="false"/>
|
||||
<field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false" />
|
||||
|
||||
3. modify your solrconfig.xml adding the following snippet:
|
||||
|
||||
<updateRequestProcessorChain name="uima">
|
||||
<processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
|
||||
<lst name="uimaConfig">
|
||||
<lst name="runtimeParameters">
|
||||
<str name="keyword_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="concept_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="lang_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="cat_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="entities_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="oc_licenseID">VALID_OPENCALAIS_KEY</str>
|
||||
</lst>
|
||||
<str name="analysisEngine">/org/apache/uima/desc/OverridingParamsExtServicesAE.xml</str>
|
||||
<!-- Set to true if you want to continue indexing even if text processing fails.
|
||||
Default is false. That is, Solr throws RuntimeException and
|
||||
does not index any of the documents in your session. -->
|
||||
<bool name="ignoreErrors">true</bool>
|
||||
<!-- This is optional. It is used for logging when text processing fails.
|
||||
If logField is not specified, uniqueKey will be used as logField.
|
||||
<str name="logField">id</str>
|
||||
-->
|
||||
<lst name="analyzeFields">
|
||||
<bool name="merge">false</bool>
|
||||
<arr name="fields">
|
||||
<str>text</str>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst name="fieldMappings">
|
||||
<lst name="type">
|
||||
<str name="name">org.apache.uima.alchemy.ts.concept.ConceptFS</str>
|
||||
<lst name="mapping">
|
||||
<str name="feature">text</str>
|
||||
<str name="field">concept</str>
|
||||
</lst>
|
||||
</lst>
|
||||
<lst name="type">
|
||||
<str name="name">org.apache.uima.alchemy.ts.language.LanguageFS</str>
|
||||
<lst name="mapping">
|
||||
<str name="feature">language</str>
|
||||
<str name="field">language</str>
|
||||
</lst>
|
||||
</lst>
|
||||
<lst name="type">
|
||||
<str name="name">org.apache.uima.SentenceAnnotation</str>
|
||||
<lst name="mapping">
|
||||
<str name="feature">coveredText</str>
|
||||
<str name="field">sentence</str>
|
||||
</lst>
|
||||
</lst>
|
||||
</lst>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.LogUpdateProcessorFactory" />
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
where VALID_ALCHEMYAPI_KEY is your AlchemyAPI Access Key. You need to register for an AlchemyAPI Access
|
||||
key to exploit the AlchemyAPI services: http://www.alchemyapi.com/api/register.html
|
||||
|
||||
where VALID_OPENCALAIS_KEY is your Calais Service Key. You need to register for a Calais Service
|
||||
key to exploit the Calais services: http://www.opencalais.com/apikey
|
||||
|
||||
the analysisEngine must contain an AE descriptor inside the specified path in the classpath
|
||||
|
||||
the analyzeFields must contain the input fields that need to be analyzed by UIMA,
|
||||
if merge=true then their content will be merged and analyzed only once
|
||||
|
||||
field mapping describes which features of which types should go in a field
|
||||
|
||||
4. in your solrconfig.xml replace the existing default (<requestHandler name="/update"...) or create a new UpdateRequestHandler with the following:
|
||||
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler">
|
||||
<lst name="defaults">
|
||||
<str name="update.processor">uima</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
Once you're done with the configuration, you can index documents, which will be automatically enriched with the specified fields.
|
|
@ -1,63 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- Ant build for the Solr UIMA contrib: imports the shared contrib build and adds the Lucene
     analyzers-uima jar to the classpath and to the lucene-libs directory. -->
<project name="solr-uima" default="default">
|
||||
|
||||
<description>
|
||||
Solr Integration with UIMA for extracting metadata from arbitrary (text) fields and enrich document with features
|
||||
extracted from UIMA types (language, sentences, concepts, named entities, etc.)
|
||||
</description>
|
||||
|
||||
<!-- Targets and properties not defined in this file are expected to come from this import
     (TODO confirm which ones, e.g. jar-analyzers-uima and solr.base.classpath). -->
<import file="../contrib-build.xml"/>
|
||||
|
||||
<!-- The single Lucene module jar this contrib depends on. -->
<path id="uima.lucene.libs">
|
||||
<pathelement path="${analyzers-uima.jar}"/>
|
||||
</path>
|
||||
|
||||
<!-- Classpath: the Lucene UIMA analyzers jar followed by solr.base.classpath. -->
<path id="classpath">
|
||||
<path refid="uima.lucene.libs"/>
|
||||
<path refid="solr.base.classpath"/>
|
||||
</path>
|
||||
|
||||
<!-- Puts the Lucene UIMA analyzers jar into ${build.dir}/lucene-libs. Exactly one of the two
     dependencies does the work, selected by whether called.from.create-package is set. -->
<target name="module-jars-to-solr"
|
||||
depends="-module-jars-to-solr-not-for-package,-module-jars-to-solr-package"/>
|
||||
<!-- Regular build: build the jar via jar-analyzers-uima, then copy it from its build location. -->
<target name="-module-jars-to-solr-not-for-package" unless="called.from.create-package">
|
||||
<antcall target="jar-analyzers-uima" inheritall="true"/>
|
||||
<!-- antcall runs in a new project, so the uptodate flag is set again in this one. -->
<property name="analyzers-uima.uptodate" value="true"/>
|
||||
<mkdir dir="${build.dir}/lucene-libs"/>
|
||||
<copy todir="${build.dir}/lucene-libs" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
|
||||
<fileset file="${analyzers-uima.jar}"/>
|
||||
</copy>
|
||||
</target>
|
||||
<!-- Packaging build: take the jar from the unpacked Lucene tgz instead of the build tree. -->
<target name="-module-jars-to-solr-package" if="called.from.create-package">
|
||||
<antcall target="-unpack-lucene-tgz" inheritall="true"/>
|
||||
<!-- Rewrite the jar path relative to ${common.build.dir} so it can be used as an include
     pattern under the unpacked lucene-${version} directory. -->
<pathconvert property="relative.uima.lucene.libs" pathsep=",">
|
||||
<path refid="uima.lucene.libs"/>
|
||||
<globmapper from="${common.build.dir}/*" to="*" handledirsep="true"/>
|
||||
</pathconvert>
|
||||
<mkdir dir="${build.dir}/lucene-libs"/>
|
||||
<copy todir="${build.dir}/lucene-libs" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
|
||||
<fileset dir="${lucene.tgz.unpack.dir}/lucene-${version}" includes="${relative.uima.lucene.libs}"/>
|
||||
</copy>
|
||||
</target>
|
||||
|
||||
<!-- Build the Lucene UIMA analyzers jar before running the shared contrib compile-core. -->
<target name="compile-core" depends="jar-analyzers-uima, solr-contrib-build.compile-core"/>
|
||||
<!-- Stage the Lucene jar into lucene-libs before running the shared Solr dist target. -->
<target name="dist" depends="module-jars-to-solr, common-solr.dist"/>
|
||||
|
||||
</project>
|
|
@ -1,35 +0,0 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<!-- Ivy descriptor for the Solr UIMA contrib (org.apache.solr#uima). -->
<ivy-module version="2.0">
|
||||
<info organisation="org.apache.solr" module="uima"/>
|
||||
<!-- Both configurations map to the dependency's "master" configuration and are non-transitive,
     so every required jar has to be listed explicitly below. -->
<configurations defaultconfmapping="compile->master;test->master">
|
||||
<conf name="compile" transitive="false"/>
|
||||
<conf name="test" transitive="false"/>
|
||||
</configurations>
|
||||
<!-- Revisions come from ${/org/name} properties defined outside this file
     (presumably a shared versions properties file; TODO confirm). -->
<dependencies>
|
||||
<dependency org="commons-digester" name="commons-digester" rev="${/commons-digester/commons-digester}" conf="compile"/>
|
||||
<dependency org="org.apache.uima" name="AlchemyAPIAnnotator" rev="${/org.apache.uima/AlchemyAPIAnnotator}" conf="compile"/>
|
||||
<dependency org="org.apache.uima" name="OpenCalaisAnnotator" rev="${/org.apache.uima/OpenCalaisAnnotator}" conf="compile"/>
|
||||
<dependency org="org.apache.uima" name="Tagger" rev="${/org.apache.uima/Tagger}" conf="compile"/>
|
||||
<dependency org="org.apache.uima" name="WhitespaceTokenizer" rev="${/org.apache.uima/WhitespaceTokenizer}" conf="compile"/>
|
||||
<dependency org="org.apache.uima" name="uimaj-core" rev="${/org.apache.uima/uimaj-core}" conf="compile"/>
|
||||
|
||||
<!-- Module-wide exclude: drops artifacts whose type matches the ${ivy.exclude.types} regexp. -->
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
|
||||
</dependencies>
|
||||
</ivy-module>
|
|
@ -1,27 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uima.processor;
|
||||
|
||||
/**
 * Checked exception signalling that mapping the UIMA CAS model onto Solr fields failed.
 */
@SuppressWarnings("serial")
public class FieldMappingException extends Exception {

  /**
   * Wraps the failure that interrupted the mapping.
   *
   * @param e the underlying exception; it becomes this exception's cause
   */
  public FieldMappingException(Exception e) {
    super(e);
  }

}
|
|
@ -1,117 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uima.processor;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Value holder for the parameters used when calling UIMA from inside Solr.
 *
 * <p>All values are supplied once through the constructor and handed back unchanged by the
 * getters; the array and maps are returned by reference, not copied.
 */
public class SolrUIMAConfiguration {

  // Path of the analysis engine (presumably the AE descriptor location - confirm with the caller).
  private final String aePath;

  // Names of the fields to analyze.
  private final String[] fieldsToAnalyze;

  // Whether field merging is enabled.
  private final boolean fieldsMerging;

  // Nested map whose innermost values are the target field descriptors
  // (presumably keyed by UIMA type name, then feature name - confirm with the caller).
  private final Map<String, Map<String, MapField>> typesFeaturesFieldsMapping;

  // Runtime parameters, keyed by name.
  private final Map<String, Object> runtimeParameters;

  // Whether errors are to be ignored.
  private final boolean ignoreErrors;

  // Name of the field used for logging.
  private final String logField;

  SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
                        Map<String, Map<String, MapField>> typesFeaturesFieldsMapping,
                        Map<String, Object> runtimeParameters, boolean ignoreErrors, String logField) {
    this.aePath = aePath;
    this.fieldsToAnalyze = fieldsToAnalyze;
    this.fieldsMerging = fieldsMerging;
    this.typesFeaturesFieldsMapping = typesFeaturesFieldsMapping;
    this.runtimeParameters = runtimeParameters;
    this.ignoreErrors = ignoreErrors;
    this.logField = logField;
  }

  /** @return the array of field names to analyze, as passed to the constructor */
  public String[] getFieldsToAnalyze() {
    return this.fieldsToAnalyze;
  }

  /** @return the fields-merging flag */
  public boolean isFieldsMerging() {
    return this.fieldsMerging;
  }

  /** @return the nested mapping of target fields, as passed to the constructor */
  public Map<String, Map<String, MapField>> getTypesFeaturesFieldsMapping() {
    return this.typesFeaturesFieldsMapping;
  }

  /** @return the analysis engine path */
  public String getAePath() {
    return this.aePath;
  }

  /** @return the runtime parameters map, as passed to the constructor */
  public Map<String, Object> getRuntimeParameters() {
    return this.runtimeParameters;
  }

  /** @return the ignore-errors flag */
  public boolean isIgnoreErrors() {
    return this.ignoreErrors;
  }

  /** @return the name of the field used for logging */
  public String getLogField() {
    return this.logField;
  }

  /**
   * Describes the Solr field a value is written to.
   *
   * <p>Without a field-name feature the field name is static. With one, the configured name must
   * carry a leading or trailing {@code *}; the wildcard is stripped and the feature value is put
   * in its place when the concrete name is built.
   */
  public static final class MapField {

    // Static name, or the fixed part of a wildcard name once the '*' has been removed.
    private String fieldName;

    private final String fieldNameFeature;

    // Only meaningful when fieldNameFeature is set:
    // true  - fixed part comes first   (configured as "s_*")
    // false - feature value comes first (configured as "*_s")
    private boolean prefix;

    MapField(String fieldName, String fieldNameFeature) {
      this.fieldNameFeature = fieldNameFeature;
      this.fieldName = fieldName;
      if (fieldNameFeature == null) {
        return; // static field name, nothing to strip
      }

      final boolean leadingWildcard = fieldName.startsWith("*");
      if (!leadingWildcard && !fieldName.endsWith("*")) {
        throw new RuntimeException("static field name cannot be used for dynamicField");
      }

      this.prefix = !leadingWildcard;
      this.fieldName = leadingWildcard
          ? fieldName.substring(1)
          : fieldName.substring(0, fieldName.length() - 1);
    }

    /** @return the field-name feature, or {@code null} for a static field name */
    public String getFieldNameFeature() {
      return this.fieldNameFeature;
    }

    /**
     * Builds the concrete field name.
     *
     * @param featureValue value substituted for the wildcard; unused for a static field name
     * @return the static name, or the fixed part combined with {@code featureValue}
     */
    public String getFieldName(String featureValue) {
      if (this.fieldNameFeature == null) {
        return this.fieldName;
      }
      if (this.prefix) {
        return this.fieldName + featureValue;
      }
      return featureValue + this.fieldName;
    }
  }
}
|
|
@ -1,116 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uima.processor;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
|
||||
/**
|
||||
* Read configuration for Solr-UIMA integration
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class SolrUIMAConfigurationReader {
|
||||
|
||||
private final NamedList<Object> args;
|
||||
|
||||
public SolrUIMAConfigurationReader(NamedList<Object> args) {
|
||||
this.args = args;
|
||||
}
|
||||
|
||||
public SolrUIMAConfiguration readSolrUIMAConfiguration() {
|
||||
return new SolrUIMAConfiguration(readAEPath(), readFieldsToAnalyze(), readFieldsMerging(),
|
||||
readTypesFeaturesFieldsMapping(), readAEOverridingParameters(), readIgnoreErrors(),
|
||||
readLogField());
|
||||
}
|
||||
|
||||
private String readAEPath() {
|
||||
return (String) args.get("analysisEngine");
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
private NamedList getAnalyzeFields() {
|
||||
return (NamedList) args.get("analyzeFields");
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private String[] readFieldsToAnalyze() {
|
||||
List<String> fields = (List<String>) getAnalyzeFields().get("fields");
|
||||
return fields.toArray(new String[fields.size()]);
|
||||
}
|
||||
|
||||
private boolean readFieldsMerging() {
|
||||
return (Boolean) getAnalyzeFields().get("merge");
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
private Map<String, Map<String, MapField>> readTypesFeaturesFieldsMapping() {
|
||||
Map<String, Map<String, MapField>> map = new HashMap<>();
|
||||
|
||||
NamedList fieldMappings = (NamedList) args.get("fieldMappings");
|
||||
/* iterate over UIMA types */
|
||||
for (int i = 0; i < fieldMappings.size(); i++) {
|
||||
NamedList type = (NamedList) fieldMappings.get("type", i);
|
||||
String typeName = (String)type.get("name");
|
||||
|
||||
Map<String, MapField> subMap = new HashMap<>();
|
||||
/* iterate over mapping definitions */
|
||||
for(int j = 0; j < type.size() - 1; j++){
|
||||
NamedList mapping = (NamedList) type.get("mapping", j + 1);
|
||||
String featureName = (String) mapping.get("feature");
|
||||
String fieldNameFeature = null;
|
||||
String mappedFieldName = (String) mapping.get("field");
|
||||
if(mappedFieldName == null){
|
||||
fieldNameFeature = (String) mapping.get("fieldNameFeature");
|
||||
mappedFieldName = (String) mapping.get("dynamicField");
|
||||
}
|
||||
if(mappedFieldName == null)
|
||||
throw new RuntimeException("either of field or dynamicField should be defined for feature " + featureName);
|
||||
MapField mapField = new MapField(mappedFieldName, fieldNameFeature);
|
||||
subMap.put(featureName, mapField);
|
||||
}
|
||||
map.put(typeName, subMap);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
private Map<String, Object> readAEOverridingParameters() {
|
||||
Map<String, Object> runtimeParameters = new HashMap<>();
|
||||
NamedList runtimeParams = (NamedList) args.get("runtimeParameters");
|
||||
for (int i = 0; i < runtimeParams.size(); i++) {
|
||||
String name = runtimeParams.getName(i);
|
||||
Object value = runtimeParams.getVal(i);
|
||||
runtimeParameters.put(name, value);
|
||||
}
|
||||
return runtimeParameters;
|
||||
}
|
||||
|
||||
private boolean readIgnoreErrors() {
|
||||
Object ignoreErrors = args.get("ignoreErrors");
|
||||
return ignoreErrors == null ? false : (Boolean)ignoreErrors;
|
||||
}
|
||||
|
||||
private String readLogField() {
|
||||
return (String)args.get("logField");
|
||||
}
|
||||
}
|
|
@ -1,87 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uima.processor;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
import org.apache.uima.cas.FSIterator;
|
||||
import org.apache.uima.cas.FeatureStructure;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.jcas.JCas;
|
||||
import org.apache.uima.jcas.tcas.Annotation;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Map UIMA types and features over fields of a Solr document
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class UIMAToSolrMapper {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private final SolrInputDocument document;
|
||||
|
||||
private final JCas cas;
|
||||
|
||||
public UIMAToSolrMapper(SolrInputDocument document, JCas cas) {
|
||||
this.document = document;
|
||||
this.cas = cas;
|
||||
}
|
||||
|
||||
/**
|
||||
* map features of a certain UIMA type to corresponding Solr fields based on the mapping
|
||||
*
|
||||
* @param typeName name of UIMA type to map
|
||||
*/
|
||||
void map(String typeName, Map<String, MapField> featureFieldsmapping) throws FieldMappingException {
|
||||
try {
|
||||
Type type = cas.getTypeSystem().getType(typeName);
|
||||
for (FSIterator<FeatureStructure> iterator = cas.getFSIndexRepository().getAllIndexedFS(type); iterator
|
||||
.hasNext(); ) {
|
||||
FeatureStructure fs = iterator.next();
|
||||
for (String featureName : featureFieldsmapping.keySet()) {
|
||||
MapField mapField = featureFieldsmapping.get(featureName);
|
||||
String fieldNameFeature = mapField.getFieldNameFeature();
|
||||
String fieldNameFeatureValue = fieldNameFeature == null ? null :
|
||||
fs.getFeatureValueAsString(type.getFeatureByBaseName(fieldNameFeature));
|
||||
String fieldName = mapField.getFieldName(fieldNameFeatureValue);
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("mapping {}@{} to {}", new Object[]{typeName, featureName, fieldName});
|
||||
}
|
||||
String featureValue;
|
||||
if (fs instanceof Annotation && "coveredText".equals(featureName)) {
|
||||
featureValue = ((Annotation) fs).getCoveredText();
|
||||
} else {
|
||||
featureValue = fs.getFeatureValueAsString(type.getFeatureByBaseName(featureName));
|
||||
}
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("writing {} in {}", new Object[]{featureValue, fieldName});
|
||||
}
|
||||
document.addField(fieldName, featureValue);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new FieldMappingException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,189 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uima.processor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.jcas.JCas;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.apache.uima.util.JCasPool;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Update document(s) to be indexed with UIMA extracted information
|
||||
*
|
||||
*/
|
||||
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private SolrUIMAConfiguration solrUIMAConfiguration;
|
||||
|
||||
private AnalysisEngine ae;
|
||||
|
||||
private JCasPool pool;
|
||||
|
||||
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next,
|
||||
String coreName, SolrUIMAConfiguration config, AnalysisEngine ae,
|
||||
JCasPool pool) {
|
||||
super(next);
|
||||
this.ae = ae;
|
||||
this.pool = pool;
|
||||
solrUIMAConfiguration = config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||
String text = null;
|
||||
try {
|
||||
/* get Solr document */
|
||||
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
|
||||
|
||||
/* get the fields to analyze */
|
||||
String[] texts = getTextsToAnalyze(solrInputDocument);
|
||||
for (String currentText : texts) {
|
||||
text = currentText;
|
||||
if (text != null && text.length() > 0) {
|
||||
/* create a JCas which contain the text to analyze */
|
||||
JCas jcas = pool.getJCas(0);
|
||||
try {
|
||||
/* process the text value */
|
||||
processText(text, jcas);
|
||||
|
||||
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(
|
||||
solrInputDocument, jcas);
|
||||
/* get field mapping from config */
|
||||
Map<String,Map<String,MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
|
||||
.getTypesFeaturesFieldsMapping();
|
||||
/* map type features on fields */
|
||||
for (Entry<String,Map<String,MapField>> entry : typesAndFeaturesFieldsMap
|
||||
.entrySet()) {
|
||||
uimaToSolrMapper.map(entry.getKey(), entry.getValue());
|
||||
}
|
||||
} finally {
|
||||
pool.releaseJCas(jcas);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
String logField = solrUIMAConfiguration.getLogField();
|
||||
if (logField == null) {
|
||||
SchemaField uniqueKeyField = cmd.getReq().getSchema()
|
||||
.getUniqueKeyField();
|
||||
if (uniqueKeyField != null) {
|
||||
logField = uniqueKeyField.getName();
|
||||
}
|
||||
}
|
||||
String optionalFieldInfo = logField == null ? "." : ". " + logField + "=" + cmd.getSolrInputDocument().
|
||||
getField(logField).getValue() + ", ";
|
||||
int len;
|
||||
String debugString;
|
||||
if (text != null && text.length() > 0) {
|
||||
len = Math.min(text.length(), 100);
|
||||
debugString = " text=\"" + text.substring(0, len) + "...\"";
|
||||
} else {
|
||||
debugString = " null text";
|
||||
}
|
||||
if (solrUIMAConfiguration.isIgnoreErrors()) {
|
||||
log.warn(
|
||||
"skip the text processing due to {}",
|
||||
new StringBuilder().append(e.getLocalizedMessage())
|
||||
.append(optionalFieldInfo).append(debugString));
|
||||
} else {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "processing error " + e.getLocalizedMessage() +
|
||||
optionalFieldInfo + debugString, e);
|
||||
}
|
||||
}
|
||||
super.processAdd(cmd);
|
||||
}
|
||||
|
||||
/*
|
||||
* get the texts to analyze from the corresponding fields
|
||||
*/
|
||||
private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) {
|
||||
String[] fieldsToAnalyze = solrUIMAConfiguration.getFieldsToAnalyze();
|
||||
boolean merge = solrUIMAConfiguration.isFieldsMerging();
|
||||
String[] textVals;
|
||||
if (merge) {
|
||||
StringBuilder unifiedText = new StringBuilder("");
|
||||
for (String aFieldsToAnalyze : fieldsToAnalyze) {
|
||||
if (solrInputDocument.getFieldValues(aFieldsToAnalyze) != null) {
|
||||
Object[] Values = solrInputDocument.getFieldValues(aFieldsToAnalyze).toArray();
|
||||
for (Object Value : Values) {
|
||||
if (unifiedText.length() > 0) {
|
||||
unifiedText.append(' ');
|
||||
}
|
||||
unifiedText.append(Value.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
textVals = new String[1];
|
||||
textVals[0] = unifiedText.toString();
|
||||
} else {
|
||||
textVals = new String[fieldsToAnalyze.length];
|
||||
for (int i = 0; i < fieldsToAnalyze.length; i++) {
|
||||
if (solrInputDocument.getFieldValues(fieldsToAnalyze[i]) != null) {
|
||||
Object[] Values = solrInputDocument.getFieldValues(fieldsToAnalyze[i]).toArray();
|
||||
for (Object Value : Values) {
|
||||
textVals[i] += Value.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return textVals;
|
||||
}
|
||||
|
||||
/*
|
||||
* process a field value executing UIMA on the JCas containing it as document
|
||||
* text
|
||||
*/
|
||||
private void processText(String textFieldValue, JCas jcas)
|
||||
throws ResourceInitializationException, AnalysisEngineProcessException {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Analyzing text");
|
||||
}
|
||||
|
||||
jcas.setDocumentText(textFieldValue);
|
||||
|
||||
/* perform analysis on text field */
|
||||
ae.process(jcas);
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Text processing completed");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the configuration object for this request processor
|
||||
*/
|
||||
public SolrUIMAConfiguration getConfiguration()
|
||||
{
|
||||
return solrUIMAConfiguration;
|
||||
}
|
||||
}
|
|
@ -1,72 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uima.processor;
|
||||
|
||||
import org.apache.lucene.analysis.uima.ae.AEProvider;
|
||||
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.apache.uima.util.JCasPool;
|
||||
|
||||
/**
|
||||
* Factory for {@link UIMAUpdateRequestProcessor}
|
||||
*
|
||||
* @since 3.1.0
|
||||
*/
|
||||
public class UIMAUpdateRequestProcessorFactory extends
|
||||
UpdateRequestProcessorFactory {
|
||||
|
||||
private NamedList<Object> args;
|
||||
private AnalysisEngine ae;
|
||||
private JCasPool pool;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void init(@SuppressWarnings("rawtypes") NamedList args) {
|
||||
this.args = (NamedList<Object>) args.get("uimaConfig");
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp, UpdateRequestProcessor next) {
|
||||
SolrUIMAConfiguration configuration = new SolrUIMAConfigurationReader(args)
|
||||
.readSolrUIMAConfiguration();
|
||||
synchronized (this) {
|
||||
if (ae == null && pool == null) {
|
||||
AEProvider aeProvider = AEProviderFactory.getInstance().getAEProvider(
|
||||
req.getCore().getName(), configuration.getAePath(),
|
||||
configuration.getRuntimeParameters());
|
||||
try {
|
||||
ae = aeProvider.getAE();
|
||||
pool = new JCasPool(10, ae);
|
||||
} catch (ResourceInitializationException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new UIMAUpdateRequestProcessor(next, req.getCore().getName(),
|
||||
configuration, ae, pool);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* {@link org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory} and related code.
|
||||
*/
|
||||
package org.apache.solr.uima.processor;
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<body>
|
||||
Apache Solr Search Server: Solr UIMA contrib
|
||||
</body>
|
||||
</html>
|
|
@ -1,41 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>false</primitive>
|
||||
<delegateAnalysisEngineSpecifiers>
|
||||
<delegateAnalysisEngine key="HmmTagger">
|
||||
<import name="HmmTagger"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="WhitespaceTokenizer">
|
||||
<import name="WhitespaceTokenizer"/>
|
||||
</delegateAnalysisEngine>
|
||||
</delegateAnalysisEngineSpecifiers>
|
||||
<analysisEngineMetaData>
|
||||
<name>AggregateSentenceAE</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters/>
|
||||
<configurationParameterSettings/>
|
||||
<flowConstraints>
|
||||
<fixedFlow>
|
||||
<node>WhitespaceTokenizer</node>
|
||||
<node>HmmTagger</node>
|
||||
</fixedFlow>
|
||||
</flowConstraints>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,121 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.uima.examples.tagger.HMMTagger</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>Hidden Markov Model - Part of Speech Tagger</name>
|
||||
<description>A configuration of the HmmTaggerAnnotator that looks for
|
||||
parts of speech of identified tokens within existing
|
||||
Sentence and Token annotations. See also
|
||||
WhitespaceTokenizer.xml.</description>
|
||||
<version>1.0</version>
|
||||
<vendor>The Apache Software Foundation</vendor>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>NGRAM_SIZE</name>
|
||||
<type>Integer</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings>
|
||||
<nameValuePair>
|
||||
<name>NGRAM_SIZE</name>
|
||||
<value>
|
||||
<integer>3</integer>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
</configurationParameterSettings>
|
||||
<typeSystemDescription>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.TokenAnnotation</name>
|
||||
<description>Single token annotation</description>
|
||||
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>posTag</name>
|
||||
<description>contains part-of-speech of a
|
||||
corresponding token</description>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.SentenceAnnotation</name>
|
||||
<description>sentence annotation</description>
|
||||
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs>
|
||||
<type>org.apache.uima.TokenAnnotation</type>
|
||||
<type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
|
||||
<feature>org.apache.uima.TokenAnnotation:end</feature>
|
||||
<feature>org.apache.uima.TokenAnnotation:begin</feature>
|
||||
</inputs>
|
||||
<outputs>
|
||||
<type>org.apache.uima.TokenAnnotation</type>
|
||||
<feature>org.apache.uima.TokenAnnotation:posTag</feature>
|
||||
<feature>org.apache.uima.TokenAnnotation:end</feature>
|
||||
<feature>org.apache.uima.TokenAnnotation:begin</feature>
|
||||
</outputs>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<externalResourceDependencies>
|
||||
<externalResourceDependency>
|
||||
<key>Model</key>
|
||||
<description>HMM Tagger model file</description>
|
||||
<interfaceName>org.apache.uima.examples.tagger.IModelResource</interfaceName>
|
||||
<optional>false</optional>
|
||||
</externalResourceDependency>
|
||||
</externalResourceDependencies>
|
||||
<resourceManagerConfiguration>
|
||||
<externalResources>
|
||||
<externalResource>
|
||||
<name>ModelFile</name>
|
||||
<description>HMM Tagger model file</description>
|
||||
<fileResourceSpecifier>
|
||||
<fileUrl>file:english/BrownModel.dat</fileUrl>
|
||||
</fileResourceSpecifier>
|
||||
<implementationName>org.apache.uima.examples.tagger.ModelResource</implementationName>
|
||||
</externalResource>
|
||||
</externalResources>
|
||||
<externalResourceBindings>
|
||||
<externalResourceBinding>
|
||||
<key>Model</key>
|
||||
<resourceName>ModelFile</resourceName>
|
||||
</externalResourceBinding>
|
||||
</externalResourceBindings>
|
||||
</resourceManagerConfiguration>
|
||||
</analysisEngineDescription>
|
|
@ -1,194 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.uima.annotator.calais.OpenCalaisAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>OpenCalaisAnnotator</name>
|
||||
<description/>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>allowDistribution</name>
|
||||
<description/>
|
||||
<type>Boolean</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>allowSearch</name>
|
||||
<description/>
|
||||
<type>Boolean</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>submitter</name>
|
||||
<description/>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>licenseID</name>
|
||||
<description/>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings>
|
||||
<nameValuePair>
|
||||
<name>allowDistribution</name>
|
||||
<value>
|
||||
<boolean>false</boolean>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>allowSearch</name>
|
||||
<value>
|
||||
<boolean>false</boolean>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>submitter</name>
|
||||
<value>
|
||||
<string/>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>licenseID</name>
|
||||
<value>
|
||||
<string>OC_LICENSE_ID</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
</configurationParameterSettings>
|
||||
<typeSystemDescription>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Person</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Anniversary</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.City</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Company</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Continent</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Country</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Currency</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.EmailAddress</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Facility</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.FaxNumber</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Holiday</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.IndustryTerm</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.NaturalDisaster</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.NaturalFeature</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Organization</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.PhoneNumber</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.ProviceOrState</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Region</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.Technology</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.URL</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.calais.BaseType</name>
|
||||
<description/>
|
||||
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>calaisType</name>
|
||||
<description>OpenCalais type</description>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
</analysisEngineDescription>
|
|
@ -1,147 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>false</primitive>
|
||||
<delegateAnalysisEngineSpecifiers>
|
||||
<delegateAnalysisEngine key="TextKeywordExtractionAEDescriptor">
|
||||
<import name="TextKeywordExtractionAEDescriptor"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="TextConceptTaggingAEDescriptor">
|
||||
<import name="TextConceptTaggingAEDescriptor"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="OpenCalaisAnnotator">
|
||||
<import name="OpenCalaisAnnotator"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="TextLanguageDetectionAEDescriptor">
|
||||
<import name="TextLanguageDetectionAEDescriptor"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="TextCategorizationAEDescriptor">
|
||||
<import name="TextCategorizationAEDescriptor"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="AggregateSentenceAE">
|
||||
<import location="AggregateSentenceAE.xml"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="TextRankedEntityExtractionAEDescriptor">
|
||||
<import name="TextRankedEntityExtractionAEDescriptor"/>
|
||||
</delegateAnalysisEngine>
|
||||
</delegateAnalysisEngineSpecifiers>
|
||||
<analysisEngineMetaData>
|
||||
<name>ExtServicesAE</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters searchStrategy="language_fallback">
|
||||
<configurationParameter>
|
||||
<name>oc_licenseID</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
<overrides>
|
||||
<parameter>OpenCalaisAnnotator/licenseID</parameter>
|
||||
</overrides>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>keyword_apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
<overrides>
|
||||
<parameter>TextKeywordExtractionAEDescriptor/apikey</parameter>
|
||||
</overrides>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>concept_apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
<overrides>
|
||||
<parameter>TextConceptTaggingAEDescriptor/apikey</parameter>
|
||||
</overrides>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>lang_apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
<overrides>
|
||||
<parameter>TextLanguageDetectionAEDescriptor/apikey</parameter>
|
||||
</overrides>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>cat_apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
<overrides>
|
||||
<parameter>TextCategorizationAEDescriptor/apikey</parameter>
|
||||
</overrides>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>entities_apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
<overrides>
|
||||
<parameter>TextRankedEntityExtractionAEDescriptor/apikey</parameter>
|
||||
</overrides>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings>
|
||||
<nameValuePair>
|
||||
<name>oc_licenseID</name>
|
||||
<value>
|
||||
<string>licenseid</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>keyword_apikey</name>
|
||||
<value>
|
||||
<string>apikey</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>concept_apikey</name>
|
||||
<value>
|
||||
<string>apikey</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>lang_apikey</name>
|
||||
<value>
|
||||
<string>apikey</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>cat_apikey</name>
|
||||
<value>
|
||||
<string>apikey</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
</configurationParameterSettings>
|
||||
<flowConstraints>
|
||||
<fixedFlow>
|
||||
<node>AggregateSentenceAE</node>
|
||||
<node>OpenCalaisAnnotator</node>
|
||||
<node>TextKeywordExtractionAEDescriptor</node>
|
||||
<node>TextLanguageDetectionAEDescriptor</node>
|
||||
<node>TextCategorizationAEDescriptor</node>
|
||||
<node>TextConceptTaggingAEDescriptor</node>
|
||||
<node>TextRankedEntityExtractionAEDescriptor</node>
|
||||
</fixedFlow>
|
||||
</flowConstraints>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,102 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.uima.alchemy.annotator.TextCategorizationAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>TextCategorizationAEDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>outputMode</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>baseUrl</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings>
|
||||
<nameValuePair>
|
||||
<name>outputMode</name>
|
||||
<value>
|
||||
<string>xml</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>apikey</name>
|
||||
<value>
|
||||
<string>AA_API_KEY</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
</configurationParameterSettings>
|
||||
<typeSystemDescription>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.categorization.Category</name>
|
||||
<description/>
|
||||
<supertypeName>uima.cas.TOP</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>score</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>text</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,196 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.uima.alchemy.annotator.TextConceptTaggingAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>TextConceptTaggingAEDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>outputMode</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>linkedData</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>showSourceText</name>
|
||||
<type>Integer</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>maxRetrieve</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>url</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings>
|
||||
<nameValuePair>
|
||||
<name>apikey</name>
|
||||
<value>
|
||||
<string/>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>outputMode</name>
|
||||
<value>
|
||||
<string>xml</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>linkedData</name>
|
||||
<value>
|
||||
<string>1</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>showSourceText</name>
|
||||
<value>
|
||||
<integer>0</integer>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>maxRetrieve</name>
|
||||
<value>
|
||||
<string>8</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
</configurationParameterSettings>
|
||||
<typeSystemDescription>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.concept.ConceptFS</name>
|
||||
<description>a concept tag</description>
|
||||
<supertypeName>uima.cas.TOP</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>text</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>relevance</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>website</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>geo</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>dbpedia</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>yago</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>opencyc</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>freebase</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>ciaFactbook</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>census</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>geonames</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>musicBrainz</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>crunchbase</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>semanticCrunchbase</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,107 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.uima.alchemy.annotator.TextKeywordExtractionAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>TextKeywordExtractionAEDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>outputMode</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>baseUrl</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>url</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>maxRetrieve</name>
|
||||
<type>Integer</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>showSourceText</name>
|
||||
<type>Integer</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings>
|
||||
<nameValuePair>
|
||||
<name>outputMode</name>
|
||||
<value>
|
||||
<string>xml</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>apikey</name>
|
||||
<value>
|
||||
<string>04490000a72fe7ec5cb3497f14e77f338c86f2fe</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>maxRetrieve</name>
|
||||
<value>
|
||||
<integer>10</integer>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>showSourceText</name>
|
||||
<value>
|
||||
<integer>0</integer>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
</configurationParameterSettings>
|
||||
<typeSystemDescription>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.keywords.KeywordFS</name>
|
||||
<description/>
|
||||
<supertypeName>uima.cas.TOP</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>text</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,107 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.uima.alchemy.annotator.TextLanguageDetectionAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>TextLanguageDetectionAEDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>outputMode</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>url</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings>
|
||||
<nameValuePair>
|
||||
<name>outputMode</name>
|
||||
<value>
|
||||
<string>xml</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>apikey</name>
|
||||
<value>
|
||||
<string>AA_API_KEY</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
</configurationParameterSettings>
|
||||
<typeSystemDescription>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.language.LanguageFS</name>
|
||||
<description/>
|
||||
<supertypeName>uima.cas.TOP</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>language</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>iso6391</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>iso6392</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>iso6393</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>ethnologue</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>nativeSpeakers</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>wikipedia</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,403 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.uima.alchemy.annotator.TextRankedNamedEntityExtractionAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>TextRankedEntityExtractionAEDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>apikey</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>outputMode</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>disambiguate</name>
|
||||
<type>Integer</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>linkedData</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>showSourceText</name>
|
||||
<type>Integer</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>true</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>baseUrl</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>url</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>coreference</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
<configurationParameter>
|
||||
<name>quotations</name>
|
||||
<type>String</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings>
|
||||
<nameValuePair>
|
||||
<name>apikey</name>
|
||||
<value>
|
||||
<string/>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>outputMode</name>
|
||||
<value>
|
||||
<string>xml</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>disambiguate</name>
|
||||
<value>
|
||||
<integer>1</integer>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>linkedData</name>
|
||||
<value>
|
||||
<string>1</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>coreference</name>
|
||||
<value>
|
||||
<string>1</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>showSourceText</name>
|
||||
<value>
|
||||
<integer>0</integer>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
<nameValuePair>
|
||||
<name>quotations</name>
|
||||
<value>
|
||||
<string>1</string>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
</configurationParameterSettings>
|
||||
<typeSystemDescription>
|
||||
<imports>
|
||||
<import location="baseAlchemyTypeSystemDescriptor.xml"/>
|
||||
</imports>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Anniversary</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Automobile</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.City</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Company</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Continent</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Country</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.EntertainmentAward</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Facility</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.FieldTerminology</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.FinancialMarketIndex</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.GeographicFeature</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.HealthCondition</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Holiday</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Movie</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.MusicGroup</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.NaturalDisaster</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Organization</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Person</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.PrintMedia</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.RadioProgram</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.RadioStation</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Region</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Sport</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.StateOrCounty</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Technology</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.TelevisionShow</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.TelevisionStation</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.OperatingSystem</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.SportingEvent</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.Drug</name>
|
||||
<description/>
|
||||
<supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
|
||||
</typeDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.BaseEntity</name>
|
||||
<description/>
|
||||
<supertypeName>uima.cas.TOP</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>text</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>count</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>relevance</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>disambiguation</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>subType</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>website</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>geo</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>dbpedia</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>yago</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>opencyc</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>umbel</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>freebase</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>ciaFactbook</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>census</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>geonames</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>musicBrainz</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>quotations</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.StringArray</rangeTypeName>
|
||||
<multipleReferencesAllowed>true</multipleReferencesAllowed>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>occurrences</name>
|
||||
<description>A list of annotations annotating this entity</description>
|
||||
<rangeTypeName>uima.cas.FSList</rangeTypeName>
|
||||
<elementType>uima.tcas.Annotation</elementType>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,115 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
|
||||
<!--
|
||||
***************************************************************
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
***************************************************************
|
||||
-->
|
||||
|
||||
<analysisEngineDescription
|
||||
xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>
|
||||
org.apache.uima.java
|
||||
</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>
|
||||
org.apache.uima.annotator.WhitespaceTokenizer
|
||||
</annotatorImplementationName>
|
||||
|
||||
<analysisEngineMetaData>
|
||||
<name>WhitespaceTokenizer</name>
|
||||
<description>
|
||||
creates token and sentence annotations for whitespace
|
||||
separated languages
|
||||
</description>
|
||||
<version>1.0</version>
|
||||
<vendor>The Apache Software Foundation</vendor>
|
||||
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>SofaNames</name>
|
||||
<description>
|
||||
The Sofa names the annotator should work on. If no
|
||||
names are specified, the annotator works on the
|
||||
default sofa.
|
||||
</description>
|
||||
<type>String</type>
|
||||
<multiValued>true</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
</configurationParameter>
|
||||
|
||||
</configurationParameters>
|
||||
|
||||
<configurationParameterSettings>
|
||||
<!--
|
||||
<nameValuePair>
|
||||
<name>SofaNames</name>
|
||||
<value>
|
||||
<array>
|
||||
<string>sofaName</string>
|
||||
</array>
|
||||
</value>
|
||||
</nameValuePair>
|
||||
-->
|
||||
</configurationParameterSettings>
|
||||
|
||||
<typeSystemDescription>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.TokenAnnotation</name>
|
||||
<description>Single token annotation</description>
|
||||
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>tokenType</name>
|
||||
<description>token type</description>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.SentenceAnnotation</name>
|
||||
<description>sentence annotation</description>
|
||||
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||
<features>
|
||||
|
||||
</features>
|
||||
</typeDescription>
|
||||
</typeSystemDescription>
|
||||
|
||||
<fsIndexes />
|
||||
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs />
|
||||
<outputs>
|
||||
<type>org.apache.uima.TokenAnnotation</type>
|
||||
<feature>
|
||||
org.apache.uima.TokenAnnotation:tokentype
|
||||
</feature>
|
||||
<type>org.apache.uima.SentenceAnnotation</type>
|
||||
</outputs>
|
||||
<languagesSupported>
|
||||
<language>x-unspecified</language>
|
||||
</languagesSupported>
|
||||
</capability>
|
||||
</capabilities>
|
||||
|
||||
</analysisEngineMetaData>
|
||||
</analysisEngineDescription>
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
|
||||
-->
|
||||
<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<name>baseAlchemyTypeSystemDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.uima.alchemy.ts.entity.AlchemyAnnotation</name>
|
||||
<description/>
|
||||
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>alchemyType</name>
|
||||
<description>alchemyAPI type</description>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
|
@ -1,48 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version
|
||||
2.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||
applicable law or agreed to in writing, software distributed under
|
||||
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
|
||||
OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<updateRequestProcessorChain name="uima">
|
||||
<processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
|
||||
<lst name="uimaConfig">
|
||||
<lst name="runtimeParameters">
|
||||
<str name="keyword_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="concept_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="lang_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="cat_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="entities_apikey">VALID_ALCHEMYAPI_KEY</str>
|
||||
<str name="oc_licenseID">VALID_OPENCALAIS_KEY</str>
|
||||
</lst>
|
||||
<str name="analysisEngine">/org/apache/uima/desc/OverridingParamsExtServicesAE.xml</str>
|
||||
<lst name="analyzeFields">
|
||||
<bool name="merge">false</bool>
|
||||
<arr name="fields">
|
||||
<str>text</str>
|
||||
<str>title</str>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst name="fieldMappings">
|
||||
<lst name="mapping">
|
||||
<str name="type">org.apache.uima.jcas.tcas.Annotation</str>
|
||||
<str name="feature">convertText</str>
|
||||
<str name="field">tag</str>
|
||||
</lst>
|
||||
</lst>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.LogUpdateProcessorFactory" />
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
|
@ -1,9 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<fields>
|
||||
<field name="language" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="concept" type="string" indexed="true" stored="true" multiValued="true" required="false"/>
|
||||
<field name="keyword" type="string" indexed="true" stored="true" multiValued="true" required="false"/>
|
||||
<field name="suggested_category" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||
<field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false" />
|
||||
<dynamicField name="entity*" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
</fields>
|
|
@ -1,70 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>false</primitive>
|
||||
<delegateAnalysisEngineSpecifiers>
|
||||
<delegateAnalysisEngine key="WhitespaceTokenizer">
|
||||
<import name="WhitespaceTokenizer"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="HmmTagger">
|
||||
<import name="HmmTagger"/>
|
||||
</delegateAnalysisEngine>
|
||||
</delegateAnalysisEngineSpecifiers>
|
||||
<analysisEngineMetaData>
|
||||
<name>AggregateSentenceAE</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>ngramsize</name>
|
||||
<type>Integer</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
<overrides>
|
||||
<parameter>HmmTagger/NGRAM_SIZE</parameter>
|
||||
</overrides>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings/>
|
||||
<flowConstraints>
|
||||
<fixedFlow>
|
||||
<node>WhitespaceTokenizer</node>
|
||||
<node>HmmTagger</node>
|
||||
</fixedFlow>
|
||||
</flowConstraints>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs>
|
||||
<type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
|
||||
<type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
|
||||
</outputs>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,68 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.solr.uima.processor.an.DummyEntityAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>DummyEntityAEDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor>ASF</vendor>
|
||||
<configurationParameters/>
|
||||
<configurationParameterSettings/>
|
||||
<typeSystemDescription>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.solr.uima.ts.EntityAnnotation</name>
|
||||
<description/>
|
||||
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>name</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>entity</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs>
|
||||
<type allAnnotatorFeatures="true">org.apache.solr.uima.ts.EntityAnnotation</type>
|
||||
</outputs>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,40 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.solr.uima.processor.an.DummyExceptionAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>DummyExceptionAEDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor>ASF</vendor>
|
||||
<configurationParameters/>
|
||||
<configurationParameterSettings/>
|
||||
<typeSystemDescription/>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities/>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,60 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.solr.uima.processor.an.DummySentimentAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>DummySentimentAnalysisAEDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor>ASF</vendor>
|
||||
<configurationParameters/>
|
||||
<configurationParameterSettings/>
|
||||
<typeSystemDescription>
|
||||
<types>
|
||||
<typeDescription>
|
||||
<name>org.apache.solr.uima.ts.DummySentimentAnnotation</name>
|
||||
<description/>
|
||||
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>mood</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,72 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>false</primitive>
|
||||
<delegateAnalysisEngineSpecifiers>
|
||||
<delegateAnalysisEngine key="AggregateSentenceAE">
|
||||
<import location="AggregateSentenceAE.xml"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="DummyEntityAEDescriptor">
|
||||
<import location="DummyEntityAEDescriptor.xml"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="DummySentimentAnalysisAEDescriptor">
|
||||
<import location="DummySentimentAnalysisAEDescriptor.xml"/>
|
||||
</delegateAnalysisEngine>
|
||||
</delegateAnalysisEngineSpecifiers>
|
||||
<analysisEngineMetaData>
|
||||
<name>TestAE</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>ngramsize</name>
|
||||
<type>Integer</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
<overrides>
|
||||
<parameter>AggregateSentenceAE/ngramsize</parameter>
|
||||
</overrides>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings/>
|
||||
<flowConstraints>
|
||||
<fixedFlow>
|
||||
<node>AggregateSentenceAE</node>
|
||||
<node>DummyEntityAEDescriptor</node>
|
||||
<node>DummySentimentAnalysisAEDescriptor</node>
|
||||
</fixedFlow>
|
||||
</flowConstraints>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,54 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>false</primitive>
|
||||
<delegateAnalysisEngineSpecifiers>
|
||||
<delegateAnalysisEngine key="DummyExceptionAEDescriptor">
|
||||
<import location="DummyExceptionAEDescriptor.xml"/>
|
||||
</delegateAnalysisEngine>
|
||||
</delegateAnalysisEngineSpecifiers>
|
||||
<analysisEngineMetaData>
|
||||
<name>TestExceptionAE</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters/>
|
||||
<configurationParameterSettings/>
|
||||
<flowConstraints>
|
||||
<fixedFlow>
|
||||
<node>DummyExceptionAEDescriptor</node>
|
||||
</fixedFlow>
|
||||
</flowConstraints>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1,21 +0,0 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# Use a protected word file to protect against the stemmer reducing two
|
||||
# unrelated words to the same base word.
|
||||
|
||||
# Some non-words that normally won't be encountered,
|
||||
# just to test that they won't be stemmed.
|
||||
dontstems
|
||||
zwhacky
|
||||
|
|
@ -1,612 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version
|
||||
2.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||
applicable law or agreed to in writing, software distributed under
|
||||
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
|
||||
OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
This is the Solr schema file. This file should be named "schema.xml"
|
||||
and should be in the conf directory under the solr home (i.e.
|
||||
./solr/conf/schema.xml by default) or located where the classloader
|
||||
for the Solr webapp can find it. This example schema is the
|
||||
recommended starting point for users. It should be kept correct and
|
||||
concise, usable out-of-the-box. For more information on how to
|
||||
customize this file, please see
|
||||
http://wiki.apache.org/solr/SchemaXml PERFORMANCE NOTE: this schema
|
||||
includes many optional features and should not be used for
|
||||
benchmarking. To improve performance one could - set stored="false"
|
||||
for all fields possible (esp large fields) when you only need to
|
||||
search on the field but don't need to return the original value. -
|
||||
set indexed="false" if you don't need to search on the field, but
|
||||
only return the field as a result of searching on other indexed
|
||||
fields. - remove all unneeded copyField statements - for best index
|
||||
size and searching performance, set "index" to false for all general
|
||||
text fields, use copyField to copy them to the catchall "text"
|
||||
field, and use that for searching. - For maximum indexing
|
||||
performance, use the ConcurrentUpdateSolrServer java client. -
|
||||
Remember to run the JVM in server mode, and use a higher logging
|
||||
level that avoids logging every request
|
||||
-->
|
||||
|
||||
<schema name="sample" version="1.2">
|
||||
<!--
|
||||
attribute "name" is the name of this schema and is only used for
|
||||
display purposes. Applications should change this to reflect the
|
||||
nature of the search collection. version="1.2" is Solr's version
|
||||
number for the schema syntax and semantics. It should not normally
|
||||
be changed by applications. 1.0: multiValued attribute did not
|
||||
exist, all fields are multiValued by nature 1.1: multiValued
|
||||
attribute introduced, false by default 1.2: omitTermFreqAndPositions
|
||||
attribute introduced, true by default except for text fields.
|
||||
-->
|
||||
|
||||
<!--
|
||||
field type definitions. The "name" attribute is just a label to be
|
||||
used by field definitions. The "class" attribute and any other
|
||||
attributes determine the real behavior of the fieldType. Class
|
||||
names starting with "solr" refer to java classes in the
|
||||
org.apache.solr.analysis package.
|
||||
-->
|
||||
|
||||
<!--
|
||||
The StrField type is not analyzed, but indexed/stored verbatim. -
|
||||
StrField and TextField support an optional compressThreshold which
|
||||
limits compression (if enabled in the derived fields) to values
|
||||
which exceed a certain size (in characters).
|
||||
-->
|
||||
<fieldType name="string" class="solr.StrField"
|
||||
sortMissingLast="true" omitNorms="true" />
|
||||
|
||||
<!-- boolean type: "true" or "false" -->
|
||||
<fieldType name="boolean" class="solr.BoolField"
|
||||
sortMissingLast="true" omitNorms="true" />
|
||||
<!--
|
||||
Binary data type. The data should be sent/retrieved as Base64
|
||||
encoded Strings
|
||||
-->
|
||||
<fieldType name="binary" class="solr.BinaryField" />
|
||||
|
||||
<!--
|
||||
If sortMissingLast="true", then a sort on this field will cause
|
||||
documents without the field to come after documents with the
|
||||
field, regardless of the requested sort order (asc or desc). - If
|
||||
sortMissingFirst="true", then a sort on this field will cause
|
||||
documents without the field to come before documents with the
|
||||
field, regardless of the requested sort order. - If
|
||||
sortMissingLast="false" and sortMissingFirst="false" (the
|
||||
default), then default lucene sorting will be used which places
|
||||
docs without the field first in an ascending sort and last in a
|
||||
descending sort.
|
||||
-->
|
||||
|
||||
<!--
|
||||
Default numeric field types. For faster range queries, consider
|
||||
the tint/tfloat/tlong/tdouble types.
|
||||
-->
|
||||
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
|
||||
<fieldType name="float" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
|
||||
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
|
||||
<fieldType name="double" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
|
||||
|
||||
<!--
|
||||
Numeric field types that index each value at various levels of
|
||||
precision to accelerate range queries when the number of values
|
||||
between the range endpoints is large. See the javadoc for
|
||||
LegacyNumericRangeQuery for internal implementation details. Smaller
|
||||
precisionStep values (specified in bits) will lead to more tokens
|
||||
indexed per value, slightly larger index size, and faster range
|
||||
queries. A precisionStep of 0 disables indexing at different
|
||||
precision levels.
|
||||
-->
|
||||
<fieldType name="tint" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
|
||||
<fieldType name="tfloat" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
|
||||
<fieldType name="tlong" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
|
||||
<fieldType name="tdouble" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
|
||||
|
||||
<!--
|
||||
The format for this date field is of the form
|
||||
1995-12-31T23:59:59Z, and is a more restricted form of the
|
||||
canonical representation of dateTime
|
||||
http://www.w3.org/TR/xmlschema-2/#dateTime The trailing "Z"
|
||||
designates UTC time and is mandatory. Optional fractional seconds
|
||||
are allowed: 1995-12-31T23:59:59.999Z All other components are
|
||||
mandatory. Expressions can also be used to denote calculations
|
||||
that should be performed relative to "NOW" to determine the value,
|
||||
ie... NOW/HOUR ... Round to the start of the current hour NOW-1DAY
|
||||
... Exactly 1 day prior to now NOW/DAY+6MONTHS+3DAYS ... 6 months
|
||||
and 3 days in the future from the start of the current day Consult
|
||||
the TrieDateField javadocs for more information. Note: For faster
|
||||
range queries, consider the tdate type
|
||||
-->
|
||||
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
omitNorms="true" precisionStep="0" positionIncrementGap="0" />
|
||||
|
||||
<!--
|
||||
A Trie based date field for faster date range queries and date
|
||||
faceting.
|
||||
-->
|
||||
<fieldType name="tdate" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
omitNorms="true" precisionStep="6" positionIncrementGap="0" />
|
||||
|
||||
<!--
|
||||
The "RandomSortField" is not used to store or search any data. You
|
||||
can declare fields of this type in your schema to generate
|
||||
pseudo-random orderings of your docs for sorting purposes. The
|
||||
ordering is generated based on the field name and the version of
|
||||
the index. As long as the index version remains unchanged, and the
|
||||
same field name is reused, the ordering of the docs will be
|
||||
consistent. If you want different pseudo-random orderings of
|
||||
documents, for the same version of the index, use a dynamicField
|
||||
and change the name
|
||||
-->
|
||||
<fieldType name="random" class="solr.RandomSortField"
|
||||
indexed="true" />
|
||||
|
||||
<!--
|
||||
solr.TextField allows the specification of custom text analyzers
|
||||
specified as a tokenizer and a list of token filters. Different
|
||||
analyzers may be specified for indexing and querying. The optional
|
||||
positionIncrementGap puts space between multiple fields of this
|
||||
type on the same document, with the purpose of preventing false
|
||||
phrase matching across fields. For more info on customizing your
|
||||
analyzer chain, please see
|
||||
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
||||
-->
|
||||
|
||||
<!--
|
||||
One can also specify an existing Analyzer class that has a default
|
||||
constructor via the class attribute on the analyzer element
|
||||
<fieldType name="text_greek" class="solr.TextField"> <analyzer
|
||||
class="org.apache.lucene.analysis.el.GreekAnalyzer"/> </fieldType>
|
||||
-->
|
||||
|
||||
<!--
|
||||
A text field that only splits on whitespace for exact matching of
|
||||
words
|
||||
-->
|
||||
<fieldType name="text_ws" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!--
|
||||
A text field that uses WordDelimiterGraphFilter to enable splitting and
|
||||
matching of words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars, so that a query of "wifi" or "wi fi" could
|
||||
match a document containing "Wi-Fi". Synonyms and stopwords are
|
||||
customized by external files, and stemming is enabled.
|
||||
-->
|
||||
<fieldType name="text" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<!--
|
||||
in this example, we will only use synonyms at query time
|
||||
<filter class="solr.SynonymGraphFilterFactory"
|
||||
synonyms="index_synonyms.txt" ignoreCase="true"
|
||||
expand="false"/>
|
||||
-->
|
||||
<!--
|
||||
Case insensitive stop word removal.
|
||||
-->
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
|
||||
ignoreCase="true" expand="true" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!--
|
||||
Less flexible matching, but fewer false matches. Probably not ideal
|
||||
for product names, but may be good for SKUs. Can insert dashes in
|
||||
the wrong place and still match.
|
||||
-->
|
||||
<fieldType name="textTight" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
|
||||
ignoreCase="true" expand="false" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
|
||||
<!--
|
||||
this filter can remove any duplicate tokens that appear at the
|
||||
same position - sometimes possible with WordDelimiterGraphFilter in
|
||||
conjunction with stemming.
|
||||
-->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
|
||||
ignoreCase="true" expand="false" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!--
|
||||
A general unstemmed text field - good if one does not know the
|
||||
language of the field
|
||||
-->
|
||||
<fieldType name="textgen" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
|
||||
ignoreCase="true" expand="true" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!--
|
||||
A general unstemmed text field that indexes tokens normally and
|
||||
also reversed (via ReversedWildcardFilterFactory), to enable more
|
||||
efficient leading wildcard queries.
|
||||
-->
|
||||
<fieldType name="text_rev" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.ReversedWildcardFilterFactory"
|
||||
withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2"
|
||||
maxFractionAsterisk="0.33" />
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"
|
||||
ignoreCase="true" expand="true" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- charFilter + WhitespaceTokenizer -->
|
||||
<!--
|
||||
<fieldType name="textCharNorm" class="solr.TextField"
|
||||
positionIncrementGap="100" > <analyzer> <charFilter
|
||||
class="solr.MappingCharFilterFactory"
|
||||
mapping="mapping-ISOLatin1Accent.txt"/> <tokenizer
|
||||
class="solr.MockTokenizerFactory"/> </analyzer> </fieldType>
|
||||
-->
|
||||
|
||||
<!--
|
||||
This is an example of using the KeywordTokenizer along with
|
||||
various TokenFilterFactories to produce a sortable field that does
|
||||
not include some properties of the source text
|
||||
-->
|
||||
<fieldType name="alphaOnlySort" class="solr.TextField"
|
||||
sortMissingLast="true" omitNorms="true">
|
||||
<analyzer>
|
||||
<!--
|
||||
KeywordTokenizer does no actual tokenizing, so the entire
|
||||
input string is preserved as a single token
|
||||
-->
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<!--
|
||||
The LowerCase TokenFilter does what you expect, which can be useful
|
||||
when you want your sorting to be case insensitive
|
||||
-->
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<!-- The TrimFilter removes any leading or trailing whitespace -->
|
||||
<filter class="solr.TrimFilterFactory" />
|
||||
<!--
|
||||
The PatternReplaceFilter gives you the flexibility to use Java
|
||||
Regular expression to replace any sequence of characters
|
||||
matching a pattern with an arbitrary replacement string, which
|
||||
may include back references to portions of the original string
|
||||
matched by the pattern. See the Java Regular Expression
|
||||
documentation for more information on pattern and replacement
|
||||
string syntax.
|
||||
|
||||
http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
|
||||
-->
|
||||
<filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])"
|
||||
replacement="" replace="all" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="phonetic" stored="false" indexed="true"
|
||||
class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory" />
|
||||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="payloads" stored="false" indexed="true"
|
||||
class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<!--
|
||||
The DelimitedPayloadTokenFilter can put payloads on tokens...
|
||||
for example, a token of "foo|1.4" would be indexed as "foo"
|
||||
with a payload of 1.4f Attributes of the
|
||||
DelimitedPayloadTokenFilterFactory : "delimiter" - a one
|
||||
character delimiter. Default is | (pipe) "encoder" - how to
|
||||
encode the following value into a payload float ->
|
||||
org.apache.lucene.analysis.payloads.FloatEncoder, integer ->
|
||||
o.a.l.a.p.IntegerEncoder identity -> o.a.l.a.p.IdentityEncoder
|
||||
Fully Qualified class name implementing PayloadEncoder,
|
||||
Encoder must have a no arg constructor.
|
||||
-->
|
||||
<filter class="solr.DelimitedPayloadTokenFilterFactory"
|
||||
encoder="float" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!--
|
||||
lowercases the entire field value, keeping it as a single token.
|
||||
-->
|
||||
<fieldType name="lowercase" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!--
|
||||
since fields of this type are by default not stored or indexed,
|
||||
any data added to them will be ignored outright.
|
||||
-->
|
||||
<fieldType name="ignored" stored="false" indexed="false"
|
||||
multiValued="true" class="solr.StrField" />
|
||||
|
||||
|
||||
|
||||
<!--
|
||||
Valid attributes for fields: name: mandatory - the name for the
|
||||
field type: mandatory - the name of a previously defined type from
|
||||
the <fieldType>s indexed: true if this field should be indexed
|
||||
(searchable or sortable) stored: true if this field should be
|
||||
retrievable multiValued: true if this field may contain multiple
|
||||
values per document omitNorms: (expert) set to true to omit the
|
||||
norms associated with this field (this disables length
|
||||
normalization and index-time boosting for the field, and saves
|
||||
some memory). Only full-text fields or fields that need an
|
||||
index-time boost need norms. termVectors: [false] set to true to
|
||||
store the term vector for a given field. When using MoreLikeThis,
|
||||
fields used for similarity should be stored for best performance.
|
||||
termPositions: Store position information with the term vector.
|
||||
This will increase storage costs. termOffsets: Store offset
|
||||
information with the term vector. This will increase storage
|
||||
costs. default: a value that should be used if no value is
|
||||
specified when adding a document.
|
||||
-->
|
||||
<field name="id" type="string" indexed="true" stored="true"
|
||||
required="true" />
|
||||
<field name="sku" type="textTight" indexed="true" stored="true"
|
||||
omitNorms="true" />
|
||||
<field name="name" type="textgen" indexed="true" stored="true" />
|
||||
<field name="alphaNameSort" type="alphaOnlySort" indexed="true"
|
||||
stored="false" />
|
||||
<field name="manu" type="textgen" indexed="true" stored="true"
|
||||
omitNorms="true" />
|
||||
<field name="cat" type="text_ws" indexed="true" stored="true"
|
||||
multiValued="true" omitNorms="true" />
|
||||
<field name="features" type="text" indexed="true" stored="true"
|
||||
multiValued="true" />
|
||||
<field name="includes" type="text" indexed="true" stored="true"
|
||||
termVectors="true" termPositions="true" termOffsets="true" />
|
||||
|
||||
<field name="weight" type="float" indexed="true" stored="true" />
|
||||
<field name="price" type="float" indexed="true" stored="true" />
|
||||
<field name="popularity" type="int" indexed="true" stored="true" />
|
||||
<field name="inStock" type="boolean" indexed="true" stored="true" />
|
||||
|
||||
|
||||
<!--
|
||||
Common metadata fields, named specifically to match up with
|
||||
SolrCell metadata when parsing rich documents such as Word, PDF.
|
||||
Some fields are multiValued only because Tika currently may return
|
||||
multiple values for them.
|
||||
-->
|
||||
<field name="title" type="text" indexed="true" stored="true"
|
||||
multiValued="true" />
|
||||
<field name="subject" type="text" indexed="true" stored="true" />
|
||||
<field name="description" type="text" indexed="true" stored="true" />
|
||||
<field name="comments" type="text" indexed="true" stored="true" />
|
||||
<field name="author" type="textgen" indexed="true" stored="true" />
|
||||
<field name="keywords" type="textgen" indexed="true" stored="true" />
|
||||
<field name="category" type="textgen" indexed="true" stored="true" />
|
||||
<field name="content_type" type="string" indexed="true"
|
||||
stored="true" multiValued="true" />
|
||||
<field name="last_modified" type="date" indexed="true" stored="true" />
|
||||
<field name="links" type="string" indexed="true" stored="true"
|
||||
multiValued="true" />
|
||||
|
||||
|
||||
<!--
|
||||
catchall field, containing all other searchable text fields
|
||||
(implemented via copyField further on in this schema)
|
||||
-->
|
||||
<field name="text" type="text" indexed="true" stored="false"
|
||||
multiValued="true" />
|
||||
|
||||
<!--
|
||||
catchall text field that indexes tokens both normally and in
|
||||
reverse for efficient leading wildcard queries.
|
||||
-->
|
||||
<field name="text_rev" type="text_rev" indexed="true" stored="false"
|
||||
multiValued="true" />
|
||||
|
||||
<!--
|
||||
non-tokenized version of manufacturer to make it easier to sort or
|
||||
group results by manufacturer. copied from "manu" via copyField
|
||||
-->
|
||||
<field name="manu_exact" type="string" indexed="true" stored="false" />
|
||||
|
||||
<field name="payloads" type="payloads" indexed="true" stored="true" />
|
||||
|
||||
<!--
|
||||
Uncommenting the following will create a "timestamp" field using a
|
||||
default value of "NOW" to indicate when each document was indexed.
|
||||
-->
|
||||
<!--
|
||||
<field name="timestamp" type="date" indexed="true" stored="true"
|
||||
default="NOW" multiValued="false"/>
|
||||
-->
|
||||
|
||||
<field name="language" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false" />
|
||||
<field name="sentiment" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="entity" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<!--
|
||||
Dynamic field definitions. If a field name is not found,
|
||||
dynamicFields will be used if the name matches any of the
|
||||
patterns. RESTRICTION: the glob-like pattern in the name attribute
|
||||
must have a "*" only at the start or the end. EXAMPLE: name="*_i"
|
||||
will match any field ending in _i (like myid_i, z_i) Longer
|
||||
patterns will be matched first. if equal size patterns both match,
|
||||
the first appearing in the schema will be used. <dynamicField
|
||||
name="*_i" type="int" indexed="true" stored="true"/> <dynamicField
|
||||
name="*_s" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_l" type="long" indexed="true"
|
||||
stored="true"/> <dynamicField name="*_t" type="text"
|
||||
indexed="true" stored="true"/> <dynamicField name="*_b"
|
||||
type="boolean" indexed="true" stored="true"/> <dynamicField
|
||||
name="*_f" type="float" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_d" type="double" indexed="true"
|
||||
stored="true"/> <dynamicField name="*_dt" type="date"
|
||||
indexed="true" stored="true"/> <dynamicField name="*_ti"
|
||||
type="tint" indexed="true" stored="true"/> <dynamicField
|
||||
name="*_tl" type="tlong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tf" type="tfloat" indexed="true"
|
||||
stored="true"/> <dynamicField name="*_td" type="tdouble"
|
||||
indexed="true" stored="true"/> <dynamicField name="*_tdt"
|
||||
type="tdate" indexed="true" stored="true"/>
|
||||
|
||||
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
|
||||
<dynamicField name="attr_*" type="textgen" indexed="true"
|
||||
stored="true" multiValued="true"/> <dynamicField name="random_*"
|
||||
type="random" />
|
||||
-->
|
||||
<dynamicField name="*_sm" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<!--
|
||||
uncomment the following to ignore any fields that don't already
|
||||
match an existing field name or dynamic field, rather than
|
||||
reporting them as an error. alternately, change the type="ignored"
|
||||
to some other type e.g. "text" if you want unknown fields indexed
|
||||
and/or stored by default
|
||||
-->
|
||||
<!--dynamicField name="*" type="ignored" multiValued="true" /-->
|
||||
|
||||
|
||||
<!--
|
||||
Field to use to determine and enforce document uniqueness. Unless
|
||||
this field is marked with required="false", it will be a required
|
||||
field
|
||||
-->
|
||||
<uniqueKey>id</uniqueKey>
|
||||
|
||||
<!--
|
||||
copyField commands copy one field to another at the time a document
|
||||
is added to the index. It's used either to index the same field
|
||||
differently, or to add multiple fields to the same field for
|
||||
easier/faster searching.
|
||||
-->
|
||||
|
||||
<copyField source="cat" dest="text" />
|
||||
<copyField source="name" dest="text" />
|
||||
<copyField source="manu" dest="text" />
|
||||
<copyField source="features" dest="text" />
|
||||
<copyField source="includes" dest="text" />
|
||||
<copyField source="manu" dest="manu_exact" />
|
||||
|
||||
|
||||
<!--copyField source="Titolo" dest="text"/-->
|
||||
|
||||
<!--
|
||||
Above, multiple source fields are copied to the [text] field.
|
||||
Another way to map multiple source fields to the same destination
|
||||
field is to use the dynamic field syntax. copyField also supports a
|
||||
maxChars to copy setting.
|
||||
-->
|
||||
|
||||
<!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
|
||||
|
||||
<!--
|
||||
copy name to alphaNameSort, a field designed for sorting by name
|
||||
-->
|
||||
<!-- <copyField source="name" dest="alphaNameSort"/> -->
|
||||
|
||||
</schema>
|
|
@ -1,773 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version
|
||||
2.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||
applicable law or agreed to in writing, software distributed under
|
||||
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
|
||||
OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!--
|
||||
For more details about configurations options that may appear in
|
||||
this file, see http://wiki.apache.org/solr/SolrConfigXml.
|
||||
|
||||
Specifically, the Solr Config can support XInclude, which may make
|
||||
it easier to manage the configuration. See
|
||||
https://issues.apache.org/jira/browse/SOLR-1167
|
||||
-->
|
||||
<config xmlns:xi="http://www.w3.org/2001/XInclude">
|
||||
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
|
||||
<indexConfig>
|
||||
<useCompoundFile>${useCompoundFile:false}</useCompoundFile>
|
||||
</indexConfig>
|
||||
<!--
|
||||
lib directives can be used to instruct Solr to load any Jars
|
||||
identified and use them to resolve any "plugins" specified in your
|
||||
solrconfig.xml or schema.xml (ie: Analyzers, Request Handlers,
|
||||
etc...). All directories and paths are resolved relative to the
|
||||
instanceDir. If a "./lib" directory exists in your instanceDir, all
|
||||
files found in it are included as if you had used the following
|
||||
syntax... <lib dir="./lib" />
|
||||
-->
|
||||
<!--
|
||||
A dir option by itself adds any files found in the directory to the
|
||||
classpath, this is useful for including all jars in a directory.
|
||||
-->
|
||||
<lib dir="../../contrib/extraction/lib" />
|
||||
<!--
|
||||
When a regex is specified in addition to a directory, only the files
|
||||
in that directory which completely match the regex (anchored on both
|
||||
ends) will be included.
|
||||
-->
|
||||
<lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
|
||||
<!--
|
||||
If a dir option (with or without a regex) is used and nothing is
|
||||
found that matches, it will be ignored
|
||||
-->
|
||||
<lib dir="/total/crap/dir/ignored" />
|
||||
<!--
|
||||
an exact path can be used to specify a specific file. This will
|
||||
cause a serious error to be logged if it can't be loaded. <lib
|
||||
path="../a-jar-that-does-not-exist.jar" />
|
||||
-->
|
||||
|
||||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
|
||||
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
||||
|
||||
<!--
|
||||
Used to specify an alternate directory to hold all index data other
|
||||
than the default ./data under the Solr home. If replication is in
|
||||
use, this should match the replication configuration.
|
||||
-->
|
||||
<dataDir>${solr.data.dir:}</dataDir>
|
||||
|
||||
|
||||
<!--
|
||||
Enables JMX if and only if an existing MBeanServer is found, use
|
||||
this if you want to configure JMX through JVM parameters. Remove
|
||||
this to disable exposing Solr configuration and statistics to JMX.
|
||||
|
||||
If you want to connect to a particular server, specify the agentId
|
||||
e.g. <jmx agentId="myAgent" /> If you want to start a new
|
||||
MBeanServer, specify the serviceUrl e.g <jmx
|
||||
serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> For
|
||||
more details see http://wiki.apache.org/solr/SolrJmx
|
||||
-->
|
||||
<jmx />
|
||||
|
||||
<!-- the default high-performance update handler -->
|
||||
<updateHandler class="solr.DirectUpdateHandler2">
|
||||
<!--
|
||||
A prefix of "solr." for class names is an alias that causes solr
|
||||
to search appropriate packages, including
|
||||
org.apache.solr.(search|update|request|core|analysis)
|
||||
-->
|
||||
|
||||
<!--
|
||||
Perform a <commit/> automatically under certain conditions:
|
||||
maxDocs - number of updates since last commit is greater than this
|
||||
maxTime - oldest uncommitted update (in ms) is this long ago
|
||||
Instead of enabling autoCommit, consider using "commitWithin" when
|
||||
adding documents. http://wiki.apache.org/solr/UpdateXmlMessages
|
||||
<autoCommit> <maxDocs>10000</maxDocs> <maxTime>1000</maxTime>
|
||||
</autoCommit>
|
||||
-->
|
||||
|
||||
</updateHandler>
|
||||
|
||||
<!--
|
||||
Use the following format to specify a custom IndexReaderFactory -
|
||||
allows for alternate IndexReader implementations. ** Experimental
|
||||
Feature ** Please note - Using a custom IndexReaderFactory may
|
||||
prevent certain other features from working. The API to
|
||||
IndexReaderFactory may change without warning or may even be removed
|
||||
from future releases if the problems cannot be resolved. ** Features
|
||||
that may not work with custom IndexReaderFactory ** The
|
||||
ReplicationHandler assumes a disk-resident index. Using a custom
|
||||
IndexReader implementation may cause incompatibility with
|
||||
ReplicationHandler and may cause replication to not work correctly.
|
||||
See SOLR-1366 for details. <indexReaderFactory
|
||||
name="IndexReaderFactory" class="package.class"> Parameters as
|
||||
required by the implementation </indexReaderFactory >
|
||||
-->
|
||||
<!-- To set the termInfosIndexDivisor, do this: -->
|
||||
<!--
|
||||
<indexReaderFactory name="IndexReaderFactory"
|
||||
class="org.apache.solr.core.StandardIndexReaderFactory"> <int
|
||||
name="setTermIndexDivisor">12</int> </indexReaderFactory >
|
||||
-->
|
||||
|
||||
|
||||
<query>
|
||||
<!--
|
||||
Maximum number of clauses in a boolean query... in the past, this
|
||||
affected range or prefix queries that expanded to big boolean
|
||||
queries - built in Solr query parsers no longer create queries
|
||||
with this limitation. An exception is thrown if exceeded.
|
||||
-->
|
||||
<maxBooleanClauses>1024</maxBooleanClauses>
|
||||
|
||||
|
||||
<!--
|
||||
There are two implementations of cache available for Solr,
|
||||
LRUCache, based on a synchronized LinkedHashMap, and FastLRUCache,
|
||||
based on a ConcurrentHashMap. FastLRUCache has faster gets and
|
||||
slower puts in single threaded operation and thus is generally
|
||||
faster than LRUCache when the hit ratio of the cache is high (>
|
||||
75%), and may be faster under other scenarios on multi-cpu
|
||||
systems.
|
||||
-->
|
||||
<!--
|
||||
Cache used by SolrIndexSearcher for filters (DocSets), unordered
|
||||
sets of *all* documents that match a query. When a new searcher is
|
||||
opened, its caches may be prepopulated or "autowarmed" using data
|
||||
from caches in the old searcher. autowarmCount is the number of
|
||||
items to prepopulate. For LRUCache, the autowarmed items will be
|
||||
the most recently accessed items. Parameters: class - the
|
||||
SolrCache implementation LRUCache or FastLRUCache size - the
|
||||
maximum number of entries in the cache initialSize - the initial
|
||||
capacity (number of entries) of the cache. (see
|
||||
java.util.HashMap) autowarmCount - the number of entries to
|
||||
prepopulate from an old cache.
|
||||
-->
|
||||
<filterCache class="solr.FastLRUCache" size="512"
|
||||
initialSize="512" autowarmCount="0" />
|
||||
|
||||
<!--
|
||||
Cache used to hold field values that are quickly accessible by
|
||||
document id. The fieldValueCache is created by default even if not
|
||||
configured here. <fieldValueCache class="solr.FastLRUCache"
|
||||
size="512" autowarmCount="128" showItems="32" />
|
||||
-->
|
||||
|
||||
<!--
|
||||
queryResultCache caches results of searches - ordered lists of
|
||||
document ids (DocList) based on a query, a sort, and the range of
|
||||
documents requested.
|
||||
-->
|
||||
<queryResultCache class="solr.LRUCache" size="512"
|
||||
initialSize="512" autowarmCount="0" />
|
||||
|
||||
<!--
|
||||
documentCache caches Lucene Document objects (the stored fields
|
||||
for each document). Since Lucene internal document ids are
|
||||
transient, this cache will not be autowarmed.
|
||||
-->
|
||||
<documentCache class="solr.LRUCache" size="512"
|
||||
initialSize="512" autowarmCount="0" />
|
||||
|
||||
<!--
|
||||
If true, stored fields that are not requested will be loaded
|
||||
lazily. This can result in a significant speed improvement if the
|
||||
usual case is to not load all stored fields, especially if the
|
||||
skipped fields are large compressed text fields.
|
||||
-->
|
||||
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
||||
|
||||
<!--
|
||||
Example of a generic cache. These caches may be accessed by name
|
||||
through SolrIndexSearcher.getCache(),cacheLookup(), and
|
||||
cacheInsert(). The purpose is to enable easy caching of
|
||||
user/application level data. The regenerator argument should be
|
||||
specified as an implementation of solr.search.CacheRegenerator if
|
||||
autowarming is desired.
|
||||
-->
|
||||
<!--
|
||||
<cache name="myUserCache" class="solr.LRUCache" size="4096"
|
||||
initialSize="1024" autowarmCount="1024"
|
||||
regenerator="org.mycompany.mypackage.MyRegenerator" />
|
||||
-->
|
||||
|
||||
<!--
|
||||
An optimization that attempts to use a filter to satisfy a search.
|
||||
If the requested sort does not include score, then the filterCache
|
||||
will be checked for a filter matching the query. If found, the
|
||||
filter will be used as the source of document ids, and then the
|
||||
sort will be applied to that.
|
||||
<useFilterForSortedQuery>true</useFilterForSortedQuery>
|
||||
-->
|
||||
|
||||
<!--
|
||||
An optimization for use with the queryResultCache. When a search
|
||||
is requested, a superset of the requested number of document ids
|
||||
are collected. For example, if a search for a particular query
|
||||
requests matching documents 10 through 19, and queryWindowSize is
|
||||
50, then documents 0 through 49 will be collected and cached. Any
|
||||
further requests in that range can be satisfied via the cache.
|
||||
-->
|
||||
<queryResultWindowSize>20</queryResultWindowSize>
|
||||
|
||||
<!--
|
||||
Maximum number of documents to cache for any entry in the
|
||||
queryResultCache.
|
||||
-->
|
||||
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
|
||||
|
||||
<!--
|
||||
a newSearcher event is fired whenever a new searcher is being
|
||||
prepared and there is a current searcher handling requests (aka
|
||||
registered). It can be used to prime certain caches to prevent
|
||||
long request times for certain requests.
|
||||
-->
|
||||
<!--
|
||||
QuerySenderListener takes an array of NamedList and executes a
|
||||
local query request for each NamedList in sequence.
|
||||
-->
|
||||
<listener event="newSearcher" class="solr.QuerySenderListener">
|
||||
<arr name="queries">
|
||||
<!--
|
||||
<lst> <str name="q">solr</str> <str name="start">0</str> <str
|
||||
name="rows">10</str> </lst> <lst> <str name="q">rocks</str>
|
||||
<str name="start">0</str> <str name="rows">10</str> </lst>
|
||||
<lst><str name="q">static newSearcher warming query from
|
||||
solrconfig.xml</str></lst>
|
||||
-->
|
||||
</arr>
|
||||
</listener>
|
||||
|
||||
<!--
|
||||
a firstSearcher event is fired whenever a new searcher is being
|
||||
prepared but there is no current registered searcher to handle
|
||||
requests or to gain autowarming data from.
|
||||
-->
|
||||
<listener event="firstSearcher" class="solr.QuerySenderListener">
|
||||
<arr name="queries">
|
||||
<lst>
|
||||
<str name="q">solr rocks</str>
|
||||
<str name="start">0</str>
|
||||
<str name="rows">10</str>
|
||||
</lst>
|
||||
<lst>
|
||||
<str name="q">static firstSearcher warming query from
|
||||
solrconfig.xml</str>
|
||||
</lst>
|
||||
</arr>
|
||||
</listener>
|
||||
|
||||
<!--
|
||||
If a search request comes in and there is no current registered
|
||||
searcher, then immediately register the still warming searcher and
|
||||
use it. If "false" then all requests will block until the first
|
||||
searcher is done warming.
|
||||
-->
|
||||
<useColdSearcher>false</useColdSearcher>
|
||||
|
||||
<!--
|
||||
Maximum number of searchers that may be warming in the background
|
||||
concurrently. An error is returned if this limit is exceeded.
|
||||
Recommend 1-2 for read-only slaves, higher for masters w/o cache
|
||||
warming.
|
||||
-->
|
||||
<maxWarmingSearchers>2</maxWarmingSearchers>
|
||||
|
||||
</query>
|
||||
|
||||
<requestDispatcher>
|
||||
<!--
|
||||
Make sure your system has some authentication before enabling
|
||||
remote streaming!
|
||||
-->
|
||||
<requestParsers enableRemoteStreaming="false"
|
||||
multipartUploadLimitInKB="-1" />
|
||||
|
||||
<!--
|
||||
Set HTTP caching related parameters (for proxy caches and
|
||||
clients). To get the behaviour of Solr 1.2 (ie: no caching related
|
||||
headers) use the never304="true" option and do not specify a value
|
||||
for <cacheControl>
|
||||
-->
|
||||
<!-- <httpCaching never304="true"> -->
|
||||
<httpCaching lastModifiedFrom="openTime" etagSeed="Solr">
|
||||
<!--
|
||||
lastModFrom="openTime" is the default, the Last-Modified value
|
||||
(and validation against If-Modified-Since requests) will all be
|
||||
relative to when the current Searcher was opened. You can change
|
||||
it to lastModFrom="dirLastMod" if you want the value to exactly
|
||||
correspond to when the physical index was last modified.
|
||||
|
||||
etagSeed="..." is an option you can change to force the ETag
|
||||
header (and validation against If-None-Match requests) to be
|
||||
different even if the index has not changed (ie: when making
|
||||
significant changes to your config file) lastModifiedFrom and
|
||||
etagSeed are both ignored if you use the never304="true" option.
|
||||
-->
|
||||
<!--
|
||||
If you include a <cacheControl> directive, it will be used to
|
||||
generate a Cache-Control header, as well as an Expires header if
|
||||
the value contains "max-age=" By default, no Cache-Control
|
||||
header is generated. You can use the <cacheControl> option even
|
||||
if you have set never304="true"
|
||||
-->
|
||||
<!-- <cacheControl>max-age=30, public</cacheControl> -->
|
||||
</httpCaching>
|
||||
</requestDispatcher>
|
||||
|
||||
<requestHandler name="/select" class="solr.SearchHandler">
|
||||
<!-- default values for query parameters -->
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
<!--
|
||||
<int name="rows">10</int> <str name="fl">*</str> <str
|
||||
name="version">2.1</str>
|
||||
-->
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
<!--
|
||||
DisMaxRequestHandler allows easy searching across multiple fields
|
||||
for simple user-entered phrases. Its implementation is now just the
|
||||
standard SearchHandler with a default query parser of "dismax". see
|
||||
http://wiki.apache.org/solr/DisMaxRequestHandler
|
||||
-->
|
||||
<requestHandler name="/dismax" class="solr.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="defType">dismax</str>
|
||||
<str name="echoParams">explicit</str>
|
||||
<float name="tie">0.01</float>
|
||||
<str name="qf">
|
||||
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0
|
||||
manu^1.1 cat^1.4
|
||||
</str>
|
||||
<str name="pf">
|
||||
text^0.2 features^1.1 name^1.5 manu^1.4
|
||||
manu_exact^1.9
|
||||
</str>
|
||||
<str name="bf">
|
||||
popularity^0.5 recip(price,1,1000,1000)^0.3
|
||||
</str>
|
||||
<str name="fl">
|
||||
id,name,price,score
|
||||
</str>
|
||||
<str name="mm">
|
||||
2&lt;-1 5&lt;-2 6&lt;90% </str>
|
||||
<int name="ps">100</int>
|
||||
<str name="q.alt">*:*</str>
|
||||
<!-- example highlighter config, enable per-query with hl=true -->
|
||||
<str name="hl.fl">text features name</str>
|
||||
<!-- for this field, we want no fragmenting, just highlighting -->
|
||||
<str name="f.name.hl.fragsize">0</str>
|
||||
<!--
|
||||
instructs Solr to return the field itself if no query terms are
|
||||
found
|
||||
-->
|
||||
<str name="f.name.hl.alternateField">name</str>
|
||||
<str name="f.text.hl.fragmenter">regex</str> <!-- defined below -->
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
<!--
|
||||
Note how you can register the same handler multiple times with
|
||||
different names (and different init parameters)
|
||||
-->
|
||||
<requestHandler name="/partitioned" class="solr.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="defType">dismax</str>
|
||||
<str name="echoParams">explicit</str>
|
||||
<str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str>
|
||||
<str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
|
||||
<!--
|
||||
This is an example of using Date Math to specify a constantly
|
||||
moving date range in a config...
|
||||
-->
|
||||
<str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str>
|
||||
</lst>
|
||||
<!--
|
||||
In addition to defaults, "appends" params can be specified to
|
||||
identify values which should be appended to the list of multi-val
|
||||
params from the query (or the existing "defaults"). In this
|
||||
example, the param "fq=instock:true" will be appended to any query
|
||||
time fq params the user may specify, as a mechanism for
|
||||
partitioning the index, independent of any user selected filtering
|
||||
that may also be desired (perhaps as a result of faceted
|
||||
searching). NOTE: there is *absolutely* nothing a client can do to
|
||||
prevent these "appends" values from being used, so don't use this
|
||||
mechanism unless you are sure you always want it.
|
||||
-->
|
||||
<lst name="appends">
|
||||
<str name="fq">inStock:true</str>
|
||||
</lst>
|
||||
<!--
|
||||
"invariants" are a way of letting the Solr maintainer lock down
|
||||
the options available to Solr clients. Any params values specified
|
||||
here are used regardless of what values may be specified in either
|
||||
the query, the "defaults", or the "appends" params. In this
|
||||
example, the facet.field and facet.query params are fixed,
|
||||
limiting the facets clients can use. Faceting is not turned on by
|
||||
default - but if the client does specify facet=true in the
|
||||
request, these are the only facets they will be able to see counts
|
||||
for; regardless of what other facet.field or facet.query params
|
||||
they may specify. NOTE: there is *absolutely* nothing a client can
|
||||
do to prevent these "invariants" values from being used, so don't
|
||||
use this mechanism unless you are sure you always want it.
|
||||
-->
|
||||
<lst name="invariants">
|
||||
<str name="facet.field">cat</str>
|
||||
<str name="facet.field">manu_exact</str>
|
||||
<str name="facet.query">price:[* TO 500]</str>
|
||||
<str name="facet.query">price:[500 TO *]</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!--
|
||||
Search components are registered to SolrCore and used by Search
|
||||
Handlers. By default, the following components are available:
|
||||
|
||||
<searchComponent name="query"
|
||||
class="org.apache.solr.handler.component.QueryComponent" />
|
||||
<searchComponent name="facet"
|
||||
class="org.apache.solr.handler.component.FacetComponent" />
|
||||
<searchComponent name="mlt"
|
||||
class="org.apache.solr.handler.component.MoreLikeThisComponent" />
|
||||
<searchComponent name="highlight"
|
||||
class="org.apache.solr.handler.component.HighlightComponent" />
|
||||
<searchComponent name="stats"
|
||||
class="org.apache.solr.handler.component.StatsComponent" />
|
||||
<searchComponent name="debug"
|
||||
class="org.apache.solr.handler.component.DebugComponent" /> Default
|
||||
configuration in a requestHandler would look like: <arr
|
||||
name="components"> <str>query</str> <str>facet</str> <str>mlt</str>
|
||||
<str>highlight</str> <str>stats</str> <str>debug</str> </arr> If you
|
||||
register a searchComponent to one of the standard names, that will
|
||||
be used instead. To insert components before or after the 'standard'
|
||||
components, use: <arr name="first-components">
|
||||
<str>myFirstComponentName</str> </arr> <arr name="last-components">
|
||||
<str>myLastComponentName</str> </arr>
|
||||
-->
|
||||
|
||||
<!--
|
||||
The spell check component can return a list of alternative spelling
|
||||
suggestions.
|
||||
-->
|
||||
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
|
||||
|
||||
<str name="queryAnalyzerFieldType">textSpell</str>
|
||||
|
||||
<lst name="spellchecker">
|
||||
<str name="name">default</str>
|
||||
<str name="field">name</str>
|
||||
<str name="spellcheckIndexDir">./spellchecker</str>
|
||||
</lst>
|
||||
|
||||
<!--
|
||||
a spellchecker that uses a different distance measure <lst
|
||||
name="spellchecker"> <str name="name">jarowinkler</str> <str
|
||||
name="field">spell</str> <str
|
||||
name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
|
||||
<str name="spellcheckIndexDir">./spellchecker2</str> </lst>
|
||||
-->
|
||||
|
||||
<!--
|
||||
a file based spell checker <lst name="spellchecker"> <str
|
||||
name="classname">solr.FileBasedSpellChecker</str> <str
|
||||
name="name">file</str> <str
|
||||
name="sourceLocation">spellings.txt</str> <str
|
||||
name="characterEncoding">UTF-8</str> <str
|
||||
name="spellcheckIndexDir">./spellcheckerFile</str> </lst>
|
||||
-->
|
||||
</searchComponent>
|
||||
|
||||
<!--
|
||||
A request handler utilizing the spellcheck component.
|
||||
#############################################################################
|
||||
NOTE: This is purely as an example. The whole purpose of the
|
||||
SpellCheckComponent is to hook it into the request handler that
|
||||
handles (i.e. the standard or dismax SearchHandler) queries such
|
||||
that a separate request is not needed to get suggestions. IN OTHER
|
||||
WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU
|
||||
WANT FOR YOUR PRODUCTION SYSTEM!
|
||||
#############################################################################
|
||||
-->
|
||||
<requestHandler name="/spell" class="solr.SearchHandler"
|
||||
lazy="true">
|
||||
<lst name="defaults">
|
||||
<!-- omp = Only More Popular -->
|
||||
<str name="spellcheck.onlyMorePopular">false</str>
|
||||
<!-- exr = Extended Results -->
|
||||
<str name="spellcheck.extendedResults">false</str>
|
||||
<!-- The number of suggestions to return -->
|
||||
<str name="spellcheck.count">1</str>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>spellcheck</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<searchComponent name="tvComponent"
|
||||
class="org.apache.solr.handler.component.TermVectorComponent" />
|
||||
<!--
|
||||
A Req Handler for working with the tvComponent. This is purely as an
|
||||
example. You will likely want to add the component to your already
|
||||
specified request handlers.
|
||||
-->
|
||||
<requestHandler name="/tvrh"
|
||||
class="org.apache.solr.handler.component.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<bool name="tv">true</bool>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>tvComponent</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler -->
|
||||
<requestHandler name="/update/extract"
|
||||
class="org.apache.solr.handler.extraction.ExtractingRequestHandler"
|
||||
startup="lazy">
|
||||
<lst name="defaults">
|
||||
<!--
|
||||
All the main content goes into "text"... if you need to return
|
||||
the extracted text or do highlighting, use a stored field.
|
||||
-->
|
||||
<str name="fmap.content">text</str>
|
||||
<str name="lowernames">true</str>
|
||||
<str name="uprefix">ignored_</str>
|
||||
|
||||
<!-- capture link hrefs but ignore div attributes -->
|
||||
<str name="captureAttr">true</str>
|
||||
<str name="fmap.a">links</str>
|
||||
<str name="fmap.div">ignored_</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!--
|
||||
A component to return terms and document frequency of those terms.
|
||||
This component does not yet support distributed search.
|
||||
-->
|
||||
<searchComponent name="termsComponent"
|
||||
class="org.apache.solr.handler.component.TermsComponent" />
|
||||
|
||||
<requestHandler name="/terms"
|
||||
class="org.apache.solr.handler.component.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<bool name="terms">true</bool>
|
||||
</lst>
|
||||
<arr name="components">
|
||||
<str>termsComponent</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
|
||||
|
||||
<!--
|
||||
Update request handler. Note: Since solr1.1 requestHandlers require
|
||||
a valid content type header if posted in the body. For example, curl
|
||||
now requires: -H 'Content-type:text/xml; charset=utf-8' The response
|
||||
format differs from solr1.1 formatting and returns a standard error
|
||||
code. To enable solr1.1 behavior, remove the /update handler or
|
||||
change its path
|
||||
-->
|
||||
<requestHandler name="/update" class="solr.UpdateRequestHandler" >
|
||||
<lst name="defaults">
|
||||
<str name="update.chain">uima</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
<!-- CSV update handler, loaded on demand -->
|
||||
<requestHandler name="/update/csv" class="solr.CSVRequestHandler"
|
||||
startup="lazy" />
|
||||
|
||||
<!--
|
||||
An example dedup update processor that creates the "id" field on the
|
||||
fly based on the hash code of some other fields. This example has
|
||||
overwriteDupes set to false since we are using the id field as the
|
||||
signatureField and Solr will maintain uniqueness based on that
|
||||
anyway. You have to link the chain to an update handler above to use
|
||||
it ie: <requestHandler name="/update"
|
||||
class="solr.UpdateRequestHandler"> <lst name="defaults"> <str
|
||||
name="update.chain">dedupe</str> </lst> </requestHandler>
|
||||
-->
|
||||
|
||||
<updateRequestProcessorChain name="uima">
|
||||
<processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
|
||||
<lst name="uimaConfig">
|
||||
<lst name="runtimeParameters">
|
||||
<int name="ngramsize">3</int>
|
||||
</lst>
|
||||
<str name="analysisEngine">/uima/TestAE.xml</str>
|
||||
<lst name="analyzeFields">
|
||||
<bool name="merge">false</bool>
|
||||
<arr name="fields">
|
||||
<str>text</str>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst name="fieldMappings">
|
||||
<lst name="type">
|
||||
<str name="name">org.apache.uima.SentenceAnnotation</str>
|
||||
<lst name="mapping">
|
||||
<str name="feature">coveredText</str>
|
||||
<str name="field">sentence</str>
|
||||
</lst>
|
||||
</lst>
|
||||
<lst name="type">
|
||||
<str name="name">org.apache.solr.uima.ts.DummySentimentAnnotation</str>
|
||||
<lst name="mapping">
|
||||
<str name="feature">mood</str>
|
||||
<str name="field">sentiment</str>
|
||||
</lst>
|
||||
</lst>
|
||||
<lst name="type">
|
||||
<str name="name">org.apache.solr.uima.ts.EntityAnnotation</str>
|
||||
<lst name="mapping">
|
||||
<str name="feature">entity</str>
|
||||
<str name="fieldNameFeature">name</str>
|
||||
<str name="dynamicField">*_sm</str>
|
||||
</lst>
|
||||
</lst>
|
||||
</lst>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="uima-multi-map">
|
||||
<processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
|
||||
<lst name="uimaConfig">
|
||||
<lst name="runtimeParameters">
|
||||
<int name="ngramsize">3</int>
|
||||
</lst>
|
||||
<str name="analysisEngine">/uima/TestAE.xml</str>
|
||||
<lst name="analyzeFields">
|
||||
<bool name="merge">false</bool>
|
||||
<arr name="fields">
|
||||
<str>text</str>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst name="fieldMappings">
|
||||
<lst name="type">
|
||||
<str name="name">a-type-which-can-have-multiple-features</str>
|
||||
<lst name="mapping">
|
||||
<str name="feature">A</str>
|
||||
<str name="field">1</str>
|
||||
</lst>
|
||||
<lst name="mapping">
|
||||
<str name="feature">B</str>
|
||||
<str name="field">2</str>
|
||||
</lst>
|
||||
</lst>
|
||||
</lst>
|
||||
</lst>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="uima-not-ignoreErrors">
|
||||
<processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
|
||||
<lst name="uimaConfig">
|
||||
<lst name="runtimeParameters">
|
||||
<int name="ngramsize">3</int>
|
||||
</lst>
|
||||
<str name="analysisEngine">/uima/TestExceptionAE.xml</str>
|
||||
<bool name="ignoreErrors">false</bool>
|
||||
<lst name="analyzeFields">
|
||||
<bool name="merge">false</bool>
|
||||
<arr name="fields">
|
||||
<str>text</str>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst name="fieldMappings"/>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="uima-ignoreErrors">
|
||||
<processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
|
||||
<lst name="uimaConfig">
|
||||
<lst name="runtimeParameters">
|
||||
<int name="ngramsize">3</int>
|
||||
</lst>
|
||||
<str name="analysisEngine">/uima/TestExceptionAE.xml</str>
|
||||
<bool name="ignoreErrors">true</bool>
|
||||
<!-- This is optional. It is used for logging when text processing fails. Usually, set uniqueKey field name -->
|
||||
<str name="logField">id</str>
|
||||
<lst name="analyzeFields">
|
||||
<bool name="merge">false</bool>
|
||||
<arr name="fields">
|
||||
<str>text</str>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst name="fieldMappings"/>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<!--
|
||||
queryResponseWriter plugins... query responses will be written using
|
||||
the writer specified by the 'wt' request parameter matching the name
|
||||
of a registered writer. The "default" writer is the default and will
|
||||
be used if 'wt' is not specified in the request. XMLResponseWriter
|
||||
will be used if nothing is specified here. The json, python, and
|
||||
ruby writers are also available by default. <queryResponseWriter
|
||||
name="xml" class="org.apache.solr.request.XMLResponseWriter"
|
||||
default="true"/> <queryResponseWriter name="json"
|
||||
class="org.apache.solr.request.JSONResponseWriter"/>
|
||||
<queryResponseWriter name="python"
|
||||
class="org.apache.solr.request.PythonResponseWriter"/>
|
||||
<queryResponseWriter name="ruby"
|
||||
class="org.apache.solr.request.RubyResponseWriter"/>
|
||||
<queryResponseWriter name="php"
|
||||
class="org.apache.solr.request.PHPResponseWriter"/>
|
||||
<queryResponseWriter name="phps"
|
||||
class="org.apache.solr.request.PHPSerializedResponseWriter"/>
|
||||
|
||||
<queryResponseWriter name="custom"
|
||||
class="com.example.MyResponseWriter"/>
|
||||
-->
|
||||
|
||||
<!--
|
||||
XSLT response writer transforms the XML output by any xslt file
|
||||
found in Solr's conf/xslt directory. Changes to xslt files are
|
||||
checked for every xsltCacheLifetimeSeconds.
|
||||
-->
|
||||
<queryResponseWriter name="xslt"
|
||||
class="org.apache.solr.response.XSLTResponseWriter">
|
||||
<int name="xsltCacheLifetimeSeconds">5</int>
|
||||
</queryResponseWriter>
|
||||
|
||||
|
||||
<!--
|
||||
example of registering a query parser <queryParser name="lucene"
|
||||
class="org.apache.solr.search.LuceneQParserPlugin"/>
|
||||
-->
|
||||
|
||||
<!--
|
||||
example of registering a custom function parser <valueSourceParser
|
||||
name="myfunc" class="com.mycompany.MyValueSourceParser" />
|
||||
-->
|
||||
|
||||
<!-- config for the admin interface -->
|
||||
<admin>
|
||||
<defaultQuery>*</defaultQuery>
|
||||
</admin>
|
||||
|
||||
</config>
|
|
@ -1,2 +0,0 @@
|
|||
pizza
|
||||
history
|
|
@ -1,58 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# a couple of test stopwords to test that the words are really being
|
||||
# configured from this file:
|
||||
stopworda
|
||||
stopwordb
|
||||
|
||||
#Standard english stop words taken from Lucene's StopAnalyzer
|
||||
a
|
||||
an
|
||||
and
|
||||
are
|
||||
as
|
||||
at
|
||||
be
|
||||
but
|
||||
by
|
||||
for
|
||||
if
|
||||
in
|
||||
into
|
||||
is
|
||||
it
|
||||
no
|
||||
not
|
||||
of
|
||||
on
|
||||
or
|
||||
s
|
||||
such
|
||||
t
|
||||
that
|
||||
the
|
||||
their
|
||||
then
|
||||
there
|
||||
these
|
||||
they
|
||||
this
|
||||
to
|
||||
was
|
||||
will
|
||||
with
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
#some test synonym mappings unlikely to appear in real input text
|
||||
aaa => aaaa
|
||||
bbb => bbbb1 bbbb2
|
||||
ccc => cccc1,cccc2
|
||||
a\=>a => b\=>b
|
||||
a\,a => b\,b
|
||||
fooaaa,baraaa,bazaaa
|
||||
|
||||
# Some synonym groups specific to this example
|
||||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
pixima => pixma
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
vbg
|
||||
vbz
|
||||
vbd
|
||||
vbn
|
||||
vb
|
||||
bez
|
||||
cc
|
||||
cd
|
||||
at
|
||||
.
|
||||
:
|
|
@ -1,613 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version
|
||||
2.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||
applicable law or agreed to in writing, software distributed under
|
||||
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
|
||||
OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
This is the Solr schema file. This file should be named "schema.xml"
|
||||
and should be in the conf directory under the solr home (i.e.
|
||||
./solr/conf/schema.xml by default) or located where the classloader
|
||||
for the Solr webapp can find it. This example schema is the
|
||||
recommended starting point for users. It should be kept correct and
|
||||
concise, usable out-of-the-box. For more information, on how to
|
||||
customize this file, please see
|
||||
http://wiki.apache.org/solr/SchemaXml PERFORMANCE NOTE: this schema
|
||||
includes many optional features and should not be used for
|
||||
benchmarking. To improve performance one could - set stored="false"
|
||||
for all fields possible (esp large fields) when you only need to
|
||||
search on the field but don't need to return the original value. -
|
||||
set indexed="false" if you don't need to search on the field, but
|
||||
only return the field as a result of searching on other indexed
|
||||
fields. - remove all unneeded copyField statements - for best index
|
||||
size and searching performance, set "indexed" to false for all general
|
||||
text fields, use copyField to copy them to the catchall "text"
|
||||
field, and use that for searching. - For maximum indexing
|
||||
performance, use the ConcurrentUpdateSolrServer java client. -
|
||||
Remember to run the JVM in server mode, and use a higher logging
|
||||
level that avoids logging every request
|
||||
-->
|
||||
|
||||
<schema name="sample" version="1.2">
|
||||
<!--
|
||||
attribute "name" is the name of this schema and is only used for
|
||||
display purposes. Applications should change this to reflect the
|
||||
nature of the search collection. version="1.2" is Solr's version
|
||||
number for the schema syntax and semantics. It should not normally
|
||||
be changed by applications. 1.0: multiValued attribute did not
|
||||
exist, all fields are multiValued by nature 1.1: multiValued
|
||||
attribute introduced, false by default 1.2: omitTermFreqAndPositions
|
||||
attribute introduced, true by default except for text fields.
|
||||
-->
|
||||
|
||||
<!--
|
||||
field type definitions. The "name" attribute is just a label to be
|
||||
used by field definitions. The "class" attribute and any other
|
||||
attributes determine the real behavior of the fieldType. Class
|
||||
names starting with "solr" refer to java classes in the
|
||||
org.apache.solr.analysis package.
|
||||
-->
|
||||
|
||||
<!--
|
||||
The StrField type is not analyzed, but indexed/stored verbatim. -
|
||||
StrField and TextField support an optional compressThreshold which
|
||||
limits compression (if enabled in the derived fields) to values
|
||||
which exceed a certain size (in characters).
|
||||
-->
|
||||
<fieldType name="string" class="solr.StrField"
|
||||
sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
<!-- boolean type: "true" or "false" -->
|
||||
<fieldType name="boolean" class="solr.BoolField"
|
||||
sortMissingLast="true" omitNorms="true"/>
|
||||
<!--
|
||||
Binary data type. The data should be sent/retrieved as Base64
|
||||
encoded Strings
|
||||
-->
|
||||
<fieldType name="binary" class="solr.BinaryField"/>
|
||||
|
||||
<!--
|
||||
If sortMissingLast="true", then a sort on this field will cause
|
||||
documents without the field to come after documents with the
|
||||
field, regardless of the requested sort order (asc or desc). - If
|
||||
sortMissingFirst="true", then a sort on this field will cause
|
||||
documents without the field to come before documents with the
|
||||
field, regardless of the requested sort order. - If
|
||||
sortMissingLast="false" and sortMissingFirst="false" (the
|
||||
default), then default lucene sorting will be used which places
|
||||
docs without the field first in an ascending sort and last in a
|
||||
descending sort.
|
||||
-->
|
||||
|
||||
<!--
|
||||
Default numeric field types. For faster range queries, consider
|
||||
the tint/tfloat/tlong/tdouble types.
|
||||
-->
|
||||
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="float" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="double" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
||||
<!--
|
||||
Numeric field types that index each value at various levels of
|
||||
precision to accelerate range queries when the number of values
|
||||
between the range endpoints is large. See the javadoc for
|
||||
LegacyNumericRangeQuery for internal implementation details. Smaller
|
||||
precisionStep values (specified in bits) will lead to more tokens
|
||||
indexed per value, slightly larger index size, and faster range
|
||||
queries. A precisionStep of 0 disables indexing at different
|
||||
precision levels.
|
||||
-->
|
||||
<fieldType name="tint" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tfloat" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tlong" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tdouble" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
||||
<!--
|
||||
The format for this date field is of the form
|
||||
1995-12-31T23:59:59Z, and is a more restricted form of the
|
||||
canonical representation of dateTime
|
||||
http://www.w3.org/TR/xmlschema-2/#dateTime The trailing "Z"
|
||||
designates UTC time and is mandatory. Optional fractional seconds
|
||||
are allowed: 1995-12-31T23:59:59.999Z All other components are
|
||||
mandatory. Expressions can also be used to denote calculations
|
||||
that should be performed relative to "NOW" to determine the value,
|
||||
ie... NOW/HOUR ... Round to the start of the current hour NOW-1DAY
|
||||
... Exactly 1 day prior to now NOW/DAY+6MONTHS+3DAYS ... 6 months
|
||||
and 3 days in the future from the start of the current day Consult
|
||||
the TrieDateField javadocs for more information. Note: For faster
|
||||
range queries, consider the tdate type
|
||||
-->
|
||||
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
||||
|
||||
<!--
|
||||
A Trie based date field for faster date range queries and date
|
||||
faceting.
|
||||
-->
|
||||
<fieldType name="tdate" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}"
|
||||
omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
||||
|
||||
<!--
|
||||
The "RandomSortField" is not used to store or search any data. You
|
||||
can declare fields of this type in your schema to generate
|
||||
pseudo-random orderings of your docs for sorting purposes. The
|
||||
ordering is generated based on the field name and the version of
|
||||
the index. As long as the index version remains unchanged, and the
|
||||
same field name is reused, the ordering of the docs will be
|
||||
consistent. If you want different pseudo-random orderings of
|
||||
documents, for the same version of the index, use a dynamicField
|
||||
and change the name
|
||||
-->
|
||||
<fieldType name="random" class="solr.RandomSortField"
|
||||
indexed="true"/>
|
||||
|
||||
<!--
|
||||
solr.TextField allows the specification of custom text analyzers
|
||||
specified as a tokenizer and a list of token filters. Different
|
||||
analyzers may be specified for indexing and querying. The optional
|
||||
positionIncrementGap puts space between multiple fields of this
|
||||
type on the same document, with the purpose of preventing false
|
||||
phrase matching across fields. For more info on customizing your
|
||||
analyzer chain, please see
|
||||
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
||||
-->
|
||||
|
||||
<!--
|
||||
One can also specify an existing Analyzer class that has a default
|
||||
constructor via the class attribute on the analyzer element
|
||||
<fieldType name="text_greek" class="solr.TextField"> <analyzer
|
||||
class="org.apache.lucene.analysis.el.GreekAnalyzer"/> </fieldType>
|
||||
-->
|
||||
|
||||
<!--
|
||||
A text field that only splits on whitespace for exact matching of
|
||||
words
|
||||
-->
|
||||
<fieldType name="text_ws" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!--
|
||||
A text field that uses WordDelimiterGraphFilter to enable splitting and
|
||||
matching of words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars, so that a query of "wifi" or "wi fi" could
|
||||
match a document containing "Wi-Fi". Synonyms and stopwords are
|
||||
customized by external files, and stemming is enabled.
|
||||
-->
|
||||
<fieldType name="text" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--
|
||||
in this example, we will only use synonyms at query time
|
||||
<filter class="solr.SynonymGraphFilterFactory"
|
||||
synonyms="index_synonyms.txt" ignoreCase="true"
|
||||
expand="false"/>
|
||||
-->
|
||||
<!--
|
||||
Case insensitive stop word removal. add
|
||||
-->
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!--
|
||||
Less flexible matching, but fewer false matches. Probably not ideal
|
||||
for product names, but may be good for SKUs. Can insert dashes in
|
||||
the wrong place and still match.
|
||||
-->
|
||||
<fieldType name="textTight" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
|
||||
<!--
|
||||
this filter can remove any duplicate tokens that appear at the
|
||||
same position - sometimes possible with WordDelimiterGraphFilter in
|
||||
conjunction with stemming.
|
||||
-->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="uima_sentences" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.UIMAAnnotationsTokenizerFactory"
|
||||
descriptorPath="/uima/AggregateSentenceAE.xml" tokenType="org.apache.uima.SentenceAnnotation"
|
||||
ngramsize="2"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="uima_nouns" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.UIMATypeAwareAnnotationsTokenizerFactory"
|
||||
descriptorPath="/uima/AggregateSentenceAE.xml" tokenType="org.apache.uima.TokenAnnotation"
|
||||
featurePath="posTag"/>
|
||||
<filter class="solr.TypeTokenFilterFactory" types="uima/stoptypes.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!--
|
||||
A general unstemmed text field - good if one does not know the
|
||||
language of the field
|
||||
-->
|
||||
<fieldType name="textgen" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!--
|
||||
A general unstemmed text field that indexes tokens normally and
|
||||
also reversed (via ReversedWildcardFilterFactory), to enable more
|
||||
efficient leading wildcard queries.
|
||||
-->
|
||||
<fieldType name="text_rev" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.ReversedWildcardFilterFactory"
|
||||
withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2"
|
||||
maxFractionAsterisk="0.33"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- charFilter + WhitespaceTokenizer -->
|
||||
<!--
|
||||
<fieldType name="textCharNorm" class="solr.TextField"
|
||||
positionIncrementGap="100" > <analyzer> <charFilter
|
||||
class="solr.MappingCharFilterFactory"
|
||||
mapping="mapping-ISOLatin1Accent.txt"/> <tokenizer
|
||||
class="solr.MockTokenizerFactory"/> </analyzer> </fieldType>
|
||||
-->
|
||||
|
||||
<!--
|
||||
This is an example of using the KeywordTokenizer along with
|
||||
various TokenFilterFactories to produce a sortable field that does
|
||||
not include some properties of the source text
|
||||
-->
|
||||
<fieldType name="alphaOnlySort" class="solr.TextField"
|
||||
sortMissingLast="true" omitNorms="true">
|
||||
<analyzer>
|
||||
<!--
|
||||
KeywordTokenizer does no actual tokenizing, so the entire
|
||||
input string is preserved as a single token
|
||||
-->
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<!--
|
||||
The LowerCase TokenFilter does what you expect, which can be
|
||||
useful when you want your sorting to be case insensitive
|
||||
-->
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<!-- The TrimFilter removes any leading or trailing whitespace -->
|
||||
<filter class="solr.TrimFilterFactory"/>
|
||||
<!--
|
||||
The PatternReplaceFilter gives you the flexibility to use Java
|
||||
Regular expression to replace any sequence of characters
|
||||
matching a pattern with an arbitrary replacement string, which
|
||||
may include back references to portions of the original string
|
||||
matched by the pattern. See the Java Regular Expression
|
||||
documentation for more information on pattern and replacement
|
||||
string syntax.
|
||||
|
||||
http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
|
||||
-->
|
||||
<filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])"
|
||||
replacement="" replace="all"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="phonetic" stored="false" indexed="true"
|
||||
class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="payloads" stored="false" indexed="true"
|
||||
class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--
|
||||
The DelimitedPayloadTokenFilter can put payloads on tokens...
|
||||
for example, a token of "foo|1.4" would be indexed as "foo"
|
||||
with a payload of 1.4f Attributes of the
|
||||
DelimitedPayloadTokenFilterFactory : "delimiter" - a one
|
||||
character delimiter. Default is | (pipe) "encoder" - how to
|
||||
encode the following value into a payload float ->
|
||||
org.apache.lucene.analysis.payloads.FloatEncoder, integer ->
|
||||
o.a.l.a.p.IntegerEncoder identity -> o.a.l.a.p.IdentityEncoder
|
||||
Fully Qualified class name implementing PayloadEncoder,
|
||||
Encoder must have a no arg constructor.
|
||||
-->
|
||||
<filter class="solr.DelimitedPayloadTokenFilterFactory"
|
||||
encoder="float"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!--
|
||||
lowercases the entire field value, keeping it as a single token.
|
||||
-->
|
||||
<fieldType name="lowercase" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!--
|
||||
since fields of this type are by default not stored or indexed,
|
||||
any data added to them will be ignored outright.
|
||||
-->
|
||||
<fieldType name="ignored" stored="false" indexed="false"
|
||||
multiValued="true" class="solr.StrField"/>
|
||||
|
||||
<!--
|
||||
Valid attributes for fields: name: mandatory - the name for the
|
||||
field type: mandatory - the name of a previously defined type from
|
||||
the <fieldType>s indexed: true if this field should be indexed
|
||||
(searchable or sortable) stored: true if this field should be
|
||||
retrievable multiValued: true if this field may contain multiple
|
||||
values per document omitNorms: (expert) set to true to omit the
|
||||
norms associated with this field (this disables length
|
||||
normalization and index-time boosting for the field, and saves
|
||||
some memory). Only full-text fields or fields that need an
|
||||
index-time boost need norms. termVectors: [false] set to true to
|
||||
store the term vector for a given field. When using MoreLikeThis,
|
||||
fields used for similarity should be stored for best performance.
|
||||
termPositions: Store position information with the term vector.
|
||||
This will increase storage costs. termOffsets: Store offset
|
||||
information with the term vector. This will increase storage
|
||||
costs. default: a value that should be used if no value is
|
||||
specified when adding a document.
|
||||
-->
|
||||
<field name="id" type="string" indexed="true" stored="true"
|
||||
required="true"/>
|
||||
<field name="sku" type="textTight" indexed="true" stored="true"
|
||||
omitNorms="true"/>
|
||||
<field name="name" type="textgen" indexed="true" stored="true"/>
|
||||
<field name="alphaNameSort" type="alphaOnlySort" indexed="true"
|
||||
stored="false"/>
|
||||
<field name="manu" type="textgen" indexed="true" stored="true"
|
||||
omitNorms="true"/>
|
||||
<field name="cat" type="text_ws" indexed="true" stored="true"
|
||||
multiValued="true" omitNorms="true"/>
|
||||
<field name="features" type="text" indexed="true" stored="true"
|
||||
multiValued="true"/>
|
||||
<field name="includes" type="text" indexed="true" stored="true"
|
||||
termVectors="true" termPositions="true" termOffsets="true"/>
|
||||
|
||||
<field name="sentences" type="uima_sentences" indexed="true" stored="true" multiValued="true"
|
||||
termVectors="true" termPositions="true" termOffsets="true"/>
|
||||
<field name="nouns" type="uima_nouns" indexed="true" stored="true" multiValued="true"
|
||||
termVectors="true" termPositions="true" termOffsets="true"/>
|
||||
|
||||
<field name="weight" type="float" indexed="true" stored="true"/>
|
||||
<field name="price" type="float" indexed="true" stored="true"/>
|
||||
<field name="popularity" type="int" indexed="true" stored="true"/>
|
||||
<field name="inStock" type="boolean" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
<!--
|
||||
Common metadata fields, named specifically to match up with
|
||||
SolrCell metadata when parsing rich documents such as Word, PDF.
|
||||
Some fields are multiValued only because Tika currently may return
|
||||
multiple values for them.
|
||||
-->
|
||||
<field name="title" type="text" indexed="true" stored="true"
|
||||
multiValued="true"/>
|
||||
<field name="subject" type="text" indexed="true" stored="true"/>
|
||||
<field name="description" type="text" indexed="true" stored="true"/>
|
||||
<field name="comments" type="text" indexed="true" stored="true"/>
|
||||
<field name="author" type="textgen" indexed="true" stored="true"/>
|
||||
<field name="keywords" type="textgen" indexed="true" stored="true"/>
|
||||
<field name="category" type="textgen" indexed="true" stored="true"/>
|
||||
<field name="content_type" type="string" indexed="true"
|
||||
stored="true" multiValued="true"/>
|
||||
<field name="last_modified" type="date" indexed="true" stored="true"/>
|
||||
<field name="links" type="string" indexed="true" stored="true"
|
||||
multiValued="true"/>
|
||||
|
||||
|
||||
<!--
|
||||
catchall field, containing all other searchable text fields
|
||||
(implemented via copyField further on in this schema)
|
||||
-->
|
||||
<field name="text" type="text" indexed="true" stored="false"
|
||||
multiValued="true"/>
|
||||
|
||||
<!--
|
||||
catchall text field that indexes tokens both normally and in
|
||||
reverse for efficient leading wildcard queries.
|
||||
-->
|
||||
<field name="text_rev" type="text_rev" indexed="true" stored="false"
|
||||
multiValued="true"/>
|
||||
|
||||
<!--
|
||||
non-tokenized version of manufacturer to make it easier to sort or
|
||||
group results by manufacturer. copied from "manu" via copyField
|
||||
-->
|
||||
<field name="manu_exact" type="string" indexed="true" stored="false"/>
|
||||
|
||||
<field name="payloads" type="payloads" indexed="true" stored="true"/>
|
||||
|
||||
<!--
|
||||
Uncommenting the following will create a "timestamp" field using a
|
||||
default value of "NOW" to indicate when each document was indexed.
|
||||
-->
|
||||
<!--
|
||||
<field name="timestamp" type="date" indexed="true" stored="true"
|
||||
default="NOW" multiValued="false"/>
|
||||
-->
|
||||
|
||||
<field name="language" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false"/>
|
||||
<field name="sentiment" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="entity" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<!--
|
||||
Dynamic field definitions. If a field name is not found,
|
||||
dynamicFields will be used if the name matches any of the
|
||||
patterns. RESTRICTION: the glob-like pattern in the name attribute
|
||||
must have a "*" only at the start or the end. EXAMPLE: name="*_i"
|
||||
will match any field ending in _i (like myid_i, z_i) Longer
|
||||
patterns will be matched first. if equal size patterns both match,
|
||||
the first appearing in the schema will be used. <dynamicField
|
||||
name="*_i" type="int" indexed="true" stored="true"/> <dynamicField
|
||||
name="*_s" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_l" type="long" indexed="true"
|
||||
stored="true"/> <dynamicField name="*_t" type="text"
|
||||
indexed="true" stored="true"/> <dynamicField name="*_b"
|
||||
type="boolean" indexed="true" stored="true"/> <dynamicField
|
||||
name="*_f" type="float" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_d" type="double" indexed="true"
|
||||
stored="true"/> <dynamicField name="*_dt" type="date"
|
||||
indexed="true" stored="true"/> <dynamicField name="*_ti"
|
||||
type="tint" indexed="true" stored="true"/> <dynamicField
|
||||
name="*_tl" type="tlong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tf" type="tfloat" indexed="true"
|
||||
stored="true"/> <dynamicField name="*_td" type="tdouble"
|
||||
indexed="true" stored="true"/> <dynamicField name="*_tdt"
|
||||
type="tdate" indexed="true" stored="true"/>
|
||||
|
||||
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
|
||||
<dynamicField name="attr_*" type="textgen" indexed="true"
|
||||
stored="true" multiValued="true"/> <dynamicField name="random_*"
|
||||
type="random" />
|
||||
-->
|
||||
<dynamicField name="*_sm" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<!--
|
||||
uncomment the following to ignore any fields that don't already
|
||||
match an existing field name or dynamic field, rather than
|
||||
reporting them as an error. alternately, change the type="ignored"
|
||||
to some other type e.g. "text" if you want unknown fields indexed
|
||||
and/or stored by default
|
||||
-->
|
||||
<!--dynamicField name="*" type="ignored" multiValued="true" /-->
|
||||
|
||||
|
||||
<!--
|
||||
Field to use to determine and enforce document uniqueness. Unless
|
||||
this field is marked with required="false", it will be a required
|
||||
field
|
||||
-->
|
||||
<uniqueKey>id</uniqueKey>
|
||||
|
||||
<!--
|
||||
copyField commands copy one field to another at the time a document
|
||||
is added to the index. It's used either to index the same field
|
||||
differently, or to add multiple fields to the same field for
|
||||
easier/faster searching.
|
||||
-->
|
||||
|
||||
<copyField source="cat" dest="text"/>
|
||||
<copyField source="name" dest="text"/>
|
||||
<copyField source="manu" dest="text"/>
|
||||
<copyField source="features" dest="text"/>
|
||||
<copyField source="includes" dest="text"/>
|
||||
<copyField source="text" dest="nouns"/>
|
||||
<copyField source="text" dest="sentences"/>
|
||||
<copyField source="manu" dest="manu_exact"/>
|
||||
|
||||
|
||||
<!--copyField source="Titolo" dest="text"/-->
|
||||
|
||||
<!--
|
||||
Above, multiple source fields are copied to the [text] field.
|
||||
Another way to map multiple source fields to the same destination
|
||||
field is to use the dynamic field syntax. copyField also supports a
|
||||
maxChars to copy setting.
|
||||
-->
|
||||
|
||||
<!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
|
||||
|
||||
<!--
|
||||
copy name to alphaNameSort, a field designed for sorting by name
|
||||
-->
|
||||
<!-- <copyField source="name" dest="alphaNameSort"/> -->
|
||||
|
||||
|
||||
</schema>
|
|
@ -1,653 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version
|
||||
2.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||
applicable law or agreed to in writing, software distributed under
|
||||
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
|
||||
OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!--
|
||||
For more details about configurations options that may appear in
|
||||
this file, see http://wiki.apache.org/solr/SolrConfigXml.
|
||||
|
||||
Specifically, the Solr Config can support XInclude, which may make
|
||||
it easier to manage the configuration. See
|
||||
https://issues.apache.org/jira/browse/SOLR-1167
|
||||
-->
|
||||
<config xmlns:xi="http://www.w3.org/2001/XInclude">
|
||||
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
|
||||
<indexConfig>
|
||||
<useCompoundFile>${useCompoundFile:false}</useCompoundFile>
|
||||
</indexConfig>
|
||||
<!--
|
||||
lib directives can be used to instruct Solr to load any Jars
|
||||
identified and use them to resolve any "plugins" specified in your
|
||||
solrconfig.xml or schema.xml (ie: Analyzers, Request Handlers,
|
||||
etc...). All directories and paths are resolved relative to the
|
||||
instanceDir. If a "./lib" directory exists in your instanceDir, all
|
||||
files found in it are included as if you had used the following
|
||||
syntax... <lib dir="./lib" />
|
||||
-->
|
||||
<!--
|
||||
A dir option by itself adds any files found in the directory to the
|
||||
classpath, this is useful for including all jars in a directory.
|
||||
-->
|
||||
<lib dir="../../contrib/extraction/lib" />
|
||||
<!--
|
||||
When a regex is specified in addition to a directory, only the files
|
||||
in that directory which completely match the regex (anchored on both
|
||||
ends) will be included.
|
||||
-->
|
||||
<lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
|
||||
<!--
|
||||
If a dir option (with or without a regex) is used and nothing is
|
||||
found that matches, it will be ignored
|
||||
-->
|
||||
<lib dir="/total/crap/dir/ignored" />
|
||||
<!--
|
||||
an exact path can be used to specify a specific file. This will
|
||||
cause a serious error to be logged if it can't be loaded. <lib
|
||||
path="../a-jar-that-does-not-exist.jar" />
|
||||
-->
|
||||
|
||||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
|
||||
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
||||
|
||||
<!--
|
||||
Used to specify an alternate directory to hold all index data other
|
||||
than the default ./data under the Solr home. If replication is in
|
||||
use, this should match the replication configuration.
|
||||
-->
|
||||
<dataDir>${solr.data.dir:}</dataDir>
|
||||
|
||||
<!--
|
||||
Enables JMX if and only if an existing MBeanServer is found, use
|
||||
this if you want to configure JMX through JVM parameters. Remove
|
||||
this to disable exposing Solr configuration and statistics to JMX.
|
||||
|
||||
If you want to connect to a particular server, specify the agentId
|
||||
e.g. <jmx agentId="myAgent" /> If you want to start a new
|
||||
MBeanServer, specify the serviceUrl e.g <jmx
|
||||
serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> For
|
||||
more details see http://wiki.apache.org/solr/SolrJmx
|
||||
-->
|
||||
<jmx />
|
||||
|
||||
<!-- the default high-performance update handler -->
|
||||
<updateHandler class="solr.DirectUpdateHandler2">
|
||||
<!--
|
||||
A prefix of "solr." for class names is an alias that causes solr
|
||||
to search appropriate packages, including
|
||||
org.apache.solr.(search|update|request|core|analysis)
|
||||
-->
|
||||
|
||||
<!--
|
||||
Perform a <commit/> automatically under certain conditions:
|
||||
maxDocs - number of updates since last commit is greater than this
|
||||
maxTime - oldest uncommitted update (in ms) is this long ago
|
||||
Instead of enabling autoCommit, consider using "commitWithin" when
|
||||
adding documents. http://wiki.apache.org/solr/UpdateXmlMessages
|
||||
<autoCommit> <maxDocs>10000</maxDocs> <maxTime>1000</maxTime>
|
||||
</autoCommit>
|
||||
-->
|
||||
|
||||
</updateHandler>
|
||||
|
||||
<!--
|
||||
Use the following format to specify a custom IndexReaderFactory -
|
||||
allows for alternate IndexReader implementations. ** Experimental
|
||||
Feature ** Please note - Using a custom IndexReaderFactory may
|
||||
prevent certain other features from working. The API to
|
||||
IndexReaderFactory may change without warning or may even be removed
|
||||
from future releases if the problems cannot be resolved. ** Features
|
||||
that may not work with custom IndexReaderFactory ** The
|
||||
ReplicationHandler assumes a disk-resident index. Using a custom
|
||||
IndexReader implementation may cause incompatibility with
|
||||
ReplicationHandler and may cause replication to not work correctly.
|
||||
See SOLR-1366 for details. <indexReaderFactory
|
||||
name="IndexReaderFactory" class="package.class"> Parameters as
|
||||
required by the implementation </indexReaderFactory >
|
||||
-->
|
||||
<!-- To set the termInfosIndexDivisor, do this: -->
|
||||
<!--
|
||||
<indexReaderFactory name="IndexReaderFactory"
|
||||
class="org.apache.solr.core.StandardIndexReaderFactory"> <int
|
||||
name="setTermIndexDivisor">12</int> </indexReaderFactory >
|
||||
-->
|
||||
|
||||
|
||||
<query>
|
||||
<!--
|
||||
Maximum number of clauses in a boolean query... in the past, this
|
||||
affected range or prefix queries that expanded to big boolean
|
||||
queries - built in Solr query parsers no longer create queries
|
||||
with this limitation. An exception is thrown if exceeded.
|
||||
-->
|
||||
<maxBooleanClauses>1024</maxBooleanClauses>
|
||||
|
||||
|
||||
<!--
|
||||
There are two implementations of cache available for Solr,
|
||||
LRUCache, based on a synchronized LinkedHashMap, and FastLRUCache,
|
||||
based on a ConcurrentHashMap. FastLRUCache has faster gets and
|
||||
slower puts in single threaded operation and thus is generally
|
||||
faster than LRUCache when the hit ratio of the cache is high (>
|
||||
75%), and may be faster under other scenarios on multi-cpu
|
||||
systems.
|
||||
-->
|
||||
<!--
|
||||
Cache used by SolrIndexSearcher for filters (DocSets), unordered
|
||||
sets of *all* documents that match a query. When a new searcher is
|
||||
opened, its caches may be prepopulated or "autowarmed" using data
|
||||
from caches in the old searcher. autowarmCount is the number of
|
||||
items to prepopulate. For LRUCache, the autowarmed items will be
|
||||
the most recently accessed items. Parameters: class - the
|
||||
SolrCache implementation LRUCache or FastLRUCache size - the
|
||||
maximum number of entries in the cache initialSize - the initial
|
||||
capacity (number of entries) of the cache. (see
|
||||
java.util.HashMap) autowarmCount - the number of entries to
|
||||
prepopulate from an old cache.
|
||||
-->
|
||||
<filterCache class="solr.FastLRUCache" size="512"
|
||||
initialSize="512" autowarmCount="0" />
|
||||
|
||||
<!--
|
||||
Cache used to hold field values that are quickly accessible by
|
||||
document id. The fieldValueCache is created by default even if not
|
||||
configured here. <fieldValueCache class="solr.FastLRUCache"
|
||||
size="512" autowarmCount="128" showItems="32" />
|
||||
-->
|
||||
|
||||
<!--
|
||||
queryResultCache caches results of searches - ordered lists of
|
||||
document ids (DocList) based on a query, a sort, and the range of
|
||||
documents requested.
|
||||
-->
|
||||
<queryResultCache class="solr.LRUCache" size="512"
|
||||
initialSize="512" autowarmCount="0" />
|
||||
|
||||
<!--
|
||||
documentCache caches Lucene Document objects (the stored fields
|
||||
for each document). Since Lucene internal document ids are
|
||||
transient, this cache will not be autowarmed.
|
||||
-->
|
||||
<documentCache class="solr.LRUCache" size="512"
|
||||
initialSize="512" autowarmCount="0" />
|
||||
|
||||
<!--
|
||||
If true, stored fields that are not requested will be loaded
|
||||
lazily. This can result in a significant speed improvement if the
|
||||
usual case is to not load all stored fields, especially if the
|
||||
skipped fields are large compressed text fields.
|
||||
-->
|
||||
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
||||
|
||||
<!--
|
||||
Example of a generic cache. These caches may be accessed by name
|
||||
through SolrIndexSearcher.getCache(),cacheLookup(), and
|
||||
cacheInsert(). The purpose is to enable easy caching of
|
||||
user/application level data. The regenerator argument should be
|
||||
specified as an implementation of solr.search.CacheRegenerator if
|
||||
autowarming is desired.
|
||||
-->
|
||||
<!--
|
||||
<cache name="myUserCache" class="solr.LRUCache" size="4096"
|
||||
initialSize="1024" autowarmCount="1024"
|
||||
regenerator="org.mycompany.mypackage.MyRegenerator" />
|
||||
-->
|
||||
|
||||
<!--
|
||||
An optimization that attempts to use a filter to satisfy a search.
|
||||
If the requested sort does not include score, then the filterCache
|
||||
will be checked for a filter matching the query. If found, the
|
||||
filter will be used as the source of document ids, and then the
|
||||
sort will be applied to that.
|
||||
<useFilterForSortedQuery>true</useFilterForSortedQuery>
|
||||
-->
|
||||
|
||||
<!--
|
||||
An optimization for use with the queryResultCache. When a search
|
||||
is requested, a superset of the requested number of document ids
|
||||
are collected. For example, if a search for a particular query
|
||||
requests matching documents 10 through 19, and queryWindowSize is
|
||||
50, then documents 0 through 49 will be collected and cached. Any
|
||||
further requests in that range can be satisfied via the cache.
|
||||
-->
|
||||
<queryResultWindowSize>20</queryResultWindowSize>
|
||||
|
||||
<!--
|
||||
Maximum number of documents to cache for any entry in the
|
||||
queryResultCache.
|
||||
-->
|
||||
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
|
||||
|
||||
<!--
|
||||
a newSearcher event is fired whenever a new searcher is being
|
||||
prepared and there is a current searcher handling requests (aka
|
||||
registered). It can be used to prime certain caches to prevent
|
||||
long request times for certain requests.
|
||||
-->
|
||||
<!--
|
||||
QuerySenderListener takes an array of NamedList and executes a
|
||||
local query request for each NamedList in sequence.
|
||||
-->
|
||||
<listener event="newSearcher" class="solr.QuerySenderListener">
|
||||
<arr name="queries">
|
||||
<!--
|
||||
<lst> <str name="q">solr</str> <str name="start">0</str> <str
|
||||
name="rows">10</str> </lst> <lst> <str name="q">rocks</str>
|
||||
<str name="start">0</str> <str name="rows">10</str> </lst>
|
||||
<lst><str name="q">static newSearcher warming query from
|
||||
solrconfig.xml</str></lst>
|
||||
-->
|
||||
</arr>
|
||||
</listener>
|
||||
|
||||
<!--
|
||||
a firstSearcher event is fired whenever a new searcher is being
|
||||
prepared but there is no current registered searcher to handle
|
||||
requests or to gain autowarming data from.
|
||||
-->
|
||||
<listener event="firstSearcher" class="solr.QuerySenderListener">
|
||||
<arr name="queries">
|
||||
<lst>
|
||||
<str name="q">solr rocks</str>
|
||||
<str name="start">0</str>
|
||||
<str name="rows">10</str>
|
||||
</lst>
|
||||
<lst>
|
||||
<str name="q">static firstSearcher warming query from
|
||||
solrconfig.xml</str>
|
||||
</lst>
|
||||
</arr>
|
||||
</listener>
|
||||
|
||||
<!--
|
||||
If a search request comes in and there is no current registered
|
||||
searcher, then immediately register the still warming searcher and
|
||||
use it. If "false" then all requests will block until the first
|
||||
searcher is done warming.
|
||||
-->
|
||||
<useColdSearcher>false</useColdSearcher>
|
||||
|
||||
<!--
|
||||
Maximum number of searchers that may be warming in the background
|
||||
concurrently. An error is returned if this limit is exceeded.
|
||||
Recommend 1-2 for read-only slaves, higher for masters w/o cache
|
||||
warming.
|
||||
-->
|
||||
<maxWarmingSearchers>2</maxWarmingSearchers>
|
||||
|
||||
</query>
|
||||
|
||||
<requestDispatcher>
|
||||
<!--
|
||||
Make sure your system has some authentication before enabling
|
||||
remote streaming!
|
||||
-->
|
||||
<requestParsers enableRemoteStreaming="false"
|
||||
multipartUploadLimitInKB="-1" />
|
||||
|
||||
<!--
|
||||
Set HTTP caching related parameters (for proxy caches and
|
||||
clients). To get the behaviour of Solr 1.2 (ie: no caching related
|
||||
headers) use the never304="true" option and do not specify a value
|
||||
for <cacheControl>
|
||||
-->
|
||||
<!-- <httpCaching never304="true"> -->
|
||||
<httpCaching lastModifiedFrom="openTime" etagSeed="Solr">
|
||||
<!--
|
||||
lastModifiedFrom="openTime" is the default, the Last-Modified value
|
||||
(and validation against If-Modified-Since requests) will all be
|
||||
relative to when the current Searcher was opened. You can change
|
||||
it to lastModifiedFrom="dirLastMod" if you want the value to exactly
|
||||
correspond to when the physical index was last modified.
|
||||
|
||||
etagSeed="..." is an option you can change to force the ETag
|
||||
header (and validation against If-None-Match requests) to be
|
||||
different even if the index has not changed (ie: when making
|
||||
significant changes to your config file) lastModifiedFrom and
|
||||
etagSeed are both ignored if you use the never304="true" option.
|
||||
-->
|
||||
<!--
|
||||
If you include a <cacheControl> directive, it will be used to
|
||||
generate a Cache-Control header, as well as an Expires header if
|
||||
the value contains "max-age=" By default, no Cache-Control
|
||||
header is generated. You can use the <cacheControl> option even
|
||||
if you have set never304="true"
|
||||
-->
|
||||
<!-- <cacheControl>max-age=30, public</cacheControl> -->
|
||||
</httpCaching>
|
||||
</requestDispatcher>
|
||||
|
||||
|
||||
<!--
|
||||
requestHandler plugins... incoming queries will be dispatched to the
|
||||
correct handler based on the path or the 'qt' param.
|
||||
Names starting with a '/' are accessed with a path equal to the
|
||||
registered name. Names without a leading '/' are accessed with:
|
||||
http://host/app/select?qt=name If no qt is defined, the
|
||||
requestHandler that declares default="true" will be used.
|
||||
-->
|
||||
<requestHandler name="/select" class="solr.SearchHandler"
|
||||
default="true">
|
||||
<!-- default values for query parameters -->
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
<!--
|
||||
<int name="rows">10</int> <str name="fl">*</str> <str
|
||||
name="version">2.1</str>
|
||||
-->
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
<!--
|
||||
DisMaxRequestHandler allows easy searching across multiple fields
|
||||
for simple user-entered phrases. Its implementation is now just the
|
||||
standard SearchHandler with a default query parser of "dismax". see
|
||||
http://wiki.apache.org/solr/DisMaxRequestHandler
|
||||
-->
|
||||
<requestHandler name="/dismax" class="solr.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="defType">dismax</str>
|
||||
<str name="echoParams">explicit</str>
|
||||
<float name="tie">0.01</float>
|
||||
<str name="qf">
|
||||
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0
|
||||
manu^1.1 cat^1.4
|
||||
</str>
|
||||
<str name="pf">
|
||||
text^0.2 features^1.1 name^1.5 manu^1.4
|
||||
manu_exact^1.9
|
||||
</str>
|
||||
<str name="bf">
|
||||
popularity^0.5 recip(price,1,1000,1000)^0.3
|
||||
</str>
|
||||
<str name="fl">
|
||||
id,name,price,score
|
||||
</str>
|
||||
<str name="mm">
|
||||
2&lt;-1 5&lt;-2 6&lt;90% </str>
|
||||
<int name="ps">100</int>
|
||||
<str name="q.alt">*:*</str>
|
||||
<!-- example highlighter config, enable per-query with hl=true -->
|
||||
<str name="hl.fl">text features name</str>
|
||||
<!-- for this field, we want no fragmenting, just highlighting -->
|
||||
<str name="f.name.hl.fragsize">0</str>
|
||||
<!--
|
||||
instructs Solr to return the field itself if no query terms are
|
||||
found
|
||||
-->
|
||||
<str name="f.name.hl.alternateField">name</str>
|
||||
<str name="f.text.hl.fragmenter">regex</str> <!-- defined below -->
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
<!--
|
||||
Note how you can register the same handler multiple times with
|
||||
different names (and different init parameters)
|
||||
-->
|
||||
<requestHandler name="/partitioned" class="solr.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="defType">dismax</str>
|
||||
<str name="echoParams">explicit</str>
|
||||
<str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str>
|
||||
<str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
|
||||
<!--
|
||||
This is an example of using Date Math to specify a constantly
|
||||
moving date range in a config...
|
||||
-->
|
||||
<str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str>
|
||||
</lst>
|
||||
<!--
|
||||
In addition to defaults, "appends" params can be specified to
|
||||
identify values which should be appended to the list of multi-val
|
||||
params from the query (or the existing "defaults"). In this
|
||||
example, the param "fq=instock:true" will be appended to any query
|
||||
time fq params the user may specify, as a mechanism for
|
||||
partitioning the index, independent of any user selected filtering
|
||||
that may also be desired (perhaps as a result of faceted
|
||||
searching). NOTE: there is *absolutely* nothing a client can do to
|
||||
prevent these "appends" values from being used, so don't use this
|
||||
mechanism unless you are sure you always want it.
|
||||
-->
|
||||
<lst name="appends">
|
||||
<str name="fq">inStock:true</str>
|
||||
</lst>
|
||||
<!--
|
||||
"invariants" are a way of letting the Solr maintainer lock down
|
||||
the options available to Solr clients. Any params values specified
|
||||
here are used regardless of what values may be specified in either
|
||||
the query, the "defaults", or the "appends" params. In this
|
||||
example, the facet.field and facet.query params are fixed,
|
||||
limiting the facets clients can use. Faceting is not turned on by
|
||||
default - but if the client does specify facet=true in the
|
||||
request, these are the only facets they will be able to see counts
|
||||
for; regardless of what other facet.field or facet.query params
|
||||
they may specify. NOTE: there is *absolutely* nothing a client can
|
||||
do to prevent these "invariants" values from being used, so don't
|
||||
use this mechanism unless you are sure you always want it.
|
||||
-->
|
||||
<lst name="invariants">
|
||||
<str name="facet.field">cat</str>
|
||||
<str name="facet.field">manu_exact</str>
|
||||
<str name="facet.query">price:[* TO 500]</str>
|
||||
<str name="facet.query">price:[500 TO *]</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!--
|
||||
Search components are registered to SolrCore and used by Search
|
||||
Handlers. By default, the following components are available:
|
||||
|
||||
<searchComponent name="query"
|
||||
class="org.apache.solr.handler.component.QueryComponent" />
|
||||
<searchComponent name="facet"
|
||||
class="org.apache.solr.handler.component.FacetComponent" />
|
||||
<searchComponent name="mlt"
|
||||
class="org.apache.solr.handler.component.MoreLikeThisComponent" />
|
||||
<searchComponent name="highlight"
|
||||
class="org.apache.solr.handler.component.HighlightComponent" />
|
||||
<searchComponent name="stats"
|
||||
class="org.apache.solr.handler.component.StatsComponent" />
|
||||
<searchComponent name="debug"
|
||||
class="org.apache.solr.handler.component.DebugComponent" /> Default
|
||||
configuration in a requestHandler would look like: <arr
|
||||
name="components"> <str>query</str> <str>facet</str> <str>mlt</str>
|
||||
<str>highlight</str> <str>stats</str> <str>debug</str> </arr> If you
|
||||
register a searchComponent to one of the standard names, that will
|
||||
be used instead. To insert components before or after the 'standard'
|
||||
components, use: <arr name="first-components">
|
||||
<str>myFirstComponentName</str> </arr> <arr name="last-components">
|
||||
<str>myLastComponentName</str> </arr>
|
||||
-->
|
||||
|
||||
<!--
|
||||
The spell check component can return a list of alternative spelling
|
||||
suggestions.
|
||||
-->
|
||||
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
|
||||
|
||||
<str name="queryAnalyzerFieldType">textSpell</str>
|
||||
|
||||
<lst name="spellchecker">
|
||||
<str name="name">default</str>
|
||||
<str name="field">name</str>
|
||||
<str name="spellcheckIndexDir">./spellchecker</str>
|
||||
</lst>
|
||||
|
||||
<!--
|
||||
a spellchecker that uses a different distance measure <lst
|
||||
name="spellchecker"> <str name="name">jarowinkler</str> <str
|
||||
name="field">spell</str> <str
|
||||
name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
|
||||
<str name="spellcheckIndexDir">./spellchecker2</str> </lst>
|
||||
-->
|
||||
|
||||
<!--
|
||||
a file based spell checker <lst name="spellchecker"> <str
|
||||
name="classname">solr.FileBasedSpellChecker</str> <str
|
||||
name="name">file</str> <str
|
||||
name="sourceLocation">spellings.txt</str> <str
|
||||
name="characterEncoding">UTF-8</str> <str
|
||||
name="spellcheckIndexDir">./spellcheckerFile</str> </lst>
|
||||
-->
|
||||
</searchComponent>
|
||||
|
||||
<!--
|
||||
A request handler utilizing the spellcheck component.
|
||||
#############################################################################
|
||||
NOTE: This is purely as an example. The whole purpose of the
|
||||
SpellCheckComponent is to hook it into the request handler that
|
||||
handles queries (i.e. the standard or dismax SearchHandler) such
|
||||
that a separate request is not needed to get suggestions. IN OTHER
|
||||
WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU
|
||||
WANT FOR YOUR PRODUCTION SYSTEM!
|
||||
#############################################################################
|
||||
-->
|
||||
<requestHandler name="/spell" class="solr.SearchHandler"
|
||||
lazy="true">
|
||||
<lst name="defaults">
|
||||
<!-- omp = Only More Popular -->
|
||||
<str name="spellcheck.onlyMorePopular">false</str>
|
||||
<!-- exr = Extended Results -->
|
||||
<str name="spellcheck.extendedResults">false</str>
|
||||
<!-- The number of suggestions to return -->
|
||||
<str name="spellcheck.count">1</str>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>spellcheck</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<searchComponent name="tvComponent"
|
||||
class="org.apache.solr.handler.component.TermVectorComponent" />
|
||||
<!--
|
||||
A Req Handler for working with the tvComponent. This is purely as an
|
||||
example. You will likely want to add the component to your already
|
||||
specified request handlers.
|
||||
-->
|
||||
<requestHandler name="/tvrh"
|
||||
class="org.apache.solr.handler.component.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<bool name="tv">true</bool>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>tvComponent</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler -->
|
||||
<requestHandler name="/update/extract"
|
||||
class="org.apache.solr.handler.extraction.ExtractingRequestHandler"
|
||||
startup="lazy">
|
||||
<lst name="defaults">
|
||||
<!--
|
||||
All the main content goes into "text"... if you need to return
|
||||
the extracted text or do highlighting, use a stored field.
|
||||
-->
|
||||
<str name="fmap.content">text</str>
|
||||
<str name="lowernames">true</str>
|
||||
<str name="uprefix">ignored_</str>
|
||||
|
||||
<!-- capture link hrefs but ignore div attributes -->
|
||||
<str name="captureAttr">true</str>
|
||||
<str name="fmap.a">links</str>
|
||||
<str name="fmap.div">ignored_</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!--
|
||||
A component to return terms and document frequency of those terms.
|
||||
This component does not yet support distributed search.
|
||||
-->
|
||||
<searchComponent name="termsComponent"
|
||||
class="org.apache.solr.handler.component.TermsComponent" />
|
||||
|
||||
<requestHandler name="/terms"
|
||||
class="org.apache.solr.handler.component.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<bool name="terms">true</bool>
|
||||
</lst>
|
||||
<arr name="components">
|
||||
<str>termsComponent</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- CSV update handler, loaded on demand -->
|
||||
<requestHandler name="/update/csv" class="solr.CSVRequestHandler"
|
||||
startup="lazy" />
|
||||
|
||||
<!--
|
||||
An example dedup update processor that creates the "id" field on the
|
||||
fly based on the hash code of some other fields. This example has
|
||||
overwriteDupes set to false since we are using the id field as the
|
||||
signatureField and Solr will maintain uniqueness based on that
|
||||
anyway. You have to link the chain to an update handler above to use
|
||||
it, i.e.: <requestHandler name="/update"
|
||||
class="solr.UpdateRequestHandler"> <lst name="defaults"> <str
|
||||
name="update.chain">dedupe</str> </lst> </requestHandler>
|
||||
-->
|
||||
|
||||
|
||||
<!--
|
||||
queryResponseWriter plugins... query responses will be written using
|
||||
the writer specified by the 'wt' request parameter matching the name
|
||||
of a registered writer. The "default" writer is the default and will
|
||||
be used if 'wt' is not specified in the request. XMLResponseWriter
|
||||
will be used if nothing is specified here. The json, python, and
|
||||
ruby writers are also available by default. <queryResponseWriter
|
||||
name="xml" class="org.apache.solr.request.XMLResponseWriter"
|
||||
default="true"/> <queryResponseWriter name="json"
|
||||
class="org.apache.solr.request.JSONResponseWriter"/>
|
||||
<queryResponseWriter name="python"
|
||||
class="org.apache.solr.request.PythonResponseWriter"/>
|
||||
<queryResponseWriter name="ruby"
|
||||
class="org.apache.solr.request.RubyResponseWriter"/>
|
||||
<queryResponseWriter name="php"
|
||||
class="org.apache.solr.request.PHPResponseWriter"/>
|
||||
<queryResponseWriter name="phps"
|
||||
class="org.apache.solr.request.PHPSerializedResponseWriter"/>
|
||||
|
||||
<queryResponseWriter name="custom"
|
||||
class="com.example.MyResponseWriter"/>
|
||||
-->
|
||||
|
||||
<!--
|
||||
XSLT response writer transforms the XML output by any xslt file
|
||||
found in Solr's conf/xslt directory. Changes to xslt files are
|
||||
checked for every xsltCacheLifetimeSeconds.
|
||||
-->
|
||||
<queryResponseWriter name="xslt"
|
||||
class="org.apache.solr.response.XSLTResponseWriter">
|
||||
<int name="xsltCacheLifetimeSeconds">5</int>
|
||||
</queryResponseWriter>
|
||||
|
||||
|
||||
<!--
|
||||
example of registering a query parser <queryParser name="lucene"
|
||||
class="org.apache.solr.search.LuceneQParserPlugin"/>
|
||||
-->
|
||||
|
||||
<!--
|
||||
example of registering a custom function parser <valueSourceParser
|
||||
name="myfunc" class="com.mycompany.MyValueSourceParser" />
|
||||
-->
|
||||
|
||||
<!-- config for the admin interface -->
|
||||
<admin>
|
||||
<defaultQuery>*</defaultQuery>
|
||||
</admin>
|
||||
|
||||
</config>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue