SOLR-11694: Remove outdated UIMA module

This commit is contained in:
Alexandre Rafalovitch 2018-07-07 09:20:40 -04:00
parent 6d6e67140b
commit b7d14c50fb
130 changed files with 10 additions and 10307 deletions

View File

@ -264,7 +264,7 @@
<!-- TODO: find a better way to exclude duplicate JAR files & fix the servlet-api mess! -->
<pathconvert property="netbeans.path.libs" pathsep=":" dirsep="/">
<fileset dir="${basedir}/lucene" includes="**/lib/*.jar"
excludes="**/*servlet-api*.jar, analysis/uima/**, tools/**, build/**"/>
excludes="**/*servlet-api*.jar, tools/**, build/**"/>
<fileset dir="${basedir}/solr" includes="**/test-lib/*.jar,**/lib/*.jar"
excludes="core/test-lib/*servlet-api*.jar, contrib/analysis-extras/**, test-framework/lib/junit*, test-framework/lib/ant*, test-framework/lib/randomizedtesting*, build/**, dist/**, package/**, server/solr-webapp/**" />
<map from="${basedir}/" to=""/>
@ -311,7 +311,7 @@
</pathconvert>
<!-- TODO: find a better way to exclude duplicate JAR files & fix the servlet-api mess! -->
<pathconvert property="eclipse.fileset.libs" pathsep="|" dirsep="/">
<fileset dir="${basedir}/lucene" includes="**/lib/*.jar" excludes="**/*servlet-api*.jar, analysis/uima/**, tools/**, build/**"/>
<fileset dir="${basedir}/lucene" includes="**/lib/*.jar" excludes="**/*servlet-api*.jar, tools/**, build/**"/>
<fileset dir="${basedir}/solr" includes="**/test-lib/*.jar,**/lib/*.jar" excludes="core/test-lib/*servlet-api*.jar, contrib/analysis-extras/**, test-framework/lib/junit*, test-framework/lib/ant*, test-framework/lib/randomizedtesting*, build/**, dist/**, package/**" />
<map from="${basedir}/" to=""/>
</pathconvert>

View File

@ -15,7 +15,6 @@
<buildFile url="file://$PROJECT_DIR$/lucene/analysis/phonetic/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/analysis/smartcn/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/analysis/stempel/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/analysis/uima/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/benchmark/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/classification/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/codecs/build.xml" />
@ -48,7 +47,6 @@
<buildFile url="file://$PROJECT_DIR$/solr/contrib/extraction/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/langid/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/prometheus-exporter/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/uima/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/velocity/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/solrj/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/test-framework/build.xml" />

View File

@ -20,7 +20,6 @@
<module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/phonetic/phonetic.iml" />
<module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/smartcn/smartcn.iml" />
<module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/stempel/stempel.iml" />
<module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/uima/analysis-uima.iml" />
<module group="Lucene/Other" filepath="$PROJECT_DIR$/lucene/benchmark/src/benchmark.iml" />
<module group="Lucene/Other" filepath="$PROJECT_DIR$/lucene/benchmark/conf/benchmark-conf.iml" />
@ -59,7 +58,6 @@
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/langid/langid.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/ltr/ltr.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/prometheus-exporter/prometheus-exporter.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/uima/uima.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/velocity/velocity.iml" />
</modules>
</component>

View File

@ -76,14 +76,6 @@
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
<configuration default="false" name="Module analyzers-uima" type="JUnit" factoryName="JUnit">
<module name="analysis-uima" />
<option name="TEST_OBJECT" value="pattern" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/lucene/analysis/uima" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
<configuration default="false" name="Module backward-codecs" type="JUnit" factoryName="JUnit">
<module name="backward-codecs" />
<option name="TEST_OBJECT" value="pattern" />
@ -332,14 +324,6 @@
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
<configuration default="false" name="Solr uima contrib" type="JUnit" factoryName="JUnit">
<module name="uima" />
<option name="TEST_OBJECT" value="pattern" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/solr/contrib/solr-uima" />
<option name="VM_PARAMETERS" value="-ea -Dtests.luceneMatchVersion=@version.base@ -DtempDir=temp -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
<configuration default="false" name="Solr velocity contrib" type="JUnit" factoryName="JUnit">
<module name="velocity" />
<option name="TEST_OBJECT" value="pattern" />
@ -359,7 +343,6 @@
<item index="6" class="java.lang.String" itemvalue="JUnit.Module analyzers-phonetic" />
<item index="7" class="java.lang.String" itemvalue="JUnit.Module analyzers-smartcn" />
<item index="8" class="java.lang.String" itemvalue="JUnit.Module analyzers-stempel" />
<item index="9" class="java.lang.String" itemvalue="JUnit.Module analyzers-uima" />
<item index="10" class="java.lang.String" itemvalue="JUnit.Module backward-codecs" />
<item index="11" class="java.lang.String" itemvalue="JUnit.Module benchmark" />
<item index="12" class="java.lang.String" itemvalue="JUnit.Module classification" />
@ -391,7 +374,6 @@
<item index="38" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
<item index="39" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
<item index="40" class="java.lang.String" itemvalue="JUnit.Solr prometheus-exporter contrib" />
<item index="41" class="java.lang.String" itemvalue="JUnit.Solr uima contrib" />
<item index="42" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
</list>
</component>

View File

@ -1,30 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<!-- IntelliJ IDEA module descriptor for the Lucene UIMA analysis module (lucene/analysis/uima). -->
<component name="NewModuleRootManager" inherit-compiler-output="false">
<!-- Main and test classes are written under the shared idea-build tree, three levels above this module. -->
<output url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/uima/classes/java" />
<output-test url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/uima/classes/test" />
<exclude-output />
<!-- Source roots: production code and resources, then test code and test resources. -->
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
</content>
<!-- The orderEntry elements below are kept in their original order. -->
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<!-- Module-local library: every JAR found directly in this module's lib directory (not recursive). -->
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="file://$MODULE_DIR$/lib" />
</CLASSES>
<JAVADOC />
<SOURCES />
<jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
</library>
</orderEntry>
<!-- Test-only dependencies. -->
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
<!-- Dependencies on other project modules. -->
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene-core" />
</component>
</module>

View File

@ -1,36 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<!-- IntelliJ IDEA module descriptor for the Solr UIMA contrib; build output goes to idea-build/solr/contrib/solr-uima. -->
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-uima/classes/java" />
<output-test url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-uima/classes/test" />
<exclude-output />
<!-- Source roots: test code and test resources, then production code and resources. -->
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
</content>
<!-- The orderEntry elements below are kept in their original order. -->
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<!-- Project-level libraries: JUnit is test-scoped, the Solr core and Solrj libraries are not. -->
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="library" name="Solr core library" level="project" />
<orderEntry type="library" name="Solrj library" level="project" />
<!-- Module-local library: every JAR found directly in this module's lib directory (not recursive). -->
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="file://$MODULE_DIR$/lib" />
</CLASSES>
<JAVADOC />
<SOURCES />
<jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
</library>
</orderEntry>
<!-- Test frameworks are test-scoped module dependencies. -->
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
<!-- Module dependencies, including the Lucene analysis-uima module. -->
<orderEntry type="module" module-name="solr-core" />
<orderEntry type="module" module-name="solrj" />
<orderEntry type="module" module-name="lucene-core" />
<orderEntry type="module" module-name="analysis-uima" />
<orderEntry type="module" module-name="analysis-common" />
</component>
</module>

View File

@ -40,7 +40,6 @@
<module>phonetic</module>
<module>smartcn</module>
<module>stempel</module>
<module>uima</module>
</modules>
<build>
<plugins>

View File

@ -1,74 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven POM template for the lucene-analyzers-uima artifact. -->
<!-- NOTE(review): the @...@ tokens in this file look like placeholders filled in by a build step; confirm against the generator. -->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-parent</artifactId>
<version>@version@</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-uima</artifactId>
<packaging>jar</packaging>
<name>Lucene UIMA Analysis Components</name>
<description>
Lucene Integration with UIMA for extracting metadata from arbitrary (text)
fields and enrich document with features extracted from UIMA types
(language, sentences, concepts, named entities, etc.)
</description>
<!-- module-path is derived from relative-top-level and module-directory; the build section uses it for every source and resource directory. -->
<properties>
<module-directory>lucene/analysis/uima</module-directory>
<relative-top-level>../../../..</relative-top-level>
<module-path>${relative-top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>scm:git:${vc-anonymous-base-url}</connection>
<developerConnection>scm:git:${vc-dev-base-url}</developerConnection>
<url>${vc-browse-base-url};f=${module-directory}</url>
</scm>
<dependencies>
<dependency>
<!-- lucene-test-framework dependency must be declared before lucene-core -->
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-test-framework</artifactId>
<scope>test</scope>
</dependency>
<!-- Placeholder tokens for the remaining internal/external and test dependencies; keep them after the test-framework entry above. -->
@lucene-analyzers-uima.internal.dependencies@
@lucene-analyzers-uima.external.dependencies@
@lucene-analyzers-uima.internal.test.dependencies@
@lucene-analyzers-uima.external.test.dependencies@
</dependencies>
<!-- Sources and resources are read from src/java, src/test, src/resources and src/test-files under module-path instead of the Maven default layout. -->
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
<resources>
<resource>
<directory>${module-path}/src/resources</directory>
</resource>
</resources>
<testResources>
<testResource>
<directory>${module-path}/src/test-files</directory>
</testResource>
</testResources>
</build>
</project>

View File

@ -40,7 +40,6 @@
<module>langid</module>
<module>ltr</module>
<module>prometheus-exporter</module>
<module>uima</module>
<module>velocity</module>
</modules>
<build>

View File

@ -1,83 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven POM template for the solr-uima artifact. -->
<!-- NOTE(review): the @...@ tokens in this file look like placeholders filled in by a build step; confirm against the generator. -->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-parent</artifactId>
<version>@version@</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<groupId>org.apache.solr</groupId>
<artifactId>solr-uima</artifactId>
<packaging>jar</packaging>
<name>Apache Solr UIMA integration</name>
<description>Apache Solr - UIMA integration</description>
<!-- module-path is derived from relative-top-level and module-directory; the build section uses it for the module's own source and resource directories. -->
<properties>
<module-directory>solr/contrib/uima</module-directory>
<relative-top-level>../../../..</relative-top-level>
<module-path>${relative-top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>scm:git:${vc-anonymous-base-url}</connection>
<developerConnection>scm:git:${vc-dev-base-url}</developerConnection>
<url>${vc-browse-base-url};f=${module-directory}</url>
</scm>
<dependencies>
<dependency>
<!-- lucene-test-framework dependency must be declared before lucene-core -->
<!-- This dependency cannot be put into solr-parent, because local -->
<!-- dependencies are always ordered before inherited dependencies. -->
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-test-framework</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-test-framework</artifactId>
<scope>test</scope>
</dependency>
<!-- Placeholder tokens for the remaining internal/external and test dependencies; keep them after the test-framework entries above. -->
@solr-uima.internal.dependencies@
@solr-uima.external.dependencies@
@solr-uima.internal.test.dependencies@
@solr-uima.external.test.dependencies@
</dependencies>
<!-- Sources and resources are read from src/java, src/test, src/resources and src/test-files under module-path instead of the Maven default layout. -->
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
<resources>
<resource>
<directory>${module-path}/src/resources</directory>
</resource>
</resources>
<testResources>
<testResource>
<directory>${module-path}/src/test-files</directory>
</testResource>
<!-- Second test resource root: only maven.testlogging.properties from dev-tools/maven/solr is included. -->
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
</build>
</project>

View File

@ -87,21 +87,6 @@ my @moves = (
'solr/contrib/extraction/src/main/java'
=> 'solr/contrib/extraction/src/java',
'solr/contrib/uima/src/test/java'
=> 'solr/contrib/uima/src/test',
'solr/contrib/uima/src/test/resources/solr-uima'
=> 'solr/contrib/uima/src/test-files/uima/solr',
'solr/contrib/uima/src/test/resources'
=> 'solr/contrib/uima/src/test-files/uima',
'solr/contrib/uima/src/main/java'
=> 'solr/contrib/uima/src/java',
'solr/contrib/uima/src/main/resources'
=> 'solr/contrib/uima/src/resources',
'solr/src/test-files/books.csv'
=> 'solr/solrj/src/test-files/solrj/books.csv',

View File

@ -201,7 +201,6 @@ def get_solr_init_changes():
Apache Tika %(org.apache.tika.version)s
Carrot2 %(/org.carrot2/carrot2-mini)s
Velocity %(/org.apache.velocity/velocity)s and Velocity Tools %(/org.apache.velocity/velocity-tools)s
Apache UIMA %(org.apache.uima.version)s
Apache ZooKeeper %(/org.apache.zookeeper/zookeeper)s
Jetty %(org.eclipse.jetty.version)s

View File

@ -47,10 +47,6 @@ lucene-analyzers-stempel-XX.jar
An add-on analysis library that contains a universal algorithmic stemmer,
including tables for the Polish language.
lucene-analyzers-uima-XX.jar
An add-on analysis library that contains tokenizers/analyzers using
Apache UIMA extracted annotations to identify tokens/types/etc.
common/src/java
icu/src/java
kuromoji/src/java
@ -60,7 +56,6 @@ opennlp/src/java
phonetic/src/java
smartcn/src/java
stempel/src/java
uima/src/java
The source code for the libraries.
common/src/test
@ -72,5 +67,4 @@ opennlp/src/test
phonetic/src/test
smartcn/src/test
stempel/src/test
uima/src/test
Unit tests for the libraries.

View File

@ -28,7 +28,6 @@
- nori: Korean Morphological Analyzer
- smartcn: Smart Analyzer for Simplified Chinese Text
- stempel: Algorithmic Stemmer for Polish
- uima: UIMA Analysis module
</description>
<dirname file="${ant.file.analyzers}" property="analyzers.dir"/>
@ -86,12 +85,8 @@
<ant dir="stempel" />
</target>
<target name="uima">
<ant dir="uima" />
</target>
<target name="default" depends="compile"/>
<target name="compile" depends="common,icu,kuromoji,morfologik,nori,opennlp,phonetic,smartcn,stempel,uima" />
<target name="compile" depends="common,icu,kuromoji,morfologik,nori,opennlp,phonetic,smartcn,stempel" />
<target name="clean">
<forall-analyzers target="clean"/>

View File

@ -1,50 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="analyzers-uima" default="default">
<!-- Ant build for the UIMA analysis module; shared targets come from the imported analysis-module-build.xml. -->
<description>
Analysis integration with Apache UIMA
</description>
<!-- Directory of test files; also added to the test classpath below. -->
<property name="tests.userdir" value="src/test-files"/>
<!-- TODO: why is this limited to one JVM? -->
<property name="tests.jvms.override" value="1" />
<!-- TODO: fix this in UIMA itself. Until then tests.policy points at the Solr tests policy file (NOTE(review): presumably UIMA needs permissions the default policy does not grant; confirm). -->
<property name="tests.policy" location="../../tools/junit4/solr-tests.policy"/>
<!-- All files in this module's lib directory. -->
<path id="uimajars">
<fileset dir="lib"/>
</path>
<import file="../analysis-module-build.xml"/>
<!-- Compile classpath: the analyzers-common JAR, the UIMA JARs, then the base classpath. -->
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="uimajars"/>
<path refid="base.classpath"/>
</path>
<!-- Test classpath: the base test classpath plus the test-files directory. -->
<path id="test.classpath">
<path refid="test.base.classpath"/>
<pathelement path="${tests.userdir}"/>
</path>
<!-- compile-core depends on jar-analyzers-common, then delegates to common.compile-core. -->
<target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
</project>

View File

@ -1,30 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<ivy-module version="2.0">
<!-- Ivy descriptor listing the third-party JARs of the Lucene analyzers-uima module. -->
<info organisation="org.apache.lucene" module="analyzers-uima"/>
<!-- Single non-transitive "compile" configuration, mapped to each dependency's "master" configuration by default. -->
<configurations defaultconfmapping="compile->master">
<conf name="compile" transitive="false"/>
</configurations>
<dependencies>
<!-- Each revision is read from a property named /org.apache.uima/ followed by the artifact name. -->
<dependency org="org.apache.uima" name="Tagger" rev="${/org.apache.uima/Tagger}" conf="compile"/>
<dependency org="org.apache.uima" name="WhitespaceTokenizer" rev="${/org.apache.uima/WhitespaceTokenizer}" conf="compile"/>
<dependency org="org.apache.uima" name="uimaj-core" rev="${/org.apache.uima/uimaj-core}" conf="compile"/>
<!-- Excludes, for every organisation, artifacts whose type matches the ivy.exclude.types regular expression. -->
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>

View File

@ -1,96 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.lucene.util.AttributeFactory;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceInitializationException;
import java.io.IOException;
import java.io.Reader;
import java.util.Map;
/**
 * Abstract base implementation of a {@link Tokenizer} which is able to analyze the given input
 * with a UIMA {@link AnalysisEngine}.
 * <p>
 * The analysis engine and the {@link CAS} are created on the first call to
 * {@link #analyzeInput()} and kept for later calls. Subclasses build their
 * {@link #iterator} in {@link #initializeIterator()}; {@link #reset()} clears it.
 */
public abstract class BaseUIMATokenizer extends Tokenizer {

  /** Size, in chars, of the buffer used to drain the tokenizer input. */
  private static final int READ_BUFFER_SIZE = 4096;

  /** Iterator over the annotations of the current input; {@code null} after {@link #reset()}. */
  protected FSIterator<AnnotationFS> iterator;

  private final String descriptorPath;
  private final Map<String, Object> configurationParameters;

  /** Analysis engine, created on the first {@link #analyzeInput()} call. */
  protected AnalysisEngine ae;

  /** CAS holding the document text and the annotations produced by {@link #ae}. */
  protected CAS cas;

  /**
   * @param factory                 attribute factory handed to the {@link Tokenizer} super class
   * @param descriptorPath          descriptor path passed to the AE provider when the engine is created
   * @param configurationParameters parameters passed to the AE provider when the engine is created
   */
  protected BaseUIMATokenizer
      (AttributeFactory factory, String descriptorPath, Map<String, Object> configurationParameters) {
    super(factory);
    this.descriptorPath = descriptorPath;
    this.configurationParameters = configurationParameters;
  }

  /**
   * analyzes the tokenizer input using the given analysis engine
   * <p>
   * {@link #cas} will be filled with extracted metadata (UIMA annotations, feature structures)
   *
   * @throws ResourceInitializationException declared by the engine and CAS creation calls
   * @throws AnalysisEngineProcessException declared by the engine's process call
   * @throws IOException If there is a low-level I/O error.
   */
  protected void analyzeInput() throws ResourceInitializationException, AnalysisEngineProcessException, IOException {
    if (ae == null) {
      ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE();
    }
    if (cas == null) {
      cas = ae.newCAS();
    } else {
      // reuse the existing CAS for the next document instead of allocating a new one
      cas.reset();
    }
    cas.setDocumentText(toString(input));
    ae.process(cas);
  }

  /**
   * initialize the FSIterator which is used to build tokens at each incrementToken() method call
   *
   * @throws IOException If there is a low-level I/O error.
   */
  protected abstract void initializeIterator() throws IOException;

  /**
   * Reads the whole content of the given reader into a String.
   * <p>
   * The input is drained in blocks rather than one char per {@code read()} call;
   * the resulting String is the same.
   */
  private String toString(Reader reader) throws IOException {
    StringBuilder stringBuilder = new StringBuilder();
    char[] buffer = new char[READ_BUFFER_SIZE];
    int read;
    while ((read = reader.read(buffer, 0, buffer.length)) > -1) {
      stringBuilder.append(buffer, 0, read);
    }
    return stringBuilder.toString();
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    // a null iterator makes subclasses re-run the analysis on the next incrementToken() call
    iterator = null;
  }

}

View File

@ -1,90 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceInitializationException;
import java.io.IOException;
import java.util.Map;
/**
 * A {@link Tokenizer} emitting one token per UIMA annotation of a configured type.
 * <p>
 * The term text is the annotation's covered text and the offsets are the
 * (corrected) begin and end of the annotation.
 */
public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {

  private final CharTermAttribute termAttr;

  private final OffsetAttribute offsetAttr;

  /** Name of the UIMA type whose annotations become tokens. */
  private final String tokenTypeString;

  /** Corrected length of the analyzed document text, reported by {@link #end()}. */
  private int finalOffset = 0;

  /** Same as the four-argument constructor, using {@code DEFAULT_TOKEN_ATTRIBUTE_FACTORY}. */
  public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
    this(descriptorPath, tokenType, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
  }

  /**
   * @param descriptorPath          descriptor path handed to the base tokenizer
   * @param tokenType               name of the UIMA type to turn into tokens
   * @param configurationParameters parameters handed to the base tokenizer
   * @param factory                 attribute factory handed to the base tokenizer
   */
  public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,
                                  AttributeFactory factory) {
    super(factory, descriptorPath, configurationParameters);
    this.tokenTypeString = tokenType;
    this.termAttr = addAttribute(CharTermAttribute.class);
    this.offsetAttr = addAttribute(OffsetAttribute.class);
  }

  /**
   * Runs the analysis, records the final offset and positions {@link #iterator}
   * on the annotation index of the configured token type. UIMA failures are
   * rethrown wrapped in an {@link IOException}.
   */
  @Override
  protected void initializeIterator() throws IOException {
    try {
      analyzeInput();
    } catch (AnalysisEngineProcessException | ResourceInitializationException e) {
      throw new IOException(e);
    }
    finalOffset = correctOffset(cas.getDocumentText().length());
    final Type annotationType = cas.getTypeSystem().getType(tokenTypeString);
    iterator = cas.getAnnotationIndex(annotationType).iterator();
  }

  @Override
  public boolean incrementToken() throws IOException {
    // first call after construction or reset(): analyze the input lazily
    if (iterator == null) {
      initializeIterator();
    }
    if (!iterator.hasNext()) {
      return false;
    }
    clearAttributes();
    final AnnotationFS annotation = iterator.next();
    termAttr.append(annotation.getCoveredText());
    final int startOffset = correctOffset(annotation.getBegin());
    final int endOffset = correctOffset(annotation.getEnd());
    offsetAttr.setOffset(startOffset, endOffset);
    return true;
  }

  @Override
  public void end() throws IOException {
    super.end();
    offsetAttr.setOffset(finalOffset, finalOffset);
  }
}

View File

@ -1,47 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.HashMap;
import java.util.Map;
/**
 * {@link org.apache.lucene.analysis.util.TokenizerFactory} for {@link UIMAAnnotationsTokenizer}
 * <p>
 * Reads the {@code tokenType} and {@code descriptorPath} arguments via
 * {@code require(...)} and hands the argument map to the tokenizer as its
 * configuration parameters.
 */
public class UIMAAnnotationsTokenizerFactory extends TokenizerFactory {

  // Assigned once in the constructor; final to match the same fields in the sibling analyzer classes.
  private final String descriptorPath;
  private final String tokenType;
  private final Map<String,Object> configurationParameters = new HashMap<>();

  /**
   * Creates a new UIMAAnnotationsTokenizerFactory
   *
   * @param args factory arguments; {@code tokenType} and {@code descriptorPath} are read with
   *             {@code require(...)} (NOTE(review): presumably rejects a missing key; confirm in
   *             TokenizerFactory), then the contents of {@code args} at that point are copied
   *             into the configuration parameters
   */
  public UIMAAnnotationsTokenizerFactory(Map<String,String> args) {
    super(args);
    tokenType = require(args, "tokenType");
    descriptorPath = require(args, "descriptorPath");
    configurationParameters.putAll(args);
  }

  /** Creates a tokenizer from the stored descriptor path, token type and configuration parameters. */
  @Override
  public UIMAAnnotationsTokenizer create(AttributeFactory factory) {
    return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, factory);
  }
}

View File

@ -1,44 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Analyzer;
import java.util.Map;
/**
 * An {@link Analyzer} whose token stream consists solely of a
 * {@link UIMAAnnotationsTokenizer}; no token filters are applied.
 */
public final class UIMABaseAnalyzer extends Analyzer {

  private final String descriptorPath;
  private final String tokenType;
  private final Map<String, Object> configurationParameters;

  /**
   * @param descriptorPath          descriptor path passed to each created tokenizer
   * @param tokenType               token type passed to each created tokenizer
   * @param configurationParameters configuration parameters passed to each created tokenizer
   */
  public UIMABaseAnalyzer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
    this.configurationParameters = configurationParameters;
    this.tokenType = tokenType;
    this.descriptorPath = descriptorPath;
  }

  /** Builds the components for a field; the field name is not used. */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    final UIMAAnnotationsTokenizer source =
        new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters);
    return new TokenStreamComponents(source);
  }

}

View File

@ -1,44 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Analyzer;
import java.util.Map;
/**
 * An {@link Analyzer} whose token stream consists solely of a
 * {@link UIMATypeAwareAnnotationsTokenizer}; no token filters are applied.
 */
public final class UIMATypeAwareAnalyzer extends Analyzer {

  private final String descriptorPath;
  private final String tokenType;
  private final String featurePath;
  private final Map<String, Object> configurationParameters;

  /**
   * @param descriptorPath          descriptor path passed to each created tokenizer
   * @param tokenType               token type passed to each created tokenizer
   * @param featurePath             feature path passed to each created tokenizer
   * @param configurationParameters configuration parameters passed to each created tokenizer
   */
  public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath, Map<String, Object> configurationParameters) {
    this.configurationParameters = configurationParameters;
    this.featurePath = featurePath;
    this.tokenType = tokenType;
    this.descriptorPath = descriptorPath;
  }

  /** Builds the components for a field; the field name is not used. */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    final UIMATypeAwareAnnotationsTokenizer source =
        new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters);
    return new TokenStreamComponents(source);
  }

}

View File

@ -1,113 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FeaturePath;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceInitializationException;
import java.io.IOException;
import java.util.Map;
/**
 * A {@link Tokenizer} that emits one token per UIMA annotation of a configured type and fills
 * each token's {@link TypeAttribute} with the value found at a configured
 * {@link org.apache.uima.cas.FeaturePath} on that annotation.
 */
public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {

  private final TypeAttribute typeAttr;
  private final CharTermAttribute termAttr;
  private final OffsetAttribute offsetAttr;

  /** Name of the annotation type whose instances become tokens. */
  private final String tokenTypeString;

  /** Feature path expression evaluated on every annotation to obtain the token type. */
  private final String typeAttributeFeaturePath;

  /** Feature path built during {@link #initializeIterator()}; null if its initialization failed. */
  private FeaturePath featurePath;

  /** Corrected offset of the end of the document text, reported by {@link #end()}. */
  private int finalOffset = 0;

  /**
   * Same as the five-argument constructor, using {@code DEFAULT_TOKEN_ATTRIBUTE_FACTORY}.
   */
  public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters) {
    this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
  }

  /**
   * @param descriptorPath           forwarded to the superclass constructor
   * @param tokenType                name of the annotation type to iterate over
   * @param typeAttributeFeaturePath feature path whose string value becomes the token's type
   * @param configurationParameters  forwarded to the superclass constructor
   * @param factory                  attribute factory forwarded to the superclass constructor
   */
  public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,
                                           Map<String, Object> configurationParameters, AttributeFactory factory) {
    super(factory, descriptorPath, configurationParameters);
    tokenTypeString = tokenType;
    this.typeAttributeFeaturePath = typeAttributeFeaturePath;
    // Attributes are registered in the same order as before: term, type, offset.
    termAttr = addAttribute(CharTermAttribute.class);
    typeAttr = addAttribute(TypeAttribute.class);
    offsetAttr = addAttribute(OffsetAttribute.class);
  }

  /**
   * Analyzes the input, prepares the feature path, records the final offset and positions the
   * inherited {@code iterator} on the annotations of the configured type.
   *
   * @throws IOException wrapping any UIMA failure raised while analyzing the input or while
   *                     initializing the feature path
   */
  @Override
  protected void initializeIterator() throws IOException {
    try {
      analyzeInput();
    } catch (AnalysisEngineProcessException | ResourceInitializationException analysisFailure) {
      throw new IOException(analysisFailure);
    }

    featurePath = cas.createFeaturePath();
    try {
      featurePath.initialize(typeAttributeFeaturePath);
    } catch (CASException invalidPath) {
      // Do not keep a feature path that could not be initialized.
      featurePath = null;
      throw new IOException(invalidPath);
    }

    finalOffset = correctOffset(cas.getDocumentText().length());
    iterator = cas.getAnnotationIndex(cas.getTypeSystem().getType(tokenTypeString)).iterator();
  }

  /**
   * Advances to the next annotation, if any, and copies its covered text, corrected offsets and
   * feature-path value into the term, offset and type attributes.
   *
   * @return {@code true} if a token was produced, {@code false} once the annotations are exhausted
   */
  @Override
  public boolean incrementToken() throws IOException {
    if (iterator == null) {
      // First call for this input: run the analysis lazily.
      initializeIterator();
    }
    if (!iterator.hasNext()) {
      return false;
    }

    clearAttributes();
    AnnotationFS annotation = iterator.next();
    termAttr.append(annotation.getCoveredText());
    offsetAttr.setOffset(correctOffset(annotation.getBegin()), correctOffset(annotation.getEnd()));
    typeAttr.setType(featurePath.getValueAsString(annotation));
    return true;
  }

  /**
   * After the superclass end-of-stream handling, sets both offsets to the final offset recorded
   * by {@link #initializeIterator()}.
   */
  @Override
  public void end() throws IOException {
    super.end();
    offsetAttr.setOffset(finalOffset, finalOffset);
  }
}

View File

@ -1,50 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.HashMap;
import java.util.Map;
/**
 * {@link org.apache.lucene.analysis.util.TokenizerFactory} that creates
 * {@link UIMATypeAwareAnnotationsTokenizer} instances.
 * <p>
 * Requires the arguments {@code featurePath}, {@code tokenType} and {@code descriptorPath}.
 */
public class UIMATypeAwareAnnotationsTokenizerFactory extends TokenizerFactory {

  private final String descriptorPath;
  private final String tokenType;
  private final String featurePath;
  private final Map<String,Object> configurationParameters;

  /**
   * Creates a new UIMATypeAwareAnnotationsTokenizerFactory.
   *
   * @param args factory arguments; the three required ones are looked up first, then every entry
   *             still present in {@code args} is copied and later handed to each tokenizer as
   *             its configuration parameters
   */
  public UIMATypeAwareAnnotationsTokenizerFactory(Map<String,String> args) {
    super(args);
    // Lookup order is kept: it decides which missing argument is reported first.
    featurePath = require(args, "featurePath");
    tokenType = require(args, "tokenType");
    descriptorPath = require(args, "descriptorPath");
    configurationParameters = new HashMap<String,Object>(args);
  }

  /**
   * Creates a tokenizer configured with this factory's descriptor path, token type, feature path
   * and configuration parameters.
   */
  @Override
  public UIMATypeAwareAnnotationsTokenizer create(AttributeFactory factory) {
    UIMATypeAwareAnnotationsTokenizer tokenizer = new UIMATypeAwareAnnotationsTokenizer(
        descriptorPath, tokenType, featurePath, configurationParameters, factory);
    return tokenizer;
  }
}

View File

@ -1,34 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.ae;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.resource.ResourceInitializationException;
/**
 * Supplier of an Apache UIMA {@link AnalysisEngine}.
 */
public interface AEProvider {

  /**
   * Returns the {@link AnalysisEngine} supplied by this provider.
   *
   * @return the analysis engine
   * @throws ResourceInitializationException declared for implementations that fail to set up the engine
   */
  AnalysisEngine getAE() throws ResourceInitializationException;
}

View File

@ -1,76 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.ae;
import java.util.HashMap;
import java.util.Map;
/**
 * Singleton factory responsible for creating {@link AEProvider}s.
 * <p>
 * Providers are cached: asking again with the same key prefix, descriptor path and runtime
 * parameters (compared through {@code Map.toString()}) returns the provider created the first time.
 */
public class AEProviderFactory {

  private static final AEProviderFactory instance = new AEProviderFactory();

  /** Providers created so far, by cache key; only accessed from synchronized methods. */
  private final Map<String, AEProvider> providerCache = new HashMap<>();

  private AEProviderFactory() {
    // Singleton
  }

  /**
   * @return the single shared factory instance
   */
  public static AEProviderFactory getInstance() {
    return instance;
  }

  /**
   * Returns the cached provider for the given arguments, creating it on first use. An
   * {@link OverridingParamsAEProvider} is created when {@code runtimeParameters} is non-null,
   * a {@link BasicAEProvider} otherwise.
   *
   * @param keyPrefix a prefix of the key used to cache the AEProvider, may be {@code null}
   * @param aePath the AnalysisEngine descriptor path
   * @param runtimeParameters map of runtime parameters to configure inside the AnalysisEngine,
   *                          may be {@code null}
   * @return AEProvider
   */
  public synchronized AEProvider getAEProvider(String keyPrefix, String aePath, Map<String, Object> runtimeParameters) {
    String key = cacheKey(keyPrefix, aePath, runtimeParameters);
    AEProvider aeProvider = providerCache.get(key);
    if (aeProvider == null) {
      if (runtimeParameters != null) {
        aeProvider = new OverridingParamsAEProvider(aePath, runtimeParameters);
      } else {
        aeProvider = new BasicAEProvider(aePath);
      }
      providerCache.put(key, aeProvider);
    }
    return aeProvider;
  }

  /**
   * @param aePath the AnalysisEngine descriptor path
   * @return AEProvider
   */
  public synchronized AEProvider getAEProvider(String aePath) {
    return getAEProvider(null, aePath, null);
  }

  /**
   * @param aePath the AnalysisEngine descriptor path
   * @param runtimeParameters map of runtime parameters to configure inside the AnalysisEngine
   * @return AEProvider
   */
  public synchronized AEProvider getAEProvider(String aePath, Map<String, Object> runtimeParameters) {
    return getAEProvider(null, aePath, runtimeParameters);
  }

  /**
   * Builds the cache key. The prefix and the path are each written as {@code <length>:<text>} so
   * that different (prefix, path) pairs can never produce the same key; with plain
   * concatenation, "ab" + "c" and "a" + "bc" would share a single cached provider.
   */
  private static String cacheKey(String keyPrefix, String aePath, Map<String, Object> runtimeParameters) {
    String prefix = keyPrefix != null ? keyPrefix : "";
    String path = String.valueOf(aePath);
    StringBuilder key = new StringBuilder();
    key.append(prefix.length()).append(':').append(prefix);
    key.append(path.length()).append(':').append(path);
    if (runtimeParameters != null) {
      // An empty map renders as "{}", so it stays distinct from "no parameters".
      key.append(runtimeParameters.toString());
    }
    return key.toString();
  }
}

View File

@ -1,87 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.ae;
import java.io.IOException;
import org.apache.lucene.util.IOUtils;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.XMLInputSource;
/**
 * Basic {@link AEProvider} which just instantiates a UIMA {@link AnalysisEngine} with no additional metadata,
 * parameters or resources.
 * <p>
 * The descriptor is parsed once, on the first successful {@link #getAE()} call, and the resulting
 * description is reused afterwards; every call produces an engine from that description.
 */
public class BasicAEProvider implements AEProvider {

  private final String aePath;

  /** Parsed and configured description; set only after loading and configuration both succeeded. */
  private AnalysisEngineDescription cachedDescription;

  /**
   * @param aePath the AnalysisEngine descriptor path; tried first as given, then as a classpath resource
   */
  public BasicAEProvider(String aePath) {
    this.aePath = aePath;
  }

  /**
   * Produces an {@link AnalysisEngine} from the (lazily loaded) descriptor.
   *
   * @throws ResourceInitializationException if the descriptor cannot be read, parsed or configured,
   *                                         or if the engine cannot be produced
   */
  @Override
  public AnalysisEngine getAE() throws ResourceInitializationException {
    AnalysisEngineDescription description;
    synchronized (this) {
      if (cachedDescription == null) {
        // Cache only a fully configured description. If configureDescription() fails, the next
        // call retries instead of silently producing engines from an unconfigured description.
        cachedDescription = loadDescription();
      }
      description = cachedDescription;
    }
    return UIMAFramework.produceAnalysisEngine(description);
  }

  /**
   * Hook for subclasses to adjust the parsed description before it is cached.
   * The default implementation does nothing.
   */
  protected void configureDescription(AnalysisEngineDescription description) {
    // no configuration
  }

  /** Reads, parses and configures the descriptor, closing the underlying stream in every case. */
  private AnalysisEngineDescription loadDescription() throws ResourceInitializationException {
    AnalysisEngineDescription description;
    XMLInputSource in = null;
    boolean success = false;
    try {
      // get Resource Specifier from XML file
      in = getInputSource();
      // get AE description
      description = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(in);
      configureDescription(description);
      success = true;
    } catch (Exception e) {
      throw new ResourceInitializationException(e);
    } finally {
      if (success) {
        // Nothing else failed, so a close failure is worth reporting.
        try {
          IOUtils.close(in.getInputStream());
        } catch (IOException e) {
          throw new ResourceInitializationException(e);
        }
      } else if (in != null) {
        // Keep the original failure; do not let a close error mask it.
        IOUtils.closeWhileHandlingException(in.getInputStream());
      }
    }
    return description;
  }

  /**
   * Opens the descriptor, first through {@code XMLInputSource(String)} and, if that fails, as a
   * resource resolved relative to this object's class.
   *
   * @throws IOException if neither lookup yields a readable descriptor
   */
  private XMLInputSource getInputSource() throws IOException {
    try {
      return new XMLInputSource(aePath);
    } catch (Exception e) {
      java.net.URL resource = getClass().getResource(aePath);
      if (resource == null) {
        // Report which path was not found, rather than a NullPointerException from a null URL.
        throw new IOException("UIMA descriptor '" + aePath
            + "' could not be opened directly and was not found on the classpath", e);
      }
      return new XMLInputSource(resource);
    }
  }
}

View File

@ -1,69 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.ae;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import java.util.Map;
/**
 * {@link AEProvider} that extends {@link BasicAEProvider} by writing a map of runtime parameters
 * into the description's configuration parameter settings. Each value is first converted to
 * the type declared for that parameter in the descriptor.
 */
public class OverridingParamsAEProvider extends BasicAEProvider {

  private final Map<String, Object> runtimeParameters;

  /**
   * @param aePath            the AnalysisEngine descriptor path, forwarded to {@link BasicAEProvider}
   * @param runtimeParameters parameter name to override value; stored by reference, not copied
   */
  public OverridingParamsAEProvider(String aePath, Map<String, Object> runtimeParameters) {
    super(aePath);
    this.runtimeParameters = runtimeParameters;
  }

  /**
   * Sets every runtime parameter on the description's configuration parameter settings.
   */
  @Override
  protected void configureDescription(AnalysisEngineDescription description) {
    for (Map.Entry<String, Object> override : runtimeParameters.entrySet()) {
      String attributeName = override.getKey();
      Object converted = convert(declaredType(description, attributeName), override.getValue());
      description.getAnalysisEngineMetaData().getConfigurationParameterSettings().setParameterValue(
          attributeName, converted);
    }
  }

  /* look up the type declared for the parameter (group argument is null) */
  private static String declaredType(AnalysisEngineDescription desc, String attributeName) {
    // NOTE(review): if the lookup returns null for a name the descriptor does not declare,
    // this line throws NullPointerException -- confirm and consider a descriptive error.
    return desc.getAnalysisEngineMetaData().getConfigurationParameterDeclarations().
        getConfigurationParameter(null, attributeName).getType();
  }

  /* create the value to inject in the runtime parameter depending on its declared type */
  private static Object convert(String type, Object runtimeValue) {
    // TODO : do it via reflection ? i.e. Class paramType = Class.forName(type)...
    if (runtimeValue == null) {
      return null;
    }
    if ("String".equals(type)) {
      return String.valueOf(runtimeValue);
    }
    if ("Integer".equals(type)) {
      return Integer.valueOf(runtimeValue.toString());
    }
    if ("Boolean".equals(type)) {
      return Boolean.valueOf(runtimeValue.toString());
    }
    if ("Float".equals(type)) {
      return Float.valueOf(runtimeValue.toString());
    }
    // Any other declared type: the parameter is set to null.
    return null;
  }
}

View File

@ -1,21 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Integration with UIMA's AnalysisEngine.
*/
package org.apache.lucene.analysis.uima.ae;

View File

@ -1,21 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Classes that integrate UIMA with Lucene's analysis API.
*/
package org.apache.lucene.analysis.uima;

View File

@ -1,29 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>
analyzers-uima
</title>
</head>
<body>
Analysis integration with <a href="http://uima.apache.org/">Apache UIMA</a>.
<p>
For an introduction to Lucene's analysis API, see the {@link org.apache.lucene.analysis} package documentation.
</p>
</body>
</html>

View File

@ -1,17 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizerFactory
org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizerFactory

View File

@ -1,70 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<!--
Aggregate analysis engine (primitive=false) composed of two delegates, WhitespaceTokenizer and
HmmTagger, both imported by name and executed in that fixed order.
-->
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>false</primitive>
<delegateAnalysisEngineSpecifiers>
<delegateAnalysisEngine key="WhitespaceTokenizer">
<import name="WhitespaceTokenizer"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="HmmTagger">
<import name="HmmTagger"/>
</delegateAnalysisEngine>
</delegateAnalysisEngineSpecifiers>
<analysisEngineMetaData>
<name>AggregateSentenceAE</name>
<description/>
<version>1.0</version>
<vendor/>
<configurationParameters>
<!-- Optional, single-valued Integer parameter of the aggregate; overrides the HmmTagger delegate's NGRAM_SIZE. -->
<configurationParameter>
<name>ngramsize</name>
<type>Integer</type>
<multiValued>false</multiValued>
<mandatory>false</mandatory>
<overrides>
<parameter>HmmTagger/NGRAM_SIZE</parameter>
</overrides>
</configurationParameter>
</configurationParameters>
<!-- No parameter values are set in this descriptor. -->
<configurationParameterSettings/>
<flowConstraints>
<fixedFlow>
<node>WhitespaceTokenizer</node>
<node>HmmTagger</node>
</fixedFlow>
</flowConstraints>
<fsIndexCollection/>
<capabilities>
<capability>
<inputs/>
<!-- Declared outputs: sentence and token annotations, with all their features. -->
<outputs>
<type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
<type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
</outputs>
<languagesSupported/>
</capability>
</capabilities>
<operationalProperties>
<modifiesCas>true</modifiesCas>
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
<outputsNewCASes>false</outputsNewCASes>
</operationalProperties>
</analysisEngineMetaData>
<resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,55 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<!--
Aggregate analysis engine (primitive=false) whose two delegates are imported by location from
TestWSTokenizerAE.xml and TestPoSTaggerAE.xml and executed in that fixed order.
It declares no configuration parameters of its own.
-->
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>false</primitive>
<delegateAnalysisEngineSpecifiers>
<delegateAnalysisEngine key="WhitespaceTokenizer">
<import location="TestWSTokenizerAE.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="PoSTagger">
<import location="TestPoSTaggerAE.xml"/>
</delegateAnalysisEngine>
</delegateAnalysisEngineSpecifiers>
<analysisEngineMetaData>
<name>TestAggregateSentenceAE</name>
<description/>
<version>1.0</version>
<vendor>ASF</vendor>
<!-- The node names below refer to the delegate keys declared above. -->
<flowConstraints>
<fixedFlow>
<node>WhitespaceTokenizer</node>
<node>PoSTagger</node>
</fixedFlow>
</flowConstraints>
<capabilities>
<capability>
<inputs/>
<!-- Declared outputs: sentence and token annotations, with all their features. -->
<outputs>
<type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.SentenceAnnotation</type>
<type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
</outputs>
</capability>
</capabilities>
<operationalProperties>
<modifiesCas>true</modifiesCas>
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
<outputsNewCASes>false</outputsNewCASes>
</operationalProperties>
</analysisEngineMetaData>
</analysisEngineDescription>

View File

@ -1,66 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<!--
Primitive analysis engine implemented by org.apache.lucene.analysis.uima.an.SampleEntityAnnotator.
Declares TokenAnnotation as input and EntityAnnotation as output.
-->
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>true</primitive>
<annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleEntityAnnotator</annotatorImplementationName>
<analysisEngineMetaData>
<name>EntityAnnotator</name>
<description/>
<version>1.0</version>
<vendor>ASF</vendor>
<typeSystemDescription>
<types>
<!-- EntityAnnotation: a uima.tcas.Annotation subtype with two String features, "name" and "entity". -->
<typeDescription>
<name>org.apache.lucene.uima.ts.EntityAnnotation</name>
<description/>
<supertypeName>uima.tcas.Annotation</supertypeName>
<features>
<featureDescription>
<name>name</name>
<description/>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
<featureDescription>
<name>entity</name>
<description/>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
</features>
</typeDescription>
</types>
</typeSystemDescription>
<capabilities>
<capability>
<!-- TokenAnnotation is not defined in this descriptor; it has to come from another type system. -->
<inputs>
<type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
</inputs>
<outputs>
<type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.EntityAnnotation</type>
</outputs>
<languagesSupported/>
</capability>
</capabilities>
<operationalProperties>
<modifiesCas>true</modifiesCas>
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
<outputsNewCASes>false</outputsNewCASes>
</operationalProperties>
</analysisEngineMetaData>
<resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,44 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<!--
Primitive analysis engine implemented by org.apache.lucene.analysis.uima.an.SamplePoSTagger.
TokenAnnotation is declared as both its input and its output type.
This descriptor defines no types and no configuration parameters of its own.
-->
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>true</primitive>
<annotatorImplementationName>org.apache.lucene.analysis.uima.an.SamplePoSTagger</annotatorImplementationName>
<analysisEngineMetaData>
<name>DummyPoSTagger</name>
<description/>
<version>1.0</version>
<vendor>ASF</vendor>
<capabilities>
<capability>
<inputs>
<type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
</inputs>
<outputs>
<type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
</outputs>
<languagesSupported/>
</capability>
</capabilities>
<operationalProperties>
<modifiesCas>true</modifiesCas>
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
<outputsNewCASes>false</outputsNewCASes>
</operationalProperties>
</analysisEngineMetaData>
</analysisEngineDescription>

View File

@ -1,78 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<!--
Primitive analysis engine implemented by
org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator.
Defines the TokenAnnotation and SentenceAnnotation types and declares both as outputs.
-->
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>true</primitive>
<annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator</annotatorImplementationName>
<analysisEngineMetaData>
<name>WSTokenizer</name>
<version>1.0</version>
<vendor>ASF</vendor>
<configurationParameters>
<!-- Optional, single-valued String parameter. -->
<configurationParameter>
<name>line-end</name>
<description>
the string used as line end
</description>
<type>String</type>
<multiValued>false</multiValued>
<mandatory>false</mandatory>
</configurationParameter>
</configurationParameters>
<configurationParameterSettings>
<!--
Value set for line-end. As written it is the two characters backslash and n.
NOTE(review): confirm whether the annotator treats it as a newline.
-->
<nameValuePair>
<name>line-end</name>
<value>
<string>\n</string>
</value>
</nameValuePair>
</configurationParameterSettings>
<typeSystemDescription>
<types>
<!-- TokenAnnotation: a uima.tcas.Annotation subtype with one String feature, "pos". -->
<typeDescription>
<name>org.apache.lucene.uima.ts.TokenAnnotation</name>
<supertypeName>uima.tcas.Annotation</supertypeName>
<features>
<featureDescription>
<name>pos</name>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
</features>
</typeDescription>
<!-- SentenceAnnotation: a uima.tcas.Annotation subtype with no additional features. -->
<typeDescription>
<name>org.apache.lucene.uima.ts.SentenceAnnotation</name>
<supertypeName>uima.tcas.Annotation</supertypeName>
</typeDescription>
</types>
</typeSystemDescription>
<capabilities>
<capability>
<inputs/>
<outputs>
<type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.TokenAnnotation</type>
<type allAnnotatorFeatures="true">org.apache.lucene.uima.ts.SentenceAnnotation</type>
</outputs>
</capability>
</capabilities>
<operationalProperties>
<modifiesCas>true</modifiesCas>
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
<outputsNewCASes>false</outputsNewCASes>
</operationalProperties>
</analysisEngineMetaData>
</analysisEngineDescription>

View File

@ -1,137 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
/**
 * Testcase for {@link UIMABaseAnalyzer}.
 * <p>
 * Every closeable created by a test (directory, writer, readers, analyzers) is managed with
 * try-with-resources so that a failing assertion cannot leak it.
 */
public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {

  /** Analyzer shared by the non-random tests; created in {@link #setUp()} and closed in {@link #tearDown()}. */
  private UIMABaseAnalyzer analyzer;

  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation", null);
  }

  @Override
  @After
  public void tearDown() throws Exception {
    try {
      // the analyzer is still null when its construction failed in setUp()
      if (analyzer != null) {
        analyzer.close();
      }
    } finally {
      // always run the base class cleanup, even if closing the analyzer threw
      super.tearDown();
    }
  }

  /** Checks the terms emitted by the analyzer for a simple sentence. */
  @Test
  public void baseUIMAAnalyzerStreamTest() throws Exception {
    TokenStream ts = analyzer.tokenStream("text", "the big brown fox jumped on the wood");
    assertTokenStreamContents(ts, new String[]{"the", "big", "brown", "fox", "jumped", "on", "the", "wood"});
  }

  /**
   * Indexes two documents with the analyzer and checks, after each commit, that the stored
   * fields of the newest document can be read back through a searcher.
   */
  @Test
  public void baseUIMAAnalyzerIntegrationTest() throws Exception {
    String dummyTitle = "this is a dummy title ";
    String dummyContent = "there is some content written here";
    String dogmasTitle = "dogmas";
    String dogmasContents = "white men can't jump";

    try (Directory dir = new RAMDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {

      // add the first doc
      writer.addDocument(newDoc(dummyTitle, dummyContent));
      writer.commit();

      // try the search over the first doc
      try (DirectoryReader directoryReader = DirectoryReader.open(dir)) {
        IndexSearcher indexSearcher = newSearcher(directoryReader);
        TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 1);
        assertTrue(result.totalHits > 0);
        assertStoredDoc(indexSearcher.doc(result.scoreDocs[0].doc), dummyTitle, dummyContent);
      }

      // add a second doc
      writer.addDocument(newDoc(dogmasTitle, dogmasContents));
      writer.commit();

      // do a matchalldocs query to retrieve both docs
      try (DirectoryReader directoryReader = DirectoryReader.open(dir)) {
        IndexSearcher indexSearcher = newSearcher(directoryReader);
        TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 2);
        // assert the hit count first so a missing document fails here instead of
        // surfacing as an ArrayIndexOutOfBoundsException on scoreDocs[1]
        assertEquals(2, result.totalHits);
        assertStoredDoc(indexSearcher.doc(result.scoreDocs[1].doc), dogmasTitle, dogmasContents);
      }
    }
  }

  @Test @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-3869")
  public void testRandomStrings() throws Exception {
    // named differently from the 'analyzer' field so the field is not shadowed
    try (Analyzer randomAnalyzer = new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", null)) {
      checkRandomData(random(), randomAnalyzer, 100 * RANDOM_MULTIPLIER);
    }
  }

  @Test @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-3869")
  public void testRandomStringsWithConfigurationParameters() throws Exception {
    Map<String, Object> cp = new HashMap<>();
    cp.put("line-end", "\r");
    try (Analyzer randomAnalyzer = new UIMABaseAnalyzer("/uima/TestWSTokenizerAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", cp)) {
      checkRandomData(random(), randomAnalyzer, 100 * RANDOM_MULTIPLIER);
    }
  }

  /** Builds a document with stored "title" and "contents" text fields. */
  private static Document newDoc(String title, String contents) {
    Document doc = new Document();
    doc.add(new TextField("title", title, Field.Store.YES));
    doc.add(new TextField("contents", contents, Field.Store.YES));
    return doc;
  }

  /** Asserts that the document exists and that its stored "title" and "contents" values match. */
  private static void assertStoredDoc(Document d, String expectedTitle, String expectedContents) {
    assertNotNull(d);
    assertNotNull(d.getField("title"));
    assertEquals(expectedTitle, d.getField("title").stringValue());
    assertNotNull(d.getField("contents"));
    assertEquals(expectedContents, d.getField("contents").stringValue());
  }
}

View File

@ -1,70 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Testcase for {@link UIMATypeAwareAnalyzer}.
 */
public class UIMATypeAwareAnalyzerTest extends BaseTokenStreamTestCase {

  /** Analyzer shared by the non-random test; created in {@link #setUp()} and closed in {@link #tearDown()}. */
  private UIMATypeAwareAnalyzer analyzer;

  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    analyzer = new UIMATypeAwareAnalyzer("/uima/AggregateSentenceAE.xml",
        "org.apache.uima.TokenAnnotation", "posTag", null);
  }

  @Override
  @After
  public void tearDown() throws Exception {
    try {
      // the analyzer is still null when its construction failed in setUp()
      if (analyzer != null) {
        analyzer.close();
      }
    } finally {
      // always run the base class cleanup, even if closing the analyzer threw
      super.tearDown();
    }
  }

  @Test
  public void baseUIMATypeAwareAnalyzerStreamTest() throws Exception {
    // create a token stream
    TokenStream ts = analyzer.tokenStream("text", "the big brown fox jumped on the wood");

    // check that 'the big brown fox jumped on the wood' tokens have the expected PoS types
    assertTokenStreamContents(ts,
        new String[]{"the", "big", "brown", "fox", "jumped", "on", "the", "wood"},
        new String[]{"at", "jj", "jj", "nn", "vbd", "in", "at", "nn"});
  }

  @Test @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-3869")
  public void testRandomStrings() throws Exception {
    // try-with-resources closes the analyzer even when checkRandomData fails;
    // the local is named differently from the 'analyzer' field so the field is not shadowed
    try (Analyzer randomAnalyzer = new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml",
        "org.apache.lucene.uima.ts.TokenAnnotation", "pos", null)) {
      checkRandomData(random(), randomAnalyzer, 100 * RANDOM_MULTIPLIER);
    }
  }
}

View File

@ -1,44 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.ae;
import org.junit.Test;
import java.util.HashMap;
import static org.junit.Assert.assertTrue;
/**
 * Testcase for {@link AEProviderFactory}: repeated lookups with the same arguments
 * must hand back the very same {@link AEProvider} instance.
 */
public class AEProviderFactoryTest {

  @Test
  public void testCorrectCaching() throws Exception {
    AEProvider firstLookup = AEProviderFactory.getInstance().getAEProvider("/uima/TestAggregateSentenceAE.xml");
    AEProvider secondLookup = AEProviderFactory.getInstance().getAEProvider("/uima/TestAggregateSentenceAE.xml");
    // identity comparison on purpose: both lookups must return the same object
    assertTrue(firstLookup == secondLookup);
  }

  @Test
  public void testCorrectCachingWithParameters() throws Exception {
    AEProvider firstLookup = AEProviderFactory.getInstance().getAEProvider(
        "prefix", "/uima/TestAggregateSentenceAE.xml", new HashMap<String, Object>());
    // a fresh (but equally empty) parameter map is passed for the second lookup
    AEProvider secondLookup = AEProviderFactory.getInstance().getAEProvider(
        "prefix", "/uima/TestAggregateSentenceAE.xml", new HashMap<String, Object>());
    // identity comparison on purpose: both lookups must return the same object
    assertTrue(firstLookup == secondLookup);
  }
}

View File

@ -1,36 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.ae;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.junit.Test;
import static org.junit.Assert.assertNotNull;
/**
 * TestCase for {@link BasicAEProvider}
 */
public class BasicAEProviderTest {

  /** A provider built from the entity annotator descriptor must return a non-null engine. */
  @Test
  public void testBasicInitialization() throws Exception {
    AEProvider provider = new BasicAEProvider("/uima/TestEntityAnnotatorAE.xml");
    AnalysisEngine engine = provider.getAE();
    assertNotNull(engine);
  }
}

View File

@ -1,59 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.ae;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.resource.ResourceInitializationException;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
/**
 * TestCase for {@link OverridingParamsAEProvider}
 */
public class OverridingParamsAEProviderTest extends LuceneTestCase {

  /** Requesting the engine with a null parameter map must fail with a ResourceInitializationException. */
  @Test
  public void testNullMapInitialization() throws Exception {
    expectThrows(ResourceInitializationException.class,
        () -> new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", null).getAE());
  }

  /** An empty parameter map must still produce an engine. */
  @Test
  public void testEmptyMapInitialization() throws Exception {
    Map<String, Object> noOverrides = new HashMap<String, Object>();
    AEProvider provider = new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", noOverrides);
    AnalysisEngine engine = provider.getAE();
    assertNotNull(engine);
  }

  /** A parameter supplied at runtime must be readable back from the created engine. */
  @Test
  public void testOverridingParamsInitialization() throws Exception {
    Map<String, Object> overrides = new HashMap<>();
    overrides.put("ngramsize", "3");

    AEProvider provider = new OverridingParamsAEProvider("/uima/AggregateSentenceAE.xml", overrides);
    AnalysisEngine engine = provider.getAE();
    assertNotNull(engine);

    Object ngramSize = engine.getConfigParameterValue("ngramsize");
    assertNotNull(ngramSize);
    // compared through its string form, so the check does not depend on the runtime type of the value
    assertEquals(Integer.valueOf(3), Integer.valueOf(ngramSize.toString()));
  }
}

View File

@ -1,64 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.an;
import org.apache.uima.TokenAnnotation;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
/**
 * Dummy implementation of an entity annotator to tag tokens as certain types of entities
 */
public class SampleEntityAnnotator extends JCasAnnotator_ImplBase {

  private static final String NP = "np";
  private static final String NPS = "nps";
  private static final String TYPE_NAME = "org.apache.lucene.analysis.uima.ts.EntityAnnotation";
  private static final String ENTITY_FEATURE = "entity";
  // NOTE(review): same base name as ENTITY_FEATURE, so both lookups in process() ask for the
  // "entity" feature and the second setStringValue overwrites the first - confirm against the
  // type system descriptor whether "name" was intended.
  private static final String NAME_FEATURE = "entity";

  /**
   * Creates one annotation of type {@link #TYPE_NAME} for every {@link TokenAnnotation}
   * whose PoS tag is "np" or "nps", spanning the same offsets as the token.
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    Type entityType = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
    Feature entityFeature = entityType.getFeatureByBaseName(ENTITY_FEATURE);
    Feature nameFeature = entityType.getFeatureByBaseName(NAME_FEATURE);

    for (Annotation token : jcas.getAnnotationIndex(TokenAnnotation.type)) {
      String posTag = ((TokenAnnotation) token).getPosTag();
      boolean taggedAsEntity = NP.equals(posTag) || NPS.equals(posTag);
      if (!taggedAsEntity) {
        continue;
      }

      AnnotationFS entity = jcas.getCas().createAnnotation(entityType, token.getBegin(), token.getEnd());
      entity.setStringValue(entityFeature, token.getCoveredText());

      // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
      String entityName = token.getCoveredText().equals("Apache") ? "ORGANIZATION" : "OTHER";
      entity.setStringValue(nameFeature, entityName);

      jcas.addFsToIndexes(entity);
    }
  }
}

View File

@ -1,57 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.an;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
/**
 * Dummy implementation of a PoS tagger to add part of speech as token types
 */
public class SamplePoSTagger extends JCasAnnotator_ImplBase {

  private static final String NUM = "NUM";
  private static final String WORD = "WORD";
  private static final String TYPE_NAME = "org.apache.lucene.uima.ts.TokenAnnotation";
  private static final String FEATURE_NAME = "pos";

  /** Writes a "NUM" or "WORD" tag into the "pos" feature of every annotation of type {@link #TYPE_NAME}. */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    Type tokenType = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
    Feature pos = tokenType.getFeatureByBaseName(FEATURE_NAME);
    for (Annotation token : jcas.getAnnotationIndex(tokenType)) {
      token.setStringValue(pos, extractPoS(token.getCoveredText()));
    }
  }

  /**
   * Returns {@link #NUM} when {@link Double#valueOf(String)} accepts the text, {@link #WORD} otherwise.
   * Any exception raised by the parse attempt (including the one for a null text) yields {@link #WORD}.
   */
  private String extractPoS(String text) {
    String tag = NUM;
    try {
      Double.valueOf(text);
    } catch (Exception e) {
      tag = WORD;
    }
    return tag;
  }
}

View File

@ -1,66 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.uima.an;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
/**
 * Dummy implementation of a UIMA based whitespace tokenizer.
 * <p>
 * Sentences are the pieces of the document text separated by the configured "line-end"
 * expression; tokens are the pieces separated by a single space.
 */
public class SampleWSTokenizerAnnotator extends JCasAnnotator_ImplBase {

  private final static String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation";
  private final static String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation";
  private static final String WHITESPACE = " ";

  /** Sentence separator, used as a regular expression (it is handed to {@link String#split(String)}). */
  private String lineEnd;

  @Override
  public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);
    // String.valueOf turns a missing (null) parameter into the literal text "null"
    lineEnd = String.valueOf(aContext.getConfigParameterValue("line-end"));
  }

  /** Adds one sentence annotation per line and one token annotation per space separated piece. */
  @Override
  public void process(JCas jCas) throws AnalysisEngineProcessException {
    Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
    Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);

    // add the sentences
    annotateSegments(jCas, sentenceType, lineEnd);

    // get tokens
    annotateSegments(jCas, tokenType, WHITESPACE);
  }

  /**
   * Adds one annotation of the given type for every piece that {@link String#split(String)}
   * produces for the document text, using the real begin and end offsets of that piece.
   * <p>
   * The previous implementation passed the piece length as the end offset of sentences and
   * never advanced past the separators, so every annotation after the first one pointed at
   * the wrong span of text.
   *
   * @param jCas           CAS holding the document text and receiving the annotations
   * @param type           type of the annotations to create
   * @param separatorRegex regular expression that separates two consecutive pieces
   */
  private static void annotateSegments(JCas jCas, Type type, String separatorRegex) {
    String text = jCas.getDocumentText();
    // walks the separators in the same order split() consumes them, to learn how wide each one is
    java.util.regex.Matcher separator = java.util.regex.Pattern.compile(separatorRegex).matcher(text);
    int begin = 0;
    for (String segment : text.split(separatorRegex)) {
      int end = begin + segment.length();
      AnnotationFS annotation = jCas.getCas().createAnnotation(type, begin, end);
      jCas.addFsToIndexes(annotation);

      // the next piece starts right after the separator that ended this one
      begin = end;
      while (separator.find()) {
        // split() ignores a zero-width match at the very beginning of the text
        if (separator.end() > 0) {
          begin = separator.end();
          break;
        }
      }
    }
  }
}

View File

@ -167,7 +167,6 @@
<check-missing-javadocs dir="build/docs/analyzers-phonetic" level="method"/>
<!-- analyzers-smartcn: problems -->
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="method"/>
<!-- analyzers-uima: problems -->
<!-- benchmark: problems -->
<check-missing-javadocs dir="build/docs/classification" level="method"/>
<!-- codecs: problems -->

View File

@ -1993,8 +1993,6 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
<pattern substring="Produced by GNUPLOT"/>
<!-- snowball stemmers generated by snowball compiler -->
<pattern substring="This file was generated automatically by the Snowball to Java compiler"/>
<!-- uima tests generated by JCasGen -->
<pattern substring="First created by JCasGen"/>
<!-- parsers generated by antlr -->
<pattern substring="ANTLR GENERATED CODE"/>
</rat:substringMatcher>

View File

@ -77,8 +77,6 @@ import org.apache.lucene.util.Version;
* Analyzer for Simplified Chinese, which indexes words.
* <li><a href="{@docRoot}/../analyzers-stempel/overview-summary.html">Stempel</a>:
* Algorithmic Stemmer for the Polish Language.
* <li><a href="{@docRoot}/../analyzers-uima/overview-summary.html">UIMA</a>:
* Analysis integration with Apache UIMA.
* </ul>
*/
public abstract class Analyzer implements Closeable {

View File

@ -51,7 +51,6 @@ com.sun.jersey.version = 1.9
/commons-codec/commons-codec = 1.10
/commons-collections/commons-collections = 3.2.2
/commons-configuration/commons-configuration = 1.6
/commons-digester/commons-digester = 2.1
/commons-fileupload/commons-fileupload = 1.3.3
/commons-io/commons-io = 2.5
/commons-lang/commons-lang = 2.6
@ -205,13 +204,6 @@ org.apache.tika.version = 1.17
/org.apache.tika/tika-parsers = ${org.apache.tika.version}
/org.apache.tika/tika-xmp = ${org.apache.tika.version}
org.apache.uima.version = 2.3.1
/org.apache.uima/AlchemyAPIAnnotator = ${org.apache.uima.version}
/org.apache.uima/OpenCalaisAnnotator = ${org.apache.uima.version}
/org.apache.uima/Tagger = ${org.apache.uima.version}
/org.apache.uima/WhitespaceTokenizer = ${org.apache.uima.version}
/org.apache.uima/uimaj-core = ${org.apache.uima.version}
/org.apache.velocity/velocity = 1.7
/org.apache.velocity/velocity-tools = 2.0
/org.apache.xmlbeans/xmlbeans = 2.6.0

View File

@ -1 +0,0 @@
cd02db9e8d54decb14cbe303d001d13735237290

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,7 +0,0 @@
UIMA Annotator: Tagger
Copyright 2006-2010 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

View File

@ -1 +0,0 @@
d7b0fd616c4289376c1f59e2a68edfb4cfd6730d

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,7 +0,0 @@
UIMA Annotator: WhitespaceTokenizer
Copyright 2006-2010 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

View File

@ -1 +0,0 @@
99bf8d75b71410e4d5f2051ae79942721b3a2f60

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,13 +0,0 @@
UIMA Base: uimaj-core
Copyright 2006-2010 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
Portions of Apache UIMA were originally developed by
International Business Machines Corporation and are
licensed to the Apache Software Foundation under the
"Software Grant License Agreement", informally known as the
"IBM UIMA License Agreement".
Copyright (c) 2003, 2006 IBM Corporation.

View File

@ -403,28 +403,6 @@
<property name="analyzers-kuromoji-javadocs.uptodate" value="true"/>
</target>
<!-- Path of the analysis/uima module jar under ${common.dir}/build. -->
<property name="analyzers-uima.jar"
          value="${common.dir}/build/analysis/uima/lucene-analyzers-uima-${version}.jar"/>

<!-- Invokes module-uptodate for analysis/uima against the jar above, naming
     analyzers-uima.uptodate as the result property. Skipped when that property is already set.
     NOTE(review): module-uptodate is defined outside this excerpt; presumably it sets the
     property when the jar is current. Confirm against its definition. -->
<target name="check-analyzers-uima-uptodate"
        unless="analyzers-uima.uptodate">
  <module-uptodate property="analyzers-uima.uptodate"
                   name="analysis/uima"
                   jarfile="${analyzers-uima.jar}"/>
</target>

<!-- Runs the jar-core target in the analysis/uima directory unless analyzers-uima.uptodate
     is set, then sets that property so the work is not repeated. -->
<target name="jar-analyzers-uima"
        depends="check-analyzers-uima-uptodate"
        unless="analyzers-uima.uptodate">
  <!-- inheritAll is false: only the referenced property set is passed to the sub-build. -->
  <ant target="jar-core"
       dir="${common.dir}/analysis/uima"
       inheritAll="false">
    <propertyset refid="uptodate.and.compiled.properties"/>
  </ant>
  <property name="analyzers-uima.uptodate" value="true"/>
</target>
<!-- Path of the analysis/uima module javadoc jar under ${common.dir}/build. -->
<property name="analyzers-uima-javadoc.jar"
          value="${common.dir}/build/analysis/uima/lucene-analyzers-uima-${version}-javadoc.jar"/>

<!-- Invokes module-uptodate for analysis/uima against the javadoc jar, naming
     analyzers-uima-javadocs.uptodate as the result property. Skipped when that property
     is already set.
     NOTE(review): module-uptodate is defined outside this excerpt; confirm its exact
     semantics against its definition. -->
<target name="check-analyzers-uima-javadocs-uptodate"
        unless="analyzers-uima-javadocs.uptodate">
  <module-uptodate property="analyzers-uima-javadocs.uptodate"
                   name="analysis/uima"
                   jarfile="${analyzers-uima-javadoc.jar}"/>
</target>

<!-- Runs the javadocs target in the analysis/uima directory unless
     analyzers-uima-javadocs.uptodate is set, then sets that property. -->
<target name="javadocs-analyzers-uima"
        depends="check-analyzers-uima-javadocs-uptodate"
        unless="analyzers-uima-javadocs.uptodate">
  <!-- inheritAll is false: only the referenced property set is passed to the sub-build. -->
  <ant target="javadocs"
       dir="${common.dir}/analysis/uima"
       inheritAll="false">
    <propertyset refid="uptodate.and.compiled.properties"/>
  </ant>
  <property name="analyzers-uima-javadocs.uptodate" value="true"/>
</target>
<property name="analyzers-morfologik.jar" value="${common.dir}/build/analysis/morfologik/lucene-analyzers-morfologik-${version}.jar"/>
<fileset id="analyzers-morfologik.fileset" dir="${common.dir}">
<include name="build/analysis/morfologik/lucene-analyzers-morfologik-${version}.jar" />

View File

@ -58,8 +58,7 @@ grant {
permission java.lang.RuntimePermission "fileSystemProvider";
// needed for test of IOUtils.spins (maybe it can be avoided)
permission java.lang.RuntimePermission "getFileStoreAttributes";
// analyzers/uima: needed by UIMA message localization... (?)
permission java.lang.RuntimePermission "createSecurityManager";
// needed by lucene expressions' JavascriptCompiler
permission java.lang.RuntimePermission "createClassLoader";
// needed to test unmap hack on platforms that support it
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";

View File

@ -25,7 +25,6 @@ Versions of Major Components
Apache Tika 1.17
Carrot2 3.16.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.11
Jetty 9.4.11.v20180605
@ -54,7 +53,6 @@ Versions of Major Components
Apache Tika 1.17
Carrot2 3.16.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.11
Jetty 9.4.11.v20180605
@ -69,6 +67,8 @@ Upgrade Notes
* SOLR-12395: SignificantTermsQParserPlugin's name is now 'significantTerms' and its old name 'sigificantTerms' is deprecated.
* SOLR-11694: The extremely outdated UIMA contrib module has been removed.
New Features
----------------------

View File

@ -248,7 +248,7 @@
<property name="lucenedocs" location="${common.dir}/build/docs"/>
<!-- dependency to ensure all lucene javadocs are present -->
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-backward-codecs,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial-extras,javadocs-join,javadocs-test-framework"/>
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-backward-codecs,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial-extras,javadocs-join,javadocs-test-framework"/>
<!-- create javadocs for the current module -->
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solr-core,check-javadocs-uptodate" unless="javadocs-uptodate-${name}">
@ -313,7 +313,6 @@
<link offline="true" href="${lucene.javadoc.url}analyzers-phonetic" packagelistloc="${lucenedocs}/analyzers-phonetic"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-smartcn" packagelistloc="${lucenedocs}/analyzers-smartcn"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-stempel" packagelistloc="${lucenedocs}/analyzers-stempel"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-uima" packagelistloc="${lucenedocs}/analyzers-uima"/>
<link offline="true" href="${lucene.javadoc.url}backward-codecs" packagelistloc="${lucenedocs}/backward-codecs"/>
<link offline="true" href="${lucene.javadoc.url}codecs" packagelistloc="${lucenedocs}/codecs"/>
<link offline="true" href="${lucene.javadoc.url}expressions" packagelistloc="${lucenedocs}/expressions"/>

View File

@ -1,109 +0,0 @@
Apache Solr UIMA Metadata Extraction Library
Introduction
------------
This module is intended to be used both as an UpdateRequestProcessor while indexing documents and as a set of tokenizers/filters
to be configured inside the schema.xml for use during the analysis phase.
The purpose of UIMAUpdateRequestProcessor is to provide additional, automatically generated fields to the Solr index on the fly.
Such fields could be language, concepts, keywords, sentences, named entities, etc.
UIMA based tokenizers/filters can be used either inside plain Lucene or as index/query analyzers to be defined
inside the schema.xml of a Solr core to create/filter tokens using specific UIMA annotations.
Getting Started
---------------
To start using Solr UIMA Metadata Extraction Library you should go through the following configuration steps:
1. copy the generated solr-uima jar and its libs (under contrib/uima/lib) into a Solr libraries directory,
or set <lib/> tags in solrconfig.xml appropriately to point to those jar files.
<lib dir="../../contrib/uima/lib" />
<lib dir="../../contrib/uima/lucene-libs" />
<lib dir="../../dist/" regex="solr-uima-\d.*\.jar" />
2. modify your schema.xml, adding the fields that you want to hold the metadata and specifying proper values for the type, indexed, stored and multiValued options.
For example, you could specify the following:
<field name="language" type="string" indexed="true" stored="true" required="false"/>
<field name="concept" type="string" indexed="true" stored="true" multiValued="true" required="false"/>
<field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false" />
3. modify your solrconfig.xml adding the following snippet:
<updateRequestProcessorChain name="uima">
<processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
<lst name="uimaConfig">
<lst name="runtimeParameters">
<str name="keyword_apikey">VALID_ALCHEMYAPI_KEY</str>
<str name="concept_apikey">VALID_ALCHEMYAPI_KEY</str>
<str name="lang_apikey">VALID_ALCHEMYAPI_KEY</str>
<str name="cat_apikey">VALID_ALCHEMYAPI_KEY</str>
<str name="entities_apikey">VALID_ALCHEMYAPI_KEY</str>
<str name="oc_licenseID">VALID_OPENCALAIS_KEY</str>
</lst>
<str name="analysisEngine">/org/apache/uima/desc/OverridingParamsExtServicesAE.xml</str>
<!-- Set to true if you want to continue indexing even if text processing fails.
Default is false: Solr throws a RuntimeException and
none of the documents in your session are indexed. -->
<bool name="ignoreErrors">true</bool>
<!-- This is optional. It is used for logging when text processing fails.
If logField is not specified, uniqueKey will be used as logField.
<str name="logField">id</str>
-->
<lst name="analyzeFields">
<bool name="merge">false</bool>
<arr name="fields">
<str>text</str>
</arr>
</lst>
<lst name="fieldMappings">
<lst name="type">
<str name="name">org.apache.uima.alchemy.ts.concept.ConceptFS</str>
<lst name="mapping">
<str name="feature">text</str>
<str name="field">concept</str>
</lst>
</lst>
<lst name="type">
<str name="name">org.apache.uima.alchemy.ts.language.LanguageFS</str>
<lst name="mapping">
<str name="feature">language</str>
<str name="field">language</str>
</lst>
</lst>
<lst name="type">
<str name="name">org.apache.uima.SentenceAnnotation</str>
<lst name="mapping">
<str name="feature">coveredText</str>
<str name="field">sentence</str>
</lst>
</lst>
</lst>
</lst>
</processor>
<processor class="solr.LogUpdateProcessorFactory" />
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
where VALID_ALCHEMYAPI_KEY is your AlchemyAPI Access Key. You need to register for an AlchemyAPI Access
Key to use the AlchemyAPI services: http://www.alchemyapi.com/api/register.html
where VALID_OPENCALAIS_KEY is your Calais Service Key. You need to register for a Calais Service
Key to use the Calais services: http://www.opencalais.com/apikey
the analysisEngine must contain an AE descriptor inside the specified path in the classpath
the analyzeFields must contain the input fields that need to be analyzed by UIMA,
if merge=true then their content will be merged and analyzed only once
field mapping describes which features of which types should go in a field
4. in your solrconfig.xml replace the existing default (<requestHandler name="/update"...) or create a new UpdateRequestHandler with the following:
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler">
<lst name="defaults">
<str name="update.processor">uima</str>
</lst>
</requestHandler>
Once you're done with the configuration you can index documents which will be automatically enriched with the specified fields

View File

@ -1,63 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="solr-uima" default="default">

  <description>
Solr Integration with UIMA for extracting metadata from arbitrary (text) fields and enrich document with features
extracted from UIMA types (language, sentences, concepts, named entities, etc.)
  </description>

  <import file="../contrib-build.xml"/>

  <!-- The Lucene UIMA analysis JAR, referenced through the analyzers-uima.jar property. -->
  <path id="uima.lucene.libs">
    <pathelement path="${analyzers-uima.jar}"/>
  </path>

  <!-- Classpath of this module: the Lucene UIMA JAR plus the base Solr classpath. -->
  <path id="classpath">
    <path refid="uima.lucene.libs"/>
    <path refid="solr.base.classpath"/>
  </path>

  <!-- Entry point; exactly one of the two helper targets below does the work,
       depending on whether called.from.create-package is set. -->
  <target name="module-jars-to-solr"
          depends="-module-jars-to-solr-not-for-package,-module-jars-to-solr-package"/>

  <!-- Regular build: build the Lucene UIMA JAR and copy it into lucene-libs. -->
  <target name="-module-jars-to-solr-not-for-package"
          unless="called.from.create-package">
    <antcall target="jar-analyzers-uima" inheritall="true"/>
    <property name="analyzers-uima.uptodate" value="true"/>
    <mkdir dir="${build.dir}/lucene-libs"/>
    <copy todir="${build.dir}/lucene-libs"
          preservelastmodified="true"
          flatten="true"
          failonerror="true"
          overwrite="true">
      <fileset file="${analyzers-uima.jar}"/>
    </copy>
  </target>

  <!-- Packaging build: take the JAR from the unpacked Lucene tgz instead of building it. -->
  <target name="-module-jars-to-solr-package"
          if="called.from.create-package">
    <antcall target="-unpack-lucene-tgz" inheritall="true"/>
    <!-- rewrite the JAR paths so that they are relative to common.build.dir -->
    <pathconvert property="relative.uima.lucene.libs" pathsep=",">
      <path refid="uima.lucene.libs"/>
      <globmapper from="${common.build.dir}/*" to="*" handledirsep="true"/>
    </pathconvert>
    <mkdir dir="${build.dir}/lucene-libs"/>
    <copy todir="${build.dir}/lucene-libs"
          preservelastmodified="true"
          flatten="true"
          failonerror="true"
          overwrite="true">
      <fileset dir="${lucene.tgz.unpack.dir}/lucene-${version}"
               includes="${relative.uima.lucene.libs}"/>
    </copy>
  </target>

  <!-- The Lucene UIMA JAR must exist before this module's sources are compiled. -->
  <target name="compile-core" depends="jar-analyzers-uima, solr-contrib-build.compile-core"/>

  <target name="dist" depends="module-jars-to-solr, common-solr.dist"/>

</project>

View File

@ -1,35 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<ivy-module version="2.0">
  <info organisation="org.apache.solr" module="uima"/>

  <!-- Both configurations are non-transitive and map onto the "master" configuration
       of each dependency. -->
  <configurations defaultconfmapping="compile->master;test->master">
    <conf name="compile" transitive="false"/>
    <conf name="test" transitive="false"/>
  </configurations>

  <!-- Every revision is supplied through a /org/name property rather than hard-coded here. -->
  <dependencies>
    <dependency org="commons-digester"
                name="commons-digester"
                rev="${/commons-digester/commons-digester}"
                conf="compile"/>

    <!-- UIMA annotators and the UIMA core runtime -->
    <dependency org="org.apache.uima"
                name="AlchemyAPIAnnotator"
                rev="${/org.apache.uima/AlchemyAPIAnnotator}"
                conf="compile"/>
    <dependency org="org.apache.uima"
                name="OpenCalaisAnnotator"
                rev="${/org.apache.uima/OpenCalaisAnnotator}"
                conf="compile"/>
    <dependency org="org.apache.uima"
                name="Tagger"
                rev="${/org.apache.uima/Tagger}"
                conf="compile"/>
    <dependency org="org.apache.uima"
                name="WhitespaceTokenizer"
                rev="${/org.apache.uima/WhitespaceTokenizer}"
                conf="compile"/>
    <dependency org="org.apache.uima"
                name="uimaj-core"
                rev="${/org.apache.uima/uimaj-core}"
                conf="compile"/>

    <!-- artifact types listed in ivy.exclude.types are excluded for all dependencies -->
    <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
  </dependencies>
</ivy-module>

View File

@ -1,27 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uima.processor;
/**
 * Checked exception signalling that an error happened while mapping the UIMA CAS model
 * to Solr fields.
 */
@SuppressWarnings("serial")
public class FieldMappingException extends Exception {

  /**
   * Wraps the failure that interrupted the mapping.
   *
   * @param cause the underlying exception, kept as the cause of this one
   */
  public FieldMappingException(Exception cause) {
    super(cause);
  }
}

View File

@ -1,117 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uima.processor;
import java.util.Map;
/**
* Configuration holding all the configurable parameters for calling UIMA inside Solr
*
*
*/
public class SolrUIMAConfiguration {
private final String[] fieldsToAnalyze;
private final boolean fieldsMerging;
private final Map<String, Map<String, MapField>> typesFeaturesFieldsMapping;
private final String aePath;
private final Map<String, Object> runtimeParameters;
private final boolean ignoreErrors;
private final String logField;
SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
Map<String, Map<String, MapField>> typesFeaturesFieldsMapping,
Map<String, Object> runtimeParameters, boolean ignoreErrors, String logField) {
this.aePath = aePath;
this.fieldsToAnalyze = fieldsToAnalyze;
this.fieldsMerging = fieldsMerging;
this.runtimeParameters = runtimeParameters;
this.typesFeaturesFieldsMapping = typesFeaturesFieldsMapping;
this.ignoreErrors = ignoreErrors;
this.logField = logField;
}
public String[] getFieldsToAnalyze() {
return fieldsToAnalyze;
}
public boolean isFieldsMerging() {
return fieldsMerging;
}
public Map<String, Map<String, MapField>> getTypesFeaturesFieldsMapping() {
return typesFeaturesFieldsMapping;
}
public String getAePath() {
return aePath;
}
public Map<String, Object> getRuntimeParameters() {
return runtimeParameters;
}
public boolean isIgnoreErrors() {
return ignoreErrors;
}
public String getLogField(){
return logField;
}
public static final class MapField {
private String fieldName;
private final String fieldNameFeature;
private boolean prefix; // valid if dynamicField == true
// false: *_s, true: s_*
MapField(String fieldName, String fieldNameFeature){
this.fieldName = fieldName;
this.fieldNameFeature = fieldNameFeature;
if(fieldNameFeature != null){
if(fieldName.startsWith("*")){
prefix = false;
this.fieldName = fieldName.substring(1);
}
else if(fieldName.endsWith("*")){
prefix = true;
this.fieldName = fieldName.substring(0, fieldName.length() - 1);
}
else
throw new RuntimeException("static field name cannot be used for dynamicField");
}
}
public String getFieldNameFeature(){
return fieldNameFeature;
}
public String getFieldName(String featureValue){
if(fieldNameFeature != null){
return prefix ? fieldName + featureValue : featureValue + fieldName;
}
return fieldName;
}
}
}

View File

@ -1,116 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uima.processor;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
/**
 * Reads the configuration for the Solr-UIMA integration from a {@link NamedList} and turns it
 * into a {@link SolrUIMAConfiguration}.
 *
 * <p>The optional entries {@code runtimeParameters}, {@code ignoreErrors}, {@code logField} and
 * the {@code merge} flag fall back to defaults (empty map, {@code false}, {@code null} and
 * {@code false} respectively) when they are not configured.
 */
public class SolrUIMAConfigurationReader {

  private final NamedList<Object> args;

  /**
   * @param args the configuration entries to read from
   */
  public SolrUIMAConfigurationReader(NamedList<Object> args) {
    this.args = args;
  }

  /**
   * @return a configuration object populated from the entries passed to the constructor
   */
  public SolrUIMAConfiguration readSolrUIMAConfiguration() {
    return new SolrUIMAConfiguration(readAEPath(), readFieldsToAnalyze(), readFieldsMerging(),
        readTypesFeaturesFieldsMapping(), readAEOverridingParameters(), readIgnoreErrors(),
        readLogField());
  }

  private String readAEPath() {
    return (String) args.get("analysisEngine");
  }

  @SuppressWarnings("rawtypes")
  private NamedList getAnalyzeFields() {
    return (NamedList) args.get("analyzeFields");
  }

  @SuppressWarnings("unchecked")
  private String[] readFieldsToAnalyze() {
    List<String> fields = (List<String>) getAnalyzeFields().get("fields");
    return fields.toArray(new String[fields.size()]);
  }

  /* the merge flag is optional; a missing flag means "do not merge" instead of failing on unboxing */
  private boolean readFieldsMerging() {
    Object merge = getAnalyzeFields().get("merge");
    return merge == null ? false : (Boolean) merge;
  }

  @SuppressWarnings("rawtypes")
  private Map<String, Map<String, MapField>> readTypesFeaturesFieldsMapping() {
    Map<String, Map<String, MapField>> map = new HashMap<>();

    NamedList fieldMappings = (NamedList) args.get("fieldMappings");
    /* iterate over UIMA types */
    for (int i = 0; i < fieldMappings.size(); i++) {
      NamedList type = (NamedList) fieldMappings.get("type", i);
      String typeName = (String) type.get("name");

      Map<String, MapField> subMap = new HashMap<>();
      /* iterate over mapping definitions; the lookup starts at index j + 1, which presumably
         relies on the "name" entry being the first child of the type - TODO confirm */
      for (int j = 0; j < type.size() - 1; j++) {
        NamedList mapping = (NamedList) type.get("mapping", j + 1);
        String featureName = (String) mapping.get("feature");
        String fieldNameFeature = null;
        String mappedFieldName = (String) mapping.get("field");
        if (mappedFieldName == null) {
          /* no static field: fall back to a dynamic field definition */
          fieldNameFeature = (String) mapping.get("fieldNameFeature");
          mappedFieldName = (String) mapping.get("dynamicField");
        }
        if (mappedFieldName == null) {
          throw new RuntimeException("either of field or dynamicField should be defined for feature " + featureName);
        }
        MapField mapField = new MapField(mappedFieldName, fieldNameFeature);
        subMap.put(featureName, mapField);
      }
      map.put(typeName, subMap);
    }
    return map;
  }

  @SuppressWarnings("rawtypes")
  private Map<String, Object> readAEOverridingParameters() {
    Map<String, Object> runtimeParameters = new HashMap<>();
    NamedList runtimeParams = (NamedList) args.get("runtimeParameters");
    if (runtimeParams == null) {
      /* no overriding parameters configured: an empty map instead of a NullPointerException */
      return runtimeParameters;
    }
    for (int i = 0; i < runtimeParams.size(); i++) {
      String name = runtimeParams.getName(i);
      Object value = runtimeParams.getVal(i);
      runtimeParameters.put(name, value);
    }
    return runtimeParameters;
  }

  private boolean readIgnoreErrors() {
    Object ignoreErrors = args.get("ignoreErrors");
    return ignoreErrors == null ? false : (Boolean) ignoreErrors;
  }

  private String readLogField() {
    return (String) args.get("logField");
  }
}

View File

@ -1,87 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uima.processor;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.util.Map;
/**
 * Copies feature values of UIMA feature structures into fields of a Solr document.
 */
public class UIMAToSolrMapper {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  private final SolrInputDocument document;

  private final JCas cas;

  /**
   * @param document the Solr document that receives the mapped values
   * @param cas the JCas whose indexed feature structures are read
   */
  public UIMAToSolrMapper(SolrInputDocument document, JCas cas) {
    this.document = document;
    this.cas = cas;
  }

  /**
   * Maps the features of a certain UIMA type to the corresponding Solr fields: for every indexed
   * feature structure of the type and every configured feature, one value is added to the
   * document.
   *
   * @param typeName name of the UIMA type to map
   * @param featureFieldsmapping target field definition for each feature name of that type
   * @throws FieldMappingException wrapping any exception raised while mapping
   */
  void map(String typeName, Map<String, MapField> featureFieldsmapping) throws FieldMappingException {
    try {
      Type type = cas.getTypeSystem().getType(typeName);
      FSIterator<FeatureStructure> structures = cas.getFSIndexRepository().getAllIndexedFS(type);
      while (structures.hasNext()) {
        FeatureStructure fs = structures.next();
        for (Map.Entry<String, MapField> entry : featureFieldsmapping.entrySet()) {
          String featureName = entry.getKey();
          MapField mapField = entry.getValue();

          // dynamic fields take the variable part of their name from another feature of the same structure
          String fieldNameFeature = mapField.getFieldNameFeature();
          String fieldNameFeatureValue = null;
          if (fieldNameFeature != null) {
            fieldNameFeatureValue = fs.getFeatureValueAsString(type.getFeatureByBaseName(fieldNameFeature));
          }
          String fieldName = mapField.getFieldName(fieldNameFeatureValue);
          log.info("mapping {}@{} to {}", typeName, featureName, fieldName);

          // "coveredText" on an annotation is read through getCoveredText() rather than as a feature value
          boolean useCoveredText = fs instanceof Annotation && "coveredText".equals(featureName);
          String featureValue = useCoveredText
              ? ((Annotation) fs).getCoveredText()
              : fs.getFeatureValueAsString(type.getFeatureByBaseName(featureName));
          log.debug("writing {} in {}", featureValue, fieldName);

          document.addField(fieldName, featureValue);
        }
      }
    } catch (Exception e) {
      throw new FieldMappingException(e);
    }
  }
}

View File

@ -1,189 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uima.processor;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.JCasPool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Update document(s) to be indexed with UIMA extracted information
*
*/
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private SolrUIMAConfiguration solrUIMAConfiguration;
private AnalysisEngine ae;
private JCasPool pool;
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next,
String coreName, SolrUIMAConfiguration config, AnalysisEngine ae,
JCasPool pool) {
super(next);
this.ae = ae;
this.pool = pool;
solrUIMAConfiguration = config;
}
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
String text = null;
try {
/* get Solr document */
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
/* get the fields to analyze */
String[] texts = getTextsToAnalyze(solrInputDocument);
for (String currentText : texts) {
text = currentText;
if (text != null && text.length() > 0) {
/* create a JCas which contain the text to analyze */
JCas jcas = pool.getJCas(0);
try {
/* process the text value */
processText(text, jcas);
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(
solrInputDocument, jcas);
/* get field mapping from config */
Map<String,Map<String,MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping();
/* map type features on fields */
for (Entry<String,Map<String,MapField>> entry : typesAndFeaturesFieldsMap
.entrySet()) {
uimaToSolrMapper.map(entry.getKey(), entry.getValue());
}
} finally {
pool.releaseJCas(jcas);
}
}
}
} catch (Exception e) {
String logField = solrUIMAConfiguration.getLogField();
if (logField == null) {
SchemaField uniqueKeyField = cmd.getReq().getSchema()
.getUniqueKeyField();
if (uniqueKeyField != null) {
logField = uniqueKeyField.getName();
}
}
String optionalFieldInfo = logField == null ? "." : ". " + logField + "=" + cmd.getSolrInputDocument().
getField(logField).getValue() + ", ";
int len;
String debugString;
if (text != null && text.length() > 0) {
len = Math.min(text.length(), 100);
debugString = " text=\"" + text.substring(0, len) + "...\"";
} else {
debugString = " null text";
}
if (solrUIMAConfiguration.isIgnoreErrors()) {
log.warn(
"skip the text processing due to {}",
new StringBuilder().append(e.getLocalizedMessage())
.append(optionalFieldInfo).append(debugString));
} else {
throw new SolrException(ErrorCode.SERVER_ERROR, "processing error " + e.getLocalizedMessage() +
optionalFieldInfo + debugString, e);
}
}
super.processAdd(cmd);
}
/*
* get the texts to analyze from the corresponding fields
*/
private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) {
String[] fieldsToAnalyze = solrUIMAConfiguration.getFieldsToAnalyze();
boolean merge = solrUIMAConfiguration.isFieldsMerging();
String[] textVals;
if (merge) {
StringBuilder unifiedText = new StringBuilder("");
for (String aFieldsToAnalyze : fieldsToAnalyze) {
if (solrInputDocument.getFieldValues(aFieldsToAnalyze) != null) {
Object[] Values = solrInputDocument.getFieldValues(aFieldsToAnalyze).toArray();
for (Object Value : Values) {
if (unifiedText.length() > 0) {
unifiedText.append(' ');
}
unifiedText.append(Value.toString());
}
}
}
textVals = new String[1];
textVals[0] = unifiedText.toString();
} else {
textVals = new String[fieldsToAnalyze.length];
for (int i = 0; i < fieldsToAnalyze.length; i++) {
if (solrInputDocument.getFieldValues(fieldsToAnalyze[i]) != null) {
Object[] Values = solrInputDocument.getFieldValues(fieldsToAnalyze[i]).toArray();
for (Object Value : Values) {
textVals[i] += Value.toString();
}
}
}
}
return textVals;
}
/*
* process a field value executing UIMA on the JCas containing it as document
* text
*/
private void processText(String textFieldValue, JCas jcas)
throws ResourceInitializationException, AnalysisEngineProcessException {
if (log.isDebugEnabled()) {
log.debug("Analyzing text");
}
jcas.setDocumentText(textFieldValue);
/* perform analysis on text field */
ae.process(jcas);
if (log.isDebugEnabled()) {
log.debug("Text processing completed");
}
}
/**
* @return the configuration object for this request processor
*/
public SolrUIMAConfiguration getConfiguration()
{
return solrUIMAConfiguration;
}
}

View File

@ -1,72 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uima.processor;
import org.apache.lucene.analysis.uima.ae.AEProvider;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.JCasPool;
/**
 * Factory for {@link UIMAUpdateRequestProcessor}.
 *
 * <p>The analysis engine and the JCas pool are created on the first call to
 * {@link #getInstance} and then shared by all processors created by this factory.
 *
 * @since 3.1.0
 */
public class UIMAUpdateRequestProcessorFactory extends
    UpdateRequestProcessorFactory {

  /* number of JCas instances held by the shared pool */
  private static final int JCAS_POOL_SIZE = 10;

  private NamedList<Object> args;

  private AnalysisEngine ae;

  private JCasPool pool;

  /**
   * Keeps the {@code uimaConfig} section of the factory arguments; it is parsed each time a
   * processor is requested.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void init(@SuppressWarnings("rawtypes") NamedList args) {
    this.args = (NamedList<Object>) args.get("uimaConfig");
  }

  /**
   * Creates a processor that shares this factory's analysis engine and JCas pool.
   *
   * @throws SolrException with {@code SERVER_ERROR} when the analysis engine or the pool
   *         cannot be initialized
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
      SolrQueryResponse rsp, UpdateRequestProcessor next) {
    SolrUIMAConfiguration configuration = new SolrUIMAConfigurationReader(args)
        .readSolrUIMAConfiguration();
    AnalysisEngine sharedAe;
    JCasPool sharedPool;
    synchronized (this) {
      /* "||" rather than "&&": retry as long as either resource is still missing */
      if (ae == null || pool == null) {
        AEProvider aeProvider = AEProviderFactory.getInstance().getAEProvider(
            req.getCore().getName(), configuration.getAePath(),
            configuration.getRuntimeParameters());
        try {
          /* build both resources first and only then store them, so that a failure while
             creating the pool cannot leave an engine without a pool behind */
          AnalysisEngine newAe = aeProvider.getAE();
          JCasPool newPool = new JCasPool(JCAS_POOL_SIZE, newAe);
          ae = newAe;
          pool = newPool;
        } catch (ResourceInitializationException e) {
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
      }
      sharedAe = ae;
      sharedPool = pool;
    }

    return new UIMAUpdateRequestProcessor(next, req.getCore().getName(),
        configuration, sharedAe, sharedPool);
  }
}

View File

@ -1,25 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* {@link org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory} and related code.
*/
package org.apache.solr.uima.processor;

View File

@ -1,21 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
Apache Solr Search Server: Solr UIMA contrib
</body>
</html>

View File

@ -1,41 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>false</primitive>

  <!-- delegate engines, each imported by name -->
  <delegateAnalysisEngineSpecifiers>
    <delegateAnalysisEngine key="HmmTagger">
      <import name="HmmTagger"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="WhitespaceTokenizer">
      <import name="WhitespaceTokenizer"/>
    </delegateAnalysisEngine>
  </delegateAnalysisEngineSpecifiers>

  <analysisEngineMetaData>
    <name>AggregateSentenceAE</name>
    <description/>
    <version>1.0</version>
    <vendor/>
    <configurationParameters/>
    <configurationParameterSettings/>

    <!-- fixed flow: the tokenizer is listed before the tagger -->
    <flowConstraints>
      <fixedFlow>
        <node>WhitespaceTokenizer</node>
        <node>HmmTagger</node>
      </fixedFlow>
    </flowConstraints>

    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>

  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,121 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.examples.tagger.HMMTagger</annotatorImplementationName>

  <analysisEngineMetaData>
    <name>Hidden Markov Model - Part of Speech Tagger</name>
    <description>A configuration of the HmmTaggerAnnotator that looks for
parts of speech of identified tokens within existing
Sentence and Token annotations. See also
WhitespaceTokenizer.xml.</description>
    <version>1.0</version>
    <vendor>The Apache Software Foundation</vendor>

    <!-- declaration of the single, mandatory NGRAM_SIZE parameter -->
    <configurationParameters>
      <configurationParameter>
        <name>NGRAM_SIZE</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- value assigned to NGRAM_SIZE -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>NGRAM_SIZE</name>
        <value>
          <integer>3</integer>
        </value>
      </nameValuePair>
    </configurationParameterSettings>

    <!-- token and sentence annotation types, both derived from uima.tcas.Annotation -->
    <typeSystemDescription>
      <types>
        <typeDescription>
          <name>org.apache.uima.TokenAnnotation</name>
          <description>Single token annotation</description>
          <supertypeName>uima.tcas.Annotation</supertypeName>
          <features>
            <featureDescription>
              <name>posTag</name>
              <description>contains part-of-speech of a
corresponding token</description>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.SentenceAnnotation</name>
          <description>sentence annotation</description>
          <supertypeName>uima.tcas.Annotation</supertypeName>
        </typeDescription>
      </types>
    </typeSystemDescription>

    <typePriorities/>
    <fsIndexCollection/>

    <!-- the outputs add the posTag feature to the token annotations listed as inputs -->
    <capabilities>
      <capability>
        <inputs>
          <type>org.apache.uima.TokenAnnotation</type>
          <type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
          <feature>org.apache.uima.TokenAnnotation:end</feature>
          <feature>org.apache.uima.TokenAnnotation:begin</feature>
        </inputs>
        <outputs>
          <type>org.apache.uima.TokenAnnotation</type>
          <feature>org.apache.uima.TokenAnnotation:posTag</feature>
          <feature>org.apache.uima.TokenAnnotation:end</feature>
          <feature>org.apache.uima.TokenAnnotation:begin</feature>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>

    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>

  <!-- the annotator requires a resource under the key "Model" -->
  <externalResourceDependencies>
    <externalResourceDependency>
      <key>Model</key>
      <description>HMM Tagger model file</description>
      <interfaceName>org.apache.uima.examples.tagger.IModelResource</interfaceName>
      <optional>false</optional>
    </externalResourceDependency>
  </externalResourceDependencies>

  <!-- "ModelFile" points at the BrownModel.dat file and is bound to the "Model" key -->
  <resourceManagerConfiguration>
    <externalResources>
      <externalResource>
        <name>ModelFile</name>
        <description>HMM Tagger model file</description>
        <fileResourceSpecifier>
          <fileUrl>file:english/BrownModel.dat</fileUrl>
        </fileResourceSpecifier>
        <implementationName>org.apache.uima.examples.tagger.ModelResource</implementationName>
      </externalResource>
    </externalResources>
    <externalResourceBindings>
      <externalResourceBinding>
        <key>Model</key>
        <resourceName>ModelFile</resourceName>
      </externalResourceBinding>
    </externalResourceBindings>
  </resourceManagerConfiguration>
</analysisEngineDescription>

View File

@ -1,194 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Primitive analysis engine descriptor for OpenCalaisAnnotator.
  Declares four mandatory single-valued parameters (allowDistribution,
  allowSearch, submitter, licenseID), their default settings, and the
  org.apache.uima.calais.* type system rooted at BaseType.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.annotator.calais.OpenCalaisAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>OpenCalaisAnnotator</name>
    <description/>

    <!-- Parameter declarations: two Boolean flags and two String values, all mandatory. -->
    <configurationParameters>
      <configurationParameter>
        <name>allowDistribution</name>
        <description/>
        <type>Boolean</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>allowSearch</name>
        <description/>
        <type>Boolean</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>submitter</name>
        <description/>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>licenseID</name>
        <description/>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Default values. submitter is an empty string; licenseID holds the OC_LICENSE_ID placeholder. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>allowDistribution</name>
        <value><boolean>false</boolean></value>
      </nameValuePair>
      <nameValuePair>
        <name>allowSearch</name>
        <value><boolean>false</boolean></value>
      </nameValuePair>
      <nameValuePair>
        <name>submitter</name>
        <value><string/></value>
      </nameValuePair>
      <nameValuePair>
        <name>licenseID</name>
        <value><string>OC_LICENSE_ID</string></value>
      </nameValuePair>
    </configurationParameterSettings>

    <!-- Every entity type below extends org.apache.uima.calais.BaseType, which is declared last. -->
    <typeSystemDescription>
      <types>
        <typeDescription>
          <name>org.apache.uima.calais.Person</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Anniversary</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.City</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Company</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Continent</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Country</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Currency</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.EmailAddress</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Facility</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.FaxNumber</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Holiday</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.IndustryTerm</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.NaturalDisaster</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.NaturalFeature</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Organization</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.PhoneNumber</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <!-- NOTE(review): "ProviceOrState" looks like a misspelling of "ProvinceOrState";
             kept unchanged because the annotator implementation may refer to this exact
             type name. Confirm before renaming. -->
        <typeDescription>
          <name>org.apache.uima.calais.ProviceOrState</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Region</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.Technology</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.calais.URL</name>
          <description/>
          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
        </typeDescription>
        <!-- Common supertype: an annotation carrying the OpenCalais type as a string feature. -->
        <typeDescription>
          <name>org.apache.uima.calais.BaseType</name>
          <description/>
          <supertypeName>uima.tcas.Annotation</supertypeName>
          <features>
            <featureDescription>
              <name>calaisType</name>
              <description>OpenCalais type</description>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>

    <!-- No inputs, outputs or language restrictions are declared. -->
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
</analysisEngineDescription>

View File

@ -1,147 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Aggregate analysis engine descriptor "ExtServicesAE".
  Combines seven delegate engines in a fixed flow and exposes six mandatory
  String parameters, each of which overrides one delegate parameter
  (the OpenCalais licenseID and the apikey of five delegates).
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>false</primitive>

  <!-- Delegates. All are imported by name except AggregateSentenceAE, which is imported by location. -->
  <delegateAnalysisEngineSpecifiers>
    <delegateAnalysisEngine key="TextKeywordExtractionAEDescriptor">
      <import name="TextKeywordExtractionAEDescriptor"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="TextConceptTaggingAEDescriptor">
      <import name="TextConceptTaggingAEDescriptor"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="OpenCalaisAnnotator">
      <import name="OpenCalaisAnnotator"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="TextLanguageDetectionAEDescriptor">
      <import name="TextLanguageDetectionAEDescriptor"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="TextCategorizationAEDescriptor">
      <import name="TextCategorizationAEDescriptor"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="AggregateSentenceAE">
      <import location="AggregateSentenceAE.xml"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="TextRankedEntityExtractionAEDescriptor">
      <import name="TextRankedEntityExtractionAEDescriptor"/>
    </delegateAnalysisEngine>
  </delegateAnalysisEngineSpecifiers>

  <analysisEngineMetaData>
    <name>ExtServicesAE</name>
    <description/>
    <version>1.0</version>
    <vendor/>

    <!-- Aggregate-level parameters; each <overrides> entry names the delegate parameter it replaces. -->
    <configurationParameters searchStrategy="language_fallback">
      <configurationParameter>
        <name>oc_licenseID</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
        <overrides>
          <parameter>OpenCalaisAnnotator/licenseID</parameter>
        </overrides>
      </configurationParameter>
      <configurationParameter>
        <name>keyword_apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
        <overrides>
          <parameter>TextKeywordExtractionAEDescriptor/apikey</parameter>
        </overrides>
      </configurationParameter>
      <configurationParameter>
        <name>concept_apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
        <overrides>
          <parameter>TextConceptTaggingAEDescriptor/apikey</parameter>
        </overrides>
      </configurationParameter>
      <configurationParameter>
        <name>lang_apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
        <overrides>
          <parameter>TextLanguageDetectionAEDescriptor/apikey</parameter>
        </overrides>
      </configurationParameter>
      <configurationParameter>
        <name>cat_apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
        <overrides>
          <parameter>TextCategorizationAEDescriptor/apikey</parameter>
        </overrides>
      </configurationParameter>
      <configurationParameter>
        <name>entities_apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
        <overrides>
          <parameter>TextRankedEntityExtractionAEDescriptor/apikey</parameter>
        </overrides>
      </configurationParameter>
    </configurationParameters>

    <!-- Placeholder values; real credentials are expected to be supplied by the deployment. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>oc_licenseID</name>
        <value><string>licenseid</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>keyword_apikey</name>
        <value><string>apikey</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>concept_apikey</name>
        <value><string>apikey</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>lang_apikey</name>
        <value><string>apikey</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>cat_apikey</name>
        <value><string>apikey</string></value>
      </nameValuePair>
      <!-- entities_apikey is declared mandatory above, so it gets the same
           placeholder as the other API-key parameters instead of being left unset. -->
      <nameValuePair>
        <name>entities_apikey</name>
        <value><string>apikey</string></value>
      </nameValuePair>
    </configurationParameterSettings>

    <!-- Delegates run in exactly this order. -->
    <flowConstraints>
      <fixedFlow>
        <node>AggregateSentenceAE</node>
        <node>OpenCalaisAnnotator</node>
        <node>TextKeywordExtractionAEDescriptor</node>
        <node>TextLanguageDetectionAEDescriptor</node>
        <node>TextCategorizationAEDescriptor</node>
        <node>TextConceptTaggingAEDescriptor</node>
        <node>TextRankedEntityExtractionAEDescriptor</node>
      </fixedFlow>
    </flowConstraints>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,102 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements. See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership. The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License. You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing,
  software distributed under the License is distributed on an
  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  KIND, either express or implied. See the License for the
  specific language governing permissions and limitations
  under the License.
-->
<!--
  Primitive analysis engine descriptor for TextCategorizationAnnotator.
  Declares the apikey, outputMode and baseUrl parameters and a single
  Category feature structure type with score and text features.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.alchemy.annotator.TextCategorizationAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>TextCategorizationAEDescriptor</name>
    <description/>
    <version>1.0</version>
    <vendor/>

    <!-- apikey and outputMode are mandatory; baseUrl is optional. -->
    <configurationParameters>
      <configurationParameter>
        <name>apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>outputMode</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>baseUrl</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Defaults: xml output and the AA_API_KEY placeholder; baseUrl has no default. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>outputMode</name>
        <value><string>xml</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>apikey</name>
        <value><string>AA_API_KEY</string></value>
      </nameValuePair>
    </configurationParameterSettings>

    <typeSystemDescription>
      <types>
        <!-- Category extends uima.cas.TOP; both features are strings. -->
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.categorization.Category</name>
          <description/>
          <supertypeName>uima.cas.TOP</supertypeName>
          <features>
            <featureDescription>
              <name>score</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>text</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,196 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements. See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership. The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License. You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing,
  software distributed under the License is distributed on an
  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  KIND, either express or implied. See the License for the
  specific language governing permissions and limitations
  under the License.
-->
<!--
  Primitive analysis engine descriptor for TextConceptTaggingAnnotator.
  Declares six parameters and a single ConceptFS feature structure type
  whose features are all strings.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.alchemy.annotator.TextConceptTaggingAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>TextConceptTaggingAEDescriptor</name>
    <description/>
    <version>1.0</version>
    <vendor/>

    <!-- Mandatory: apikey, outputMode, showSourceText. Optional: linkedData, maxRetrieve, url. -->
    <configurationParameters>
      <configurationParameter>
        <name>apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>outputMode</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>linkedData</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>showSourceText</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <!-- maxRetrieve is declared as a String here, so its setting below is a string too. -->
      <configurationParameter>
        <name>maxRetrieve</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>url</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Defaults. apikey is an empty string; url has no default. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>apikey</name>
        <value><string/></value>
      </nameValuePair>
      <nameValuePair>
        <name>outputMode</name>
        <value><string>xml</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>linkedData</name>
        <value><string>1</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>showSourceText</name>
        <value><integer>0</integer></value>
      </nameValuePair>
      <nameValuePair>
        <name>maxRetrieve</name>
        <value><string>8</string></value>
      </nameValuePair>
    </configurationParameterSettings>

    <typeSystemDescription>
      <types>
        <!-- ConceptFS extends uima.cas.TOP. -->
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.concept.ConceptFS</name>
          <description>a concept tag</description>
          <supertypeName>uima.cas.TOP</supertypeName>
          <features>
            <featureDescription>
              <name>text</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>relevance</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>website</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>geo</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>dbpedia</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>yago</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>opencyc</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>freebase</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>ciaFactbook</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>census</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>geonames</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>musicBrainz</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>crunchbase</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>semanticCrunchbase</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,107 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Primitive analysis engine descriptor for TextKeywordExtractionAnnotator.
  Declares six parameters and a single KeywordFS feature structure type
  with one string feature (text).
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.alchemy.annotator.TextKeywordExtractionAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>TextKeywordExtractionAEDescriptor</name>
    <description/>
    <version>1.0</version>
    <vendor/>

    <!-- Mandatory: apikey, outputMode. Optional: baseUrl, url, maxRetrieve, showSourceText. -->
    <configurationParameters>
      <configurationParameter>
        <name>apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>outputMode</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>baseUrl</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>url</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>maxRetrieve</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>showSourceText</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Defaults. baseUrl and url have no default. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>outputMode</name>
        <value><string>xml</string></value>
      </nameValuePair>
      <!-- Placeholder only: a real service key must never be committed to the
           repository. Supply the actual key through deployment configuration. -->
      <nameValuePair>
        <name>apikey</name>
        <value><string>AA_API_KEY</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>maxRetrieve</name>
        <value><integer>10</integer></value>
      </nameValuePair>
      <nameValuePair>
        <name>showSourceText</name>
        <value><integer>0</integer></value>
      </nameValuePair>
    </configurationParameterSettings>

    <typeSystemDescription>
      <types>
        <!-- KeywordFS extends uima.cas.TOP. -->
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.keywords.KeywordFS</name>
          <description/>
          <supertypeName>uima.cas.TOP</supertypeName>
          <features>
            <featureDescription>
              <name>text</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,107 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Primitive analysis engine descriptor for TextLanguageDetectionAnnotator.
  Declares the apikey, outputMode and url parameters and a single
  LanguageFS feature structure type whose features are all strings.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.alchemy.annotator.TextLanguageDetectionAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>TextLanguageDetectionAEDescriptor</name>
    <description/>
    <version>1.0</version>
    <vendor/>

    <!-- apikey and outputMode are mandatory; url is optional. -->
    <configurationParameters>
      <configurationParameter>
        <name>apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>outputMode</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>url</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Defaults: xml output and the AA_API_KEY placeholder; url has no default. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>outputMode</name>
        <value><string>xml</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>apikey</name>
        <value><string>AA_API_KEY</string></value>
      </nameValuePair>
    </configurationParameterSettings>

    <typeSystemDescription>
      <types>
        <!-- LanguageFS extends uima.cas.TOP. -->
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.language.LanguageFS</name>
          <description/>
          <supertypeName>uima.cas.TOP</supertypeName>
          <features>
            <featureDescription>
              <name>language</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>iso6391</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>iso6392</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>iso6393</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>ethnologue</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>nativeSpeakers</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>wikipedia</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,403 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements. See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership. The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License. You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing,
  software distributed under the License is distributed on an
  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  KIND, either express or implied. See the License for the
  specific language governing permissions and limitations
  under the License.
-->
<!--
  Primitive analysis engine descriptor for TextRankedNamedEntityExtractionAnnotator.
  Declares nine parameters and the org.apache.uima.alchemy.ts.entity.* type
  system, in which every entity type extends BaseEntity.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.alchemy.annotator.TextRankedNamedEntityExtractionAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>TextRankedEntityExtractionAEDescriptor</name>
    <description/>
    <version>1.0</version>
    <vendor/>

    <!-- Mandatory: apikey, outputMode, disambiguate, showSourceText. The remaining parameters are optional. -->
    <configurationParameters>
      <configurationParameter>
        <name>apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>outputMode</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>disambiguate</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>linkedData</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>showSourceText</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>baseUrl</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>url</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>coreference</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>quotations</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Defaults. apikey is an empty string; baseUrl and url have no default. -->
    <configurationParameterSettings>
      <nameValuePair>
        <name>apikey</name>
        <value><string/></value>
      </nameValuePair>
      <nameValuePair>
        <name>outputMode</name>
        <value><string>xml</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>disambiguate</name>
        <value><integer>1</integer></value>
      </nameValuePair>
      <nameValuePair>
        <name>linkedData</name>
        <value><string>1</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>coreference</name>
        <value><string>1</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>showSourceText</name>
        <value><integer>0</integer></value>
      </nameValuePair>
      <nameValuePair>
        <name>quotations</name>
        <value><string>1</string></value>
      </nameValuePair>
    </configurationParameterSettings>

    <typeSystemDescription>
      <!-- External type system pulled in by relative location. -->
      <imports>
        <import location="baseAlchemyTypeSystemDescriptor.xml"/>
      </imports>
      <types>
        <!-- Entity types without features of their own; each extends BaseEntity, declared last. -->
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Anniversary</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Automobile</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.City</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Company</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Continent</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Country</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.EntertainmentAward</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Facility</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.FieldTerminology</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.FinancialMarketIndex</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.GeographicFeature</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.HealthCondition</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Holiday</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Movie</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.MusicGroup</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.NaturalDisaster</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Organization</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Person</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.PrintMedia</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.RadioProgram</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.RadioStation</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Region</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Sport</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.StateOrCounty</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Technology</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.TelevisionShow</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.TelevisionStation</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.OperatingSystem</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.SportingEvent</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Drug</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>

        <!-- Common supertype (extends uima.cas.TOP). All features are strings except
             quotations (a string array) and occurrences (an FSList of annotations). -->
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.BaseEntity</name>
          <description/>
          <supertypeName>uima.cas.TOP</supertypeName>
          <features>
            <featureDescription>
              <name>text</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>count</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>relevance</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>disambiguation</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>subType</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>website</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>geo</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>dbpedia</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>yago</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>opencyc</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>umbel</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>freebase</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>ciaFactbook</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>census</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>geonames</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>musicBrainz</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>quotations</name>
              <description/>
              <rangeTypeName>uima.cas.StringArray</rangeTypeName>
              <multipleReferencesAllowed>true</multipleReferencesAllowed>
            </featureDescription>
            <featureDescription>
              <name>occurrences</name>
              <description>A list of annotations annotating this entity</description>
              <rangeTypeName>uima.cas.FSList</rangeTypeName>
              <elementType>uima.tcas.Annotation</elementType>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,115 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
***************************************************************
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
***************************************************************
-->
<!--
  Primitive analysis engine descriptor for the WhitespaceTokenizer annotator.

  Declares:
    * one optional, multi-valued String parameter, SofaNames;
    * two annotation types extending uima.tcas.Annotation:
      org.apache.uima.TokenAnnotation (String feature tokenType) and
      org.apache.uima.SentenceAnnotation (no features);
    * a single capability that outputs both types for language x-unspecified.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <!-- Text values are kept on one line so layout whitespace never becomes part of the value. -->
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.annotator.WhitespaceTokenizer</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>WhitespaceTokenizer</name>
    <description>creates token and sentence annotations for whitespace separated languages</description>
    <version>1.0</version>
    <vendor>The Apache Software Foundation</vendor>
    <configurationParameters>
      <configurationParameter>
        <name>SofaNames</name>
        <description>The Sofa names the annotator should work on. If no names are specified, the annotator works on the default sofa.</description>
        <type>String</type>
        <multiValued>true</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>
    <configurationParameterSettings>
      <!-- Example setting for SofaNames, left disabled so the default sofa is used: -->
      <!--
      <nameValuePair>
        <name>SofaNames</name>
        <value>
          <array>
            <string>sofaName</string>
          </array>
        </value>
      </nameValuePair>
      -->
    </configurationParameterSettings>
    <typeSystemDescription>
      <typeDescription>
        <name>org.apache.uima.TokenAnnotation</name>
        <description>Single token annotation</description>
        <supertypeName>uima.tcas.Annotation</supertypeName>
        <features>
          <featureDescription>
            <name>tokenType</name>
            <description>token type</description>
            <rangeTypeName>uima.cas.String</rangeTypeName>
          </featureDescription>
        </features>
      </typeDescription>
      <typeDescription>
        <name>org.apache.uima.SentenceAnnotation</name>
        <description>sentence annotation</description>
        <supertypeName>uima.tcas.Annotation</supertypeName>
        <features>
        </features>
      </typeDescription>
    </typeSystemDescription>
    <fsIndexes />
    <capabilities>
      <capability>
        <inputs />
        <outputs>
          <type>org.apache.uima.TokenAnnotation</type>
          <!-- Must match the declared feature name "tokenType" exactly; the previous
               lower-case spelling "tokentype" referred to a feature that is not declared. -->
          <feature>org.apache.uima.TokenAnnotation:tokenType</feature>
          <type>org.apache.uima.SentenceAnnotation</type>
        </outputs>
        <languagesSupported>
          <language>x-unspecified</language>
        </languagesSupported>
      </capability>
    </capabilities>
  </analysisEngineMetaData>
</analysisEngineDescription>

View File

@ -1,41 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!--
  Type system descriptor "baseAlchemyTypeSystemDescriptor".
  Defines one annotation type, AlchemyAnnotation, which extends
  uima.tcas.Annotation and adds a single String feature, alchemyType.
-->
<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <name>baseAlchemyTypeSystemDescriptor</name>
  <description/>
  <version>1.0</version>
  <vendor/>
  <types>
    <typeDescription>
      <name>org.apache.uima.alchemy.ts.entity.AlchemyAnnotation</name>
      <description/>
      <supertypeName>uima.tcas.Annotation</supertypeName>
      <features>
        <featureDescription>
          <name>alchemyType</name>
          <description>alchemyAPI type</description>
          <rangeTypeName>uima.cas.String</rangeTypeName>
        </featureDescription>
      </features>
    </typeDescription>
  </types>
</typeSystemDescription>

View File

@ -1,48 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version
2.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
applicable law or agreed to in writing, software distributed under
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and
limitations under the License.
-->
<!--
  Update request processor chain named "uima".
  Runs UIMAUpdateRequestProcessorFactory first, then the log and run processors.
-->
<updateRequestProcessorChain name="uima">
  <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
    <lst name="uimaConfig">
      <!-- Runtime parameters handed to the analysis engine.
           NOTE(review): the values look like placeholders; confirm real keys are supplied before use. -->
      <lst name="runtimeParameters">
        <str name="keyword_apikey">VALID_ALCHEMYAPI_KEY</str>
        <str name="concept_apikey">VALID_ALCHEMYAPI_KEY</str>
        <str name="lang_apikey">VALID_ALCHEMYAPI_KEY</str>
        <str name="cat_apikey">VALID_ALCHEMYAPI_KEY</str>
        <str name="entities_apikey">VALID_ALCHEMYAPI_KEY</str>
        <str name="oc_licenseID">VALID_OPENCALAIS_KEY</str>
      </lst>
      <!-- Path of the analysis engine descriptor to load. -->
      <str name="analysisEngine">/org/apache/uima/desc/OverridingParamsExtServicesAE.xml</str>
      <!-- Document fields whose content is analyzed; merge is switched off. -->
      <lst name="analyzeFields">
        <bool name="merge">false</bool>
        <arr name="fields">
          <str>text</str>
          <str>title</str>
        </arr>
      </lst>
      <!-- Maps a feature of an annotation type onto the Solr field "tag".
           NOTE(review): the feature name "convertText" is not defined in any descriptor
           visible here; confirm against the mapper whether "coveredText" was intended. -->
      <lst name="fieldMappings">
        <lst name="mapping">
          <str name="type">org.apache.uima.jcas.tcas.Annotation</str>
          <str name="feature">convertText</str>
          <str name="field">tag</str>
        </lst>
      </lst>
    </lst>
  </processor>
  <processor class="solr.LogUpdateProcessorFactory" />
  <processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>

View File

@ -1,9 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Schema field declarations: language, concept, keyword, suggested_category and
  sentence, plus a dynamic field matching entity*. All are indexed and stored.
  NOTE(review): presumably the targets of UIMA field mappings; confirm against
  the processor configuration that uses them.
-->
<fields>
  <!-- Single-valued string fields (language declares no multiValued attribute). -->
  <field name="language" type="string" indexed="true" stored="true" required="false"/>
  <field name="suggested_category" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
  <!-- Multi-valued string fields. -->
  <field name="concept" type="string" indexed="true" stored="true" multiValued="true" required="false"/>
  <field name="keyword" type="string" indexed="true" stored="true" multiValued="true" required="false"/>
  <!-- Multi-valued text fields. -->
  <field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false" />
  <dynamicField name="entity*" type="text" indexed="true" stored="true" multiValued="true"/>
</fields>

View File

@ -1,70 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
  Aggregate analysis engine "AggregateSentenceAE".
  Runs two delegates in a fixed order, WhitespaceTokenizer then HmmTagger, and
  exposes one optional Integer parameter, ngramsize, that overrides the
  HmmTagger delegate's NGRAM_SIZE parameter.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>false</primitive>
  <!-- Delegates are imported by name rather than by file location. -->
  <delegateAnalysisEngineSpecifiers>
    <delegateAnalysisEngine key="WhitespaceTokenizer">
      <import name="WhitespaceTokenizer"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="HmmTagger">
      <import name="HmmTagger"/>
    </delegateAnalysisEngine>
  </delegateAnalysisEngineSpecifiers>
  <analysisEngineMetaData>
    <name>AggregateSentenceAE</name>
    <description/>
    <version>1.0</version>
    <vendor/>
    <configurationParameters>
      <configurationParameter>
        <name>ngramsize</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
        <overrides>
          <parameter>HmmTagger/NGRAM_SIZE</parameter>
        </overrides>
      </configurationParameter>
    </configurationParameters>
    <configurationParameterSettings/>
    <!-- Fixed flow: tokenization first, tagging second. -->
    <flowConstraints>
      <fixedFlow>
        <node>WhitespaceTokenizer</node>
        <node>HmmTagger</node>
      </fixedFlow>
    </flowConstraints>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <!-- Sentence and token annotations are declared as outputs, with all features. -->
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
          <type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,68 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
  Primitive analysis engine "DummyEntityAEDescriptor", implemented by
  org.apache.solr.uima.processor.an.DummyEntityAnnotator.
  Declares the EntityAnnotation type (String features name and entity) and
  lists it as the engine's only output. No configuration parameters.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.solr.uima.processor.an.DummyEntityAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>DummyEntityAEDescriptor</name>
    <description/>
    <version>1.0</version>
    <vendor>ASF</vendor>
    <configurationParameters/>
    <configurationParameterSettings/>
    <typeSystemDescription>
      <types>
        <typeDescription>
          <name>org.apache.solr.uima.ts.EntityAnnotation</name>
          <description/>
          <supertypeName>uima.tcas.Annotation</supertypeName>
          <features>
            <featureDescription>
              <name>name</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>entity</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.solr.uima.ts.EntityAnnotation</type>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,40 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
  Primitive analysis engine "DummyExceptionAEDescriptor", implemented by
  org.apache.solr.uima.processor.an.DummyExceptionAnnotator.
  Declares no parameters, no types and no capabilities.
  NOTE(review): judging by its name the annotator exists to raise an error for
  tests; confirm against the annotator class.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.solr.uima.processor.an.DummyExceptionAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>DummyExceptionAEDescriptor</name>
    <description/>
    <version>1.0</version>
    <vendor>ASF</vendor>
    <configurationParameters/>
    <configurationParameterSettings/>
    <typeSystemDescription/>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities/>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,60 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
  Primitive analysis engine "DummySentimentAnalysisAEDescriptor", implemented by
  org.apache.solr.uima.processor.an.DummySentimentAnnotator.
  Declares the DummySentimentAnnotation type with one String feature, mood.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.solr.uima.processor.an.DummySentimentAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>DummySentimentAnalysisAEDescriptor</name>
    <description/>
    <version>1.0</version>
    <vendor>ASF</vendor>
    <configurationParameters/>
    <configurationParameterSettings/>
    <typeSystemDescription>
      <types>
        <typeDescription>
          <name>org.apache.solr.uima.ts.DummySentimentAnnotation</name>
          <description/>
          <supertypeName>uima.tcas.Annotation</supertypeName>
          <features>
            <featureDescription>
              <name>mood</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>
    <typePriorities/>
    <fsIndexCollection/>
    <!-- NOTE(review): this capability lists no outputs and has no languagesSupported
         element, although the descriptor declares DummySentimentAnnotation above;
         confirm whether the type should be listed as an output. -->
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,72 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
  Aggregate analysis engine "TestAE".
  Chains three delegates in a fixed order: AggregateSentenceAE,
  DummyEntityAEDescriptor, DummySentimentAnalysisAEDescriptor.
  Its optional Integer parameter ngramsize overrides the parameter of the same
  name on the AggregateSentenceAE delegate.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>false</primitive>
  <!-- Delegates are imported by file location, given here as bare file names. -->
  <delegateAnalysisEngineSpecifiers>
    <delegateAnalysisEngine key="AggregateSentenceAE">
      <import location="AggregateSentenceAE.xml"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="DummyEntityAEDescriptor">
      <import location="DummyEntityAEDescriptor.xml"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="DummySentimentAnalysisAEDescriptor">
      <import location="DummySentimentAnalysisAEDescriptor.xml"/>
    </delegateAnalysisEngine>
  </delegateAnalysisEngineSpecifiers>
  <analysisEngineMetaData>
    <name>TestAE</name>
    <description/>
    <version>1.0</version>
    <vendor/>
    <configurationParameters>
      <configurationParameter>
        <name>ngramsize</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
        <overrides>
          <parameter>AggregateSentenceAE/ngramsize</parameter>
        </overrides>
      </configurationParameter>
    </configurationParameters>
    <configurationParameterSettings/>
    <!-- Node names refer to the delegate keys declared above. -->
    <flowConstraints>
      <fixedFlow>
        <node>AggregateSentenceAE</node>
        <node>DummyEntityAEDescriptor</node>
        <node>DummySentimentAnalysisAEDescriptor</node>
      </fixedFlow>
    </flowConstraints>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,54 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
  Aggregate analysis engine "TestExceptionAE".
  Wraps a single delegate, DummyExceptionAEDescriptor, in a one-node fixed flow.
  Declares no configuration parameters.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>false</primitive>
  <!-- The delegate is imported by file location, given here as a bare file name. -->
  <delegateAnalysisEngineSpecifiers>
    <delegateAnalysisEngine key="DummyExceptionAEDescriptor">
      <import location="DummyExceptionAEDescriptor.xml"/>
    </delegateAnalysisEngine>
  </delegateAnalysisEngineSpecifiers>
  <analysisEngineMetaData>
    <name>TestExceptionAE</name>
    <description/>
    <version>1.0</version>
    <vendor/>
    <configurationParameters/>
    <configurationParameterSettings/>
    <flowConstraints>
      <fixedFlow>
        <node>DummyExceptionAEDescriptor</node>
      </fixedFlow>
    </flowConstraints>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>

View File

@ -1,21 +0,0 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
# Use a protected word file to protect against the stemmer reducing two
# unrelated words to the same base word.
# Some non-words that normally won't be encountered,
# just to test that they won't be stemmed.
dontstems
zwhacky

View File

@ -1,612 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version
2.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
applicable law or agreed to in writing, software distributed under
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and
limitations under the License.
-->
<!--
This is the Solr schema file. This file should be named "schema.xml"
and should be in the conf directory under the solr home (i.e.
./solr/conf/schema.xml by default) or located where the classloader
for the Solr webapp can find it. This example schema is the
recommended starting point for users. It should be kept correct and
concise, usable out-of-the-box. For more information on how to
customize this file, please see
http://wiki.apache.org/solr/SchemaXml PERFORMANCE NOTE: this schema
includes many optional features and should not be used for
benchmarking. To improve performance one could - set stored="false"
for all fields possible (esp large fields) when you only need to
search on the field but don't need to return the original value. -
set indexed="false" if you don't need to search on the field, but
only return the field as a result of searching on other indexed
fields. - remove all unneeded copyField statements - for best index
size and searching performance, set "index" to false for all general
text fields, use copyField to copy them to the catchall "text"
field, and use that for searching. - For maximum indexing
performance, use the ConcurrentUpdateSolrServer java client. -
Remember to run the JVM in server mode, and use a higher logging
level that avoids logging every request
-->
<schema name="sample" version="1.2">
<!--
attribute "name" is the name of this schema and is only used for
display purposes. Applications should change this to reflect the
nature of the search collection. version="1.2" is Solr's version
number for the schema syntax and semantics. It should not normally
be changed by applications. 1.0: multiValued attribute did not
exist, all fields are multiValued by nature 1.1: multiValued
attribute introduced, false by default 1.2: omitTermFreqAndPositions
attribute introduced, true by default except for text fields.
-->
<!--
field type definitions. The "name" attribute is just a label to be
used by field definitions. The "class" attribute and any other
attributes determine the real behavior of the fieldType. Class
names starting with "solr" refer to java classes in the
org.apache.solr.analysis package.
-->
<!--
The StrField type is not analyzed, but indexed/stored verbatim. -
StrField and TextField support an optional compressThreshold which
limits compression (if enabled in the derived fields) to values
which exceed a certain size (in characters).
-->
<fieldType name="string" class="solr.StrField"
sortMissingLast="true" omitNorms="true" />
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField"
sortMissingLast="true" omitNorms="true" />
<!--
Binary data type. The data should be sent/retrieved as Base64-encoded
Strings
-->
<fieldType name="binary" class="solr.BinaryField" />
<!--
If sortMissingLast="true", then a sort on this field will cause
documents without the field to come after documents with the
field, regardless of the requested sort order (asc or desc). - If
sortMissingFirst="true", then a sort on this field will cause
documents without the field to come before documents with the
field, regardless of the requested sort order. - If
sortMissingLast="false" and sortMissingFirst="false" (the
default), then default lucene sorting will be used which places
docs without the field first in an ascending sort and last in a
descending sort.
-->
<!--
Default numeric field types. For faster range queries, consider
the tint/tfloat/tlong/tdouble types.
-->
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}"
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
<fieldType name="float" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}"
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}"
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
<fieldType name="double" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}"
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
<!--
Numeric field types that index each value at various levels of
precision to accelerate range queries when the number of values
between the range endpoints is large. See the javadoc for
LegacyNumericRangeQuery for internal implementation details. Smaller
precisionStep values (specified in bits) will lead to more tokens
indexed per value, slightly larger index size, and faster range
queries. A precisionStep of 0 disables indexing at different
precision levels.
-->
<fieldType name="tint" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}"
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
<fieldType name="tfloat" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}"
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
<fieldType name="tlong" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}"
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
<fieldType name="tdouble" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}"
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
<!--
The format for this date field is of the form
1995-12-31T23:59:59Z, and is a more restricted form of the
canonical representation of dateTime
http://www.w3.org/TR/xmlschema-2/#dateTime The trailing "Z"
designates UTC time and is mandatory. Optional fractional seconds
are allowed: 1995-12-31T23:59:59.999Z All other components are
mandatory. Expressions can also be used to denote calculations
that should be performed relative to "NOW" to determine the value,
ie... NOW/HOUR ... Round to the start of the current hour NOW-1DAY
... Exactly 1 day prior to now NOW/DAY+6MONTHS+3DAYS ... 6 months
and 3 days in the future from the start of the current day Consult
the TrieDateField javadocs for more information. Note: For faster
range queries, consider the tdate type
-->
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}"
omitNorms="true" precisionStep="0" positionIncrementGap="0" />
<!--
A Trie based date field for faster date range queries and date
faceting.
-->
<fieldType name="tdate" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}"
omitNorms="true" precisionStep="6" positionIncrementGap="0" />
<!--
The "RandomSortField" is not used to store or search any data. You
can declare fields of this type in your schema to generate
pseudo-random orderings of your docs for sorting purposes. The
ordering is generated based on the field name and the version of
the index. As long as the index version remains unchanged, and the
same field name is reused, the ordering of the docs will be
consistent. If you want different pseudo-random orderings of
documents, for the same version of the index, use a dynamicField
and change the name
-->
<fieldType name="random" class="solr.RandomSortField"
indexed="true" />
<!--
solr.TextField allows the specification of custom text analyzers
specified as a tokenizer and a list of token filters. Different
analyzers may be specified for indexing and querying. The optional
positionIncrementGap puts space between multiple fields of this
type on the same document, with the purpose of preventing false
phrase matching across fields. For more info on customizing your
analyzer chain, please see
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
-->
<!--
One can also specify an existing Analyzer class that has a default
constructor via the class attribute on the analyzer element
<fieldType name="text_greek" class="solr.TextField"> <analyzer
class="org.apache.lucene.analysis.el.GreekAnalyzer"/> </fieldType>
-->
<!--
A text field that only splits on whitespace for exact matching of
words
-->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  <!-- Single untyped analyzer: a tokenizer only, with no token filters. -->
  <analyzer>
    <tokenizer class="solr.MockTokenizerFactory"/>
  </analyzer>
</fieldType>
<!--
A text field that uses WordDelimiterGraphFilter to enable splitting and
matching of words on case-change, alpha numeric boundaries, and
non-alphanumeric chars, so that a query of "wifi" or "wi fi" could
match a document containing "Wi-Fi". Synonyms and stopwords are
customized by external files. Note that no stemming filter is
configured in the analyzer chains below.
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <!-- Index-time chain: stop words, word-delimiter handling, lowercasing, then graph flattening. -->
  <analyzer type="index">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <!--
      In this example synonyms are only applied at query time. The
      index-time equivalent would be:
      <filter class="solr.SynonymGraphFilterFactory"
      synonyms="index_synonyms.txt" ignoreCase="true"
      expand="false"/>
    -->
    <!-- Case-insensitive stop word removal. -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <!-- Query-time chain: synonyms first, no catenation, and no graph flattening. -->
  <analyzer type="query">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.SynonymGraphFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!--
Less flexible matching, but fewer false matches. Probably not ideal
for product names, but may be good for SKUs. Can insert dashes in
the wrong place and still match.
-->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <!--
      NOTE(review): a SynonymGraphFilter feeds a WordDelimiterGraphFilter
      in this chain; confirm the latter copes with graph input as intended.
    -->
    <filter class="solr.SynonymGraphFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="false"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="0"
            generateNumberParts="0"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!--
      Removes duplicate tokens that appear at the same position, which
      is sometimes possible with WordDelimiterGraphFilter in conjunction
      with stemming.
    -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <!-- Query-time chain: same filters as at index time, minus the final graph flattening. -->
  <analyzer type="query">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.SynonymGraphFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="false"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="0"
            generateNumberParts="0"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!--
A general unstemmed text field - good if one does not know the
language of the field
-->
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
  <!-- Index-time chain: catenates word and number parts; no splitting on case change. -->
  <analyzer type="index">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <!-- Query-time chain: synonyms are expanded; catenation is switched off. -->
  <analyzer type="query">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.SynonymGraphFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!--
A general unstemmed text field that indexes tokens normally and
also reversed (via ReversedWildcardFilterFactory), to enable more
efficient leading wildcard queries.
-->
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
  <!-- Index-time chain: ReversedWildcardFilter runs with withOriginal="true", followed by graph flattening. -->
  <analyzer type="index">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ReversedWildcardFilterFactory"
            withOriginal="true"
            maxPosAsterisk="3"
            maxPosQuestion="2"
            maxFractionAsterisk="0.33"/>
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <!-- Query-time chain: no reversing filter is configured here. -->
  <analyzer type="query">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.SynonymGraphFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- charFilter + WhitespaceTokenizer -->
<!--
<fieldType name="textCharNorm" class="solr.TextField"
positionIncrementGap="100" > <analyzer> <charFilter
class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt"/> <tokenizer
class="solr.MockTokenizerFactory"/> </analyzer> </fieldType>
-->
<!--
This is an example of using the KeywordTokenizer along with
various TokenFilterFactories to produce a sortable field that does
not include some properties of the source text.
-->
<fieldType name="alphaOnlySort"
           class="solr.TextField"
           sortMissingLast="true"
           omitNorms="true">
  <analyzer>
    <!--
      The tokenizer is configured with pattern="keyword": it does no
      actual tokenizing, so the entire input string is preserved as a
      single token.
    -->
    <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
    <!-- Lowercase the token, which is useful when sorting should be case insensitive. -->
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Remove any leading or trailing whitespace. -->
    <filter class="solr.TrimFilterFactory"/>
    <!--
      PatternReplaceFilter replaces any sequence of characters matching
      a Java regular expression with an arbitrary replacement string,
      which may include back references to portions of the original
      string matched by the pattern. Here every match of ([^a-z]) is
      replaced with the empty string. For pattern and replacement
      syntax see
      http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
    -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="([^a-z])"
            replacement=""
            replace="all"/>
  </analyzer>
</fieldType>
<!-- Indexed-only text type: standard tokenizer followed by a DoubleMetaphone filter with inject="false". -->
<fieldType name="phonetic"
           class="solr.TextField"
           indexed="true"
           stored="false">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
  </analyzer>
</fieldType>
<fieldType name="payloads"
           class="solr.TextField"
           indexed="true"
           stored="false">
  <analyzer>
    <tokenizer class="solr.MockTokenizerFactory"/>
    <!--
      The DelimitedPayloadTokenFilter can put payloads on tokens: for
      example, a token of "foo|1.4" would be indexed as "foo" with a
      payload of 1.4f.
      Attributes of the DelimitedPayloadTokenFilterFactory:
        "delimiter" - a one character delimiter. Default is | (pipe)
        "encoder"   - how to encode the following value into a payload:
                        float    -> org.apache.lucene.analysis.payloads.FloatEncoder
                        integer  -> o.a.l.a.p.IntegerEncoder
                        identity -> o.a.l.a.p.IdentityEncoder
                      or the fully qualified name of a class implementing
                      PayloadEncoder; the encoder must have a no arg
                      constructor.
    -->
    <filter class="solr.DelimitedPayloadTokenFilterFactory"
            encoder="float"/>
  </analyzer>
</fieldType>
<!--
lowercases the entire field value, keeping it as a single token.
-->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <!-- pattern="keyword" keeps the value as one token; the filter then lowercases it. -->
    <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!--
since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright.
-->
<fieldType name="ignored"
           class="solr.StrField"
           indexed="false"
           stored="false"
           multiValued="true"/>
<!--
Valid attributes for fields: name: mandatory - the name for the
field type: mandatory - the name of a previously defined type from
the <fieldType>s indexed: true if this field should be indexed
(searchable or sortable) stored: true if this field should be
retrievable multiValued: true if this field may contain multiple
values per document omitNorms: (expert) set to true to omit the
norms associated with this field (this disables length
normalization and index-time boosting for the field, and saves
some memory). Only full-text fields or fields that need an
index-time boost need norms. termVectors: [false] set to true to
store the term vector for a given field. When using MoreLikeThis,
fields used for similarity should be stored for best performance.
termPositions: Store position information with the term vector.
This will increase storage costs. termOffsets: Store offset
information with the term vector. This will increase storage
costs. default: a value that should be used if no value is
specified when adding a document.
-->
<!-- Core document fields; "id" is the only one marked required. -->
<field name="id"            type="string"        indexed="true" stored="true" required="true"/>
<field name="sku"           type="textTight"     indexed="true" stored="true" omitNorms="true"/>
<field name="name"          type="textgen"       indexed="true" stored="true"/>
<field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
<field name="manu"          type="textgen"       indexed="true" stored="true" omitNorms="true"/>
<field name="cat"           type="text_ws"       indexed="true" stored="true" multiValued="true" omitNorms="true"/>
<field name="features"      type="text"          indexed="true" stored="true" multiValued="true"/>
<!-- "includes" additionally stores term vectors with positions and offsets. -->
<field name="includes"      type="text"          indexed="true" stored="true"
       termVectors="true" termPositions="true" termOffsets="true"/>
<field name="weight"        type="float"         indexed="true" stored="true"/>
<field name="price"         type="float"         indexed="true" stored="true"/>
<field name="popularity"    type="int"           indexed="true" stored="true"/>
<field name="inStock"       type="boolean"       indexed="true" stored="true"/>
<!--
Common metadata fields, named specifically to match up with
SolrCell metadata when parsing rich documents such as Word, PDF.
Some fields are multiValued only because Tika currently may return
multiple values for them.
-->
<!-- Analyzed text metadata. -->
<field name="title"         type="text"    indexed="true" stored="true" multiValued="true"/>
<field name="subject"       type="text"    indexed="true" stored="true"/>
<field name="description"   type="text"    indexed="true" stored="true"/>
<field name="comments"      type="text"    indexed="true" stored="true"/>
<field name="author"        type="textgen" indexed="true" stored="true"/>
<field name="keywords"      type="textgen" indexed="true" stored="true"/>
<field name="category"      type="textgen" indexed="true" stored="true"/>
<!-- String and date metadata. -->
<field name="content_type"  type="string"  indexed="true" stored="true" multiValued="true"/>
<field name="last_modified" type="date"    indexed="true" stored="true"/>
<field name="links"         type="string"  indexed="true" stored="true" multiValued="true"/>
<!--
catchall field, containing all other searchable text fields
(implemented via copyField further on in this schema)
-->
<field name="text"       type="text"     indexed="true" stored="false" multiValued="true"/>
<!--
  Catchall text field that indexes tokens both normally and in
  reverse, for efficient leading wildcard queries.
-->
<field name="text_rev"   type="text_rev" indexed="true" stored="false" multiValued="true"/>
<!--
  Non-tokenized version of the manufacturer, making it easier to sort
  or group results by manufacturer. Copied from "manu" via copyField.
-->
<field name="manu_exact" type="string"   indexed="true" stored="false"/>
<field name="payloads"   type="payloads" indexed="true" stored="true"/>
<!--
  Uncomment the following to create a "timestamp" field whose default
  value of "NOW" indicates when each document was indexed.
-->
<!--
  <field name="timestamp" type="date" indexed="true" stored="true"
  default="NOW" multiValued="false"/>
-->
<field name="language"   type="string"   indexed="true" stored="true" required="false"/>
<field name="sentence"   type="text"     indexed="true" stored="true" multiValued="true" required="false"/>
<field name="sentiment"  type="string"   indexed="true" stored="true" multiValued="true"/>
<field name="entity"     type="text"     indexed="true" stored="true" multiValued="true"/>
<!--
Dynamic field definitions. If a field name is not found,
dynamicFields will be used if the name matches any of the
patterns. RESTRICTION: the glob-like pattern in the name attribute
must have a "*" only at the start or the end. EXAMPLE: name="*_i"
will match any field ending in _i (like myid_i, z_i) Longer
patterns will be matched first. if equal size patterns both match,
the first appearing in the schema will be used. <dynamicField
name="*_i" type="int" indexed="true" stored="true"/> <dynamicField
name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_l" type="long" indexed="true"
stored="true"/> <dynamicField name="*_t" type="text"
indexed="true" stored="true"/> <dynamicField name="*_b"
type="boolean" indexed="true" stored="true"/> <dynamicField
name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true"
stored="true"/> <dynamicField name="*_dt" type="date"
indexed="true" stored="true"/> <dynamicField name="*_ti"
type="tint" indexed="true" stored="true"/> <dynamicField
name="*_tl" type="tlong" indexed="true" stored="true"/>
<dynamicField name="*_tf" type="tfloat" indexed="true"
stored="true"/> <dynamicField name="*_td" type="tdouble"
indexed="true" stored="true"/> <dynamicField name="*_tdt"
type="tdate" indexed="true" stored="true"/>
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="attr_*" type="textgen" indexed="true"
stored="true" multiValued="true"/> <dynamicField name="random_*"
type="random" />
-->
<!-- Field names matching "*_sm" become multi-valued, indexed and stored strings. -->
<dynamicField name="*_sm"
              type="string"
              indexed="true"
              stored="true"
              multiValued="true"/>
<!--
uncomment the following to ignore any fields that don't already
match an existing field name or dynamic field, rather than
reporting them as an error. alternately, change the type="ignored"
to some other type e.g. "text" if you want unknown fields indexed
and/or stored by default
-->
<!--dynamicField name="*" type="ignored" multiValued="true" /-->
<!--
Field to use to determine and enforce document uniqueness. Unless
this field is marked with required="false", it will be a required
field
-->
<uniqueKey>id</uniqueKey> <!-- "id" is declared in this schema as a required string field -->
<!--
copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field
differently, or to add multiple fields to the same field for
easier/faster searching.
-->
<!-- Feed the catchall "text" field from the individual searchable fields. -->
<copyField source="cat"      dest="text"/>
<copyField source="name"     dest="text"/>
<copyField source="manu"     dest="text"/>
<copyField source="features" dest="text"/>
<copyField source="includes" dest="text"/>
<!-- Also copy the manufacturer into the string-typed "manu_exact" field. -->
<copyField source="manu"     dest="manu_exact"/>
<!--copyField source="Titolo" dest="text"/-->
<!--
Above, multiple source fields are copied to the [text] field.
Another way to map multiple source fields to the same destination
field is to use the dynamic field syntax. copyField also supports a
maxChars to copy setting.
-->
<!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
<!--
copy name to alphaNameSort, a field designed for sorting by name
-->
<!-- <copyField source="name" dest="alphaNameSort"/> -->
</schema>

View File

@ -1,773 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version
2.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
applicable law or agreed to in writing, software distributed under
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and
limitations under the License.
-->
<!--
For more details about configurations options that may appear in
this file, see http://wiki.apache.org/solr/SolrConfigXml.
Specifically, the Solr Config can support XInclude, which may make
it easier to manage the configuration. See
https://issues.apache.org/jira/browse/SOLR-1167
-->
<config xmlns:xi="http://www.w3.org/2001/XInclude">
<!-- Uses the tests.luceneMatchVersion property when set, otherwise LATEST. -->
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<!-- Compound files are disabled unless the useCompoundFile property says otherwise. -->
<indexConfig>
  <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
</indexConfig>
<!--
lib directives can be used to instruct Solr to load any Jars
identified and use them to resolve any "plugins" specified in your
solrconfig.xml or schema.xml (ie: Analyzers, Request Handlers,
etc...). All directories and paths are resolved relative to the
instanceDir. If a "./lib" directory exists in your instanceDir, all
files found in it are included as if you had used the following
syntax... <lib dir="./lib" />
-->
<!--
  A dir option on its own adds every file found in that directory to
  the classpath; this is useful for including all jars in a directory.
-->
<lib dir="../../contrib/extraction/lib"/>
<!--
  When a regex is given in addition to a directory, only the files in
  that directory which completely match the regex (anchored on both
  ends) are included.
-->
<lib dir="../../dist/"
     regex="solr-cell-\d.*\.jar"/>
<!--
  A dir option (with or without a regex) that matches nothing is
  ignored.
-->
<lib dir="/total/crap/dir/ignored"/>
<!--
an exact path can be used to specify a specific file. This will
cause a serious error to be logged if it can't be loaded. <lib
path="../a-jar-that-does-not-exist.jar" />
-->
<!-- Uses the solr.directoryFactory property when set, otherwise solr.RAMDirectoryFactory. -->
<directoryFactory name="DirectoryFactory"
                  class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<!--
  Specifies an alternate directory to hold all index data, other than
  the default ./data under the Solr home. If replication is in use,
  this should match the replication configuration.
-->
<dataDir>${solr.data.dir:}</dataDir>
<!--
  Enables JMX if and only if an existing MBeanServer is found; use this
  if you want to configure JMX through JVM parameters. Remove this
  element to disable exposing Solr configuration and statistics to JMX.
  If you want to connect to a particular server, specify the agentId,
  e.g. <jmx agentId="myAgent" />
  If you want to start a new MBeanServer, specify the serviceUrl,
  e.g. <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
  For more details see http://wiki.apache.org/solr/SolrJmx
-->
<jmx/>
<!-- the default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2">
  <!--
    A "solr." prefix on a class name is an alias that causes Solr to
    search the appropriate packages, including
    org.apache.solr.(search|update|request|core|analysis)
  -->
  <!--
    Perform a <commit/> automatically under certain conditions:
      maxDocs - number of updates since last commit is greater than this
      maxTime - oldest uncommitted update (in ms) is this long ago
    Instead of enabling autoCommit, consider using "commitWithin" when
    adding documents. http://wiki.apache.org/solr/UpdateXmlMessages
    <autoCommit>
      <maxDocs>10000</maxDocs>
      <maxTime>1000</maxTime>
    </autoCommit>
  -->
</updateHandler>
<!--
Use the following format to specify a custom IndexReaderFactory -
allows for alternate IndexReader implementations. ** Experimental
Feature ** Please note - Using a custom IndexReaderFactory may
prevent certain other features from working. The API to
IndexReaderFactory may change without warning or may even be removed
from future releases if the problems cannot be resolved. ** Features
that may not work with custom IndexReaderFactory ** The
ReplicationHandler assumes a disk-resident index. Using a custom
IndexReader implementation may cause incompatibility with
ReplicationHandler and may cause replication to not work correctly.
See SOLR-1366 for details. <indexReaderFactory
name="IndexReaderFactory" class="package.class"> Parameters as
required by the implementation </indexReaderFactory >
-->
<!-- To set the termInfosIndexDivisor, do this: -->
<!--
<indexReaderFactory name="IndexReaderFactory"
class="org.apache.solr.core.StandardIndexReaderFactory"> <int
name="setTermIndexDivisor">12</int> </indexReaderFactory >
-->
<query>
<!--
Query-side settings: boolean clause limit, searcher caches, lazy
field loading, result windowing, and searcher warm-up listeners.
-->
<!--
Maximum number of clauses in a boolean query... in the past, this
affected range or prefix queries that expanded to big boolean
queries - built in Solr query parsers no longer create queries
with this limitation. An exception is thrown if exceeded.
-->
<maxBooleanClauses>1024</maxBooleanClauses>
<!--
There are two implementations of cache available for Solr,
LRUCache, based on a synchronized LinkedHashMap, and FastLRUCache,
based on a ConcurrentHashMap. FastLRUCache has faster gets and
slower puts in single threaded operation and thus is generally
faster than LRUCache when the hit ratio of the cache is high (>
75%), and may be faster under other scenarios on multi-cpu
systems.
-->
<!--
Cache used by SolrIndexSearcher for filters (DocSets), unordered
sets of *all* documents that match a query. When a new searcher is
opened, its caches may be prepopulated or "autowarmed" using data
from caches in the old searcher. autowarmCount is the number of
items to prepopulate. For LRUCache, the autowarmed items will be
the most recently accessed items. Parameters: class - the
SolrCache implementation LRUCache or FastLRUCache size - the
maximum number of entries in the cache initialSize - the initial
capacity (number of entries) of the cache. (see
java.util.HashMap) autowarmCount - the number of entries to
prepopulate from an old cache.
-->
<filterCache class="solr.FastLRUCache" size="512"
initialSize="512" autowarmCount="0" />
<!--
Cache used to hold field values that are quickly accessible by
document id. The fieldValueCache is created by default even if not
configured here. <fieldValueCache class="solr.FastLRUCache"
size="512" autowarmCount="128" showItems="32" />
-->
<!--
queryResultCache caches results of searches - ordered lists of
document ids (DocList) based on a query, a sort, and the range of
documents requested.
-->
<queryResultCache class="solr.LRUCache" size="512"
initialSize="512" autowarmCount="0" />
<!--
documentCache caches Lucene Document objects (the stored fields
for each document). Since Lucene internal document ids are
transient, this cache will not be autowarmed.
-->
<documentCache class="solr.LRUCache" size="512"
initialSize="512" autowarmCount="0" />
<!--
If true, stored fields that are not requested will be loaded
lazily. This can result in a significant speed improvement if the
usual case is to not load all stored fields, especially if the
skipped fields are large compressed text fields.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!--
Example of a generic cache. These caches may be accessed by name
through SolrIndexSearcher.getCache(),cacheLookup(), and
cacheInsert(). The purpose is to enable easy caching of
user/application level data. The regenerator argument should be
specified as an implementation of solr.search.CacheRegenerator if
autowarming is desired.
-->
<!--
<cache name="myUserCache" class="solr.LRUCache" size="4096"
initialSize="1024" autowarmCount="1024"
regenerator="org.mycompany.mypackage.MyRegenerator" />
-->
<!--
An optimization that attempts to use a filter to satisfy a search.
If the requested sort does not include score, then the filterCache
will be checked for a filter matching the query. If found, the
filter will be used as the source of document ids, and then the
sort will be applied to that.
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<!--
An optimization for use with the queryResultCache. When a search
is requested, a superset of the requested number of document ids
are collected. For example, if a search for a particular query
requests matching documents 10 through 19, and queryResultWindowSize
is 50, then documents 0 through 49 will be collected and cached. Any
further requests in that range can be satisfied via the cache.
-->
<queryResultWindowSize>20</queryResultWindowSize>
<!--
Maximum number of documents to cache for any entry in the
queryResultCache.
-->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!--
a newSearcher event is fired whenever a new searcher is being
prepared and there is a current searcher handling requests (aka
registered). It can be used to prime certain caches to prevent
long request times for certain requests.
-->
<!--
QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence.
-->
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<!--
No newSearcher warming queries are active; the examples below are
commented out.
<lst> <str name="q">solr</str> <str name="start">0</str> <str
name="rows">10</str> </lst> <lst> <str name="q">rocks</str>
<str name="start">0</str> <str name="rows">10</str> </lst>
<lst><str name="q">static newSearcher warming query from
solrconfig.xml</str></lst>
-->
</arr>
</listener>
<!--
a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain autowarming data from.
-->
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst>
<str name="q">solr rocks</str>
<str name="start">0</str>
<str name="rows">10</str>
</lst>
<lst>
<str name="q">static firstSearcher warming query from
solrconfig.xml</str>
</lst>
</arr>
</listener>
<!--
If a search request comes in and there is no current registered
searcher, then immediately register the still warming searcher and
use it. If "false" then all requests will block until the first
searcher is done warming.
-->
<useColdSearcher>false</useColdSearcher>
<!--
Maximum number of searchers that may be warming in the background
concurrently. An error is returned if this limit is exceeded.
Recommend 1-2 for read-only slaves, higher for masters w/o cache
warming.
-->
<maxWarmingSearchers>2</maxWarmingSearchers>
</query>
<requestDispatcher>
  <!--
    Make sure your system has some authentication in place before
    enabling remote streaming!
  -->
  <requestParsers enableRemoteStreaming="false"
                  multipartUploadLimitInKB="-1"/>
  <!--
    HTTP caching related parameters (for proxy caches and clients). To
    get the behaviour of Solr 1.2 (ie: no caching related headers) use
    the never304="true" option and do not specify a value for
    <cacheControl>
  -->
  <!-- <httpCaching never304="true"> -->
  <httpCaching lastModifiedFrom="openTime"
               etagSeed="Solr">
    <!--
      lastModifiedFrom="openTime" is the default: the Last-Modified
      value (and validation against If-Modified-Since requests) will
      all be relative to when the current Searcher was opened. You can
      change it to lastModifiedFrom="dirLastMod" if you want the value
      to correspond exactly to when the physical index was last
      modified.
      etagSeed="..." is an option you can change to force the ETag
      header (and validation against If-None-Match requests) to be
      different even if the index has not changed (ie: when making
      significant changes to your config file).
      lastModifiedFrom and etagSeed are both ignored if you use the
      never304="true" option.
    -->
    <!--
      If you include a <cacheControl> directive, it will be used to
      generate a Cache-Control header, as well as an Expires header if
      the value contains "max-age=". By default, no Cache-Control
      header is generated. You can use the <cacheControl> option even
      if you have set never304="true".
    -->
    <!-- <cacheControl>max-age=30, public</cacheControl> -->
  </httpCaching>
</requestDispatcher>
<requestHandler name="/select" class="solr.SearchHandler">
  <!-- Default values for query parameters. -->
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <!--
      Further defaults, currently disabled:
      <int name="rows">10</int>
      <str name="fl">*</str>
      <str name="version">2.1</str>
    -->
  </lst>
</requestHandler>
<!--
DisMaxRequestHandler allows easy searching across multiple fields
for simple user-entered phrases. Its implementation is now just the
standard SearchHandler with a default query parser of "dismax". see
http://wiki.apache.org/solr/DisMaxRequestHandler
-->
<requestHandler name="/dismax" class="solr.SearchHandler">
<!--
A SearchHandler registered at /dismax whose defaults select the
dismax query parser through defType.
-->
<lst name="defaults">
<str name="defType">dismax</str>
<str name="echoParams">explicit</str>
<float name="tie">0.01</float>
<!--
qf, pf and bf below are whitespace-separated lists whose entries
carry a ^boost suffix. The surrounding whitespace is part of each
value as written, so keep the layout when editing.
-->
<str name="qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0
manu^1.1 cat^1.4
</str>
<str name="pf">
text^0.2 features^1.1 name^1.5 manu^1.4
manu_exact^1.9
</str>
<str name="bf">
popularity^0.5 recip(price,1,1000,1000)^0.3
</str>
<str name="fl">
id,name,price,score
</str>
<!-- "<" is written as &lt; because a literal "<" is not allowed in XML text -->
<str name="mm">
2&lt;-1 5&lt;-2 6&lt;90% </str>
<int name="ps">100</int>
<str name="q.alt">*:*</str>
<!-- example highlighter config, enable per-query with hl=true -->
<str name="hl.fl">text features name</str>
<!-- for this field, we want no fragmenting, just highlighting -->
<str name="f.name.hl.fragsize">0</str>
<!--
instructs Solr to return the field itself if no query terms are
found
-->
<str name="f.name.hl.alternateField">name</str>
<str name="f.text.hl.fragmenter">regex</str> <!-- defined below -->
</lst>
</requestHandler>
<!--
Note how you can register the same handler multiple times with
different names (and different init parameters)
-->
<requestHandler name="/partitioned" class="solr.SearchHandler">
  <lst name="defaults">
    <str name="defType">dismax</str>
    <str name="echoParams">explicit</str>
    <str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str>
    <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
    <!--
      An example of using Date Math to specify a constantly moving
      date range in a config.
    -->
    <str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str>
  </lst>
  <!--
    In addition to defaults, "appends" params can be specified to
    identify values which should be appended to the list of multi-val
    params from the query (or the existing "defaults"). In this
    example, the param "fq=inStock:true" will be appended to any query
    time fq params the user may specify, as a mechanism for
    partitioning the index, independent of any user selected filtering
    that may also be desired (perhaps as a result of faceted
    searching).
    NOTE: there is *absolutely* nothing a client can do to prevent
    these "appends" values from being used, so don't use this
    mechanism unless you are sure you always want it.
  -->
  <lst name="appends">
    <str name="fq">inStock:true</str>
  </lst>
  <!--
    "invariants" are a way of letting the Solr maintainer lock down
    the options available to Solr clients. Any params values specified
    here are used regardless of what values may be specified in either
    the query, the "defaults", or the "appends" params. In this
    example, the facet.field and facet.query params are fixed,
    limiting the facets clients can use. Faceting is not turned on by
    default, but if the client does specify facet=true in the request,
    these are the only facets they will be able to see counts for,
    regardless of what other facet.field or facet.query params they
    may specify.
    NOTE: there is *absolutely* nothing a client can do to prevent
    these "invariants" values from being used, so don't use this
    mechanism unless you are sure you always want it.
  -->
  <lst name="invariants">
    <str name="facet.field">cat</str>
    <str name="facet.field">manu_exact</str>
    <str name="facet.query">price:[* TO 500]</str>
    <str name="facet.query">price:[500 TO *]</str>
  </lst>
</requestHandler>
<!--
  Search components are registered to SolrCore and used by Search
  Handlers. By default, the following components are available:

    <searchComponent name="query"
        class="org.apache.solr.handler.component.QueryComponent" />
    <searchComponent name="facet"
        class="org.apache.solr.handler.component.FacetComponent" />
    <searchComponent name="mlt"
        class="org.apache.solr.handler.component.MoreLikeThisComponent" />
    <searchComponent name="highlight"
        class="org.apache.solr.handler.component.HighlightComponent" />
    <searchComponent name="stats"
        class="org.apache.solr.handler.component.StatsComponent" />
    <searchComponent name="debug"
        class="org.apache.solr.handler.component.DebugComponent" />

  Default configuration in a requestHandler would look like:

    <arr name="components">
      <str>query</str>
      <str>facet</str>
      <str>mlt</str>
      <str>highlight</str>
      <str>stats</str>
      <str>debug</str>
    </arr>

  If you register a searchComponent to one of the standard names, that
  will be used instead. To insert components before or after the
  'standard' components, use:

    <arr name="first-components">
      <str>myFirstComponentName</str>
    </arr>
    <arr name="last-components">
      <str>myLastComponentName</str>
    </arr>
-->
<!--
The spell check component can return a list of alternative spelling
suggestions.
-->
<searchComponent class="solr.SpellCheckComponent" name="spellcheck">
  <str name="queryAnalyzerFieldType">textSpell</str>
  <!-- The "default" spellchecker, built from the "name" field. -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">name</str>
    <str name="spellcheckIndexDir">./spellchecker</str>
  </lst>
  <!--
    A spellchecker that uses a different distance measure:

    <lst name="spellchecker">
      <str name="name">jarowinkler</str>
      <str name="field">spell</str>
      <str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
      <str name="spellcheckIndexDir">./spellchecker2</str>
    </lst>
  -->
  <!--
    A file based spell checker:

    <lst name="spellchecker">
      <str name="classname">solr.FileBasedSpellChecker</str>
      <str name="name">file</str>
      <str name="sourceLocation">spellings.txt</str>
      <str name="characterEncoding">UTF-8</str>
      <str name="spellcheckIndexDir">./spellcheckerFile</str>
    </lst>
  -->
</searchComponent>
<!--
A request handler utilizing the spellcheck component.
#############################################################################
NOTE: This is purely an example. The whole purpose of the
SpellCheckComponent is to hook it into the request handler that
handles your queries (i.e. the standard or dismax SearchHandler), so
that a separate request is not needed to get suggestions. IN OTHER
WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU
WANT FOR YOUR PRODUCTION SYSTEM!
#############################################################################
-->
<requestHandler name="/spell" class="solr.SearchHandler"
                startup="lazy">
  <!--
    Example handler that runs the spellcheck component after the standard
    components. startup="lazy" is the attribute that requests on-demand
    loading of a request handler.
  -->
  <lst name="defaults">
    <!-- spellcheck.onlyMorePopular: "Only More Popular" -->
    <str name="spellcheck.onlyMorePopular">false</str>
    <!-- spellcheck.extendedResults: "Extended Results" -->
    <str name="spellcheck.extendedResults">false</str>
    <!-- The number of suggestions to return -->
    <str name="spellcheck.count">1</str>
  </lst>
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
<searchComponent class="org.apache.solr.handler.component.TermVectorComponent"
                 name="tvComponent"/>
<!--
A Req Handler for working with the tvComponent. This is purely as an
example. You will likely want to add the component to your already
specified request handlers.
-->
<requestHandler class="org.apache.solr.handler.component.SearchHandler"
                name="/tvrh">
  <!-- Term vectors are switched on by default for this handler. -->
  <lst name="defaults">
    <bool name="tv">true</bool>
  </lst>
  <!-- tvComponent runs after the standard components. -->
  <arr name="last-components">
    <str>tvComponent</str>
  </arr>
</requestHandler>
<!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler -->
<requestHandler name="/update/extract"
                startup="lazy"
                class="org.apache.solr.handler.extraction.ExtractingRequestHandler">
  <lst name="defaults">
    <!--
      The main content is mapped to "text"; use a stored field if the
      extracted text must be returned or highlighted.
    -->
    <str name="fmap.content">text</str>
    <str name="lowernames">true</str>
    <str name="uprefix">ignored_</str>
    <!-- Capture link hrefs, but ignore div attributes. -->
    <str name="captureAttr">true</str>
    <str name="fmap.a">links</str>
    <str name="fmap.div">ignored_</str>
  </lst>
</requestHandler>
<!--
A component to return terms and document frequency of those terms.
This component does not yet support distributed search.
-->
<searchComponent class="org.apache.solr.handler.component.TermsComponent"
                 name="termsComponent"/>
<requestHandler class="org.apache.solr.handler.component.SearchHandler"
                name="/terms">
  <!-- The terms parameter is enabled by default for this handler. -->
  <lst name="defaults">
    <bool name="terms">true</bool>
  </lst>
  <!-- The component list consists of termsComponent only. -->
  <arr name="components">
    <str>termsComponent</str>
  </arr>
</requestHandler>
<!--
Update request handler. Note: Since solr1.1 requestHandlers requires
a valid content type header if posted in the body. For example, curl
now requires: -H 'Content-type:text/xml; charset=utf-8' The response
format differs from solr1.1 formatting and returns a standard error
code. To enable solr1.1 behavior, remove the /update handler or
change its path
-->
<requestHandler class="solr.UpdateRequestHandler" name="/update">
  <!-- Updates go through the "uima" update.chain unless the request names another one. -->
  <lst name="defaults">
    <str name="update.chain">uima</str>
  </lst>
</requestHandler>
<!-- CSV update handler, loaded on demand -->
<requestHandler name="/update/csv" startup="lazy" class="solr.CSVRequestHandler"/>
<!--
An example dedup update processor that creates the "id" field on the
fly based on the hash code of some other fields. This example has
overwriteDupes set to false since we are using the id field as the
signatureField and Solr will maintain uniqueness based on that
anyway. You have to link the chain to an update handler above to use
it, i.e.:

  <requestHandler name="/update" class="solr.UpdateRequestHandler">
    <lst name="defaults">
      <str name="update.chain">dedupe</str>
    </lst>
  </requestHandler>
-->
<updateRequestProcessorChain name="uima">
  <!-- UIMA processor configured with the /uima/TestAE.xml analysis engine over the "text" field. -->
  <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
    <lst name="uimaConfig">
      <lst name="runtimeParameters">
        <int name="ngramsize">3</int>
      </lst>
      <str name="analysisEngine">/uima/TestAE.xml</str>
      <lst name="analyzeFields">
        <bool name="merge">false</bool>
        <arr name="fields">
          <str>text</str>
        </arr>
      </lst>
      <!-- One "type" entry per annotation type, each with its feature mapping. -->
      <lst name="fieldMappings">
        <lst name="type">
          <str name="name">org.apache.uima.SentenceAnnotation</str>
          <lst name="mapping">
            <str name="feature">coveredText</str>
            <str name="field">sentence</str>
          </lst>
        </lst>
        <lst name="type">
          <str name="name">org.apache.solr.uima.ts.DummySentimentAnnotation</str>
          <lst name="mapping">
            <str name="feature">mood</str>
            <str name="field">sentiment</str>
          </lst>
        </lst>
        <lst name="type">
          <str name="name">org.apache.solr.uima.ts.EntityAnnotation</str>
          <lst name="mapping">
            <str name="feature">entity</str>
            <str name="fieldNameFeature">name</str>
            <str name="dynamicField">*_sm</str>
          </lst>
        </lst>
      </lst>
    </lst>
  </processor>
  <processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="uima-multi-map">
  <!--
    A single annotation type carrying two feature mappings.
    NOTE(review): this chain declares no RunUpdateProcessorFactory;
    presumably it is only used to exercise configuration parsing. Confirm.
  -->
  <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
    <lst name="uimaConfig">
      <lst name="runtimeParameters">
        <int name="ngramsize">3</int>
      </lst>
      <str name="analysisEngine">/uima/TestAE.xml</str>
      <lst name="analyzeFields">
        <bool name="merge">false</bool>
        <arr name="fields">
          <str>text</str>
        </arr>
      </lst>
      <lst name="fieldMappings">
        <lst name="type">
          <str name="name">a-type-which-can-have-multiple-features</str>
          <lst name="mapping">
            <str name="feature">A</str>
            <str name="field">1</str>
          </lst>
          <lst name="mapping">
            <str name="feature">B</str>
            <str name="field">2</str>
          </lst>
        </lst>
      </lst>
    </lst>
  </processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="uima-not-ignoreErrors">
  <!-- Uses the /uima/TestExceptionAE.xml engine with ignoreErrors set to false. -->
  <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
    <lst name="uimaConfig">
      <lst name="runtimeParameters">
        <int name="ngramsize">3</int>
      </lst>
      <str name="analysisEngine">/uima/TestExceptionAE.xml</str>
      <bool name="ignoreErrors">false</bool>
      <lst name="analyzeFields">
        <bool name="merge">false</bool>
        <arr name="fields">
          <str>text</str>
        </arr>
      </lst>
      <lst name="fieldMappings"/>
    </lst>
  </processor>
  <processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="uima-ignoreErrors">
  <!-- Uses the /uima/TestExceptionAE.xml engine with ignoreErrors set to true. -->
  <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
    <lst name="uimaConfig">
      <lst name="runtimeParameters">
        <int name="ngramsize">3</int>
      </lst>
      <str name="analysisEngine">/uima/TestExceptionAE.xml</str>
      <bool name="ignoreErrors">true</bool>
      <!-- Optional: used for logging when text processing fails. Usually set to the uniqueKey field name. -->
      <str name="logField">id</str>
      <lst name="analyzeFields">
        <bool name="merge">false</bool>
        <arr name="fields">
          <str>text</str>
        </arr>
      </lst>
      <lst name="fieldMappings"/>
    </lst>
  </processor>
  <processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<!--
queryResponseWriter plugins... query responses will be written using
the writer specified by the 'wt' request parameter matching the name
of a registered writer. The writer marked default="true" is the
default and will be used if 'wt' is not specified in the request.
XMLResponseWriter will be used if nothing is specified here. The
json, python, and ruby writers are also available by default.

  <queryResponseWriter name="xml"
      class="org.apache.solr.response.XMLResponseWriter" default="true"/>
  <queryResponseWriter name="json"
      class="org.apache.solr.response.JSONResponseWriter"/>
  <queryResponseWriter name="python"
      class="org.apache.solr.response.PythonResponseWriter"/>
  <queryResponseWriter name="ruby"
      class="org.apache.solr.response.RubyResponseWriter"/>
  <queryResponseWriter name="php"
      class="org.apache.solr.response.PHPResponseWriter"/>
  <queryResponseWriter name="phps"
      class="org.apache.solr.response.PHPSerializedResponseWriter"/>
  <queryResponseWriter name="custom"
      class="com.example.MyResponseWriter"/>
-->
<!--
XSLT response writer transforms the XML output by any xslt file
found in Solr's conf/xslt directory. Changes to xslt files are
checked for every xsltCacheLifetimeSeconds.
-->
<queryResponseWriter class="org.apache.solr.response.XSLTResponseWriter" name="xslt">
  <!-- Interval, in seconds, at which changes to xslt files are checked. -->
  <int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<!--
example of registering a query parser <queryParser name="lucene"
class="org.apache.solr.search.LuceneQParserPlugin"/>
-->
<!--
example of registering a custom function parser <valueSourceParser
name="myfunc" class="com.mycompany.MyValueSourceParser" />
-->
<!-- config for the admin interface -->
<admin>
  <!-- Query pre-filled in the admin interface. -->
  <defaultQuery>*</defaultQuery>
</admin>
</config>

View File

@ -1,58 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
# a couple of test stopwords to test that the words are really being
# configured from this file:
stopworda
stopwordb
#Standard english stop words taken from Lucene's StopAnalyzer
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
s
such
t
that
the
their
then
there
these
they
this
to
was
will
with

View File

@ -1,31 +0,0 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
#some test synonym mappings unlikely to appear in real input text
aaa => aaaa
bbb => bbbb1 bbbb2
ccc => cccc1,cccc2
a\=>a => b\=>b
a\,a => b\,b
fooaaa,baraaa,bazaaa
# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
#after us won't split it into two words.
# Synonym mappings can be used for spelling correction too
pixima => pixma

View File

@ -1,25 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
vbg
vbz
vbd
vbn
vb
bez
cc
cd
at
.
:

View File

@ -1,613 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version
2.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
applicable law or agreed to in writing, software distributed under
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and
limitations under the License.
-->
<!--
This is the Solr schema file. This file should be named "schema.xml"
and should be in the conf directory under the solr home (i.e.
./solr/conf/schema.xml by default) or located where the classloader
for the Solr webapp can find it. This example schema is the
recommended starting point for users. It should be kept correct and
concise, usable out-of-the-box. For more information, on how to
customize this file, please see
http://wiki.apache.org/solr/SchemaXml

PERFORMANCE NOTE: this schema includes many optional features and
should not be used for benchmarking. To improve performance one could
  - set stored="false" for all fields possible (esp large fields) when
    you only need to search on the field but don't need to return the
    original value.
  - set indexed="false" if you don't need to search on the field, but
    only return the field as a result of searching on other indexed
    fields.
  - remove all unneeded copyField statements
  - for best index size and searching performance, set indexed="false"
    for all general text fields, use copyField to copy them to the
    catchall "text" field, and use that for searching.
  - For maximum indexing performance, use the
    ConcurrentUpdateSolrServer java client.
  - Remember to run the JVM in server mode, and use a higher logging
    level that avoids logging every request
-->
<schema name="sample" version="1.2">
<!--
attribute "name" is the name of this schema and is only used for
display purposes. Applications should change this to reflect the
nature of the search collection. version="1.2" is Solr's version
number for the schema syntax and semantics. It should not normally
be changed by applications.
  1.0: multiValued attribute did not exist, all fields are
       multiValued by nature
  1.1: multiValued attribute introduced, false by default
  1.2: omitTermFreqAndPositions attribute introduced, true by default
       except for text fields.
-->
<!--
field type definitions. The "name" attribute is just a label to be
used by field definitions. The "class" attribute and any other
attributes determine the real behavior of the fieldType. Class
names starting with "solr" refer to java classes in the
org.apache.solr.analysis package.
-->
<!--
The StrField type is not analyzed, but indexed/stored verbatim. -
StrField and TextField support an optional compressThreshold which
limits compression (if enabled in the derived fields) to values
which exceed a certain size (in characters).
-->
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/>
<!-- Boolean type: accepts "true" or "false". -->
<fieldType name="boolean" class="solr.BoolField" omitNorms="true" sortMissingLast="true"/>
<!-- Binary data type. Values are sent and retrieved as Base64 encoded Strings. -->
<fieldType name="binary" class="solr.BinaryField"/>
<!--
If sortMissingLast="true", then a sort on this field will cause
documents without the field to come after documents with the
field, regardless of the requested sort order (asc or desc). - If
sortMissingFirst="true", then a sort on this field will cause
documents without the field to come before documents with the
field, regardless of the requested sort order. - If
sortMissingLast="false" and sortMissingFirst="false" (the
default), then default lucene sorting will be used which places
docs without the field first in an ascending sort and last in a
descending sort.
-->
<!--
Default numeric field types. For faster range queries, consider
the tint/tfloat/tlong/tdouble types.
-->
<fieldType name="int"
           class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}"
           omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float"
           class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}"
           omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long"
           class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}"
           omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double"
           class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}"
           omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
<!--
Numeric field types that index each value at various levels of
precision to accelerate range queries when the number of values
between the range endpoints is large. See the javadoc for
LegacyNumericRangeQuery for internal implementation details. Smaller
precisionStep values (specified in bits) will lead to more tokens
indexed per value, slightly larger index size, and faster range
queries. A precisionStep of 0 disables indexing at different
precision levels.
-->
<fieldType name="tint"
           class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}"
           omitNorms="true" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat"
           class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}"
           omitNorms="true" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong"
           class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}"
           omitNorms="true" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble"
           class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}"
           omitNorms="true" precisionStep="8" positionIncrementGap="0"/>
<!--
The format for this date field is of the form
1995-12-31T23:59:59Z, and is a more restricted form of the
canonical representation of dateTime
http://www.w3.org/TR/xmlschema-2/#dateTime

The trailing "Z" designates UTC time and is mandatory. Optional
fractional seconds are allowed: 1995-12-31T23:59:59.999Z
All other components are mandatory.

Expressions can also be used to denote calculations that should be
performed relative to "NOW" to determine the value, e.g.:

  NOW/HOUR              ... Round to the start of the current hour
  NOW-1DAY              ... Exactly 1 day prior to now
  NOW/DAY+6MONTHS+3DAYS ... 6 months and 3 days in the future from
                            the start of the current day

Consult the TrieDateField javadocs for more information.
Note: For faster range queries, consider the tdate type
-->
<fieldType name="date"
           class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}"
           precisionStep="0" positionIncrementGap="0" omitNorms="true"/>
<!-- Date type with precisionStep="6", for faster date range queries and date faceting. -->
<fieldType name="tdate"
           class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}"
           precisionStep="6" positionIncrementGap="0" omitNorms="true"/>
<!--
The "RandomSortField" is not used to store or search any data. You
can declare fields of this type in your schema to generate
pseudo-random orderings of your docs for sorting purposes. The
ordering is generated based on the field name and the version of
the index. As long as the index version remains unchanged, and the
same field name is reused, the ordering of the docs will be
consistent. If you want different pseudo-random orderings of
documents, for the same version of the index, use a dynamicField
and change the name
-->
<fieldType name="random" indexed="true" class="solr.RandomSortField"/>
<!--
solr.TextField allows the specification of custom text analyzers
specified as a tokenizer and a list of token filters. Different
analyzers may be specified for indexing and querying. The optional
positionIncrementGap puts space between multiple fields of this
type on the same document, with the purpose of preventing false
phrase matching across fields. For more info on customizing your
analyzer chain, please see
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
-->
<!--
One can also specify an existing Analyzer class that has a default
constructor via the class attribute on the analyzer element
<fieldType name="text_greek" class="solr.TextField"> <analyzer
class="org.apache.lucene.analysis.el.GreekAnalyzer"/> </fieldType>
-->
<!--
A text field that only splits on whitespace for exact matching of
words
-->
<fieldType name="text_ws" positionIncrementGap="100" class="solr.TextField">
  <!-- Tokenizer only, no filters. -->
  <analyzer>
    <tokenizer class="solr.MockTokenizerFactory"/>
  </analyzer>
</fieldType>
<!--
A text field that uses WordDelimiterGraphFilter to enable splitting and
matching of words on case-change, alpha numeric boundaries, and
non-alphanumeric chars, so that a query of "wifi" or "wi fi" could
match a document containing "Wi-Fi". Synonyms and stopwords are
customized by external files, and stemming is enabled.
-->
<fieldType name="text" positionIncrementGap="100" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <!--
      Index-time synonyms are disabled in this example. The filter would be:
      <filter class="solr.SynonymGraphFilterFactory"
              synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
    -->
    <!-- Index side catenates words and numbers; the graph is flattened as the last step. -->
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <!-- Query side does not catenate. -->
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!--
Less flexible matching, but less false matches. Probably not ideal
for product names, but may be good for SKUs. Can insert dashes in
the wrong place and still match.
-->
<fieldType name="textTight" positionIncrementGap="100" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="0" generateNumberParts="0"
            catenateWords="1" catenateNumbers="1" catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!--
      Removes duplicate tokens that appear at the same position, which is
      sometimes possible with WordDelimiterGraphFilter in conjunction with
      stemming.
    -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="0" generateNumberParts="0"
            catenateWords="1" catenateNumbers="1" catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<fieldType name="uima_sentences" positionIncrementGap="100" class="solr.TextField">
  <!-- Tokenizes with the UIMA descriptor below, using SentenceAnnotation as the token type. -->
  <analyzer>
    <tokenizer class="solr.UIMAAnnotationsTokenizerFactory"
               descriptorPath="/uima/AggregateSentenceAE.xml"
               tokenType="org.apache.uima.SentenceAnnotation"
               ngramsize="2"/>
  </analyzer>
</fieldType>
<fieldType name="uima_nouns" positionIncrementGap="100" class="solr.TextField">
  <!-- Type-aware UIMA tokenizer (featurePath "posTag") followed by a type filter driven by uima/stoptypes.txt. -->
  <analyzer>
    <tokenizer class="solr.UIMATypeAwareAnnotationsTokenizerFactory"
               descriptorPath="/uima/AggregateSentenceAE.xml"
               tokenType="org.apache.uima.TokenAnnotation"
               featurePath="posTag"/>
    <filter class="solr.TypeTokenFilterFactory" types="uima/stoptypes.txt"/>
  </analyzer>
</fieldType>
<!--
A general unstemmed text field - good if one does not know the
language of the field
-->
<fieldType name="textgen" positionIncrementGap="100" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <!-- Index side catenates words and numbers; no splitting on case change. -->
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <!-- Query side does not catenate. -->
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!--
A general unstemmed text field that indexes tokens normally and
also reversed (via ReversedWildcardFilterFactory), to enable more
efficient leading wildcard queries.
-->
<fieldType name="text_rev" positionIncrementGap="100" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Reversed tokens are indexed alongside the originals (withOriginal="true"). -->
    <filter class="solr.ReversedWildcardFilterFactory"
            withOriginal="true"
            maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
    <filter class="solr.FlattenGraphFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.MockTokenizerFactory"/>
    <filter class="solr.WordDelimiterGraphFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- charFilter + WhitespaceTokenizer -->
<!--
<fieldType name="textCharNorm" class="solr.TextField"
positionIncrementGap="100" > <analyzer> <charFilter
class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt"/> <tokenizer
class="solr.MockTokenizerFactory"/> </analyzer> </fieldType>
-->
<!--
This is an example of using the KeywordTokenizer along With
various TokenFilterFactories to produce a sortable field that does
not include some properties of the source text
-->
<fieldType name="alphaOnlySort" class="solr.TextField"
           omitNorms="true" sortMissingLast="true">
  <analyzer>
    <!--
      Keyword tokenizing does no actual tokenizing: the entire input
      string is preserved as a single token.
    -->
    <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
    <!-- Lowercasing is useful when you want your sorting to be case insensitive. -->
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- The TrimFilter removes any leading or trailing whitespace. -->
    <filter class="solr.TrimFilterFactory"/>
    <!--
      The PatternReplaceFilter replaces any sequence of characters matching
      a Java regular expression with an arbitrary replacement string, which
      may include back references to portions of the original string matched
      by the pattern. See the Java Regular Expression documentation for the
      pattern and replacement string syntax:
      http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
    -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="([^a-z])" replacement="" replace="all"/>
  </analyzer>
</fieldType>
<fieldType name="phonetic" class="solr.TextField" indexed="true" stored="false">
  <!-- Standard tokenizer followed by Double Metaphone encoding with inject="false". -->
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
  </analyzer>
</fieldType>
<fieldType name="payloads" class="solr.TextField" indexed="true" stored="false">
  <analyzer>
    <tokenizer class="solr.MockTokenizerFactory"/>
    <!--
      The DelimitedPayloadTokenFilter can put payloads on tokens: for
      example, a token of "foo|1.4" would be indexed as "foo" with a
      payload of 1.4f.

      Attributes of the DelimitedPayloadTokenFilterFactory:
        "delimiter" : a one character delimiter. Default is | (pipe)
        "encoder"   : how to encode the following value into a payload
            float    = org.apache.lucene.analysis.payloads.FloatEncoder
            integer  = o.a.l.a.p.IntegerEncoder
            identity = o.a.l.a.p.IdentityEncoder
            or the fully qualified name of a class implementing
            PayloadEncoder; the encoder must have a no arg constructor.
    -->
    <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
  </analyzer>
</fieldType>
<!--
lowercases the entire field value, keeping it as a single token.
-->
<fieldType name="lowercase" positionIncrementGap="100" class="solr.TextField">
  <!-- Keyword pattern keeps the value as one token, which is then lowercased. -->
  <analyzer>
    <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!--
since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright.
-->
<fieldType name="ignored" class="solr.StrField"
           indexed="false" stored="false" multiValued="true"/>
<!--
Valid attributes for fields:
  name: mandatory - the name for the field
  type: mandatory - the name of a previously defined type from the
    <fieldType>s
  indexed: true if this field should be indexed (searchable or
    sortable)
  stored: true if this field should be retrievable
  multiValued: true if this field may contain multiple values per
    document
  omitNorms: (expert) set to true to omit the norms associated with
    this field (this disables length normalization and index-time
    boosting for the field, and saves some memory). Only full-text
    fields or fields that need an index-time boost need norms.
  termVectors: [false] set to true to store the term vector for a
    given field. When using MoreLikeThis, fields used for similarity
    should be stored for best performance.
  termPositions: Store position information with the term vector.
    This will increase storage costs.
  termOffsets: Store offset information with the term vector. This
    will increase storage costs.
  default: a value that should be used if no value is specified when
    adding a document.
-->
<field name="id" type="string" indexed="true" stored="true"
required="true"/>
<field name="sku" type="textTight" indexed="true" stored="true"
omitNorms="true"/>
<field name="name" type="textgen" indexed="true" stored="true"/>
<field name="alphaNameSort" type="alphaOnlySort" indexed="true"
stored="false"/>
<field name="manu" type="textgen" indexed="true" stored="true"
omitNorms="true"/>
<field name="cat" type="text_ws" indexed="true" stored="true"
multiValued="true" omitNorms="true"/>
<field name="features" type="text" indexed="true" stored="true"
multiValued="true"/>
<field name="includes" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<!--
Fields declared with the "uima_sentences" and "uima_nouns" field
types. Both are filled from the "text" field by copyField directives
later in this schema, and both store term vectors with positions and
offsets.
-->
<field name="sentences" type="uima_sentences" indexed="true" stored="true" multiValued="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<field name="nouns" type="uima_nouns" indexed="true" stored="true" multiValued="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<field name="weight" type="float" indexed="true" stored="true"/>
<field name="price" type="float" indexed="true" stored="true"/>
<field name="popularity" type="int" indexed="true" stored="true"/>
<field name="inStock" type="boolean" indexed="true" stored="true"/>
<!--
Common metadata fields, named specifically to match up with
SolrCell metadata when parsing rich documents such as Word, PDF.
Some fields are multiValued only because Tika currently may return
multiple values for them.
-->
<field name="title" type="text" indexed="true" stored="true"
multiValued="true"/>
<field name="subject" type="text" indexed="true" stored="true"/>
<field name="description" type="text" indexed="true" stored="true"/>
<field name="comments" type="text" indexed="true" stored="true"/>
<field name="author" type="textgen" indexed="true" stored="true"/>
<field name="keywords" type="textgen" indexed="true" stored="true"/>
<field name="category" type="textgen" indexed="true" stored="true"/>
<field name="content_type" type="string" indexed="true"
stored="true" multiValued="true"/>
<field name="last_modified" type="date" indexed="true" stored="true"/>
<field name="links" type="string" indexed="true" stored="true"
multiValued="true"/>
<!--
catchall field, containing all other searchable text fields
(implemented via copyField further on in this schema)
-->
<field name="text" type="text" indexed="true" stored="false"
multiValued="true"/>
<!--
catchall text field that indexes tokens both normally and in
reverse for efficient leading wildcard queries.
-->
<field name="text_rev" type="text_rev" indexed="true" stored="false"
multiValued="true"/>
<!--
non-tokenized version of manufacturer to make it easier to sort or
group results by manufacturer. copied from "manu" via copyField
-->
<field name="manu_exact" type="string" indexed="true" stored="false"/>
<field name="payloads" type="payloads" indexed="true" stored="true"/>
<!--
Uncommenting the following will create a "timestamp" field using a
default value of "NOW" to indicate when each document was indexed.
-->
<!--
<field name="timestamp" type="date" indexed="true" stored="true"
default="NOW" multiValued="false"/>
-->
<field name="language" type="string" indexed="true" stored="true" required="false"/>
<field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false"/>
<field name="sentiment" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="entity" type="text" indexed="true" stored="true" multiValued="true"/>
<!--
Dynamic field definitions. If a field name is not found,
dynamicFields will be used if the name matches any of the
patterns. RESTRICTION: the glob-like pattern in the name attribute
must have a "*" only at the start or the end. EXAMPLE: name="*_i"
will match any field ending in _i (like myid_i, z_i) Longer
patterns will be matched first. if equal size patterns both match,
the first appearing in the schema will be used. <dynamicField
name="*_i" type="int" indexed="true" stored="true"/> <dynamicField
name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_l" type="long" indexed="true"
stored="true"/> <dynamicField name="*_t" type="text"
indexed="true" stored="true"/> <dynamicField name="*_b"
type="boolean" indexed="true" stored="true"/> <dynamicField
name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true"
stored="true"/> <dynamicField name="*_dt" type="date"
indexed="true" stored="true"/> <dynamicField name="*_ti"
type="tint" indexed="true" stored="true"/> <dynamicField
name="*_tl" type="tlong" indexed="true" stored="true"/>
<dynamicField name="*_tf" type="tfloat" indexed="true"
stored="true"/> <dynamicField name="*_td" type="tdouble"
indexed="true" stored="true"/> <dynamicField name="*_tdt"
type="tdate" indexed="true" stored="true"/>
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="attr_*" type="textgen" indexed="true"
stored="true" multiValued="true"/> <dynamicField name="random_*"
type="random" />
-->
<dynamicField name="*_sm" type="string" indexed="true" stored="true" multiValued="true"/>
<!--
uncomment the following to ignore any fields that don't already
match an existing field name or dynamic field, rather than
reporting them as an error. alternately, change the type="ignored"
to some other type e.g. "text" if you want unknown fields indexed
and/or stored by default
-->
<!--dynamicField name="*" type="ignored" multiValued="true" /-->
<!--
Field to use to determine and enforce document uniqueness. Unless
this field is marked with required="false", it will be a required
field
-->
<uniqueKey>id</uniqueKey>
<!--
copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field
differently, or to add multiple fields to the same field for
easier/faster searching.
-->
<copyField source="cat" dest="text"/>
<copyField source="name" dest="text"/>
<copyField source="manu" dest="text"/>
<copyField source="features" dest="text"/>
<copyField source="includes" dest="text"/>
<copyField source="text" dest="nouns"/>
<copyField source="text" dest="sentences"/>
<copyField source="manu" dest="manu_exact"/>
<!--copyField source="Titolo" dest="text"/-->
<!--
Above, multiple source fields are copied to the [text] field.
Another way to map multiple source fields to the same destination
field is to use the dynamic field syntax. copyField also supports a
maxChars to copy setting.
-->
<!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
<!--
copy name to alphaNameSort, a field designed for sorting by name
-->
<!-- <copyField source="name" dest="alphaNameSort"/> -->
</schema>

View File

@ -1,653 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version
2.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
applicable law or agreed to in writing, software distributed under
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and
limitations under the License.
-->
<!--
For more details about configurations options that may appear in
this file, see http://wiki.apache.org/solr/SolrConfigXml.
Specifically, the Solr Config can support XInclude, which may make
it easier to manage the configuration. See
https://issues.apache.org/jira/browse/SOLR-1167
-->
<config xmlns:xi="http://www.w3.org/2001/XInclude">
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<indexConfig>
<useCompoundFile>${useCompoundFile:false}</useCompoundFile>
</indexConfig>
<!--
lib directives can be used to instruct Solr to load any Jars
identified and use them to resolve any "plugins" specified in your
solrconfig.xml or schema.xml (ie: Analyzers, Request Handlers,
etc...). All directories and paths are resolved relative to the
instanceDir. If a "./lib" directory exists in your instanceDir, all
files found in it are included as if you had used the following
syntax... <lib dir="./lib" />
-->
<!--
A dir option by itself adds any files found in the directory to the
classpath, this is useful for including all jars in a directory.
-->
<lib dir="../../contrib/extraction/lib" />
<!--
When a regex is specified in addition to a directory, only the files
in that directory which completely match the regex (anchored on both
ends) will be included.
-->
<lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
<!--
If a dir option (with or without a regex) is used and nothing is
found that matches, it will be ignored
-->
<lib dir="/total/crap/dir/ignored" />
<!--
an exact path can be used to specify a specific file. This will
cause a serious error to be logged if it can't be loaded. <lib
path="../a-jar-that-does-not-exist.jar" />
-->
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<!--
Used to specify an alternate directory to hold all index data other
than the default ./data under the Solr home. If replication is in
use, this should match the replication configuration.
-->
<dataDir>${solr.data.dir:}</dataDir>
<!--
Enables JMX if and only if an existing MBeanServer is found, use
this if you want to configure JMX through JVM parameters. Remove
this to disable exposing Solr configuration and statistics to JMX.
If you want to connect to a particular server, specify the agentId
e.g. <jmx agentId="myAgent" /> If you want to start a new
MBeanServer, specify the serviceUrl e.g. <jmx
serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> For
more details see http://wiki.apache.org/solr/SolrJmx
-->
<jmx />
<!-- the default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2">
<!--
A prefix of "solr." for class names is an alias that causes solr
to search appropriate packages, including
org.apache.solr.(search|update|request|core|analysis)
-->
<!--
Perform a <commit/> automatically under certain conditions:
maxDocs - number of updates since last commit is greater than this
maxTime - oldest uncommitted update (in ms) is this long ago
Instead of enabling autoCommit, consider using "commitWithin" when
adding documents. http://wiki.apache.org/solr/UpdateXmlMessages
<autoCommit> <maxDocs>10000</maxDocs> <maxTime>1000</maxTime>
</autoCommit>
-->
</updateHandler>
<!--
Use the following format to specify a custom IndexReaderFactory -
allows for alternate IndexReader implementations. ** Experimental
Feature ** Please note - Using a custom IndexReaderFactory may
prevent certain other features from working. The API to
IndexReaderFactory may change without warning or may even be removed
from future releases if the problems cannot be resolved. ** Features
that may not work with custom IndexReaderFactory ** The
ReplicationHandler assumes a disk-resident index. Using a custom
IndexReader implementation may cause incompatibility with
ReplicationHandler and may cause replication to not work correctly.
See SOLR-1366 for details. <indexReaderFactory
name="IndexReaderFactory" class="package.class"> Parameters as
required by the implementation </indexReaderFactory >
-->
<!-- To set the termInfosIndexDivisor, do this: -->
<!--
<indexReaderFactory name="IndexReaderFactory"
class="org.apache.solr.core.StandardIndexReaderFactory"> <int
name="setTermIndexDivisor">12</int> </indexReaderFactory >
-->
<query>
<!--
Maximum number of clauses in a boolean query... in the past, this
affected range or prefix queries that expanded to big boolean
queries - built in Solr query parsers no longer create queries
with this limitation. An exception is thrown if exceeded.
-->
<maxBooleanClauses>1024</maxBooleanClauses>
<!--
There are two implementations of cache available for Solr,
LRUCache, based on a synchronized LinkedHashMap, and FastLRUCache,
based on a ConcurrentHashMap. FastLRUCache has faster gets and
slower puts in single threaded operation and thus is generally
faster than LRUCache when the hit ratio of the cache is high (>
75%), and may be faster under other scenarios on multi-cpu
systems.
-->
<!--
Cache used by SolrIndexSearcher for filters (DocSets), unordered
sets of *all* documents that match a query. When a new searcher is
opened, its caches may be prepopulated or "autowarmed" using data
from caches in the old searcher. autowarmCount is the number of
items to prepopulate. For LRUCache, the autowarmed items will be
the most recently accessed items. Parameters: class - the
SolrCache implementation LRUCache or FastLRUCache size - the
maximum number of entries in the cache initialSize - the initial
capacity (number of entries) of the cache. (see
java.util.HashMap) autowarmCount - the number of entries to
prepopulate from an old cache.
-->
<filterCache class="solr.FastLRUCache" size="512"
initialSize="512" autowarmCount="0" />
<!--
Cache used to hold field values that are quickly accessible by
document id. The fieldValueCache is created by default even if not
configured here. <fieldValueCache class="solr.FastLRUCache"
size="512" autowarmCount="128" showItems="32" />
-->
<!--
queryResultCache caches results of searches - ordered lists of
document ids (DocList) based on a query, a sort, and the range of
documents requested.
-->
<queryResultCache class="solr.LRUCache" size="512"
initialSize="512" autowarmCount="0" />
<!--
documentCache caches Lucene Document objects (the stored fields
for each document). Since Lucene internal document ids are
transient, this cache will not be autowarmed.
-->
<documentCache class="solr.LRUCache" size="512"
initialSize="512" autowarmCount="0" />
<!--
If true, stored fields that are not requested will be loaded
lazily. This can result in a significant speed improvement if the
usual case is to not load all stored fields, especially if the
skipped fields are large compressed text fields.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!--
Example of a generic cache. These caches may be accessed by name
through SolrIndexSearcher.getCache(),cacheLookup(), and
cacheInsert(). The purpose is to enable easy caching of
user/application level data. The regenerator argument should be
specified as an implementation of solr.search.CacheRegenerator if
autowarming is desired.
-->
<!--
<cache name="myUserCache" class="solr.LRUCache" size="4096"
initialSize="1024" autowarmCount="1024"
regenerator="org.mycompany.mypackage.MyRegenerator" />
-->
<!--
An optimization that attempts to use a filter to satisfy a search.
If the requested sort does not include score, then the filterCache
will be checked for a filter matching the query. If found, the
filter will be used as the source of document ids, and then the
sort will be applied to that.
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<!--
An optimization for use with the queryResultCache. When a search
is requested, a superset of the requested number of document ids
are collected. For example, if a search for a particular query
requests matching documents 10 through 19, and queryResultWindowSize is
50, then documents 0 through 49 will be collected and cached. Any
further requests in that range can be satisfied via the cache.
-->
<queryResultWindowSize>20</queryResultWindowSize>
<!--
Maximum number of documents to cache for any entry in the
queryResultCache.
-->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!--
a newSearcher event is fired whenever a new searcher is being
prepared and there is a current searcher handling requests (aka
registered). It can be used to prime certain caches to prevent
long request times for certain requests.
-->
<!--
QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence.
-->
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<!--
<lst> <str name="q">solr</str> <str name="start">0</str> <str
name="rows">10</str> </lst> <lst> <str name="q">rocks</str>
<str name="start">0</str> <str name="rows">10</str> </lst>
<lst><str name="q">static newSearcher warming query from
solrconfig.xml</str></lst>
-->
</arr>
</listener>
<!--
a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain autowarming data from.
-->
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst>
<str name="q">solr rocks</str>
<str name="start">0</str>
<str name="rows">10</str>
</lst>
<lst>
<str name="q">static firstSearcher warming query from
solrconfig.xml</str>
</lst>
</arr>
</listener>
<!--
If a search request comes in and there is no current registered
searcher, then immediately register the still warming searcher and
use it. If "false" then all requests will block until the first
searcher is done warming.
-->
<useColdSearcher>false</useColdSearcher>
<!--
Maximum number of searchers that may be warming in the background
concurrently. An error is returned if this limit is exceeded.
Recommend 1-2 for read-only slaves, higher for masters w/o cache
warming.
-->
<maxWarmingSearchers>2</maxWarmingSearchers>
</query>
<requestDispatcher>
<!--
Make sure your system has some authentication before enabling
remote streaming!
-->
<requestParsers enableRemoteStreaming="false"
multipartUploadLimitInKB="-1" />
<!--
Set HTTP caching related parameters (for proxy caches and
clients). To get the behaviour of Solr 1.2 (ie: no caching related
headers) use the never304="true" option and do not specify a value
for <cacheControl>
-->
<!-- <httpCaching never304="true"> -->
<httpCaching lastModifiedFrom="openTime" etagSeed="Solr">
<!--
lastModFrom="openTime" is the default, the Last-Modified value
(and validation against If-Modified-Since requests) will all be
relative to when the current Searcher was opened. You can change
it to lastModFrom="dirLastMod" if you want the value to exactly
correspond to when the physical index was last modified.
etagSeed="..." is an option you can change to force the ETag
header (and validation against If-None-Match requests) to be
different even if the index has not changed (ie: when making
significant changes to your config file) lastModifiedFrom and
etagSeed are both ignored if you use the never304="true" option.
-->
<!--
If you include a <cacheControl> directive, it will be used to
generate a Cache-Control header, as well as an Expires header if
the value contains "max-age=" By default, no Cache-Control
header is generated. You can use the <cacheControl> option even
if you have set never304="true"
-->
<!-- <cacheControl>max-age=30, public</cacheControl> -->
</httpCaching>
</requestDispatcher>
<!--
requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the path or the 'qt' param.
Names starting with a '/' are accessed with a path equal to the
registered name. Names without a leading '/' are accessed with:
http://host/app/select?qt=name If no qt is defined, the
requestHandler that declares default="true" will be used.
-->
<requestHandler name="/select" class="solr.SearchHandler"
default="true">
<!-- default values for query parameters -->
<lst name="defaults">
<str name="echoParams">explicit</str>
<!--
<int name="rows">10</int> <str name="fl">*</str> <str
name="version">2.1</str>
-->
</lst>
</requestHandler>
<!--
DisMaxRequestHandler allows easy searching across multiple fields
for simple user-entered phrases. Its implementation is now just the
standard SearchHandler with a default query parser of "dismax". see
http://wiki.apache.org/solr/DisMaxRequestHandler
-->
<requestHandler name="/dismax" class="solr.SearchHandler">
<lst name="defaults">
<str name="defType">dismax</str>
<str name="echoParams">explicit</str>
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0
manu^1.1 cat^1.4
</str>
<str name="pf">
text^0.2 features^1.1 name^1.5 manu^1.4
manu_exact^1.9
</str>
<str name="bf">
popularity^0.5 recip(price,1,1000,1000)^0.3
</str>
<str name="fl">
id,name,price,score
</str>
<str name="mm">
2&lt;-1 5&lt;-2 6&lt;90% </str>
<int name="ps">100</int>
<str name="q.alt">*:*</str>
<!-- example highlighter config, enable per-query with hl=true -->
<str name="hl.fl">text features name</str>
<!-- for this field, we want no fragmenting, just highlighting -->
<str name="f.name.hl.fragsize">0</str>
<!--
instructs Solr to return the field itself if no query terms are
found
-->
<str name="f.name.hl.alternateField">name</str>
<str name="f.text.hl.fragmenter">regex</str> <!-- defined below -->
</lst>
</requestHandler>
<!--
Note how you can register the same handler multiple times with
different names (and different init parameters)
-->
<requestHandler name="/partitioned" class="solr.SearchHandler">
<lst name="defaults">
<str name="defType">dismax</str>
<str name="echoParams">explicit</str>
<str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str>
<str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
<!--
This is an example of using Date Math to specify a constantly
moving date range in a config...
-->
<str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str>
</lst>
<!--
In addition to defaults, "appends" params can be specified to
identify values which should be appended to the list of multi-val
params from the query (or the existing "defaults"). In this
example, the param "fq=instock:true" will be appended to any query
time fq params the user may specify, as a mechanism for
partitioning the index, independent of any user selected filtering
that may also be desired (perhaps as a result of faceted
searching). NOTE: there is *absolutely* nothing a client can do to
prevent these "appends" values from being used, so don't use this
mechanism unless you are sure you always want it.
-->
<lst name="appends">
<str name="fq">inStock:true</str>
</lst>
<!--
"invariants" are a way of letting the Solr maintainer lock down
the options available to Solr clients. Any params values specified
here are used regardless of what values may be specified in either
the query, the "defaults", or the "appends" params. In this
example, the facet.field and facet.query params are fixed,
limiting the facets clients can use. Faceting is not turned on by
default - but if the client does specify facet=true in the
request, these are the only facets they will be able to see counts
for; regardless of what other facet.field or facet.query params
they may specify. NOTE: there is *absolutely* nothing a client can
do to prevent these "invariants" values from being used, so don't
use this mechanism unless you are sure you always want it.
-->
<lst name="invariants">
<str name="facet.field">cat</str>
<str name="facet.field">manu_exact</str>
<str name="facet.query">price:[* TO 500]</str>
<str name="facet.query">price:[500 TO *]</str>
</lst>
</requestHandler>
<!--
Search components are registered to SolrCore and used by Search
Handlers. By default, the following components are available:
<searchComponent name="query"
class="org.apache.solr.handler.component.QueryComponent" />
<searchComponent name="facet"
class="org.apache.solr.handler.component.FacetComponent" />
<searchComponent name="mlt"
class="org.apache.solr.handler.component.MoreLikeThisComponent" />
<searchComponent name="highlight"
class="org.apache.solr.handler.component.HighlightComponent" />
<searchComponent name="stats"
class="org.apache.solr.handler.component.StatsComponent" />
<searchComponent name="debug"
class="org.apache.solr.handler.component.DebugComponent" /> Default
configuration in a requestHandler would look like: <arr
name="components"> <str>query</str> <str>facet</str> <str>mlt</str>
<str>highlight</str> <str>stats</str> <str>debug</str> </arr> If you
register a searchComponent to one of the standard names, that will
be used instead. To insert components before or after the 'standard'
components, use: <arr name="first-components">
<str>myFirstComponentName</str> </arr> <arr name="last-components">
<str>myLastComponentName</str> </arr>
-->
<!--
The spell check component can return a list of alternative spelling
suggestions.
-->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
<str name="queryAnalyzerFieldType">textSpell</str>
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">name</str>
<str name="spellcheckIndexDir">./spellchecker</str>
</lst>
<!--
a spellchecker that uses a different distance measure <lst
name="spellchecker"> <str name="name">jarowinkler</str> <str
name="field">spell</str> <str
name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
<str name="spellcheckIndexDir">./spellchecker2</str> </lst>
-->
<!--
a file based spell checker <lst name="spellchecker"> <str
name="classname">solr.FileBasedSpellChecker</str> <str
name="name">file</str> <str
name="sourceLocation">spellings.txt</str> <str
name="characterEncoding">UTF-8</str> <str
name="spellcheckIndexDir">./spellcheckerFile</str> </lst>
-->
</searchComponent>
<!--
A request handler utilizing the spellcheck component.
#############################################################################
NOTE: This is purely as an example. The whole purpose of the
SpellCheckComponent is to hook it into the request handler that
handles (i.e. the standard or dismax SearchHandler) queries such
that a separate request is not needed to get suggestions. IN OTHER
WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU
WANT FOR YOUR PRODUCTION SYSTEM!
#############################################################################
-->
<!--
Loaded on demand: startup="lazy" defers creating this handler until
its first request, the same attribute used by the other lazily loaded
handlers in this file. The "spellcheck" search component is run after
the handler's standard components via "last-components".
-->
<requestHandler name="/spell" class="solr.SearchHandler"
startup="lazy">
<lst name="defaults">
<!-- omp = Only More Popular -->
<str name="spellcheck.onlyMorePopular">false</str>
<!-- exr = Extended Results -->
<str name="spellcheck.extendedResults">false</str>
<!-- The number of suggestions to return -->
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<searchComponent name="tvComponent"
class="org.apache.solr.handler.component.TermVectorComponent" />
<!--
A Req Handler for working with the tvComponent. This is purely as an
example. You will likely want to add the component to your already
specified request handlers.
-->
<requestHandler name="/tvrh"
class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<bool name="tv">true</bool>
</lst>
<arr name="last-components">
<str>tvComponent</str>
</arr>
</requestHandler>
<!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler -->
<requestHandler name="/update/extract"
class="org.apache.solr.handler.extraction.ExtractingRequestHandler"
startup="lazy">
<lst name="defaults">
<!--
All the main content goes into "text"... if you need to return
the extracted text or do highlighting, use a stored field.
-->
<str name="fmap.content">text</str>
<str name="lowernames">true</str>
<str name="uprefix">ignored_</str>
<!-- capture link hrefs but ignore div attributes -->
<str name="captureAttr">true</str>
<str name="fmap.a">links</str>
<str name="fmap.div">ignored_</str>
</lst>
</requestHandler>
<!--
A component to return terms and document frequency of those terms.
This component does not yet support distributed search.
-->
<searchComponent name="termsComponent"
class="org.apache.solr.handler.component.TermsComponent" />
<requestHandler name="/terms"
class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<bool name="terms">true</bool>
</lst>
<arr name="components">
<str>termsComponent</str>
</arr>
</requestHandler>
<!-- CSV update handler, loaded on demand -->
<requestHandler name="/update/csv" class="solr.CSVRequestHandler"
startup="lazy" />
<!--
An example dedup update processor that creates the "id" field on the
fly based on the hash code of some other fields. This example has
overwriteDupes set to false since we are using the id field as the
signatureField and Solr will maintain uniqueness based on that
anyway. You have to link the chain to an update handler above to use
it ie: <requestHandler name="/update"
class="solr.UpdateRequestHandler"> <lst name="defaults"> <str
name="update.chain">dedupe</str> </lst> </requestHandler>
-->
<!--
queryResponseWriter plugins... query responses will be written using
the writer specified by the 'wt' request parameter matching the name
of a registered writer. The "default" writer is the default and will
be used if 'wt' is not specified in the request. XMLResponseWriter
will be used if nothing is specified here. The json, python, and
ruby writers are also available by default. <queryResponseWriter
name="xml" class="org.apache.solr.request.XMLResponseWriter"
default="true"/> <queryResponseWriter name="json"
class="org.apache.solr.request.JSONResponseWriter"/>
<queryResponseWriter name="python"
class="org.apache.solr.request.PythonResponseWriter"/>
<queryResponseWriter name="ruby"
class="org.apache.solr.request.RubyResponseWriter"/>
<queryResponseWriter name="php"
class="org.apache.solr.request.PHPResponseWriter"/>
<queryResponseWriter name="phps"
class="org.apache.solr.request.PHPSerializedResponseWriter"/>
<queryResponseWriter name="custom"
class="com.example.MyResponseWriter"/>
-->
<!--
XSLT response writer transforms the XML output by any xslt file
found in Solr's conf/xslt directory. Changes to xslt files are
checked for every xsltCacheLifetimeSeconds.
-->
<queryResponseWriter name="xslt"
class="org.apache.solr.response.XSLTResponseWriter">
<int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<!--
example of registering a query parser <queryParser name="lucene"
class="org.apache.solr.search.LuceneQParserPlugin"/>
-->
<!--
example of registering a custom function parser <valueSourceParser
name="myfunc" class="com.mycompany.MyValueSourceParser" />
-->
<!-- config for the admin interface -->
<admin>
<defaultQuery>*</defaultQuery>
</admin>
</config>

Some files were not shown because too many files have changed in this diff Show More