mirror of https://github.com/apache/lucene.git
SOLR-2129: Provide a Solr module for dynamic metadata extraction/indexing with Apache UIMA
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1062604 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
68dc071064
commit
6c05d94c93
|
@ -73,6 +73,10 @@
|
||||||
<classpathentry kind="src" path="solr/contrib/extraction/src/main/java"/>
|
<classpathentry kind="src" path="solr/contrib/extraction/src/main/java"/>
|
||||||
<classpathentry kind="src" path="solr/contrib/extraction/src/test/java"/>
|
<classpathentry kind="src" path="solr/contrib/extraction/src/test/java"/>
|
||||||
<classpathentry kind="src" path="solr/contrib/extraction/src/test/resources"/>
|
<classpathentry kind="src" path="solr/contrib/extraction/src/test/resources"/>
|
||||||
|
<classpathentry kind="src" path="solr/contrib/uima/src/main/java"/>
|
||||||
|
<classpathentry kind="src" path="solr/contrib/uima/src/main/resources"/>
|
||||||
|
<classpathentry kind="src" path="solr/contrib/uima/src/test/java"/>
|
||||||
|
<classpathentry kind="src" path="solr/contrib/uima/src/test/resources"/>
|
||||||
<classpathentry kind="lib" path="lucene/lib/ant-1.7.1.jar"/>
|
<classpathentry kind="lib" path="lucene/lib/ant-1.7.1.jar"/>
|
||||||
<classpathentry kind="lib" path="lucene/lib/ant-junit-1.7.1.jar"/>
|
<classpathentry kind="lib" path="lucene/lib/ant-junit-1.7.1.jar"/>
|
||||||
<classpathentry kind="lib" path="lucene/lib/junit-4.7.jar"/>
|
<classpathentry kind="lib" path="lucene/lib/junit-4.7.jar"/>
|
||||||
|
@ -151,6 +155,12 @@
|
||||||
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-0.8.jar"/>
|
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-0.8.jar"/>
|
||||||
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-0.8.jar"/>
|
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-0.8.jar"/>
|
||||||
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xmlbeans-2.3.0.jar"/>
|
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xmlbeans-2.3.0.jar"/>
|
||||||
|
<classpathentry kind="lib" path="solr/contrib/uima/lib/commons-digester-2.0.jar"/>
|
||||||
|
<classpathentry kind="lib" path="solr/contrib/uima/lib/uima-an-alchemy.jar"/>
|
||||||
|
<classpathentry kind="lib" path="solr/contrib/uima/lib/uima-an-calais.jar"/>
|
||||||
|
<classpathentry kind="lib" path="solr/contrib/uima/lib/uima-an-tagger.jar"/>
|
||||||
|
<classpathentry kind="lib" path="solr/contrib/uima/lib/uima-an-wst.jar"/>
|
||||||
|
<classpathentry kind="lib" path="solr/contrib/uima/lib/uima-core.jar"/>
|
||||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
||||||
<classpathentry kind="output" path="bin"/>
|
<classpathentry kind="output" path="bin"/>
|
||||||
</classpath>
|
</classpath>
|
||||||
|
|
|
@ -411,6 +411,9 @@ New Features
|
||||||
* SOLR-2188: Added "maxTokenLength" argument to the factories for ClassicTokenizer,
|
* SOLR-2188: Added "maxTokenLength" argument to the factories for ClassicTokenizer,
|
||||||
StandardTokenizer, and UAX29URLEmailTokenizer. (Steven Rowe)
|
StandardTokenizer, and UAX29URLEmailTokenizer. (Steven Rowe)
|
||||||
|
|
||||||
|
* SOLR-2129: Added a Solr module for dynamic metadata extraction/indexing with Apache UIMA.
|
||||||
|
See contrib/uima/README.txt for more information. (Tommaso Teofili via rmuir)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -218,6 +218,7 @@
|
||||||
<packageset dir="contrib/dataimporthandler/src/main/java" />
|
<packageset dir="contrib/dataimporthandler/src/main/java" />
|
||||||
<packageset dir="contrib/clustering/src/main/java" />
|
<packageset dir="contrib/clustering/src/main/java" />
|
||||||
<packageset dir="contrib/extraction/src/main/java" />
|
<packageset dir="contrib/extraction/src/main/java" />
|
||||||
|
<packageset dir="contrib/uima/src/main/java" />
|
||||||
<packageset dir="contrib/analysis-extras/src/java" />
|
<packageset dir="contrib/analysis-extras/src/java" />
|
||||||
<group title="Core" packages="org.apache.*" />
|
<group title="Core" packages="org.apache.*" />
|
||||||
<group title="Common" packages="org.apache.solr.common.*" />
|
<group title="Common" packages="org.apache.solr.common.*" />
|
||||||
|
@ -225,6 +226,7 @@
|
||||||
<group title="contrib: DataImportHandler" packages="org.apache.solr.handler.dataimport*" />
|
<group title="contrib: DataImportHandler" packages="org.apache.solr.handler.dataimport*" />
|
||||||
<group title="contrib: Clustering" packages="org.apache.solr.handler.clustering*" />
|
<group title="contrib: Clustering" packages="org.apache.solr.handler.clustering*" />
|
||||||
<group title="contrib: Solr Cell" packages="org.apache.solr.handler.extraction*" />
|
<group title="contrib: Solr Cell" packages="org.apache.solr.handler.extraction*" />
|
||||||
|
<group title="contrib: Solr UIMA" packages="org.apache.solr.uima*" />
|
||||||
</sources>
|
</sources>
|
||||||
</invoke-javadoc>
|
</invoke-javadoc>
|
||||||
</sequential>
|
</sequential>
|
||||||
|
@ -514,6 +516,7 @@
|
||||||
<fileset dir="contrib/dataimporthandler/src/main/java" />
|
<fileset dir="contrib/dataimporthandler/src/main/java" />
|
||||||
<fileset dir="contrib/clustering/src/main/java" />
|
<fileset dir="contrib/clustering/src/main/java" />
|
||||||
<fileset dir="contrib/extraction/src/main/java" />
|
<fileset dir="contrib/extraction/src/main/java" />
|
||||||
|
<fileset dir="contrib/uima/src/main/java" />
|
||||||
<fileset dir="contrib/analysis-extras/src/java" />
|
<fileset dir="contrib/analysis-extras/src/java" />
|
||||||
</clover-setup>
|
</clover-setup>
|
||||||
</target>
|
</target>
|
||||||
|
@ -617,6 +620,10 @@
|
||||||
basedir="contrib/clustering/src" />
|
basedir="contrib/clustering/src" />
|
||||||
<solr-jar destfile="${dist}/apache-solr-analysis-extras-src-${version}.jar"
|
<solr-jar destfile="${dist}/apache-solr-analysis-extras-src-${version}.jar"
|
||||||
basedir="contrib/analysis-extras/src" />
|
basedir="contrib/analysis-extras/src" />
|
||||||
|
<solr-jar destfile="${dist}/apache-solr-uima-src-${version}.jar"
|
||||||
|
basedir="contrib/uima/src/main/java" >
|
||||||
|
<fileset dir="contrib/uima/src/main/resources" />
|
||||||
|
</solr-jar>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="dist-javadoc" description="Creates the Solr javadoc distribution files"
|
<target name="dist-javadoc" description="Creates the Solr javadoc distribution files"
|
||||||
|
@ -635,6 +642,8 @@
|
||||||
basedir="${build.javadoc}/contrib-solr-cell" />
|
basedir="${build.javadoc}/contrib-solr-cell" />
|
||||||
<solr-jar destfile="${dist}/apache-solr-analysis-extras-docs-${version}.jar"
|
<solr-jar destfile="${dist}/apache-solr-analysis-extras-docs-${version}.jar"
|
||||||
basedir="${build.javadoc}/contrib-solr-analysis-extras" />
|
basedir="${build.javadoc}/contrib-solr-analysis-extras" />
|
||||||
|
<solr-jar destfile="${dist}/apache-solr-uima-docs-${version}.jar"
|
||||||
|
basedir="${build.javadoc}/contrib-solr-uima" />
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<!-- Creates the solr jar. -->
|
<!-- Creates the solr jar. -->
|
||||||
|
@ -731,7 +740,7 @@
|
||||||
<tarfileset dir="."
|
<tarfileset dir="."
|
||||||
prefix="${fullnamever}"
|
prefix="${fullnamever}"
|
||||||
includes="LICENSE.txt NOTICE.txt *.txt *.xml lucene-libs/** lib/** src/** example/** client/** contrib/"
|
includes="LICENSE.txt NOTICE.txt *.txt *.xml lucene-libs/** lib/** src/** example/** client/** contrib/"
|
||||||
excludes="lib/README.committers.txt **/data/ **/logs/* **/classes/ **/*.sh **/bin/ src/scripts/ src/site/build/ **/target/ client/ruby/flare/ client/python contrib/**/build/ **/*.iml **/*.ipr **/*.iws contrib/clustering/example/lib/** contrib/clustering/lib/downloads/** contrib/analysis-extras/lib/**" />
|
excludes="lib/README.committers.txt **/data/ **/logs/* **/classes/ **/*.sh **/bin/ src/scripts/ src/site/build/ **/target/ client/ruby/flare/ client/python contrib/**/build/ **/*.iml **/*.ipr **/*.iws contrib/clustering/example/lib/** contrib/clustering/lib/downloads/** contrib/analysis-extras/lib/** contrib/uima/lib/**" />
|
||||||
<tarfileset dir="."
|
<tarfileset dir="."
|
||||||
prefix="${fullnamever}"
|
prefix="${fullnamever}"
|
||||||
includes="src/test-files/solr/lib/classes/empty-file-main-lib.txt" />
|
includes="src/test-files/solr/lib/classes/empty-file-main-lib.txt" />
|
||||||
|
@ -903,6 +912,14 @@
|
||||||
</artifact-attachments>
|
</artifact-attachments>
|
||||||
</m2-deploy>
|
</m2-deploy>
|
||||||
|
|
||||||
|
<!-- Deploys the Solr UIMA contrib jar together with its sources and javadoc attachments.
     The POM template is read from the UIMA module directory (contrib/uima); the previous
     value pointed into contrib/extraction, which is the Solr Cell module.
     NOTE(review): confirm the template file name under contrib/uima before release. -->
<m2-deploy pom.xml="contrib/uima/solr-uima-pom.xml.template"
           jar.file="${dist}/apache-solr-uima-${version}.jar">
  <artifact-attachments>
    <attach file="${dist}/apache-solr-uima-src-${version}.jar" classifier="sources"/>
    <attach file="${dist}/apache-solr-uima-docs-${version}.jar" classifier="javadoc"/>
  </artifact-attachments>
</m2-deploy>
|
||||||
|
|
||||||
<m2-deploy pom.xml="src/pom.xml"
|
<m2-deploy pom.xml="src/pom.xml"
|
||||||
jar.file="${dist}/apache-solr-core-${version}.jar">
|
jar.file="${dist}/apache-solr-core-${version}.jar">
|
||||||
<artifact-attachments>
|
<artifact-attachments>
|
||||||
|
@ -952,6 +969,8 @@
|
||||||
<fileset dir="contrib/extraction/src/test/java"/>
|
<fileset dir="contrib/extraction/src/test/java"/>
|
||||||
<fileset dir="contrib/analysis-extras/src/test"/>
|
<fileset dir="contrib/analysis-extras/src/test"/>
|
||||||
<fileset dir="contrib/analysis-extras/src/test"/>
|
<fileset dir="contrib/analysis-extras/src/test"/>
|
||||||
|
<fileset dir="contrib/uima/src/main/java"/>
|
||||||
|
<fileset dir="contrib/uima/src/test/java"/>
|
||||||
</rat:report>
|
</rat:report>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
Apache Solr UIMA Metadata Extraction Library
|
||||||
|
Release Notes
|
||||||
|
|
||||||
|
This file describes changes to the Solr UIMA (contrib/uima) module. See SOLR-2129 for details.
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
------------
|
||||||
|
This module is intended to be used while indexing documents.
|
||||||
|
Its purpose is to add automatically generated fields to the Solr index on the fly.
|
||||||
|
Such fields could be language, concepts, keywords, sentences, named entities, etc.
|
||||||
|
|
||||||
|
UIMA Dependency
|
||||||
|
---------------
|
||||||
|
uima-core, OpenCalaisAnnotator, WhitespaceTokenizer, HMMTagger, AlchemyAPIAnnotator
|
||||||
|
Current Version: 2.3.1-SNAPSHOT rev. 999276
|
||||||
|
|
||||||
|
$Id$
|
|
@ -0,0 +1,60 @@
|
||||||
|
Getting Started
|
||||||
|
---------------
|
||||||
|
To start using Solr UIMA Metadata Extraction Library you should go through the following configuration steps:
|
||||||
|
|
||||||
|
1. copy the generated solr-uima jar and its libs (under contrib/uima/lib) into a Solr library directory.
|
||||||
|
|
||||||
|
2. modify your schema.xml, adding the fields you want to hold the extracted metadata and specifying proper values for the type, indexed, stored and multiValued options:
|
||||||
|
|
||||||
|
3. for example you could specify the following
|
||||||
|
<field name="language" type="string" indexed="true" stored="true" required="false"/>
|
||||||
|
<field name="concept" type="string" indexed="true" stored="true" multiValued="true" required="false"/>
|
||||||
|
<field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false" />
|
||||||
|
|
||||||
|
4. modify your solrconfig.xml adding the following snippet:
|
||||||
|
<uimaConfig>
|
||||||
|
<runtimeParameters>
|
||||||
|
<keyword_apikey>VALID_ALCHEMYAPI_KEY</keyword_apikey>
|
||||||
|
<concept_apikey>VALID_ALCHEMYAPI_KEY</concept_apikey>
|
||||||
|
<lang_apikey>VALID_ALCHEMYAPI_KEY</lang_apikey>
|
||||||
|
<cat_apikey>VALID_ALCHEMYAPI_KEY</cat_apikey>
|
||||||
|
<entities_apikey>VALID_ALCHEMYAPI_KEY</entities_apikey>
|
||||||
|
<oc_licenseID>VALID_OPENCALAIS_KEY</oc_licenseID>
|
||||||
|
</runtimeParameters>
|
||||||
|
<analysisEngine>/org/apache/uima/desc/OverridingParamsExtServicesAE.xml</analysisEngine>
|
||||||
|
<analyzeFields merge="false">text</analyzeFields>
|
||||||
|
<fieldMapping>
|
||||||
|
<type name="org.apache.uima.alchemy.ts.concept.ConceptFS">
|
||||||
|
<map feature="text" field="concept"/>
|
||||||
|
</type>
|
||||||
|
<type name="org.apache.uima.alchemy.ts.language.LanguageFS">
|
||||||
|
<map feature="language" field="language"/>
|
||||||
|
</type>
|
||||||
|
<type name="org.apache.uima.SentenceAnnotation">
|
||||||
|
<map feature="coveredText" field="sentence"/>
|
||||||
|
</type>
|
||||||
|
</fieldMapping>
|
||||||
|
</uimaConfig>
|
||||||
|
|
||||||
|
5. the analysisEngine tag must contain the path, within the classpath, of an AE (Analysis Engine) descriptor
|
||||||
|
|
||||||
|
6. the analyzeFields tag must contain the input fields that need to be analyzed by UIMA,
|
||||||
|
if merge=true then their content will be merged and analyzed only once
|
||||||
|
|
||||||
|
7. field mapping describes which features of which types should go in a field
|
||||||
|
|
||||||
|
8. define an UpdateRequestProcessorChain in your solrconfig.xml as follows:
|
||||||
|
<updateRequestProcessorChain name="uima">
|
||||||
|
<processor class="org.apache.solr.uima.processor.UIMAProcessorFactory"/>
|
||||||
|
<processor class="solr.LogUpdateProcessorFactory" />
|
||||||
|
<processor class="solr.RunUpdateProcessorFactory" />
|
||||||
|
</updateRequestProcessorChain>
|
||||||
|
|
||||||
|
9. in your solrconfig.xml replace the existing default (<requestHandler name="/update"...) or create a new UpdateRequestHandler with the following:
|
||||||
|
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="update.processor">uima</str>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
Once you're done with the configuration, you can index documents, which will be automatically enriched with the specified fields.
|
|
@ -0,0 +1,189 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<project name="solr-uima" default="build">

  <!-- Relative path from this module to the Solr root; used below to reach the Solr
       build output, the shared lib directory and the top-level build.xml. -->
  <property name="solr-path" value="../.." />

  <import file="../../common-build.xml"/>

  <description>
    Solr Integration with UIMA for extracting metadata from arbitrary (text) fields and enrich document with features extracted from UIMA types (language, sentences, concepts, named entities, etc.)
  </description>

  <!-- Compile classpath: Solr core and solrj build output, this module's jars, the shared
       Solr jars, the Lucene classpath and this module's resources directory. -->
  <path id="common.classpath">
    <pathelement location="${solr-path}/build/solr" />
    <pathelement location="${solr-path}/build/solrj" />
    <fileset dir="lib" includes="*.jar"/>
    <fileset dir="${solr-path}/lib" includes="*.jar"/>
    <path refid="lucene.classpath"/>
    <pathelement location="${basedir}/src/main/resources" />
  </path>

  <!-- Test classpath: everything above plus this module's compiled classes and tests,
       the Solr and Lucene test classes, and the classpath Ant itself was started with. -->
  <path id="test.classpath">
    <path refid="common.classpath" />
    <pathelement path="${dest}/classes" />
    <pathelement path="${dest}/test-classes" />
    <pathelement location="${solr-path}/build/tests"/> <!-- include solr test code -->
    <pathelement location="${solr-path}/../lucene/build/classes/test" />  <!-- include some lucene test code -->
    <pathelement path="${java.class.path}"/>
  </path>

  <target name="clean">
    <delete failonerror="false" dir="${dest}"/>
  </target>

  <!-- Creates the output directories, then runs the compileTests and make-manifest
       targets of the top-level Solr build.xml. -->
  <target name="init">
    <mkdir dir="${dest}/classes"/>
    <mkdir dir="${build.javadoc}" />
    <subant target="compileTests">
      <fileset dir="${solr-path}" includes="build.xml"/>
    </subant>
    <subant target="make-manifest">
      <fileset dir="${solr-path}" includes="build.xml"/>
    </subant>
  </target>

  <target name="compile" depends="init">
    <solr-javac destdir="${dest}/classes"
                classpathref="common.classpath">
      <src path="src/main/java" />
    </solr-javac>
  </target>

  <!-- Packages the compiled classes and the main resources into ${dest}/${fullnamever}.jar. -->
  <target name="build" depends="compile">
    <solr-jar destfile="${dest}/${fullnamever}.jar" basedir="${dest}/classes"
              manifest="../../${dest}/META-INF/MANIFEST.MF">
      <fileset dir="src/main/resources" />
    </solr-jar>
  </target>

  <!-- Compiles the tests and copies the non-Java test resources next to them. -->
  <target name="compileTests" depends="compile">
    <solr-javac destdir="${dest}/test-classes"
                classpathref="test.classpath">
      <src path="src/test/java" />
    </solr-javac>
    <copy todir="${dest}/test-classes">
      <fileset dir="src/test/resources" excludes="**/*.java"/>
    </copy>
  </target>

  <property name="tempDir" value="${junit.output.dir}/temp" />

  <target name="test" depends="compileTests">
    <mkdir dir="${junit.output.dir}"/>
    <!-- <mkdir dir="@{tempDir}/@{pattern}"/>
       This is very loud and obnoxious. abuse touch instead for a "quiet" mkdir
    -->
    <touch file="${tempDir}/quiet.ant" verbose="false" mkdirs="true"/>

    <!-- runall is set only when none of testcase, testpackage or testpackageroot is given;
         exactly one of the batchtest elements below is selected through these properties. -->
    <condition property="runall">
      <not>
        <or>
          <isset property="testcase"/>
          <isset property="testpackage"/>
          <isset property="testpackageroot"/>
        </or>
      </not>
    </condition>

    <!-- Failures do not halt the run; they set tests.failed, which is checked after junit. -->
    <junit printsummary="no"
           haltonfailure="no"
           maxmemory="512M"
           errorProperty="tests.failed"
           failureProperty="tests.failed"
           dir="${tempDir}"
           tempdir="${tempDir}"
           forkmode="perBatch"
           >
      <sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
      <sysproperty key="tests.luceneMatchVersion" value="${tests.luceneMatchVersion}"/>
      <sysproperty key="tests.codec" value="${tests.codec}"/>
      <sysproperty key="tests.locale" value="${tests.locale}"/>
      <sysproperty key="tests.timezone" value="${tests.timezone}"/>
      <sysproperty key="tests.multiplier" value="${tests.multiplier}"/>
      <sysproperty key="tests.iter" value="${tests.iter}"/>
      <sysproperty key="tests.seed" value="${tests.seed}"/>
      <sysproperty key="jetty.insecurerandom" value="1"/>
      <sysproperty key="tempDir" file="${tempDir}"/>
      <sysproperty key="testmethod" value="${testmethod}"/>
      <jvmarg line="${args}"/>
      <formatter classname="${junit.details.formatter}" usefile="false" if="junit.details"/>
      <classpath refid="test.classpath"/>
      <assertions>
        <enable package="org.apache.lucene"/>
        <enable package="org.apache.solr"/>
      </assertions>
      <formatter type="${junit.formatter}"/>
      <batchtest fork="yes" todir="${junit.output.dir}" if="runall">
        <fileset dir="src/test/java" includes="${junit.includes}"/>
      </batchtest>
      <batchtest fork="yes" todir="${junit.output.dir}" if="testcase">
        <fileset dir="src/test/java" includes="**/${testcase}.java"/>
      </batchtest>
      <batchtest fork="yes" todir="${junit.output.dir}" if="testpackage">
        <fileset dir="src/test/java" includes="**/${testpackage}/**/Test*.java,**/${testpackage}/**/*Test.java"/>
      </batchtest>
      <batchtest fork="yes" todir="${junit.output.dir}" if="testpackageroot">
        <fileset dir="src/test/java" includes="**/${testpackageroot}/Test*.java,**/${testpackageroot}/*Test.java"/>
      </batchtest>
    </junit>

    <fail if="tests.failed">Tests failed!</fail>
  </target>

  <target name="test-reports"
          description="Generates HTML test reports.">
    <mkdir dir="${junit.reports}"/>
    <junitreport todir="${junit.output.dir}">
      <fileset dir="${junit.output.dir}">
        <include name="TEST-*.xml"/>
      </fileset>
      <report format="frames" todir="${junit.reports}"/>
    </junitreport>
  </target>

  <!-- Copies the jar produced by the build target into the Solr dist directory.
       The source path uses ${dest}, the same location the build target writes to,
       rather than a hard-coded "build" directory. -->
  <target name="dist" depends="build">
    <copy file="${dest}/${fullnamever}.jar" todir="${solr-path}/dist"/>
  </target>

  <target name="example" depends="build">
    <!-- :NOOP: this use to copy libs but now we can refer to them by path -->
  </target>

  <!-- Generates javadoc for src/main/java into ${build.javadoc}/contrib-${name}. -->
  <target name="javadoc">
    <sequential>
      <mkdir dir="${build.javadoc}/contrib-${name}"/>

      <path id="javadoc.classpath">
        <path refid="common.classpath"/>
      </path>

      <invoke-javadoc
        destdir="${build.javadoc}/contrib-${name}"
        title="${Name} ${version} contrib-${fullnamever} API">
        <sources>
          <packageset dir="src/main/java"/>
        </sources>
      </invoke-javadoc>
    </sequential>
  </target>

</project>
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[9c8bd13a2002a9ff5b35b873b9f111d5281ad201] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[532939ecab6b77ccb77af3635c55ff9752b70ab7] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[33165678da937e03cb069449b40f1cf690beda0a] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[5dfc32bce5e444a9bb3387d664485f7bfdc438ad] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[bf90c19d2c1f77e300b94363385841ec1225b4b9] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[9518da64cdf5d378273ab40a06823a7768f18ece] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[72991424bdfe4776f66feab7ff4e8564f12d2659] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,115 @@
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <!--
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements. See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership. The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied. See the License for the
    specific language governing permissions and limitations
    under the License.
  -->
  <modelVersion>4.0.0</modelVersion>

  <!-- Maven coordinates of the Solr UIMA module.
       NOTE(review): the version here and the solr-core version below are hard-coded;
       confirm whether the build is expected to substitute them. -->
  <groupId>org.apache.solr</groupId>
  <artifactId>solr-uima</artifactId>
  <version>0.0.2-SNAPSHOT</version>
  <name>Solr - UIMA integration</name>

  <properties>
    <!-- Single version shared by every org.apache.uima dependency declared below. -->
    <uimaVersion>2.3.1-SNAPSHOT</uimaVersion>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.solr</groupId>
      <artifactId>solr-core</artifactId>
      <version>1.4.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.uima</groupId>
      <artifactId>uimaj-core</artifactId>
      <version>${uimaVersion}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.uima</groupId>
      <artifactId>alchemy-annotator</artifactId>
      <version>${uimaVersion}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.uima</groupId>
      <artifactId>OpenCalaisAnnotator</artifactId>
      <version>${uimaVersion}</version>
    </dependency>
    <!-- The only dependency declared with test scope. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.7</version>
      <type>jar</type>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-simple</artifactId>
      <version>1.5.5</version>
    </dependency>
    <dependency>
      <groupId>org.apache.uima</groupId>
      <artifactId>WhitespaceTokenizer</artifactId>
      <version>${uimaVersion}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.uima</groupId>
      <artifactId>Tagger</artifactId>
      <version>${uimaVersion}</version>
    </dependency>
  </dependencies>

  <build>
    <!-- Pins the version of the upload plugin that is configured further down. -->
    <pluginManagement>
      <plugins>
        <plugin>
          <groupId>com.googlecode.maven-gcu-plugin</groupId>
          <artifactId>maven-gcu-plugin</artifactId>
          <version>1.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
    <plugins>
      <!-- Compile for Java 1.5 source and target levels. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.1</version>
        <configuration>
          <source>1.5</source>
          <target>1.5</target>
        </configuration>
      </plugin>
      <!-- Upload configuration for the packaged artifact found in the build directory.
           NOTE(review): serverId "googlecode" suggests this uploads to a Google Code
           project; confirm whether it is still wanted in this template. -->
      <plugin>
        <groupId>com.googlecode.maven-gcu-plugin</groupId>
        <artifactId>maven-gcu-plugin</artifactId>
        <version>1.0</version>
        <configuration>
          <serverId>googlecode</serverId>
          <failsOnError>true</failsOnError>
          <projectName>${project.artifactId}</projectName>
          <uploads>
            <upload>
              <file>${project.build.directory}/${project.artifactId}-${project.version}.${project.packaging}</file>
              <summary>${project.name} sources bundle ${project.version}</summary>
              <labels>
                <label>Featured</label>
                <label>Type-Archive</label>
              </labels>
            </upload>
          </uploads>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
|
|
@ -0,0 +1,69 @@
|
||||||
|
package org.apache.solr.uima.processor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration holding all the configurable parameters for calling UIMA inside Solr
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class SolrUIMAConfiguration {
|
||||||
|
|
||||||
|
private String[] fieldsToAnalyze;
|
||||||
|
|
||||||
|
private boolean fieldsMerging;
|
||||||
|
|
||||||
|
private Map<String, Map<String, String>> typesFeaturesFieldsMapping;
|
||||||
|
|
||||||
|
private String aePath;
|
||||||
|
|
||||||
|
private Map<String, String> runtimeParameters;
|
||||||
|
|
||||||
|
public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
|
||||||
|
Map<String, Map<String, String>> typesFeaturesFieldsMapping,
|
||||||
|
Map<String, String> runtimeParameters) {
|
||||||
|
this.aePath = aePath;
|
||||||
|
this.fieldsToAnalyze = fieldsToAnalyze;
|
||||||
|
this.fieldsMerging = fieldsMerging;
|
||||||
|
this.runtimeParameters = runtimeParameters;
|
||||||
|
this.typesFeaturesFieldsMapping = typesFeaturesFieldsMapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] getFieldsToAnalyze() {
|
||||||
|
return fieldsToAnalyze;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isFieldsMerging() {
|
||||||
|
return fieldsMerging;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, Map<String, String>> getTypesFeaturesFieldsMapping() {
|
||||||
|
return typesFeaturesFieldsMapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAePath() {
|
||||||
|
return aePath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, String> getRuntimeParameters() {
|
||||||
|
return runtimeParameters;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,125 @@
|
||||||
|
package org.apache.solr.uima.processor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
|
import org.w3c.dom.Node;
|
||||||
|
import org.w3c.dom.NodeList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read configuration for Solr-UIMA integration
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class SolrUIMAConfigurationReader {
|
||||||
|
|
||||||
|
private static final String AE_RUNTIME_PARAMETERS_NODE_PATH = "/config/uimaConfig/runtimeParameters";
|
||||||
|
|
||||||
|
private static final String FIELD_MAPPING_NODE_PATH = "/config/uimaConfig/fieldMapping";
|
||||||
|
|
||||||
|
private static final String ANALYZE_FIELDS_NODE_PATH = "/config/uimaConfig/analyzeFields";
|
||||||
|
|
||||||
|
private static final String ANALYSIS_ENGINE_NODE_PATH = "/config/uimaConfig/analysisEngine";
|
||||||
|
|
||||||
|
private SolrConfig solrConfig;
|
||||||
|
|
||||||
|
public SolrUIMAConfigurationReader(SolrConfig solrConfig) {
|
||||||
|
this.solrConfig = solrConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SolrUIMAConfiguration readSolrUIMAConfiguration() {
|
||||||
|
return new SolrUIMAConfiguration(readAEPath(), readFieldsToAnalyze(), readFieldsMerging(),
|
||||||
|
readTypesFeaturesFieldsMapping(), readAEOverridingParameters());
|
||||||
|
}
|
||||||
|
|
||||||
|
private String readAEPath() {
|
||||||
|
return solrConfig.getNode(ANALYSIS_ENGINE_NODE_PATH, true).getTextContent();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String[] readFieldsToAnalyze() {
|
||||||
|
Node analyzeFieldsNode = solrConfig.getNode(ANALYZE_FIELDS_NODE_PATH, true);
|
||||||
|
return analyzeFieldsNode.getTextContent().split(",");
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean readFieldsMerging() {
|
||||||
|
Node analyzeFieldsNode = solrConfig.getNode(ANALYZE_FIELDS_NODE_PATH, true);
|
||||||
|
Node mergeNode = analyzeFieldsNode.getAttributes().getNamedItem("merge");
|
||||||
|
return Boolean.valueOf(mergeNode.getNodeValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Map<String, String>> readTypesFeaturesFieldsMapping() {
|
||||||
|
Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
|
||||||
|
|
||||||
|
Node fieldMappingNode = solrConfig.getNode(FIELD_MAPPING_NODE_PATH, true);
|
||||||
|
/* iterate over UIMA types */
|
||||||
|
if (fieldMappingNode.hasChildNodes()) {
|
||||||
|
NodeList typeNodes = fieldMappingNode.getChildNodes();
|
||||||
|
for (int i = 0; i < typeNodes.getLength(); i++) {
|
||||||
|
/* <type> node */
|
||||||
|
Node typeNode = typeNodes.item(i);
|
||||||
|
if (typeNode.getNodeType() != Node.TEXT_NODE) {
|
||||||
|
Node typeNameAttribute = typeNode.getAttributes().getNamedItem("name");
|
||||||
|
/* get a UIMA typename */
|
||||||
|
String typeName = typeNameAttribute.getNodeValue();
|
||||||
|
/* create entry for UIMA type */
|
||||||
|
map.put(typeName, new HashMap<String, String>());
|
||||||
|
if (typeNode.hasChildNodes()) {
|
||||||
|
/* iterate over features */
|
||||||
|
NodeList featuresNodeList = typeNode.getChildNodes();
|
||||||
|
for (int j = 0; j < featuresNodeList.getLength(); j++) {
|
||||||
|
Node mappingNode = featuresNodeList.item(j);
|
||||||
|
if (mappingNode.getNodeType() != Node.TEXT_NODE) {
|
||||||
|
/* get field name */
|
||||||
|
Node fieldNameNode = mappingNode.getAttributes().getNamedItem("field");
|
||||||
|
String mappedFieldName = fieldNameNode.getNodeValue();
|
||||||
|
/* get feature name */
|
||||||
|
Node featureNameNode = mappingNode.getAttributes().getNamedItem("feature");
|
||||||
|
String featureName = featureNameNode.getNodeValue();
|
||||||
|
/* map the feature to the field for the specified type */
|
||||||
|
map.get(typeName).put(featureName, mappedFieldName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, String> readAEOverridingParameters() {
|
||||||
|
Map<String, String> runtimeParameters = new HashMap<String, String>();
|
||||||
|
Node uimaConfigNode = solrConfig.getNode(AE_RUNTIME_PARAMETERS_NODE_PATH, true);
|
||||||
|
|
||||||
|
if (uimaConfigNode.hasChildNodes()) {
|
||||||
|
NodeList overridingNodes = uimaConfigNode.getChildNodes();
|
||||||
|
for (int i = 0; i < overridingNodes.getLength(); i++) {
|
||||||
|
Node overridingNode = overridingNodes.item(i);
|
||||||
|
if (overridingNode.getNodeType() != Node.TEXT_NODE) {
|
||||||
|
runtimeParameters.put(overridingNode.getNodeName(), overridingNode.getTextContent());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return runtimeParameters;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,83 @@
|
||||||
|
package org.apache.solr.uima.processor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.uima.cas.FSIterator;
|
||||||
|
import org.apache.uima.cas.FeatureStructure;
|
||||||
|
import org.apache.uima.cas.Type;
|
||||||
|
import org.apache.uima.jcas.JCas;
|
||||||
|
import org.apache.uima.jcas.tcas.Annotation;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map UIMA types and features over fields of a Solr document
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class UIMAToSolrMapper {
|
||||||
|
|
||||||
|
private final Logger log = LoggerFactory.getLogger(UIMAToSolrMapper.class);
|
||||||
|
|
||||||
|
private SolrInputDocument document;
|
||||||
|
|
||||||
|
private JCas cas;
|
||||||
|
|
||||||
|
public UIMAToSolrMapper(SolrInputDocument document, JCas cas) {
|
||||||
|
this.document = document;
|
||||||
|
this.cas = cas;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* map features of a certain UIMA type to corresponding Solr fields based on the mapping
|
||||||
|
*
|
||||||
|
* @param typeName
|
||||||
|
* name of UIMA type to map
|
||||||
|
* @param featureFieldsmapping
|
||||||
|
*/
|
||||||
|
public void map(String typeName, Map<String, String> featureFieldsmapping) {
|
||||||
|
try {
|
||||||
|
FeatureStructure fsMock = (FeatureStructure) Class.forName(typeName).getConstructor(
|
||||||
|
JCas.class).newInstance(cas);
|
||||||
|
Type type = fsMock.getType();
|
||||||
|
for (FSIterator<FeatureStructure> iterator = cas.getFSIndexRepository().getAllIndexedFS(type); iterator
|
||||||
|
.hasNext();) {
|
||||||
|
FeatureStructure fs = iterator.next();
|
||||||
|
for (String featureName : featureFieldsmapping.keySet()) {
|
||||||
|
String fieldName = featureFieldsmapping.get(featureName);
|
||||||
|
log.info(new StringBuffer("mapping ").append(typeName).append("@").append(featureName)
|
||||||
|
.append(" to ").append(fieldName).toString());
|
||||||
|
String featureValue = null;
|
||||||
|
if (fs instanceof Annotation && "coveredText".equals(featureName)) {
|
||||||
|
featureValue = ((Annotation) fs).getCoveredText();
|
||||||
|
} else {
|
||||||
|
featureValue = fs.getFeatureValueAsString(type.getFeatureByBaseName(featureName));
|
||||||
|
}
|
||||||
|
log.info(new StringBuffer("writing ").append(featureValue).append(" in ").append(
|
||||||
|
fieldName).toString());
|
||||||
|
document.addField(fieldName, featureValue, 1.0f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error(e.getLocalizedMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,126 @@
|
||||||
|
package org.apache.solr.uima.processor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.uima.processor.ae.AEProvider;
|
||||||
|
import org.apache.solr.uima.processor.ae.AEProviderFactory;
|
||||||
|
import org.apache.solr.update.AddUpdateCommand;
|
||||||
|
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||||
|
import org.apache.uima.UIMAException;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
|
import org.apache.uima.jcas.JCas;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update document(s) to be indexed with UIMA extracted information
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
||||||
|
|
||||||
|
private SolrUIMAConfiguration solrUIMAConfiguration;
|
||||||
|
|
||||||
|
private AEProvider aeProvider;
|
||||||
|
|
||||||
|
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, SolrCore solrCore) {
|
||||||
|
super(next);
|
||||||
|
initialize(solrCore);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initialize(SolrCore solrCore) {
|
||||||
|
SolrUIMAConfigurationReader uimaConfigurationReader = new SolrUIMAConfigurationReader(solrCore
|
||||||
|
.getSolrConfig());
|
||||||
|
solrUIMAConfiguration = uimaConfigurationReader.readSolrUIMAConfiguration();
|
||||||
|
aeProvider = AEProviderFactory.getInstance().getAEProvider(solrCore.getName(),
|
||||||
|
solrUIMAConfiguration.getAePath(), solrUIMAConfiguration.getRuntimeParameters());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||||
|
try {
|
||||||
|
/* get Solr document */
|
||||||
|
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
|
||||||
|
|
||||||
|
/* get the fields to analyze */
|
||||||
|
for (String text : getTextsToAnalyze(solrInputDocument)) {
|
||||||
|
if (text != null && !"".equals(text)) {
|
||||||
|
/* process the text value */
|
||||||
|
JCas jcas = processText(text);
|
||||||
|
|
||||||
|
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
|
||||||
|
/* get field mapping from config */
|
||||||
|
Map<String, Map<String, String>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
|
||||||
|
.getTypesFeaturesFieldsMapping();
|
||||||
|
/* map type features on fields */
|
||||||
|
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {
|
||||||
|
uimaToSolrMapper.map(typeFQN, typesAndFeaturesFieldsMap.get(typeFQN));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (UIMAException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
super.processAdd(cmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get the texts to analyze from the corresponding fields
|
||||||
|
*/
|
||||||
|
private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) {
|
||||||
|
String[] fieldsToAnalyze = solrUIMAConfiguration.getFieldsToAnalyze();
|
||||||
|
boolean merge = solrUIMAConfiguration.isFieldsMerging();
|
||||||
|
String[] textVals = null;
|
||||||
|
if (merge) {
|
||||||
|
StringBuilder unifiedText = new StringBuilder("");
|
||||||
|
for (int i = 0; i < fieldsToAnalyze.length; i++) {
|
||||||
|
unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i])));
|
||||||
|
}
|
||||||
|
textVals = new String[1];
|
||||||
|
textVals[0] = unifiedText.toString();
|
||||||
|
} else {
|
||||||
|
textVals = new String[fieldsToAnalyze.length];
|
||||||
|
for (int i = 0; i < fieldsToAnalyze.length; i++) {
|
||||||
|
textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return textVals;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* process a field value executing UIMA the CAS containing it as document text */
|
||||||
|
private JCas processText(String textFieldValue) throws ResourceInitializationException,
|
||||||
|
AnalysisEngineProcessException {
|
||||||
|
log.info(new StringBuffer("Analazying text").toString());
|
||||||
|
/* get the UIMA analysis engine */
|
||||||
|
AnalysisEngine ae = aeProvider.getAE();
|
||||||
|
|
||||||
|
/* create a JCas which contain the text to analyze */
|
||||||
|
JCas jcas = ae.newJCas();
|
||||||
|
jcas.setDocumentText(textFieldValue);
|
||||||
|
|
||||||
|
/* perform analysis on text field */
|
||||||
|
ae.process(jcas);
|
||||||
|
log.info(new StringBuilder("Text processing completed").toString());
|
||||||
|
return jcas;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
package org.apache.solr.uima.processor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||||
|
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link UIMAUpdateRequestProcessor}
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory {
|
||||||
|
|
||||||
|
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp,
|
||||||
|
UpdateRequestProcessor next) {
|
||||||
|
return new UIMAUpdateRequestProcessor(next, req.getCore());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
package org.apache.solr.uima.processor.ae;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* provide an Apache UIMA {@link AnalysisEngine}
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public interface AEProvider {
|
||||||
|
|
||||||
|
public AnalysisEngine getAE() throws ResourceInitializationException;
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
package org.apache.solr.uima.processor.ae;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Singleton factory class responsible of {@link AEProvider}s' creation
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class AEProviderFactory {
|
||||||
|
|
||||||
|
private static AEProviderFactory instance;
|
||||||
|
|
||||||
|
private Map<String, AEProvider> providerCache = new HashMap<String, AEProvider>();
|
||||||
|
|
||||||
|
private AEProviderFactory() {
|
||||||
|
// Singleton
|
||||||
|
}
|
||||||
|
|
||||||
|
public static AEProviderFactory getInstance() {
|
||||||
|
if (instance == null) {
|
||||||
|
instance = new AEProviderFactory();
|
||||||
|
}
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized AEProvider getAEProvider(String core, String aePath,
|
||||||
|
Map<String, String> runtimeParameters) {
|
||||||
|
String key = new StringBuilder(core).append(aePath).toString();
|
||||||
|
if (providerCache.get(key) == null) {
|
||||||
|
providerCache.put(key, new OverridingParamsAEProvider(aePath, runtimeParameters));
|
||||||
|
}
|
||||||
|
return providerCache.get(key);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,89 @@
|
||||||
|
package org.apache.solr.uima.processor.ae;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.uima.UIMAFramework;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
import org.apache.uima.util.XMLInputSource;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@link AEProvider} implementation that creates an Aggregate AE from the given path, also
|
||||||
|
* injecting runtime parameters defined in the solrconfig.xml Solr configuration file and assigning
|
||||||
|
* them as overriding parameters in the aggregate AE
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class OverridingParamsAEProvider implements AEProvider {
|
||||||
|
|
||||||
|
private static Logger log = LoggerFactory.getLogger(OverridingParamsAEProvider.class);
|
||||||
|
|
||||||
|
private String aeFilePath;
|
||||||
|
|
||||||
|
private AnalysisEngine cachedAE;
|
||||||
|
|
||||||
|
private Map<String, String> runtimeParameters;
|
||||||
|
|
||||||
|
public OverridingParamsAEProvider(String aeFilePath, Map<String, String> runtimeParameters) {
|
||||||
|
this.aeFilePath = aeFilePath;
|
||||||
|
this.runtimeParameters = runtimeParameters;
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized AnalysisEngine getAE() throws ResourceInitializationException {
|
||||||
|
try {
|
||||||
|
if (cachedAE == null) {
|
||||||
|
// get Resource Specifier from XML file
|
||||||
|
URL url = this.getClass().getResource(aeFilePath);
|
||||||
|
XMLInputSource in = new XMLInputSource(url);
|
||||||
|
|
||||||
|
// get AE description
|
||||||
|
AnalysisEngineDescription desc = UIMAFramework.getXMLParser()
|
||||||
|
.parseAnalysisEngineDescription(in);
|
||||||
|
|
||||||
|
/* iterate over each AE (to set runtime parameters) */
|
||||||
|
for (String attributeName : runtimeParameters.keySet()) {
|
||||||
|
desc.getAnalysisEngineMetaData().getConfigurationParameterSettings().setParameterValue(
|
||||||
|
attributeName, runtimeParameters.get(attributeName));
|
||||||
|
log.info(new StringBuilder("setting ").append(attributeName).append(" : ").append(
|
||||||
|
runtimeParameters.get(attributeName)).toString());
|
||||||
|
}
|
||||||
|
// create AE here
|
||||||
|
cachedAE = UIMAFramework.produceAnalysisEngine(desc);
|
||||||
|
if (log.isDebugEnabled())
|
||||||
|
log.debug(new StringBuilder("AE ").append(cachedAE.getAnalysisEngineMetaData().getName())
|
||||||
|
.append(" created from descriptor ").append(aeFilePath).toString());
|
||||||
|
} else {
|
||||||
|
cachedAE.reconfigure();
|
||||||
|
if (log.isDebugEnabled())
|
||||||
|
log.debug(new StringBuilder("AE ").append(cachedAE.getAnalysisEngineMetaData().getName())
|
||||||
|
.append(" at path ").append(aeFilePath).append(" reconfigured ").toString());
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
cachedAE = null;
|
||||||
|
throw new ResourceInitializationException(e);
|
||||||
|
}
|
||||||
|
return cachedAE;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- Aggregate analysis engine: fixed flow running the WhitespaceTokenizer delegate first,
     then the HmmTagger delegate. -->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>false</primitive>
  <delegateAnalysisEngineSpecifiers>
    <delegateAnalysisEngine key="HmmTagger">
      <import name="HmmTagger"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="WhitespaceTokenizer">
      <import name="WhitespaceTokenizer"/>
    </delegateAnalysisEngine>
  </delegateAnalysisEngineSpecifiers>
  <analysisEngineMetaData>
    <name>AggregateSentenceAE</name>
    <description/>
    <version>1.0</version>
    <vendor/>
    <configurationParameters/>
    <configurationParameterSettings/>
    <flowConstraints>
      <fixedFlow>
        <node>WhitespaceTokenizer</node>
        <node>HmmTagger</node>
      </fixedFlow>
    </flowConstraints>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>
|
|
@ -0,0 +1,57 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- Aggregate analysis engine: fixed flow over six imported delegates, in the order listed
     under flowConstraints. -->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>false</primitive>
  <delegateAnalysisEngineSpecifiers>
    <delegateAnalysisEngine key="TextConceptTaggingAEDescriptor">
      <import name="TextConceptTaggingAEDescriptor"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="TextKeywordExtractionAEDescriptor">
      <import name="TextKeywordExtractionAEDescriptor"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="OpenCalaisAnnotator">
      <import name="OpenCalaisAnnotator"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="TextCategorizationAEDescriptor">
      <import name="TextCategorizationAEDescriptor"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="TextLanguageDetectionAEDescriptor">
      <import name="TextLanguageDetectionAEDescriptor"/>
    </delegateAnalysisEngine>
    <delegateAnalysisEngine key="TextRankedEntityExtractionAEDescriptor">
      <import name="TextRankedEntityExtractionAEDescriptor"/>
    </delegateAnalysisEngine>
  </delegateAnalysisEngineSpecifiers>
  <analysisEngineMetaData>
    <name>ExtServicesAE</name>
    <description/>
    <version>1.0</version>
    <vendor/>
    <configurationParameters/>
    <configurationParameterSettings/>
    <flowConstraints>
      <fixedFlow>
        <node>OpenCalaisAnnotator</node>
        <node>TextKeywordExtractionAEDescriptor</node>
        <node>TextLanguageDetectionAEDescriptor</node>
        <node>TextCategorizationAEDescriptor</node>
        <node>TextConceptTaggingAEDescriptor</node>
        <node>TextRankedEntityExtractionAEDescriptor</node>
      </fixedFlow>
    </flowConstraints>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>
|
|
@ -0,0 +1,121 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements. See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership. The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing,
  software distributed under the License is distributed on an
  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  KIND, either express or implied. See the License for the
  specific language governing permissions and limitations
  under the License.
-->
<!-- Primitive analysis engine descriptor for the HMM part-of-speech tagger. -->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.examples.tagger.HMMTagger</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>Hidden Markov Model - Part of Speech Tagger</name>
    <description>A configuration of the HmmTaggerAnnotator that looks for
      parts of speech of identified tokens within existing
      Sentence and Token annotations. See also
      WhitespaceTokenizer.xml.</description>
    <version>1.0</version>
    <vendor>The Apache Software Foundation</vendor>
    <configurationParameters>
      <configurationParameter>
        <name>NGRAM_SIZE</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
    </configurationParameters>
    <configurationParameterSettings>
      <nameValuePair>
        <name>NGRAM_SIZE</name>
        <value>
          <integer>3</integer>
        </value>
      </nameValuePair>
    </configurationParameterSettings>
    <typeSystemDescription>
      <types>
        <typeDescription>
          <name>org.apache.uima.TokenAnnotation</name>
          <description>Single token annotation</description>
          <supertypeName>uima.tcas.Annotation</supertypeName>
          <features>
            <featureDescription>
              <name>posTag</name>
              <description>contains part-of-speech of a
                corresponding token</description>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.SentenceAnnotation</name>
          <description>sentence annotation</description>
          <supertypeName>uima.tcas.Annotation</supertypeName>
        </typeDescription>
      </types>
    </typeSystemDescription>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs>
          <type>org.apache.uima.TokenAnnotation</type>
          <type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
          <feature>org.apache.uima.TokenAnnotation:end</feature>
          <feature>org.apache.uima.TokenAnnotation:begin</feature>
        </inputs>
        <outputs>
          <type>org.apache.uima.TokenAnnotation</type>
          <feature>org.apache.uima.TokenAnnotation:posTag</feature>
          <feature>org.apache.uima.TokenAnnotation:end</feature>
          <feature>org.apache.uima.TokenAnnotation:begin</feature>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <externalResourceDependencies>
    <externalResourceDependency>
      <key>Model</key>
      <description>HMM Tagger model file</description>
      <interfaceName>org.apache.uima.examples.tagger.IModelResource</interfaceName>
      <optional>false</optional>
    </externalResourceDependency>
  </externalResourceDependencies>
  <resourceManagerConfiguration>
    <externalResources>
      <externalResource>
        <name>ModelFile</name>
        <description>HMM Tagger model file</description>
        <fileResourceSpecifier>
          <fileUrl>file:english/BrownModel.dat</fileUrl>
        </fileResourceSpecifier>
        <implementationName>org.apache.uima.examples.tagger.ModelResource</implementationName>
      </externalResource>
    </externalResources>
    <externalResourceBindings>
      <externalResourceBinding>
        <key>Model</key>
        <resourceName>ModelFile</resourceName>
      </externalResourceBinding>
    </externalResourceBindings>
  </resourceManagerConfiguration>
</analysisEngineDescription>
|
|
@ -0,0 +1,194 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||||
|
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||||
|
<primitive>true</primitive>
|
||||||
|
<annotatorImplementationName>org.apache.uima.annotator.calais.OpenCalaisAnnotator</annotatorImplementationName>
|
||||||
|
<analysisEngineMetaData>
|
||||||
|
<name>OpenCalaisAnnotator</name>
|
||||||
|
<description/>
|
||||||
|
<configurationParameters>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>allowDistribution</name>
|
||||||
|
<description/>
|
||||||
|
<type>Boolean</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>allowSearch</name>
|
||||||
|
<description/>
|
||||||
|
<type>Boolean</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>submitter</name>
|
||||||
|
<description/>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>licenseID</name>
|
||||||
|
<description/>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
</configurationParameters>
|
||||||
|
<configurationParameterSettings>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>allowDistribution</name>
|
||||||
|
<value>
|
||||||
|
<boolean>false</boolean>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>allowSearch</name>
|
||||||
|
<value>
|
||||||
|
<boolean>false</boolean>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>submitter</name>
|
||||||
|
<value>
|
||||||
|
<string/>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>licenseID</name>
|
||||||
|
<value>
|
||||||
|
<string>OC_LICENSE_ID</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
</configurationParameterSettings>
|
||||||
|
<typeSystemDescription>
|
||||||
|
<types>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Person</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Anniversary</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.City</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Company</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Continent</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Country</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Currency</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.EmailAddress</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Facility</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.FaxNumber</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Holiday</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.IndustryTerm</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.NaturalDisaster</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.NaturalFeature</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Organization</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.PhoneNumber</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.ProviceOrState</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Region</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.Technology</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.URL</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>org.apache.uima.calais.BaseType</supertypeName>
|
||||||
|
</typeDescription>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.calais.BaseType</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||||
|
<features>
|
||||||
|
<featureDescription>
|
||||||
|
<name>calaisType</name>
|
||||||
|
<description>OpenCalais type</description>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
</features>
|
||||||
|
</typeDescription>
|
||||||
|
</types>
|
||||||
|
</typeSystemDescription>
|
||||||
|
<capabilities>
|
||||||
|
<capability>
|
||||||
|
<inputs/>
|
||||||
|
<outputs/>
|
||||||
|
<languagesSupported/>
|
||||||
|
</capability>
|
||||||
|
</capabilities>
|
||||||
|
<operationalProperties>
|
||||||
|
<modifiesCas>true</modifiesCas>
|
||||||
|
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||||
|
<outputsNewCASes>false</outputsNewCASes>
|
||||||
|
</operationalProperties>
|
||||||
|
</analysisEngineMetaData>
|
||||||
|
</analysisEngineDescription>
|
|
@ -0,0 +1,147 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||||
|
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||||
|
<primitive>false</primitive>
|
||||||
|
<delegateAnalysisEngineSpecifiers>
|
||||||
|
<delegateAnalysisEngine key="TextKeywordExtractionAEDescriptor">
|
||||||
|
<import name="TextKeywordExtractionAEDescriptor"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
<delegateAnalysisEngine key="TextConceptTaggingAEDescriptor">
|
||||||
|
<import name="TextConceptTaggingAEDescriptor"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
<delegateAnalysisEngine key="OpenCalaisAnnotator">
|
||||||
|
<import name="OpenCalaisAnnotator"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
<delegateAnalysisEngine key="TextLanguageDetectionAEDescriptor">
|
||||||
|
<import name="TextLanguageDetectionAEDescriptor"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
<delegateAnalysisEngine key="TextCategorizationAEDescriptor">
|
||||||
|
<import name="TextCategorizationAEDescriptor"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
<delegateAnalysisEngine key="AggregateSentenceAE">
|
||||||
|
<import location="AggregateSentenceAE.xml"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
<delegateAnalysisEngine key="TextRankedEntityExtractionAEDescriptor">
|
||||||
|
<import name="TextRankedEntityExtractionAEDescriptor"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
</delegateAnalysisEngineSpecifiers>
|
||||||
|
<analysisEngineMetaData>
|
||||||
|
<name>ExtServicesAE</name>
|
||||||
|
<description/>
|
||||||
|
<version>1.0</version>
|
||||||
|
<vendor/>
|
||||||
|
<configurationParameters searchStrategy="language_fallback">
|
||||||
|
<configurationParameter>
|
||||||
|
<name>oc_licenseID</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
<overrides>
|
||||||
|
<parameter>OpenCalaisAnnotator/licenseID</parameter>
|
||||||
|
</overrides>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>keyword_apikey</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
<overrides>
|
||||||
|
<parameter>TextKeywordExtractionAEDescriptor/apikey</parameter>
|
||||||
|
</overrides>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>concept_apikey</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
<overrides>
|
||||||
|
<parameter>TextConceptTaggingAEDescriptor/apikey</parameter>
|
||||||
|
</overrides>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>lang_apikey</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
<overrides>
|
||||||
|
<parameter>TextLanguageDetectionAEDescriptor/apikey</parameter>
|
||||||
|
</overrides>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>cat_apikey</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
<overrides>
|
||||||
|
<parameter>TextCategorizationAEDescriptor/apikey</parameter>
|
||||||
|
</overrides>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>entities_apikey</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
<overrides>
|
||||||
|
<parameter>TextRankedEntityExtractionAEDescriptor/apikey</parameter>
|
||||||
|
</overrides>
|
||||||
|
</configurationParameter>
|
||||||
|
</configurationParameters>
|
||||||
|
<configurationParameterSettings>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>oc_licenseID</name>
|
||||||
|
<value>
|
||||||
|
<string>licenseid</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>keyword_apikey</name>
|
||||||
|
<value>
|
||||||
|
<string>apikey</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>concept_apikey</name>
|
||||||
|
<value>
|
||||||
|
<string>apikey</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>lang_apikey</name>
|
||||||
|
<value>
|
||||||
|
<string>apikey</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>cat_apikey</name>
|
||||||
|
<value>
|
||||||
|
<string>apikey</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<!-- entities_apikey is declared mandatory above (override of
     TextRankedEntityExtractionAEDescriptor/apikey) but had no default;
     give it the same placeholder value as the other *_apikey settings. -->
<nameValuePair>
<name>entities_apikey</name>
<value>
<string>apikey</string>
</value>
</nameValuePair>
</configurationParameterSettings>
|
||||||
|
<flowConstraints>
|
||||||
|
<fixedFlow>
|
||||||
|
<node>AggregateSentenceAE</node>
|
||||||
|
<node>OpenCalaisAnnotator</node>
|
||||||
|
<node>TextKeywordExtractionAEDescriptor</node>
|
||||||
|
<node>TextLanguageDetectionAEDescriptor</node>
|
||||||
|
<node>TextCategorizationAEDescriptor</node>
|
||||||
|
<node>TextConceptTaggingAEDescriptor</node>
|
||||||
|
<node>TextRankedEntityExtractionAEDescriptor</node>
|
||||||
|
</fixedFlow>
|
||||||
|
</flowConstraints>
|
||||||
|
<fsIndexCollection/>
|
||||||
|
<capabilities>
|
||||||
|
<capability>
|
||||||
|
<inputs/>
|
||||||
|
<outputs/>
|
||||||
|
<languagesSupported/>
|
||||||
|
</capability>
|
||||||
|
</capabilities>
|
||||||
|
<operationalProperties>
|
||||||
|
<modifiesCas>true</modifiesCas>
|
||||||
|
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||||
|
<outputsNewCASes>false</outputsNewCASes>
|
||||||
|
</operationalProperties>
|
||||||
|
</analysisEngineMetaData>
|
||||||
|
<resourceManagerConfiguration/>
|
||||||
|
</analysisEngineDescription>
|
|
@ -0,0 +1,102 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
|
||||||
|
-->
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||||
|
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||||
|
<primitive>true</primitive>
|
||||||
|
<annotatorImplementationName>org.apache.uima.alchemy.annotator.TextCategorizationAnnotator</annotatorImplementationName>
|
||||||
|
<analysisEngineMetaData>
|
||||||
|
<name>TextCategorizationAEDescriptor</name>
|
||||||
|
<description/>
|
||||||
|
<version>1.0</version>
|
||||||
|
<vendor/>
|
||||||
|
<configurationParameters>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>apikey</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>outputMode</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>baseUrl</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
</configurationParameters>
|
||||||
|
<configurationParameterSettings>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>outputMode</name>
|
||||||
|
<value>
|
||||||
|
<string>xml</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>apikey</name>
|
||||||
|
<value>
|
||||||
|
<string>AA_API_KEY</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
</configurationParameterSettings>
|
||||||
|
<typeSystemDescription>
|
||||||
|
<types>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.alchemy.ts.categorization.Category</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>uima.cas.TOP</supertypeName>
|
||||||
|
<features>
|
||||||
|
<featureDescription>
|
||||||
|
<name>score</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>text</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
</features>
|
||||||
|
</typeDescription>
|
||||||
|
</types>
|
||||||
|
</typeSystemDescription>
|
||||||
|
<typePriorities/>
|
||||||
|
<fsIndexCollection/>
|
||||||
|
<capabilities>
|
||||||
|
<capability>
|
||||||
|
<inputs/>
|
||||||
|
<outputs/>
|
||||||
|
<languagesSupported/>
|
||||||
|
</capability>
|
||||||
|
</capabilities>
|
||||||
|
<operationalProperties>
|
||||||
|
<modifiesCas>true</modifiesCas>
|
||||||
|
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||||
|
<outputsNewCASes>false</outputsNewCASes>
|
||||||
|
</operationalProperties>
|
||||||
|
</analysisEngineMetaData>
|
||||||
|
<resourceManagerConfiguration/>
|
||||||
|
</analysisEngineDescription>
|
|
@ -0,0 +1,196 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||||
|
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||||
|
<primitive>true</primitive>
|
||||||
|
<annotatorImplementationName>org.apache.uima.alchemy.annotator.TextConceptTaggingAnnotator</annotatorImplementationName>
|
||||||
|
<analysisEngineMetaData>
|
||||||
|
<name>TextConceptTaggingAEDescriptor</name>
|
||||||
|
<description/>
|
||||||
|
<version>1.0</version>
|
||||||
|
<vendor/>
|
||||||
|
<configurationParameters>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>apikey</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>outputMode</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>linkedData</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>showSourceText</name>
|
||||||
|
<type>Integer</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>maxRetrieve</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>url</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
</configurationParameters>
|
||||||
|
<configurationParameterSettings>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>apikey</name>
|
||||||
|
<value>
|
||||||
|
<string/>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>outputMode</name>
|
||||||
|
<value>
|
||||||
|
<string>xml</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>linkedData</name>
|
||||||
|
<value>
|
||||||
|
<string>1</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>showSourceText</name>
|
||||||
|
<value>
|
||||||
|
<integer>0</integer>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>maxRetrieve</name>
|
||||||
|
<value>
|
||||||
|
<string>8</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
</configurationParameterSettings>
|
||||||
|
<typeSystemDescription>
|
||||||
|
<types>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.alchemy.ts.concept.ConceptFS</name>
|
||||||
|
<description>a concept tag</description>
|
||||||
|
<supertypeName>uima.cas.TOP</supertypeName>
|
||||||
|
<features>
|
||||||
|
<featureDescription>
|
||||||
|
<name>text</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>relevance</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>website</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>geo</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>dbpedia</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>yago</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>opencyc</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>freebase</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>ciaFactbook</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>census</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>geonames</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>musicBrainz</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>crunchbase</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>semanticCrunchbase</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
</features>
|
||||||
|
</typeDescription>
|
||||||
|
</types>
|
||||||
|
</typeSystemDescription>
|
||||||
|
<typePriorities/>
|
||||||
|
<fsIndexCollection/>
|
||||||
|
<capabilities>
|
||||||
|
<capability>
|
||||||
|
<inputs/>
|
||||||
|
<outputs/>
|
||||||
|
<languagesSupported/>
|
||||||
|
</capability>
|
||||||
|
</capabilities>
|
||||||
|
<operationalProperties>
|
||||||
|
<modifiesCas>true</modifiesCas>
|
||||||
|
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||||
|
<outputsNewCASes>false</outputsNewCASes>
|
||||||
|
</operationalProperties>
|
||||||
|
</analysisEngineMetaData>
|
||||||
|
<resourceManagerConfiguration/>
|
||||||
|
</analysisEngineDescription>
|
|
@ -0,0 +1,107 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||||
|
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||||
|
<primitive>true</primitive>
|
||||||
|
<annotatorImplementationName>org.apache.uima.alchemy.annotator.TextKeywordExtractionAnnotator</annotatorImplementationName>
|
||||||
|
<analysisEngineMetaData>
|
||||||
|
<name>TextKeywordExtractionAEDescriptor</name>
|
||||||
|
<description/>
|
||||||
|
<version>1.0</version>
|
||||||
|
<vendor/>
|
||||||
|
<configurationParameters>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>apikey</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>outputMode</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>baseUrl</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>url</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>maxRetrieve</name>
|
||||||
|
<type>Integer</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>showSourceText</name>
|
||||||
|
<type>Integer</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
</configurationParameters>
|
||||||
|
<configurationParameterSettings>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>outputMode</name>
|
||||||
|
<value>
|
||||||
|
<string>xml</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<!-- Placeholder only: never commit a real AlchemyAPI key. The actual key is
     expected to be supplied at deployment time (the ExtServicesAE aggregate
     overrides this parameter through its keyword_apikey setting). -->
<nameValuePair>
|
||||||
|
<name>apikey</name>
|
||||||
|
<value>
|
||||||
|
<string>AA_API_KEY</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>maxRetrieve</name>
|
||||||
|
<value>
|
||||||
|
<integer>10</integer>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>showSourceText</name>
|
||||||
|
<value>
|
||||||
|
<integer>0</integer>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
</configurationParameterSettings>
|
||||||
|
<typeSystemDescription>
|
||||||
|
<types>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.alchemy.ts.keywords.KeywordFS</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>uima.cas.TOP</supertypeName>
|
||||||
|
<features>
|
||||||
|
<featureDescription>
|
||||||
|
<name>text</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
</features>
|
||||||
|
</typeDescription>
|
||||||
|
</types>
|
||||||
|
</typeSystemDescription>
|
||||||
|
<typePriorities/>
|
||||||
|
<fsIndexCollection/>
|
||||||
|
<capabilities>
|
||||||
|
<capability>
|
||||||
|
<inputs/>
|
||||||
|
<outputs/>
|
||||||
|
<languagesSupported/>
|
||||||
|
</capability>
|
||||||
|
</capabilities>
|
||||||
|
<operationalProperties>
|
||||||
|
<modifiesCas>true</modifiesCas>
|
||||||
|
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||||
|
<outputsNewCASes>false</outputsNewCASes>
|
||||||
|
</operationalProperties>
|
||||||
|
</analysisEngineMetaData>
|
||||||
|
<resourceManagerConfiguration/>
|
||||||
|
</analysisEngineDescription>
|
|
@ -0,0 +1,107 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||||
|
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||||
|
<primitive>true</primitive>
|
||||||
|
<annotatorImplementationName>org.apache.uima.alchemy.annotator.TextLanguageDetectionAnnotator</annotatorImplementationName>
|
||||||
|
<analysisEngineMetaData>
|
||||||
|
<name>TextLanguageDetectionAEDescriptor</name>
|
||||||
|
<description/>
|
||||||
|
<version>1.0</version>
|
||||||
|
<vendor/>
|
||||||
|
<configurationParameters>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>apikey</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>outputMode</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>true</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>url</name>
|
||||||
|
<type>String</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
</configurationParameter>
|
||||||
|
</configurationParameters>
|
||||||
|
<configurationParameterSettings>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>outputMode</name>
|
||||||
|
<value>
|
||||||
|
<string>xml</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
<nameValuePair>
|
||||||
|
<name>apikey</name>
|
||||||
|
<value>
|
||||||
|
<string>AA_API_KEY</string>
|
||||||
|
</value>
|
||||||
|
</nameValuePair>
|
||||||
|
</configurationParameterSettings>
|
||||||
|
<typeSystemDescription>
|
||||||
|
<types>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.uima.alchemy.ts.language.LanguageFS</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>uima.cas.TOP</supertypeName>
|
||||||
|
<features>
|
||||||
|
<featureDescription>
|
||||||
|
<name>language</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>iso6391</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>iso6392</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>iso6393</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>ethnologue</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>nativeSpeakers</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>wikipedia</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
</features>
|
||||||
|
</typeDescription>
|
||||||
|
</types>
|
||||||
|
</typeSystemDescription>
|
||||||
|
<typePriorities/>
|
||||||
|
<fsIndexCollection/>
|
||||||
|
<capabilities>
|
||||||
|
<capability>
|
||||||
|
<inputs/>
|
||||||
|
<outputs/>
|
||||||
|
<languagesSupported/>
|
||||||
|
</capability>
|
||||||
|
</capabilities>
|
||||||
|
<operationalProperties>
|
||||||
|
<modifiesCas>true</modifiesCas>
|
||||||
|
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||||
|
<outputsNewCASes>false</outputsNewCASes>
|
||||||
|
</operationalProperties>
|
||||||
|
</analysisEngineMetaData>
|
||||||
|
<resourceManagerConfiguration/>
|
||||||
|
</analysisEngineDescription>
|
|
@ -0,0 +1,403 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
|
||||||
|
-->
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.uima.alchemy.annotator.TextRankedNamedEntityExtractionAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>TextRankedEntityExtractionAEDescriptor</name>
    <description/>
    <version>1.0</version>
    <vendor/>

    <!-- Declared parameters: nine single-valued settings, four of them mandatory. -->
    <configurationParameters>
      <configurationParameter>
        <name>apikey</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>outputMode</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>disambiguate</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>linkedData</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>showSourceText</name>
        <type>Integer</type>
        <multiValued>false</multiValued>
        <mandatory>true</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>baseUrl</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>url</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>coreference</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
      <configurationParameter>
        <name>quotations</name>
        <type>String</type>
        <multiValued>false</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>
    </configurationParameters>

    <!-- Default values. baseUrl and url receive no value in this descriptor. -->
    <configurationParameterSettings>
      <!-- Mandatory, but intentionally blank here; presumably supplied by the deployer (TODO confirm). -->
      <nameValuePair>
        <name>apikey</name>
        <value><string/></value>
      </nameValuePair>
      <nameValuePair>
        <name>outputMode</name>
        <value><string>xml</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>disambiguate</name>
        <value><integer>1</integer></value>
      </nameValuePair>
      <nameValuePair>
        <name>linkedData</name>
        <value><string>1</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>coreference</name>
        <value><string>1</string></value>
      </nameValuePair>
      <nameValuePair>
        <name>showSourceText</name>
        <value><integer>0</integer></value>
      </nameValuePair>
      <nameValuePair>
        <name>quotations</name>
        <value><string>1</string></value>
      </nameValuePair>
    </configurationParameterSettings>

    <typeSystemDescription>
      <imports>
        <import location="baseAlchemyTypeSystemDescriptor.xml"/>
      </imports>
      <types>
        <!-- Entity types: each one is a feature-less subtype of BaseEntity, which is declared last. -->
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Anniversary</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Automobile</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.City</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Company</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Continent</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Country</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.EntertainmentAward</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Facility</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.FieldTerminology</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.FinancialMarketIndex</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.GeographicFeature</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.HealthCondition</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Holiday</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Movie</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.MusicGroup</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.NaturalDisaster</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Organization</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Person</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.PrintMedia</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.RadioProgram</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.RadioStation</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Region</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Sport</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.StateOrCounty</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Technology</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.TelevisionShow</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.TelevisionStation</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.OperatingSystem</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.SportingEvent</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.Drug</name>
          <description/>
          <supertypeName>org.apache.uima.alchemy.ts.entity.BaseEntity</supertypeName>
        </typeDescription>

        <!-- Common supertype carrying every feature shared by the entity types above. -->
        <typeDescription>
          <name>org.apache.uima.alchemy.ts.entity.BaseEntity</name>
          <description/>
          <supertypeName>uima.cas.TOP</supertypeName>
          <features>
            <featureDescription>
              <name>text</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>count</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>relevance</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>disambiguation</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>subType</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>website</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>geo</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>dbpedia</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>yago</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>opencyc</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>umbel</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>freebase</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>ciaFactbook</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>census</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>geonames</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>musicBrainz</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <!-- The only array-valued feature. -->
            <featureDescription>
              <name>quotations</name>
              <description/>
              <rangeTypeName>uima.cas.StringArray</rangeTypeName>
              <multipleReferencesAllowed>true</multipleReferencesAllowed>
            </featureDescription>
            <featureDescription>
              <name>occurrences</name>
              <description>A list of annotations annotating this entity</description>
              <rangeTypeName>uima.cas.FSList</rangeTypeName>
              <elementType>uima.tcas.Annotation</elementType>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>

    <typePriorities/>
    <fsIndexCollection/>

    <!-- No inputs, outputs or languages are declared for this engine. -->
    <capabilities>
      <capability>
        <inputs/>
        <outputs/>
        <languagesSupported/>
      </capability>
    </capabilities>

    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>
|
|
@ -0,0 +1,115 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
***************************************************************
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
***************************************************************
|
||||||
|
-->
|
||||||
|
|
||||||
|
<analysisEngineDescription
    xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>
    org.apache.uima.java
  </frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>
    org.apache.uima.annotator.WhitespaceTokenizer
  </annotatorImplementationName>

  <analysisEngineMetaData>
    <name>WhitespaceTokenizer</name>
    <description>
      creates token and sentence annotations for whitespace
      separated languages
    </description>
    <version>1.0</version>
    <vendor>The Apache Software Foundation</vendor>

    <configurationParameters>
      <configurationParameter>
        <name>SofaNames</name>
        <description>
          The Sofa names the annotator should work on. If no
          names are specified, the annotator works on the
          default sofa.
        </description>
        <type>String</type>
        <multiValued>true</multiValued>
        <mandatory>false</mandatory>
      </configurationParameter>

    </configurationParameters>

    <configurationParameterSettings>
      <!--
      <nameValuePair>
        <name>SofaNames</name>
        <value>
          <array>
            <string>sofaName</string>
          </array>
        </value>
      </nameValuePair>
      -->
    </configurationParameterSettings>

    <typeSystemDescription>
      <typeDescription>
        <name>org.apache.uima.TokenAnnotation</name>
        <description>Single token annotation</description>
        <supertypeName>uima.tcas.Annotation</supertypeName>
        <features>
          <featureDescription>
            <name>tokenType</name>
            <description>token type</description>
            <rangeTypeName>uima.cas.String</rangeTypeName>
          </featureDescription>
        </features>
      </typeDescription>

      <typeDescription>
        <name>org.apache.uima.SentenceAnnotation</name>
        <description>sentence annotation</description>
        <supertypeName>uima.tcas.Annotation</supertypeName>
        <features>

        </features>
      </typeDescription>
    </typeSystemDescription>

    <fsIndexes />

    <capabilities>
      <capability>
        <inputs />
        <outputs>
          <type>org.apache.uima.TokenAnnotation</type>
          <!--
            Must match the feature declared in the type system above
            ("tokenType"); the previous value "tokentype" differed in case and
            therefore named a feature that is not declared anywhere.
          -->
          <feature>org.apache.uima.TokenAnnotation:tokenType</feature>
          <type>org.apache.uima.SentenceAnnotation</type>
        </outputs>
        <languagesSupported>
          <language>x-unspecified</language>
        </languagesSupported>
      </capability>
    </capabilities>

  </analysisEngineMetaData>
</analysisEngineDescription>
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
|
||||||
|
-->
|
||||||
|
<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <name>baseAlchemyTypeSystemDescriptor</name>
  <description/>
  <version>1.0</version>
  <vendor/>
  <types>
    <!-- Single annotation type: a text annotation tagged with a string "alchemyType". -->
    <typeDescription>
      <name>org.apache.uima.alchemy.ts.entity.AlchemyAnnotation</name>
      <description/>
      <supertypeName>uima.tcas.Annotation</supertypeName>
      <features>
        <featureDescription>
          <name>alchemyType</name>
          <description>alchemyAPI type</description>
          <rangeTypeName>uima.cas.String</rangeTypeName>
        </featureDescription>
      </features>
    </typeDescription>
  </types>
</typeSystemDescription>
|
|
@ -0,0 +1,33 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version
|
||||||
|
2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||||
|
applicable law or agreed to in writing, software distributed under
|
||||||
|
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
|
||||||
|
OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<uimaConfig>
  <!-- The values below look like placeholders rather than usable credentials (TODO confirm). -->
  <runtimeParameters>
    <keyword_apikey>VALID_ALCHEMYAPI_KEY</keyword_apikey>
    <concept_apikey>VALID_ALCHEMYAPI_KEY</concept_apikey>
    <lang_apikey>VALID_ALCHEMYAPI_KEY</lang_apikey>
    <cat_apikey>VALID_ALCHEMYAPI_KEY</cat_apikey>
    <oc_licenseID>VALID_OPENCALAIS_KEY</oc_licenseID>
  </runtimeParameters>

  <!-- Path of the analysis engine descriptor to use. -->
  <analysisEngine>/org/apache/uima/desc/OverridingParamsExtServicesAE.xml</analysisEngine>

  <!-- Comma-separated names of the fields to analyze; merging is switched off. -->
  <analyzeFields merge="false">text,title</analyzeFields>

  <!-- Maps the "coveredText" feature of Annotation to the "tag" field. -->
  <fieldMapping>
    <type name="org.apache.uima.jcas.tcas.Annotation">
      <map feature="coveredText" field="tag"/>
    </type>
  </fieldMapping>
</uimaConfig>
|
|
@ -0,0 +1,9 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<fields>
  <!-- Single-valued string fields. -->
  <field name="language"
         type="string"
         indexed="true"
         stored="true"
         required="false"/>

  <!-- Multi-valued string fields. -->
  <field name="concept"
         type="string"
         indexed="true"
         stored="true"
         multiValued="true"
         required="false"/>
  <field name="keyword"
         type="string"
         indexed="true"
         stored="true"
         multiValued="true"
         required="false"/>

  <field name="suggested_category"
         type="string"
         indexed="true"
         stored="true"
         multiValued="false"
         required="false"/>

  <!-- Multi-valued text fields; any field whose name starts with "entity" is matched dynamically. -->
  <field name="sentence"
         type="text"
         indexed="true"
         stored="true"
         multiValued="true"
         required="false"/>
  <dynamicField name="entity*"
                type="text"
                indexed="true"
                stored="true"
                multiValued="true"/>
</fields>
|
|
@ -0,0 +1,137 @@
|
||||||
|
package org.apache.solr.uima.processor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLConnection;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.common.params.UpdateParams;
|
||||||
|
import org.apache.solr.common.util.ContentStream;
|
||||||
|
import org.apache.solr.common.util.ContentStreamBase;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.handler.XmlUpdateRequestHandler;
|
||||||
|
import org.apache.solr.request.SolrQueryRequestBase;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TestCase for {@link UIMAUpdateRequestProcessor}
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig.xml", "schema.xml", "solr-uima");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
clearIndex();
|
||||||
|
assertU(commit());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testProcessorConfiguration() {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain("uima");
|
||||||
|
assertNotNull(chained);
|
||||||
|
UIMAUpdateRequestProcessorFactory factory = (UIMAUpdateRequestProcessorFactory) chained
|
||||||
|
.getFactories()[0];
|
||||||
|
assertNotNull(factory);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testProcessing() throws Exception {
|
||||||
|
// this test requires an internet connection (e.g. opencalais api)
|
||||||
|
checkInternetConnection();
|
||||||
|
|
||||||
|
addDoc(adoc(
|
||||||
|
"id",
|
||||||
|
"2312312321312",
|
||||||
|
"text",
|
||||||
|
"SpellCheckComponent got improvement related to recent Lucene changes. \n "
|
||||||
|
+ "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
|
||||||
|
+ "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
|
||||||
|
+ " attached if you need it, but it is also committed to trunk and 3_x branch."
|
||||||
|
+ " Last Lucene European Conference has been held in Prague."));
|
||||||
|
assertU(commit());
|
||||||
|
assertQ(req("language:english"), "//*[@numFound='1']");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTwoUpdates() {
|
||||||
|
// this test requires an internet connection (e.g. opencalais api)
|
||||||
|
checkInternetConnection();
|
||||||
|
|
||||||
|
try {
|
||||||
|
addDoc(adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
|
||||||
|
+ "BarCampApache Sydney, Australia, the first ASF-backed event in the Southern "
|
||||||
|
+ "Hemisphere!"));
|
||||||
|
assertU(commit());
|
||||||
|
assertQ(req("language:english"), "//*[@numFound='1']");
|
||||||
|
|
||||||
|
addDoc(adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
|
||||||
|
+ "of Sydney's Darlington Centre, the BarCampApache \"unconference\" will be"
|
||||||
|
+ " attendee-driven, facilitated by members of the Apache community and will "
|
||||||
|
+ "focus on the Apache..."));
|
||||||
|
assertU(commit());
|
||||||
|
assertQ(req("language:english"), "//*[@numFound='2']");
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
assumeNoException("Multiple updates on same instance didn't work", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addDoc(String doc) throws Exception {
|
||||||
|
Map<String, String[]> params = new HashMap<String, String[]>();
|
||||||
|
params.put(UpdateParams.UPDATE_PROCESSOR, new String[] { "uima" });
|
||||||
|
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
|
||||||
|
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), (SolrParams) mmparams) {
|
||||||
|
};
|
||||||
|
|
||||||
|
XmlUpdateRequestHandler handler = new XmlUpdateRequestHandler();
|
||||||
|
handler.init(null);
|
||||||
|
ArrayList<ContentStream> streams = new ArrayList<ContentStream>(2);
|
||||||
|
streams.add(new ContentStreamBase.StringStream(doc));
|
||||||
|
req.setContentStreams(streams);
|
||||||
|
handler.handleRequestBody(req, new SolrQueryResponse());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkInternetConnection() {
|
||||||
|
try {
|
||||||
|
URLConnection conn = new URL("http://www.apache.org/").openConnection();
|
||||||
|
conn.setConnectTimeout(5000);
|
||||||
|
conn.setReadTimeout(5000);
|
||||||
|
conn.connect();
|
||||||
|
} catch (Exception ex) {
|
||||||
|
assumeNoException("This test requires an internet connection", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------
|
||||||
|
# Use a protected word file to protect against the stemmer reducing two
|
||||||
|
# unrelated words to the same base word.
|
||||||
|
|
||||||
|
# Some non-words that normally won't be encountered,
|
||||||
|
# just to test that they won't be stemmed.
|
||||||
|
dontstems
|
||||||
|
zwhacky
|
||||||
|
|
|
@ -0,0 +1,679 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version
|
||||||
|
2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||||
|
applicable law or agreed to in writing, software distributed under
|
||||||
|
the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
|
||||||
|
OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
This is the Solr schema file. This file should be named "schema.xml"
|
||||||
|
and should be in the conf directory under the solr home (i.e.
|
||||||
|
./solr/conf/schema.xml by default) or located where the classloader
|
||||||
|
for the Solr webapp can find it. This example schema is the
|
||||||
|
recommended starting point for users. It should be kept correct and
|
||||||
|
concise, usable out-of-the-box. For more information on how to
|
||||||
|
customize this file, please see
|
||||||
|
http://wiki.apache.org/solr/SchemaXml PERFORMANCE NOTE: this schema
|
||||||
|
includes many optional features and should not be used for
|
||||||
|
benchmarking. To improve performance one could - set stored="false"
|
||||||
|
for all fields possible (esp large fields) when you only need to
|
||||||
|
search on the field but don't need to return the original value. -
|
||||||
|
set indexed="false" if you don't need to search on the field, but
|
||||||
|
only return the field as a result of searching on other indexed
|
||||||
|
fields. - remove all unneeded copyField statements - for best index
|
||||||
|
size and searching performance, set "index" to false for all general
|
||||||
|
text fields, use copyField to copy them to the catchall "text"
|
||||||
|
field, and use that for searching. - For maximum indexing
|
||||||
|
performance, use the StreamingUpdateSolrServer java client. -
|
||||||
|
Remember to run the JVM in server mode, and use a higher logging
|
||||||
|
level that avoids logging every request
|
||||||
|
-->
|
||||||
|
|
||||||
|
<schema name="sample" version="1.2">
|
||||||
|
<!--
|
||||||
|
attribute "name" is the name of this schema and is only used for
|
||||||
|
display purposes. Applications should change this to reflect the
|
||||||
|
nature of the search collection. version="1.2" is Solr's version
|
||||||
|
number for the schema syntax and semantics. It should not normally
|
||||||
|
be changed by applications. 1.0: multiValued attribute did not
|
||||||
|
exist, all fields are multiValued by nature 1.1: multiValued
|
||||||
|
attribute introduced, false by default 1.2: omitTermFreqAndPositions
|
||||||
|
attribute introduced, true by default except for text fields.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<types>
|
||||||
|
<!--
|
||||||
|
field type definitions. The "name" attribute is just a label to be
|
||||||
|
used by field definitions. The "class" attribute and any other
|
||||||
|
attributes determine the real behavior of the fieldType. Class
|
||||||
|
names starting with "solr" refer to java classes in the
|
||||||
|
org.apache.solr.analysis package.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
The StrField type is not analyzed, but indexed/stored verbatim. -
|
||||||
|
StrField and TextField support an optional compressThreshold which
|
||||||
|
limits compression (if enabled in the derived fields) to values
|
||||||
|
which exceed a certain size (in characters).
|
||||||
|
-->
|
||||||
|
<fieldType name="string" class="solr.StrField"
|
||||||
|
sortMissingLast="true" omitNorms="true" />
|
||||||
|
|
||||||
|
<!-- boolean type: "true" or "false" -->
|
||||||
|
<fieldType name="boolean" class="solr.BoolField"
|
||||||
|
sortMissingLast="true" omitNorms="true" />
|
||||||
|
<!--
|
||||||
|
Binary data type. The data should be sent/retrieved as Base64
|
||||||
|
encoded Strings
|
||||||
|
-->
|
||||||
|
<fieldtype name="binary" class="solr.BinaryField" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
The optional sortMissingLast and sortMissingFirst attributes are
|
||||||
|
currently supported on types that are sorted internally as
|
||||||
|
strings. This includes
|
||||||
|
"string","boolean","sint","slong","sfloat","sdouble","pdate" - If
|
||||||
|
sortMissingLast="true", then a sort on this field will cause
|
||||||
|
documents without the field to come after documents with the
|
||||||
|
field, regardless of the requested sort order (asc or desc). - If
|
||||||
|
sortMissingFirst="true", then a sort on this field will cause
|
||||||
|
documents without the field to come before documents with the
|
||||||
|
field, regardless of the requested sort order. - If
|
||||||
|
sortMissingLast="false" and sortMissingFirst="false" (the
|
||||||
|
default), then default lucene sorting will be used which places
|
||||||
|
docs without the field first in an ascending sort and last in a
|
||||||
|
descending sort.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Default numeric field types. For faster range queries, consider
|
||||||
|
the tint/tfloat/tlong/tdouble types.
|
||||||
|
-->
|
||||||
|
<fieldType name="int" class="solr.TrieIntField"
|
||||||
|
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
|
||||||
|
<fieldType name="float" class="solr.TrieFloatField"
|
||||||
|
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
|
||||||
|
<fieldType name="long" class="solr.TrieLongField"
|
||||||
|
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
|
||||||
|
<fieldType name="double" class="solr.TrieDoubleField"
|
||||||
|
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Numeric field types that index each value at various levels of
|
||||||
|
precision to accelerate range queries when the number of values
|
||||||
|
between the range endpoints is large. See the javadoc for
|
||||||
|
NumericRangeQuery for internal implementation details. Smaller
|
||||||
|
precisionStep values (specified in bits) will lead to more tokens
|
||||||
|
indexed per value, slightly larger index size, and faster range
|
||||||
|
queries. A precisionStep of 0 disables indexing at different
|
||||||
|
precision levels.
|
||||||
|
-->
|
||||||
|
<fieldType name="tint" class="solr.TrieIntField"
|
||||||
|
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
|
||||||
|
<fieldType name="tfloat" class="solr.TrieFloatField"
|
||||||
|
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
|
||||||
|
<fieldType name="tlong" class="solr.TrieLongField"
|
||||||
|
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
|
||||||
|
<fieldType name="tdouble" class="solr.TrieDoubleField"
|
||||||
|
precisionStep="8" omitNorms="true" positionIncrementGap="0" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
The format for this date field is of the form
|
||||||
|
1995-12-31T23:59:59Z, and is a more restricted form of the
|
||||||
|
canonical representation of dateTime
|
||||||
|
http://www.w3.org/TR/xmlschema-2/#dateTime The trailing "Z"
|
||||||
|
designates UTC time and is mandatory. Optional fractional seconds
|
||||||
|
are allowed: 1995-12-31T23:59:59.999Z All other components are
|
||||||
|
mandatory. Expressions can also be used to denote calculations
|
||||||
|
that should be performed relative to "NOW" to determine the value,
|
||||||
|
ie... NOW/HOUR ... Round to the start of the current hour NOW-1DAY
|
||||||
|
... Exactly 1 day prior to now NOW/DAY+6MONTHS+3DAYS ... 6 months
|
||||||
|
and 3 days in the future from the start of the current day Consult
|
||||||
|
the DateField javadocs for more information. Note: For faster
|
||||||
|
range queries, consider the tdate type
|
||||||
|
-->
|
||||||
|
<fieldType name="date" class="solr.TrieDateField"
|
||||||
|
omitNorms="true" precisionStep="0" positionIncrementGap="0" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
A Trie based date field for faster date range queries and date
|
||||||
|
faceting.
|
||||||
|
-->
|
||||||
|
<fieldType name="tdate" class="solr.TrieDateField"
|
||||||
|
omitNorms="true" precisionStep="6" positionIncrementGap="0" />
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Note: These should only be used for compatibility with existing
|
||||||
|
indexes (created with older Solr versions) or if
|
||||||
|
"sortMissingFirst" or "sortMissingLast" functionality is needed.
|
||||||
|
Use Trie based fields instead. Plain numeric field types that
|
||||||
|
store and index the text value verbatim (and hence don't support
|
||||||
|
range queries, since the lexicographic ordering isn't equal to the
|
||||||
|
numeric ordering)
|
||||||
|
-->
|
||||||
|
<fieldType name="pint" class="solr.IntField" omitNorms="true" />
|
||||||
|
<fieldType name="plong" class="solr.LongField" omitNorms="true" />
|
||||||
|
<fieldType name="pfloat" class="solr.FloatField"
|
||||||
|
omitNorms="true" />
|
||||||
|
<fieldType name="pdouble" class="solr.DoubleField"
|
||||||
|
omitNorms="true" />
|
||||||
|
<fieldType name="pdate" class="solr.DateField"
|
||||||
|
sortMissingLast="true" omitNorms="true" />
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Note: These should only be used for compatibility with existing
|
||||||
|
indexes (created with older Solr versions) or if
|
||||||
|
"sortMissingFirst" or "sortMissingLast" functionality is needed.
|
||||||
|
Use Trie based fields instead. Numeric field types that manipulate
|
||||||
|
the value into a string value that isn't human-readable in its
|
||||||
|
internal form, but with a lexicographic ordering the same as the
|
||||||
|
numeric ordering, so that range queries work correctly.
|
||||||
|
-->
|
||||||
|
<fieldType name="sint" class="solr.SortableIntField"
|
||||||
|
sortMissingLast="true" omitNorms="true" />
|
||||||
|
<fieldType name="slong" class="solr.SortableLongField"
|
||||||
|
sortMissingLast="true" omitNorms="true" />
|
||||||
|
<fieldType name="sfloat" class="solr.SortableFloatField"
|
||||||
|
sortMissingLast="true" omitNorms="true" />
|
||||||
|
<fieldType name="sdouble" class="solr.SortableDoubleField"
|
||||||
|
sortMissingLast="true" omitNorms="true" />
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
The "RandomSortField" is not used to store or search any data. You
|
||||||
|
can declare fields of this type in your schema to generate
|
||||||
|
pseudo-random orderings of your docs for sorting purposes. The
|
||||||
|
ordering is generated based on the field name and the version of
|
||||||
|
the index. As long as the index version remains unchanged, and the
|
||||||
|
same field name is reused, the ordering of the docs will be
|
||||||
|
consistent. If you want different pseudo-random orderings of
|
||||||
|
documents, for the same version of the index, use a dynamicField
|
||||||
|
and change the name
|
||||||
|
-->
|
||||||
|
<fieldType name="random" class="solr.RandomSortField"
|
||||||
|
indexed="true" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
solr.TextField allows the specification of custom text analyzers
|
||||||
|
specified as a tokenizer and a list of token filters. Different
|
||||||
|
analyzers may be specified for indexing and querying. The optional
|
||||||
|
positionIncrementGap puts space between multiple fields of this
|
||||||
|
type on the same document, with the purpose of preventing false
|
||||||
|
phrase matching across fields. For more info on customizing your
|
||||||
|
analyzer chain, please see
|
||||||
|
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
One can also specify an existing Analyzer class that has a default
|
||||||
|
constructor via the class attribute on the analyzer element
|
||||||
|
<fieldType name="text_greek" class="solr.TextField"> <analyzer
|
||||||
|
class="org.apache.lucene.analysis.el.GreekAnalyzer"/> </fieldType>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
A text field that only splits on whitespace for exact matching of
|
||||||
|
words
|
||||||
|
-->
|
||||||
|
<fieldType name="text_ws" class="solr.TextField"
|
||||||
|
positionIncrementGap="100">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
A text field that uses WordDelimiterFilter to enable splitting and
|
||||||
|
matching of words on case-change, alpha numeric boundaries, and
|
||||||
|
non-alphanumeric chars, so that a query of "wifi" or "wi fi" could
|
||||||
|
match a document containing "Wi-Fi". Synonyms and stopwords are
|
||||||
|
customized by external files, and stemming is enabled.
|
||||||
|
-->
|
||||||
|
<fieldType name="text" class="solr.TextField"
|
||||||
|
positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||||
|
<!--
|
||||||
|
in this example, we will only use synonyms at query time
|
||||||
|
<filter class="solr.SynonymFilterFactory"
|
||||||
|
synonyms="index_synonyms.txt" ignoreCase="true"
|
||||||
|
expand="false"/>
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
Case insensitive stop word removal. add
|
||||||
|
enablePositionIncrements=true in both the index and query
|
||||||
|
analyzers to leave a 'gap' for more accurate phrase queries.
|
||||||
|
-->
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||||
|
words="stopwords.txt" enablePositionIncrements="true" />
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory"
|
||||||
|
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||||
|
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1" />
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
|
||||||
|
ignoreCase="true" expand="true" />
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||||
|
words="stopwords.txt" enablePositionIncrements="true" />
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory"
|
||||||
|
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||||
|
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1" />
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Less flexible matching, but fewer false matches. Probably not ideal
|
||||||
|
for product names, but may be good for SKUs. Can insert dashes in
|
||||||
|
the wrong place and still match.
|
||||||
|
-->
|
||||||
|
<fieldType name="textTight" class="solr.TextField"
|
||||||
|
positionIncrementGap="100">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
|
||||||
|
ignoreCase="true" expand="false" />
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||||
|
words="stopwords.txt" />
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory"
|
||||||
|
generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||||
|
catenateNumbers="1" catenateAll="0" />
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
this filter can remove any duplicate tokens that appear at the
|
||||||
|
same position - sometimes possible with WordDelimiterFilter in
|
||||||
|
conjunction with stemming.
|
||||||
|
-->
|
||||||
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
A general unstemmed text field - good if one does not know the
|
||||||
|
language of the field
|
||||||
|
-->
|
||||||
|
<fieldType name="textgen" class="solr.TextField"
|
||||||
|
positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||||
|
words="stopwords.txt" enablePositionIncrements="true" />
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory"
|
||||||
|
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||||
|
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
|
||||||
|
ignoreCase="true" expand="true" />
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||||
|
words="stopwords.txt" enablePositionIncrements="true" />
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory"
|
||||||
|
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||||
|
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
A general unstemmed text field that indexes tokens normally and
|
||||||
|
also reversed (via ReversedWildcardFilterFactory), to enable more
|
||||||
|
efficient leading wildcard queries.
|
||||||
|
-->
|
||||||
|
<fieldType name="text_rev" class="solr.TextField"
|
||||||
|
positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||||
|
words="stopwords.txt" enablePositionIncrements="true" />
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory"
|
||||||
|
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||||
|
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
<filter class="solr.ReversedWildcardFilterFactory"
|
||||||
|
withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2"
|
||||||
|
maxFractionAsterisk="0.33" />
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
|
||||||
|
ignoreCase="true" expand="true" />
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||||
|
words="stopwords.txt" enablePositionIncrements="true" />
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory"
|
||||||
|
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||||
|
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- charFilter + WhitespaceTokenizer -->
|
||||||
|
<!--
|
||||||
|
<fieldType name="textCharNorm" class="solr.TextField"
|
||||||
|
positionIncrementGap="100" > <analyzer> <charFilter
|
||||||
|
class="solr.MappingCharFilterFactory"
|
||||||
|
mapping="mapping-ISOLatin1Accent.txt"/> <tokenizer
|
||||||
|
class="solr.WhitespaceTokenizerFactory"/> </analyzer> </fieldType>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
This is an example of using the KeywordTokenizer along with
|
||||||
|
various TokenFilterFactories to produce a sortable field that does
|
||||||
|
not include some properties of the source text
|
||||||
|
-->
|
||||||
|
<fieldType name="alphaOnlySort" class="solr.TextField"
|
||||||
|
sortMissingLast="true" omitNorms="true">
|
||||||
|
<analyzer>
|
||||||
|
<!--
|
||||||
|
KeywordTokenizer does no actual tokenizing, so the entire
|
||||||
|
input string is preserved as a single token
|
||||||
|
-->
|
||||||
|
<tokenizer class="solr.KeywordTokenizerFactory" />
|
||||||
|
<!--
|
||||||
|
The LowerCase TokenFilter does what you expect, which can be
|
||||||
|
useful when you want your sorting to be case insensitive
|
||||||
|
-->
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
<!-- The TrimFilter removes any leading or trailing whitespace -->
|
||||||
|
<filter class="solr.TrimFilterFactory" />
|
||||||
|
<!--
|
||||||
|
The PatternReplaceFilter gives you the flexibility to use Java
|
||||||
|
Regular expression to replace any sequence of characters
|
||||||
|
matching a pattern with an arbitrary replacement string, which
|
||||||
|
may include back references to portions of the original string
|
||||||
|
matched by the pattern. See the Java Regular Expression
|
||||||
|
documentation for more information on pattern and replacement
|
||||||
|
string syntax.
|
||||||
|
|
||||||
|
http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
|
||||||
|
-->
|
||||||
|
<filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])"
|
||||||
|
replacement="" replace="all" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldtype name="phonetic" stored="false" indexed="true"
|
||||||
|
class="solr.TextField">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.StandardTokenizerFactory" />
|
||||||
|
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
|
||||||
|
<fieldtype name="payloads" stored="false" indexed="true"
|
||||||
|
class="solr.TextField">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||||
|
<!--
|
||||||
|
The DelimitedPayloadTokenFilter can put payloads on tokens...
|
||||||
|
for example, a token of "foo|1.4" would be indexed as "foo"
|
||||||
|
with a payload of 1.4f Attributes of the
|
||||||
|
DelimitedPayloadTokenFilterFactory : "delimiter" - a one
|
||||||
|
character delimiter. Default is | (pipe) "encoder" - how to
|
||||||
|
encode the following value into a payload float ->
|
||||||
|
org.apache.lucene.analysis.payloads.FloatEncoder, integer ->
|
||||||
|
o.a.l.a.p.IntegerEncoder identity -> o.a.l.a.p.IdentityEncoder
|
||||||
|
Fully Qualified class name implementing PayloadEncoder,
|
||||||
|
Encoder must have a no arg constructor.
|
||||||
|
-->
|
||||||
|
<filter class="solr.DelimitedPayloadTokenFilterFactory"
|
||||||
|
encoder="float" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
lowercases the entire field value, keeping it as a single token.
|
||||||
|
-->
|
||||||
|
<fieldType name="lowercase" class="solr.TextField"
|
||||||
|
positionIncrementGap="100">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.KeywordTokenizerFactory" />
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
since fields of this type are by default not stored or indexed,
|
||||||
|
any data added to them will be ignored outright.
|
||||||
|
-->
|
||||||
|
<fieldtype name="ignored" stored="false" indexed="false"
|
||||||
|
multiValued="true" class="solr.StrField" />
|
||||||
|
|
||||||
|
</types>
|
||||||
|
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<!--
|
||||||
|
Valid attributes for fields: name: mandatory - the name for the
|
||||||
|
field type: mandatory - the name of a previously defined type from
|
||||||
|
the <types> section indexed: true if this field should be indexed
|
||||||
|
(searchable or sortable) stored: true if this field should be
|
||||||
|
retrievable compressed: [false] if this field should be stored
|
||||||
|
using gzip compression (this will only apply if the field type is
|
||||||
|
compressible; among the standard field types, only TextField and
|
||||||
|
StrField are) multiValued: true if this field may contain multiple
|
||||||
|
values per document omitNorms: (expert) set to true to omit the
|
||||||
|
norms associated with this field (this disables length
|
||||||
|
normalization and index-time boosting for the field, and saves
|
||||||
|
some memory). Only full-text fields or fields that need an
|
||||||
|
index-time boost need norms. termVectors: [false] set to true to
|
||||||
|
store the term vector for a given field. When using MoreLikeThis,
|
||||||
|
fields used for similarity should be stored for best performance.
|
||||||
|
termPositions: Store position information with the term vector.
|
||||||
|
This will increase storage costs. termOffsets: Store offset
|
||||||
|
information with the term vector. This will increase storage
|
||||||
|
costs. default: a value that should be used if no value is
|
||||||
|
specified when adding a document.
|
||||||
|
-->
|
||||||
|
<field name="id" type="string" indexed="true" stored="true"
|
||||||
|
required="true" />
|
||||||
|
<field name="sku" type="textTight" indexed="true" stored="true"
|
||||||
|
omitNorms="true" />
|
||||||
|
<field name="name" type="textgen" indexed="true" stored="true" />
|
||||||
|
<field name="alphaNameSort" type="alphaOnlySort" indexed="true"
|
||||||
|
stored="false" />
|
||||||
|
<field name="manu" type="textgen" indexed="true" stored="true"
|
||||||
|
omitNorms="true" />
|
||||||
|
<field name="cat" type="text_ws" indexed="true" stored="true"
|
||||||
|
multiValued="true" omitNorms="true" />
|
||||||
|
<field name="features" type="text" indexed="true" stored="true"
|
||||||
|
multiValued="true" />
|
||||||
|
<field name="includes" type="text" indexed="true" stored="true"
|
||||||
|
termVectors="true" termPositions="true" termOffsets="true" />
|
||||||
|
|
||||||
|
<field name="weight" type="float" indexed="true" stored="true" />
|
||||||
|
<field name="price" type="float" indexed="true" stored="true" />
|
||||||
|
<field name="popularity" type="int" indexed="true" stored="true" />
|
||||||
|
<field name="inStock" type="boolean" indexed="true" stored="true" />
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Common metadata fields, named specifically to match up with
|
||||||
|
SolrCell metadata when parsing rich documents such as Word, PDF.
|
||||||
|
Some fields are multiValued only because Tika currently may return
|
||||||
|
multiple values for them.
|
||||||
|
-->
|
||||||
|
<field name="title" type="text" indexed="true" stored="true"
|
||||||
|
multiValued="true" />
|
||||||
|
<field name="subject" type="text" indexed="true" stored="true" />
|
||||||
|
<field name="description" type="text" indexed="true" stored="true" />
|
||||||
|
<field name="comments" type="text" indexed="true" stored="true" />
|
||||||
|
<field name="author" type="textgen" indexed="true" stored="true" />
|
||||||
|
<field name="keywords" type="textgen" indexed="true" stored="true" />
|
||||||
|
<field name="category" type="textgen" indexed="true" stored="true" />
|
||||||
|
<field name="content_type" type="string" indexed="true"
|
||||||
|
stored="true" multiValued="true" />
|
||||||
|
<field name="last_modified" type="date" indexed="true" stored="true" />
|
||||||
|
<field name="links" type="string" indexed="true" stored="true"
|
||||||
|
multiValued="true" />
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
catchall field, containing all other searchable text fields
|
||||||
|
(implemented via copyField further on in this schema)
|
||||||
|
-->
|
||||||
|
<field name="text" type="text" indexed="true" stored="false"
|
||||||
|
multiValued="true" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
catchall text field that indexes tokens both normally and in
|
||||||
|
reverse for efficient leading wildcard queries.
|
||||||
|
-->
|
||||||
|
<field name="text_rev" type="text_rev" indexed="true" stored="false"
|
||||||
|
multiValued="true" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
non-tokenized version of manufacturer to make it easier to sort or
|
||||||
|
group results by manufacturer. copied from "manu" via copyField
|
||||||
|
-->
|
||||||
|
<field name="manu_exact" type="string" indexed="true" stored="false" />
|
||||||
|
|
||||||
|
<field name="payloads" type="payloads" indexed="true" stored="true" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Uncommenting the following will create a "timestamp" field using a
|
||||||
|
default value of "NOW" to indicate when each document was indexed.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<field name="timestamp" type="date" indexed="true" stored="true"
|
||||||
|
default="NOW" multiValued="false"/>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<field name="language" type="string" indexed="true" stored="true" required="false"/>
|
||||||
|
<field name="concept" type="string" indexed="true" stored="true" multiValued="true" required="false"/>
|
||||||
|
<field name="keyword" type="string" indexed="true" stored="true" multiValued="true" required="false"/>
|
||||||
|
<field name="suggested_category" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||||
|
<field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false" />
|
||||||
|
<dynamicField name="entity*" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Dynamic field definitions. If a field name is not found,
|
||||||
|
dynamicFields will be used if the name matches any of the
|
||||||
|
patterns. RESTRICTION: the glob-like pattern in the name attribute
|
||||||
|
must have a "*" only at the start or the end. EXAMPLE: name="*_i"
|
||||||
|
will match any field ending in _i (like myid_i, z_i) Longer
|
||||||
|
patterns will be matched first. if equal size patterns both match,
|
||||||
|
the first appearing in the schema will be used. <dynamicField
|
||||||
|
name="*_i" type="int" indexed="true" stored="true"/> <dynamicField
|
||||||
|
name="*_s" type="string" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_l" type="long" indexed="true"
|
||||||
|
stored="true"/> <dynamicField name="*_t" type="text"
|
||||||
|
indexed="true" stored="true"/> <dynamicField name="*_b"
|
||||||
|
type="boolean" indexed="true" stored="true"/> <dynamicField
|
||||||
|
name="*_f" type="float" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_d" type="double" indexed="true"
|
||||||
|
stored="true"/> <dynamicField name="*_dt" type="date"
|
||||||
|
indexed="true" stored="true"/> <dynamicField name="*_ti"
|
||||||
|
type="tint" indexed="true" stored="true"/> <dynamicField
|
||||||
|
name="*_tl" type="tlong" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_tf" type="tfloat" indexed="true"
|
||||||
|
stored="true"/> <dynamicField name="*_td" type="tdouble"
|
||||||
|
indexed="true" stored="true"/> <dynamicField name="*_tdt"
|
||||||
|
type="tdate" indexed="true" stored="true"/> <dynamicField
|
||||||
|
name="*_pi" type="pint" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
|
||||||
|
<dynamicField name="attr_*" type="textgen" indexed="true"
|
||||||
|
stored="true" multiValued="true"/> <dynamicField name="random_*"
|
||||||
|
type="random" />
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
uncomment the following to ignore any fields that don't already
|
||||||
|
match an existing field name or dynamic field, rather than
|
||||||
|
reporting them as an error. alternately, change the type="ignored"
|
||||||
|
to some other type e.g. "text" if you want unknown fields indexed
|
||||||
|
and/or stored by default
|
||||||
|
-->
|
||||||
|
<!--dynamicField name="*" type="ignored" multiValued="true" /-->
|
||||||
|
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Field to use to determine and enforce document uniqueness. Unless
|
||||||
|
this field is marked with required="false", it will be a required
|
||||||
|
field
|
||||||
|
-->
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
field for the QueryParser to use when an explicit fieldname is
|
||||||
|
absent
|
||||||
|
-->
|
||||||
|
<defaultSearchField>text</defaultSearchField>
|
||||||
|
|
||||||
|
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
||||||
|
<solrQueryParser defaultOperator="OR" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
copyField commands copy one field to another at the time a document
|
||||||
|
is added to the index. It's used either to index the same field
|
||||||
|
differently, or to add multiple fields to the same field for
|
||||||
|
easier/faster searching.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<copyField source="cat" dest="text" />
|
||||||
|
<copyField source="name" dest="text" />
|
||||||
|
<copyField source="manu" dest="text" />
|
||||||
|
<copyField source="features" dest="text" />
|
||||||
|
<copyField source="includes" dest="text" />
|
||||||
|
<copyField source="manu" dest="manu_exact" />
|
||||||
|
|
||||||
|
|
||||||
|
<!--copyField source="Titolo" dest="text"/-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Above, multiple source fields are copied to the [text] field.
|
||||||
|
Another way to map multiple source fields to the same destination
|
||||||
|
field is to use the dynamic field syntax. copyField also supports a
|
||||||
|
maxChars to copy setting.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
copy name to alphaNameSort, a field designed for sorting by name
|
||||||
|
-->
|
||||||
|
<!-- <copyField source="name" dest="alphaNameSort"/> -->
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Similarity is the scoring routine for each document vs. a query. A
|
||||||
|
custom similarity may be specified here, but the default is fine for
|
||||||
|
most applications.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<similarity class="org.apache.lucene.search.DefaultSimilarity"/>
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
... OR ... Specify a SimilarityFactory class name implementation
|
||||||
|
allowing parameters to be used.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<similarity class="com.example.solr.CustomSimilarityFactory"> <str
|
||||||
|
name="paramkey">param value</str> </similarity>
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
|
</schema>
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,2 @@
|
||||||
|
pizza
|
||||||
|
history
|
|
@ -0,0 +1,58 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------
|
||||||
|
# a couple of test stopwords to test that the words are really being
|
||||||
|
# configured from this file:
|
||||||
|
stopworda
|
||||||
|
stopwordb
|
||||||
|
|
||||||
|
#Standard English stop words taken from Lucene's StopAnalyzer
|
||||||
|
a
|
||||||
|
an
|
||||||
|
and
|
||||||
|
are
|
||||||
|
as
|
||||||
|
at
|
||||||
|
be
|
||||||
|
but
|
||||||
|
by
|
||||||
|
for
|
||||||
|
if
|
||||||
|
in
|
||||||
|
into
|
||||||
|
is
|
||||||
|
it
|
||||||
|
no
|
||||||
|
not
|
||||||
|
of
|
||||||
|
on
|
||||||
|
or
|
||||||
|
s
|
||||||
|
such
|
||||||
|
t
|
||||||
|
that
|
||||||
|
the
|
||||||
|
their
|
||||||
|
then
|
||||||
|
there
|
||||||
|
these
|
||||||
|
they
|
||||||
|
this
|
||||||
|
to
|
||||||
|
was
|
||||||
|
will
|
||||||
|
with
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------
|
||||||
|
#some test synonym mappings unlikely to appear in real input text
|
||||||
|
aaa => aaaa
|
||||||
|
bbb => bbbb1 bbbb2
|
||||||
|
ccc => cccc1,cccc2
|
||||||
|
a\=>a => b\=>b
|
||||||
|
a\,a => b\,b
|
||||||
|
fooaaa,baraaa,bazaaa
|
||||||
|
|
||||||
|
# Some synonym groups specific to this example
|
||||||
|
GB,gib,gigabyte,gigabytes
|
||||||
|
MB,mib,megabyte,megabytes
|
||||||
|
Television, Televisions, TV, TVs
|
||||||
|
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||||
|
#after us won't split it into two words.
|
||||||
|
|
||||||
|
# Synonym mappings can be used for spelling correction too
|
||||||
|
pixima => pixma
|
||||||
|
|
Loading…
Reference in New Issue