mirror of https://github.com/apache/lucene.git
[LUCENE-3731] - Creating the analysis-uima module for UIMA based tokenizers/analyzers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1244236 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
56f12201b7
commit
d66d97790b
|
@ -19,6 +19,7 @@
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/phonetic/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/analysis/phonetic/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/smartcn/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/analysis/smartcn/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
|
||||||
|
<buildFile url="file://$PROJECT_DIR$/modules/analysis/uima/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/facet/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/facet/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
<module filepath="$PROJECT_DIR$/modules/analysis/phonetic/phonetic.iml" />
|
<module filepath="$PROJECT_DIR$/modules/analysis/phonetic/phonetic.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/modules/analysis/smartcn/smartcn.iml" />
|
<module filepath="$PROJECT_DIR$/modules/analysis/smartcn/smartcn.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
|
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
|
||||||
|
<module filepath="$PROJECT_DIR$/modules/analysis/uima/analysis-uima.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
|
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/modules/facet/facet.iml" />
|
<module filepath="$PROJECT_DIR$/modules/facet/facet.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
|
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
|
||||||
|
|
|
@ -186,6 +186,13 @@
|
||||||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||||
</configuration>
|
</configuration>
|
||||||
|
<configuration default="false" name="uima analysis module" type="JUnit" factoryName="JUnit">
|
||||||
|
<module name="uima" />
|
||||||
|
<option name="TEST_OBJECT" value="package" />
|
||||||
|
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/analysis/build/uima" />
|
||||||
|
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||||
|
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||||
|
</configuration>
|
||||||
<configuration default="false" name="suggest module" type="JUnit" factoryName="JUnit">
|
<configuration default="false" name="suggest module" type="JUnit" factoryName="JUnit">
|
||||||
<module name="suggest" />
|
<module name="suggest" />
|
||||||
<option name="TEST_OBJECT" value="package" />
|
<option name="TEST_OBJECT" value="package" />
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
</library>
|
</library>
|
||||||
</orderEntry>
|
</orderEntry>
|
||||||
<orderEntry type="module" module-name="solr" />
|
<orderEntry type="module" module-name="solr" />
|
||||||
<orderEntry type="module" module-name="lucene" scope="TEST" />
|
<orderEntry type="module" module-name="lucene" />
|
||||||
|
<orderEntry type="module" module-name="analysis-uima" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
<module>phonetic</module>
|
<module>phonetic</module>
|
||||||
<module>smartcn</module>
|
<module>smartcn</module>
|
||||||
<module>stempel</module>
|
<module>stempel</module>
|
||||||
|
<module>uima</module>
|
||||||
</modules>
|
</modules>
|
||||||
<build>
|
<build>
|
||||||
<directory>build/lucene-analysis-modules-aggregator</directory>
|
<directory>build/lucene-analysis-modules-aggregator</directory>
|
||||||
|
|
|
@ -162,6 +162,17 @@
|
||||||
<property name="analyzers-kuromoji.uptodate" value="true"/>
|
<property name="analyzers-kuromoji.uptodate" value="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<property name="analyzers-uima.jar" value="${common.dir}/../modules/analysis/build/uima/lucene-analyzers-uima-${version}.jar"/>
|
||||||
|
<target name="check-analyzers-uima-uptodate" unless="analyzers-uima.uptodate">
|
||||||
|
<module-uptodate name="analysis/uima" jarfile="${analyzers-uima.jar}" property="analyzers-uima.uptodate"/>
|
||||||
|
</target>
|
||||||
|
<target name="jar-analyzers-uima" unless="analyzers-uima.uptodate" depends="check-analyzers-uima-uptodate">
|
||||||
|
<ant dir="${common.dir}/../modules/analysis/uima" target="jar-core" inheritAll="false">
|
||||||
|
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||||
|
</ant>
|
||||||
|
<property name="analyzers-uima.uptodate" value="true"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
<property name="grouping.jar" value="${common.dir}/../modules/grouping/build/lucene-grouping-${version}.jar"/>
|
<property name="grouping.jar" value="${common.dir}/../modules/grouping/build/lucene-grouping-${version}.jar"/>
|
||||||
<target name="check-grouping-uptodate" unless="grouping.uptodate">
|
<target name="check-grouping-uptodate" unless="grouping.uptodate">
|
||||||
<module-uptodate name="grouping" jarfile="${grouping.jar}" property="grouping.uptodate"/>
|
<module-uptodate name="grouping" jarfile="${grouping.jar}" property="grouping.uptodate"/>
|
||||||
|
|
|
@ -41,6 +41,10 @@ lucene-analyzers-stempel-XX.jar
|
||||||
An add-on analysis library that contains a universal algorithmic stemmer,
|
An add-on analysis library that contains a universal algorithmic stemmer,
|
||||||
including tables for the Polish language.
|
including tables for the Polish language.
|
||||||
|
|
||||||
|
lucene-analyzers-uima-XX.jar
|
||||||
|
An add-on analysis library that contains tokenizers/analyzers using
|
||||||
|
Apache UIMA extracted annotations to identify tokens/types/etc.
|
||||||
|
|
||||||
common/src/java
|
common/src/java
|
||||||
icu/src/java
|
icu/src/java
|
||||||
kuromoji/src/java
|
kuromoji/src/java
|
||||||
|
@ -48,6 +52,7 @@ morfologik/src/java
|
||||||
phonetic/src/java
|
phonetic/src/java
|
||||||
smartcn/src/java
|
smartcn/src/java
|
||||||
stempel/src/java
|
stempel/src/java
|
||||||
|
uima/src/java
|
||||||
The source code for the libraries.
|
The source code for the libraries.
|
||||||
|
|
||||||
common/src/test
|
common/src/test
|
||||||
|
@ -57,4 +62,5 @@ morfologik/src/test
|
||||||
phonetic/src/test
|
phonetic/src/test
|
||||||
smartcn/src/test
|
smartcn/src/test
|
||||||
stempel/src/test
|
stempel/src/test
|
||||||
|
uima/src/test
|
||||||
Unit tests for the libraries.
|
Unit tests for the libraries.
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
- morfologik: Morfologik Stemmer
|
- morfologik: Morfologik Stemmer
|
||||||
- smartcn: Smart Analyzer for Simplified Chinese Text
|
- smartcn: Smart Analyzer for Simplified Chinese Text
|
||||||
- stempel: Algorithmic Stemmer for Polish
|
- stempel: Algorithmic Stemmer for Polish
|
||||||
|
- uima: UIMA Analysis module
|
||||||
</description>
|
</description>
|
||||||
|
|
||||||
<target name="common">
|
<target name="common">
|
||||||
|
@ -57,8 +58,12 @@
|
||||||
<ant dir="stempel" />
|
<ant dir="stempel" />
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<target name="uima">
|
||||||
|
<ant dir="uima" />
|
||||||
|
</target>
|
||||||
|
|
||||||
<target name="default" depends="compile"/>
|
<target name="default" depends="compile"/>
|
||||||
<target name="compile" depends="common,icu,kuromoji,morfologik,phonetic,smartcn,stempel" />
|
<target name="compile" depends="common,icu,kuromoji,morfologik,phonetic,smartcn,stempel,uima" />
|
||||||
|
|
||||||
<target name="clean">
|
<target name="clean">
|
||||||
<ant dir="common" target="clean" />
|
<ant dir="common" target="clean" />
|
||||||
|
@ -68,6 +73,7 @@
|
||||||
<ant dir="phonetic" target="clean" />
|
<ant dir="phonetic" target="clean" />
|
||||||
<ant dir="smartcn" target="clean" />
|
<ant dir="smartcn" target="clean" />
|
||||||
<ant dir="stempel" target="clean" />
|
<ant dir="stempel" target="clean" />
|
||||||
|
<ant dir="uima" target="clean" />
|
||||||
</target>
|
</target>
|
||||||
<target name="validate">
|
<target name="validate">
|
||||||
<ant dir="common" target="validate" />
|
<ant dir="common" target="validate" />
|
||||||
|
@ -77,6 +83,7 @@
|
||||||
<ant dir="phonetic" target="validate" />
|
<ant dir="phonetic" target="validate" />
|
||||||
<ant dir="smartcn" target="validate" />
|
<ant dir="smartcn" target="validate" />
|
||||||
<ant dir="stempel" target="validate" />
|
<ant dir="stempel" target="validate" />
|
||||||
|
<ant dir="uima" target="validate" />
|
||||||
</target>
|
</target>
|
||||||
<target name="compile-core">
|
<target name="compile-core">
|
||||||
<ant dir="common" target="compile-core" />
|
<ant dir="common" target="compile-core" />
|
||||||
|
@ -86,6 +93,7 @@
|
||||||
<ant dir="phonetic" target="compile-core" />
|
<ant dir="phonetic" target="compile-core" />
|
||||||
<ant dir="smartcn" target="compile-core" />
|
<ant dir="smartcn" target="compile-core" />
|
||||||
<ant dir="stempel" target="compile-core" />
|
<ant dir="stempel" target="compile-core" />
|
||||||
|
<ant dir="uima" target="compile-core" />
|
||||||
</target>
|
</target>
|
||||||
<target name="compile-test">
|
<target name="compile-test">
|
||||||
<ant dir="common" target="compile-test" />
|
<ant dir="common" target="compile-test" />
|
||||||
|
@ -95,6 +103,7 @@
|
||||||
<ant dir="phonetic" target="compile-test" />
|
<ant dir="phonetic" target="compile-test" />
|
||||||
<ant dir="smartcn" target="compile-test" />
|
<ant dir="smartcn" target="compile-test" />
|
||||||
<ant dir="stempel" target="compile-test" />
|
<ant dir="stempel" target="compile-test" />
|
||||||
|
<ant dir="uima" target="compile-test" />
|
||||||
</target>
|
</target>
|
||||||
<target name="test">
|
<target name="test">
|
||||||
<ant dir="common" target="test" />
|
<ant dir="common" target="test" />
|
||||||
|
@ -104,6 +113,7 @@
|
||||||
<ant dir="phonetic" target="test" />
|
<ant dir="phonetic" target="test" />
|
||||||
<ant dir="smartcn" target="test" />
|
<ant dir="smartcn" target="test" />
|
||||||
<ant dir="stempel" target="test" />
|
<ant dir="stempel" target="test" />
|
||||||
|
<ant dir="uima" target="test" />
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="build-artifacts-and-tests" depends="default,compile-test" />
|
<target name="build-artifacts-and-tests" depends="default,compile-test" />
|
||||||
|
@ -116,6 +126,7 @@
|
||||||
<ant dir="phonetic" target="dist-maven" />
|
<ant dir="phonetic" target="dist-maven" />
|
||||||
<ant dir="smartcn" target="dist-maven" />
|
<ant dir="smartcn" target="dist-maven" />
|
||||||
<ant dir="stempel" target="dist-maven" />
|
<ant dir="stempel" target="dist-maven" />
|
||||||
|
<ant dir="uima" target="dist-maven" />
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="javadocs">
|
<target name="javadocs">
|
||||||
|
@ -126,6 +137,7 @@
|
||||||
<ant dir="phonetic" target="javadocs" />
|
<ant dir="phonetic" target="javadocs" />
|
||||||
<ant dir="smartcn" target="javadocs" />
|
<ant dir="smartcn" target="javadocs" />
|
||||||
<ant dir="stempel" target="javadocs" />
|
<ant dir="stempel" target="javadocs" />
|
||||||
|
<ant dir="uima" target="javadocs" />
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="javadocs-index.html">
|
<target name="javadocs-index.html">
|
||||||
|
@ -136,6 +148,7 @@
|
||||||
<ant dir="phonetic" target="javadocs-index.html" />
|
<ant dir="phonetic" target="javadocs-index.html" />
|
||||||
<ant dir="smartcn" target="javadocs-index.html" />
|
<ant dir="smartcn" target="javadocs-index.html" />
|
||||||
<ant dir="stempel" target="javadocs-index.html" />
|
<ant dir="stempel" target="javadocs-index.html" />
|
||||||
|
<ant dir="uima" target="javadocs-index.html" />
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<project name="analyzers-uima" default="default">
|
||||||
|
|
||||||
|
<description>
|
||||||
|
UIMA Analysis module
|
||||||
|
</description>
|
||||||
|
|
||||||
|
<property name="build.dir" location="../build/uima" />
|
||||||
|
<property name="dist.dir" location="../dist/uima" />
|
||||||
|
<property name="tests.userdir" value="src/test-files"/>
|
||||||
|
<property name="tests.threadspercpu" value="0" />
|
||||||
|
|
||||||
|
<path id="additional.dependencies">
|
||||||
|
<fileset dir="lib" includes="*.jar"/>
|
||||||
|
</path>
|
||||||
|
|
||||||
|
<pathconvert property="project.classpath" targetos="unix" refid="additional.dependencies" />
|
||||||
|
|
||||||
|
<import file="../../../lucene/contrib/contrib-build.xml"/>
|
||||||
|
|
||||||
|
<path id="classpath">
|
||||||
|
<pathelement path="${analyzers-common.jar}"/>
|
||||||
|
<pathelement path="${tests.userdir}"/>
|
||||||
|
<path refid="base.classpath"/>
|
||||||
|
</path>
|
||||||
|
|
||||||
|
<target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
|
||||||
|
</project>
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[dffd510b7429dcbe37a283da92cbf06c1cfbe383] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,202 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
|
@ -0,0 +1,7 @@
|
||||||
|
|
||||||
|
UIMA Annotator: Tagger
|
||||||
|
Copyright 2006-2010 The Apache Software Foundation
|
||||||
|
|
||||||
|
This product includes software developed at
|
||||||
|
The Apache Software Foundation (http://www.apache.org/).
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[10866014d8887bfdd8bfec43d3fdd780428d4ed4] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,202 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
|
@ -0,0 +1,7 @@
|
||||||
|
|
||||||
|
UIMA Annotator: WhitespaceTokenizer
|
||||||
|
Copyright 2006-2010 The Apache Software Foundation
|
||||||
|
|
||||||
|
This product includes software developed at
|
||||||
|
The Apache Software Foundation (http://www.apache.org/).
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[e876a9749eed73ec2c95b83cf534d7a373130569] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,202 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
|
@ -0,0 +1,13 @@
|
||||||
|
|
||||||
|
UIMA Base: uimaj-core
|
||||||
|
Copyright 2006-2010 The Apache Software Foundation
|
||||||
|
|
||||||
|
This product includes software developed at
|
||||||
|
The Apache Software Foundation (http://www.apache.org/).
|
||||||
|
|
||||||
|
Portions of Apache UIMA were originally developed by
|
||||||
|
International Business Machines Corporation and are
|
||||||
|
licensed to the Apache Software Foundation under the
|
||||||
|
"Software Grant License Agreement", informally known as the
|
||||||
|
"IBM UIMA License Agreement".
|
||||||
|
Copyright (c) 2003, 2006 IBM Corporation.
|
|
@ -0,0 +1,81 @@
|
||||||
|
package org.apache.lucene.analysis.uima;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
|
import org.apache.uima.cas.CAS;
|
||||||
|
import org.apache.uima.cas.FSIterator;
|
||||||
|
import org.apache.uima.cas.text.AnnotationFS;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract base implementation of a {@link Tokenizer} which is able to analyze the given input with a
|
||||||
|
* UIMA {@link AnalysisEngine}
|
||||||
|
*/
|
||||||
|
public abstract class BaseUIMATokenizer extends Tokenizer {
|
||||||
|
|
||||||
|
protected FSIterator<AnnotationFS> iterator;
|
||||||
|
|
||||||
|
protected BaseUIMATokenizer(Reader reader) {
|
||||||
|
super(reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* analyzes the tokenizer input using the given analysis engine
|
||||||
|
*
|
||||||
|
* @param analysisEngine the AE to use for analyzing the tokenizer input
|
||||||
|
* @return CAS with extracted metadata (UIMA annotations, feature structures)
|
||||||
|
* @throws ResourceInitializationException
|
||||||
|
*
|
||||||
|
* @throws AnalysisEngineProcessException
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
protected CAS analyzeInput(AnalysisEngine analysisEngine) throws ResourceInitializationException,
|
||||||
|
AnalysisEngineProcessException, IOException {
|
||||||
|
CAS cas = analysisEngine.newCAS();
|
||||||
|
cas.setDocumentText(toString(input));
|
||||||
|
analysisEngine.process(cas);
|
||||||
|
analysisEngine.destroy();
|
||||||
|
return cas;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String toString(Reader reader) throws IOException {
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
int ch;
|
||||||
|
while ((ch = reader.read()) > -1) {
|
||||||
|
stringBuilder.append((char) ch);
|
||||||
|
}
|
||||||
|
return stringBuilder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset(Reader input) throws IOException {
|
||||||
|
super.reset(input);
|
||||||
|
iterator = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void end() throws IOException {
|
||||||
|
iterator = null;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,93 @@
|
||||||
|
package org.apache.lucene.analysis.uima;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
|
import org.apache.uima.cas.CAS;
|
||||||
|
import org.apache.uima.cas.Type;
|
||||||
|
import org.apache.uima.cas.text.AnnotationFS;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
import org.apache.uima.util.InvalidXMLException;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a {@link Tokenizer} which creates tokens from UIMA Annotations
|
||||||
|
*/
|
||||||
|
public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||||
|
|
||||||
|
private final CharTermAttribute termAttr;
|
||||||
|
|
||||||
|
private final OffsetAttribute offsetAttr;
|
||||||
|
|
||||||
|
private final String tokenTypeString;
|
||||||
|
|
||||||
|
private final String descriptorPath;
|
||||||
|
|
||||||
|
private int finalOffset = 0;
|
||||||
|
|
||||||
|
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Reader input) {
|
||||||
|
super(input);
|
||||||
|
this.tokenTypeString = tokenType;
|
||||||
|
this.termAttr = addAttribute(CharTermAttribute.class);
|
||||||
|
this.offsetAttr = addAttribute(OffsetAttribute.class);
|
||||||
|
this.descriptorPath = descriptorPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void analyzeText(String descriptorPath) throws IOException, ResourceInitializationException,
|
||||||
|
AnalysisEngineProcessException {
|
||||||
|
AnalysisEngine ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
|
||||||
|
CAS cas = analyzeInput(ae);
|
||||||
|
finalOffset = correctOffset(cas.getDocumentText().length());
|
||||||
|
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
|
||||||
|
iterator = cas.getAnnotationIndex(tokenType).iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean incrementToken() throws IOException {
|
||||||
|
if (iterator == null) {
|
||||||
|
try {
|
||||||
|
analyzeText(descriptorPath);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (iterator.hasNext()) {
|
||||||
|
clearAttributes();
|
||||||
|
AnnotationFS next = iterator.next();
|
||||||
|
termAttr.append(next.getCoveredText());
|
||||||
|
offsetAttr.setOffset(correctOffset(next.getBegin()), correctOffset(next.getEnd()));
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void end() throws IOException {
|
||||||
|
if (offsetAttr.endOffset() < finalOffset)
|
||||||
|
offsetAttr.setOffset(finalOffset, finalOffset);
|
||||||
|
super.end();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
package org.apache.lucene.analysis.uima;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An {@link Analyzer} which use the {@link UIMAAnnotationsTokenizer} for creating tokens
|
||||||
|
*/
|
||||||
|
public final class UIMABaseAnalyzer extends Analyzer {
|
||||||
|
|
||||||
|
private final String descriptorPath;
|
||||||
|
private final String tokenType;
|
||||||
|
|
||||||
|
public UIMABaseAnalyzer(String descriptorPath, String tokenType) {
|
||||||
|
this.descriptorPath = descriptorPath;
|
||||||
|
this.tokenType = tokenType;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
|
return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, reader));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
package org.apache.lucene.analysis.uima;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@link Analyzer} which uses the {@link UIMATypeAwareAnnotationsTokenizer} for the tokenization phase
|
||||||
|
*/
|
||||||
|
public final class UIMATypeAwareAnalyzer extends Analyzer {
|
||||||
|
private final String descriptorPath;
|
||||||
|
private final String tokenType;
|
||||||
|
private final String featurePath;
|
||||||
|
|
||||||
|
public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath) {
|
||||||
|
this.descriptorPath = descriptorPath;
|
||||||
|
this.tokenType = tokenType;
|
||||||
|
this.featurePath = featurePath;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
|
return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, reader));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,110 @@
|
||||||
|
package org.apache.lucene.analysis.uima;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
|
import org.apache.uima.cas.CAS;
|
||||||
|
import org.apache.uima.cas.CASException;
|
||||||
|
import org.apache.uima.cas.FeaturePath;
|
||||||
|
import org.apache.uima.cas.Type;
|
||||||
|
import org.apache.uima.cas.text.AnnotationFS;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
import org.apache.uima.util.InvalidXMLException;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A {@link Tokenizer} which creates token from UIMA Annotations filling also their {@link TypeAttribute} according to
|
||||||
|
* {@link org.apache.uima.cas.FeaturePath}s specified
|
||||||
|
*/
|
||||||
|
public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||||
|
|
||||||
|
private final TypeAttribute typeAttr;
|
||||||
|
|
||||||
|
private final CharTermAttribute termAttr;
|
||||||
|
|
||||||
|
private final OffsetAttribute offsetAttr;
|
||||||
|
|
||||||
|
private final String tokenTypeString;
|
||||||
|
|
||||||
|
private final String descriptorPath;
|
||||||
|
|
||||||
|
private final String typeAttributeFeaturePath;
|
||||||
|
|
||||||
|
private FeaturePath featurePath;
|
||||||
|
|
||||||
|
private int finalOffset = 0;
|
||||||
|
|
||||||
|
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Reader input) {
|
||||||
|
super(input);
|
||||||
|
this.tokenTypeString = tokenType;
|
||||||
|
this.termAttr = addAttribute(CharTermAttribute.class);
|
||||||
|
this.typeAttr = addAttribute(TypeAttribute.class);
|
||||||
|
this.offsetAttr = addAttribute(OffsetAttribute.class);
|
||||||
|
this.typeAttributeFeaturePath = typeAttributeFeaturePath;
|
||||||
|
this.descriptorPath = descriptorPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void analyzeText() throws IOException, ResourceInitializationException, AnalysisEngineProcessException,
|
||||||
|
CASException {
|
||||||
|
AnalysisEngine ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
|
||||||
|
CAS cas = analyzeInput(ae);
|
||||||
|
finalOffset = correctOffset(cas.getDocumentText().length());
|
||||||
|
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
|
||||||
|
iterator = cas.getAnnotationIndex(tokenType).iterator();
|
||||||
|
featurePath = cas.createFeaturePath();
|
||||||
|
featurePath.initialize(typeAttributeFeaturePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean incrementToken() throws IOException {
|
||||||
|
if (iterator == null) {
|
||||||
|
try {
|
||||||
|
analyzeText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (iterator.hasNext()) {
|
||||||
|
clearAttributes();
|
||||||
|
AnnotationFS next = iterator.next();
|
||||||
|
termAttr.append(next.getCoveredText());
|
||||||
|
offsetAttr.setOffset(correctOffset(next.getBegin()), correctOffset(next.getEnd()));
|
||||||
|
typeAttr.setType(featurePath.getValueAsString(next));
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void end() throws IOException {
|
||||||
|
if (offsetAttr.endOffset() < finalOffset)
|
||||||
|
offsetAttr.setOffset(finalOffset, finalOffset);
|
||||||
|
super.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.apache.lucene.analysis.uima.ae;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* provide an Apache UIMA {@link AnalysisEngine}
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public interface AEProvider {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* @throws ResourceInitializationException
|
||||||
|
*/
|
||||||
|
public AnalysisEngine getAE() throws ResourceInitializationException;
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,73 @@
|
||||||
|
package org.apache.lucene.analysis.uima.ae;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Singleton factory class responsible of {@link AEProvider}s' creation
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class AEProviderFactory {
|
||||||
|
|
||||||
|
private static AEProviderFactory instance;
|
||||||
|
|
||||||
|
private final Map<String, AEProvider> providerCache = new HashMap<String, AEProvider>();
|
||||||
|
|
||||||
|
private AEProviderFactory() {
|
||||||
|
// Singleton
|
||||||
|
}
|
||||||
|
|
||||||
|
public static AEProviderFactory getInstance() {
|
||||||
|
if (instance == null) {
|
||||||
|
instance = new AEProviderFactory();
|
||||||
|
}
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param keyPrefix
|
||||||
|
* @param aePath
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public synchronized AEProvider getAEProvider(String keyPrefix, String aePath) {
|
||||||
|
String key = new StringBuilder(keyPrefix).append(aePath).append(BasicAEProvider.class).toString();
|
||||||
|
if (providerCache.get(key) == null) {
|
||||||
|
providerCache.put(key, new BasicAEProvider(aePath));
|
||||||
|
}
|
||||||
|
return providerCache.get(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param keyPrefix
|
||||||
|
* @param aePath
|
||||||
|
* @param runtimeParameters
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public synchronized AEProvider getAEProvider(String keyPrefix, String aePath,
|
||||||
|
Map<String, Object> runtimeParameters) {
|
||||||
|
String key = new StringBuilder(keyPrefix).append(aePath).append(OverridingParamsAEProvider.class).toString();
|
||||||
|
if (providerCache.get(key) == null) {
|
||||||
|
providerCache.put(key, new OverridingParamsAEProvider(aePath, runtimeParameters));
|
||||||
|
}
|
||||||
|
return providerCache.get(key);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,67 @@
|
||||||
|
package org.apache.lucene.analysis.uima.ae;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.uima.UIMAFramework;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
import org.apache.uima.util.XMLInputSource;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Basic {@link AEProvider} which just instantiates a UIMA {@link AnalysisEngine} with no additional metadata,
|
||||||
|
* parameters or resources
|
||||||
|
*/
|
||||||
|
public class BasicAEProvider implements AEProvider {
|
||||||
|
|
||||||
|
private final String aePath;
|
||||||
|
private AnalysisEngine cachedAE;
|
||||||
|
|
||||||
|
public BasicAEProvider(String aePath) {
|
||||||
|
this.aePath = aePath;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized AnalysisEngine getAE() throws ResourceInitializationException {
|
||||||
|
try {
|
||||||
|
if (cachedAE == null) {
|
||||||
|
// get Resource Specifier from XML file
|
||||||
|
|
||||||
|
XMLInputSource in;
|
||||||
|
try {
|
||||||
|
in = new XMLInputSource(aePath);
|
||||||
|
} catch (Exception e) {
|
||||||
|
in = new XMLInputSource(getClass().getResource(aePath));
|
||||||
|
}
|
||||||
|
|
||||||
|
// get AE description
|
||||||
|
AnalysisEngineDescription desc = UIMAFramework.getXMLParser()
|
||||||
|
.parseAnalysisEngineDescription(in);
|
||||||
|
|
||||||
|
// create AE here
|
||||||
|
cachedAE = UIMAFramework.produceAnalysisEngine(desc);
|
||||||
|
} else {
|
||||||
|
cachedAE.reconfigure();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
cachedAE = null;
|
||||||
|
throw new ResourceInitializationException(e);
|
||||||
|
}
|
||||||
|
return cachedAE;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,102 @@
|
||||||
|
package org.apache.lucene.analysis.uima.ae;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.uima.UIMAFramework;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
import org.apache.uima.util.XMLInputSource;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@link AEProvider} implementation that creates an Aggregate AE from the given path, also
|
||||||
|
* injecting runtime parameters defined in the solrconfig.xml Solr configuration file and assigning
|
||||||
|
* them as overriding parameters in the aggregate AE
|
||||||
|
*/
|
||||||
|
public class OverridingParamsAEProvider implements AEProvider {
|
||||||
|
|
||||||
|
private final String aePath;
|
||||||
|
|
||||||
|
private AnalysisEngine cachedAE;
|
||||||
|
|
||||||
|
private final Map<String, Object> runtimeParameters;
|
||||||
|
|
||||||
|
public OverridingParamsAEProvider(String aePath, Map<String, Object> runtimeParameters) {
|
||||||
|
this.aePath = aePath;
|
||||||
|
this.runtimeParameters = runtimeParameters;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized AnalysisEngine getAE() throws ResourceInitializationException {
|
||||||
|
try {
|
||||||
|
if (cachedAE == null) {
|
||||||
|
// get Resource Specifier from XML file
|
||||||
|
XMLInputSource in;
|
||||||
|
try {
|
||||||
|
in = new XMLInputSource(aePath);
|
||||||
|
} catch (Exception e) {
|
||||||
|
in = new XMLInputSource(getClass().getResource(aePath));
|
||||||
|
}
|
||||||
|
|
||||||
|
// get AE description
|
||||||
|
AnalysisEngineDescription desc = UIMAFramework.getXMLParser()
|
||||||
|
.parseAnalysisEngineDescription(in);
|
||||||
|
|
||||||
|
/* iterate over each AE (to set runtime parameters) */
|
||||||
|
for (String attributeName : runtimeParameters.keySet()) {
|
||||||
|
Object val = getRuntimeValue(desc, attributeName);
|
||||||
|
desc.getAnalysisEngineMetaData().getConfigurationParameterSettings().setParameterValue(
|
||||||
|
attributeName, val);
|
||||||
|
}
|
||||||
|
// create AE here
|
||||||
|
cachedAE = UIMAFramework.produceAnalysisEngine(desc);
|
||||||
|
} else {
|
||||||
|
cachedAE.reconfigure();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
cachedAE = null;
|
||||||
|
throw new ResourceInitializationException(e);
|
||||||
|
}
|
||||||
|
return cachedAE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* create the value to inject in the runtime parameter depending on its declared type */
|
||||||
|
private Object getRuntimeValue(AnalysisEngineDescription desc, String attributeName) {
|
||||||
|
String type = desc.getAnalysisEngineMetaData().getConfigurationParameterDeclarations().
|
||||||
|
getConfigurationParameter(null, attributeName).getType();
|
||||||
|
// TODO : do it via reflection ? i.e. Class paramType = Class.forName(type)...
|
||||||
|
Object val = null;
|
||||||
|
Object runtimeValue = runtimeParameters.get(attributeName);
|
||||||
|
if (runtimeValue != null) {
|
||||||
|
if ("String".equals(type)) {
|
||||||
|
val = String.valueOf(runtimeValue);
|
||||||
|
} else if ("Integer".equals(type)) {
|
||||||
|
val = Integer.valueOf(runtimeValue.toString());
|
||||||
|
} else if ("Boolean".equals(type)) {
|
||||||
|
val = Boolean.valueOf(runtimeValue.toString());
|
||||||
|
} else if ("Float".equals(type)) {
|
||||||
|
val = Float.valueOf(runtimeValue.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,70 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||||
|
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||||
|
<primitive>false</primitive>
|
||||||
|
<delegateAnalysisEngineSpecifiers>
|
||||||
|
<delegateAnalysisEngine key="WhitespaceTokenizer">
|
||||||
|
<import name="WhitespaceTokenizer"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
<delegateAnalysisEngine key="HmmTagger">
|
||||||
|
<import name="HmmTagger"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
</delegateAnalysisEngineSpecifiers>
|
||||||
|
<analysisEngineMetaData>
|
||||||
|
<name>AggregateSentenceAE</name>
|
||||||
|
<description/>
|
||||||
|
<version>1.0</version>
|
||||||
|
<vendor/>
|
||||||
|
<configurationParameters>
|
||||||
|
<configurationParameter>
|
||||||
|
<name>ngramsize</name>
|
||||||
|
<type>Integer</type>
|
||||||
|
<multiValued>false</multiValued>
|
||||||
|
<mandatory>false</mandatory>
|
||||||
|
<overrides>
|
||||||
|
<parameter>HmmTagger/NGRAM_SIZE</parameter>
|
||||||
|
</overrides>
|
||||||
|
</configurationParameter>
|
||||||
|
</configurationParameters>
|
||||||
|
<configurationParameterSettings/>
|
||||||
|
<flowConstraints>
|
||||||
|
<fixedFlow>
|
||||||
|
<node>WhitespaceTokenizer</node>
|
||||||
|
<node>HmmTagger</node>
|
||||||
|
</fixedFlow>
|
||||||
|
</flowConstraints>
|
||||||
|
<fsIndexCollection/>
|
||||||
|
<capabilities>
|
||||||
|
<capability>
|
||||||
|
<inputs/>
|
||||||
|
<outputs>
|
||||||
|
<type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
|
||||||
|
<type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
|
||||||
|
</outputs>
|
||||||
|
<languagesSupported/>
|
||||||
|
</capability>
|
||||||
|
</capabilities>
|
||||||
|
<operationalProperties>
|
||||||
|
<modifiesCas>true</modifiesCas>
|
||||||
|
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||||
|
<outputsNewCASes>false</outputsNewCASes>
|
||||||
|
</operationalProperties>
|
||||||
|
</analysisEngineMetaData>
|
||||||
|
<resourceManagerConfiguration/>
|
||||||
|
</analysisEngineDescription>
|
|
@ -0,0 +1,59 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||||
|
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||||
|
<primitive>false</primitive>
|
||||||
|
<delegateAnalysisEngineSpecifiers>
|
||||||
|
<delegateAnalysisEngine key="WhitespaceTokenizer">
|
||||||
|
<import name="WhitespaceTokenizer"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
<delegateAnalysisEngine key="DummyPoSTagger">
|
||||||
|
<import location="DummyPoSTagger.xml"/>
|
||||||
|
</delegateAnalysisEngine>
|
||||||
|
</delegateAnalysisEngineSpecifiers>
|
||||||
|
<analysisEngineMetaData>
|
||||||
|
<name>AggregateSentenceAE</name>
|
||||||
|
<description/>
|
||||||
|
<version>1.0</version>
|
||||||
|
<vendor/>
|
||||||
|
<configurationParameterSettings/>
|
||||||
|
<flowConstraints>
|
||||||
|
<fixedFlow>
|
||||||
|
<node>WhitespaceTokenizer</node>
|
||||||
|
<node>DummyPoSTagger</node>
|
||||||
|
</fixedFlow>
|
||||||
|
</flowConstraints>
|
||||||
|
<fsIndexCollection/>
|
||||||
|
<capabilities>
|
||||||
|
<capability>
|
||||||
|
<inputs/>
|
||||||
|
<outputs>
|
||||||
|
<type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
|
||||||
|
<type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
|
||||||
|
</outputs>
|
||||||
|
<languagesSupported/>
|
||||||
|
</capability>
|
||||||
|
</capabilities>
|
||||||
|
<operationalProperties>
|
||||||
|
<modifiesCas>true</modifiesCas>
|
||||||
|
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||||
|
<outputsNewCASes>false</outputsNewCASes>
|
||||||
|
</operationalProperties>
|
||||||
|
</analysisEngineMetaData>
|
||||||
|
<resourceManagerConfiguration/>
|
||||||
|
</analysisEngineDescription>
|
|
@ -0,0 +1,68 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||||
|
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||||
|
<primitive>true</primitive>
|
||||||
|
<annotatorImplementationName>org.apache.lucene.analysis.uima.an.DummyEntityAnnotator</annotatorImplementationName>
|
||||||
|
<analysisEngineMetaData>
|
||||||
|
<name>DummyEntityAnnotator</name>
|
||||||
|
<description/>
|
||||||
|
<version>1.0</version>
|
||||||
|
<vendor>ASF</vendor>
|
||||||
|
<configurationParameters/>
|
||||||
|
<configurationParameterSettings/>
|
||||||
|
<typeSystemDescription>
|
||||||
|
<types>
|
||||||
|
<typeDescription>
|
||||||
|
<name>org.apache.solr.uima.ts.EntityAnnotation</name>
|
||||||
|
<description/>
|
||||||
|
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||||
|
<features>
|
||||||
|
<featureDescription>
|
||||||
|
<name>name</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
<featureDescription>
|
||||||
|
<name>entity</name>
|
||||||
|
<description/>
|
||||||
|
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||||
|
</featureDescription>
|
||||||
|
</features>
|
||||||
|
</typeDescription>
|
||||||
|
</types>
|
||||||
|
</typeSystemDescription>
|
||||||
|
<typePriorities/>
|
||||||
|
<fsIndexCollection/>
|
||||||
|
<capabilities>
|
||||||
|
<capability>
|
||||||
|
<inputs/>
|
||||||
|
<outputs>
|
||||||
|
<type allAnnotatorFeatures="true">org.apache.solr.uima.ts.EntityAnnotation</type>
|
||||||
|
</outputs>
|
||||||
|
<languagesSupported/>
|
||||||
|
</capability>
|
||||||
|
</capabilities>
|
||||||
|
<operationalProperties>
|
||||||
|
<modifiesCas>true</modifiesCas>
|
||||||
|
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||||
|
<outputsNewCASes>false</outputsNewCASes>
|
||||||
|
</operationalProperties>
|
||||||
|
</analysisEngineMetaData>
|
||||||
|
<resourceManagerConfiguration/>
|
||||||
|
</analysisEngineDescription>
|
|
@ -0,0 +1,50 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <!-- Primitive Java analysis engine backed by the DummyPoSTagger annotator. -->
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.lucene.analysis.uima.an.DummyPoSTagger</annotatorImplementationName>

  <analysisEngineMetaData>
    <name>DummyPoSTagger</name>
    <description/>
    <version>1.0</version>
    <vendor>ASF</vendor>

    <!-- No configuration parameters are declared or set. -->
    <configurationParameters/>
    <configurationParameterSettings/>

    <!-- This descriptor declares no types of its own. -->
    <typeSystemDescription/>
    <typePriorities/>
    <fsIndexCollection/>

    <!-- Token annotations are both consumed and produced. -->
    <capabilities>
      <capability>
        <inputs>
          <type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
        </inputs>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>

    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>

  <resourceManagerConfiguration/>
</analysisEngineDescription>
|
|
@ -0,0 +1,125 @@
|
||||||
|
package org.apache.lucene.analysis.uima;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testcase for {@link UIMABaseAnalyzer}
|
||||||
|
*/
|
||||||
|
public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
|
private UIMABaseAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation");
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
analyzer.close();
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void baseUIMAAnalyzerStreamTest() throws Exception {
|
||||||
|
TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
|
||||||
|
assertTokenStreamContents(ts, new String[]{"the", "big", "brown", "fox", "jumped", "on", "the", "wood"});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void baseUIMAAnalyzerIntegrationTest() throws Exception {
|
||||||
|
Directory dir = new RAMDirectory();
|
||||||
|
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
|
||||||
|
// add the first doc
|
||||||
|
Document doc = new Document();
|
||||||
|
String dummyTitle = "this is a dummy title ";
|
||||||
|
doc.add(new Field("title", dummyTitle, TextField.TYPE_STORED));
|
||||||
|
String dummyContent = "there is some content written here";
|
||||||
|
doc.add(new Field("contents", dummyContent, TextField.TYPE_STORED));
|
||||||
|
writer.addDocument(doc, analyzer);
|
||||||
|
writer.commit();
|
||||||
|
|
||||||
|
// try the search over the first doc
|
||||||
|
DirectoryReader directoryReader = DirectoryReader.open(dir);
|
||||||
|
IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
|
||||||
|
TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 10);
|
||||||
|
assertTrue(result.totalHits > 0);
|
||||||
|
Document d = indexSearcher.doc(result.scoreDocs[0].doc);
|
||||||
|
assertNotNull(d);
|
||||||
|
assertNotNull(d.getField("title"));
|
||||||
|
assertEquals(dummyTitle, d.getField("title").stringValue());
|
||||||
|
assertNotNull(d.getField("contents"));
|
||||||
|
assertEquals(dummyContent, d.getField("contents").stringValue());
|
||||||
|
|
||||||
|
// add a second doc
|
||||||
|
doc = new Document();
|
||||||
|
String dogmasTitle = "dogmas";
|
||||||
|
doc.add(new Field("title", dogmasTitle, TextField.TYPE_STORED));
|
||||||
|
String dogmasContents = "white men can't jump";
|
||||||
|
doc.add(new Field("contents", dogmasContents, TextField.TYPE_STORED));
|
||||||
|
writer.addDocument(doc, analyzer);
|
||||||
|
writer.commit();
|
||||||
|
|
||||||
|
directoryReader.close();
|
||||||
|
directoryReader = DirectoryReader.open(dir);
|
||||||
|
indexSearcher = new IndexSearcher(directoryReader);
|
||||||
|
result = indexSearcher.search(new MatchAllDocsQuery(), 10);
|
||||||
|
Document d1 = indexSearcher.doc(result.scoreDocs[1].doc);
|
||||||
|
assertNotNull(d1);
|
||||||
|
assertNotNull(d1.getField("title"));
|
||||||
|
assertEquals(dogmasTitle, d1.getField("title").stringValue());
|
||||||
|
assertNotNull(d1.getField("contents"));
|
||||||
|
assertEquals(dogmasContents, d1.getField("contents").stringValue());
|
||||||
|
|
||||||
|
// do a matchalldocs query to retrieve both docs
|
||||||
|
indexSearcher = new IndexSearcher(directoryReader);
|
||||||
|
result = indexSearcher.search(new MatchAllDocsQuery(), 10);
|
||||||
|
assertEquals(2, result.totalHits);
|
||||||
|
writer.close();
|
||||||
|
indexSearcher.getIndexReader().close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRandomStrings() throws Exception {
|
||||||
|
checkRandomData(random, new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation"),
|
||||||
|
1000 * RANDOM_MULTIPLIER);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,67 @@
|
||||||
|
package org.apache.lucene.analysis.uima;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testcase for {@link UIMATypeAwareAnalyzer}
|
||||||
|
*/
|
||||||
|
public class UIMATypeAwareAnalyzerTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
|
private UIMATypeAwareAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
analyzer = new UIMATypeAwareAnalyzer("/uima/AggregateSentenceAE.xml",
|
||||||
|
"org.apache.uima.TokenAnnotation", "posTag");
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
analyzer.close();
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void baseUIMATypeAwareAnalyzerStreamTest() throws Exception {
|
||||||
|
|
||||||
|
// create a token stream
|
||||||
|
TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
|
||||||
|
|
||||||
|
// check that 'the big brown fox jumped on the wood' tokens have the expected PoS types
|
||||||
|
assertTokenStreamContents(ts,
|
||||||
|
new String[]{"the", "big", "brown", "fox", "jumped", "on", "the", "wood"},
|
||||||
|
new String[]{"at", "jj", "jj", "nn", "vbd", "in", "at", "nn"});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRandomStrings() throws Exception {
|
||||||
|
checkRandomData(random, new UIMATypeAwareAnalyzer("/uima/AggregateDummySentenceAE.xml",
|
||||||
|
"org.apache.uima.TokenAnnotation", "tokenType"), 1000 * RANDOM_MULTIPLIER);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.apache.lucene.analysis.uima.ae;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TestCase for {@link BasicAEProvider}
|
||||||
|
*/
|
||||||
|
public class BasicAEProviderTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBasicInititalization() throws Exception {
|
||||||
|
AEProvider basicAEProvider = new BasicAEProvider("/uima/DummyEntityAE.xml");
|
||||||
|
AnalysisEngine analysisEngine = basicAEProvider.getAE();
|
||||||
|
assertNotNull(analysisEngine);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,61 @@
|
||||||
|
package org.apache.lucene.analysis.uima.ae;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TestCase for {@link OverridingParamsAEProvider}
|
||||||
|
*/
|
||||||
|
public class OverridingParamsAEProviderTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNullMapInitialization() throws Exception {
|
||||||
|
try {
|
||||||
|
AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", null);
|
||||||
|
aeProvider.getAE();
|
||||||
|
fail("should fail due to null Map passed");
|
||||||
|
} catch (ResourceInitializationException e) {
|
||||||
|
// everything ok
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEmptyMapInitialization() throws Exception {
|
||||||
|
AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", new HashMap<String, Object>());
|
||||||
|
AnalysisEngine analysisEngine = aeProvider.getAE();
|
||||||
|
assertNotNull(analysisEngine);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOverridingParamsInitialization() throws Exception {
|
||||||
|
Map<String, Object> runtimeParameters = new HashMap<String, Object>();
|
||||||
|
runtimeParameters.put("ngramsize", "3");
|
||||||
|
AEProvider aeProvider = new OverridingParamsAEProvider("/uima/AggregateSentenceAE.xml", runtimeParameters);
|
||||||
|
AnalysisEngine analysisEngine = aeProvider.getAE();
|
||||||
|
assertNotNull(analysisEngine);
|
||||||
|
assertEquals(analysisEngine.getConfigParameterValue("ngramsize"), 3);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,61 @@
|
||||||
|
package org.apache.lucene.analysis.uima.an;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.uima.TokenAnnotation;
|
||||||
|
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
|
import org.apache.uima.cas.Feature;
|
||||||
|
import org.apache.uima.cas.Type;
|
||||||
|
import org.apache.uima.cas.text.AnnotationFS;
|
||||||
|
import org.apache.uima.jcas.JCas;
|
||||||
|
import org.apache.uima.jcas.tcas.Annotation;
|
||||||
|
|
||||||
|
public class DummyEntityAnnotator extends JCasAnnotator_ImplBase {
|
||||||
|
|
||||||
|
private static final String NP = "np";
|
||||||
|
private static final String NPS = "nps";
|
||||||
|
private static final String TYPE_NAME = "org.apache.lucene.analysis.uima.ts.EntityAnnotation";
|
||||||
|
private static final String ENTITY_FEATURE = "entity";
|
||||||
|
private static final String NAME_FEATURE = "entity";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void process(JCas jcas) throws AnalysisEngineProcessException {
|
||||||
|
Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
|
||||||
|
Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE);
|
||||||
|
Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE);
|
||||||
|
|
||||||
|
for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
|
||||||
|
String tokenPOS = ((TokenAnnotation) annotation).getPosTag();
|
||||||
|
|
||||||
|
if (NP.equals(tokenPOS) || NPS.equals(tokenPOS)) {
|
||||||
|
AnnotationFS entityAnnotation = jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd());
|
||||||
|
|
||||||
|
entityAnnotation.setStringValue(entityFeature, annotation.getCoveredText());
|
||||||
|
|
||||||
|
String name = "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
|
||||||
|
if (annotation.getCoveredText().equals("Apache"))
|
||||||
|
name = "ORGANIZATION";
|
||||||
|
entityAnnotation.setStringValue(nameFeature, name);
|
||||||
|
|
||||||
|
jcas.addFsToIndexes(entityAnnotation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
package org.apache.lucene.analysis.uima.an;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.uima.TokenAnnotation;
|
||||||
|
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
|
import org.apache.uima.cas.Feature;
|
||||||
|
import org.apache.uima.cas.Type;
|
||||||
|
import org.apache.uima.jcas.JCas;
|
||||||
|
import org.apache.uima.jcas.tcas.Annotation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*/
|
||||||
|
public class DummyPoSTagger extends JCasAnnotator_ImplBase {
|
||||||
|
|
||||||
|
private static final String NUM = "NUM";
|
||||||
|
private static final String WORD = "WORD";
|
||||||
|
private static final String TYPE_NAME = "org.apache.uima.TokenAnnotation";
|
||||||
|
private static final String FEATURE_NAME = "tokenType";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void process(JCas jcas) throws AnalysisEngineProcessException {
|
||||||
|
Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
|
||||||
|
Feature posFeature = type.getFeatureByBaseName(FEATURE_NAME);
|
||||||
|
|
||||||
|
for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
|
||||||
|
String text = annotation.getCoveredText();
|
||||||
|
String pos = extractPoS(text);
|
||||||
|
annotation.setStringValue(posFeature, pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractPoS(String text) {
|
||||||
|
try {
|
||||||
|
Double.valueOf(text);
|
||||||
|
return NUM;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return WORD;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue