mirror of https://github.com/apache/lucene.git
[LUCENE-3731] - Creating the analysis-uima module for UIMA based tokenizers/analyzers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1244236 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
56f12201b7
commit
d66d97790b
|
@ -19,6 +19,7 @@
|
|||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/phonetic/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/smartcn/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/uima/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/modules/facet/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
<module filepath="$PROJECT_DIR$/modules/analysis/phonetic/phonetic.iml" />
|
||||
<module filepath="$PROJECT_DIR$/modules/analysis/smartcn/smartcn.iml" />
|
||||
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
|
||||
<module filepath="$PROJECT_DIR$/modules/analysis/uima/analysis-uima.iml" />
|
||||
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
|
||||
<module filepath="$PROJECT_DIR$/modules/facet/facet.iml" />
|
||||
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
|
||||
|
|
|
@ -186,6 +186,13 @@
|
|||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
</configuration>
|
||||
<configuration default="false" name="uima analysis module" type="JUnit" factoryName="JUnit">
|
||||
<module name="analysis-uima" />
|
||||
<option name="TEST_OBJECT" value="package" />
|
||||
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/analysis/build/uima" />
|
||||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
</configuration>
|
||||
<configuration default="false" name="suggest module" type="JUnit" factoryName="JUnit">
|
||||
<module name="suggest" />
|
||||
<option name="TEST_OBJECT" value="package" />
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
</library>
|
||||
</orderEntry>
|
||||
<orderEntry type="module" module-name="solr" />
|
||||
<orderEntry type="module" module-name="lucene" scope="TEST" />
|
||||
<orderEntry type="module" module-name="lucene" />
|
||||
<orderEntry type="module" module-name="analysis-uima" />
|
||||
</component>
|
||||
</module>
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
<module>phonetic</module>
|
||||
<module>smartcn</module>
|
||||
<module>stempel</module>
|
||||
<module>uima</module>
|
||||
</modules>
|
||||
<build>
|
||||
<directory>build/lucene-analysis-modules-aggregator</directory>
|
||||
|
|
|
@ -162,6 +162,17 @@
|
|||
<property name="analyzers-kuromoji.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<property name="analyzers-uima.jar" value="${common.dir}/../modules/analysis/build/uima/lucene-analyzers-uima-${version}.jar"/>
|
||||
<target name="check-analyzers-uima-uptodate" unless="analyzers-uima.uptodate">
|
||||
<module-uptodate name="analysis/uima" jarfile="${analyzers-uima.jar}" property="analyzers-uima.uptodate"/>
|
||||
</target>
|
||||
<target name="jar-analyzers-uima" unless="analyzers-uima.uptodate" depends="check-analyzers-uima-uptodate">
|
||||
<ant dir="${common.dir}/../modules/analysis/uima" target="jar-core" inheritAll="false">
|
||||
<propertyset refid="uptodate.and.compiled.properties"/>
|
||||
</ant>
|
||||
<property name="analyzers-uima.uptodate" value="true"/>
|
||||
</target>
|
||||
|
||||
<property name="grouping.jar" value="${common.dir}/../modules/grouping/build/lucene-grouping-${version}.jar"/>
|
||||
<target name="check-grouping-uptodate" unless="grouping.uptodate">
|
||||
<module-uptodate name="grouping" jarfile="${grouping.jar}" property="grouping.uptodate"/>
|
||||
|
|
|
@ -41,6 +41,10 @@ lucene-analyzers-stempel-XX.jar
|
|||
An add-on analysis library that contains a universal algorithmic stemmer,
|
||||
including tables for the Polish language.
|
||||
|
||||
lucene-analyzers-uima-XX.jar
|
||||
An add-on analysis library that contains tokenizers/analyzers using
|
||||
Apache UIMA extracted annotations to identify tokens/types/etc.
|
||||
|
||||
common/src/java
|
||||
icu/src/java
|
||||
kuromoji/src/java
|
||||
|
@ -48,6 +52,7 @@ morfologik/src/java
|
|||
phonetic/src/java
|
||||
smartcn/src/java
|
||||
stempel/src/java
|
||||
uima/src/java
|
||||
The source code for the libraries.
|
||||
|
||||
common/src/test
|
||||
|
@ -57,4 +62,5 @@ morfologik/src/test
|
|||
phonetic/src/test
|
||||
smartcn/src/test
|
||||
stempel/src/test
|
||||
uima/src/test
|
||||
Unit tests for the libraries.
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
- morfologik: Morfologik Stemmer
|
||||
- smartcn: Smart Analyzer for Simplified Chinese Text
|
||||
- stempel: Algorithmic Stemmer for Polish
|
||||
- uima: UIMA Analysis module
|
||||
</description>
|
||||
|
||||
<target name="common">
|
||||
|
@ -57,8 +58,12 @@
|
|||
<ant dir="stempel" />
|
||||
</target>
|
||||
|
||||
<target name="uima">
|
||||
<ant dir="uima" />
|
||||
</target>
|
||||
|
||||
<target name="default" depends="compile"/>
|
||||
<target name="compile" depends="common,icu,kuromoji,morfologik,phonetic,smartcn,stempel" />
|
||||
<target name="compile" depends="common,icu,kuromoji,morfologik,phonetic,smartcn,stempel,uima" />
|
||||
|
||||
<target name="clean">
|
||||
<ant dir="common" target="clean" />
|
||||
|
@ -68,6 +73,7 @@
|
|||
<ant dir="phonetic" target="clean" />
|
||||
<ant dir="smartcn" target="clean" />
|
||||
<ant dir="stempel" target="clean" />
|
||||
<ant dir="uima" target="clean" />
|
||||
</target>
|
||||
<target name="validate">
|
||||
<ant dir="common" target="validate" />
|
||||
|
@ -77,6 +83,7 @@
|
|||
<ant dir="phonetic" target="validate" />
|
||||
<ant dir="smartcn" target="validate" />
|
||||
<ant dir="stempel" target="validate" />
|
||||
<ant dir="uima" target="validate" />
|
||||
</target>
|
||||
<target name="compile-core">
|
||||
<ant dir="common" target="compile-core" />
|
||||
|
@ -86,6 +93,7 @@
|
|||
<ant dir="phonetic" target="compile-core" />
|
||||
<ant dir="smartcn" target="compile-core" />
|
||||
<ant dir="stempel" target="compile-core" />
|
||||
<ant dir="uima" target="compile-core" />
|
||||
</target>
|
||||
<target name="compile-test">
|
||||
<ant dir="common" target="compile-test" />
|
||||
|
@ -95,6 +103,7 @@
|
|||
<ant dir="phonetic" target="compile-test" />
|
||||
<ant dir="smartcn" target="compile-test" />
|
||||
<ant dir="stempel" target="compile-test" />
|
||||
<ant dir="uima" target="compile-test" />
|
||||
</target>
|
||||
<target name="test">
|
||||
<ant dir="common" target="test" />
|
||||
|
@ -104,6 +113,7 @@
|
|||
<ant dir="phonetic" target="test" />
|
||||
<ant dir="smartcn" target="test" />
|
||||
<ant dir="stempel" target="test" />
|
||||
<ant dir="uima" target="test" />
|
||||
</target>
|
||||
|
||||
<target name="build-artifacts-and-tests" depends="default,compile-test" />
|
||||
|
@ -116,6 +126,7 @@
|
|||
<ant dir="phonetic" target="dist-maven" />
|
||||
<ant dir="smartcn" target="dist-maven" />
|
||||
<ant dir="stempel" target="dist-maven" />
|
||||
<ant dir="uima" target="dist-maven" />
|
||||
</target>
|
||||
|
||||
<target name="javadocs">
|
||||
|
@ -126,6 +137,7 @@
|
|||
<ant dir="phonetic" target="javadocs" />
|
||||
<ant dir="smartcn" target="javadocs" />
|
||||
<ant dir="stempel" target="javadocs" />
|
||||
<ant dir="uima" target="javadocs" />
|
||||
</target>
|
||||
|
||||
<target name="javadocs-index.html">
|
||||
|
@ -136,6 +148,7 @@
|
|||
<ant dir="phonetic" target="javadocs-index.html" />
|
||||
<ant dir="smartcn" target="javadocs-index.html" />
|
||||
<ant dir="stempel" target="javadocs-index.html" />
|
||||
<ant dir="uima" target="javadocs-index.html" />
|
||||
</target>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<project name="analyzers-uima" default="default">
|
||||
|
||||
<description>
|
||||
UIMA Analysis module
|
||||
</description>
|
||||
|
||||
<property name="build.dir" location="../build/uima" />
|
||||
<property name="dist.dir" location="../dist/uima" />
|
||||
<property name="tests.userdir" value="src/test-files"/>
|
||||
<property name="tests.threadspercpu" value="0" />
|
||||
|
||||
<path id="additional.dependencies">
|
||||
<fileset dir="lib" includes="*.jar"/>
|
||||
</path>
|
||||
|
||||
<pathconvert property="project.classpath" targetos="unix" refid="additional.dependencies" />
|
||||
|
||||
<import file="../../../lucene/contrib/contrib-build.xml"/>
|
||||
|
||||
<path id="classpath">
|
||||
<pathelement path="${analyzers-common.jar}"/>
|
||||
<pathelement path="${tests.userdir}"/>
|
||||
<path refid="base.classpath"/>
|
||||
</path>
|
||||
|
||||
<target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
|
||||
</project>
|
|
@ -0,0 +1,2 @@
|
|||
AnyObjectId[dffd510b7429dcbe37a283da92cbf06c1cfbe383] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,7 @@
|
|||
|
||||
UIMA Annotator: Tagger
|
||||
Copyright 2006-2010 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
AnyObjectId[10866014d8887bfdd8bfec43d3fdd780428d4ed4] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,7 @@
|
|||
|
||||
UIMA Annotator: WhitespaceTokenizer
|
||||
Copyright 2006-2010 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
AnyObjectId[e876a9749eed73ec2c95b83cf534d7a373130569] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,13 @@
|
|||
|
||||
UIMA Base: uimaj-core
|
||||
Copyright 2006-2010 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
||||
Portions of Apache UIMA were originally developed by
|
||||
International Business Machines Corporation and are
|
||||
licensed to the Apache Software Foundation under the
|
||||
"Software Grant License Agreement", informally known as the
|
||||
"IBM UIMA License Agreement".
|
||||
Copyright (c) 2003, 2006 IBM Corporation.
|
|
@ -0,0 +1,81 @@
|
|||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.CAS;
|
||||
import org.apache.uima.cas.FSIterator;
|
||||
import org.apache.uima.cas.text.AnnotationFS;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* Abstract base implementation of a {@link Tokenizer} which is able to analyze the given input with a
|
||||
* UIMA {@link AnalysisEngine}
|
||||
*/
|
||||
public abstract class BaseUIMATokenizer extends Tokenizer {
|
||||
|
||||
protected FSIterator<AnnotationFS> iterator;
|
||||
|
||||
protected BaseUIMATokenizer(Reader reader) {
|
||||
super(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* analyzes the tokenizer input using the given analysis engine
|
||||
*
|
||||
* @param analysisEngine the AE to use for analyzing the tokenizer input
|
||||
* @return CAS with extracted metadata (UIMA annotations, feature structures)
|
||||
* @throws ResourceInitializationException
|
||||
*
|
||||
* @throws AnalysisEngineProcessException
|
||||
* @throws IOException
|
||||
*/
|
||||
protected CAS analyzeInput(AnalysisEngine analysisEngine) throws ResourceInitializationException,
|
||||
AnalysisEngineProcessException, IOException {
|
||||
CAS cas = analysisEngine.newCAS();
|
||||
cas.setDocumentText(toString(input));
|
||||
analysisEngine.process(cas);
|
||||
analysisEngine.destroy();
|
||||
return cas;
|
||||
}
|
||||
|
||||
private String toString(Reader reader) throws IOException {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
int ch;
|
||||
while ((ch = reader.read()) > -1) {
|
||||
stringBuilder.append((char) ch);
|
||||
}
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset(Reader input) throws IOException {
|
||||
super.reset(input);
|
||||
iterator = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() throws IOException {
|
||||
iterator = null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.CAS;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.cas.text.AnnotationFS;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.apache.uima.util.InvalidXMLException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* a {@link Tokenizer} which creates tokens from UIMA Annotations
|
||||
*/
|
||||
public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||
|
||||
private final CharTermAttribute termAttr;
|
||||
|
||||
private final OffsetAttribute offsetAttr;
|
||||
|
||||
private final String tokenTypeString;
|
||||
|
||||
private final String descriptorPath;
|
||||
|
||||
private int finalOffset = 0;
|
||||
|
||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Reader input) {
|
||||
super(input);
|
||||
this.tokenTypeString = tokenType;
|
||||
this.termAttr = addAttribute(CharTermAttribute.class);
|
||||
this.offsetAttr = addAttribute(OffsetAttribute.class);
|
||||
this.descriptorPath = descriptorPath;
|
||||
}
|
||||
|
||||
private void analyzeText(String descriptorPath) throws IOException, ResourceInitializationException,
|
||||
AnalysisEngineProcessException {
|
||||
AnalysisEngine ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
|
||||
CAS cas = analyzeInput(ae);
|
||||
finalOffset = correctOffset(cas.getDocumentText().length());
|
||||
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
|
||||
iterator = cas.getAnnotationIndex(tokenType).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (iterator == null) {
|
||||
try {
|
||||
analyzeText(descriptorPath);
|
||||
} catch (Exception e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
if (iterator.hasNext()) {
|
||||
clearAttributes();
|
||||
AnnotationFS next = iterator.next();
|
||||
termAttr.append(next.getCoveredText());
|
||||
offsetAttr.setOffset(correctOffset(next.getBegin()), correctOffset(next.getEnd()));
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() throws IOException {
|
||||
if (offsetAttr.endOffset() < finalOffset)
|
||||
offsetAttr.setOffset(finalOffset, finalOffset);
|
||||
super.end();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* An {@link Analyzer} which use the {@link UIMAAnnotationsTokenizer} for creating tokens
|
||||
*/
|
||||
public final class UIMABaseAnalyzer extends Analyzer {
|
||||
|
||||
private final String descriptorPath;
|
||||
private final String tokenType;
|
||||
|
||||
public UIMABaseAnalyzer(String descriptorPath, String tokenType) {
|
||||
this.descriptorPath = descriptorPath;
|
||||
this.tokenType = tokenType;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, reader));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} which uses the {@link UIMATypeAwareAnnotationsTokenizer} for the tokenization phase
|
||||
*/
|
||||
public final class UIMATypeAwareAnalyzer extends Analyzer {
|
||||
private final String descriptorPath;
|
||||
private final String tokenType;
|
||||
private final String featurePath;
|
||||
|
||||
public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath) {
|
||||
this.descriptorPath = descriptorPath;
|
||||
this.tokenType = tokenType;
|
||||
this.featurePath = featurePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, reader));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,110 @@
|
|||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.CAS;
|
||||
import org.apache.uima.cas.CASException;
|
||||
import org.apache.uima.cas.FeaturePath;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.cas.text.AnnotationFS;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.apache.uima.util.InvalidXMLException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* A {@link Tokenizer} which creates token from UIMA Annotations filling also their {@link TypeAttribute} according to
|
||||
* {@link org.apache.uima.cas.FeaturePath}s specified
|
||||
*/
|
||||
public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||
|
||||
private final TypeAttribute typeAttr;
|
||||
|
||||
private final CharTermAttribute termAttr;
|
||||
|
||||
private final OffsetAttribute offsetAttr;
|
||||
|
||||
private final String tokenTypeString;
|
||||
|
||||
private final String descriptorPath;
|
||||
|
||||
private final String typeAttributeFeaturePath;
|
||||
|
||||
private FeaturePath featurePath;
|
||||
|
||||
private int finalOffset = 0;
|
||||
|
||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Reader input) {
|
||||
super(input);
|
||||
this.tokenTypeString = tokenType;
|
||||
this.termAttr = addAttribute(CharTermAttribute.class);
|
||||
this.typeAttr = addAttribute(TypeAttribute.class);
|
||||
this.offsetAttr = addAttribute(OffsetAttribute.class);
|
||||
this.typeAttributeFeaturePath = typeAttributeFeaturePath;
|
||||
this.descriptorPath = descriptorPath;
|
||||
}
|
||||
|
||||
private void analyzeText() throws IOException, ResourceInitializationException, AnalysisEngineProcessException,
|
||||
CASException {
|
||||
AnalysisEngine ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
|
||||
CAS cas = analyzeInput(ae);
|
||||
finalOffset = correctOffset(cas.getDocumentText().length());
|
||||
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
|
||||
iterator = cas.getAnnotationIndex(tokenType).iterator();
|
||||
featurePath = cas.createFeaturePath();
|
||||
featurePath.initialize(typeAttributeFeaturePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (iterator == null) {
|
||||
try {
|
||||
analyzeText();
|
||||
} catch (Exception e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
if (iterator.hasNext()) {
|
||||
clearAttributes();
|
||||
AnnotationFS next = iterator.next();
|
||||
termAttr.append(next.getCoveredText());
|
||||
offsetAttr.setOffset(correctOffset(next.getBegin()), correctOffset(next.getEnd()));
|
||||
typeAttr.setType(featurePath.getValueAsString(next));
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() throws IOException {
|
||||
if (offsetAttr.endOffset() < finalOffset)
|
||||
offsetAttr.setOffset(finalOffset, finalOffset);
|
||||
super.end();
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
|
||||
/**
|
||||
* provide an Apache UIMA {@link AnalysisEngine}
|
||||
*
|
||||
*/
|
||||
public interface AEProvider {
|
||||
|
||||
/**
|
||||
*
|
||||
* @return
|
||||
* @throws ResourceInitializationException
|
||||
*/
|
||||
public AnalysisEngine getAE() throws ResourceInitializationException;
|
||||
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Singleton factory class responsible of {@link AEProvider}s' creation
|
||||
*
|
||||
*/
|
||||
public class AEProviderFactory {
|
||||
|
||||
private static AEProviderFactory instance;
|
||||
|
||||
private final Map<String, AEProvider> providerCache = new HashMap<String, AEProvider>();
|
||||
|
||||
private AEProviderFactory() {
|
||||
// Singleton
|
||||
}
|
||||
|
||||
public static AEProviderFactory getInstance() {
|
||||
if (instance == null) {
|
||||
instance = new AEProviderFactory();
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param keyPrefix
|
||||
* @param aePath
|
||||
* @return
|
||||
*/
|
||||
public synchronized AEProvider getAEProvider(String keyPrefix, String aePath) {
|
||||
String key = new StringBuilder(keyPrefix).append(aePath).append(BasicAEProvider.class).toString();
|
||||
if (providerCache.get(key) == null) {
|
||||
providerCache.put(key, new BasicAEProvider(aePath));
|
||||
}
|
||||
return providerCache.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param keyPrefix
|
||||
* @param aePath
|
||||
* @param runtimeParameters
|
||||
* @return
|
||||
*/
|
||||
public synchronized AEProvider getAEProvider(String keyPrefix, String aePath,
|
||||
Map<String, Object> runtimeParameters) {
|
||||
String key = new StringBuilder(keyPrefix).append(aePath).append(OverridingParamsAEProvider.class).toString();
|
||||
if (providerCache.get(key) == null) {
|
||||
providerCache.put(key, new OverridingParamsAEProvider(aePath, runtimeParameters));
|
||||
}
|
||||
return providerCache.get(key);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.uima.UIMAFramework;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.apache.uima.util.XMLInputSource;
|
||||
|
||||
/**
|
||||
* Basic {@link AEProvider} which just instantiates a UIMA {@link AnalysisEngine} with no additional metadata,
|
||||
* parameters or resources
|
||||
*/
|
||||
public class BasicAEProvider implements AEProvider {
|
||||
|
||||
private final String aePath;
|
||||
private AnalysisEngine cachedAE;
|
||||
|
||||
public BasicAEProvider(String aePath) {
|
||||
this.aePath = aePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized AnalysisEngine getAE() throws ResourceInitializationException {
|
||||
try {
|
||||
if (cachedAE == null) {
|
||||
// get Resource Specifier from XML file
|
||||
|
||||
XMLInputSource in;
|
||||
try {
|
||||
in = new XMLInputSource(aePath);
|
||||
} catch (Exception e) {
|
||||
in = new XMLInputSource(getClass().getResource(aePath));
|
||||
}
|
||||
|
||||
// get AE description
|
||||
AnalysisEngineDescription desc = UIMAFramework.getXMLParser()
|
||||
.parseAnalysisEngineDescription(in);
|
||||
|
||||
// create AE here
|
||||
cachedAE = UIMAFramework.produceAnalysisEngine(desc);
|
||||
} else {
|
||||
cachedAE.reconfigure();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
cachedAE = null;
|
||||
throw new ResourceInitializationException(e);
|
||||
}
|
||||
return cachedAE;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.uima.UIMAFramework;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.apache.uima.util.XMLInputSource;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* {@link AEProvider} implementation that creates an Aggregate AE from the given path, also
|
||||
* injecting runtime parameters defined in the solrconfig.xml Solr configuration file and assigning
|
||||
* them as overriding parameters in the aggregate AE
|
||||
*/
|
||||
public class OverridingParamsAEProvider implements AEProvider {
|
||||
|
||||
private final String aePath;
|
||||
|
||||
private AnalysisEngine cachedAE;
|
||||
|
||||
private final Map<String, Object> runtimeParameters;
|
||||
|
||||
public OverridingParamsAEProvider(String aePath, Map<String, Object> runtimeParameters) {
|
||||
this.aePath = aePath;
|
||||
this.runtimeParameters = runtimeParameters;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized AnalysisEngine getAE() throws ResourceInitializationException {
|
||||
try {
|
||||
if (cachedAE == null) {
|
||||
// get Resource Specifier from XML file
|
||||
XMLInputSource in;
|
||||
try {
|
||||
in = new XMLInputSource(aePath);
|
||||
} catch (Exception e) {
|
||||
in = new XMLInputSource(getClass().getResource(aePath));
|
||||
}
|
||||
|
||||
// get AE description
|
||||
AnalysisEngineDescription desc = UIMAFramework.getXMLParser()
|
||||
.parseAnalysisEngineDescription(in);
|
||||
|
||||
/* iterate over each AE (to set runtime parameters) */
|
||||
for (String attributeName : runtimeParameters.keySet()) {
|
||||
Object val = getRuntimeValue(desc, attributeName);
|
||||
desc.getAnalysisEngineMetaData().getConfigurationParameterSettings().setParameterValue(
|
||||
attributeName, val);
|
||||
}
|
||||
// create AE here
|
||||
cachedAE = UIMAFramework.produceAnalysisEngine(desc);
|
||||
} else {
|
||||
cachedAE.reconfigure();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
cachedAE = null;
|
||||
throw new ResourceInitializationException(e);
|
||||
}
|
||||
return cachedAE;
|
||||
}
|
||||
|
||||
/* create the value to inject in the runtime parameter depending on its declared type */
|
||||
private Object getRuntimeValue(AnalysisEngineDescription desc, String attributeName) {
|
||||
String type = desc.getAnalysisEngineMetaData().getConfigurationParameterDeclarations().
|
||||
getConfigurationParameter(null, attributeName).getType();
|
||||
// TODO : do it via reflection ? i.e. Class paramType = Class.forName(type)...
|
||||
Object val = null;
|
||||
Object runtimeValue = runtimeParameters.get(attributeName);
|
||||
if (runtimeValue != null) {
|
||||
if ("String".equals(type)) {
|
||||
val = String.valueOf(runtimeValue);
|
||||
} else if ("Integer".equals(type)) {
|
||||
val = Integer.valueOf(runtimeValue.toString());
|
||||
} else if ("Boolean".equals(type)) {
|
||||
val = Boolean.valueOf(runtimeValue.toString());
|
||||
} else if ("Float".equals(type)) {
|
||||
val = Float.valueOf(runtimeValue.toString());
|
||||
}
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>false</primitive>
|
||||
<delegateAnalysisEngineSpecifiers>
|
||||
<delegateAnalysisEngine key="WhitespaceTokenizer">
|
||||
<import name="WhitespaceTokenizer"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="HmmTagger">
|
||||
<import name="HmmTagger"/>
|
||||
</delegateAnalysisEngine>
|
||||
</delegateAnalysisEngineSpecifiers>
|
||||
<analysisEngineMetaData>
|
||||
<name>AggregateSentenceAE</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters>
|
||||
<configurationParameter>
|
||||
<name>ngramsize</name>
|
||||
<type>Integer</type>
|
||||
<multiValued>false</multiValued>
|
||||
<mandatory>false</mandatory>
|
||||
<overrides>
|
||||
<parameter>HmmTagger/NGRAM_SIZE</parameter>
|
||||
</overrides>
|
||||
</configurationParameter>
|
||||
</configurationParameters>
|
||||
<configurationParameterSettings/>
|
||||
<flowConstraints>
|
||||
<fixedFlow>
|
||||
<node>WhitespaceTokenizer</node>
|
||||
<node>HmmTagger</node>
|
||||
</fixedFlow>
|
||||
</flowConstraints>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs>
|
||||
<type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
|
||||
<type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
|
||||
</outputs>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -0,0 +1,59 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>false</primitive>
|
||||
<delegateAnalysisEngineSpecifiers>
|
||||
<delegateAnalysisEngine key="WhitespaceTokenizer">
|
||||
<import name="WhitespaceTokenizer"/>
|
||||
</delegateAnalysisEngine>
|
||||
<delegateAnalysisEngine key="DummyPoSTagger">
|
||||
<import location="DummyPoSTagger.xml"/>
|
||||
</delegateAnalysisEngine>
|
||||
</delegateAnalysisEngineSpecifiers>
|
||||
<analysisEngineMetaData>
|
||||
<name>AggregateDummySentenceAE</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameterSettings/>
|
||||
<flowConstraints>
|
||||
<fixedFlow>
|
||||
<node>WhitespaceTokenizer</node>
|
||||
<node>DummyPoSTagger</node>
|
||||
</fixedFlow>
|
||||
</flowConstraints>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs>
|
||||
<type allAnnotatorFeatures="true">org.apache.uima.SentenceAnnotation</type>
|
||||
<type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
|
||||
</outputs>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -0,0 +1,68 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!--
  Primitive analysis engine descriptor for the DummyEntityAnnotator test annotator.
  The EntityAnnotation type declared here must carry the same fully qualified name that
  DummyEntityAnnotator looks up in the CAS type system.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>true</primitive>
  <annotatorImplementationName>org.apache.lucene.analysis.uima.an.DummyEntityAnnotator</annotatorImplementationName>
  <analysisEngineMetaData>
    <name>DummyEntityAE</name>
    <description/>
    <version>1.0</version>
    <vendor>ASF</vendor>
    <configurationParameters/>
    <configurationParameterSettings/>
    <typeSystemDescription>
      <types>
        <typeDescription>
          <name>org.apache.lucene.analysis.uima.ts.EntityAnnotation</name>
          <description/>
          <supertypeName>uima.tcas.Annotation</supertypeName>
          <features>
            <featureDescription>
              <name>name</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
            <featureDescription>
              <name>entity</name>
              <description/>
              <rangeTypeName>uima.cas.String</rangeTypeName>
            </featureDescription>
          </features>
        </typeDescription>
      </types>
    </typeSystemDescription>
    <typePriorities/>
    <fsIndexCollection/>
    <capabilities>
      <capability>
        <inputs/>
        <outputs>
          <type allAnnotatorFeatures="true">org.apache.lucene.analysis.uima.ts.EntityAnnotation</type>
        </outputs>
        <languagesSupported/>
      </capability>
    </capabilities>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
  <resourceManagerConfiguration/>
</analysisEngineDescription>
|
|
@ -0,0 +1,50 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.lucene.analysis.uima.an.DummyPoSTagger</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>DummyPoSTagger</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor>ASF</vendor>
|
||||
<configurationParameters/>
|
||||
<configurationParameterSettings/>
|
||||
<typeSystemDescription/>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs>
|
||||
<type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
|
||||
</inputs>
|
||||
<outputs>
|
||||
<type allAnnotatorFeatures="true">org.apache.uima.TokenAnnotation</type>
|
||||
</outputs>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -0,0 +1,125 @@
|
|||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
/**
|
||||
* Testcase for {@link UIMABaseAnalyzer}
|
||||
*/
|
||||
public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
|
||||
|
||||
private UIMABaseAnalyzer analyzer;
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation");
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
analyzer.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void baseUIMAAnalyzerStreamTest() throws Exception {
|
||||
TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
|
||||
assertTokenStreamContents(ts, new String[]{"the", "big", "brown", "fox", "jumped", "on", "the", "wood"});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void baseUIMAAnalyzerIntegrationTest() throws Exception {
|
||||
Directory dir = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
|
||||
// add the first doc
|
||||
Document doc = new Document();
|
||||
String dummyTitle = "this is a dummy title ";
|
||||
doc.add(new Field("title", dummyTitle, TextField.TYPE_STORED));
|
||||
String dummyContent = "there is some content written here";
|
||||
doc.add(new Field("contents", dummyContent, TextField.TYPE_STORED));
|
||||
writer.addDocument(doc, analyzer);
|
||||
writer.commit();
|
||||
|
||||
// try the search over the first doc
|
||||
DirectoryReader directoryReader = DirectoryReader.open(dir);
|
||||
IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
|
||||
TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 10);
|
||||
assertTrue(result.totalHits > 0);
|
||||
Document d = indexSearcher.doc(result.scoreDocs[0].doc);
|
||||
assertNotNull(d);
|
||||
assertNotNull(d.getField("title"));
|
||||
assertEquals(dummyTitle, d.getField("title").stringValue());
|
||||
assertNotNull(d.getField("contents"));
|
||||
assertEquals(dummyContent, d.getField("contents").stringValue());
|
||||
|
||||
// add a second doc
|
||||
doc = new Document();
|
||||
String dogmasTitle = "dogmas";
|
||||
doc.add(new Field("title", dogmasTitle, TextField.TYPE_STORED));
|
||||
String dogmasContents = "white men can't jump";
|
||||
doc.add(new Field("contents", dogmasContents, TextField.TYPE_STORED));
|
||||
writer.addDocument(doc, analyzer);
|
||||
writer.commit();
|
||||
|
||||
directoryReader.close();
|
||||
directoryReader = DirectoryReader.open(dir);
|
||||
indexSearcher = new IndexSearcher(directoryReader);
|
||||
result = indexSearcher.search(new MatchAllDocsQuery(), 10);
|
||||
Document d1 = indexSearcher.doc(result.scoreDocs[1].doc);
|
||||
assertNotNull(d1);
|
||||
assertNotNull(d1.getField("title"));
|
||||
assertEquals(dogmasTitle, d1.getField("title").stringValue());
|
||||
assertNotNull(d1.getField("contents"));
|
||||
assertEquals(dogmasContents, d1.getField("contents").stringValue());
|
||||
|
||||
// do a matchalldocs query to retrieve both docs
|
||||
indexSearcher = new IndexSearcher(directoryReader);
|
||||
result = indexSearcher.search(new MatchAllDocsQuery(), 10);
|
||||
assertEquals(2, result.totalHits);
|
||||
writer.close();
|
||||
indexSearcher.getIndexReader().close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation"),
|
||||
1000 * RANDOM_MULTIPLIER);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
package org.apache.lucene.analysis.uima;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
/**
|
||||
* Testcase for {@link UIMATypeAwareAnalyzer}
|
||||
*/
|
||||
public class UIMATypeAwareAnalyzerTest extends BaseTokenStreamTestCase {
|
||||
|
||||
private UIMATypeAwareAnalyzer analyzer;
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
analyzer = new UIMATypeAwareAnalyzer("/uima/AggregateSentenceAE.xml",
|
||||
"org.apache.uima.TokenAnnotation", "posTag");
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
analyzer.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void baseUIMATypeAwareAnalyzerStreamTest() throws Exception {
|
||||
|
||||
// create a token stream
|
||||
TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
|
||||
|
||||
// check that 'the big brown fox jumped on the wood' tokens have the expected PoS types
|
||||
assertTokenStreamContents(ts,
|
||||
new String[]{"the", "big", "brown", "fox", "jumped", "on", "the", "wood"},
|
||||
new String[]{"at", "jj", "jj", "nn", "vbd", "in", "at", "nn"});
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new UIMATypeAwareAnalyzer("/uima/AggregateDummySentenceAE.xml",
|
||||
"org.apache.uima.TokenAnnotation", "tokenType"), 1000 * RANDOM_MULTIPLIER);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
|
||||
/**
|
||||
* TestCase for {@link BasicAEProvider}
|
||||
*/
|
||||
public class BasicAEProviderTest {
|
||||
|
||||
@Test
|
||||
public void testBasicInititalization() throws Exception {
|
||||
AEProvider basicAEProvider = new BasicAEProvider("/uima/DummyEntityAE.xml");
|
||||
AnalysisEngine analysisEngine = basicAEProvider.getAE();
|
||||
assertNotNull(analysisEngine);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
package org.apache.lucene.analysis.uima.ae;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
/**
|
||||
* TestCase for {@link OverridingParamsAEProvider}
|
||||
*/
|
||||
public class OverridingParamsAEProviderTest {
|
||||
|
||||
@Test
|
||||
public void testNullMapInitialization() throws Exception {
|
||||
try {
|
||||
AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", null);
|
||||
aeProvider.getAE();
|
||||
fail("should fail due to null Map passed");
|
||||
} catch (ResourceInitializationException e) {
|
||||
// everything ok
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmptyMapInitialization() throws Exception {
|
||||
AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", new HashMap<String, Object>());
|
||||
AnalysisEngine analysisEngine = aeProvider.getAE();
|
||||
assertNotNull(analysisEngine);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOverridingParamsInitialization() throws Exception {
|
||||
Map<String, Object> runtimeParameters = new HashMap<String, Object>();
|
||||
runtimeParameters.put("ngramsize", "3");
|
||||
AEProvider aeProvider = new OverridingParamsAEProvider("/uima/AggregateSentenceAE.xml", runtimeParameters);
|
||||
AnalysisEngine analysisEngine = aeProvider.getAE();
|
||||
assertNotNull(analysisEngine);
|
||||
assertEquals(analysisEngine.getConfigParameterValue("ngramsize"), 3);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
package org.apache.lucene.analysis.uima.an;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.uima.TokenAnnotation;
|
||||
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.Feature;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.cas.text.AnnotationFS;
|
||||
import org.apache.uima.jcas.JCas;
|
||||
import org.apache.uima.jcas.tcas.Annotation;
|
||||
|
||||
public class DummyEntityAnnotator extends JCasAnnotator_ImplBase {
|
||||
|
||||
private static final String NP = "np";
|
||||
private static final String NPS = "nps";
|
||||
private static final String TYPE_NAME = "org.apache.lucene.analysis.uima.ts.EntityAnnotation";
|
||||
private static final String ENTITY_FEATURE = "entity";
|
||||
private static final String NAME_FEATURE = "entity";
|
||||
|
||||
@Override
|
||||
public void process(JCas jcas) throws AnalysisEngineProcessException {
|
||||
Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
|
||||
Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE);
|
||||
Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE);
|
||||
|
||||
for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
|
||||
String tokenPOS = ((TokenAnnotation) annotation).getPosTag();
|
||||
|
||||
if (NP.equals(tokenPOS) || NPS.equals(tokenPOS)) {
|
||||
AnnotationFS entityAnnotation = jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd());
|
||||
|
||||
entityAnnotation.setStringValue(entityFeature, annotation.getCoveredText());
|
||||
|
||||
String name = "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
|
||||
if (annotation.getCoveredText().equals("Apache"))
|
||||
name = "ORGANIZATION";
|
||||
entityAnnotation.setStringValue(nameFeature, name);
|
||||
|
||||
jcas.addFsToIndexes(entityAnnotation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
package org.apache.lucene.analysis.uima.an;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.uima.TokenAnnotation;
|
||||
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.cas.Feature;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.jcas.JCas;
|
||||
import org.apache.uima.jcas.tcas.Annotation;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class DummyPoSTagger extends JCasAnnotator_ImplBase {
|
||||
|
||||
private static final String NUM = "NUM";
|
||||
private static final String WORD = "WORD";
|
||||
private static final String TYPE_NAME = "org.apache.uima.TokenAnnotation";
|
||||
private static final String FEATURE_NAME = "tokenType";
|
||||
|
||||
@Override
|
||||
public void process(JCas jcas) throws AnalysisEngineProcessException {
|
||||
Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
|
||||
Feature posFeature = type.getFeatureByBaseName(FEATURE_NAME);
|
||||
|
||||
for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
|
||||
String text = annotation.getCoveredText();
|
||||
String pos = extractPoS(text);
|
||||
annotation.setStringValue(posFeature, pos);
|
||||
}
|
||||
}
|
||||
|
||||
private String extractPoS(String text) {
|
||||
try {
|
||||
Double.valueOf(text);
|
||||
return NUM;
|
||||
} catch (Exception e) {
|
||||
return WORD;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue