diff --git a/modules/analysis/uima/src/test-files/uima/AggregateDummySentenceAE.xml b/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml
similarity index 77%
rename from modules/analysis/uima/src/test-files/uima/AggregateDummySentenceAE.xml
rename to modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml
index 8769b189404..51a778b133f 100644
--- a/modules/analysis/uima/src/test-files/uima/AggregateDummySentenceAE.xml
+++ b/modules/analysis/uima/src/test-files/uima/TestAggregateSentenceAE.xml
@@ -20,33 +20,30 @@
false
-
+
-
-
+
+
- AggregateSentenceAE
+ TestAggregateSentenceAE
1.0
-
-
+ ASF
WhitespaceTokenizer
- DummyPoSTagger
+ PoSTagger
-
- org.apache.uima.SentenceAnnotation
- org.apache.uima.TokenAnnotation
+ org.apache.lucene.uima.ts.SentenceAnnotation
+ org.apache.lucene.uima.ts.TokenAnnotation
-
@@ -55,5 +52,4 @@
false
-
diff --git a/modules/analysis/uima/src/test-files/uima/DummyEntityAE.xml b/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
similarity index 86%
rename from modules/analysis/uima/src/test-files/uima/DummyEntityAE.xml
rename to modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
index 8827562a569..d7ec826fd50 100644
--- a/modules/analysis/uima/src/test-files/uima/DummyEntityAE.xml
+++ b/modules/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
@@ -18,18 +18,16 @@
org.apache.uima.java
true
- org.apache.lucene.analysis.uima.an.DummyEntityAnnotator
+ org.apache.lucene.analysis.uima.an.SampleEntityAnnotator
DummyPoSTagger
1.0
ASF
-
-
- org.apache.solr.uima.ts.EntityAnnotation
+ org.apache.lucene.uima.ts.EntityAnnotation
uima.tcas.Annotation
@@ -47,13 +45,13 @@
-
-
-
+
+ org.apache.lucene.uima.ts.TokenAnnotation
+
- org.apache.solr.uima.ts.EntityAnnotation
+ org.apache.lucene.uima.ts.EntityAnnotation
diff --git a/modules/analysis/uima/src/test-files/uima/DummyPoSTagger.xml b/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml
similarity index 80%
rename from modules/analysis/uima/src/test-files/uima/DummyPoSTagger.xml
rename to modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml
index 7677502b959..eede632eadf 100644
--- a/modules/analysis/uima/src/test-files/uima/DummyPoSTagger.xml
+++ b/modules/analysis/uima/src/test-files/uima/TestPoSTaggerAE.xml
@@ -18,24 +18,19 @@
org.apache.uima.java
true
- org.apache.lucene.analysis.uima.an.DummyPoSTagger
+ org.apache.lucene.analysis.uima.an.SamplePoSTagger
DummyPoSTagger
1.0
ASF
-
-
-
-
-
- org.apache.uima.TokenAnnotation
+ org.apache.lucene.uima.ts.TokenAnnotation
- org.apache.uima.TokenAnnotation
+ org.apache.lucene.uima.ts.TokenAnnotation
@@ -46,5 +41,4 @@
false
-
diff --git a/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml b/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
new file mode 100644
index 00000000000..596a830eb21
--- /dev/null
+++ b/modules/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
@@ -0,0 +1,59 @@
+
+
+
+ org.apache.uima.java
+ true
+ org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator
+
+ DummyPoSTagger
+ 1.0
+ ASF
+
+
+
+ org.apache.lucene.uima.ts.TokenAnnotation
+ uima.tcas.Annotation
+
+
+ pos
+ uima.cas.String
+
+
+
+
+ org.apache.lucene.uima.ts.SentenceAnnotation
+ uima.tcas.Annotation
+
+
+
+
+
+
+
+ org.apache.lucene.uima.ts.TokenAnnotation
+ org.apache.lucene.uima.ts.SentenceAnnotation
+
+
+
+
+ true
+ true
+ false
+
+
+
diff --git a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
index 7b828527201..0fee64a0823 100644
--- a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
+++ b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
@@ -118,7 +118,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
@Test
public void testRandomStrings() throws Exception {
- checkRandomData(random, new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation"),
+ checkRandomData(random, new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation"),
1000 * RANDOM_MULTIPLIER);
}
diff --git a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
index e7b4de9ea67..85035cc5d77 100644
--- a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
+++ b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
@@ -60,8 +60,8 @@ public class UIMATypeAwareAnalyzerTest extends BaseTokenStreamTestCase {
@Test
public void testRandomStrings() throws Exception {
- checkRandomData(random, new UIMATypeAwareAnalyzer("/uima/AggregateDummySentenceAE.xml",
- "org.apache.uima.TokenAnnotation", "tokenType"), 1000 * RANDOM_MULTIPLIER);
+ checkRandomData(random, new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml",
+ "org.apache.lucene.uima.ts.TokenAnnotation", "pos"), 1000 * RANDOM_MULTIPLIER);
}
}
diff --git a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java
index 08735d9d713..3fe46addd8a 100644
--- a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java
+++ b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/BasicAEProviderTest.java
@@ -28,8 +28,8 @@ import static org.junit.Assert.assertNotNull;
public class BasicAEProviderTest {
@Test
- public void testBasicInititalization() throws Exception {
- AEProvider basicAEProvider = new BasicAEProvider("/uima/DummyEntityAE.xml");
+ public void testBasicInitialization() throws Exception {
+ AEProvider basicAEProvider = new BasicAEProvider("/uima/TestEntityAnnotatorAE.xml");
AnalysisEngine analysisEngine = basicAEProvider.getAE();
assertNotNull(analysisEngine);
}
diff --git a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java
index f8325fe5968..c3419253a2a 100644
--- a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java
+++ b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/ae/OverridingParamsAEProviderTest.java
@@ -34,7 +34,7 @@ public class OverridingParamsAEProviderTest {
@Test
public void testNullMapInitialization() throws Exception {
try {
- AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", null);
+ AEProvider aeProvider = new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", null);
aeProvider.getAE();
fail("should fail due to null Map passed");
} catch (ResourceInitializationException e) {
@@ -44,7 +44,7 @@ public class OverridingParamsAEProviderTest {
@Test
public void testEmptyMapInitialization() throws Exception {
- AEProvider aeProvider = new OverridingParamsAEProvider("/uima/DummyEntityAE.xml", new HashMap());
+ AEProvider aeProvider = new OverridingParamsAEProvider("/uima/TestEntityAnnotatorAE.xml", new HashMap());
AnalysisEngine analysisEngine = aeProvider.getAE();
assertNotNull(analysisEngine);
}
diff --git a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyEntityAnnotator.java b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java
similarity index 93%
rename from modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyEntityAnnotator.java
rename to modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java
index bd6cc9c9e67..319380b0f0d 100644
--- a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyEntityAnnotator.java
+++ b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleEntityAnnotator.java
@@ -26,7 +26,10 @@ import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
-public class DummyEntityAnnotator extends JCasAnnotator_ImplBase {
+/**
+ * Dummy implementation of an entity annotator to tag tokens as certain types of entities
+ */
+public class SampleEntityAnnotator extends JCasAnnotator_ImplBase {
private static final String NP = "np";
private static final String NPS = "nps";
diff --git a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyPoSTagger.java b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java
similarity index 83%
rename from modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyPoSTagger.java
rename to modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java
index a120bce3997..6ff1468dc98 100644
--- a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/DummyPoSTagger.java
+++ b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SamplePoSTagger.java
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.uima.an;
* limitations under the License.
*/
-import org.apache.uima.TokenAnnotation;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.Feature;
@@ -26,20 +25,21 @@ import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
/**
+ * Dummy implementation of a PoS tagger to add part of speech as token types
*/
-public class DummyPoSTagger extends JCasAnnotator_ImplBase {
+public class SamplePoSTagger extends JCasAnnotator_ImplBase {
private static final String NUM = "NUM";
private static final String WORD = "WORD";
- private static final String TYPE_NAME = "org.apache.uima.TokenAnnotation";
- private static final String FEATURE_NAME = "tokenType";
+ private static final String TYPE_NAME = "org.apache.lucene.uima.ts.TokenAnnotation";
+ private static final String FEATURE_NAME = "pos";
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
Feature posFeature = type.getFeatureByBaseName(FEATURE_NAME);
- for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
+ for (Annotation annotation : jcas.getAnnotationIndex(type)) {
String text = annotation.getCoveredText();
String pos = extractPoS(text);
annotation.setStringValue(posFeature, pos);
diff --git a/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
new file mode 100644
index 00000000000..b33666b0321
--- /dev/null
+++ b/modules/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
@@ -0,0 +1,79 @@
+package org.apache.lucene.analysis.uima.an;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.jcas.JCas;
+
+/**
+ * Dummy implementation of a UIMA based whitespace tokenizer.
+ * <p>
+ * Sentences are obtained by splitting the document text on newlines and tokens by
+ * splitting it on single spaces. Every annotation spans the begin/end character
+ * offsets of its segment within the document text.
+ */
+public class SampleWSTokenizerAnnotator extends JCasAnnotator_ImplBase {
+
+  private static final String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation";
+  private static final String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation";
+  private static final String CR = "\n";
+  private static final String WHITESPACE = " ";
+
+  /**
+   * Adds sentence and token annotations for the document text of the given CAS.
+   *
+   * @param jCas the CAS to annotate; nothing is added when it has no document text
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    String text = jCas.getDocumentText();
+    if (text == null) {
+      // no document text has been set, so there is nothing to split
+      return;
+    }
+    Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
+    Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);
+    annotateSegments(jCas, sentenceType, text, CR);
+    annotateSegments(jCas, tokenType, text, WHITESPACE);
+  }
+
+  /**
+   * Splits the text on the separator and indexes one annotation per resulting segment.
+   * Empty segments (e.g. between two consecutive separators) become zero-length annotations.
+   *
+   * @param jCas the CAS that receives the annotations
+   * @param type the annotation type to instantiate
+   * @param text the document text being segmented
+   * @param separator literal delimiter; it is passed to {@link String#split(String)}
+   *                  unescaped, so it must not contain regex metacharacters
+   */
+  private static void annotateSegments(JCas jCas, Type type, String text, String separator) {
+    int begin = 0;
+    for (String segment : text.split(separator)) {
+      int end = begin + segment.length();
+      AnnotationFS annotation = jCas.getCas().createAnnotation(type, begin, end);
+      jCas.addFsToIndexes(annotation);
+      // the next segment starts after the separator that split() consumed
+      begin = end + separator.length();
+    }
+  }
+
+}