From 0c5c13e157f3ed41b526edbbb5e1d826b0a7864b Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Wed, 29 Feb 2012 22:43:12 +0000 Subject: [PATCH] [SOLR-3013] - removing the ae package from Solr as it's now under analysis/uima module, adding the Solr factories for UIMA based tokenizers git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1295330 13f79535-47bb-0310-9956-ffa450edef68 --- solr/contrib/uima/CHANGES.txt | 7 +- solr/contrib/uima/build.xml | 14 + .../UIMAAnnotationsTokenizerFactory.java} | 32 +- ...ATypeAwareAnnotationsTokenizerFactory.java | 48 + .../processor/UIMAUpdateRequestProcessor.java | 4 +- .../uima/processor/ae/AEProviderFactory.java | 53 - .../ae/OverridingParamsAEProvider.java | 117 -- .../uima/src/test-files/uima/stoptypes.txt | 25 + .../uima/uima-tokenizers-schema.xml | 680 +++++++++++ .../uima/uima-tokenizers-solrconfig.xml | 1006 +++++++++++++++++ .../UIMAAnnotationsTokenizerFactoryTest.java | 49 + ...eAwareAnnotationsTokenizerFactoryTest.java | 58 + 12 files changed, 1910 insertions(+), 183 deletions(-) rename solr/contrib/uima/src/java/org/apache/solr/uima/{processor/ae/AEProvider.java => analysis/UIMAAnnotationsTokenizerFactory.java} (50%) create mode 100644 solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactory.java delete mode 100644 solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java delete mode 100644 solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java create mode 100644 solr/contrib/uima/src/test-files/uima/stoptypes.txt create mode 100644 solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml create mode 100644 solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml create mode 100644 solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactoryTest.java create mode 100644 solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactoryTest.java diff --git a/solr/contrib/uima/CHANGES.txt b/solr/contrib/uima/CHANGES.txt index 3a790706dc3..c43b24cd840 100644 --- a/solr/contrib/uima/CHANGES.txt +++ b/solr/contrib/uima/CHANGES.txt @@ -5,9 +5,12 @@ This file describes changes to the Solr UIMA (contrib/uima) module. See SOLR-212 Introduction ------------ -This module is intended to be used while indexing documents. -Its purpose is to provide additional on the fly automatically generated fields to the Solr index. +This module is intended to be used both as an UpdateRequestProcessor while indexing documents and as a set of tokenizer/filters +to be configured inside the schema.xml for use during analysis phase. +UIMAUpdateRequestProcessor purpose is to provide additional on the fly automatically generated fields to the Solr index. Such fields could be language, concepts, keywords, sentences, named entities, etc. +UIMA based tokenizers/filters can be used either inside plain Lucene or as index/query analyzers to be defined +inside the schema.xml of a Solr core to create/filter tokens using specific UIMA annotations. UIMA Dependency --------------- diff --git a/solr/contrib/uima/build.xml b/solr/contrib/uima/build.xml index 2e047e70f05..12487595c4f 100644 --- a/solr/contrib/uima/build.xml +++ b/solr/contrib/uima/build.xml @@ -25,4 +25,18 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/AEProvider.java b/solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactory.java similarity index 50% rename from solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/AEProvider.java rename to solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactory.java index 2f6ac479eed..2d237769766 100644 --- a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/AEProvider.java +++ b/solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactory.java @@ -1,6 +1,6 @@ -package org.apache.solr.uima.processor.ae; +package org.apache.solr.uima.analysis; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,16 +17,30 @@ package org.apache.solr.uima.processor.ae; * limitations under the License. */ -import org.apache.uima.analysis_engine.AnalysisEngine; -import org.apache.uima.resource.ResourceInitializationException; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizer; +import org.apache.solr.analysis.BaseTokenizerFactory; + +import java.io.Reader; +import java.util.Map; /** - * provide an Apache UIMA {@link AnalysisEngine} - * - * + * Solr {@link org.apache.solr.analysis.TokenizerFactory} for {@link UIMAAnnotationsTokenizer} */ -public interface AEProvider { +public class UIMAAnnotationsTokenizerFactory extends BaseTokenizerFactory { - public AnalysisEngine getAE() throws ResourceInitializationException; + private String descriptorPath; + private String tokenType; + @Override + public void init(Map args) { + super.init(args); + descriptorPath = args.get("descriptorPath"); + tokenType = args.get("tokenType"); + } + + @Override + public Tokenizer create(Reader input) { + return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, input); + } } diff --git a/solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactory.java b/solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactory.java new file mode 100644 index 00000000000..9566699c1f0 --- /dev/null +++ b/solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactory.java @@ -0,0 +1,48 @@ +package org.apache.solr.uima.analysis; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizer; +import org.apache.solr.analysis.BaseTokenizerFactory; + +import java.io.Reader; +import java.util.Map; + +/** + * Solr {@link org.apache.solr.analysis.TokenizerFactory} for {@link UIMATypeAwareAnnotationsTokenizer} + */ +public class UIMATypeAwareAnnotationsTokenizerFactory extends BaseTokenizerFactory { + + private String descriptorPath; + private String tokenType; + private String featurePath; + + @Override + public void init(Map args) { + super.init(args); + descriptorPath = args.get("descriptorPath"); + tokenType = args.get("tokenType"); + featurePath = args.get("featurePath"); + } + + @Override + public Tokenizer create(Reader input) { + return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, input); + } +} diff --git a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java index f2cda4baecb..a8a623558a9 100644 --- a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java +++ b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java @@ -23,8 +23,8 @@ import org.apache.solr.common.SolrInputDocument; import org.apache.solr.core.SolrCore; import org.apache.solr.schema.SchemaField; import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField; -import org.apache.solr.uima.processor.ae.AEProvider; -import org.apache.solr.uima.processor.ae.AEProviderFactory; +import org.apache.lucene.analysis.uima.ae.AEProvider; +import org.apache.lucene.analysis.uima.ae.AEProviderFactory; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.uima.analysis_engine.AnalysisEngine; diff --git a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java deleted file mode 100644 index 2bd2417936e..00000000000 --- a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.apache.solr.uima.processor.ae; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.HashMap; -import java.util.Map; - -/** - * Singleton factory class responsible of {@link AEProvider}s' creation - * - * - */ -public class AEProviderFactory { - - private static AEProviderFactory instance; - - private Map providerCache = new HashMap(); - - private AEProviderFactory() { - // Singleton - } - - public static AEProviderFactory getInstance() { - if (instance == null) { - instance = new AEProviderFactory(); - } - return instance; - } - - public synchronized AEProvider getAEProvider(String core, String aePath, - Map runtimeParameters) { - String key = new StringBuilder(core).append(aePath).toString(); - if (providerCache.get(key) == null) { - providerCache.put(key, new OverridingParamsAEProvider(aePath, runtimeParameters)); - } - return providerCache.get(key); - } -} diff --git a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java deleted file mode 100644 index f3af7c4435e..00000000000 --- a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java +++ /dev/null @@ -1,117 +0,0 @@ -package org.apache.solr.uima.processor.ae; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.net.URL; -import java.util.Map; - -import org.apache.uima.UIMAFramework; -import org.apache.uima.analysis_engine.AnalysisEngine; -import org.apache.uima.analysis_engine.AnalysisEngineDescription; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.util.XMLInputSource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link AEProvider} implementation that creates an Aggregate AE from the given path, also - * injecting runtime parameters defined in the solrconfig.xml Solr configuration file and assigning - * them as overriding parameters in the aggregate AE - * - * - */ -public class OverridingParamsAEProvider implements AEProvider { - - private static Logger log = LoggerFactory.getLogger(OverridingParamsAEProvider.class); - - private String aeFilePath; - - private AnalysisEngine cachedAE; - - private Map runtimeParameters; - - public OverridingParamsAEProvider(String aeFilePath, Map runtimeParameters) { - this.aeFilePath = aeFilePath; - this.runtimeParameters = runtimeParameters; - } - - public synchronized AnalysisEngine getAE() throws ResourceInitializationException { - try { - if (cachedAE == null) { - // get Resource Specifier from XML file - URL url = this.getClass().getResource(aeFilePath); - XMLInputSource in = new XMLInputSource(url); - - // get AE description - AnalysisEngineDescription desc = UIMAFramework.getXMLParser() - .parseAnalysisEngineDescription(in); - - /* iterate over each AE (to set runtime parameters) */ - for (String attributeName : runtimeParameters.keySet()) { - Object val = getRuntimeValue(desc, attributeName); - desc.getAnalysisEngineMetaData().getConfigurationParameterSettings().setParameterValue( - attributeName, val); - if (log.isDebugEnabled()) - log.debug(new StringBuilder("setting ").append(attributeName).append(" : ").append( - runtimeParameters.get(attributeName)).toString()); - } - // create AE here - cachedAE = UIMAFramework.produceAnalysisEngine(desc); - if (log.isDebugEnabled()) - log.debug(new StringBuilder("AE ").append(cachedAE.getAnalysisEngineMetaData().getName()) - .append(" created from descriptor ").append(aeFilePath).toString()); - } else { - cachedAE.reconfigure(); - if (log.isDebugEnabled()) - log.debug(new StringBuilder("AE ").append(cachedAE.getAnalysisEngineMetaData().getName()) - .append(" at path ").append(aeFilePath).append(" reconfigured ").toString()); - } - } catch (Exception e) { - cachedAE = null; - throw new ResourceInitializationException(e); - } - return cachedAE; - } - - /* create the value to inject in the runtime parameter depending on its declared type */ - private Object getRuntimeValue(AnalysisEngineDescription desc, String attributeName) - throws ClassNotFoundException { - String type = desc.getAnalysisEngineMetaData().getConfigurationParameterDeclarations(). - getConfigurationParameter(null, attributeName).getType(); - // TODO : do it via reflection ? i.e. Class paramType = Class.forName(type)... - Object val = null; - Object runtimeValue = runtimeParameters.get(attributeName); - if (runtimeValue!=null) { - if ("String".equals(type)) { - val = String.valueOf(runtimeValue); - } - else if ("Integer".equals(type)) { - val = Integer.valueOf(runtimeValue.toString()); - } - else if ("Boolean".equals(type)) { - val = Boolean.valueOf(runtimeValue.toString()); - } - else if ("Float".equals(type)) { - val = Float.valueOf(runtimeValue.toString()); - } - } - - return val; - } - -} \ No newline at end of file diff --git a/solr/contrib/uima/src/test-files/uima/stoptypes.txt b/solr/contrib/uima/src/test-files/uima/stoptypes.txt new file mode 100644 index 00000000000..c0e0084060d --- /dev/null +++ b/solr/contrib/uima/src/test-files/uima/stoptypes.txt @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +vbg +vbz +vbd +vbn +vb +bez +cc +cd +at +. +: \ No newline at end of file diff --git a/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml new file mode 100644 index 00000000000..aa279cedea4 --- /dev/null +++ b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml @@ -0,0 +1,680 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml new file mode 100644 index 00000000000..470b49fdc9b --- /dev/null +++ b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml @@ -0,0 +1,1006 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + false + + 10 + + + + + 32 + + 10000 + 1000 + + + + + + + + + + + + + native + + + + + + + false + 32 + 10 + + + + + + + + false + + + true + + + + + + + + 1 + + 0 + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + true + + + + + + + + 20 + + + 200 + + + + + + + + + + + + + + solr rocks + 0 + 10 + + + static firstSearcher warming query from + solrconfig.xml + + + + + + false + + + 2 + + + + + + + + + + + + + + + + + + + + + + + explicit + + + + + + + + + + + + + dismax + explicit + 0.01 + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 + manu^1.1 cat^1.4 + + + text^0.2 features^1.1 name^1.5 manu^1.4 + manu_exact^1.9 + + + popularity^0.5 recip(price,1,1000,1000)^0.3 + + + id,name,price,score + + + 2<-1 5<-2 6<90% + 100 + *:* + + text features name + + 0 + + name + regex + + + + + + + dismax + explicit + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 + 2<-1 5<-2 6<90% + + incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2 + + + + inStock:true + + + + cat + manu_exact + price:[* TO 500] + price:[500 TO *] + + + + + + + + + + textSpell + + + default + name + ./spellchecker + + + + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + + + + true + + + tvComponent + + + + + + + + + default + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + 20 + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + true + default + true + + name + id + + features + + true + + + + false + + + clusteringComponent + + + + + + + + text + true + ignored_ + + + true + links + ignored_ + + + + + + + + + + true + + + termsComponent + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + explicit + true + + + + + + + + + + + 5 + + + + + + + + + + * + + + + + diff --git a/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactoryTest.java b/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactoryTest.java new file mode 100644 index 00000000000..c380fc0398f --- /dev/null +++ b/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactoryTest.java @@ -0,0 +1,49 @@ +package org.apache.solr.uima.analysis; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.request.SolrQueryRequest; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + */ +public class UIMAAnnotationsTokenizerFactoryTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("uima/uima-tokenizers-solrconfig.xml", "uima/uima-tokenizers-schema.xml"); + } + + @Test + public void testInitialization() throws Exception { + assertNotNull(h.getCore().getSchema().getField("sentences")); + assertNotNull(h.getCore().getSchema().getFieldType("sentences")); + } + + @Test + public void testIndexAndQuery() throws Exception { + assertU("123One and 1 is two. Instead One or 1 is 0."); + assertU(commit()); + SolrQueryRequest req = req("qt", "/terms", "terms.fl", "sentences"); + assertQ(req, "//lst[@name='sentences']/int[@name='One and 1 is two.']"); + assertQ(req, "//lst[@name='sentences']/int[@name=' Instead One or 1 is 0.']"); + req.close(); + } +} diff --git a/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactoryTest.java b/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactoryTest.java new file mode 100644 index 00000000000..59a4aa8eeaf --- /dev/null +++ b/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactoryTest.java @@ -0,0 +1,58 @@ +package org.apache.solr.uima.analysis; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.request.SolrQueryRequest; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + */ +public class UIMATypeAwareAnnotationsTokenizerFactoryTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("uima/uima-tokenizers-solrconfig.xml", "uima/uima-tokenizers-schema.xml"); + } + + @Test + public void testInitialization() throws Exception { + assertNotNull(h.getCore().getSchema().getField("nouns")); + assertNotNull(h.getCore().getSchema().getFieldType("nouns")); + } + + @Test + public void testIndexAndQuery() throws Exception { + assertU("123The counter counts the beans: 1 and 2 and three."); + assertU(commit()); + SolrQueryRequest req = req("qt", "/terms", "terms.fl", "nouns"); + assertQ(req, "//lst[@name='nouns']/int[@name='beans']"); + assertQ(req, "//lst[@name='nouns']/int[@name='counter']"); + assertQ(req, "//lst[@name='nouns']/int[@name!='The']"); + assertQ(req, "//lst[@name='nouns']/int[@name!='counts']"); + assertQ(req, "//lst[@name='nouns']/int[@name!='the']"); + assertQ(req, "//lst[@name='nouns']/int[@name!=':']"); + assertQ(req, "//lst[@name='nouns']/int[@name!='1']"); + assertQ(req, "//lst[@name='nouns']/int[@name!='and']"); + assertQ(req, "//lst[@name='nouns']/int[@name!='2']"); + assertQ(req, "//lst[@name='nouns']/int[@name!='three']"); + assertQ(req, "//lst[@name='nouns']/int[@name!='.']"); + req.close(); + } +}