[SOLR-3013] - removing the ae package from Solr as it's now under analysis/uima module, adding the Solr factories for UIMA based tokenizers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1295330 13f79535-47bb-0310-9956-ffa450edef68
2012-02-29 22:43:12 +00:00 · 2012-02-29 22:43:12 +00:00 · 0c5c13e157
parent 651a236696
commit 0c5c13e157
12 changed files with 1910 additions and 183 deletions
--- a/solr/contrib/uima/CHANGES.txt
+++ b/solr/contrib/uima/CHANGES.txt
@ -5,9 +5,12 @@ This file describes changes to the Solr UIMA (contrib/uima) module. See SOLR-212

 Introduction
 ------------
-This module is intended to be used while indexing documents.
-Its purpose is to provide additional on the fly automatically generated fields to the Solr index.
+This module is intended to be used both as an UpdateRequestProcessor while indexing documents and as a set of tokenizers/filters
+to be configured inside the schema.xml for use during the analysis phase.
+The purpose of UIMAUpdateRequestProcessor is to provide additional fields, automatically generated on the fly, to the Solr index.
 Such fields could be language, concepts, keywords, sentences, named entities, etc.
+UIMA based tokenizers/filters can be used either inside plain Lucene or as index/query analyzers to be defined
+inside the schema.xml of a Solr core to create/filter tokens using specific UIMA annotations.

 UIMA Dependency
 ---------------
--- a/solr/contrib/uima/build.xml
+++ b/solr/contrib/uima/build.xml
@ -25,4 +25,18 @@

  <import file="../contrib-build.xml"/>

+  <path id="classpath">
+    <pathelement path="${analyzers-uima.jar}"/>
+    <path refid="solr.base.classpath"/>
+  </path>
+
+  <target name="module-jars-to-solr" depends="jar-analyzers-uima">
+    <mkdir dir="${build.dir}/lucene-libs"/>
+    <copy todir="${build.dir}/lucene-libs" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
+      <fileset file="${analyzers-uima.jar}"/>
+    </copy>
+  </target>
+
+  <target name="compile-core" depends="jar-analyzers-uima, solr-contrib-build.compile-core"/>
+  <target name="dist" depends="module-jars-to-solr, common-solr.dist"/>
 </project>
--- a/solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactory.java
+++ b/solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactory.java
@ -1,6 +1,6 @@
-package org.apache.solr.uima.processor.ae;
+package org.apache.solr.uima.analysis;

-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@ -17,16 +17,30 @@ package org.apache.solr.uima.processor.ae;
 * limitations under the License.
 */

-import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizer;
+import org.apache.solr.analysis.BaseTokenizerFactory;
+
+import java.io.Reader;
+import java.util.Map;

 /**
- * provide an Apache UIMA {@link AnalysisEngine}
- * 
- *
+ * Solr {@link org.apache.solr.analysis.TokenizerFactory} for {@link UIMAAnnotationsTokenizer}
 */
-public interface AEProvider {
+public class UIMAAnnotationsTokenizerFactory extends BaseTokenizerFactory {

-  public AnalysisEngine getAE() throws ResourceInitializationException;
+  private String descriptorPath;
+  private String tokenType;

+  @Override
+  public void init(Map<String, String> args) {
+    super.init(args);
+    descriptorPath = args.get("descriptorPath");
+    tokenType = args.get("tokenType");
+  }
+
+  @Override
+  public Tokenizer create(Reader input) {
+    return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, input);
+  }
 }
--- a/solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactory.java
+++ b/solr/contrib/uima/src/java/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactory.java
@ -0,0 +1,48 @@
+package org.apache.solr.uima.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizer;
+import org.apache.solr.analysis.BaseTokenizerFactory;
+
+import java.io.Reader;
+import java.util.Map;
+
+/**
+ * Solr {@link org.apache.solr.analysis.TokenizerFactory} for {@link UIMATypeAwareAnnotationsTokenizer}
+ */
+public class UIMATypeAwareAnnotationsTokenizerFactory extends BaseTokenizerFactory {
+
+  private String descriptorPath;
+  private String tokenType;
+  private String featurePath;
+
+  @Override
+  public void init(Map<String, String> args) {
+    super.init(args);
+    descriptorPath = args.get("descriptorPath");
+    tokenType = args.get("tokenType");
+    featurePath = args.get("featurePath");
+  }
+
+  @Override
+  public Tokenizer create(Reader input) {
+    return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, input);
+  }
+}
--- a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
+++ b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
@ -23,8 +23,8 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
-import org.apache.solr.uima.processor.ae.AEProvider;
-import org.apache.solr.uima.processor.ae.AEProviderFactory;
+import org.apache.lucene.analysis.uima.ae.AEProvider;
+import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
 import org.apache.solr.update.AddUpdateCommand;
 import org.apache.solr.update.processor.UpdateRequestProcessor;
 import org.apache.uima.analysis_engine.AnalysisEngine;
--- a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java
+++ b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java
@ -1,53 +0,0 @@
-package org.apache.solr.uima.processor.ae;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Singleton factory class responsible of {@link AEProvider}s' creation
- * 
- *
- */
-public class AEProviderFactory {
-
-  private static AEProviderFactory instance;
-
-  private Map<String, AEProvider> providerCache = new HashMap<String, AEProvider>();
-
-  private AEProviderFactory() {
-    // Singleton
-  }
-
-  public static AEProviderFactory getInstance() {
-    if (instance == null) {
-      instance = new AEProviderFactory();
-    }
-    return instance;
-  }
-
-  public synchronized AEProvider getAEProvider(String core, String aePath,
-          Map<String, Object> runtimeParameters) {
-    String key = new StringBuilder(core).append(aePath).toString();
-    if (providerCache.get(key) == null) {
-      providerCache.put(key, new OverridingParamsAEProvider(aePath, runtimeParameters));
-    }
-    return providerCache.get(key);
-  }
-}
--- a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java
+++ b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java
@ -1,117 +0,0 @@
-package org.apache.solr.uima.processor.ae;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.net.URL;
-import java.util.Map;
-
-import org.apache.uima.UIMAFramework;
-import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.util.XMLInputSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * {@link AEProvider} implementation that creates an Aggregate AE from the given path, also
- * injecting runtime parameters defined in the solrconfig.xml Solr configuration file and assigning
- * them as overriding parameters in the aggregate AE
- * 
- *
- */
-public class OverridingParamsAEProvider implements AEProvider {
-
-  private static Logger log = LoggerFactory.getLogger(OverridingParamsAEProvider.class);
-
-  private String aeFilePath;
-
-  private AnalysisEngine cachedAE;
-
-  private Map<String, Object> runtimeParameters;
-
-  public OverridingParamsAEProvider(String aeFilePath, Map<String, Object> runtimeParameters) {
-    this.aeFilePath = aeFilePath;
-    this.runtimeParameters = runtimeParameters;
-  }
-
-  public synchronized AnalysisEngine getAE() throws ResourceInitializationException {
-    try {
-      if (cachedAE == null) {
-        // get Resource Specifier from XML file
-        URL url = this.getClass().getResource(aeFilePath);
-        XMLInputSource in = new XMLInputSource(url);
-
-        // get AE description
-        AnalysisEngineDescription desc = UIMAFramework.getXMLParser()
-                .parseAnalysisEngineDescription(in);
-
-        /* iterate over each AE (to set runtime parameters) */
-        for (String attributeName : runtimeParameters.keySet()) {
-          Object val = getRuntimeValue(desc, attributeName);
-          desc.getAnalysisEngineMetaData().getConfigurationParameterSettings().setParameterValue(
-                  attributeName, val);
-          if (log.isDebugEnabled())
-            log.debug(new StringBuilder("setting ").append(attributeName).append(" : ").append(
-                  runtimeParameters.get(attributeName)).toString());
-        }
-        // create AE here
-        cachedAE = UIMAFramework.produceAnalysisEngine(desc);
-        if (log.isDebugEnabled())
-          log.debug(new StringBuilder("AE ").append(cachedAE.getAnalysisEngineMetaData().getName())
-                  .append(" created from descriptor ").append(aeFilePath).toString());
-      } else {
-        cachedAE.reconfigure();
-        if (log.isDebugEnabled())
-          log.debug(new StringBuilder("AE ").append(cachedAE.getAnalysisEngineMetaData().getName())
-                  .append(" at path ").append(aeFilePath).append(" reconfigured ").toString());
-      }
-    } catch (Exception e) {
-      cachedAE = null;
-      throw new ResourceInitializationException(e);
-    }
-    return cachedAE;
-  }
-
-  /* create the value to inject in the runtime parameter depending on its declared type */
-  private Object getRuntimeValue(AnalysisEngineDescription desc, String attributeName)
-          throws ClassNotFoundException {
-    String type = desc.getAnalysisEngineMetaData().getConfigurationParameterDeclarations().
-                    getConfigurationParameter(null, attributeName).getType();
-    // TODO : do it via reflection ? i.e. Class paramType = Class.forName(type)...
-    Object val = null;
-    Object runtimeValue = runtimeParameters.get(attributeName);
-    if (runtimeValue!=null) {
-      if ("String".equals(type)) {
-        val = String.valueOf(runtimeValue);
-      }
-      else if ("Integer".equals(type)) {
-        val = Integer.valueOf(runtimeValue.toString());
-      }
-      else if ("Boolean".equals(type)) {
-        val = Boolean.valueOf(runtimeValue.toString());
-      }
-      else if ("Float".equals(type)) {
-        val = Float.valueOf(runtimeValue.toString());
-      }
-    }
-
-    return val;
-  }
-
-}
--- a/solr/contrib/uima/src/test-files/uima/stoptypes.txt
+++ b/solr/contrib/uima/src/test-files/uima/stoptypes.txt
@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+vbg
+vbz
+vbd
+vbn
+vb
+bez
+cc
+cd
+at
+.
+:
--- a/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml
+++ b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml
@ -0,0 +1,680 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+  <!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements. See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version
+    2.0 (the "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+    applicable law or agreed to in writing, software distributed under
+    the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
+    OR CONDITIONS OF ANY KIND, either express or implied. See the
+    License for the specific language governing permissions and
+    limitations under the License.
+  -->
+
+  <!--
+    This is the Solr schema file. This file should be named "schema.xml"
+    and should be in the conf directory under the solr home (i.e.
+    ./solr/conf/schema.xml by default) or located where the classloader
+    for the Solr webapp can find it. This example schema is the
+    recommended starting point for users. It should be kept correct and
+    concise, usable out-of-the-box. For more information on how to
+    customize this file, please see
+    http://wiki.apache.org/solr/SchemaXml PERFORMANCE NOTE: this schema
+    includes many optional features and should not be used for
+    benchmarking. To improve performance one could - set stored="false"
+    for all fields possible (esp large fields) when you only need to
+    search on the field but don't need to return the original value. -
+    set indexed="false" if you don't need to search on the field, but
+    only return the field as a result of searching on other indexed
+    fields. - remove all unneeded copyField statements - for best index
+    size and searching performance, set "index" to false for all general
+    text fields, use copyField to copy them to the catchall "text"
+    field, and use that for searching. - For maximum indexing
+    performance, use the StreamingUpdateSolrServer java client. -
+    Remember to run the JVM in server mode, and use a higher logging
+    level that avoids logging every request
+  -->
+
+<schema name="sample" version="1.2">
+  <!--
+    attribute "name" is the name of this schema and is only used for
+    display purposes. Applications should change this to reflect the
+    nature of the search collection. version="1.2" is Solr's version
+    number for the schema syntax and semantics. It should not normally
+    be changed by applications. 1.0: multiValued attribute did not
+    exist, all fields are multiValued by nature 1.1: multiValued
+    attribute introduced, false by default 1.2: omitTermFreqAndPositions
+    attribute introduced, true by default except for text fields.
+  -->
+
+  <types>
+    <!--
+      field type definitions. The "name" attribute is just a label to be
+      used by field definitions. The "class" attribute and any other
+      attributes determine the real behavior of the fieldType. Class
+      names starting with "solr" refer to java classes in the
+      org.apache.solr.analysis package.
+    -->
+
+    <!--
+      The StrField type is not analyzed, but indexed/stored verbatim. -
+      StrField and TextField support an optional compressThreshold which
+      limits compression (if enabled in the derived fields) to values
+      which exceed a certain size (in characters).
+    -->
+    <fieldType name="string" class="solr.StrField"
+      sortMissingLast="true" omitNorms="true" />
+
+    <!-- boolean type: "true" or "false" -->
+    <fieldType name="boolean" class="solr.BoolField"
+      sortMissingLast="true" omitNorms="true" />
+    <!--
+      Binary data type. The data should be sent/retrieved as Base64
+      encoded Strings
+    -->
+    <fieldtype name="binary" class="solr.BinaryField" />
+
+    <!--
+      The optional sortMissingLast and sortMissingFirst attributes are
+      currently supported on types that are sorted internally as
+      strings. This includes
+      "string","boolean","sint","slong","sfloat","sdouble","pdate" - If
+      sortMissingLast="true", then a sort on this field will cause
+      documents without the field to come after documents with the
+      field, regardless of the requested sort order (asc or desc). - If
+      sortMissingFirst="true", then a sort on this field will cause
+      documents without the field to come before documents with the
+      field, regardless of the requested sort order. - If
+      sortMissingLast="false" and sortMissingFirst="false" (the
+      default), then default lucene sorting will be used which places
+      docs without the field first in an ascending sort and last in a
+      descending sort.
+    -->
+
+    <!--
+      Default numeric field types. For faster range queries, consider
+      the tint/tfloat/tlong/tdouble types.
+    -->
+    <fieldType name="int" class="solr.TrieIntField"
+      precisionStep="0" omitNorms="true" positionIncrementGap="0" />
+    <fieldType name="float" class="solr.TrieFloatField"
+      precisionStep="0" omitNorms="true" positionIncrementGap="0" />
+    <fieldType name="long" class="solr.TrieLongField"
+      precisionStep="0" omitNorms="true" positionIncrementGap="0" />
+    <fieldType name="double" class="solr.TrieDoubleField"
+      precisionStep="0" omitNorms="true" positionIncrementGap="0" />
+
+    <!--
+      Numeric field types that index each value at various levels of
+      precision to accelerate range queries when the number of values
+      between the range endpoints is large. See the javadoc for
+      NumericRangeQuery for internal implementation details. Smaller
+      precisionStep values (specified in bits) will lead to more tokens
+      indexed per value, slightly larger index size, and faster range
+      queries. A precisionStep of 0 disables indexing at different
+      precision levels.
+    -->
+    <fieldType name="tint" class="solr.TrieIntField"
+      precisionStep="8" omitNorms="true" positionIncrementGap="0" />
+    <fieldType name="tfloat" class="solr.TrieFloatField"
+      precisionStep="8" omitNorms="true" positionIncrementGap="0" />
+    <fieldType name="tlong" class="solr.TrieLongField"
+      precisionStep="8" omitNorms="true" positionIncrementGap="0" />
+    <fieldType name="tdouble" class="solr.TrieDoubleField"
+      precisionStep="8" omitNorms="true" positionIncrementGap="0" />
+
+    <!--
+      The format for this date field is of the form
+      1995-12-31T23:59:59Z, and is a more restricted form of the
+      canonical representation of dateTime
+      http://www.w3.org/TR/xmlschema-2/#dateTime The trailing "Z"
+      designates UTC time and is mandatory. Optional fractional seconds
+      are allowed: 1995-12-31T23:59:59.999Z All other components are
+      mandatory. Expressions can also be used to denote calculations
+      that should be performed relative to "NOW" to determine the value,
+      ie... NOW/HOUR ... Round to the start of the current hour NOW-1DAY
+      ... Exactly 1 day prior to now NOW/DAY+6MONTHS+3DAYS ... 6 months
+      and 3 days in the future from the start of the current day Consult
+      the DateField javadocs for more information. Note: For faster
+      range queries, consider the tdate type
+    -->
+    <fieldType name="date" class="solr.TrieDateField"
+      omitNorms="true" precisionStep="0" positionIncrementGap="0" />
+
+    <!--
+      A Trie based date field for faster date range queries and date
+      faceting.
+    -->
+    <fieldType name="tdate" class="solr.TrieDateField"
+      omitNorms="true" precisionStep="6" positionIncrementGap="0" />
+
+
+    <!--
+      Note: These should only be used for compatibility with existing
+      indexes (created with older Solr versions) or if
+      "sortMissingFirst" or "sortMissingLast" functionality is needed.
+      Use Trie based fields instead. Plain numeric field types that
+      store and index the text value verbatim (and hence don't support
+      range queries, since the lexicographic ordering isn't equal to the
+      numeric ordering)
+    -->
+    <fieldType name="pint" class="solr.IntField" omitNorms="true" />
+    <fieldType name="plong" class="solr.LongField" omitNorms="true" />
+    <fieldType name="pfloat" class="solr.FloatField"
+      omitNorms="true" />
+    <fieldType name="pdouble" class="solr.DoubleField"
+      omitNorms="true" />
+    <fieldType name="pdate" class="solr.DateField"
+      sortMissingLast="true" omitNorms="true" />
+
+
+    <!--
+      Note: These should only be used for compatibility with existing
+      indexes (created with older Solr versions) or if
+      "sortMissingFirst" or "sortMissingLast" functionality is needed.
+      Use Trie based fields instead. Numeric field types that manipulate
+      the value into a string value that isn't human-readable in its
+      internal form, but with a lexicographic ordering the same as the
+      numeric ordering, so that range queries work correctly.
+    -->
+    <fieldType name="sint" class="solr.SortableIntField"
+      sortMissingLast="true" omitNorms="true" />
+    <fieldType name="slong" class="solr.SortableLongField"
+      sortMissingLast="true" omitNorms="true" />
+    <fieldType name="sfloat" class="solr.SortableFloatField"
+      sortMissingLast="true" omitNorms="true" />
+    <fieldType name="sdouble" class="solr.SortableDoubleField"
+      sortMissingLast="true" omitNorms="true" />
+
+
+    <!--
+      The "RandomSortField" is not used to store or search any data. You
+      can declare fields of this type in your schema to generate
+      pseudo-random orderings of your docs for sorting purposes. The
+      ordering is generated based on the field name and the version of
+      the index. As long as the index version remains unchanged, and the
+      same field name is reused, the ordering of the docs will be
+      consistent. If you want different pseudo-random orderings of
+      documents, for the same version of the index, use a dynamicField
+      and change the name
+    -->
+    <fieldType name="random" class="solr.RandomSortField"
+      indexed="true" />
+
+    <!--
+      solr.TextField allows the specification of custom text analyzers
+      specified as a tokenizer and a list of token filters. Different
+      analyzers may be specified for indexing and querying. The optional
+      positionIncrementGap puts space between multiple fields of this
+      type on the same document, with the purpose of preventing false
+      phrase matching across fields. For more info on customizing your
+      analyzer chain, please see
+      http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+    -->
+
+    <!--
+      One can also specify an existing Analyzer class that has a default
+      constructor via the class attribute on the analyzer element
+      <fieldType name="text_greek" class="solr.TextField"> <analyzer
+      class="org.apache.lucene.analysis.el.GreekAnalyzer"/> </fieldType>
+    -->
+
+    <!--
+      A text field that only splits on whitespace for exact matching of
+      words
+    -->
+    <fieldType name="text_ws" class="solr.TextField"
+      positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.MockTokenizerFactory" />
+      </analyzer>
+    </fieldType>
+
+    <!--
+      A text field that uses WordDelimiterFilter to enable splitting and
+      matching of words on case-change, alpha numeric boundaries, and
+      non-alphanumeric chars, so that a query of "wifi" or "wi fi" could
+      match a document containing "Wi-Fi". Synonyms and stopwords are
+      customized by external files, and stemming is enabled.
+    -->
+    <fieldType name="text" class="solr.TextField"
+      positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.MockTokenizerFactory" />
+        <!--
+          in this example, we will only use synonyms at query time
+          <filter class="solr.SynonymFilterFactory"
+          synonyms="index_synonyms.txt" ignoreCase="true"
+          expand="false"/>
+        -->
+        <!--
+          Case insensitive stop word removal. add
+          enablePositionIncrements=true in both the index and query
+          analyzers to leave a 'gap' for more accurate phrase queries.
+        -->
+        <filter class="solr.WordDelimiterFilterFactory"
+          generateWordParts="1" generateNumberParts="1" catenateWords="1"
+          catenateNumbers="1" catenateAll="0" splitOnCaseChange="1" />
+        <filter class="solr.LowerCaseFilterFactory" />
+        
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.MockTokenizerFactory" />
+
+        <filter class="solr.WordDelimiterFilterFactory"
+          generateWordParts="1" generateNumberParts="1" catenateWords="0"
+          catenateNumbers="0" catenateAll="0" splitOnCaseChange="1" />
+        <filter class="solr.LowerCaseFilterFactory" />
+        
+      </analyzer>
+    </fieldType>
+
+
+    <!--
+      Less flexible matching, but fewer false matches. Probably not ideal
+      for product names, but may be good for SKUs. Can insert dashes in
+      the wrong place and still match.
+    -->
+    <fieldType name="textTight" class="solr.TextField"
+      positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.MockTokenizerFactory" />
+        <filter class="solr.WordDelimiterFilterFactory"
+          generateWordParts="0" generateNumberParts="0" catenateWords="1"
+          catenateNumbers="1" catenateAll="0" />
+        <filter class="solr.LowerCaseFilterFactory" />
+        
+        <!--
+          this filter can remove any duplicate tokens that appear at the
+          same position - sometimes possible with WordDelimiterFilter in
+          conjunction with stemming.
+        -->
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="uima_sentences" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="org.apache.solr.uima.analysis.UIMAAnnotationsTokenizerFactory"
+                   descriptorPath="/uima/AggregateSentenceAE.xml" tokenType="org.apache.uima.SentenceAnnotation"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="uima_nouns" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="org.apache.solr.uima.analysis.UIMATypeAwareAnnotationsTokenizerFactory"
+                   descriptorPath="/uima/AggregateSentenceAE.xml" tokenType="org.apache.uima.TokenAnnotation"
+                   featurePath="posTag"/>
+        <filter class="solr.TypeTokenFilterFactory" types="uima/stoptypes.txt" />
+      </analyzer>
+    </fieldType>
+
+
+    <!--
+      A general unstemmed text field - good if one does not know the
+      language of the field
+    -->
+    <fieldType name="textgen" class="solr.TextField"
+      positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.MockTokenizerFactory" />
+        <filter class="solr.WordDelimiterFilterFactory"
+          generateWordParts="1" generateNumberParts="1" catenateWords="1"
+          catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.MockTokenizerFactory" />
+        <filter class="solr.WordDelimiterFilterFactory"
+          generateWordParts="1" generateNumberParts="1" catenateWords="0"
+          catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+
+    <!--
+      A general unstemmed text field that indexes tokens normally and
+      also reversed (via ReversedWildcardFilterFactory), to enable more
+      efficient leading wildcard queries.
+    -->
+    <fieldType name="text_rev" class="solr.TextField"
+      positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.MockTokenizerFactory" />
+        <filter class="solr.WordDelimiterFilterFactory"
+          generateWordParts="1" generateNumberParts="1" catenateWords="1"
+          catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
+        <filter class="solr.LowerCaseFilterFactory" />
+        <filter class="solr.ReversedWildcardFilterFactory"
+          withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2"
+          maxFractionAsterisk="0.33" />
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.MockTokenizerFactory" />
+        <filter class="solr.WordDelimiterFilterFactory"
+          generateWordParts="1" generateNumberParts="1" catenateWords="0"
+          catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+    <!-- charFilter + WhitespaceTokenizer  -->
+    <!--
+      <fieldType name="textCharNorm" class="solr.TextField"
+        positionIncrementGap="100">
+        <analyzer>
+          <charFilter class="solr.MappingCharFilterFactory"
+            mapping="mapping-ISOLatin1Accent.txt"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
+        </analyzer>
+      </fieldType>
+    -->
+
+    <!--
+      This is an example of using the KeywordTokenizer along with
+      various TokenFilterFactories to produce a sortable field that does
+      not include some properties of the source text
+    -->
+    <fieldType name="alphaOnlySort" class="solr.TextField"
+      sortMissingLast="true" omitNorms="true">
+      <analyzer>
+        <!--
+          KeywordTokenizer does no actual tokenizing, so the entire
+          input string is preserved as a single token
+        -->
+        <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
+        <!--
+          The LowerCase TokenFilter does what you expect, which can be
+          useful when you want your sorting to be case insensitive
+        -->
+        <filter class="solr.LowerCaseFilterFactory" />
+        <!-- The TrimFilter removes any leading or trailing whitespace -->
+        <filter class="solr.TrimFilterFactory" />
+        <!--
+          The PatternReplaceFilter gives you the flexibility to use Java
+          Regular expression to replace any sequence of characters
+          matching a pattern with an arbitrary replacement string, which
+          may include back references to portions of the original string
+          matched by the pattern. See the Java Regular Expression
+          documentation for more information on pattern and replacement
+          string syntax.
+
+          http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
+        -->
+        <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])"
+          replacement="" replace="all" />
+      </analyzer>
+    </fieldType>
+
+    <fieldtype name="phonetic" stored="false" indexed="true"
+      class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory" />
+        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false" />
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype name="payloads" stored="false" indexed="true"
+      class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.MockTokenizerFactory" />
+        <!--
+          The DelimitedPayloadTokenFilter can put payloads on tokens...
+          for example, a token of "foo|1.4" would be indexed as "foo"
+          with a payload of 1.4f.
+          Attributes of the DelimitedPayloadTokenFilterFactory:
+            "delimiter" - a one character delimiter. Default is | (pipe)
+            "encoder" - how to encode the following value into a payload:
+              float -> org.apache.lucene.analysis.payloads.FloatEncoder,
+              integer -> o.a.l.a.p.IntegerEncoder,
+              identity -> o.a.l.a.p.IdentityEncoder,
+              or a fully qualified class name implementing PayloadEncoder;
+              the encoder must have a no arg constructor.
+        -->
+        <filter class="solr.DelimitedPayloadTokenFilterFactory"
+          encoder="float" />
+      </analyzer>
+    </fieldtype>
+
+    <!--
+      lowercases the entire field value, keeping it as a single token.
+    -->
+    <fieldType name="lowercase" class="solr.TextField"
+      positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+
+    <!--
+      since fields of this type are by default not stored or indexed,
+      any data added to them will be ignored outright.
+    -->
+    <fieldtype name="ignored" stored="false" indexed="false"
+      multiValued="true" class="solr.StrField" />
+
+  </types>
+
+
+  <fields>
+    <!--
+      Valid attributes for fields:
+        name: mandatory - the name for the field
+        type: mandatory - the name of a previously defined type from
+          the <types> section
+        indexed: true if this field should be indexed (searchable or
+          sortable)
+        stored: true if this field should be retrievable
+        compressed: [false] if this field should be stored using gzip
+          compression (this will only apply if the field type is
+          compressible; among the standard field types, only TextField
+          and StrField are)
+        multiValued: true if this field may contain multiple values per
+          document
+        omitNorms: (expert) set to true to omit the norms associated with
+          this field (this disables length normalization and index-time
+          boosting for the field, and saves some memory). Only full-text
+          fields or fields that need an index-time boost need norms.
+        termVectors: [false] set to true to store the term vector for a
+          given field. When using MoreLikeThis, fields used for
+          similarity should be stored for best performance.
+        termPositions: Store position information with the term vector.
+          This will increase storage costs.
+        termOffsets: Store offset information with the term vector. This
+          will increase storage costs.
+        default: a value that should be used if no value is specified
+          when adding a document.
+    -->
+    <field name="id" type="string" indexed="true" stored="true"
+      required="true" />
+    <field name="sku" type="textTight" indexed="true" stored="true"
+      omitNorms="true" />
+    <field name="name" type="textgen" indexed="true" stored="true" />
+    <field name="alphaNameSort" type="alphaOnlySort" indexed="true"
+      stored="false" />
+    <field name="manu" type="textgen" indexed="true" stored="true"
+      omitNorms="true" />
+    <field name="cat" type="text_ws" indexed="true" stored="true"
+      multiValued="true" omitNorms="true" />
+    <field name="features" type="text" indexed="true" stored="true"
+      multiValued="true" />
+    <field name="includes" type="text" indexed="true" stored="true"
+      termVectors="true" termPositions="true" termOffsets="true" />
+
+    <field name="sentences" type="uima_sentences" indexed="true" stored="true" multiValued="true"
+          termVectors="true" termPositions="true" termOffsets="true" />
+    <field name="nouns" type="uima_nouns" indexed="true" stored="true" multiValued="true"
+      termVectors="true" termPositions="true" termOffsets="true" />
+
+    <field name="weight" type="float" indexed="true" stored="true" />
+    <field name="price" type="float" indexed="true" stored="true" />
+    <field name="popularity" type="int" indexed="true" stored="true" />
+    <field name="inStock" type="boolean" indexed="true" stored="true" />
+
+
+    <!--
+      Common metadata fields, named specifically to match up with
+      SolrCell metadata when parsing rich documents such as Word, PDF.
+      Some fields are multiValued only because Tika currently may return
+      multiple values for them.
+    -->
+    <field name="title" type="text" indexed="true" stored="true"
+      multiValued="true" />
+    <field name="subject" type="text" indexed="true" stored="true" />
+    <field name="description" type="text" indexed="true" stored="true" />
+    <field name="comments" type="text" indexed="true" stored="true" />
+    <field name="author" type="textgen" indexed="true" stored="true" />
+    <field name="keywords" type="textgen" indexed="true" stored="true" />
+    <field name="category" type="textgen" indexed="true" stored="true" />
+    <field name="content_type" type="string" indexed="true"
+      stored="true" multiValued="true" />
+    <field name="last_modified" type="date" indexed="true" stored="true" />
+    <field name="links" type="string" indexed="true" stored="true"
+      multiValued="true" />
+
+
+    <!--
+      catchall field, containing all other searchable text fields
+      (implemented via copyField further on in this schema)
+    -->
+    <field name="text" type="text" indexed="true" stored="false"
+      multiValued="true" />
+
+    <!--
+      catchall text field that indexes tokens both normally and in
+      reverse for efficient leading wildcard queries.
+    -->
+    <field name="text_rev" type="text_rev" indexed="true" stored="false"
+      multiValued="true" />
+
+    <!--
+      non-tokenized version of manufacturer to make it easier to sort or
+      group results by manufacturer. copied from "manu" via copyField
+    -->
+    <field name="manu_exact" type="string" indexed="true" stored="false" />
+
+    <field name="payloads" type="payloads" indexed="true" stored="true" />
+
+    <!--
+      Uncommenting the following will create a "timestamp" field using a
+      default value of "NOW" to indicate when each document was indexed.
+    -->
+    <!--
+      <field name="timestamp" type="date" indexed="true" stored="true"
+      default="NOW" multiValued="false"/>
+    -->
+
+  <field name="language" type="string" indexed="true" stored="true" required="false"/>
+  <field name="sentence" type="text" indexed="true" stored="true" multiValued="true" required="false" />
+  <field name="sentiment" type="string" indexed="true" stored="true" multiValued="true"/>
+  <field name="entity" type="text" indexed="true" stored="true" multiValued="true"/>
+
+    <!--
+      Dynamic field definitions. If a field name is not found,
+      dynamicFields will be used if the name matches any of the
+      patterns. RESTRICTION: the glob-like pattern in the name attribute
+      must have a "*" only at the start or the end. EXAMPLE: name="*_i"
+      will match any field ending in _i (like myid_i, z_i). Longer
+      patterns will be matched first. If equal size patterns both match,
+      the first appearing in the schema will be used.
+
+      <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
+      <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
+      <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
+      <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
+      <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
+      <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
+      <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
+      <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+      <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
+      <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
+      <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
+      <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
+      <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
+      <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
+
+      <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+      <dynamicField name="attr_*" type="textgen" indexed="true"
+        stored="true" multiValued="true"/>
+      <dynamicField name="random_*" type="random" />
+    -->
+    <dynamicField name="*_sm" type="string" indexed="true" stored="true" multiValued="true"/>
+    <!--
+      uncomment the following to ignore any fields that don't already
+      match an existing field name or dynamic field, rather than
+      reporting them as an error. alternately, change the type="ignored"
+      to some other type e.g. "text" if you want unknown fields indexed
+      and/or stored by default
+    -->
+    <!--dynamicField name="*" type="ignored" multiValued="true" /-->
+
+  </fields>
+
+  <!--
+    Field to use to determine and enforce document uniqueness. Unless
+    this field is marked with required="false", it will be a required
+    field
+  -->
+  <uniqueKey>id</uniqueKey>
+
+  <!--
+    field for the QueryParser to use when an explicit fieldname is
+    absent
+  -->
+  <defaultSearchField>text</defaultSearchField>
+
+  <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
+  <solrQueryParser defaultOperator="OR" />
+
+  <!--
+    copyField commands copy one field to another at the time a document
+    is added to the index. It's used either to index the same field
+    differently, or to add multiple fields to the same field for
+    easier/faster searching.
+  -->
+
+  <copyField source="cat" dest="text" />
+  <copyField source="name" dest="text" />
+  <copyField source="manu" dest="text" />
+  <copyField source="features" dest="text" />
+  <copyField source="includes" dest="text" />
+  <copyField source="text" dest="nouns" />
+  <copyField source="text" dest="sentences" />
+  <copyField source="manu" dest="manu_exact" />
+
+
+  <!--copyField source="Titolo" dest="text"/-->
+
+  <!--
+    Above, multiple source fields are copied to the [text] field.
+    Another way to map multiple source fields to the same destination
+    field is to use the dynamic field syntax. copyField also supports a
+    maxChars to copy setting.
+  -->
+
+  <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
+
+  <!--
+    copy name to alphaNameSort, a field designed for sorting by name
+  -->
+  <!-- <copyField source="name" dest="alphaNameSort"/> -->
+
+
+  <!--
+    Similarity is the scoring routine for each document vs. a query. A
+    custom similarity may be specified here, but the default is fine for
+    most applications.
+  -->
+  <!--
+    <similarity class="org.apache.lucene.search.DefaultSimilarity"/>
+  -->
+  <!--
+    ... OR ... Specify a SimilarityFactory class name implementation
+    allowing parameters to be used.
+  -->
+  <!--
+    <similarity class="com.example.solr.CustomSimilarityFactory">
+      <str name="paramkey">param value</str>
+    </similarity>
+  -->
+
+
+</schema>
--- a/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml
+++ b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml
--- a/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactoryTest.java
+++ b/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMAAnnotationsTokenizerFactoryTest.java
@ -0,0 +1,49 @@
+package org.apache.solr.uima.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.request.SolrQueryRequest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Verifies that a core initialised with the UIMA tokenizers test schema exposes
+ * the "sentences" field and that indexed text shows up in that field as one term
+ * per sentence.
+ */
+public class UIMAAnnotationsTokenizerFactoryTest extends SolrTestCaseJ4 {
+
+  /** Starts the test core with the UIMA tokenizers solrconfig and schema. */
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("uima/uima-tokenizers-solrconfig.xml", "uima/uima-tokenizers-schema.xml");
+  }
+
+  /** The loaded schema must know the "sentences" field and return a field type for it. */
+  @Test
+  public void testInitialization() throws Exception {
+    assertNotNull(h.getCore().getSchema().getField("sentences"));
+    assertNotNull(h.getCore().getSchema().getFieldType("sentences"));
+  }
+
+  /**
+   * Indexes a two-sentence document through the "text" field and checks, via the
+   * /terms handler, that each sentence is present as a term of "sentences".
+   */
+  @Test
+  public void testIndexAndQuery() throws Exception {
+    // NOTE(review): relies on the test schema copying "text" into "sentences".
+    assertU("<add><doc><field name=\"id\">123</field><field name=\"text\">One and 1 is two. Instead One or 1 is 0.</field></doc></add>");
+    assertU(commit());
+    SolrQueryRequest req = req("qt", "/terms", "terms.fl", "sentences");
+    try {
+      assertQ(req, "//lst[@name='sentences']/int[@name='One and 1 is two.']");
+      // the expected term for the second sentence starts with a space
+      assertQ(req, "//lst[@name='sentences']/int[@name=' Instead One or 1 is 0.']");
+    } finally {
+      // release the request even when one of the assertions above fails
+      req.close();
+    }
+  }
+}
--- a/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactoryTest.java
+++ b/solr/contrib/uima/src/test/org/apache/solr/uima/analysis/UIMATypeAwareAnnotationsTokenizerFactoryTest.java
@ -0,0 +1,58 @@
+package org.apache.solr.uima.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.request.SolrQueryRequest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ */
+public class UIMATypeAwareAnnotationsTokenizerFactoryTest extends SolrTestCaseJ4 {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("uima/uima-tokenizers-solrconfig.xml", "uima/uima-tokenizers-schema.xml");
+  }
+
+  @Test
+  public void testInitialization() throws Exception {
+    assertNotNull(h.getCore().getSchema().getField("nouns"));
+    assertNotNull(h.getCore().getSchema().getFieldType("nouns"));
+  }
+
+  @Test
+  public void testIndexAndQuery() throws Exception {
+    assertU("<add><doc><field name=\"id\">123</field><field name=\"text\">The counter counts the beans: 1 and 2 and three.</field></doc></add>");
+    assertU(commit());
+    SolrQueryRequest req = req("qt", "/terms", "terms.fl", "nouns");
+    assertQ(req, "//lst[@name='nouns']/int[@name='beans']");
+    assertQ(req, "//lst[@name='nouns']/int[@name='counter']");
+    assertQ(req, "//lst[@name='nouns']/int[@name!='The']");
+    assertQ(req, "//lst[@name='nouns']/int[@name!='counts']");
+    assertQ(req, "//lst[@name='nouns']/int[@name!='the']");
+    assertQ(req, "//lst[@name='nouns']/int[@name!=':']");
+    assertQ(req, "//lst[@name='nouns']/int[@name!='1']");
+    assertQ(req, "//lst[@name='nouns']/int[@name!='and']");
+    assertQ(req, "//lst[@name='nouns']/int[@name!='2']");
+    assertQ(req, "//lst[@name='nouns']/int[@name!='three']");
+    assertQ(req, "//lst[@name='nouns']/int[@name!='.']");
+    req.close();
+  }
+}