SOLR-2438: allow an analysis chain to be created for multiterm query terms, or synthesize one if not defined explicitly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206229 13f79535-47bb-0310-9956-ffa450edef68
Erick Erickson 2011-11-25 15:46:26 +00:00
parent 6870592252
commit 098371446a
12 changed files with 655 additions and 26 deletions

View File: QueryParserBase.java

@@ -290,7 +290,6 @@ public abstract class QueryParserBase {
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
}
/**
* @see #setLowercaseExpandedTerms(boolean)
*/
@@ -778,14 +777,21 @@ public abstract class QueryParserBase {
return new FuzzyQuery(term,minimumSimilarity,prefixLength);
}
private BytesRef analyzeRangePart(String field, String part) {
// TODO: Should this be protected instead?
private BytesRef analyzeMultitermTerm(String field, String part) {
return analyzeMultitermTerm(field, part, analyzer);
}
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
TokenStream source;
if (analyzerIn == null) analyzerIn = analyzer;
try {
source = analyzer.tokenStream(field, new StringReader(part));
source = analyzerIn.tokenStream(field, new StringReader(part));
source.reset();
} catch (IOException e) {
throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e);
throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
}
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
@@ -793,10 +799,10 @@ public abstract class QueryParserBase {
try {
if (!source.incrementToken())
throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
if (source.incrementToken())
throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part);
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
} catch (IOException e) {
throw new RuntimeException("error analyzing range part: " + part, e);
}
@@ -805,7 +811,7 @@ public abstract class QueryParserBase {
source.end();
source.close();
} catch (IOException e) {
throw new RuntimeException("Unable to end & close TokenStream after analyzing range part: " + part, e);
throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e);
}
return BytesRef.deepCopyOf(bytes);
@@ -827,13 +833,13 @@ public abstract class QueryParserBase {
if (part1 == null) {
start = null;
} else {
start = analyzeRangeTerms ? analyzeRangePart(field, part1) : new BytesRef(part1);
start = analyzeRangeTerms ? analyzeMultitermTerm(field, part1) : new BytesRef(part1);
}
if (part2 == null) {
end = null;
} else {
end = analyzeRangeTerms ? analyzeRangePart(field, part2) : new BytesRef(part2);
end = analyzeRangeTerms ? analyzeMultitermTerm(field, part2) : new BytesRef(part2);
}
final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive);
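Read together, the hunks above impose a single-token contract: the multiterm analyzer must emit exactly one token per fragment, because a wildcard or range endpoint maps to exactly one indexed term. A minimal self-contained restatement of that contract (the wrapper class and method name are ours; every analysis call it makes appears verbatim in the patch):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;

public final class MultitermContractSketch {
  // Mirrors analyzeMultitermTerm: analyze one fragment, demand exactly one token.
  static BytesRef analyzeOneToken(Analyzer a, String field, String part) throws IOException {
    TokenStream source = a.tokenStream(field, new StringReader(part));
    source.reset();
    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
    if (!source.incrementToken())
      throw new IllegalArgumentException("analyzer returned no terms for: " + part);
    termAtt.fillBytesRef();                // fill the attribute's shared BytesRef
    BytesRef bytes = termAtt.getBytesRef();
    if (source.incrementToken())
      throw new IllegalArgumentException("analyzer returned too many terms for: " + part);
    source.end();
    source.close();
    return BytesRef.deepCopyOf(bytes);     // copy: the stream reuses its bytes
  }
}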

View File: solr/CHANGES.txt

@@ -188,6 +188,11 @@ New Features
* SOLR-2134 Trie* fields should support sortMissingLast=true, and deprecate Sortable* Field Types
(Ryan McKinley, Mike McCandless, Uwe Schindler, Erick Erickson)
* SOLR-2438: Case-insensitive search for wildcard queries; more generally, the ability to specify
a complete analysis chain for multiterm queries.
(Pete Sturge, Erick Erickson; mentoring from Seeley and Muir)
Optimizations
----------------------
@@ -383,6 +388,11 @@ New Features
* SOLR-1565: StreamingUpdateSolrServer supports RequestWriter API and therefore, javabin update
format (shalin)
* SOLR-2438: Case-insensitive search for wildcard queries; more generally, the ability to specify
a complete analysis chain for multiterm queries.
(Pete Sturge, Erick Erickson; mentoring from Seeley and Muir)
Bug Fixes
----------------------
* SOLR-2912: Fixed File descriptor leak in ShowFileRequestHandler (Michael Ryan, shalin)

View File: FieldProperties.java

@@ -48,13 +48,15 @@ public abstract class FieldProperties {
protected final static int REQUIRED = 0x00001000;
protected final static int OMIT_POSITIONS = 0x00002000;
protected final static int LEGACY_MULTITERM = 0x00004000;
static final String[] propertyNames = {
"indexed", "tokenized", "stored",
"binary", "omitNorms", "omitTermFreqAndPositions",
"termVectors", "termPositions", "termOffsets",
"multiValued",
"sortMissingFirst","sortMissingLast","required", "omitPositions"
"sortMissingFirst","sortMissingLast","required", "omitPositions" ,
"legacyMultiTerm"
};
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();

View File: FieldType.java

@@ -428,6 +428,21 @@ public abstract class FieldType extends FieldProperties {
*/
protected Analyzer queryAnalyzer=analyzer;
/**
* Analyzer set by schema for text types to use when searching fields
* of this type; subclasses can set the analyzer themselves or override
* getMultiTermAnalyzer().
* This analyzer is used to process wildcard, prefix, regex and other multiterm queries. It
* assembles a list of tokenizer + filters that "make sense" for such queries, primarily
* accent-folding and lowercasing filters, plus any charfilters.
*
* If users require the old behavior, they can specify legacyMultiTerm="true" in the schema file.
* @see #getMultiTermAnalyzer
* @see #setMultiTermAnalyzer
*/
protected Analyzer multiTermAnalyzer=null;
/**
* Returns the Analyzer to be used when indexing fields of this type.
* <p>
@@ -450,6 +465,17 @@ public abstract class FieldType extends FieldProperties {
return queryAnalyzer;
}
/**
* Returns the Analyzer to be used when searching fields of this type when multi-term queries are specified.
* <p>
* This method may be called many times, at any time.
* </p>
* @see #getAnalyzer
*/
public Analyzer getMultiTermAnalyzer() {
return multiTermAnalyzer;
}
private final String analyzerError =
"FieldType: " + this.getClass().getSimpleName() +
" (" + typeName + ") does not support specifying an analyzer";
@@ -498,6 +524,28 @@ public abstract class FieldType extends FieldProperties {
throw e;
}
/**
* Sets the Analyzer to be used for multi-term queries against fields of this type.
*
* <p>
* Subclasses that override this method need to ensure the behavior
* of the analyzer is consistent with the implementation of toInternal.
* </p>
*
* @see #toInternal
* @see #setAnalyzer
* @see #getMultiTermAnalyzer
*/
public void setMultiTermAnalyzer(Analyzer analyzer) {
SolrException e = new SolrException
(ErrorCode.SERVER_ERROR,
"FieldType: " + this.getClass().getSimpleName() +
" (" + typeName + ") does not support specifying an analyzer");
SolrException.logOnce(log,null,e);
throw e;
}
/** @lucene.internal */
protected Similarity similarity;

View File: FieldTypePluginLoader.java

@@ -18,19 +18,15 @@
package org.apache.solr.schema;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.*;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.DOMUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.analysis.CharFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.analysis.TokenizerFactory;
import org.apache.solr.util.plugin.AbstractPluginLoader;
import org.w3c.dom.*;
@@ -88,12 +84,16 @@ public final class FieldTypePluginLoader
String expression = "./analyzer[@type='query']";
Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
Analyzer queryAnalyzer = readAnalyzer(anode);
expression = "./analyzer[@type='multiterm']";
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
Analyzer multiAnalyzer = readAnalyzer(anode);
// An analyzer without a type specified, or with type="index"
expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
Analyzer analyzer = readAnalyzer(anode);
// a custom similarity[Factory]
expression = "./similarity";
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
@@ -101,9 +101,16 @@ public final class FieldTypePluginLoader
if (queryAnalyzer==null) queryAnalyzer=analyzer;
if (analyzer==null) analyzer=queryAnalyzer;
if (multiAnalyzer == null) {
Boolean legacyMatch = ! schema.getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_36);
legacyMatch = (DOMUtil.getAttr(node, "legacyMultiTerm", null) == null) ? legacyMatch :
Boolean.parseBoolean(DOMUtil.getAttr(node, "legacyMultiTerm", null));
multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer, legacyMatch);
}
if (analyzer!=null) {
ft.setAnalyzer(analyzer);
ft.setQueryAnalyzer(queryAnalyzer);
ft.setMultiTermAnalyzer(multiAnalyzer);
}
if (similarity!=null) {
ft.setSimilarity(similarity);
@@ -130,6 +137,42 @@ public final class FieldTypePluginLoader
return fieldTypes.put( name, plugin );
}
// The point here is that, if no multiterm analyzer was specified in the schema file, do one of two things:
// 1> If legacyMultiTerm == false, assemble a new analyzer composed of all of the charfilters plus the
//    lowercasing and ASCII-folding filters found in the query analyzer.
// 2> If legacyMultiTerm == true (i.e. the old behavior is explicitly required), just construct the
//    analyzer from a KeywordTokenizer. That should mimic the old behavior.
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer, Boolean legacyMultiTerm) {
if (queryAnalyzer == null) return null;
if (legacyMultiTerm || (!(queryAnalyzer instanceof TokenizerChain))) {
return new KeywordAnalyzer();
}
TokenizerChain tc = (TokenizerChain) queryAnalyzer;
// we know it'll never be longer than this unless the code below is explicitly changed
TokenFilterFactory[] filters = new TokenFilterFactory[2];
int idx = 0;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
if (factory instanceof LowerCaseFilterFactory) {
filters[idx] = new LowerCaseFilterFactory();
filters[idx++].init(factory.getArgs());
}
if (factory instanceof ASCIIFoldingFilterFactory) {
filters[idx] = new ASCIIFoldingFilterFactory();
filters[idx++].init(factory.getArgs());
}
}
WhitespaceTokenizerFactory white = new WhitespaceTokenizerFactory();
white.init(tc.getTokenizerFactory().getArgs());
return new TokenizerChain(tc.getCharFilterFactories(),
white,
Arrays.copyOfRange(filters, 0, idx));
}
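As a rough standalone illustration of the chain this method synthesizes (a sketch, not the loader code: the real method copies the factories and their args from the query analyzer, and a live core also supplies luceneMatchVersion to each factory):

import java.util.Collections;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.solr.analysis.*;

final class SynthesizedChainSketch {
  static Analyzer build() {
    Map<String, String> noArgs = Collections.emptyMap(); // real code reuses factory.getArgs()
    WhitespaceTokenizerFactory tokenizer = new WhitespaceTokenizerFactory();
    tokenizer.init(noArgs);
    LowerCaseFilterFactory lower = new LowerCaseFilterFactory();
    lower.init(noArgs);
    ASCIIFoldingFilterFactory fold = new ASCIIFoldingFilterFactory();
    fold.init(noArgs);
    // Any charfilters from the query analyzer would be carried over here.
    return new TokenizerChain(new CharFilterFactory[0], tokenizer,
        new TokenFilterFactory[] { lower, fold });
  }
}

With such a chain, a fragment like "ÁB*" analyzes to the single token "ab*", the same form that index-time lowercasing and folding produced.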
//
// <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
//

View File: SchemaField.java

@@ -97,6 +97,9 @@ public final class SchemaField extends FieldProperties {
boolean isTokenized() { return (properties & TOKENIZED)!=0; }
boolean isBinary() { return (properties & BINARY)!=0; }
boolean legacyMultiTerm() {
return (properties & LEGACY_MULTITERM) != 0;
}
public IndexableField createField(Object val, float boost) {
return type.createField(this,val,boost);

View File: TextField.java

@@ -98,6 +98,11 @@ public class TextField extends FieldType {
this.queryAnalyzer = analyzer;
}
@Override
public void setMultiTermAnalyzer(Analyzer analyzer) {
this.multiTermAnalyzer = analyzer;
}
static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) {
int phraseSlop = 0;
boolean enablePositionIncrements = true;

View File: SolrQueryParser.java

@@ -26,7 +26,6 @@ import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
@@ -71,7 +70,6 @@ public class SolrQueryParser extends QueryParser {
this.schema = parser.getReq().getSchema();
this.parser = parser;
this.defaultField = defaultField;
setLowercaseExpandedTerms(false);
setEnablePositionIncrements(true);
checkAllowLeadingWildcards();
}
@@ -106,6 +104,14 @@ public class SolrQueryParser extends QueryParser {
}
}
protected String analyzeIfMultitermTermText(String field, String part, Analyzer analyzer) {
if (part == null) return part;
SchemaField sf = schema.getFieldOrNull(field);
if (sf == null || ! (sf.getType() instanceof TextField)) return part;
return analyzeMultitermTerm(field, part, analyzer).utf8ToString();
}
@Override
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
checkNullField(field);
@@ -137,6 +143,8 @@
@Override
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
checkNullField(field);
part1 = analyzeIfMultitermTermText(field, part1, schema.getFieldType(field).getMultiTermAnalyzer());
part2 = analyzeIfMultitermTermText(field, part2, schema.getFieldType(field).getMultiTermAnalyzer());
SchemaField sf = schema.getField(field);
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
}
@@ -144,9 +152,8 @@
@Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
checkNullField(field);
if (getLowercaseExpandedTerms()) {
termStr = termStr.toLowerCase();
}
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
// TODO: toInternal() won't necessarily work on partial
// values, so it looks like we need a getPrefix() function
@@ -162,14 +169,13 @@
PrefixQuery prefixQuery = new PrefixQuery(t);
return prefixQuery;
}
@Override
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
// *:* -> MatchAllDocsQuery
if ("*".equals(field) && "*".equals(termStr)) {
return newMatchAllDocsQuery();
}
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
// can we use reversed wildcards in this field?
String type = schema.getFieldType(field).getTypeName();
ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
@@ -213,4 +219,11 @@
}
return q;
}
@Override
protected Query getRegexpQuery(String field, String termStr) throws ParseException {
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
return super.getRegexpQuery(field, termStr);
}
}
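The observable effect, sketched in the test idiom this commit adds below (the thorough coverage is in TestFoldingMultitermQuery): an uppercased, accented wildcard is folded by the multiterm chain before term expansion, so it matches the folded indexed token.

// Assuming a core loaded with the schema-folding.xml fixture below,
// where doc 1 indexes "hijklmn1" in the "content" field:
assertQ(req("q", "content:HÏ*"),   // the multiterm chain folds HÏ* to hi*
    "//result[@numFound='1']",
    "//*[@name='id'][.='1']");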

View File: schema-folding.xml (new test schema)

@@ -0,0 +1,145 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="test" version="1.0">
<types>
<fieldtype name="string" class="solr.StrField" sortMissingLast="true" multiValued="false"/>
<fieldType name="text" class="solr.TextField" multiValued="false">
<analyzer>
<tokenizer class="solr.PatternTokenizerFactory" pattern="\s+"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_multi" class="solr.TextField" multiValued="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.TrimFilterFactory"/>
</analyzer>
<analyzer type="multiterm"> <!-- Intentionally different to test that these are kept distinct -->
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_multi_bad" class="solr.TextField" multiValued="false">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.TrimFilterFactory"/>
</analyzer>
<analyzer type="multiterm"> <!-- Intentionally different to test that these are kept distinct -->
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_ws" class="solr.TextField" multiValued="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_rev" class="solr.TextField" legacyMultiTerm="false">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="false"
maxPosAsterisk="1" maxPosQuestion="2" maxFractionAsterisk="0.99"
minTrailing="1"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_lower_tokenizer" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_charfilter" class="solr.TextField" multiValued="false">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_oldstyle" class="solr.TextField" multiValued="false" legacyMultiTerm="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.TrimFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="byte" class="solr.ByteField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="short" class="solr.ShortField" omitNorms="true" positionIncrementGap="0"/>
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<fieldtype name="date" class="solr.TrieDateField" precisionStep="0"/>
</types>
<fields>
<field name="id" type="string" indexed="true" stored="true" required="true"/>
<field name="int_f" type="int"/>
<field name="float_f" type="float"/>
<field name="long_f" type="long"/>
<field name="double_f" type="double"/>
<field name="byte_f" type="byte"/>
<field name="short_f" type="short"/>
<field name="bool_f" type="boolean"/>
<field name="date_f" type="date"/>
<field name="content" type="text" indexed="true" stored="true"/>
<field name="content_ws" type="text_ws" indexed="true" stored="true"/>
<field name="content_rev" type="text_rev" indexed="true" stored="true"/>
<field name="content_multi" type="text_multi" indexed="true" stored="true"/>
<field name="content_lower_token" type="text_multi" indexed="true" stored="true"/>
<field name="content_oldstyle" type="text_oldstyle" indexed="true" stored="true"/>
<field name="content_charfilter" type="text_charfilter" indexed="true" stored="true"/>
<field name="content_multi_bad" type="text_multi_bad" indexed="true" stored="true"/>
</fields>
<defaultSearchField>content</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File: MultiTermTest.java

@@ -0,0 +1,87 @@
package org.apache.solr.schema;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.analysis.*;
import org.junit.BeforeClass;
import org.junit.Test;
public class MultiTermTest extends SolrTestCaseJ4 {
public String getCoreName() {
return "basic";
}
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig-basic.xml", "schema-folding.xml");
}
@Test
public void testMultiFound() {
SchemaField field = h.getCore().getSchema().getField("content_multi");
Analyzer analyzer = field.getType().getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain);
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
}
analyzer = field.getType().getAnalyzer();
assertTrue(analyzer instanceof TokenizerChain);
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
tc = (TokenizerChain) analyzer;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof TrimFilterFactory));
}
assertTrue(tc.getCharFilterFactories().length == 0);
}
@Test
public void testQueryCopiedToMulti() {
SchemaField field = h.getCore().getSchema().getField("content_charfilter");
Analyzer analyzer = field.getType().getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain);
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
assertTrue(factory instanceof LowerCaseFilterFactory);
}
assertTrue(tc.getCharFilterFactories().length == 1);
assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
@Test
public void testDefaultCopiedToMulti() {
SchemaField field = h.getCore().getSchema().getField("content_ws");
Analyzer analyzer = field.getType().getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain);
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
}
assertTrue(tc.getCharFilterFactories().length == 0);
}
}

View File: TestFoldingMultitermQuery.java

@@ -0,0 +1,231 @@
package org.apache.solr.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
public String getCoreName() {
return "basic";
}
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig-basic.xml", "schema-folding.xml");
String docs[] = {
"abcdefg1 finger",
"gangs hijklmn1",
"opqrstu1 zilly",
};
// prepare the index
for (int i = 0; i < docs.length; i++) {
String num = Integer.toString(i);
String boolVal = ((i % 2) == 0) ? "true" : "false";
assertU(adoc("id", num,
"int_f", num,
"float_f", num,
"long_f", num,
"double_f", num,
"byte_f", num,
"short_f", num,
"bool_f", boolVal,
"date_f", "200" + Integer.toString(i % 10) + "-01-01T00:00:00Z",
"content", docs[i],
"content_ws", docs[i],
"content_rev", docs[i],
"content_multi", docs[i],
"content_lower_token", docs[i],
"content_oldstyle", docs[i],
"content_charfilter", docs[i],
"content_multi_bad", docs[i]
));
}
assertU(optimize());
}
@Test
public void testPrefixCaseAccentFolding() throws Exception {
String matchOneDocPrefixUpper[][] = {
{"A*", "ÁB*", "ABÇ*"}, // these should find only doc 0
{"H*", "HÏ*", "HìJ*"}, // these should find only doc 1
{"O*", "ÖP*", "OPQ*"}, // these should find only doc 2
};
String matchRevPrefixUpper[][] = {
{"*Ğ1", "*DEfG1", "*EfG1"},
{"*N1", "*LmŊ1", "*MÑ1"},
{"*Ǖ1", "*sTu1", "*RŠTU1"}
};
// test the prefix queries find only one doc where the query is uppercased. Must go through query parser here!
for (int idx = 0; idx < matchOneDocPrefixUpper.length; idx++) {
for (int jdx = 0; jdx < matchOneDocPrefixUpper[idx].length; jdx++) {
String me = matchOneDocPrefixUpper[idx][jdx];
assertQ(req("q", "content:" + me),
"//*[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
assertQ(req("q", "content_ws:" + me),
"//*[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
assertQ(req("q", "content_multi:" + me),
"//*[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
assertQ(req("q", "content_lower_token:" + me),
"//result[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
}
}
for (int idx = 0; idx < matchRevPrefixUpper.length; idx++) {
for (int jdx = 0; jdx < matchRevPrefixUpper[idx].length; jdx++) {
String me = matchRevPrefixUpper[idx][jdx];
assertQ(req("q", "content_rev:" + me),
"//*[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
}
}
}
// test the wildcard queries find only one doc where the query is uppercased and/or accented.
@Test
public void testWildcardCaseAccentFolding() throws Exception {
String matchOneDocWildUpper[][] = {
{"Á*C*", "ÁB*1", "ABÇ*g1", "Á*FG1"}, // these should find only doc 0
{"H*k*", "HÏ*l?*", "HìJ*n*", "HìJ*m*"}, // these should find only doc 1
{"O*ř*", "ÖP*ş???", "OPQ*S?Ů*", "ÖP*1"}, // these should find only doc 2
};
for (int idx = 0; idx < matchOneDocWildUpper.length; idx++) {
for (int jdx = 0; jdx < matchOneDocWildUpper[idx].length; jdx++) {
String me = matchOneDocWildUpper[idx][jdx];
assertQ("Error with " + me, req("q", "content:" + me),
"//result[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
assertQ(req("q", "content_ws:" + me),
"//result[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
assertQ(req("q", "content_multi:" + me),
"//result[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
assertQ(req("q", "content_lower_token:" + me),
"//result[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
}
}
}
// Phrases should fail. This test is mainly a marker so that, if phrases ever do start working
// with wildcards, we go update the documentation.
@Test
public void testPhrase() {
assertQ(req("q", "content:\"silly ABCD*\""),
"//result[@numFound='0']");
}
// Make sure the legacy behavior flag is honored
@Test
public void testLegacyBehavior() {
assertQ(req("q", "content_oldstyle:ABCD*"),
"//result[@numFound='0']");
}
@Test
public void testWildcardRange() {
assertQ(req("q", "content:[* TO *]"),
"//result[@numFound='3']");
}
// Does the char filter get correctly handled?
@Test
public void testCharFilter() {
assertQ(req("q", "content_charfilter:" + "Á*C*"),
"//result[@numFound='1']",
"//*[@name='id'][.='0']");
assertQ(req("q", "content_charfilter:" + "ABÇ*g1"),
"//result[@numFound='1']",
"//*[@name='id'][.='0']");
assertQ(req("q", "content_charfilter:" + "HÏ*l?*"),
"//result[@numFound='1']",
"//*[@name='id'][.='1']");
}
@Test
public void testRangeQuery() {
assertQ(req("q", "content:" + "{Ȫp*1 TO QŮ*}"),
"//result[@numFound='1']",
"//*[@name='id'][.='2']");
assertQ(req("q", "content:" + "[Áb* TO f?Ñg?r]"),
"//result[@numFound='1']",
"//*[@name='id'][.='0']");
}
@Test
public void testNonTextTypes() {
String[] intTypes = {"int_f", "float_f", "long_f", "double_f", "byte_f", "short_f"};
for (String str : intTypes) {
assertQ(req("q", str + ":" + "0"),
"//result[@numFound='1']",
"//*[@name='id'][.='0']");
assertQ(req("q", str + ":" + "[0 TO 2]"),
"//result[@numFound='3']",
"//*[@name='id'][.='0']",
"//*[@name='id'][.='1']",
"//*[@name='id'][.='2']");
}
assertQ(req("q", "bool_f:true"),
"//result[@numFound='2']",
"//*[@name='id'][.='0']",
"//*[@name='id'][.='2']");
assertQ(req("q", "bool_f:[false TO true]"),
"//result[@numFound='3']",
"//*[@name='id'][.='0']",
"//*[@name='id'][.='1']",
"//*[@name='id'][.='2']");
assertQ(req("q", "date_f:2000-01-01T00\\:00\\:00Z"),
"//result[@numFound='1']",
"//*[@name='id'][.='0']");
assertQ(req("q", "date_f:[2000-12-31T23:59:59.999Z TO 2002-01-02T00:00:01Z]"),
"//result[@numFound='2']",
"//*[@name='id'][.='1']",
"//*[@name='id'][.='2']");
}
@Test
public void testMultiBad() {
try {
assertQ(req("q", "content_multi_bad:" + "abCD*"));
fail("Should throw exception when token evaluates to more than one term");
} catch (Exception expected) {
assertTrue(expected.getCause() instanceof IllegalArgumentException);
}
}
}

View File: schema.xml (example config)

@@ -427,6 +427,42 @@
</analyzer>
</fieldType>
<!-- Illustrates the new "multiterm" analyzer definition. The <fieldType> can take a new
parameter legacyMultiTerm="true" if the old behavior is desired. The new default
behavior as of 3.6+ is to automatically define a multiterm analyzer
-->
<fieldType name="text_multiterm" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<!-- Illustrates the use of a new analyzer type, "multiterm". See the Wiki page "Multiterm
Query Analysis" and SOLR-2438 for full details. The short form is that this analyzer is
applied to multiterm queries (prefix, wildcard, range) if specified. This allows, among other
things, not having to lowercase wildcard terms on the client.
In the absence of this section, the new default behavior (3.6, 4.0) is to construct
one of these from the query analyzer, incorporating any defined charfilters, a
WhitespaceTokenizer, a LowerCaseFilter (if defined), and an ASCIIFoldingFilter
(if defined).
Arguably, this is an expert-level analyzer; most cases will be handled by an instance
being automatically constructed from the query analyzer.
-->
<analyzer type="multiterm">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<!-- since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright. -->
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />