mirror of https://github.com/apache/lucene.git
SOLR-2438, allow an analysis chain to be created for multiterm query terms or synthesize one if not defined explicitly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206229 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6870592252
commit
098371446a
|
@ -290,7 +290,6 @@ public abstract class QueryParserBase {
|
||||||
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
|
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @see #setLowercaseExpandedTerms(boolean)
|
* @see #setLowercaseExpandedTerms(boolean)
|
||||||
*/
|
*/
|
||||||
|
@ -778,14 +777,21 @@ public abstract class QueryParserBase {
|
||||||
return new FuzzyQuery(term,minimumSimilarity,prefixLength);
|
return new FuzzyQuery(term,minimumSimilarity,prefixLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
private BytesRef analyzeRangePart(String field, String part) {
|
// TODO: Should this be protected instead?
|
||||||
|
private BytesRef analyzeMultitermTerm(String field, String part) {
|
||||||
|
return analyzeMultitermTerm(field, part, analyzer);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
|
||||||
TokenStream source;
|
TokenStream source;
|
||||||
|
|
||||||
|
if (analyzerIn == null) analyzerIn = analyzer;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
source = analyzer.tokenStream(field, new StringReader(part));
|
source = analyzerIn.tokenStream(field, new StringReader(part));
|
||||||
source.reset();
|
source.reset();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e);
|
throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
||||||
|
@ -793,10 +799,10 @@ public abstract class QueryParserBase {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (!source.incrementToken())
|
if (!source.incrementToken())
|
||||||
throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
|
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
|
||||||
termAtt.fillBytesRef();
|
termAtt.fillBytesRef();
|
||||||
if (source.incrementToken())
|
if (source.incrementToken())
|
||||||
throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part);
|
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("error analyzing range part: " + part, e);
|
throw new RuntimeException("error analyzing range part: " + part, e);
|
||||||
}
|
}
|
||||||
|
@ -805,7 +811,7 @@ public abstract class QueryParserBase {
|
||||||
source.end();
|
source.end();
|
||||||
source.close();
|
source.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("Unable to end & close TokenStream after analyzing range part: " + part, e);
|
throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return BytesRef.deepCopyOf(bytes);
|
return BytesRef.deepCopyOf(bytes);
|
||||||
|
@ -827,13 +833,13 @@ public abstract class QueryParserBase {
|
||||||
if (part1 == null) {
|
if (part1 == null) {
|
||||||
start = null;
|
start = null;
|
||||||
} else {
|
} else {
|
||||||
start = analyzeRangeTerms ? analyzeRangePart(field, part1) : new BytesRef(part1);
|
start = analyzeRangeTerms ? analyzeMultitermTerm(field, part1) : new BytesRef(part1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (part2 == null) {
|
if (part2 == null) {
|
||||||
end = null;
|
end = null;
|
||||||
} else {
|
} else {
|
||||||
end = analyzeRangeTerms ? analyzeRangePart(field, part2) : new BytesRef(part2);
|
end = analyzeRangeTerms ? analyzeMultitermTerm(field, part2) : new BytesRef(part2);
|
||||||
}
|
}
|
||||||
|
|
||||||
final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive);
|
final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive);
|
||||||
|
|
|
@ -188,6 +188,11 @@ New Features
|
||||||
|
|
||||||
* SOLR-2134 Trie* fields should support sortMissingLast=true, and deprecate Sortable* Field Types
|
* SOLR-2134 Trie* fields should support sortMissingLast=true, and deprecate Sortable* Field Types
|
||||||
(Ryan McKinley, Mike McCandless, Uwe Schindler, Erick Erickson)
|
(Ryan McKinley, Mike McCandless, Uwe Schindler, Erick Erickson)
|
||||||
|
|
||||||
|
* SOLR-2438: Case insensitive search for wildcard queries. Actually, the ability to specify
|
||||||
|
a complete analysis chain for multiterm queries.
|
||||||
|
(Pete Sturge Erick Erickson, Mentoring from Seeley and Muir)
|
||||||
|
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
@ -383,6 +388,11 @@ New Features
|
||||||
* SOLR-1565: StreamingUpdateSolrServer supports RequestWriter API and therefore, javabin update
|
* SOLR-1565: StreamingUpdateSolrServer supports RequestWriter API and therefore, javabin update
|
||||||
format (shalin)
|
format (shalin)
|
||||||
|
|
||||||
|
* SOLR-2438: Case insensitive search for wildcard queries. Actually, the ability to specify
|
||||||
|
a complete analysis chain for multiterm queries.
|
||||||
|
(Pete Sturge Erick Erickson, Mentoring from Seeley and Muir)
|
||||||
|
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-2912: Fixed File descriptor leak in ShowFileRequestHandler (Michael Ryan, shalin)
|
* SOLR-2912: Fixed File descriptor leak in ShowFileRequestHandler (Michael Ryan, shalin)
|
||||||
|
|
|
@ -48,13 +48,15 @@ public abstract class FieldProperties {
|
||||||
|
|
||||||
protected final static int REQUIRED = 0x00001000;
|
protected final static int REQUIRED = 0x00001000;
|
||||||
protected final static int OMIT_POSITIONS = 0x00002000;
|
protected final static int OMIT_POSITIONS = 0x00002000;
|
||||||
|
protected final static int LEGACY_MULTITERM = 0x00004000;
|
||||||
|
|
||||||
static final String[] propertyNames = {
|
static final String[] propertyNames = {
|
||||||
"indexed", "tokenized", "stored",
|
"indexed", "tokenized", "stored",
|
||||||
"binary", "omitNorms", "omitTermFreqAndPositions",
|
"binary", "omitNorms", "omitTermFreqAndPositions",
|
||||||
"termVectors", "termPositions", "termOffsets",
|
"termVectors", "termPositions", "termOffsets",
|
||||||
"multiValued",
|
"multiValued",
|
||||||
"sortMissingFirst","sortMissingLast","required", "omitPositions"
|
"sortMissingFirst","sortMissingLast","required", "omitPositions" ,
|
||||||
|
"legacyMultiTerm"
|
||||||
};
|
};
|
||||||
|
|
||||||
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();
|
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();
|
||||||
|
|
|
@ -428,6 +428,21 @@ public abstract class FieldType extends FieldProperties {
|
||||||
*/
|
*/
|
||||||
protected Analyzer queryAnalyzer=analyzer;
|
protected Analyzer queryAnalyzer=analyzer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyzer set by schema for text types to use when searching fields
|
||||||
|
* of this type, subclasses can set analyzer themselves or override
|
||||||
|
* getAnalyzer()
|
||||||
|
* This analyzer is used to process wildcard, prefix, regex and other multiterm queries. It
|
||||||
|
* assembles a list of tokenizer +filters that "make sense" for this, primarily accent folding and
|
||||||
|
* lowercasing filters, and charfilters.
|
||||||
|
*
|
||||||
|
* If users require old-style behavior, they can specify 'legacyMultiterm="true" ' in the schema file
|
||||||
|
* @see #getMultiTermAnalyzer
|
||||||
|
* @see #setMultiTermAnalyzer
|
||||||
|
*/
|
||||||
|
protected Analyzer multiTermAnalyzer=null;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the Analyzer to be used when indexing fields of this type.
|
* Returns the Analyzer to be used when indexing fields of this type.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -450,6 +465,17 @@ public abstract class FieldType extends FieldProperties {
|
||||||
return queryAnalyzer;
|
return queryAnalyzer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the Analyzer to be used when searching fields of this type when mult-term queries are specified.
|
||||||
|
* <p>
|
||||||
|
* This method may be called many times, at any time.
|
||||||
|
* </p>
|
||||||
|
* @see #getAnalyzer
|
||||||
|
*/
|
||||||
|
public Analyzer getMultiTermAnalyzer() {
|
||||||
|
return multiTermAnalyzer;
|
||||||
|
}
|
||||||
|
|
||||||
private final String analyzerError =
|
private final String analyzerError =
|
||||||
"FieldType: " + this.getClass().getSimpleName() +
|
"FieldType: " + this.getClass().getSimpleName() +
|
||||||
" (" + typeName + ") does not support specifying an analyzer";
|
" (" + typeName + ") does not support specifying an analyzer";
|
||||||
|
@ -498,6 +524,28 @@ public abstract class FieldType extends FieldProperties {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the Analyzer to be used when querying fields of this type.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
*
|
||||||
|
* Subclasses that override this method need to ensure the behavior
|
||||||
|
* of the analyzer is consistent with the implementation of toInternal.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @see #toInternal
|
||||||
|
* @see #setAnalyzer
|
||||||
|
* @see #getQueryAnalyzer
|
||||||
|
*/
|
||||||
|
public void setMultiTermAnalyzer(Analyzer analyzer) {
|
||||||
|
SolrException e = new SolrException
|
||||||
|
(ErrorCode.SERVER_ERROR,
|
||||||
|
"FieldType: " + this.getClass().getSimpleName() +
|
||||||
|
" (" + typeName + ") does not support specifying an analyzer");
|
||||||
|
SolrException.logOnce(log,null,e);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
/** @lucene.internal */
|
/** @lucene.internal */
|
||||||
protected Similarity similarity;
|
protected Similarity similarity;
|
||||||
|
|
||||||
|
|
|
@ -18,19 +18,15 @@
|
||||||
package org.apache.solr.schema;
|
package org.apache.solr.schema;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.apache.solr.analysis.*;
|
||||||
import org.apache.solr.common.ResourceLoader;
|
import org.apache.solr.common.ResourceLoader;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
|
||||||
import org.apache.solr.common.util.DOMUtil;
|
import org.apache.solr.common.util.DOMUtil;
|
||||||
import org.apache.solr.common.util.NamedList;
|
|
||||||
import org.apache.solr.core.Config;
|
import org.apache.solr.core.Config;
|
||||||
import org.apache.solr.core.SolrResourceLoader;
|
import org.apache.solr.core.SolrResourceLoader;
|
||||||
import org.apache.solr.analysis.CharFilterFactory;
|
|
||||||
import org.apache.solr.analysis.TokenFilterFactory;
|
|
||||||
import org.apache.solr.analysis.TokenizerChain;
|
|
||||||
import org.apache.solr.analysis.TokenizerFactory;
|
|
||||||
import org.apache.solr.util.plugin.AbstractPluginLoader;
|
import org.apache.solr.util.plugin.AbstractPluginLoader;
|
||||||
import org.w3c.dom.*;
|
import org.w3c.dom.*;
|
||||||
|
|
||||||
|
@ -88,12 +84,16 @@ public final class FieldTypePluginLoader
|
||||||
String expression = "./analyzer[@type='query']";
|
String expression = "./analyzer[@type='query']";
|
||||||
Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
||||||
Analyzer queryAnalyzer = readAnalyzer(anode);
|
Analyzer queryAnalyzer = readAnalyzer(anode);
|
||||||
|
|
||||||
|
expression = "./analyzer[@type='multiterm']";
|
||||||
|
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
||||||
|
Analyzer multiAnalyzer = readAnalyzer(anode);
|
||||||
|
|
||||||
// An analyzer without a type specified, or with type="index"
|
// An analyzer without a type specified, or with type="index"
|
||||||
expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
|
expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
|
||||||
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
||||||
Analyzer analyzer = readAnalyzer(anode);
|
Analyzer analyzer = readAnalyzer(anode);
|
||||||
|
|
||||||
// a custom similarity[Factory]
|
// a custom similarity[Factory]
|
||||||
expression = "./similarity";
|
expression = "./similarity";
|
||||||
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
||||||
|
@ -101,9 +101,16 @@ public final class FieldTypePluginLoader
|
||||||
|
|
||||||
if (queryAnalyzer==null) queryAnalyzer=analyzer;
|
if (queryAnalyzer==null) queryAnalyzer=analyzer;
|
||||||
if (analyzer==null) analyzer=queryAnalyzer;
|
if (analyzer==null) analyzer=queryAnalyzer;
|
||||||
|
if (multiAnalyzer == null) {
|
||||||
|
Boolean legacyMatch = ! schema.getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_36);
|
||||||
|
legacyMatch = (DOMUtil.getAttr(node, "legacyMultiTerm", null) == null) ? legacyMatch :
|
||||||
|
Boolean.parseBoolean(DOMUtil.getAttr(node, "legacyMultiTerm", null));
|
||||||
|
multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer, legacyMatch);
|
||||||
|
}
|
||||||
if (analyzer!=null) {
|
if (analyzer!=null) {
|
||||||
ft.setAnalyzer(analyzer);
|
ft.setAnalyzer(analyzer);
|
||||||
ft.setQueryAnalyzer(queryAnalyzer);
|
ft.setQueryAnalyzer(queryAnalyzer);
|
||||||
|
ft.setMultiTermAnalyzer(multiAnalyzer);
|
||||||
}
|
}
|
||||||
if (similarity!=null) {
|
if (similarity!=null) {
|
||||||
ft.setSimilarity(similarity);
|
ft.setSimilarity(similarity);
|
||||||
|
@ -130,6 +137,42 @@ public final class FieldTypePluginLoader
|
||||||
return fieldTypes.put( name, plugin );
|
return fieldTypes.put( name, plugin );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The point here is that, if no multitermanalyzer was specified in the schema file, do one of several things:
|
||||||
|
// 1> If legacyMultiTerm == false, assemble a new analyzer composed of all of the charfilters,
|
||||||
|
// lowercase filters and asciifoldingfilter.
|
||||||
|
// 2> If letacyMultiTerm == true just construct the analyzer from a KeywordTokenizer. That should mimic current behavior.
|
||||||
|
// Do the same if they've specified that the old behavior is required (legacyMultiTerm="true")
|
||||||
|
|
||||||
|
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer, Boolean legacyMultiTerm) {
|
||||||
|
if (queryAnalyzer == null) return null;
|
||||||
|
|
||||||
|
if (legacyMultiTerm || (!(queryAnalyzer instanceof TokenizerChain))) {
|
||||||
|
return new KeywordAnalyzer();
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenizerChain tc = (TokenizerChain) queryAnalyzer;
|
||||||
|
|
||||||
|
// we know it'll never be longer than this unless the code below is explicitly changed
|
||||||
|
TokenFilterFactory[] filters = new TokenFilterFactory[2];
|
||||||
|
int idx = 0;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
if (factory instanceof LowerCaseFilterFactory) {
|
||||||
|
filters[idx] = new LowerCaseFilterFactory();
|
||||||
|
filters[idx++].init(factory.getArgs());
|
||||||
|
}
|
||||||
|
if (factory instanceof ASCIIFoldingFilterFactory) {
|
||||||
|
filters[idx] = new ASCIIFoldingFilterFactory();
|
||||||
|
filters[idx++].init(factory.getArgs());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
WhitespaceTokenizerFactory white = new WhitespaceTokenizerFactory();
|
||||||
|
white.init(tc.getTokenizerFactory().getArgs());
|
||||||
|
|
||||||
|
return new TokenizerChain(tc.getCharFilterFactories(),
|
||||||
|
white,
|
||||||
|
Arrays.copyOfRange(filters, 0, idx));
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
|
// <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
|
||||||
//
|
//
|
||||||
|
|
|
@ -97,6 +97,9 @@ public final class SchemaField extends FieldProperties {
|
||||||
boolean isTokenized() { return (properties & TOKENIZED)!=0; }
|
boolean isTokenized() { return (properties & TOKENIZED)!=0; }
|
||||||
boolean isBinary() { return (properties & BINARY)!=0; }
|
boolean isBinary() { return (properties & BINARY)!=0; }
|
||||||
|
|
||||||
|
boolean legacyMultiTerm() {
|
||||||
|
return (properties & LEGACY_MULTITERM) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
public IndexableField createField(Object val, float boost) {
|
public IndexableField createField(Object val, float boost) {
|
||||||
return type.createField(this,val,boost);
|
return type.createField(this,val,boost);
|
||||||
|
|
|
@ -98,6 +98,11 @@ public class TextField extends FieldType {
|
||||||
this.queryAnalyzer = analyzer;
|
this.queryAnalyzer = analyzer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setMultiTermAnalyzer(Analyzer analyzer) {
|
||||||
|
this.multiTermAnalyzer = analyzer;
|
||||||
|
}
|
||||||
|
|
||||||
static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) {
|
static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) {
|
||||||
int phraseSlop = 0;
|
int phraseSlop = 0;
|
||||||
boolean enablePositionIncrements = true;
|
boolean enablePositionIncrements = true;
|
||||||
|
|
|
@ -26,7 +26,6 @@ import org.apache.lucene.queryparser.classic.ParseException;
|
||||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.apache.lucene.util.automaton.Automaton;
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||||
import org.apache.lucene.util.automaton.BasicOperations;
|
import org.apache.lucene.util.automaton.BasicOperations;
|
||||||
|
@ -71,7 +70,6 @@ public class SolrQueryParser extends QueryParser {
|
||||||
this.schema = parser.getReq().getSchema();
|
this.schema = parser.getReq().getSchema();
|
||||||
this.parser = parser;
|
this.parser = parser;
|
||||||
this.defaultField = defaultField;
|
this.defaultField = defaultField;
|
||||||
setLowercaseExpandedTerms(false);
|
|
||||||
setEnablePositionIncrements(true);
|
setEnablePositionIncrements(true);
|
||||||
checkAllowLeadingWildcards();
|
checkAllowLeadingWildcards();
|
||||||
}
|
}
|
||||||
|
@ -106,6 +104,14 @@ public class SolrQueryParser extends QueryParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected String analyzeIfMultitermTermText(String field, String part, Analyzer analyzer) {
|
||||||
|
if (part == null) return part;
|
||||||
|
|
||||||
|
SchemaField sf = schema.getFieldOrNull((field));
|
||||||
|
if (sf == null || ! (sf.getType() instanceof TextField)) return part;
|
||||||
|
return analyzeMultitermTerm(field, part, analyzer).utf8ToString();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
|
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
|
||||||
checkNullField(field);
|
checkNullField(field);
|
||||||
|
@ -137,6 +143,8 @@ public class SolrQueryParser extends QueryParser {
|
||||||
@Override
|
@Override
|
||||||
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
|
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
|
||||||
checkNullField(field);
|
checkNullField(field);
|
||||||
|
part1 = analyzeIfMultitermTermText(field, part1, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
|
part2 = analyzeIfMultitermTermText(field, part2, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
SchemaField sf = schema.getField(field);
|
SchemaField sf = schema.getField(field);
|
||||||
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
|
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
|
||||||
}
|
}
|
||||||
|
@ -144,9 +152,8 @@ public class SolrQueryParser extends QueryParser {
|
||||||
@Override
|
@Override
|
||||||
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
|
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
|
||||||
checkNullField(field);
|
checkNullField(field);
|
||||||
if (getLowercaseExpandedTerms()) {
|
|
||||||
termStr = termStr.toLowerCase();
|
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: toInternal() won't necessarily work on partial
|
// TODO: toInternal() won't necessarily work on partial
|
||||||
// values, so it looks like we need a getPrefix() function
|
// values, so it looks like we need a getPrefix() function
|
||||||
|
@ -162,14 +169,13 @@ public class SolrQueryParser extends QueryParser {
|
||||||
PrefixQuery prefixQuery = new PrefixQuery(t);
|
PrefixQuery prefixQuery = new PrefixQuery(t);
|
||||||
return prefixQuery;
|
return prefixQuery;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
|
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
|
||||||
// *:* -> MatchAllDocsQuery
|
// *:* -> MatchAllDocsQuery
|
||||||
if ("*".equals(field) && "*".equals(termStr)) {
|
if ("*".equals(field) && "*".equals(termStr)) {
|
||||||
return newMatchAllDocsQuery();
|
return newMatchAllDocsQuery();
|
||||||
}
|
}
|
||||||
|
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
// can we use reversed wildcards in this field?
|
// can we use reversed wildcards in this field?
|
||||||
String type = schema.getFieldType(field).getTypeName();
|
String type = schema.getFieldType(field).getTypeName();
|
||||||
ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
|
ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
|
||||||
|
@ -213,4 +219,11 @@ public class SolrQueryParser extends QueryParser {
|
||||||
}
|
}
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected Query getRegexpQuery(String field, String termStr) throws ParseException
|
||||||
|
{
|
||||||
|
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
|
return super.getRegexpQuery(field, termStr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,145 @@
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<schema name="test" version="1.0">
|
||||||
|
<types>
|
||||||
|
<fieldtype name="string" class="solr.StrField" sortMissingLast="true" multiValued="false"/>
|
||||||
|
|
||||||
|
<fieldType name="text" class="solr.TextField" multiValued="false">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.PatternTokenizerFactory" pattern="\s+"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_multi" class="solr.TextField" multiValued="true">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.TrimFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="multiterm"> <!-- Intentionally different to test that these are kept distinct -->
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_multi_bad" class="solr.TextField" multiValued="false">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.TrimFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="multiterm"> <!-- Intentionally different to test that these are kept distinct -->
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||||
|
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<fieldType name="text_ws" class="solr.TextField" multiValued="true">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_rev" class="solr.TextField" legacyMultiTerm="false">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="false"
|
||||||
|
maxPosAsterisk="1" maxPosQuestion="2" maxFractionAsterisk="0.99"
|
||||||
|
minTrailing="1"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_lower_tokenizer" class="solr.TextField">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_charfilter" class="solr.TextField" multiValued="false">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_oldstyle" class="solr.TextField" multiValued="false" legacyMultiTerm="true">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.TrimFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="byte" class="solr.ByteField" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="short" class="solr.ShortField" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
||||||
|
<fieldtype name="date" class="solr.TrieDateField" precisionStep="0"/>
|
||||||
|
</types>
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="id" type="string" indexed="true" stored="true" required="true"/>
|
||||||
|
<field name="int_f" type="int"/>
|
||||||
|
<field name="float_f" type="float"/>
|
||||||
|
<field name="long_f" type="long"/>
|
||||||
|
<field name="double_f" type="double"/>
|
||||||
|
<field name="byte_f" type="byte"/>
|
||||||
|
<field name="short_f" type="short"/>
|
||||||
|
<field name="bool_f" type="boolean"/>
|
||||||
|
<field name="date_f" type="date"/>
|
||||||
|
|
||||||
|
<field name="content" type="text" indexed="true" stored="true"/>
|
||||||
|
<field name="content_ws" type="text_ws" indexed="true" stored="true"/>
|
||||||
|
<field name="content_rev" type="text_rev" indexed="true" stored="true"/>
|
||||||
|
<field name="content_multi" type="text_multi" indexed="true" stored="true"/>
|
||||||
|
<field name="content_lower_token" type="text_multi" indexed="true" stored="true"/>
|
||||||
|
<field name="content_oldstyle" type="text_oldstyle" indexed="true" stored="true"/>
|
||||||
|
<field name="content_charfilter" type="text_charfilter" indexed="true" stored="true"/>
|
||||||
|
<field name="content_multi_bad" type="text_multi_bad" indexed="true" stored="true"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<defaultSearchField>content</defaultSearchField>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
|
||||||
|
</schema>
|
|
@ -0,0 +1,87 @@
|
||||||
|
package org.apache.solr.schema;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.analysis.*;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class MultiTermTest extends SolrTestCaseJ4 {
|
||||||
|
public String getCoreName() {
|
||||||
|
return "basic";
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeTests() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml", "schema-folding.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiFound() {
|
||||||
|
SchemaField field = h.getCore().getSchema().getField("content_multi");
|
||||||
|
Analyzer analyzer = field.getType().getMultiTermAnalyzer();
|
||||||
|
assertTrue(analyzer instanceof TokenizerChain);
|
||||||
|
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
|
||||||
|
TokenizerChain tc = (TokenizerChain) analyzer;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzer = field.getType().getAnalyzer();
|
||||||
|
assertTrue(analyzer instanceof TokenizerChain);
|
||||||
|
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
|
||||||
|
tc = (TokenizerChain) analyzer;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof TrimFilterFactory));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(tc.getCharFilterFactories().length == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testQueryCopiedToMulti() {
|
||||||
|
SchemaField field = h.getCore().getSchema().getField("content_charfilter");
|
||||||
|
Analyzer analyzer = field.getType().getMultiTermAnalyzer();
|
||||||
|
assertTrue(analyzer instanceof TokenizerChain);
|
||||||
|
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
|
||||||
|
TokenizerChain tc = (TokenizerChain) analyzer;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
assertTrue(factory instanceof LowerCaseFilterFactory);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(tc.getCharFilterFactories().length == 1);
|
||||||
|
assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDefaultCopiedToMulti() {
|
||||||
|
SchemaField field = h.getCore().getSchema().getField("content_ws");
|
||||||
|
Analyzer analyzer = field.getType().getMultiTermAnalyzer();
|
||||||
|
assertTrue(analyzer instanceof TokenizerChain);
|
||||||
|
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
|
||||||
|
TokenizerChain tc = (TokenizerChain) analyzer;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(tc.getCharFilterFactories().length == 0);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,231 @@
|
||||||
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
public String getCoreName() {
|
||||||
|
return "basic";
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeTests() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml", "schema-folding.xml");
|
||||||
|
IndexWriter iw;
|
||||||
|
|
||||||
|
String docs[] = {
|
||||||
|
"abcdefg1 finger",
|
||||||
|
"gangs hijklmn1",
|
||||||
|
"opqrstu1 zilly",
|
||||||
|
};
|
||||||
|
|
||||||
|
// prepare the index
|
||||||
|
for (int i = 0; i < docs.length; i++) {
|
||||||
|
String num = Integer.toString(i);
|
||||||
|
String boolVal = ((i % 2) == 0) ? "true" : "false";
|
||||||
|
assertU(adoc("id", num,
|
||||||
|
"int_f", num,
|
||||||
|
"float_f", num,
|
||||||
|
"long_f", num,
|
||||||
|
"double_f", num,
|
||||||
|
"byte_f", num,
|
||||||
|
"short_f", num,
|
||||||
|
"bool_f", boolVal,
|
||||||
|
"date_f", "200" + Integer.toString(i % 10) + "-01-01T00:00:00Z",
|
||||||
|
"content", docs[i],
|
||||||
|
"content_ws", docs[i],
|
||||||
|
"content_rev", docs[i],
|
||||||
|
"content_multi", docs[i],
|
||||||
|
"content_lower_token", docs[i],
|
||||||
|
"content_oldstyle", docs[i],
|
||||||
|
"content_charfilter", docs[i],
|
||||||
|
"content_multi_bad", docs[i]
|
||||||
|
));
|
||||||
|
}
|
||||||
|
assertU(optimize());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPrefixCaseAccentFolding() throws Exception {
|
||||||
|
String matchOneDocPrefixUpper[][] = {
|
||||||
|
{"A*", "ÁB*", "ABÇ*"}, // these should find only doc 0
|
||||||
|
{"H*", "HÏ*", "HìJ*"}, // these should find only doc 1
|
||||||
|
{"O*", "ÖP*", "OPQ*"}, // these should find only doc 2
|
||||||
|
};
|
||||||
|
|
||||||
|
String matchRevPrefixUpper[][] = {
|
||||||
|
{"*Ğ1", "*DEfG1", "*EfG1"},
|
||||||
|
{"*N1", "*LmŊ1", "*MÑ1"},
|
||||||
|
{"*Ǖ1", "*sTu1", "*RŠTU1"}
|
||||||
|
};
|
||||||
|
|
||||||
|
// test the prefix queries find only one doc where the query is uppercased. Must go through query parser here!
|
||||||
|
for (int idx = 0; idx < matchOneDocPrefixUpper.length; idx++) {
|
||||||
|
for (int jdx = 0; jdx < matchOneDocPrefixUpper[idx].length; jdx++) {
|
||||||
|
String me = matchOneDocPrefixUpper[idx][jdx];
|
||||||
|
assertQ(req("q", "content:" + me),
|
||||||
|
"//*[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_ws:" + me),
|
||||||
|
"//*[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_multi:" + me),
|
||||||
|
"//*[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_lower_token:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int idx = 0; idx < matchRevPrefixUpper.length; idx++) {
|
||||||
|
for (int jdx = 0; jdx < matchRevPrefixUpper[idx].length; jdx++) {
|
||||||
|
String me = matchRevPrefixUpper[idx][jdx];
|
||||||
|
assertQ(req("q", "content_rev:" + me),
|
||||||
|
"//*[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test the wildcard queries find only one doc where the query is uppercased and/or accented.
|
||||||
|
@Test
|
||||||
|
public void testWildcardCaseAccentFolding() throws Exception {
|
||||||
|
String matchOneDocWildUpper[][] = {
|
||||||
|
{"Á*C*", "ÁB*1", "ABÇ*g1", "Á*FG1"}, // these should find only doc 0
|
||||||
|
{"H*k*", "HÏ*l?*", "HìJ*n*", "HìJ*m*"}, // these should find only doc 1
|
||||||
|
{"O*ř*", "ÖP*ş???", "OPQ*S?Ů*", "ÖP*1"}, // these should find only doc 2
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int idx = 0; idx < matchOneDocWildUpper.length; idx++) {
|
||||||
|
for (int jdx = 0; jdx < matchOneDocWildUpper[idx].length; jdx++) {
|
||||||
|
String me = matchOneDocWildUpper[idx][jdx];
|
||||||
|
assertQ("Error with " + me, req("q", "content:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_ws:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_multi:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_lower_token:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phrases should fail. This test is mainly a marker so if phrases ever do start working with wildcards we go
|
||||||
|
// and update the documentation
|
||||||
|
@Test
|
||||||
|
public void testPhrase() {
|
||||||
|
assertQ(req("q", "content:\"silly ABCD*\""),
|
||||||
|
"//result[@numFound='0']");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure the legacy behavior flag is honored
|
||||||
|
@Test
|
||||||
|
public void testLegacyBehavior() {
|
||||||
|
assertQ(req("q", "content_oldstyle:ABCD*"),
|
||||||
|
"//result[@numFound='0']");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWildcardRange() {
|
||||||
|
assertQ(req("q", "content:[* TO *]"),
|
||||||
|
"//result[@numFound='3']");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Does the char filter get correctly handled?
|
||||||
|
@Test
|
||||||
|
public void testCharFilter() {
|
||||||
|
assertQ(req("q", "content_charfilter:" + "Á*C*"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
assertQ(req("q", "content_charfilter:" + "ABÇ*g1"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
assertQ(req("q", "content_charfilter:" + "HÏ*l?*"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='1']");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRangeQuery() {
|
||||||
|
assertQ(req("q", "content:" + "{Ȫp*1 TO QŮ*}"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
|
||||||
|
assertQ(req("q", "content:" + "[Áb* TO f?Ñg?r]"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNonTextTypes() {
|
||||||
|
String[] intTypes = {"int_f", "float_f", "long_f", "double_f", "byte_f", "short_f"};
|
||||||
|
|
||||||
|
for (String str : intTypes) {
|
||||||
|
assertQ(req("q", str + ":" + "0"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
|
||||||
|
assertQ(req("q", str + ":" + "[0 TO 2]"),
|
||||||
|
"//result[@numFound='3']",
|
||||||
|
"//*[@name='id'][.='0']",
|
||||||
|
"//*[@name='id'][.='1']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
}
|
||||||
|
assertQ(req("q", "bool_f:true"),
|
||||||
|
"//result[@numFound='2']",
|
||||||
|
"//*[@name='id'][.='0']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
|
||||||
|
assertQ(req("q", "bool_f:[false TO true]"),
|
||||||
|
"//result[@numFound='3']",
|
||||||
|
"//*[@name='id'][.='0']",
|
||||||
|
"//*[@name='id'][.='1']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
|
||||||
|
assertQ(req("q", "date_f:2000-01-01T00\\:00\\:00Z"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
|
||||||
|
assertQ(req("q", "date_f:[2000-12-31T23:59:59.999Z TO 2002-01-02T00:00:01Z]"),
|
||||||
|
"//result[@numFound='2']",
|
||||||
|
"//*[@name='id'][.='1']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiBad() {
|
||||||
|
try {
|
||||||
|
assertQ(req("q", "content_multi_bad:" + "abCD*"));
|
||||||
|
fail("Should throw exception when token evaluates to more than one term");
|
||||||
|
} catch (Exception expected) {
|
||||||
|
assertTrue(expected.getCause() instanceof IllegalArgumentException);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -427,6 +427,42 @@
|
||||||
    </analyzer>
  </fieldType>

  <!-- Illustrates the new "multiterm" analyzer definition. The <fieldType> can take a new
       parameter legacyMultiTerm="true" if the old behavior is desired. The new default
       behavior as of 3.6+ is to automatically define a multiterm analyzer
  -->
  <fieldType name="text_multiterm" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <!-- Illustrates the use of a new analyzer type "multiterm". See the Wiki page "Multiterm
         Query Analysis" and SOLR-2438 for full details. The short form is that this analyzer is
         applied to wildcard terms (prefix, wildcard, range) if specified. This allows, among other
         things, not having to lowercase wildcard terms on the client.

         In the absence of this section, the new default behavior (3.6, 4.0) is to construct
         one of these from the query analyzer that incorporates any defined charfilters, a
         WhitespaceTokenizer, a LowerCaseFilter (if defined), and an ASCIIFoldingFilter
         (if defined).

         Arguably, this is an expert-level analyzer; most cases will be handled by an instance
         of this being automatically constructed from the query analyzer.
    -->
    <analyzer type="multiterm">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.ASCIIFoldingFilterFactory"/>
    </analyzer>
  </fieldType>

  <!-- since fields of this type are by default not stored or indexed,
       any data added to them will be ignored outright. -->
  <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
Loading…
Reference in New Issue