git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206767 13f79535-47bb-0310-9956-ffa450edef68

This commit is contained in:
Erick Erickson 2011-11-27 17:04:38 +00:00
parent 5c4063bef2
commit c94c1c5a64
18 changed files with 366 additions and 210 deletions

View File

@ -193,6 +193,11 @@ New Features
a complete analysis chain for multiterm queries. a complete analysis chain for multiterm queries.
(Pete Sturge Erick Erickson, Mentoring from Seeley and Muir) (Pete Sturge Erick Erickson, Mentoring from Seeley and Muir)
* SOLR-2918 Improvement to SOLR-2438, added MultiTermAwareComponent to the various classes
that should transform multiterm queries in various ways, and use this as the criteria for
adding them to the multiterm analyzer that is constructed if not specified in the
<fieldType>
Optimizations Optimizations
---------------------- ----------------------

View File

@ -32,9 +32,14 @@ import org.apache.lucene.analysis.TokenStream;
* &lt;/fieldType&gt;</pre> * &lt;/fieldType&gt;</pre>
* *
*/ */
public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory { public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
public ASCIIFoldingFilter create(TokenStream input) { public ASCIIFoldingFilter create(TokenStream input) {
return new ASCIIFoldingFilter(input); return new ASCIIFoldingFilter(input);
} }
@Override
public Object getMultiTermComponent() {
return this;
}
} }

View File

@ -33,7 +33,7 @@ import org.apache.lucene.analysis.core.LowerCaseFilter;
* &lt;/fieldType&gt;</pre> * &lt;/fieldType&gt;</pre>
* *
*/ */
public class LowerCaseFilterFactory extends BaseTokenFilterFactory { public class LowerCaseFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
@Override @Override
public void init(Map<String,String> args) { public void init(Map<String,String> args) {
super.init(args); super.init(args);
@ -43,4 +43,9 @@ public class LowerCaseFilterFactory extends BaseTokenFilterFactory {
public LowerCaseFilter create(TokenStream input) { public LowerCaseFilter create(TokenStream input) {
return new LowerCaseFilter(luceneMatchVersion,input); return new LowerCaseFilter(luceneMatchVersion,input);
} }
@Override
public Object getMultiTermComponent() {
return this;
}
} }

View File

@ -17,6 +17,7 @@
package org.apache.solr.analysis; package org.apache.solr.analysis;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.LowerCaseTokenizer; import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import java.io.Reader; import java.io.Reader;
@ -32,7 +33,7 @@ import java.util.Map;
* &lt;/fieldType&gt;</pre> * &lt;/fieldType&gt;</pre>
* *
*/ */
public class LowerCaseTokenizerFactory extends BaseTokenizerFactory { public class LowerCaseTokenizerFactory extends BaseTokenizerFactory implements MultiTermAwareComponent {
@Override @Override
public void init(Map<String,String> args) { public void init(Map<String,String> args) {
super.init(args); super.init(args);
@ -42,4 +43,11 @@ public class LowerCaseTokenizerFactory extends BaseTokenizerFactory {
public LowerCaseTokenizer create(Reader input) { public LowerCaseTokenizer create(Reader input) {
return new LowerCaseTokenizer(luceneMatchVersion,input); return new LowerCaseTokenizer(luceneMatchVersion,input);
} }
@Override
public Object getMultiTermComponent() {
LowerCaseFilterFactory filt = new LowerCaseFilterFactory();
filt.init(args);
return filt;
}
} }

View File

@ -46,7 +46,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
* *
*/ */
public class MappingCharFilterFactory extends BaseCharFilterFactory implements public class MappingCharFilterFactory extends BaseCharFilterFactory implements
ResourceLoaderAware { ResourceLoaderAware, MultiTermAwareComponent {
protected NormalizeCharMap normMap; protected NormalizeCharMap normMap;
private String mapping; private String mapping;
@ -126,4 +126,9 @@ public class MappingCharFilterFactory extends BaseCharFilterFactory implements
} }
return new String( out, 0, writePos ); return new String( out, 0, writePos );
} }
@Override
public Object getMultiTermComponent() {
return this;
}
} }

View File

@ -0,0 +1,31 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Add to any analysis factory component to allow returning an
* analysis component factory for use with partial terms in prefix queries,
* wildcard queries, range query endpoints, regex queries, etc.
*
* @lucene.experimental
*/
public interface MultiTermAwareComponent {
/** Returns an analysis component to handle analysis of multi-term queries.
* The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
*/
public Object getMultiTermComponent();
}

View File

@ -31,10 +31,15 @@ import org.apache.lucene.analysis.fa.PersianCharFilter;
* &lt;/fieldType&gt;</pre> * &lt;/fieldType&gt;</pre>
* *
*/ */
public class PersianCharFilterFactory extends BaseCharFilterFactory { public class PersianCharFilterFactory extends BaseCharFilterFactory implements MultiTermAwareComponent {
@Override @Override
public CharStream create(CharStream input) { public CharStream create(CharStream input) {
return new PersianCharFilter(input); return new PersianCharFilter(input);
} }
@Override
public Object getMultiTermComponent() {
return this;
}
} }

View File

@ -67,3 +67,4 @@ public interface TokenFilterFactory {
/** Transform the specified input TokenStream */ /** Transform the specified input TokenStream */
public TokenStream create(TokenStream input); public TokenStream create(TokenStream input);
} }

View File

@ -48,15 +48,13 @@ public abstract class FieldProperties {
protected final static int REQUIRED = 0x00001000; protected final static int REQUIRED = 0x00001000;
protected final static int OMIT_POSITIONS = 0x00002000; protected final static int OMIT_POSITIONS = 0x00002000;
protected final static int LEGACY_MULTITERM = 0x00004000;
static final String[] propertyNames = { static final String[] propertyNames = {
"indexed", "tokenized", "stored", "indexed", "tokenized", "stored",
"binary", "omitNorms", "omitTermFreqAndPositions", "binary", "omitNorms", "omitTermFreqAndPositions",
"termVectors", "termPositions", "termOffsets", "termVectors", "termPositions", "termOffsets",
"multiValued", "multiValued",
"sortMissingFirst","sortMissingLast","required", "omitPositions" , "sortMissingFirst","sortMissingLast","required", "omitPositions"
"legacyMultiTerm"
}; };
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>(); static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();

View File

@ -428,21 +428,6 @@ public abstract class FieldType extends FieldProperties {
*/ */
protected Analyzer queryAnalyzer=analyzer; protected Analyzer queryAnalyzer=analyzer;
/**
* Analyzer set by schema for text types to use when searching fields
* of this type, subclasses can set analyzer themselves or override
* getAnalyzer()
* This analyzer is used to process wildcard, prefix, regex and other multiterm queries. It
* assembles a list of tokenizer +filters that "make sense" for this, primarily accent folding and
* lowercasing filters, and charfilters.
*
* If users require old-style behavior, they can specify 'legacyMultiterm="true" ' in the schema file
* @see #getMultiTermAnalyzer
* @see #setMultiTermAnalyzer
*/
protected Analyzer multiTermAnalyzer=null;
/** /**
* Returns the Analyzer to be used when indexing fields of this type. * Returns the Analyzer to be used when indexing fields of this type.
* <p> * <p>
@ -465,20 +450,6 @@ public abstract class FieldType extends FieldProperties {
return queryAnalyzer; return queryAnalyzer;
} }
/**
* Returns the Analyzer to be used when searching fields of this type when mult-term queries are specified.
* <p>
* This method may be called many times, at any time.
* </p>
* @see #getAnalyzer
*/
public Analyzer getMultiTermAnalyzer() {
return multiTermAnalyzer;
}
private final String analyzerError =
"FieldType: " + this.getClass().getSimpleName() +
" (" + typeName + ") does not support specifying an analyzer";
/** /**
* Sets the Analyzer to be used when indexing fields of this type. * Sets the Analyzer to be used when indexing fields of this type.
@ -524,28 +495,6 @@ public abstract class FieldType extends FieldProperties {
throw e; throw e;
} }
/**
* Sets the Analyzer to be used when querying fields of this type.
*
* <p>
*
* Subclasses that override this method need to ensure the behavior
* of the analyzer is consistent with the implementation of toInternal.
* </p>
*
* @see #toInternal
* @see #setAnalyzer
* @see #getQueryAnalyzer
*/
public void setMultiTermAnalyzer(Analyzer analyzer) {
SolrException e = new SolrException
(ErrorCode.SERVER_ERROR,
"FieldType: " + this.getClass().getSimpleName() +
" (" + typeName + ") does not support specifying an analyzer");
SolrException.logOnce(log,null,e);
throw e;
}
/** @lucene.internal */ /** @lucene.internal */
protected Similarity similarity; protected Similarity similarity;

View File

@ -102,15 +102,13 @@ public final class FieldTypePluginLoader
if (queryAnalyzer==null) queryAnalyzer=analyzer; if (queryAnalyzer==null) queryAnalyzer=analyzer;
if (analyzer==null) analyzer=queryAnalyzer; if (analyzer==null) analyzer=queryAnalyzer;
if (multiAnalyzer == null) { if (multiAnalyzer == null) {
Boolean legacyMatch = ! schema.getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_36); multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer);
legacyMatch = (DOMUtil.getAttr(node, "legacyMultiTerm", null) == null) ? legacyMatch :
Boolean.parseBoolean(DOMUtil.getAttr(node, "legacyMultiTerm", null));
multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer, legacyMatch);
} }
if (analyzer!=null) { if (analyzer!=null) {
ft.setAnalyzer(analyzer); ft.setAnalyzer(analyzer);
ft.setQueryAnalyzer(queryAnalyzer); ft.setQueryAnalyzer(queryAnalyzer);
ft.setMultiTermAnalyzer(multiAnalyzer); if (ft instanceof TextField)
((TextField)ft).setMultiTermAnalyzer(multiAnalyzer);
} }
if (similarity!=null) { if (similarity!=null) {
ft.setSimilarity(similarity); ft.setSimilarity(similarity);
@ -143,36 +141,75 @@ public final class FieldTypePluginLoader
// 2> If legacyMultiTerm == true just construct the analyzer from a KeywordTokenizer. That should mimic current behavior. // 2> If legacyMultiTerm == true just construct the analyzer from a KeywordTokenizer. That should mimic current behavior.
// Do the same if they've specified that the old behavior is required (legacyMultiTerm="true") // Do the same if they've specified that the old behavior is required (legacyMultiTerm="true")
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer, Boolean legacyMultiTerm) { private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
if (queryAnalyzer == null) return null; if (queryAnalyzer == null) return null;
if (legacyMultiTerm || (!(queryAnalyzer instanceof TokenizerChain))) { if (!(queryAnalyzer instanceof TokenizerChain)) {
return new KeywordAnalyzer(); return new KeywordAnalyzer();
} }
TokenizerChain tc = (TokenizerChain) queryAnalyzer; TokenizerChain tc = (TokenizerChain) queryAnalyzer;
MultiTermChainBuilder builder = new MultiTermChainBuilder();
// we know it'll never be longer than this unless the code below is explicitly changed CharFilterFactory[] charFactories = tc.getCharFilterFactories();
TokenFilterFactory[] filters = new TokenFilterFactory[2]; if (charFactories != null) {
int idx = 0; for (CharFilterFactory fact : charFactories) {
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) { builder.add(fact);
if (factory instanceof LowerCaseFilterFactory) {
filters[idx] = new LowerCaseFilterFactory();
filters[idx++].init(factory.getArgs());
}
if (factory instanceof ASCIIFoldingFilterFactory) {
filters[idx] = new ASCIIFoldingFilterFactory();
filters[idx++].init(factory.getArgs());
} }
} }
WhitespaceTokenizerFactory white = new WhitespaceTokenizerFactory();
white.init(tc.getTokenizerFactory().getArgs());
return new TokenizerChain(tc.getCharFilterFactories(), builder.add(tc.getTokenizerFactory());
white,
Arrays.copyOfRange(filters, 0, idx)); for (TokenFilterFactory fact : tc.getTokenFilterFactories()) {
builder.add(fact);
}
return builder.build();
} }
private static class MultiTermChainBuilder {
static final KeywordTokenizerFactory keyFactory;
static {
keyFactory = new KeywordTokenizerFactory();
keyFactory.init(new HashMap<String,String>());
}
ArrayList<CharFilterFactory> charFilters = null;
ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>(2);
TokenizerFactory tokenizer = keyFactory;
public void add(Object current) {
if (!(current instanceof MultiTermAwareComponent)) return;
Object newComponent = ((MultiTermAwareComponent)current).getMultiTermComponent();
if (newComponent instanceof TokenFilterFactory) {
if (filters == null) {
filters = new ArrayList<TokenFilterFactory>(2);
}
filters.add((TokenFilterFactory)newComponent);
} else if (newComponent instanceof TokenizerFactory) {
tokenizer = (TokenizerFactory)newComponent;
} else if (newComponent instanceof CharFilterFactory) {
if (charFilters == null) {
charFilters = new ArrayList<CharFilterFactory>(1);
}
charFilters.add( (CharFilterFactory)newComponent);
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown analysis component from MultiTermAwareComponent: " + newComponent);
}
}
public TokenizerChain build() {
CharFilterFactory[] charFilterArr = charFilters == null ? null : charFilters.toArray(new CharFilterFactory[charFilters.size()]);
TokenFilterFactory[] filterArr = filters == null ? new TokenFilterFactory[0] : filters.toArray(new TokenFilterFactory[filters.size()]);
return new TokenizerChain(charFilterArr, tokenizer, filterArr);
}
}
// //
// <analyzer><tokenizer class="...."/><tokenizer class="...." arg="...."> // <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
// //

View File

@ -97,10 +97,6 @@ public final class SchemaField extends FieldProperties {
boolean isTokenized() { return (properties & TOKENIZED)!=0; } boolean isTokenized() { return (properties & TOKENIZED)!=0; }
boolean isBinary() { return (properties & BINARY)!=0; } boolean isBinary() { return (properties & BINARY)!=0; }
boolean legacyMultiTerm() {
return (properties & LEGACY_MULTITERM) != 0;
}
public IndexableField createField(Object val, float boost) { public IndexableField createField(Object val, float boost) {
return type.createField(this,val,boost); return type.createField(this,val,boost);
} }

View File

@ -17,13 +17,8 @@
package org.apache.solr.schema; package org.apache.solr.schema;
import org.apache.lucene.search.SortField; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.search.Query; import org.apache.lucene.search.*;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -32,6 +27,7 @@ import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
@ -48,6 +44,19 @@ import java.io.StringReader;
public class TextField extends FieldType { public class TextField extends FieldType {
protected boolean autoGeneratePhraseQueries; protected boolean autoGeneratePhraseQueries;
/**
* Analyzer set by schema for text types to use when searching fields
* of this type, subclasses can set analyzer themselves or override
* getAnalyzer()
* This analyzer is used to process wildcard, prefix, regex and other multiterm queries. It
* assembles a list of tokenizer +filters that "make sense" for this, primarily accent folding and
* lowercasing filters, and charfilters.
*
* @see #getMultiTermAnalyzer
* @see #setMultiTermAnalyzer
*/
protected Analyzer multiTermAnalyzer=null;
@Override @Override
protected void init(IndexSchema schema, Map<String,String> args) { protected void init(IndexSchema schema, Map<String,String> args) {
properties |= TOKENIZED; properties |= TOKENIZED;
@ -63,6 +72,21 @@ public class TextField extends FieldType {
super.init(schema, args); super.init(schema, args);
} }
/**
* Returns the Analyzer to be used when searching fields of this type when multi-term queries are specified.
* <p>
* This method may be called many times, at any time.
* </p>
* @see #getAnalyzer
*/
public Analyzer getMultiTermAnalyzer() {
return multiTermAnalyzer;
}
public void setMultiTermAnalyzer(Analyzer analyzer) {
this.multiTermAnalyzer = analyzer;
}
public boolean getAutoGeneratePhraseQueries() { public boolean getAutoGeneratePhraseQueries() {
return autoGeneratePhraseQueries; return autoGeneratePhraseQueries;
} }
@ -98,11 +122,50 @@ public class TextField extends FieldType {
this.queryAnalyzer = analyzer; this.queryAnalyzer = analyzer;
} }
@Override @Override
public void setMultiTermAnalyzer(Analyzer analyzer) { public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
this.multiTermAnalyzer = analyzer; Analyzer multiAnalyzer = getMultiTermAnalyzer();
BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer);
BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer);
return new TermRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
} }
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
if (part == null) return null;
TokenStream source;
try {
source = analyzerIn.tokenStream(field, new StringReader(part));
source.reset();
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
}
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
try {
if (!source.incrementToken())
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
if (source.incrementToken())
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e);
}
try {
source.end();
source.close();
} catch (IOException e) {
throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e);
}
return BytesRef.deepCopyOf(bytes);
}
static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) { static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) {
int phraseSlop = 0; int phraseSlop = 0;
boolean enablePositionIncrements = true; boolean enablePositionIncrements = true;

View File

@ -58,8 +58,9 @@ public class SolrQueryParser extends QueryParser {
protected final IndexSchema schema; protected final IndexSchema schema;
protected final QParser parser; protected final QParser parser;
protected final String defaultField; protected final String defaultField;
protected final Map<String, ReversedWildcardFilterFactory> leadingWildcards =
new HashMap<String, ReversedWildcardFilterFactory>(); // implementation detail - caching ReversedWildcardFilterFactory based on type
private Map<FieldType, ReversedWildcardFilterFactory> leadingWildcards;
public SolrQueryParser(QParser parser, String defaultField) { public SolrQueryParser(QParser parser, String defaultField) {
this(parser, defaultField, parser.getReq().getSchema().getQueryAnalyzer()); this(parser, defaultField, parser.getReq().getSchema().getQueryAnalyzer());
@ -71,31 +72,35 @@ public class SolrQueryParser extends QueryParser {
this.parser = parser; this.parser = parser;
this.defaultField = defaultField; this.defaultField = defaultField;
setEnablePositionIncrements(true); setEnablePositionIncrements(true);
checkAllowLeadingWildcards(); setLowercaseExpandedTerms(false);
setAllowLeadingWildcard(true);
} }
protected void checkAllowLeadingWildcards() { protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) {
boolean allow = false; if (leadingWildcards == null) leadingWildcards = new HashMap<FieldType, ReversedWildcardFilterFactory>();
for (Entry<String, FieldType> e : schema.getFieldTypes().entrySet()) { ReversedWildcardFilterFactory fac = leadingWildcards.get(fieldType);
Analyzer a = e.getValue().getAnalyzer(); if (fac == null && leadingWildcards.containsKey(fac)) {
if (a instanceof TokenizerChain) { return fac;
// examine the indexing analysis chain if it supports leading wildcards }
TokenizerChain tc = (TokenizerChain)a;
TokenFilterFactory[] factories = tc.getTokenFilterFactories(); Analyzer a = fieldType.getAnalyzer();
for (TokenFilterFactory factory : factories) { if (a instanceof TokenizerChain) {
if (factory instanceof ReversedWildcardFilterFactory) { // examine the indexing analysis chain if it supports leading wildcards
allow = true; TokenizerChain tc = (TokenizerChain)a;
leadingWildcards.put(e.getKey(), (ReversedWildcardFilterFactory)factory); TokenFilterFactory[] factories = tc.getTokenFilterFactories();
} for (TokenFilterFactory factory : factories) {
if (factory instanceof ReversedWildcardFilterFactory) {
fac = (ReversedWildcardFilterFactory)factory;
break;
} }
} }
} }
// XXX should be enabled on a per-field basis
if (allow) { leadingWildcards.put(fieldType, fac);
setAllowLeadingWildcard(true); return fac;
}
} }
private void checkNullField(String field) throws SolrException { private void checkNullField(String field) throws SolrException {
if (field == null && defaultField == null) { if (field == null && defaultField == null) {
throw new SolrException throw new SolrException
@ -104,12 +109,14 @@ public class SolrQueryParser extends QueryParser {
} }
} }
protected String analyzeIfMultitermTermText(String field, String part, Analyzer analyzer) { protected String analyzeIfMultitermTermText(String field, String part, FieldType fieldType) {
if (part == null) return part; if (part == null) return part;
SchemaField sf = schema.getFieldOrNull((field)); SchemaField sf = schema.getFieldOrNull((field));
if (sf == null || ! (sf.getType() instanceof TextField)) return part; if (sf == null || ! (fieldType instanceof TextField)) return part;
return analyzeMultitermTerm(field, part, analyzer).utf8ToString(); String out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer()).utf8ToString();
// System.out.println("INPUT="+part + " OUTPUT="+out);
return out;
} }
@Override @Override
@ -143,8 +150,6 @@ public class SolrQueryParser extends QueryParser {
@Override @Override
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
checkNullField(field); checkNullField(field);
part1 = analyzeIfMultitermTermText(field, part1, schema.getFieldType(field).getMultiTermAnalyzer());
part2 = analyzeIfMultitermTermText(field, part2, schema.getFieldType(field).getMultiTermAnalyzer());
SchemaField sf = schema.getField(field); SchemaField sf = schema.getField(field);
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive); return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
} }
@ -153,21 +158,10 @@ public class SolrQueryParser extends QueryParser {
protected Query getPrefixQuery(String field, String termStr) throws ParseException { protected Query getPrefixQuery(String field, String termStr) throws ParseException {
checkNullField(field); checkNullField(field);
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer()); termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field));
// TODO: toInternal() won't necessarily work on partial // Solr has always used constant scoring for prefix queries. This should return constant scoring by default.
// values, so it looks like we need a getPrefix() function return newPrefixQuery(new Term(field, termStr));
// on fieldtype? Or at the minimum, a method on fieldType
// that can tell me if I should lowercase or not...
// Schema could tell if lowercase filter is in the chain,
// but a more sure way would be to run something through
// the first time and check if it got lowercased.
// TODO: throw exception if field type doesn't support prefixes?
// (sortable numeric types don't do prefixes, but can do range queries)
Term t = new Term(field, termStr);
PrefixQuery prefixQuery = new PrefixQuery(t);
return prefixQuery;
} }
@Override @Override
protected Query getWildcardQuery(String field, String termStr) throws ParseException { protected Query getWildcardQuery(String field, String termStr) throws ParseException {
@ -175,10 +169,10 @@ public class SolrQueryParser extends QueryParser {
if ("*".equals(field) && "*".equals(termStr)) { if ("*".equals(field) && "*".equals(termStr)) {
return newMatchAllDocsQuery(); return newMatchAllDocsQuery();
} }
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer()); FieldType fieldType = schema.getFieldType(field);
termStr = analyzeIfMultitermTermText(field, termStr, fieldType);
// can we use reversed wildcards in this field? // can we use reversed wildcards in this field?
String type = schema.getFieldType(field).getTypeName(); ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(fieldType);
ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
if (factory != null) { if (factory != null) {
Term term = new Term(field, termStr); Term term = new Term(field, termStr);
// fsa representing the query // fsa representing the query
@ -211,19 +205,15 @@ public class SolrQueryParser extends QueryParser {
} }
}; };
} }
Query q = super.getWildcardQuery(field, termStr);
if (q instanceof WildcardQuery) { // Solr has always used constant scoring for wildcard queries. This should return constant scoring by default.
// use a constant score query to avoid overflowing clauses return newWildcardQuery(new Term(field, termStr));
WildcardQuery wildcardQuery = new WildcardQuery(((WildcardQuery)q).getTerm());
return wildcardQuery;
}
return q;
} }
@Override
protected Query getRegexpQuery(String field, String termStr) throws ParseException protected Query getRegexpQuery(String field, String termStr) throws ParseException
{ {
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer()); termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field));
return super.getRegexpQuery(field, termStr); return newRegexpQuery(new Term(field, termStr));
} }
} }

View File

@ -64,7 +64,7 @@
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_rev" class="solr.TextField" legacyMultiTerm="false"> <fieldType name="text_rev" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
@ -80,12 +80,25 @@
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_lower_tokenizer" class="solr.TextField"> <fieldType name="text_lower_token" class="solr.TextField">
<analyzer> <analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/> <tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_oldstyle" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="multiterm">
<tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
</fieldType>
<fieldType name="text_charfilter" class="solr.TextField" multiValued="false"> <fieldType name="text_charfilter" class="solr.TextField" multiValued="false">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@ -99,19 +112,47 @@
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_oldstyle" class="solr.TextField" multiValued="false" legacyMultiTerm="true"> <fieldType name="text_straight" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_lower" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_folding" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_stemming" class="solr.TextField">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/> <filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.TrimFilterFactory"/> <filter class="solr.PorterStemFilterFactory"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <fieldType name="text_keyword" class="solr.TextField" sortMissingLast="true" omitNorms="true">
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <analyzer>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <tokenizer class="solr.KeywordTokenizerFactory"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<fieldType name="int" class="solr.TrieIntField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="byte" class="solr.ByteField" omitNorms="true" positionIncrementGap="0"/> <fieldType name="byte" class="solr.ByteField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="short" class="solr.ShortField" omitNorms="true" positionIncrementGap="0"/> <fieldType name="short" class="solr.ShortField" omitNorms="true" positionIncrementGap="0"/>
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/> <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
@ -133,10 +174,17 @@
<field name="content_ws" type="text_ws" indexed="true" stored="true"/> <field name="content_ws" type="text_ws" indexed="true" stored="true"/>
<field name="content_rev" type="text_rev" indexed="true" stored="true"/> <field name="content_rev" type="text_rev" indexed="true" stored="true"/>
<field name="content_multi" type="text_multi" indexed="true" stored="true"/> <field name="content_multi" type="text_multi" indexed="true" stored="true"/>
<field name="content_lower_token" type="text_multi" indexed="true" stored="true"/> <field name="content_lower_token" type="text_lower_token" indexed="true" stored="true"/>
<field name="content_oldstyle" type="text_oldstyle" indexed="true" stored="true"/> <field name="content_oldstyle" type="text_oldstyle" indexed="true" stored="true"/>
<field name="content_charfilter" type="text_charfilter" indexed="true" stored="true"/> <field name="content_charfilter" type="text_charfilter" indexed="true" stored="true"/>
<field name="content_multi_bad" type="text_multi_bad" indexed="true" stored="true"/> <field name="content_multi_bad" type="text_multi_bad" indexed="true" stored="true"/>
<dynamicField name="*_straight" type="text_straight" indexed="true" stored="true"/>
<dynamicField name="*_lower" type="text_lower" indexed="true" stored="true"/>
<dynamicField name="*_folding" type="text_folding" indexed="true" stored="true"/>
<dynamicField name="*_stemming" type="text_stemming" indexed="true" stored="true"/>
<dynamicField name="*_keyword" type="text_keyword" indexed="true" stored="true"/>
</fields> </fields>
<defaultSearchField>content</defaultSearchField> <defaultSearchField>content</defaultSearchField>

View File

@ -36,7 +36,7 @@ public class MultiTermTest extends SolrTestCaseJ4 {
@Test @Test
public void testMultiFound() { public void testMultiFound() {
SchemaField field = h.getCore().getSchema().getField("content_multi"); SchemaField field = h.getCore().getSchema().getField("content_multi");
Analyzer analyzer = field.getType().getMultiTermAnalyzer(); Analyzer analyzer = ((TextField)field.getType()).getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain); assertTrue(analyzer instanceof TokenizerChain);
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory); assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer; TokenizerChain tc = (TokenizerChain) analyzer;
@ -58,9 +58,9 @@ public class MultiTermTest extends SolrTestCaseJ4 {
@Test @Test
public void testQueryCopiedToMulti() { public void testQueryCopiedToMulti() {
SchemaField field = h.getCore().getSchema().getField("content_charfilter"); SchemaField field = h.getCore().getSchema().getField("content_charfilter");
Analyzer analyzer = field.getType().getMultiTermAnalyzer(); Analyzer analyzer = ((TextField)field.getType()).getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain); assertTrue(analyzer instanceof TokenizerChain);
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory); assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof KeywordTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer; TokenizerChain tc = (TokenizerChain) analyzer;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) { for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
assertTrue(factory instanceof LowerCaseFilterFactory); assertTrue(factory instanceof LowerCaseFilterFactory);
@ -73,15 +73,15 @@ public class MultiTermTest extends SolrTestCaseJ4 {
@Test @Test
public void testDefaultCopiedToMulti() { public void testDefaultCopiedToMulti() {
SchemaField field = h.getCore().getSchema().getField("content_ws"); SchemaField field = h.getCore().getSchema().getField("content_ws");
Analyzer analyzer = field.getType().getMultiTermAnalyzer(); Analyzer analyzer = ((TextField)field.getType()).getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain); assertTrue(analyzer instanceof TokenizerChain);
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory); assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof KeywordTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer; TokenizerChain tc = (TokenizerChain) analyzer;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) { for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory)); assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
} }
assertTrue(tc.getCharFilterFactories().length == 0); assertTrue(tc.getCharFilterFactories() == null);
} }
} }

View File

@ -59,7 +59,12 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
"content_lower_token", docs[i], "content_lower_token", docs[i],
"content_oldstyle", docs[i], "content_oldstyle", docs[i],
"content_charfilter", docs[i], "content_charfilter", docs[i],
"content_multi_bad", docs[i] "content_multi_bad", docs[i],
"content_straight", docs[i],
"content_lower", docs[i],
"content_folding", docs[i],
"content_stemming", docs[i],
"content_keyword", docs[i]
)); ));
} }
assertU(optimize()); assertU(optimize());
@ -95,6 +100,8 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
assertQ(req("q", "content_lower_token:" + me), assertQ(req("q", "content_lower_token:" + me),
"//result[@numFound='1']", "//result[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']"); "//*[@name='id'][.='" + Integer.toString(idx) + "']");
assertQ(req("q", "content_oldstyle:" + me),
"//result[@numFound='0']");
} }
} }
for (int idx = 0; idx < matchRevPrefixUpper.length; idx++) { for (int idx = 0; idx < matchRevPrefixUpper.length; idx++) {
@ -128,13 +135,50 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
assertQ(req("q", "content_multi:" + me), assertQ(req("q", "content_multi:" + me),
"//result[@numFound='1']", "//result[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']"); "//*[@name='id'][.='" + Integer.toString(idx) + "']");
assertQ(req("q", "content_lower_token:" + me), assertQ(req("q", "content_oldstyle:" + me),
"//result[@numFound='1']", "//result[@numFound='0']");
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
} }
} }
} }
@Test
public void testLowerTokenizer() {
// The lowercasetokenizer will remove the '1' from the index, but not from the query, thus the special test.
assertQ(req("q", "content_lower_token:Á*C*"), "//result[@numFound='1']");
assertQ(req("q", "content_lower_token:Á*C*1"), "//result[@numFound='0']");
assertQ(req("q", "content_lower_token:h*1"), "//result[@numFound='0']");
assertQ(req("q", "content_lower_token:H*1"), "//result[@numFound='0']");
assertQ(req("q", "content_lower_token:*1"), "//result[@numFound='0']");
assertQ(req("q", "content_lower_token:HÏ*l?*"), "//result[@numFound='1']");
assertQ(req("q", "content_lower_token:hȉ*l?*"), "//result[@numFound='1']");
}
@Test
public void testRegex() throws Exception {
assertQ(req("q", "content:/Zill[a-z]/"),
"//result[@numFound='1']");
assertQ(req("q", "content:/Zill[A-Z]/"), // everything in the regex gets lowercased?
"//result[@numFound='1']");
assertQ(req("q", "content_keyword:/.*Zill[A-Z]/"),
"//result[@numFound='1']");
assertQ(req("q", "content_straight:/Zill[a-z]/"), // case preserving field shouldn't match
"//result[@numFound='0']");
assertQ(req("q", "content_folding:/Zill[a-z]/"), // case preserving field shouldn't match
"//result[@numFound='0']");
assertQ(req("q", "content_keyword:/Abcdefg1 Finger/"), // test spaces
"//result[@numFound='1']");
}
@Test
public void testGeneral() throws Exception {
assertQ(req("q", "content_stemming:fings*"), "//result[@numFound='0']"); // should not match (but would if fings* was stemmed to fing*
assertQ(req("q", "content_stemming:fing*"), "//result[@numFound='1']");
}
// Phrases should fail. This test is mainly a marker so if phrases ever do start working with wildcards we go // Phrases should fail. This test is mainly a marker so if phrases ever do start working with wildcards we go
// and update the documentation // and update the documentation
@Test @Test
@ -143,17 +187,14 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
"//result[@numFound='0']"); "//result[@numFound='0']");
} }
// Make sure the legacy behavior flag is honored
@Test
public void testLegacyBehavior() {
assertQ(req("q", "content_oldstyle:ABCD*"),
"//result[@numFound='0']");
}
@Test @Test
public void testWildcardRange() { public void testWildcardRange() {
assertQ(req("q", "content:[* TO *]"), assertQ(req("q", "content:[* TO *]"),
"//result[@numFound='3']"); "//result[@numFound='3']");
assertQ(req("q", "content:[AB* TO Z*]"),
"//result[@numFound='3']");
assertQ(req("q", "content:[AB*E?G* TO TU*W]"),
"//result[@numFound='3']");
} }
@ -222,10 +263,13 @@ public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
@Test @Test
public void testMultiBad() { public void testMultiBad() {
try { try {
ignoreException("analyzer returned too many terms");
assertQ(req("q", "content_multi_bad:" + "abCD*")); assertQ(req("q", "content_multi_bad:" + "abCD*"));
fail("Should throw exception when token evaluates to more than one term"); fail("Should throw exception when token evaluates to more than one term");
} catch (Exception expected) { } catch (Exception expected) {
assertTrue(expected.getCause() instanceof IllegalArgumentException); assertTrue(expected.getCause() instanceof org.apache.solr.common.SolrException);
} finally {
resetExceptionIgnores();
} }
} }
} }

View File

@ -427,41 +427,6 @@
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Illustrates the new "multiterm" analyzer definition the <fieldType> can take a new
parameter legacyMultiTerm="true" if the old behvaior is desired. The new default
behavior as of 3.6+ is to automatically define a multiterm analyzer
-->
<fieldType name="text_multiterm" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<!-- Illustrates the use of a new analyzer type "multiterm". See the Wiki page "Multiterm
Query Analysis" and SOLR-2438 for full details. The short form is that this analyzer is
applied to wildcard terms (prefix, wildcard range) if specified. This allows, among other
things, not having to lowercase wildcard terms on the client.
In the absence of this section, the new default behavior (3.6, 4.0) is to construct
one of these from the query analyzer that incorporates any defined charfilters, a
WhitespaceTokenizer, a LowerCaseFilter (if defined), and an ASCIIFoldingFilter
(if defined).
Arguably, this is an expert-level analyzer, most cases will be handled by an instance
of this being automatically constructed from the queryanalyzer.
-->
<analyzer type="multiterm">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<!-- since fields of this type are by default not stored or indexed, <!-- since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright. --> any data added to them will be ignored outright. -->
@ -587,6 +552,7 @@
<dynamicField name="*_l" type="long" indexed="true" stored="true"/> <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true" />
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="true"/> <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/> <dynamicField name="*_d" type="double" indexed="true" stored="true"/>