SOLR-2848: generalize distributed spellcheck code to work with any SolrSpellChecker

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1200266 13f79535-47bb-0310-9956-ffa450edef68
2011-11-10 10:54:46 +00:00 · 2011-11-10 10:54:46 +00:00 · b97d321f82
parent ee293e7e7d
commit b97d321f82
11 changed files with 326 additions and 202 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -268,6 +268,10 @@ Bug Fixes
  equals methods. (Yonik Seeley, Hossman, Erick Erickson. 
  Marc Tinnemeyer caught the bug)  
 * SOLR-2848: Removed 'instanceof AbstractLuceneSpellChecker' hacks from distributed spellchecking code,
  and added a merge() method to SolrSpellChecker instead. Previously if you extended SolrSpellChecker
  your spellchecker would not work in distributed fashion.  (James Dyer via rmuir)
 Other Changes
 ----------------------
--- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@ -22,6 +22,8 @@ import java.io.StringReader;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.search.spell.JaroWinklerDistance;
 import org.apache.lucene.search.spell.LevensteinDistance;
 import org.apache.lucene.search.spell.StringDistance;
 import org.apache.lucene.search.spell.SuggestWord;
@ -147,7 +149,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
        IndexReader reader = rb.req.getSearcher().getIndexReader();
        boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
        float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
-        SolrParams customParams = getCustomParams(getDictionaryName(params), params, shardRequest);
+        SolrParams customParams = getCustomParams(getDictionaryName(params), params);
        SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
                accuracy, customParams);                       
        SpellingResult spellingResult = spellChecker.getSuggestions(options);
@ -210,7 +212,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
   * @param params The original SolrParams
   * @return The new Params
   */
-  protected SolrParams getCustomParams(String dictionary, SolrParams params, boolean shardRequest) {
+  protected SolrParams getCustomParams(String dictionary, SolrParams params) {
    ModifiableSolrParams result = new ModifiableSolrParams();
    Iterator<String> iter = params.getParameterNamesIterator();
    String prefix = SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + ".";
@ -220,10 +222,6 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
        result.add(nxt.substring(prefix.length()), params.getParams(nxt));
      }
    }
    if(shardRequest)
    {
    	result.add(ShardParams.IS_SHARD, "true");
    }
    return result;
  }
@ -256,6 +254,8 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
    boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
    int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
    int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
    int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
    int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
    String origQuery = params.get(SPELLCHECK_Q);
    if (origQuery == null) {
@ -265,190 +265,28 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
      }
    }    
-    int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
+    SpellCheckMergeData mergeData = new SpellCheckMergeData();     
    float min = 0.5f;
    StringDistance sd = null;
    int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
    SolrSpellChecker checker = getSpellChecker(rb.req.getParams());
    if (checker instanceof AbstractLuceneSpellChecker) {
      AbstractLuceneSpellChecker spellChecker = (AbstractLuceneSpellChecker) checker;
      min = spellChecker.getAccuracy();
      sd = spellChecker.getStringDistance();
    }
    if (sd == null)
      sd = new LevensteinDistance();
    Collection<Token> tokens = null;
    try {
      tokens = getTokens(origQuery, checker.getQueryAnalyzer());
    } catch (IOException e) {
      LOG.error("Could not get tokens (this should never happen)", e);
    }
    // original token -> corresponding Suggestion object (keep track of start,end)
    Map<String, SpellCheckResponse.Suggestion> origVsSuggestion = new HashMap<String, SpellCheckResponse.Suggestion>();
    // original token string -> summed up frequency
    Map<String, Integer> origVsFreq = new HashMap<String, Integer>();
    // original token string -> # of shards reporting it as misspelled
    Map<String, Integer> origVsShards = new HashMap<String, Integer>();
    // original token string -> set of alternatives
    // must preserve order because collation algorithm can only work in-order
    Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
    // alternative string -> corresponding SuggestWord object
    Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
    Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
    int totalNumberShardResponses = 0;
    for (ShardRequest sreq : rb.finished) {
      for (ShardResponse srsp : sreq.responses) {
        NamedList nl = (NamedList) srsp.getSolrResponse().getResponse().get("spellcheck");
        LOG.info(srsp.getShard() + " " + nl);
        if (nl != null) {
-        	totalNumberShardResponses++;
+        	mergeData.totalNumberShardResponses++;
-          SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
+        	collectShardSuggestions(nl, mergeData);          
-          for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
+          collectShardCollations(mergeData, nl, maxCollationTries);
            origVsSuggestion.put(suggestion.getToken(), suggestion);
            HashSet<String> suggested = origVsSuggested.get(suggestion.getToken());
            if (suggested == null) {
              suggested = new HashSet<String>();
              origVsSuggested.put(suggestion.getToken(), suggested);
            }
            // sum up original frequency          
            int origFreq = 0;
            Integer o = origVsFreq.get(suggestion.getToken());
            if (o != null)  origFreq += o;
            origFreq += suggestion.getOriginalFrequency();
            origVsFreq.put(suggestion.getToken(), origFreq);
            //# shards reporting
            Integer origShards = origVsShards.get(suggestion.getToken());
            if(origShards==null) {
            	origVsShards.put(suggestion.getToken(), 1);
            } else {
            	origVsShards.put(suggestion.getToken(), ++origShards);
            }            
            // find best suggestions
            for (int i = 0; i < suggestion.getNumFound(); i++) {
              String alternative = suggestion.getAlternatives().get(i);
              suggested.add(alternative);
              SuggestWord sug = suggestedVsWord.get(alternative);
              if (sug == null)  {
                sug = new SuggestWord();
                suggestedVsWord.put(alternative, sug);
              }
              sug.string = alternative;
              // alternative frequency is present only for extendedResults=true
              if (suggestion.getAlternativeFrequencies() != null && suggestion.getAlternativeFrequencies().size() > 0) {
                Integer freq = suggestion.getAlternativeFrequencies().get(i);
                if (freq != null) sug.freq += freq;
              }
            }
          }
          NamedList suggestions = (NamedList) nl.get("suggestions");
          if(suggestions != null) {
 	      		List<Object> collationList = suggestions.getAll("collation");
 	      		List<Object> collationRankList = suggestions.getAll("collationInternalRank");
 	      		int i=0;
 	      		if(collationList != null) {
 		      		for(Object o : collationList)
 		      		{
 		      			if(o instanceof String)
 		      			{
 		      				SpellCheckCollation coll = new SpellCheckCollation();
 		      				coll.setCollationQuery((String) o);
 		      				if(collationRankList!= null && collationRankList.size()>0)
 		      				{
 			      				coll.setInternalRank((Integer) collationRankList.get(i));
 			      				i++;
 		      				}
 		      				SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
 		      				if(priorColl != null)
 		      				{
 		      					coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
 		      				}
 		      				collations.put(coll.getCollationQuery(), coll);
 		      			} else
 		      			{
 		      				NamedList expandedCollation = (NamedList) o;		      				
 		      				SpellCheckCollation coll = new SpellCheckCollation();
 		      				coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
 		      				coll.setHits((Integer) expandedCollation.get("hits"));
 		      				if(maxCollationTries>0)
 		      				{
 		      					coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
 		      				}
 		      				coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
 		      				SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
 		      				if(priorColl != null)
 		      				{
 		      					coll.setHits(coll.getHits() + priorColl.getHits());
 		      					coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
 		      				}
 		      				collations.put(coll.getCollationQuery(), coll);
 		      			}
 		      		}
 	      		}
          }
        }
      }
    }
    // all shard responses have been collected
    // create token and get top suggestions
-    SpellingResult result = new SpellingResult(tokens); //todo: investigate, why does it need tokens beforehand?
+    SolrSpellChecker checker = getSpellChecker(rb.req.getParams());    
-    for (Map.Entry<String, HashSet<String>> entry : origVsSuggested.entrySet()) {
+    SpellingResult result = checker.mergeSuggestions(mergeData, numSug, count, extendedResults);
      String original = entry.getKey();
      //Only use this suggestion if all shards reported it as misspelled.
      Integer numShards = origVsShards.get(original);
      if(numShards<totalNumberShardResponses) {
      	continue;
      }
      HashSet<String> suggested = entry.getValue();
      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
      for (String suggestion : suggested) {
        SuggestWord sug = suggestedVsWord.get(suggestion);
        sug.score = sd.getDistance(original, sug.string);
        if (sug.score < min) continue;
        sugQueue.insertWithOverflow(sug);
        if (sugQueue.size() == numSug) {
          // if queue full, maintain the minScore score
          min = sugQueue.top().score;
        }
      }
      // create token
      SpellCheckResponse.Suggestion suggestion = origVsSuggestion.get(original);
      Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());
      // get top 'count' suggestions out of 'sugQueue.size()' candidates
      SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
      // skip the first sugQueue.size() - count elements
      for (int k=0; k < sugQueue.size() - count; k++) sugQueue.pop();
      // now collect the top 'count' responses
      for (int k = Math.min(count, sugQueue.size()) - 1; k >= 0; k--)  {
        suggestions[k] = sugQueue.pop();
      }
      if (extendedResults) {
        Integer o = origVsFreq.get(original);
        if (o != null) result.addFrequency(token, o);
        for (SuggestWord word : suggestions)
          result.add(token, word.string, word.freq);
      } else {
        List<String> words = new ArrayList<String>(sugQueue.size());
        for (SuggestWord word : suggestions) words.add(word.string);
        result.add(token, words);
      }
    }
    NamedList response = new SimpleOrderedMap();
 		NamedList suggestions = toNamedList(false, result, origQuery, extendedResults, collate);
 		if (collate) {
-			SpellCheckCollation[] sortedCollations = collations.values().toArray(new SpellCheckCollation[collations.size()]);
+			SpellCheckCollation[] sortedCollations = mergeData.collations.values().toArray(new SpellCheckCollation[mergeData.collations.size()]);
 			Arrays.sort(sortedCollations);
 			int i = 0;
 			while (i < maxCollations && i < sortedCollations.length) {
@ -471,6 +309,101 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
    rb.rsp.add("spellcheck", response);
  }
  @SuppressWarnings("unchecked")
  private void collectShardSuggestions(NamedList nl, SpellCheckMergeData mergeData) {
    SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
    for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
      mergeData.origVsSuggestion.put(suggestion.getToken(), suggestion);
      HashSet<String> suggested = mergeData.origVsSuggested.get(suggestion.getToken());
      if (suggested == null) {
        suggested = new HashSet<String>();
        mergeData.origVsSuggested.put(suggestion.getToken(), suggested);
      }
      // sum up original frequency          
      int origFreq = 0;
      Integer o = mergeData.origVsFreq.get(suggestion.getToken());
      if (o != null)  origFreq += o;
      origFreq += suggestion.getOriginalFrequency();
      mergeData.origVsFreq.put(suggestion.getToken(), origFreq);
      //# shards reporting
      Integer origShards = mergeData.origVsShards.get(suggestion.getToken());
      if(origShards==null) {
        mergeData.origVsShards.put(suggestion.getToken(), 1);
      } else {
        mergeData.origVsShards.put(suggestion.getToken(), ++origShards);
      }            
      // find best suggestions
      for (int i = 0; i < suggestion.getNumFound(); i++) {
        String alternative = suggestion.getAlternatives().get(i);
        suggested.add(alternative);
        SuggestWord sug = mergeData.suggestedVsWord.get(alternative);
        if (sug == null)  {
          sug = new SuggestWord();
          mergeData.suggestedVsWord.put(alternative, sug);
        }
        sug.string = alternative;
        // alternative frequency is present only for extendedResults=true
        if (suggestion.getAlternativeFrequencies() != null && suggestion.getAlternativeFrequencies().size() > 0) {
          Integer freq = suggestion.getAlternativeFrequencies().get(i);
          if (freq != null) sug.freq += freq;
        }
      }
    }
  }
  @SuppressWarnings("unchecked")
  private void collectShardCollations(SpellCheckMergeData mergeData, NamedList spellCheckResponse, int maxCollationTries) {
    Map<String, SpellCheckCollation> collations = mergeData.collations;
    NamedList suggestions = (NamedList) spellCheckResponse.get("suggestions");
    if(suggestions != null) {
      List<Object> collationList = suggestions.getAll("collation");
      List<Object> collationRankList = suggestions.getAll("collationInternalRank");
      int i=0;
      if(collationList != null) {
        for(Object o : collationList)
        {
          if(o instanceof String)
          {
            SpellCheckCollation coll = new SpellCheckCollation();
            coll.setCollationQuery((String) o);
            if(collationRankList!= null && collationRankList.size()>0)
            {
              coll.setInternalRank((Integer) collationRankList.get(i));
              i++;
            }
            SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
            if(priorColl != null)
            {
              coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
            }
            collations.put(coll.getCollationQuery(), coll);
          } else
          {
            NamedList expandedCollation = (NamedList) o;                  
            SpellCheckCollation coll = new SpellCheckCollation();
            coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
            coll.setHits((Integer) expandedCollation.get("hits"));
            if(maxCollationTries>0)
            {
              coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
            }
            coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
            SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
            if(priorColl != null)
            {
              coll.setHits(coll.getHits() + priorColl.getHits());
              coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
            }
            collations.put(coll.getCollationQuery(), coll);
          }
        }
      }
    }
  }
  private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> result = new ArrayList<Token>();
    assert analyzer != null;
--- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckMergeData.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckMergeData.java
@ -0,0 +1,43 @@
 package org.apache.solr.handler.component;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.Map;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.solr.client.solrj.response.SpellCheckResponse;
 import org.apache.solr.spelling.SpellCheckCollation;
 public class SpellCheckMergeData {
  //original token -> corresponding Suggestion object (keep track of start,end)
  public Map<String, SpellCheckResponse.Suggestion> origVsSuggestion = new HashMap<String, SpellCheckResponse.Suggestion>();
  // original token string -> summed up frequency
  public Map<String, Integer> origVsFreq = new HashMap<String, Integer>();
  // original token string -> # of shards reporting it as misspelled
  public Map<String, Integer> origVsShards = new HashMap<String, Integer>();
  // original token string -> set of alternatives
  // must preserve order because collation algorithm can only work in-order
  public Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
  // alternative string -> corresponding SuggestWord object
  public Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
  public Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
  public int totalNumberShardResponses = 0;
 }
--- a/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
@ -141,13 +141,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
  @Override
  public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
-  	boolean shardRequest = false;
+  	SpellingResult result = new SpellingResult(options.tokens);
  	SolrParams params = options.customParams;
  	if(params!=null)
  	{
  		shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
  	}
    SpellingResult result = new SpellingResult(options.tokens);
    IndexReader reader = determineReader(options.reader);
    Term term = field != null ? new Term(field, "") : null;
    float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy;
@ -176,7 +170,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
 	          term = new Term(field, suggestions[i]);
 	          result.add(token, suggestions[i], reader.docFreq(term));
 	        }
-        } else if(shardRequest) {
+        } else {
        	List<String> suggList = Collections.emptyList();
        	result.add(token, suggList);
        }
@ -187,7 +181,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
            suggList = suggList.subList(0, options.count);
          }
          result.add(token, suggList);
-        } else if(shardRequest) {
+        } else {
        	List<String> suggList = Collections.emptyList();
        	result.add(token, suggList);
        }
@ -222,6 +216,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
  /*
  * @return the Accuracy used for the Spellchecker
  * */
  @Override
  public float getAccuracy() {
    return accuracy;
  }
@ -257,6 +252,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
    return sourceLocation;
  }
  @Override
  public StringDistance getStringDistance() {
    return sd;
  }
--- a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
@ -18,7 +18,9 @@ package org.apache.solr.spelling;
 */
 import java.io.IOException;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
@ -29,6 +31,8 @@ import org.apache.lucene.search.spell.SuggestMode;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
 import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.apache.solr.common.params.ShardParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.SpellingParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
@ -182,11 +186,33 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
    	Term term = new Term(field, token.toString());
      SuggestWord[] suggestions = checker.suggestSimilar(term, 
          options.count, options.reader, mode, accuracy); 
-      result.addFrequency(token, options.reader.docFreq(term));
+      
-      for (SuggestWord suggestion : suggestions) {
+      int docFreq = 0;
-        result.add(token, suggestion.string, suggestion.freq);      	
+      if(options.extendedResults || suggestions.length==0) {
        docFreq = options.reader.docFreq(term);
      }
      if(options.extendedResults) {        
        result.addFrequency(token, docFreq);
      }
      if(suggestions.length==0 && docFreq==0) {
        List<String> empty = Collections.emptyList();
        result.add(token, empty);
      } else {        
        for (SuggestWord suggestion : suggestions) {
          result.add(token, suggestion.string, suggestion.freq);      	
        }
      }
    }
    return result;
  }
  @Override
  public float getAccuracy() {
    return checker.getAccuracy();
  }
  @Override
  public StringDistance getStringDistance() {
    return checker.getDistance();
  }
 }
--- a/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java
+++ b/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java
@ -59,6 +59,9 @@ public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
 	public PossibilityIterator(Map<Token, LinkedHashMap<String, Integer>> suggestions, int maximumRequiredSuggestions, int maxEvaluations) {
 		for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
 			Token token = entry.getKey();
 			if(entry.getValue().size()==0) {
 			  continue;
 			}
 			List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
 			for (Map.Entry<String, Integer> entry1 : entry.getValue().entrySet()) {
 				SpellCheckCorrection correction = new SpellCheckCorrection();
--- a/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
@ -17,13 +17,24 @@ package org.apache.solr.spelling;
 */
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.search.spell.LevensteinDistance;
 import org.apache.lucene.search.spell.StringDistance;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.apache.solr.client.solrj.response.SpellCheckResponse;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.component.SpellCheckMergeData;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.search.SolrIndexSearcher;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 /**
@ -64,6 +75,74 @@ public abstract class SolrSpellChecker {
    }
    return name;
  }
  /**
   * Integrate spelling suggestions from the various shards in a distributed environment.
   * 
   * @param mergeData
   * @param numSug
   * @param count
   * @param extendedResults
   * @return
   */
  public SpellingResult mergeSuggestions(SpellCheckMergeData mergeData, int numSug, int count, boolean extendedResults) {
    float min = 0.5f;
    try {
      min = getAccuracy();
    } catch(UnsupportedOperationException uoe) {
      //just use .5 as a default
    }
    StringDistance sd = getStringDistance() == null ? new LevensteinDistance() : getStringDistance();    
    SpellingResult result = new SpellingResult();
    for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet()) {
      String original = entry.getKey();
      //Only use this suggestion if all shards reported it as misspelled.
      Integer numShards = mergeData.origVsShards.get(original);
      if(numShards<mergeData.totalNumberShardResponses) {
        continue;
      }
      HashSet<String> suggested = entry.getValue();
      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
      for (String suggestion : suggested) {
        SuggestWord sug = mergeData.suggestedVsWord.get(suggestion);
        sug.score = sd.getDistance(original, sug.string);
        if (sug.score < min) continue;
        sugQueue.insertWithOverflow(sug);
        if (sugQueue.size() == numSug) {
          // if queue full, maintain the minScore score
          min = sugQueue.top().score;
        }
      }
      // create token
      SpellCheckResponse.Suggestion suggestion = mergeData.origVsSuggestion.get(original);
      Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());
      // get top 'count' suggestions out of 'sugQueue.size()' candidates
      SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
      // skip the first sugQueue.size() - count elements
      for (int k=0; k < sugQueue.size() - count; k++) sugQueue.pop();
      // now collect the top 'count' responses
      for (int k = Math.min(count, sugQueue.size()) - 1; k >= 0; k--)  {
        suggestions[k] = sugQueue.pop();
      }
      if (extendedResults) {
        Integer o = mergeData.origVsFreq.get(original);
        if (o != null) result.addFrequency(token, o);
        for (SuggestWord word : suggestions)
          result.add(token, word.string, word.freq);
      } else {
        List<String> words = new ArrayList<String>(sugQueue.size());
        for (SuggestWord word : suggestions) words.add(word.string);
        result.add(token, words);
      }
    }
    return result;
  }
  public Analyzer getQueryAnalyzer() {
    return analyzer;
@ -85,6 +164,23 @@ public abstract class SolrSpellChecker {
   */
  public abstract void build(SolrCore core, SolrIndexSearcher searcher);
  /**
   * Get the value of {@link SpellingParams.SPELLCHECK_ACCURACY} if supported.  
   * Otherwise throws UnsupportedOperationException.
   * @return
   */
  protected float getAccuracy() {
    throw new UnsupportedOperationException();
  }
  /**
   * Get the distance implementation used by this spellchecker, or NULL if not applicable.
   * @return
   */
  protected StringDistance getStringDistance()  {
    throw new UnsupportedOperationException();
  }
  /**
   * Get suggestions for the given query.  Tokenizes the query using a field appropriate Analyzer.
--- a/solr/core/src/test-files/solr/conf/solrconfig.xml
+++ b/solr/core/src/test-files/solr/conf/solrconfig.xml
@ -323,6 +323,12 @@
      <str name="spellcheckIndexDir">spellchecker1</str>
      <str name="buildOnCommit">false</str>
    </lst>
    <lst name="spellchecker">
      <str name="name">direct</str>
      <str name="classname">DirectSolrSpellChecker</str>
      <str name="field">lowerfilt</str>
      <int name="minQueryLength">3</int>
    </lst>
    <lst name="spellchecker">
 			<str name="name">multipleFields</str>
 			<str name="field">lowerfilt1and2</str>
@ -397,6 +403,17 @@
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>
    <requestHandler name="spellCheckCompRH_Direct" class="org.apache.solr.handler.component.SearchHandler">
    <lst name="defaults">
      <str name="spellcheck.dictionary">direct</str>
      <str name="spellcheck.onlyMorePopular">false</str>
      <str name="spellcheck.extendedResults">false</str>
      <str name="spellcheck.count">1</str>
    </lst>
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>
  <requestHandler name="spellCheckCompRH1" class="org.apache.solr.handler.component.SearchHandler">
 			<lst name="defaults">
--- a/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
@ -30,10 +30,13 @@ import org.apache.solr.common.params.ModifiableSolrParams;
 */
 public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTestCase {
  private String requestHandlerName;
 	public DistributedSpellCheckComponentTest()
 	{
 		//fixShardCount=true;
 		//shardCount=2;
 		//stress=0;
 	}
  private String saveProp;
@ -42,6 +45,7 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes
    // this test requires FSDir
    saveProp = System.getProperty("solr.directoryFactory");
    System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");    
    requestHandlerName = random.nextBoolean() ? "spellCheckCompRH" : "spellCheckCompRH_Direct";   
    super.setUp();
  }
@ -104,15 +108,17 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes
    handle.put("maxScore", SKIPVAL);
    // we care only about the spellcheck results
    handle.put("response", SKIP);
    q("q", "*:*", SpellCheckComponent.SPELLCHECK_BUILD, "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH");
-    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","toyata", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH");
+    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","toyata", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName);
-    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","toyata", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true");
+    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","toyata", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true");
-    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","bluo", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "4");
+    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","bluo", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "4");
-    query("q", "The quick reb fox jumped over the lazy brown dogs", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "4", SpellCheckComponent.SPELLCHECK_COLLATE, "true");
+    query("q", "The quick reb fox jumped over the lazy brown dogs", "fl", "id,lowerfilt", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "4", SpellCheckComponent.SPELLCHECK_COLLATE, "true");
    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
  }
 }
--- a/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
@ -175,7 +175,7 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
-    assertTrue("suggestions is not null and it should be", suggestions == null);
+    assertTrue("suggestions size should be 0", suggestions.size()==0);
    searcher.decref();
  }
 }
--- a/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
@ -140,7 +140,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
-    assertTrue("suggestions is not null and it should be", suggestions == null);
+    assertTrue("suggestions size should be 0", suggestions.size()==0);
    //test something that is spelled correctly
    spellOpts.tokens = queryConverter.convert("document");
@ -215,7 +215,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
-    assertTrue("suggestions is not null and it should be", suggestions == null);
+    assertTrue("suggestions size should be 0", suggestions.size()==0);
    spellOpts.tokens = queryConverter.convert("document");
    result = checker.getSuggestions(spellOpts);
@ -328,7 +328,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
-    assertTrue("suggestions is not null and it should be", suggestions == null);
+    assertTrue("suggestions size should be 0", suggestions.size()==0);
    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);