diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 8834a428312..bca20827bc8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -475,7 +475,9 @@ Bug Fixes substitution/dereferencing. Properly encode local params in distributed faceting. (yonik) -* SOLR-2114: Fixed parsing error in hsin function. The function signature has changed slightly. (gsingers) +* SOLR-2114: Fixed parsing error in hsin function. The function signature has changed slightly. (gsingers) + +* SOLR-2083: SpellCheckComponent misreports suggestions when distributed (James Dyer via gsingers) Other Changes diff --git a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java index 5cc6b0d47ac..8e0661c5f82 100644 --- a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java @@ -46,6 +46,7 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.index.IndexReader; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SpellingParams; import org.apache.solr.common.util.NamedList; @@ -123,6 +124,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) { return; } + boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD)); String q = params.get(SPELLCHECK_Q); SolrSpellChecker spellChecker = getSpellChecker(params); Collection tokens = null; @@ -147,13 +149,12 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar IndexReader reader = rb.req.getSearcher().getReader(); boolean collate = params.getBool(SPELLCHECK_COLLATE, false); float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE); - SolrParams customParams = getCustomParams(getDictionaryName(params), params); + SolrParams customParams = getCustomParams(getDictionaryName(params), params, shardRequest); SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults, accuracy, customParams); - SpellingResult spellingResult = spellChecker.getSuggestions(options); if (spellingResult != null) { - response.add("suggestions", toNamedList(spellingResult, q, + response.add("suggestions", toNamedList(shardRequest, spellingResult, q, extendedResults, collate)); rb.rsp.add("spellcheck", response); } @@ -171,7 +172,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar * @param params The original SolrParams * @return The new Params */ - protected SolrParams getCustomParams(String dictionary, SolrParams params) { + protected SolrParams getCustomParams(String dictionary, SolrParams params, boolean shardRequest) { ModifiableSolrParams result = new ModifiableSolrParams(); Iterator iter = params.getParameterNamesIterator(); String prefix = SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + "."; @@ -181,6 +182,10 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar result.add(nxt.substring(prefix.length()), params.getParams(nxt)); } } + if(shardRequest) + { + result.add(ShardParams.IS_SHARD, "true"); + } return result; } @@ -243,17 +248,21 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar Map origVsSuggestion = new HashMap(); // original token string -> summed up frequency Map origVsFreq = new HashMap(); + // original token string -> # of shards reporting it as misspelled + Map origVsShards = new HashMap(); // original token string -> set of alternatives // must preserve order because collation algorithm can only work in-order Map> origVsSuggested = new LinkedHashMap>(); // alternative string -> corresponding SuggestWord object Map suggestedVsWord = new HashMap(); - + + int totalNumberShardResponses = 0; for (ShardRequest sreq : rb.finished) { for (ShardResponse srsp : sreq.responses) { NamedList nl = (NamedList) srsp.getSolrResponse().getResponse().get("spellcheck"); LOG.info(srsp.getShard() + " " + nl); if (nl != null) { + totalNumberShardResponses++; SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl); for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) { origVsSuggestion.put(suggestion.getToken(), suggestion); @@ -269,6 +278,14 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar if (o != null) origFreq += o; origFreq += suggestion.getOriginalFrequency(); origVsFreq.put(suggestion.getToken(), origFreq); + + //# shards reporting + Integer origShards = origVsShards.get(suggestion.getToken()); + if(origShards==null) { + origVsShards.put(suggestion.getToken(), 1); + } else { + origVsShards.put(suggestion.getToken(), ++origShards); + } // find best suggestions for (int i = 0; i < suggestion.getNumFound(); i++) { @@ -296,6 +313,13 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar SpellingResult result = new SpellingResult(tokens); //todo: investigate, why does it need tokens beforehand? for (Map.Entry> entry : origVsSuggested.entrySet()) { String original = entry.getKey(); + + //Only use this suggestion if all shards reported it as misspelled. + Integer numShards = origVsShards.get(original); + if(numShards suggested = entry.getValue(); SuggestWordQueue sugQueue = new SuggestWordQueue(numSug); for (String suggestion : suggested) { @@ -335,7 +359,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar } NamedList response = new SimpleOrderedMap(); - response.add("suggestions", toNamedList(result, origQuery, extendedResults, collate)); + response.add("suggestions", toNamedList(false, result, origQuery, extendedResults, collate)); rb.rsp.add("spellcheck", response); } @@ -383,7 +407,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar return spellCheckers.get(name); } - protected NamedList toNamedList(SpellingResult spellingResult, String origQuery, boolean extendedResults, boolean collate) { + protected NamedList toNamedList(boolean shardRequest, SpellingResult spellingResult, String origQuery, boolean extendedResults, boolean collate) { NamedList result = new NamedList(); Map> suggestions = spellingResult.getSuggestions(); boolean hasFreqInfo = spellingResult.hasTokenFrequencyInfo(); @@ -393,15 +417,23 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar best = new LinkedHashMap(suggestions.size()); } + int numSuggestions = 0; + for(LinkedHashMap theSuggestion : suggestions.values()) + { + if(theSuggestion.size()>0) + { + numSuggestions++; + } + } // will be flipped to false if any of the suggestions are not in the index and hasFreqInfo is true - if(suggestions.size() > 0) { + if(numSuggestions > 0) { isCorrectlySpelled = true; } for (Map.Entry> entry : suggestions.entrySet()) { Token inputToken = entry.getKey(); Map theSuggestions = entry.getValue(); - if (theSuggestions != null && theSuggestions.size() > 0) { + if (theSuggestions != null && (theSuggestions.size()>0 || shardRequest)) { SimpleOrderedMap suggestionList = new SimpleOrderedMap(); suggestionList.add("numFound", theSuggestions.size()); suggestionList.add("startOffset", inputToken.startOffset()); @@ -430,7 +462,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar suggestionList.add("suggestion", theSuggestions.keySet()); } - if (collate == true ){//set aside the best suggestion for this token + if (collate == true && theSuggestions.size()>0){//set aside the best suggestion for this token best.put(inputToken, theSuggestions.keySet().iterator().next()); } if (hasFreqInfo) { diff --git a/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java b/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java index 2a5d10c872d..0e9de433254 100644 --- a/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java +++ b/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -41,6 +42,8 @@ import org.apache.lucene.search.spell.SpellChecker; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.schema.FieldType; @@ -153,6 +156,12 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker { @Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { + boolean shardRequest = false; + SolrParams params = options.customParams; + if(params!=null) + { + shardRequest = "true".equals(params.get(ShardParams.IS_SHARD)); + } SpellingResult result = new SpellingResult(options.tokens); IndexReader reader = determineReader(options.reader); Term term = field != null ? new Term(field, "") : null; @@ -167,7 +176,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker { field, options.onlyMorePopular, theAccuracy); if (suggestions.length == 1 && suggestions[0].equals(tokenText)) { - //These are spelled the same, continue on + //These are spelled the same, continue on continue; } @@ -175,9 +184,15 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker { term = term.createTerm(tokenText); result.add(token, reader.docFreq(term)); int countLimit = Math.min(options.count, suggestions.length); - for (int i = 0; i < countLimit; i++) { - term = term.createTerm(suggestions[i]); - result.add(token, suggestions[i], reader.docFreq(term)); + if(countLimit>0) + { + for (int i = 0; i < countLimit; i++) { + term = term.createTerm(suggestions[i]); + result.add(token, suggestions[i], reader.docFreq(term)); + } + } else if(shardRequest) { + List suggList = Collections.emptyList(); + result.add(token, suggList); } } else { if (suggestions.length > 0) { @@ -186,6 +201,9 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker { suggList = suggList.subList(0, options.count); } result.add(token, suggList); + } else if(shardRequest) { + List suggList = Collections.emptyList(); + result.add(token, suggList); } } } diff --git a/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java b/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java index 77a17b67ca3..ecfed62e6fb 100644 --- a/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java +++ b/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java @@ -1,8 +1,11 @@ package org.apache.solr.handler.component; +import java.io.File; + import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.util.AbstractSolrTestCase; /** * Test for SpellCheckComponent's distributed querying @@ -13,6 +16,12 @@ import org.apache.solr.common.params.ModifiableSolrParams; */ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTestCase { + public DistributedSpellCheckComponentTest() + { + //fixShardCount=true; + //shardCount=2; + } + private String saveProp; @Override public void setUp() throws Exception { @@ -49,6 +58,7 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes @Override public void doTest() throws Exception { + del("*:*"); index(id, "1", "lowerfilt", "toyota"); index(id, "2", "lowerfilt", "chevrolet"); index(id, "3", "lowerfilt", "suzuki"); @@ -60,6 +70,18 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes index(id, "9", "lowerfilt", "The quick red fox jumped over the lazy brown dogs."); index(id, "10", "lowerfilt", "blue"); index(id, "12", "lowerfilt", "glue"); + index(id, "13", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "14", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "15", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "16", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "17", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "18", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "19", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "20", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "21", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "22", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "23", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); + index(id, "24", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."); commit(); handle.clear();