mirror of https://github.com/apache/lucene.git
SOLR-2848: generalize distributed spellcheck code to work with any SolrSpellChecker
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1200266 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent ee293e7e7d
commit b97d321f82
@@ -268,6 +268,10 @@ Bug Fixes
   equals methods. (Yonik Seeley, Hossman, Erick Erickson.
   Marc Tinnemeyer caught the bug)
 
+* SOLR-2848: Removed 'instanceof AbstractLuceneSpellChecker' hacks from distributed spellchecking code,
+  and added a merge() method to SolrSpellChecker instead. Previously if you extended SolrSpellChecker
+  your spellchecker would not work in distributed fashion. (James Dyer via rmuir)
+
 Other Changes
 ----------------------
 
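As a practical illustration of the entry above (a sketch only, not part of this commit): a checker that extends SolrSpellChecker directly can now take part in distributed spellchecking through the base class's new mergeSuggestions() method, shown later in this diff. It only needs to expose its accuracy and string distance so the merge uses them instead of the 0.5 cutoff and LevensteinDistance fallbacks; the class name, package, and values below are hypothetical, and the class is left abstract so the unrelated abstract methods (build(), getSuggestions()) stay out of the sketch.

package com.example.spelling; // hypothetical package, for illustration only

import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.solr.spelling.SolrSpellChecker;

// Hypothetical subclass sketch -- not part of SOLR-2848.
public abstract class MyDistributedAwareSpellChecker extends SolrSpellChecker {

  private final StringDistance distance = new LevensteinDistance();

  // The inherited mergeSuggestions() uses this value as its score cutoff; if the
  // method is not overridden it throws UnsupportedOperationException and the
  // merge falls back to 0.5.
  @Override
  protected float getAccuracy() {
    return 0.7f; // illustrative value
  }

  // The inherited mergeSuggestions() scores shard-supplied alternatives with this
  // distance; returning null makes it fall back to LevensteinDistance.
  @Override
  protected StringDistance getStringDistance() {
    return distance;
  }

  // build(), getSuggestions(), etc. are unchanged by SOLR-2848 and omitted here.
}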
@@ -22,6 +22,8 @@ import java.io.StringReader;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 
+import org.apache.lucene.search.spell.DirectSpellChecker;
+import org.apache.lucene.search.spell.JaroWinklerDistance;
 import org.apache.lucene.search.spell.LevensteinDistance;
 import org.apache.lucene.search.spell.StringDistance;
 import org.apache.lucene.search.spell.SuggestWord;
@@ -147,7 +149,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAware
       IndexReader reader = rb.req.getSearcher().getIndexReader();
       boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
       float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
-      SolrParams customParams = getCustomParams(getDictionaryName(params), params, shardRequest);
+      SolrParams customParams = getCustomParams(getDictionaryName(params), params);
       SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
           accuracy, customParams);
       SpellingResult spellingResult = spellChecker.getSuggestions(options);
@@ -210,7 +212,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAware
    * @param params The original SolrParams
    * @return The new Params
    */
-  protected SolrParams getCustomParams(String dictionary, SolrParams params, boolean shardRequest) {
+  protected SolrParams getCustomParams(String dictionary, SolrParams params) {
     ModifiableSolrParams result = new ModifiableSolrParams();
     Iterator<String> iter = params.getParameterNamesIterator();
     String prefix = SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + ".";
@@ -220,10 +222,6 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAware
         result.add(nxt.substring(prefix.length()), params.getParams(nxt));
       }
     }
-    if(shardRequest)
-    {
-      result.add(ShardParams.IS_SHARD, "true");
-    }
     return result;
   }
 
@@ -256,6 +254,8 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAware
       boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
       int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
       int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
+      int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
+      int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
 
       String origQuery = params.get(SPELLCHECK_Q);
       if (origQuery == null) {
@@ -265,190 +265,28 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAware
         }
       }
 
-      int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
-      float min = 0.5f;
-      StringDistance sd = null;
-      int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
-      SolrSpellChecker checker = getSpellChecker(rb.req.getParams());
-      if (checker instanceof AbstractLuceneSpellChecker) {
-        AbstractLuceneSpellChecker spellChecker = (AbstractLuceneSpellChecker) checker;
-        min = spellChecker.getAccuracy();
-        sd = spellChecker.getStringDistance();
-      }
-      if (sd == null)
-        sd = new LevensteinDistance();
-
-      Collection<Token> tokens = null;
-      try {
-        tokens = getTokens(origQuery, checker.getQueryAnalyzer());
-      } catch (IOException e) {
-        LOG.error("Could not get tokens (this should never happen)", e);
-      }
-
-      // original token -> corresponding Suggestion object (keep track of start,end)
-      Map<String, SpellCheckResponse.Suggestion> origVsSuggestion = new HashMap<String, SpellCheckResponse.Suggestion>();
-      // original token string -> summed up frequency
-      Map<String, Integer> origVsFreq = new HashMap<String, Integer>();
-      // original token string -> # of shards reporting it as misspelled
-      Map<String, Integer> origVsShards = new HashMap<String, Integer>();
-      // original token string -> set of alternatives
-      // must preserve order because collation algorithm can only work in-order
-      Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
-      // alternative string -> corresponding SuggestWord object
-      Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
-      Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
-
-      int totalNumberShardResponses = 0;
+      SpellCheckMergeData mergeData = new SpellCheckMergeData();
       for (ShardRequest sreq : rb.finished) {
         for (ShardResponse srsp : sreq.responses) {
           NamedList nl = (NamedList) srsp.getSolrResponse().getResponse().get("spellcheck");
           LOG.info(srsp.getShard() + " " + nl);
           if (nl != null) {
-            totalNumberShardResponses++;
-            SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
-            for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
-              origVsSuggestion.put(suggestion.getToken(), suggestion);
-              HashSet<String> suggested = origVsSuggested.get(suggestion.getToken());
-              if (suggested == null) {
-                suggested = new HashSet<String>();
-                origVsSuggested.put(suggestion.getToken(), suggested);
-              }
-
-              // sum up original frequency
-              int origFreq = 0;
-              Integer o = origVsFreq.get(suggestion.getToken());
-              if (o != null) origFreq += o;
-              origFreq += suggestion.getOriginalFrequency();
-              origVsFreq.put(suggestion.getToken(), origFreq);
-
-              //# shards reporting
-              Integer origShards = origVsShards.get(suggestion.getToken());
-              if(origShards==null) {
-                origVsShards.put(suggestion.getToken(), 1);
-              } else {
-                origVsShards.put(suggestion.getToken(), ++origShards);
-              }
-
-              // find best suggestions
-              for (int i = 0; i < suggestion.getNumFound(); i++) {
-                String alternative = suggestion.getAlternatives().get(i);
-                suggested.add(alternative);
-                SuggestWord sug = suggestedVsWord.get(alternative);
-                if (sug == null) {
-                  sug = new SuggestWord();
-                  suggestedVsWord.put(alternative, sug);
-                }
-                sug.string = alternative;
-                // alternative frequency is present only for extendedResults=true
-                if (suggestion.getAlternativeFrequencies() != null && suggestion.getAlternativeFrequencies().size() > 0) {
-                  Integer freq = suggestion.getAlternativeFrequencies().get(i);
-                  if (freq != null) sug.freq += freq;
-                }
-              }
-            }
-            NamedList suggestions = (NamedList) nl.get("suggestions");
-            if(suggestions != null) {
-              List<Object> collationList = suggestions.getAll("collation");
-              List<Object> collationRankList = suggestions.getAll("collationInternalRank");
-              int i=0;
-              if(collationList != null) {
-                for(Object o : collationList)
-                {
-                  if(o instanceof String)
-                  {
-                    SpellCheckCollation coll = new SpellCheckCollation();
-                    coll.setCollationQuery((String) o);
-                    if(collationRankList!= null && collationRankList.size()>0)
-                    {
-                      coll.setInternalRank((Integer) collationRankList.get(i));
-                      i++;
-                    }
-                    SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
-                    if(priorColl != null)
-                    {
-                      coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
-                    }
-                    collations.put(coll.getCollationQuery(), coll);
-                  } else
-                  {
-                    NamedList expandedCollation = (NamedList) o;
-                    SpellCheckCollation coll = new SpellCheckCollation();
-                    coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
-                    coll.setHits((Integer) expandedCollation.get("hits"));
-                    if(maxCollationTries>0)
-                    {
-                      coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
-                    }
-                    coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
-                    SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
-                    if(priorColl != null)
-                    {
-                      coll.setHits(coll.getHits() + priorColl.getHits());
-                      coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
-                    }
-                    collations.put(coll.getCollationQuery(), coll);
-                  }
-                }
-              }
-            }
+            mergeData.totalNumberShardResponses++;
+            collectShardSuggestions(nl, mergeData);
+            collectShardCollations(mergeData, nl, maxCollationTries);
           }
         }
       }
 
       // all shard responses have been collected
       // create token and get top suggestions
-      SpellingResult result = new SpellingResult(tokens); //todo: investigate, why does it need tokens beforehand?
-      for (Map.Entry<String, HashSet<String>> entry : origVsSuggested.entrySet()) {
-        String original = entry.getKey();
-
-        //Only use this suggestion if all shards reported it as misspelled.
-        Integer numShards = origVsShards.get(original);
-        if(numShards<totalNumberShardResponses) {
-          continue;
-        }
-
-        HashSet<String> suggested = entry.getValue();
-        SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
-        for (String suggestion : suggested) {
-          SuggestWord sug = suggestedVsWord.get(suggestion);
-          sug.score = sd.getDistance(original, sug.string);
-          if (sug.score < min) continue;
-          sugQueue.insertWithOverflow(sug);
-          if (sugQueue.size() == numSug) {
-            // if queue full, maintain the minScore score
-            min = sugQueue.top().score;
-          }
-        }
-
-        // create token
-        SpellCheckResponse.Suggestion suggestion = origVsSuggestion.get(original);
-        Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());
-
-        // get top 'count' suggestions out of 'sugQueue.size()' candidates
-        SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
-        // skip the first sugQueue.size() - count elements
-        for (int k=0; k < sugQueue.size() - count; k++) sugQueue.pop();
-        // now collect the top 'count' responses
-        for (int k = Math.min(count, sugQueue.size()) - 1; k >= 0; k--) {
-          suggestions[k] = sugQueue.pop();
-        }
-
-        if (extendedResults) {
-          Integer o = origVsFreq.get(original);
-          if (o != null) result.addFrequency(token, o);
-          for (SuggestWord word : suggestions)
-            result.add(token, word.string, word.freq);
-        } else {
-          List<String> words = new ArrayList<String>(sugQueue.size());
-          for (SuggestWord word : suggestions) words.add(word.string);
-          result.add(token, words);
-        }
-      }
+      SolrSpellChecker checker = getSpellChecker(rb.req.getParams());
+      SpellingResult result = checker.mergeSuggestions(mergeData, numSug, count, extendedResults);
 
       NamedList response = new SimpleOrderedMap();
       NamedList suggestions = toNamedList(false, result, origQuery, extendedResults, collate);
       if (collate) {
-        SpellCheckCollation[] sortedCollations = collations.values().toArray(new SpellCheckCollation[collations.size()]);
+        SpellCheckCollation[] sortedCollations = mergeData.collations.values().toArray(new SpellCheckCollation[mergeData.collations.size()]);
         Arrays.sort(sortedCollations);
         int i = 0;
         while (i < maxCollations && i < sortedCollations.length) {
@@ -471,6 +309,101 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAware
     rb.rsp.add("spellcheck", response);
   }
 
+  @SuppressWarnings("unchecked")
+  private void collectShardSuggestions(NamedList nl, SpellCheckMergeData mergeData) {
+    SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
+    for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
+      mergeData.origVsSuggestion.put(suggestion.getToken(), suggestion);
+      HashSet<String> suggested = mergeData.origVsSuggested.get(suggestion.getToken());
+      if (suggested == null) {
+        suggested = new HashSet<String>();
+        mergeData.origVsSuggested.put(suggestion.getToken(), suggested);
+      }
+
+      // sum up original frequency
+      int origFreq = 0;
+      Integer o = mergeData.origVsFreq.get(suggestion.getToken());
+      if (o != null) origFreq += o;
+      origFreq += suggestion.getOriginalFrequency();
+      mergeData.origVsFreq.put(suggestion.getToken(), origFreq);
+
+      //# shards reporting
+      Integer origShards = mergeData.origVsShards.get(suggestion.getToken());
+      if(origShards==null) {
+        mergeData.origVsShards.put(suggestion.getToken(), 1);
+      } else {
+        mergeData.origVsShards.put(suggestion.getToken(), ++origShards);
+      }
+
+      // find best suggestions
+      for (int i = 0; i < suggestion.getNumFound(); i++) {
+        String alternative = suggestion.getAlternatives().get(i);
+        suggested.add(alternative);
+        SuggestWord sug = mergeData.suggestedVsWord.get(alternative);
+        if (sug == null) {
+          sug = new SuggestWord();
+          mergeData.suggestedVsWord.put(alternative, sug);
+        }
+        sug.string = alternative;
+        // alternative frequency is present only for extendedResults=true
+        if (suggestion.getAlternativeFrequencies() != null && suggestion.getAlternativeFrequencies().size() > 0) {
+          Integer freq = suggestion.getAlternativeFrequencies().get(i);
+          if (freq != null) sug.freq += freq;
+        }
+      }
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  private void collectShardCollations(SpellCheckMergeData mergeData, NamedList spellCheckResponse, int maxCollationTries) {
+    Map<String, SpellCheckCollation> collations = mergeData.collations;
+    NamedList suggestions = (NamedList) spellCheckResponse.get("suggestions");
+    if(suggestions != null) {
+      List<Object> collationList = suggestions.getAll("collation");
+      List<Object> collationRankList = suggestions.getAll("collationInternalRank");
+      int i=0;
+      if(collationList != null) {
+        for(Object o : collationList)
+        {
+          if(o instanceof String)
+          {
+            SpellCheckCollation coll = new SpellCheckCollation();
+            coll.setCollationQuery((String) o);
+            if(collationRankList!= null && collationRankList.size()>0)
+            {
+              coll.setInternalRank((Integer) collationRankList.get(i));
+              i++;
+            }
+            SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
+            if(priorColl != null)
+            {
+              coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
+            }
+            collations.put(coll.getCollationQuery(), coll);
+          } else
+          {
+            NamedList expandedCollation = (NamedList) o;
+            SpellCheckCollation coll = new SpellCheckCollation();
+            coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
+            coll.setHits((Integer) expandedCollation.get("hits"));
+            if(maxCollationTries>0)
+            {
+              coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
+            }
+            coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
+            SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
+            if(priorColl != null)
+            {
+              coll.setHits(coll.getHits() + priorColl.getHits());
+              coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
+            }
+            collations.put(coll.getCollationQuery(), coll);
+          }
+        }
+      }
+    }
+  }
+
   private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
     Collection<Token> result = new ArrayList<Token>();
     assert analyzer != null;
@@ -0,0 +1,43 @@
+package org.apache.solr.handler.component;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.solr.client.solrj.response.SpellCheckResponse;
+import org.apache.solr.spelling.SpellCheckCollation;
+
+public class SpellCheckMergeData {
+  //original token -> corresponding Suggestion object (keep track of start,end)
+  public Map<String, SpellCheckResponse.Suggestion> origVsSuggestion = new HashMap<String, SpellCheckResponse.Suggestion>();
+  // original token string -> summed up frequency
+  public Map<String, Integer> origVsFreq = new HashMap<String, Integer>();
+  // original token string -> # of shards reporting it as misspelled
+  public Map<String, Integer> origVsShards = new HashMap<String, Integer>();
+  // original token string -> set of alternatives
+  // must preserve order because collation algorithm can only work in-order
+  public Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
+  // alternative string -> corresponding SuggestWord object
+  public Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
+  public Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
+  public int totalNumberShardResponses = 0;
+}
@@ -141,13 +141,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
 
   @Override
   public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
-    boolean shardRequest = false;
-    SolrParams params = options.customParams;
-    if(params!=null)
-    {
-      shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
-    }
     SpellingResult result = new SpellingResult(options.tokens);
     IndexReader reader = determineReader(options.reader);
     Term term = field != null ? new Term(field, "") : null;
     float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy;
@@ -176,7 +170,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
           term = new Term(field, suggestions[i]);
           result.add(token, suggestions[i], reader.docFreq(term));
         }
-      } else if(shardRequest) {
+      } else {
         List<String> suggList = Collections.emptyList();
         result.add(token, suggList);
       }
@@ -187,7 +181,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
           suggList = suggList.subList(0, options.count);
         }
         result.add(token, suggList);
-      } else if(shardRequest) {
+      } else {
         List<String> suggList = Collections.emptyList();
         result.add(token, suggList);
       }
@@ -222,6 +216,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
   /*
    * @return the Accuracy used for the Spellchecker
    * */
+  @Override
   public float getAccuracy() {
     return accuracy;
   }
@@ -257,6 +252,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
     return sourceLocation;
   }
 
+  @Override
   public StringDistance getStringDistance() {
     return sd;
   }
@@ -18,7 +18,9 @@ package org.apache.solr.spelling;
  */
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Comparator;
+import java.util.List;
 
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
@@ -29,6 +31,8 @@ import org.apache.lucene.search.spell.SuggestMode;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
 import org.apache.lucene.search.spell.SuggestWordQueue;
+import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.SpellingParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
@@ -182,11 +186,33 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
       Term term = new Term(field, token.toString());
       SuggestWord[] suggestions = checker.suggestSimilar(term,
           options.count, options.reader, mode, accuracy);
-      result.addFrequency(token, options.reader.docFreq(term));
-      for (SuggestWord suggestion : suggestions) {
-        result.add(token, suggestion.string, suggestion.freq);
+      int docFreq = 0;
+      if(options.extendedResults || suggestions.length==0) {
+        docFreq = options.reader.docFreq(term);
+      }
+
+      if(options.extendedResults) {
+        result.addFrequency(token, docFreq);
+      }
+      if(suggestions.length==0 && docFreq==0) {
+        List<String> empty = Collections.emptyList();
+        result.add(token, empty);
+      } else {
+        for (SuggestWord suggestion : suggestions) {
+          result.add(token, suggestion.string, suggestion.freq);
+        }
       }
     }
     return result;
   }
+
+  @Override
+  public float getAccuracy() {
+    return checker.getAccuracy();
+  }
+  @Override
+  public StringDistance getStringDistance() {
+    return checker.getDistance();
+  }
 }
@@ -59,6 +59,9 @@ public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
   public PossibilityIterator(Map<Token, LinkedHashMap<String, Integer>> suggestions, int maximumRequiredSuggestions, int maxEvaluations) {
     for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
       Token token = entry.getKey();
+      if(entry.getValue().size()==0) {
+        continue;
+      }
       List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
       for (Map.Entry<String, Integer> entry1 : entry.getValue().entrySet()) {
         SpellCheckCorrection correction = new SpellCheckCorrection();
@@ -17,13 +17,24 @@ package org.apache.solr.spelling;
  */
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.search.spell.LevensteinDistance;
+import org.apache.lucene.search.spell.StringDistance;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordQueue;
+import org.apache.solr.client.solrj.response.SpellCheckResponse;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.SpellCheckMergeData;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.search.SolrIndexSearcher;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
 
 
 /**
@@ -64,6 +75,74 @@ public abstract class SolrSpellChecker {
     }
     return name;
   }
+  /**
+   * Integrate spelling suggestions from the various shards in a distributed environment.
+   *
+   * @param mergeData
+   * @param numSug
+   * @param count
+   * @param extendedResults
+   * @return
+   */
+  public SpellingResult mergeSuggestions(SpellCheckMergeData mergeData, int numSug, int count, boolean extendedResults) {
+    float min = 0.5f;
+    try {
+      min = getAccuracy();
+    } catch(UnsupportedOperationException uoe) {
+      //just use .5 as a default
+    }
+
+    StringDistance sd = getStringDistance() == null ? new LevensteinDistance() : getStringDistance();
+
+    SpellingResult result = new SpellingResult();
+    for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet()) {
+      String original = entry.getKey();
+
+      //Only use this suggestion if all shards reported it as misspelled.
+      Integer numShards = mergeData.origVsShards.get(original);
+      if(numShards<mergeData.totalNumberShardResponses) {
+        continue;
+      }
+
+      HashSet<String> suggested = entry.getValue();
+      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
+      for (String suggestion : suggested) {
+        SuggestWord sug = mergeData.suggestedVsWord.get(suggestion);
+        sug.score = sd.getDistance(original, sug.string);
+        if (sug.score < min) continue;
+        sugQueue.insertWithOverflow(sug);
+        if (sugQueue.size() == numSug) {
+          // if queue full, maintain the minScore score
+          min = sugQueue.top().score;
+        }
+      }
+
+      // create token
+      SpellCheckResponse.Suggestion suggestion = mergeData.origVsSuggestion.get(original);
+      Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());
+
+      // get top 'count' suggestions out of 'sugQueue.size()' candidates
+      SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
+      // skip the first sugQueue.size() - count elements
+      for (int k=0; k < sugQueue.size() - count; k++) sugQueue.pop();
+      // now collect the top 'count' responses
+      for (int k = Math.min(count, sugQueue.size()) - 1; k >= 0; k--) {
+        suggestions[k] = sugQueue.pop();
+      }
+
+      if (extendedResults) {
+        Integer o = mergeData.origVsFreq.get(original);
+        if (o != null) result.addFrequency(token, o);
+        for (SuggestWord word : suggestions)
+          result.add(token, word.string, word.freq);
+      } else {
+        List<String> words = new ArrayList<String>(sugQueue.size());
+        for (SuggestWord word : suggestions) words.add(word.string);
+        result.add(token, words);
+      }
+    }
+    return result;
+  }
+
   public Analyzer getQueryAnalyzer() {
     return analyzer;
@@ -85,6 +164,23 @@ public abstract class SolrSpellChecker {
    */
   public abstract void build(SolrCore core, SolrIndexSearcher searcher);
+
+  /**
+   * Get the value of {@link SpellingParams.SPELLCHECK_ACCURACY} if supported.
+   * Otherwise throws UnsupportedOperationException.
+   * @return
+   */
+  protected float getAccuracy() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Get the distance implementation used by this spellchecker, or NULL if not applicable.
+   * @return
+   */
+  protected StringDistance getStringDistance() {
+    throw new UnsupportedOperationException();
+  }
 
 
   /**
    * Get suggestions for the given query. Tokenizes the query using a field appropriate Analyzer.
@@ -323,6 +323,12 @@
    <str name="spellcheckIndexDir">spellchecker1</str>
    <str name="buildOnCommit">false</str>
   </lst>
+  <lst name="spellchecker">
+   <str name="name">direct</str>
+   <str name="classname">DirectSolrSpellChecker</str>
+   <str name="field">lowerfilt</str>
+   <int name="minQueryLength">3</int>
+  </lst>
   <lst name="spellchecker">
    <str name="name">multipleFields</str>
    <str name="field">lowerfilt1and2</str>
@@ -397,6 +403,17 @@
   <arr name="last-components">
    <str>spellcheck</str>
   </arr>
+ </requestHandler>
+ <requestHandler name="spellCheckCompRH_Direct" class="org.apache.solr.handler.component.SearchHandler">
+  <lst name="defaults">
+   <str name="spellcheck.dictionary">direct</str>
+   <str name="spellcheck.onlyMorePopular">false</str>
+   <str name="spellcheck.extendedResults">false</str>
+   <str name="spellcheck.count">1</str>
+  </lst>
+  <arr name="last-components">
+   <str>spellcheck</str>
+  </arr>
  </requestHandler>
 <requestHandler name="spellCheckCompRH1" class="org.apache.solr.handler.component.SearchHandler">
  <lst name="defaults">
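As a rough usage sketch (not part of the commit; host, port, and core path are placeholders), a request exercising the new handler combines its name with the usual spellcheck parameters, mirroring the distributed test below:

http://localhost:8983/solr/select?q=*:*&spellcheck=true&spellcheck.q=toyata&qt=spellCheckCompRH_Direct&shards.qt=spellCheckCompRH_Direct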
@@ -30,10 +30,13 @@ import org.apache.solr.common.params.ModifiableSolrParams;
  */
 public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTestCase {
 
+  private String requestHandlerName;
+
   public DistributedSpellCheckComponentTest()
   {
     //fixShardCount=true;
     //shardCount=2;
+    //stress=0;
   }
 
   private String saveProp;
@@ -42,6 +45,7 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTestCase {
     // this test requires FSDir
     saveProp = System.getProperty("solr.directoryFactory");
     System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
+    requestHandlerName = random.nextBoolean() ? "spellCheckCompRH" : "spellCheckCompRH_Direct";
     super.setUp();
   }
 
@@ -104,15 +108,17 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTestCase {
     handle.put("maxScore", SKIPVAL);
     // we care only about the spellcheck results
     handle.put("response", SKIP);
 
     q("q", "*:*", SpellCheckComponent.SPELLCHECK_BUILD, "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH");
 
-    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","toyata", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH");
-    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","toyata", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true");
-    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","bluo", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "4");
-    query("q", "The quick reb fox jumped over the lazy brown dogs", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "4", SpellCheckComponent.SPELLCHECK_COLLATE, "true");
-    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
-    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
-    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
+    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","toyata", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName);
+    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","toyata", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true");
+    query("q", "*:*", "fl", "id,lowerfilt", "spellcheck.q","bluo", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "4");
+    query("q", "The quick reb fox jumped over the lazy brown dogs", "fl", "id,lowerfilt", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "4", SpellCheckComponent.SPELLCHECK_COLLATE, "true");
+
+    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
+    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
+    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", requestHandlerName, "shards.qt", requestHandlerName, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
+
   }
 }
@@ -175,7 +175,7 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 {
     result = checker.getSuggestions(spellOpts);
     assertTrue("result is null and it shouldn't be", result != null);
     suggestions = result.get(spellOpts.tokens.iterator().next());
-    assertTrue("suggestions is not null and it should be", suggestions == null);
+    assertTrue("suggestions size should be 0", suggestions.size()==0);
     searcher.decref();
   }
 }
@@ -140,7 +140,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
     result = checker.getSuggestions(spellOpts);
     assertTrue("result is null and it shouldn't be", result != null);
     suggestions = result.get(spellOpts.tokens.iterator().next());
-    assertTrue("suggestions is not null and it should be", suggestions == null);
+    assertTrue("suggestions size should be 0", suggestions.size()==0);
 
     //test something that is spelled correctly
     spellOpts.tokens = queryConverter.convert("document");
@@ -215,7 +215,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
     result = checker.getSuggestions(spellOpts);
     assertTrue("result is null and it shouldn't be", result != null);
     suggestions = result.get(spellOpts.tokens.iterator().next());
-    assertTrue("suggestions is not null and it should be", suggestions == null);
+    assertTrue("suggestions size should be 0", suggestions.size()==0);
 
     spellOpts.tokens = queryConverter.convert("document");
     result = checker.getSuggestions(spellOpts);
@@ -328,7 +328,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
     result = checker.getSuggestions(spellOpts);
     assertTrue("result is null and it shouldn't be", result != null);
     suggestions = result.get(spellOpts.tokens.iterator().next());
-    assertTrue("suggestions is not null and it should be", suggestions == null);
+    assertTrue("suggestions size should be 0", suggestions.size()==0);
 
     spellOpts.tokens = queryConverter.convert("Caroline");
     result = checker.getSuggestions(spellOpts);