mirror of https://github.com/apache/lucene.git
SOLR-2993: fix test failures (SOLR-2993-fixes.patch)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1346489 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
24ac5fdd13
commit
ce20e7b133
|
@ -19,6 +19,7 @@ package org.apache.solr.spelling;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
|
@ -135,8 +136,12 @@ public class ConjunctionSolrSpellChecker extends SolrSpellChecker {
|
|||
//TODO: This just interleaves the results. In the future, we might want to let users give each checker its
|
||||
// own weight and use that in combination to score & frequency to sort the results ?
|
||||
private SpellingResult mergeCheckers(SpellingResult[] results, int numSug) {
|
||||
Map<Token, Integer> combinedTokenFrequency = new HashMap<Token, Integer>();
|
||||
Map<Token, List<LinkedHashMap<String, Integer>>> allSuggestions = new LinkedHashMap<Token, List<LinkedHashMap<String, Integer>>>();
|
||||
for(SpellingResult result : results) {
|
||||
if(result.getTokenFrequency()!=null) {
|
||||
combinedTokenFrequency.putAll(result.getTokenFrequency());
|
||||
}
|
||||
for(Map.Entry<Token, LinkedHashMap<String, Integer>> entry : result.getSuggestions().entrySet()) {
|
||||
List<LinkedHashMap<String, Integer>> allForThisToken = allSuggestions.get(entry.getKey());
|
||||
if(allForThisToken==null) {
|
||||
|
@ -161,6 +166,10 @@ public class ConjunctionSolrSpellChecker extends SolrSpellChecker {
|
|||
anyData = true;
|
||||
Map.Entry<String,Integer> corr = iter.next();
|
||||
combinedResult.add(original, corr.getKey(), corr.getValue());
|
||||
Integer tokenFrequency = combinedTokenFrequency.get(original);
|
||||
if(tokenFrequency!=null) {
|
||||
combinedResult.addFrequency(original, tokenFrequency);
|
||||
}
|
||||
if(++numberAdded==numSug) {
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -90,16 +91,22 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
|
|||
*/
|
||||
public static final String PARAM_MIN_SUGGESTION_FREQUENCY = "minSuggestionFreq";
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Specify a value on the "breakSugestionTieBreaker" parameter.
|
||||
* The default is MAX_FREQ.
|
||||
* </p>
|
||||
*/
|
||||
public enum BreakSuggestionTieBreaker {
|
||||
/**
|
||||
* See
|
||||
* {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY}
|
||||
* {@link WordBreakSpellChecker.BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY}
|
||||
* #
|
||||
*/
|
||||
MAX_FREQ,
|
||||
/**
|
||||
* See
|
||||
* {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_SUMMED_FREQUENCY}
|
||||
* {@link WordBreakSpellChecker.BreakSuggestionSortMethod#NUM_CHANGES_THEN_SUMMED_FREQUENCY}
|
||||
*/
|
||||
SUM_FREQ
|
||||
};
|
||||
|
@ -108,6 +115,7 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
|
|||
private boolean combineWords = false;
|
||||
private boolean breakWords = false;
|
||||
private BreakSuggestionSortMethod sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY;
|
||||
private static final Pattern spacePattern = Pattern.compile("\\s+");
|
||||
|
||||
@Override
|
||||
public String init(@SuppressWarnings("unchecked") NamedList config,
|
||||
|
@ -127,6 +135,8 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
|
|||
throw new IllegalArgumentException("Invalid value for parameter "
|
||||
+ PARAM_BREAK_SUGGESTION_TIE_BREAKER + " : " + bstb);
|
||||
}
|
||||
} else {
|
||||
sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY;
|
||||
}
|
||||
int mc = intParam(config, PARAM_MAX_CHANGES);
|
||||
if (mc > 0) {
|
||||
|
@ -272,21 +282,27 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
|
|||
while (lastBreak != null || lastCombine != null) {
|
||||
if (lastBreak == null) {
|
||||
result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
|
||||
result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
|
||||
lastCombine = null;
|
||||
} else if (lastCombine == null) {
|
||||
result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
|
||||
result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
|
||||
lastBreak = null;
|
||||
} else if (lastBreak.freq < lastCombine.freq) {
|
||||
result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
|
||||
result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
|
||||
lastCombine = null;
|
||||
} else if (lastCombine.freq < lastBreak.freq) {
|
||||
result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
|
||||
result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
|
||||
lastBreak = null;
|
||||
} else if (breakCount >= combineCount) {
|
||||
result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
|
||||
result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
|
||||
lastCombine = null;
|
||||
} else {
|
||||
result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
|
||||
result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
|
||||
lastBreak = null;
|
||||
}
|
||||
if (result.getSuggestions().size() > numSuggestions) {
|
||||
|
@ -304,6 +320,21 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
|
|||
return result;
|
||||
}
|
||||
|
||||
private int getCombineFrequency(IndexReader ir, Token token) throws IOException {
|
||||
String[] words = spacePattern.split(token.toString());
|
||||
int result = 0;
|
||||
if(sortMethod==BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY) {
|
||||
for(String word : words) {
|
||||
result = Math.max(result, ir.docFreq(new Term(field, word)));
|
||||
}
|
||||
} else {
|
||||
for(String word : words) {
|
||||
result += ir.docFreq(new Term(field, word));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void build(SolrCore core, SolrIndexSearcher searcher) {
|
||||
/* no-op */
|
||||
|
|
|
@ -75,6 +75,7 @@ Config for testing spellcheck component
|
|||
<str name="field">lowerfilt</str>
|
||||
<str name="combineWords">true</str>
|
||||
<str name="breakWords">true</str>
|
||||
<str name="breakSugestionTieBreaker">MAX_FREQ</str>
|
||||
<int name="maxChanges">10</int>
|
||||
</lst>
|
||||
<lst name="spellchecker">
|
||||
|
|
|
@ -149,6 +149,6 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes
|
|||
query("q", "lowerfilt:(\"quote red fox\")", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ALTERNATIVE_TERM_COUNT, "5", SpellCheckComponent.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "10");
|
||||
query("q", "lowerfilt:(\"rod fix\")", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ALTERNATIVE_TERM_COUNT, "5", SpellCheckComponent.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "10");
|
||||
|
||||
//query("q", "lowerfilt:(+quock +redfox +jum +ped)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", reqHandlerWithWordbreak, "shards.qt", reqHandlerWithWordbreak, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
|
||||
query("q", "lowerfilt:(+quock +redfox +jum +ped)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", reqHandlerWithWordbreak, "shards.qt", reqHandlerWithWordbreak, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -139,41 +139,50 @@ public class WordBreakSolrSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
"//lst[@name='paintable']/int[@name='numFound']=8",
|
||||
"//lst[@name='paintable']/int[@name='startOffset']=11",
|
||||
"//lst[@name='paintable']/int[@name='endOffset']=20",
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/str[1]='printable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/str[2]='paint able'", //1 op ; max doc freq=5
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/str[3]='pintable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/str[4]='pain table'", //1 op ; max doc freq=4
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/str[5]='pointable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/str[6]='pa in table'",//2 ops
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/str[7]='plantable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/str[8]='puntable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/int[@name='origFreq']=0",
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/str[@name='word']='printable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/int[@name='freq']=3",
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/str[@name='word']='paint able'", //1 op
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/int[@name='freq']=5",
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/str[@name='word']='pintable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/int[@name='freq']=1",
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/str[@name='word']='pain table'", //1 op
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/int[@name='freq']=2",
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/str[@name='word']='pointable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/int[@name='freq']=1",
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/str[@name='word']='pa in table'", //2 ops
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/int[@name='freq']=7",
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/str[@name='word']='plantable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/int[@name='freq']=1",
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/str[@name='word']='puntable'", //SolrSpellChecker result interleaved
|
||||
"//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/int[@name='freq']=1",
|
||||
"//lst[@name='pine']/int[@name='numFound']=2",
|
||||
"//lst[@name='pine']/int[@name='startOffset']=21",
|
||||
"//lst[@name='pine']/int[@name='endOffset']=25",
|
||||
"//lst[@name='pine']/arr[@name='suggestion']/str[1]='line'",
|
||||
"//lst[@name='pine']/arr[@name='suggestion']/str[2]='pi ne'",
|
||||
"//lst[@name='pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='line'",
|
||||
"//lst[@name='pine']/arr[@name='suggestion']/lst[2]/str[@name='word']='pi ne'",
|
||||
"//lst[@name='apple']/int[@name='numFound']=1",
|
||||
"//lst[@name='apple']/arr[@name='suggestion']/str[1]='ample'",
|
||||
"//lst[@name='apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='ample'",
|
||||
"//lst[@name='good']/int[@name='numFound']=1",
|
||||
"//lst[@name='good']/arr[@name='suggestion']/str[1]='food'",
|
||||
"//lst[@name='good']/arr[@name='suggestion']/lst[1]/str[@name='word']='food'",
|
||||
"//lst[@name='ness']/int[@name='numFound']=1",
|
||||
"//lst[@name='ness']/arr[@name='suggestion']/str[1]='mess'",
|
||||
"//lst[@name='ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='mess'",
|
||||
"//lst[@name='pine apple']/int[@name='numFound']=1",
|
||||
"//lst[@name='pine apple']/int[@name='startOffset']=21",
|
||||
"//lst[@name='pine apple']/int[@name='endOffset']=31",
|
||||
"//lst[@name='pine apple']/arr[@name='suggestion']/str[1]='pineapple'",
|
||||
"//lst[@name='pine apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapple'",
|
||||
"//lst[@name='paintable pine']/int[@name='numFound']=1",
|
||||
"//lst[@name='paintable pine']/int[@name='startOffset']=11",
|
||||
"//lst[@name='paintable pine']/int[@name='endOffset']=25",
|
||||
"//lst[@name='paintable pine']/arr[@name='suggestion']/str[1]='paintablepine'",
|
||||
"//lst[@name='paintable pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='paintablepine'",
|
||||
"//lst[@name='good ness']/int[@name='numFound']=1",
|
||||
"//lst[@name='good ness']/int[@name='startOffset']=32",
|
||||
"//lst[@name='good ness']/int[@name='endOffset']=41",
|
||||
"//lst[@name='good ness']/arr[@name='suggestion']/str[1]='goodness'",
|
||||
"//lst[@name='good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='goodness'",
|
||||
"//lst[@name='pine apple good ness']/int[@name='numFound']=1",
|
||||
"//lst[@name='pine apple good ness']/int[@name='startOffset']=21",
|
||||
"//lst[@name='pine apple good ness']/int[@name='endOffset']=41",
|
||||
"//lst[@name='pine apple good ness']/arr[@name='suggestion']/str[1]='pineapplegoodness'"
|
||||
"//lst[@name='pine apple good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapplegoodness'"
|
||||
);
|
||||
}
|
||||
@Test
|
||||
|
|
|
@ -1245,7 +1245,7 @@
|
|||
collations (re-written queries) can include a combination of
|
||||
corrections from both spellcheckers -->
|
||||
<str name="spellcheck.dictionary">default</str>
|
||||
<!--str name="spellcheck.dictionary">wordbreak</str-->
|
||||
<str name="spellcheck.dictionary">wordbreak</str>
|
||||
<str name="spellcheck">on</str>
|
||||
<str name="spellcheck.extendedResults">true</str>
|
||||
<str name="spellcheck.count">10</str>
|
||||
|
|
|
@ -27,7 +27,6 @@ import org.apache.solr.common.params.CommonParams;
|
|||
import org.apache.solr.common.params.SpellingParams;
|
||||
import org.apache.solr.util.ExternalPaths;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -48,10 +47,11 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
|
|||
|
||||
static String field = "name";
|
||||
|
||||
@Ignore
|
||||
@Test
|
||||
public void testSpellCheckResponse() throws Exception {
|
||||
getSolrServer();
|
||||
server.deleteByQuery("*:*");
|
||||
server.commit(true, true);
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.setField("id", "111");
|
||||
doc.setField(field, "Samsung");
|
||||
|
@ -62,7 +62,6 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
|
|||
query.set(CommonParams.QT, "/spell");
|
||||
query.set("spellcheck", true);
|
||||
query.set(SpellingParams.SPELLCHECK_Q, "samsang");
|
||||
query.set(SpellingParams.SPELLCHECK_BUILD, true);
|
||||
QueryRequest request = new QueryRequest(query);
|
||||
SpellCheckResponse response = request.process(server).getSpellCheckResponse();
|
||||
Assert.assertEquals("samsung", response.getFirstSuggestion("samsang"));
|
||||
|
@ -71,17 +70,18 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
|
|||
@Test
|
||||
public void testSpellCheckResponse_Extended() throws Exception {
|
||||
getSolrServer();
|
||||
server.deleteByQuery("*:*");
|
||||
server.commit(true, true);
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.setField("id", "111");
|
||||
doc.setField(field, "Samsung");
|
||||
server.add(doc);
|
||||
server.commit(true, true);
|
||||
|
||||
SolrQuery query = new SolrQuery("name:samsang");
|
||||
SolrQuery query = new SolrQuery("*:*");
|
||||
query.set(CommonParams.QT, "/spell");
|
||||
query.set("spellcheck", true);
|
||||
//query.set(SpellingParams.SPELLCHECK_Q, "samsang");
|
||||
query.set(SpellingParams.SPELLCHECK_BUILD, true);
|
||||
query.set(SpellingParams.SPELLCHECK_Q, "samsang");
|
||||
query.set(SpellingParams.SPELLCHECK_EXTENDED_RESULTS, true);
|
||||
QueryRequest request = new QueryRequest(query);
|
||||
SpellCheckResponse response = request.process(server).getSpellCheckResponse();
|
||||
|
@ -109,6 +109,8 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
|
|||
@Test
|
||||
public void testSpellCheckCollationResponse() throws Exception {
|
||||
getSolrServer();
|
||||
server.deleteByQuery("*:*");
|
||||
server.commit(true, true);
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.setField("id", "0");
|
||||
doc.setField("name", "faith hope and love");
|
||||
|
@ -135,7 +137,6 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
|
|||
SolrQuery query = new SolrQuery("name:(+fauth +home +loane)");
|
||||
query.set(CommonParams.QT, "/spell");
|
||||
query.set("spellcheck", true);
|
||||
query.set(SpellingParams.SPELLCHECK_BUILD, true);
|
||||
query.set(SpellingParams.SPELLCHECK_COUNT, 10);
|
||||
query.set(SpellingParams.SPELLCHECK_COLLATE, true);
|
||||
QueryRequest request = new QueryRequest(query);
|
||||
|
|
Loading…
Reference in New Issue