From ce20e7b133c7006ba8d0339f51d1c7f985b2cc07 Mon Sep 17 00:00:00 2001 From: James Dyer Date: Tue, 5 Jun 2012 17:44:02 +0000 Subject: [PATCH] SOLR-2993: fix test failures (SOLR-2993-fixes.patch) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1346489 13f79535-47bb-0310-9956-ffa450edef68 --- .../spelling/ConjunctionSolrSpellChecker.java | 9 ++++ .../spelling/WordBreakSolrSpellChecker.java | 35 ++++++++++++++- .../conf/solrconfig-spellcheckcomponent.xml | 1 + .../DistributedSpellCheckComponentTest.java | 2 +- .../WordBreakSolrSpellCheckerTest.java | 43 +++++++++++-------- solr/example/solr/conf/solrconfig.xml | 2 +- .../response/TestSpellCheckResponse.java | 15 ++++--- 7 files changed, 79 insertions(+), 28 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java index 9ce67213940..d952c39b729 100644 --- a/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java @@ -19,6 +19,7 @@ package org.apache.solr.spelling; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; @@ -135,8 +136,12 @@ public class ConjunctionSolrSpellChecker extends SolrSpellChecker { //TODO: This just interleaves the results. In the future, we might want to let users give each checker its // own weight and use that in combination to score & frequency to sort the results ? private SpellingResult mergeCheckers(SpellingResult[] results, int numSug) { + Map combinedTokenFrequency = new HashMap(); Map>> allSuggestions = new LinkedHashMap>>(); for(SpellingResult result : results) { + if(result.getTokenFrequency()!=null) { + combinedTokenFrequency.putAll(result.getTokenFrequency()); + } for(Map.Entry> entry : result.getSuggestions().entrySet()) { List> allForThisToken = allSuggestions.get(entry.getKey()); if(allForThisToken==null) { @@ -161,6 +166,10 @@ public class ConjunctionSolrSpellChecker extends SolrSpellChecker { anyData = true; Map.Entry corr = iter.next(); combinedResult.add(original, corr.getKey(), corr.getValue()); + Integer tokenFrequency = combinedTokenFrequency.get(original); + if(tokenFrequency!=null) { + combinedResult.addFrequency(original, tokenFrequency); + } if(++numberAdded==numSug) { break; } diff --git a/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java index 0cb099d7f24..7fbe4262f76 100644 --- a/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.regex.Pattern; import org.apache.lucene.analysis.Token; import org.apache.lucene.index.IndexReader; @@ -90,16 +91,22 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker { */ public static final String PARAM_MIN_SUGGESTION_FREQUENCY = "minSuggestionFreq"; + /** + *

+ * Specify a value on the "breakSugestionTieBreaker" parameter. + * The default is MAX_FREQ. + *

+ */ public enum BreakSuggestionTieBreaker { /** * See - * {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY} + * {@link WordBreakSpellChecker.BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY} * # */ MAX_FREQ, /** * See - * {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_SUMMED_FREQUENCY} + * {@link WordBreakSpellChecker.BreakSuggestionSortMethod#NUM_CHANGES_THEN_SUMMED_FREQUENCY} */ SUM_FREQ }; @@ -108,6 +115,7 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker { private boolean combineWords = false; private boolean breakWords = false; private BreakSuggestionSortMethod sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY; + private static final Pattern spacePattern = Pattern.compile("\\s+"); @Override public String init(@SuppressWarnings("unchecked") NamedList config, @@ -127,6 +135,8 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker { throw new IllegalArgumentException("Invalid value for parameter " + PARAM_BREAK_SUGGESTION_TIE_BREAKER + " : " + bstb); } + } else { + sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY; } int mc = intParam(config, PARAM_MAX_CHANGES); if (mc > 0) { @@ -272,21 +282,27 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker { while (lastBreak != null || lastCombine != null) { if (lastBreak == null) { result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq); + result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token)); lastCombine = null; } else if (lastCombine == null) { result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq); + result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString()))); lastBreak = null; } else if (lastBreak.freq < lastCombine.freq) { result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq); + result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token)); lastCombine = null; } else if (lastCombine.freq < lastBreak.freq) { result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq); + result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString()))); lastBreak = null; } else if (breakCount >= combineCount) { result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq); + result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token)); lastCombine = null; } else { result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq); + result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString()))); lastBreak = null; } if (result.getSuggestions().size() > numSuggestions) { @@ -304,6 +320,21 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker { return result; } + private int getCombineFrequency(IndexReader ir, Token token) throws IOException { + String[] words = spacePattern.split(token.toString()); + int result = 0; + if(sortMethod==BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY) { + for(String word : words) { + result = Math.max(result, ir.docFreq(new Term(field, word))); + } + } else { + for(String word : words) { + result += ir.docFreq(new Term(field, word)); + } + } + return result; + } + @Override public void build(SolrCore core, SolrIndexSearcher searcher) { /* no-op */ diff --git a/solr/core/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml b/solr/core/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml index 40dea3f83ff..87bd0d42bb4 100644 --- a/solr/core/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml +++ b/solr/core/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml @@ -75,6 +75,7 @@ Config for testing spellcheck component lowerfilt true true + MAX_FREQ 10 diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java index 24b0dfbb247..fcf9df4d2fe 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java @@ -149,6 +149,6 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes query("q", "lowerfilt:(\"quote red fox\")", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ALTERNATIVE_TERM_COUNT, "5", SpellCheckComponent.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "10"); query("q", "lowerfilt:(\"rod fix\")", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ALTERNATIVE_TERM_COUNT, "5", SpellCheckComponent.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "10"); - //query("q", "lowerfilt:(+quock +redfox +jum +ped)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", reqHandlerWithWordbreak, "shards.qt", reqHandlerWithWordbreak, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true"); + query("q", "lowerfilt:(+quock +redfox +jum +ped)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", reqHandlerWithWordbreak, "shards.qt", reqHandlerWithWordbreak, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true"); } } diff --git a/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java index 4172e79998e..4335700c123 100644 --- a/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java @@ -139,41 +139,50 @@ public class WordBreakSolrSpellCheckerTest extends SolrTestCaseJ4 { "//lst[@name='paintable']/int[@name='numFound']=8", "//lst[@name='paintable']/int[@name='startOffset']=11", "//lst[@name='paintable']/int[@name='endOffset']=20", - "//lst[@name='paintable']/arr[@name='suggestion']/str[1]='printable'", //SolrSpellChecker result interleaved - "//lst[@name='paintable']/arr[@name='suggestion']/str[2]='paint able'", //1 op ; max doc freq=5 - "//lst[@name='paintable']/arr[@name='suggestion']/str[3]='pintable'", //SolrSpellChecker result interleaved - "//lst[@name='paintable']/arr[@name='suggestion']/str[4]='pain table'", //1 op ; max doc freq=4 - "//lst[@name='paintable']/arr[@name='suggestion']/str[5]='pointable'", //SolrSpellChecker result interleaved - "//lst[@name='paintable']/arr[@name='suggestion']/str[6]='pa in table'",//2 ops - "//lst[@name='paintable']/arr[@name='suggestion']/str[7]='plantable'", //SolrSpellChecker result interleaved - "//lst[@name='paintable']/arr[@name='suggestion']/str[8]='puntable'", //SolrSpellChecker result interleaved + "//lst[@name='paintable']/int[@name='origFreq']=0", + "//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/str[@name='word']='printable'", //SolrSpellChecker result interleaved + "//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/int[@name='freq']=3", + "//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/str[@name='word']='paint able'", //1 op + "//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/int[@name='freq']=5", + "//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/str[@name='word']='pintable'", //SolrSpellChecker result interleaved + "//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/int[@name='freq']=1", + "//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/str[@name='word']='pain table'", //1 op + "//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/int[@name='freq']=2", + "//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/str[@name='word']='pointable'", //SolrSpellChecker result interleaved + "//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/int[@name='freq']=1", + "//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/str[@name='word']='pa in table'", //2 ops + "//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/int[@name='freq']=7", + "//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/str[@name='word']='plantable'", //SolrSpellChecker result interleaved + "//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/int[@name='freq']=1", + "//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/str[@name='word']='puntable'", //SolrSpellChecker result interleaved + "//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/int[@name='freq']=1", "//lst[@name='pine']/int[@name='numFound']=2", "//lst[@name='pine']/int[@name='startOffset']=21", "//lst[@name='pine']/int[@name='endOffset']=25", - "//lst[@name='pine']/arr[@name='suggestion']/str[1]='line'", - "//lst[@name='pine']/arr[@name='suggestion']/str[2]='pi ne'", + "//lst[@name='pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='line'", + "//lst[@name='pine']/arr[@name='suggestion']/lst[2]/str[@name='word']='pi ne'", "//lst[@name='apple']/int[@name='numFound']=1", - "//lst[@name='apple']/arr[@name='suggestion']/str[1]='ample'", + "//lst[@name='apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='ample'", "//lst[@name='good']/int[@name='numFound']=1", - "//lst[@name='good']/arr[@name='suggestion']/str[1]='food'", + "//lst[@name='good']/arr[@name='suggestion']/lst[1]/str[@name='word']='food'", "//lst[@name='ness']/int[@name='numFound']=1", - "//lst[@name='ness']/arr[@name='suggestion']/str[1]='mess'", + "//lst[@name='ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='mess'", "//lst[@name='pine apple']/int[@name='numFound']=1", "//lst[@name='pine apple']/int[@name='startOffset']=21", "//lst[@name='pine apple']/int[@name='endOffset']=31", - "//lst[@name='pine apple']/arr[@name='suggestion']/str[1]='pineapple'", + "//lst[@name='pine apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapple'", "//lst[@name='paintable pine']/int[@name='numFound']=1", "//lst[@name='paintable pine']/int[@name='startOffset']=11", "//lst[@name='paintable pine']/int[@name='endOffset']=25", - "//lst[@name='paintable pine']/arr[@name='suggestion']/str[1]='paintablepine'", + "//lst[@name='paintable pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='paintablepine'", "//lst[@name='good ness']/int[@name='numFound']=1", "//lst[@name='good ness']/int[@name='startOffset']=32", "//lst[@name='good ness']/int[@name='endOffset']=41", - "//lst[@name='good ness']/arr[@name='suggestion']/str[1]='goodness'", + "//lst[@name='good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='goodness'", "//lst[@name='pine apple good ness']/int[@name='numFound']=1", "//lst[@name='pine apple good ness']/int[@name='startOffset']=21", "//lst[@name='pine apple good ness']/int[@name='endOffset']=41", - "//lst[@name='pine apple good ness']/arr[@name='suggestion']/str[1]='pineapplegoodness'" + "//lst[@name='pine apple good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapplegoodness'" ); } @Test diff --git a/solr/example/solr/conf/solrconfig.xml b/solr/example/solr/conf/solrconfig.xml index 762cddd928c..bd80e6ce088 100755 --- a/solr/example/solr/conf/solrconfig.xml +++ b/solr/example/solr/conf/solrconfig.xml @@ -1245,7 +1245,7 @@ collations (re-written queries) can include a combination of corrections from both spellcheckers --> default - + wordbreak on true 10 diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java b/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java index 01584b6603e..2216e81cb7c 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java @@ -27,7 +27,6 @@ import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SpellingParams; import org.apache.solr.util.ExternalPaths; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; import java.util.List; @@ -48,10 +47,11 @@ public class TestSpellCheckResponse extends SolrJettyTestBase { static String field = "name"; - @Ignore @Test public void testSpellCheckResponse() throws Exception { getSolrServer(); + server.deleteByQuery("*:*"); + server.commit(true, true); SolrInputDocument doc = new SolrInputDocument(); doc.setField("id", "111"); doc.setField(field, "Samsung"); @@ -62,7 +62,6 @@ public class TestSpellCheckResponse extends SolrJettyTestBase { query.set(CommonParams.QT, "/spell"); query.set("spellcheck", true); query.set(SpellingParams.SPELLCHECK_Q, "samsang"); - query.set(SpellingParams.SPELLCHECK_BUILD, true); QueryRequest request = new QueryRequest(query); SpellCheckResponse response = request.process(server).getSpellCheckResponse(); Assert.assertEquals("samsung", response.getFirstSuggestion("samsang")); @@ -71,17 +70,18 @@ public class TestSpellCheckResponse extends SolrJettyTestBase { @Test public void testSpellCheckResponse_Extended() throws Exception { getSolrServer(); + server.deleteByQuery("*:*"); + server.commit(true, true); SolrInputDocument doc = new SolrInputDocument(); doc.setField("id", "111"); doc.setField(field, "Samsung"); server.add(doc); server.commit(true, true); - SolrQuery query = new SolrQuery("name:samsang"); + SolrQuery query = new SolrQuery("*:*"); query.set(CommonParams.QT, "/spell"); query.set("spellcheck", true); - //query.set(SpellingParams.SPELLCHECK_Q, "samsang"); - query.set(SpellingParams.SPELLCHECK_BUILD, true); + query.set(SpellingParams.SPELLCHECK_Q, "samsang"); query.set(SpellingParams.SPELLCHECK_EXTENDED_RESULTS, true); QueryRequest request = new QueryRequest(query); SpellCheckResponse response = request.process(server).getSpellCheckResponse(); @@ -109,6 +109,8 @@ public class TestSpellCheckResponse extends SolrJettyTestBase { @Test public void testSpellCheckCollationResponse() throws Exception { getSolrServer(); + server.deleteByQuery("*:*"); + server.commit(true, true); SolrInputDocument doc = new SolrInputDocument(); doc.setField("id", "0"); doc.setField("name", "faith hope and love"); @@ -135,7 +137,6 @@ public class TestSpellCheckResponse extends SolrJettyTestBase { SolrQuery query = new SolrQuery("name:(+fauth +home +loane)"); query.set(CommonParams.QT, "/spell"); query.set("spellcheck", true); - query.set(SpellingParams.SPELLCHECK_BUILD, true); query.set(SpellingParams.SPELLCHECK_COUNT, 10); query.set(SpellingParams.SPELLCHECK_COLLATE, true); QueryRequest request = new QueryRequest(query);