SOLR-1630: fix minor collation issue

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@987509 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2010-08-20 13:56:35 +00:00
parent c31c4b63d1
commit 85549f7e7c
5 changed files with 41 additions and 2 deletions

View File

@ -412,7 +412,10 @@ Bug Fixes
* SOLR-2036: Avoid expensive fieldCache ram estimation for the
admin stats page. (yonik)
* SOLR-2047: ReplicationHandler should accept bool type for enable flag. (koji)
* SOLR-2047: ReplicationHandler should accept bool type for enable flag. (koji)
* SOLR-1630: Fix spell checking collation issue related to token positions (rmuir, gsingers)
Other Changes
----------------------

View File

@ -450,6 +450,8 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
for (Iterator<Map.Entry<Token, String>> bestIter = best.entrySet().iterator(); bestIter.hasNext();) {
Map.Entry<Token, String> entry = bestIter.next();
Token tok = entry.getKey();
// we are replacing the query in order, but injected terms might cause illegal offsets due to previous replacements.
if (tok.getPositionIncrement() == 0) continue;
collation.replace(tok.startOffset() + offset,
tok.endOffset() + offset, entry.getValue());
offset += entry.getValue().length() - (tok.endOffset() - tok.startOffset());

View File

@ -57,6 +57,7 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "boue")));
assertNull(h.validateUpdate(adoc("id", "7", "lowerfilt", "glue")));
assertNull(h.validateUpdate(adoc("id", "8", "lowerfilt", "blee")));
assertNull(h.validateUpdate(adoc("id", "9", "lowerfilt", "pixmaa")));
assertNull(h.validateUpdate(commit()));
}
@ -235,6 +236,30 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
collation = (String) suggestions.get("collation");
assertEquals("document brown",collation);
}
/**
 * Regression test for SOLR-1630: runs the query {@code pixma-a-b-c-d-e-f-g} through the
 * {@code spellCheckCompRH} handler with collation enabled and expects the single collation
 * {@code pixmaa}.
 *
 * NOTE(review): the query analyzer is presumably configured to emit several tokens for this
 * hyphenated input; confirm against the spellcheck component's queryAnalyzerFieldType.
 *
 * @throws Exception if the request handler fails
 */
@Test
public void testCollate2() throws Exception {
  SolrCore core = h.getCore();
  SearchComponent speller = core.getSearchComponent("spellcheck");
  assertTrue("speller is null and it shouldn't be", speller != null);

  ModifiableSolrParams params = new ModifiableSolrParams();
  params.add(CommonParams.QT, "spellCheckCompRH");
  params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
  params.add(CommonParams.Q, "pixma-a-b-c-d-e-f-g");
  params.add(SpellCheckComponent.COMPONENT_NAME, "true");
  params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");

  SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
  SolrQueryResponse rsp = new SolrQueryResponse();
  rsp.add("responseHeader", new SimpleOrderedMap<Object>());

  // Keep a reference to the request so it can be closed; an unclosed request leaks whatever
  // resources it acquired while being handled.
  LocalSolrQueryRequest req = new LocalSolrQueryRequest(core, params);
  try {
    handler.handleRequest(req, rsp);
  } finally {
    req.close();
  }

  NamedList<?> values = rsp.getValues();
  NamedList<?> spellCheck = (NamedList<?>) values.get("spellcheck");
  NamedList<?> suggestions = (NamedList<?>) spellCheck.get("suggestions");
  String collation = (String) suggestions.get("collation");
  assertEquals("pixmaa", collation);
}
@Test
public void testCorrectSpelling() throws Exception {

View File

@ -197,6 +197,14 @@
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- Text field type whose analyzer chain is, in order: whitespace tokenization, synonym
     expansion from synonyms.txt (expand="true"), word-delimiter splitting with word/number
     parts plus catenated words, numbers and "all" forms, then lowercasing.
     NOTE(review): the synonym and word-delimiter filters presumably produce extra tokens for
     a single input token; confirm that is why this type is used to exercise token-offset handling. -->
<fieldtype name="lowerpunctfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt" expand="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/>

View File

@ -333,7 +333,8 @@
</requestHandler>
<searchComponent name="spellcheck" class="org.apache.solr.handler.component.SpellCheckComponent">
<str name="queryAnalyzerFieldType">lowerfilt</str>
<!-- This is slightly different from the field's own analyzer so we can test dealing with token offset changes -->
<str name="queryAnalyzerFieldType">lowerpunctfilt</str>
<lst name="spellchecker">
<str name="name">default</str>