SOLR-1630: fix minor collation issue

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@987509 13f79535-47bb-0310-9956-ffa450edef68
2010-08-20 13:56:35 +00:00 · 2010-08-20 13:56:35 +00:00 · 85549f7e7c
parent c31c4b63d1
commit 85549f7e7c
5 changed files with 41 additions and 2 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -412,7 +412,10 @@ Bug Fixes
 * SOLR-2036: Avoid expensive fieldCache ram estimation for the
  admin stats page. (yonik)

-* SOLR-2047: ReplicationHandler should accept bool type for enable flag. (koji)  
+* SOLR-2047: ReplicationHandler should accept bool type for enable flag. (koji)
+
+* SOLR-1630: Fix spell checking collation issue related to token positions (rmuir, gsingers)
+

 Other Changes
 ----------------------
--- a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@ -450,6 +450,8 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
      for (Iterator<Map.Entry<Token, String>> bestIter = best.entrySet().iterator(); bestIter.hasNext();) {
        Map.Entry<Token, String> entry = bestIter.next();
        Token tok = entry.getKey();
+        // Replacements are applied in query order. Tokens injected at an existing position (position increment 0) are skipped, because earlier replacements may have made their offsets illegal.
+        if (tok.getPositionIncrement() == 0) continue;
        collation.replace(tok.startOffset() + offset, 
          tok.endOffset() + offset, entry.getValue());
        offset += entry.getValue().length() - (tok.endOffset() - tok.startOffset());
--- a/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
+++ b/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
@ -57,6 +57,7 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
    assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "boue")));
    assertNull(h.validateUpdate(adoc("id", "7", "lowerfilt", "glue")));
    assertNull(h.validateUpdate(adoc("id", "8", "lowerfilt", "blee")));
+    assertNull(h.validateUpdate(adoc("id", "9", "lowerfilt", "pixmaa")));
    assertNull(h.validateUpdate(commit()));
  }
  
@ -235,6 +236,30 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
    collation = (String) suggestions.get("collation");
    assertEquals("document brown",collation);
  }
+  
+  @Test
+  public void testCollate2() throws Exception { // checks the collation returned for a hyphenated query
+    SolrCore core = h.getCore();
+    SearchComponent speller = core.getSearchComponent("spellcheck");
+    assertTrue("speller is null and it shouldn't be", speller != null);
+
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.add(CommonParams.QT, "spellCheckCompRH");
+    params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true"); // presumably (re)builds the spellcheck index for this request - confirm
+    params.add(CommonParams.Q, "pixma-a-b-c-d-e-f-g"); // hyphenated query under test
+    params.add(SpellCheckComponent.COMPONENT_NAME, "true"); // presumably switches the spellcheck component on - confirm
+    params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true"); // ask for a collation in the response
+
+    SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
+    SolrQueryResponse rsp = new SolrQueryResponse();
+    rsp.add("responseHeader", new SimpleOrderedMap()); // empty header entry added by the test before invoking the handler
+    handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+    NamedList values = rsp.getValues();
+    NamedList spellCheck = (NamedList) values.get("spellcheck");
+    NamedList suggestions = (NamedList) spellCheck.get("suggestions");
+    String collation = (String) suggestions.get("collation"); // collation is read from inside the "suggestions" list
+    assertEquals("pixmaa", collation); // "pixmaa" is the value indexed for doc id 9 in the test setup
+  }

  @Test
  public void testCorrectSpelling() throws Exception {
--- a/solr/src/test/test-files/solr/conf/schema.xml
+++ b/solr/src/test/test-files/solr/conf/schema.xml
@ -197,6 +197,14 @@
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldtype>
+    <fieldtype name="lowerpunctfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt" expand="true"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
    <fieldtype name="patternreplacefilt" class="solr.TextField">
      <analyzer type="index">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
--- a/solr/src/test/test-files/solr/conf/solrconfig.xml
+++ b/solr/src/test/test-files/solr/conf/solrconfig.xml
@ -333,7 +333,8 @@
  </requestHandler>

  <searchComponent name="spellcheck" class="org.apache.solr.handler.component.SpellCheckComponent">
-    <str name="queryAnalyzerFieldType">lowerfilt</str>
+    <!-- Deliberately differs from the analysis of the indexed field (lowerfilt) so that tests can exercise token offset changes -->
+    <str name="queryAnalyzerFieldType">lowerpunctfilt</str>

    <lst name="spellchecker">
      <str name="name">default</str>