From b89ded1448bf86f213282dff4f85a9f0251d5ec1 Mon Sep 17 00:00:00 2001 From: Erick Erickson Date: Wed, 7 Dec 2011 14:42:16 +0000 Subject: [PATCH] Fix for SOLR-2509 (IndexOutOfBoundsException in Spellcheck Component/Collation) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1211456 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 6 ++ .../solr/spelling/SpellingQueryConverter.java | 6 +- .../component/SpellCheckComponentTest.java | 2 +- .../solr/spelling/SpellCheckCollatorTest.java | 55 ++++++++++++++++++- 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e34a698ea2c..cc73fe185f1 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -286,6 +286,9 @@ Bug Fixes and added a merge() method to SolrSpellChecker instead. Previously if you extended SolrSpellChecker your spellchecker would not work in distributed fashion. (James Dyer via rmuir) +* SOLR-2509: StringIndexOutOfBoundsException in the spellchecker collate when the term contains + a hyphen. (Thomas Gambier caught the bug, Steffen Godskesen did the patch, via Erick Erickson) + Other Changes ---------------------- @@ -410,6 +413,9 @@ Bug Fixes * SOLR-2819: Improved speed of parsing hex entities in HTMLStripCharFilter (Bernhard Berger, hossman) +* SOLR-2509: StringIndexOutOfBoundsException in the spellchecker collate when the term contains + a hyphen. (Thomas Gambier caught the bug, Steffen Godskesen did the patch, via Erick Erickson) + Other Changes ---------------------- * SOLR-2922: Upgrade commons-io and commons-lang to 2.1 and 2.6, respectively. 
(koji) diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java b/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java index 07052fdf0a1..7b0af9273a4 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java +++ b/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java @@ -28,6 +28,7 @@ import java.util.regex.Pattern; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -111,12 +112,13 @@ public class SpellingQueryConverter extends QueryConverter { TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); + OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); stream.reset(); while (stream.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); - token.setStartOffset(matcher.start()); - token.setEndOffset(matcher.end()); + token.setStartOffset(matcher.start() + offsetAtt.startOffset()); + token.setEndOffset(matcher.start() + offsetAtt.endOffset()); token.setFlags(flagsAtt.getFlags()); token.setType(typeAtt.type()); token.setPayload(payloadAtt.getPayload()); diff --git a/solr/core/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java index eda48e9c7b5..1e04d62c582 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java 
+++ b/solr/core/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java @@ -114,7 +114,7 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 { assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documemtsss broens", SpellCheckComponent.SPELLCHECK_COLLATE, "true") ,"/spellcheck/suggestions/collation=='document brown'" ); - assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","pixma-a-b-c-d-e-f-g", SpellCheckComponent.SPELLCHECK_COLLATE, "true") + assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","pixma", SpellCheckComponent.SPELLCHECK_COLLATE, "true") ,"/spellcheck/suggestions/collation=='pixmaa'" ); } diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java index e4a43342110..9383c534c84 100644 --- a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java @@ -46,9 +46,62 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 { assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer"))); assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer"))); assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace"))); + assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "hyphenated word"))); assertNull(h.validateUpdate(commit())); } - + + @Test + public void testCollationWithHypens() throws Exception + { + SolrCore core = h.getCore(); + SearchComponent speller = core.getSearchComponent("spellcheck"); + assertTrue("speller is null and it shouldn't be", speller != null); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(SpellCheckComponent.COMPONENT_NAME, "true"); + params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true"); + params.add(SpellCheckComponent.SPELLCHECK_COUNT, 
"10"); + params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true"); + + params.add(CommonParams.Q, "lowerfilt:(hypenated-wotd)"); + { + SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH"); + SolrQueryResponse rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + SolrQueryRequest req = new LocalSolrQueryRequest(core, params); + handler.handleRequest(req, rsp); + req.close(); + NamedList values = rsp.getValues(); + NamedList spellCheck = (NamedList) values.get("spellcheck"); + NamedList suggestions = (NamedList) spellCheck.get("suggestions"); + List<String> collations = suggestions.getAll("collation"); + assertTrue(collations.size()==1); + String collation = collations.iterator().next(); + assertTrue("Incorrect collation: " + collation,"lowerfilt:(hyphenated-word)".equals(collation)); + } + + params.remove(CommonParams.Q); + params.add("defType", "dismax"); + params.add("qf", "lowerfilt"); + params.add(CommonParams.Q, "hypenated-wotd"); + { + SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH"); + SolrQueryResponse rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + SolrQueryRequest req = new LocalSolrQueryRequest(core, params); + handler.handleRequest(req, rsp); + req.close(); + NamedList values = rsp.getValues(); + NamedList spellCheck = (NamedList) values.get("spellcheck"); + NamedList suggestions = (NamedList) spellCheck.get("suggestions"); + List<String> collations = suggestions.getAll("collation"); + assertTrue(collations.size()==1); + String collation = collations.iterator().next(); + assertTrue("Incorrect collation: " + collation,"hyphenated-word".equals(collation)); + } + + } + @Test public void testCollateWithFilter() throws Exception {