Fix for SOLR-2509 (IndexOutOfBoundsException in Spellcheck Component/Collation)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1211456 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erick Erickson 2011-12-07 14:42:16 +00:00
parent c0f0e06213
commit b89ded1448
4 changed files with 65 additions and 4 deletions

View File

@ -286,6 +286,9 @@ Bug Fixes
and added a merge() method to SolrSpellChecker instead. Previously if you extended SolrSpellChecker and added a merge() method to SolrSpellChecker instead. Previously if you extended SolrSpellChecker
your spellchecker would not work in distributed fashion. (James Dyer via rmuir) your spellchecker would not work in distributed fashion. (James Dyer via rmuir)
* SOLR-2509: StringIndexOutOfBoundsException in the spellchecker collate when the term contains
a hyphen. (Thomas Gambier caught the bug, Steffen Godskesen did the patch, via Erick Erickson)
Other Changes Other Changes
---------------------- ----------------------
@ -410,6 +413,9 @@ Bug Fixes
* SOLR-2819: Improved speed of parsing hex entities in HTMLStripCharFilter * SOLR-2819: Improved speed of parsing hex entities in HTMLStripCharFilter
(Bernhard Berger, hossman) (Bernhard Berger, hossman)
* SOLR-2509: StringIndexOutOfBoundsException in the spellchecker collate when the term contains
a hyphen. (Thomas Gambier caught the bug, Steffen Godskesen did the patch, via Erick Erickson)
Other Changes Other Changes
---------------------- ----------------------
* SOLR-2922: Upgrade commons-io and commons-lang to 2.1 and 2.6, respectively. (koji) * SOLR-2922: Upgrade commons-io and commons-lang to 2.1 and 2.6, respectively. (koji)

View File

@ -28,6 +28,7 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -111,12 +112,13 @@ public class SpellingQueryConverter extends QueryConverter {
TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class); PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
stream.reset(); stream.reset();
while (stream.incrementToken()) { while (stream.incrementToken()) {
Token token = new Token(); Token token = new Token();
token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setStartOffset(matcher.start()); token.setStartOffset(matcher.start() + offsetAtt.startOffset());
token.setEndOffset(matcher.end()); token.setEndOffset(matcher.start() + offsetAtt.endOffset());
token.setFlags(flagsAtt.getFlags()); token.setFlags(flagsAtt.getFlags());
token.setType(typeAtt.type()); token.setType(typeAtt.type());
token.setPayload(payloadAtt.getPayload()); token.setPayload(payloadAtt.getPayload());

View File

@ -114,7 +114,7 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documemtsss broens", SpellCheckComponent.SPELLCHECK_COLLATE, "true") assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documemtsss broens", SpellCheckComponent.SPELLCHECK_COLLATE, "true")
,"/spellcheck/suggestions/collation=='document brown'" ,"/spellcheck/suggestions/collation=='document brown'"
); );
assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","pixma-a-b-c-d-e-f-g", SpellCheckComponent.SPELLCHECK_COLLATE, "true") assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","pixma", SpellCheckComponent.SPELLCHECK_COLLATE, "true")
,"/spellcheck/suggestions/collation=='pixmaa'" ,"/spellcheck/suggestions/collation=='pixmaa'"
); );
} }

View File

@ -46,9 +46,62 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer"))); assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer")));
assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer"))); assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer")));
assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace"))); assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace")));
assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "hyphenated word")));
assertNull(h.validateUpdate(commit())); assertNull(h.validateUpdate(commit()));
} }
@Test
public void testCollationWithHypens() throws Exception
{
SolrCore core = h.getCore();
SearchComponent speller = core.getSearchComponent("spellcheck");
assertTrue("speller is null and it shouldn't be", speller != null);
ModifiableSolrParams params = new ModifiableSolrParams();
params.add(SpellCheckComponent.COMPONENT_NAME, "true");
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
params.add(CommonParams.Q, "lowerfilt:(hypenated-wotd)");
{
SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
SolrQueryResponse rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
handler.handleRequest(req, rsp);
req.close();
NamedList values = rsp.getValues();
NamedList spellCheck = (NamedList) values.get("spellcheck");
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
List<String> collations = suggestions.getAll("collation");
assertTrue(collations.size()==1);
String collation = collations.iterator().next();
assertTrue("Incorrect collation: " + collation,"lowerfilt:(hyphenated-word)".equals(collation));
}
params.remove(CommonParams.Q);
params.add("defType", "dismax");
params.add("qf", "lowerfilt");
params.add(CommonParams.Q, "hypenated-wotd");
{
SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
SolrQueryResponse rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
handler.handleRequest(req, rsp);
req.close();
NamedList values = rsp.getValues();
NamedList spellCheck = (NamedList) values.get("spellcheck");
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
List<String> collations = suggestions.getAll("collation");
assertTrue(collations.size()==1);
String collation = collations.iterator().next();
assertTrue("Incorrect collation: " + collation,"hyphenated-word".equals(collation));
}
}
@Test @Test
public void testCollateWithFilter() throws Exception public void testCollateWithFilter() throws Exception
{ {