mirror of https://github.com/apache/lucene.git
SOLR-2462: use of spellcheck.collate could result in extremely high memory usage
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132729 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
63083944b1
commit
bdee0a9764
|
@ -277,6 +277,11 @@ Bug Fixes
|
||||||
English-specific fieldTypes (Jan Høydahl, hossman, Robert Muir,
|
English-specific fieldTypes (Jan Høydahl, hossman, Robert Muir,
|
||||||
yonik, Mike McCandless)
|
yonik, Mike McCandless)
|
||||||
|
|
||||||
|
* SOLR-2462: Fix extremely high memory usage problems with spellcheck.collate.
|
||||||
|
Separately, an additional spellcheck.maxCollationEvaluations (default=10000)
|
||||||
|
parameter is added to avoid excessive CPU time in extreme cases (e.g. long
|
||||||
|
queries with many misspelled words). (James Dyer via rmuir)
|
||||||
|
|
||||||
================== 3.2.0 ==================
|
================== 3.2.0 ==================
|
||||||
Versions of Major Components
|
Versions of Major Components
|
||||||
---------------------
|
---------------------
|
||||||
|
|
|
@ -96,6 +96,14 @@ public interface SpellingParams {
|
||||||
* </p>
|
* </p>
|
||||||
*/
|
*/
|
||||||
public static final String SPELLCHECK_MAX_COLLATION_TRIES = SPELLCHECK_PREFIX + "maxCollationTries";
|
public static final String SPELLCHECK_MAX_COLLATION_TRIES = SPELLCHECK_PREFIX + "maxCollationTries";
|
||||||
|
/**
|
||||||
|
* <p>
|
||||||
|
* The maximum number of word correction combinations to rank and evaluate prior to deciding which collation
|
||||||
|
* candidates to test against the index. This is a performance safety-net in cases where a user enters a query with
|
||||||
|
* many misspelled words. The default is 10,000 combinations.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
public static final String SPELLCHECK_MAX_COLLATION_EVALUATIONS = SPELLCHECK_PREFIX + "maxCollationEvaluations";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
@ -172,11 +172,12 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
NamedList response) {
|
NamedList response) {
|
||||||
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
|
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
|
||||||
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
|
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
|
||||||
|
int maxCollationEvaluations = params.getInt(SPELLCHECK_MAX_COLLATION_EVALUATIONS, 10000);
|
||||||
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
|
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
|
||||||
boolean shard = params.getBool(ShardParams.IS_SHARD, false);
|
boolean shard = params.getBool(ShardParams.IS_SHARD, false);
|
||||||
|
|
||||||
SpellCheckCollator collator = new SpellCheckCollator();
|
SpellCheckCollator collator = new SpellCheckCollator();
|
||||||
List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries);
|
List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries, maxCollationEvaluations);
|
||||||
//by sorting here we guarantee a non-distributed request returns all
|
//by sorting here we guarantee a non-distributed request returns all
|
||||||
//results in the same order as a distributed request would,
|
//results in the same order as a distributed request would,
|
||||||
//even in cases when the internal rank is the same.
|
//even in cases when the internal rank is the same.
|
||||||
|
|
|
@ -17,12 +17,13 @@ package org.apache.solr.spelling;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.PriorityQueue;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
|
|
||||||
|
@ -38,8 +39,7 @@ import org.apache.lucene.analysis.Token;
|
||||||
*/
|
*/
|
||||||
public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
|
public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
|
||||||
private List<List<SpellCheckCorrection>> possibilityList = new ArrayList<List<SpellCheckCorrection>>();
|
private List<List<SpellCheckCorrection>> possibilityList = new ArrayList<List<SpellCheckCorrection>>();
|
||||||
private List<RankedSpellPossibility> rankedPossibilityList = new ArrayList<RankedSpellPossibility>();
|
private Iterator<RankedSpellPossibility> rankedPossibilityIterator = null;
|
||||||
private Iterator<RankedSpellPossibility> rankedPossibilityIterator;
|
|
||||||
private int correctionIndex[];
|
private int correctionIndex[];
|
||||||
private boolean done = false;
|
private boolean done = false;
|
||||||
|
|
||||||
|
@ -56,7 +56,7 @@ public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
|
||||||
*
|
*
|
||||||
* @param suggestions
|
* @param suggestions
|
||||||
*/
|
*/
|
||||||
public PossibilityIterator(Map<Token, LinkedHashMap<String, Integer>> suggestions) {
|
public PossibilityIterator(Map<Token, LinkedHashMap<String, Integer>> suggestions, int maximumRequiredSuggestions, int maxEvaluations) {
|
||||||
for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
|
for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
|
||||||
Token token = entry.getKey();
|
Token token = entry.getKey();
|
||||||
List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
|
List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
|
||||||
|
@ -85,11 +85,26 @@ public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (internalHasNext()) {
|
long count = 0;
|
||||||
rankedPossibilityList.add(internalNext());
|
PriorityQueue<RankedSpellPossibility> rankedPossibilities = new PriorityQueue<RankedSpellPossibility>();
|
||||||
|
while (count < maxEvaluations && internalHasNext()) {
|
||||||
|
RankedSpellPossibility rsp = internalNext();
|
||||||
|
count++;
|
||||||
|
|
||||||
|
if(rankedPossibilities.size() >= maximumRequiredSuggestions && rsp.getRank() >= rankedPossibilities.peek().getRank()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
rankedPossibilities.offer(rsp);
|
||||||
|
if(rankedPossibilities.size() > maximumRequiredSuggestions) {
|
||||||
|
rankedPossibilities.poll();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Collections.sort(rankedPossibilityList);
|
|
||||||
rankedPossibilityIterator = rankedPossibilityList.iterator();
|
RankedSpellPossibility[] rpArr = new RankedSpellPossibility[rankedPossibilities.size()];
|
||||||
|
for(int i=rankedPossibilities.size() - 1 ; i>=0 ; i--) {
|
||||||
|
rpArr[i] = rankedPossibilities.remove();
|
||||||
|
}
|
||||||
|
rankedPossibilityIterator = Arrays.asList(rpArr).iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean internalHasNext() {
|
private boolean internalHasNext() {
|
||||||
|
|
|
@ -22,8 +22,9 @@ public class RankedSpellPossibility implements Comparable<RankedSpellPossibility
|
||||||
private List<SpellCheckCorrection> corrections;
|
private List<SpellCheckCorrection> corrections;
|
||||||
private int rank;
|
private int rank;
|
||||||
|
|
||||||
|
//Order poorer (higher rank value) suggestions ahead of better ones so that the head of a PriorityQueue is the poorest retained candidate
|
||||||
public int compareTo(RankedSpellPossibility rcl) {
|
public int compareTo(RankedSpellPossibility rcl) {
|
||||||
return new Integer(rank).compareTo(rcl.rank);
|
return new Integer(rcl.rank).compareTo(rank);
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<SpellCheckCorrection> getCorrections() {
|
public List<SpellCheckCorrection> getCorrections() {
|
||||||
|
@ -41,4 +42,17 @@ public class RankedSpellPossibility implements Comparable<RankedSpellPossibility
|
||||||
public void setRank(int rank) {
|
public void setRank(int rank) {
|
||||||
this.rank = rank;
|
this.rank = rank;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("rank=").append(rank);
|
||||||
|
if(corrections != null) {
|
||||||
|
for(SpellCheckCorrection corr : corrections) {
|
||||||
|
sb.append(" ");
|
||||||
|
sb.append(corr.getOriginal()).append(">").append(corr.getCorrection()).append(" (").append(corr.getNumberOfOccurences()).append(")");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,7 +36,7 @@ public class SpellCheckCollator {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class);
|
private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class);
|
||||||
|
|
||||||
public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse,
|
public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse,
|
||||||
int maxCollations, int maxTries) {
|
int maxCollations, int maxTries, int maxEvaluations) {
|
||||||
List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();
|
List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();
|
||||||
|
|
||||||
QueryComponent queryComponent = null;
|
QueryComponent queryComponent = null;
|
||||||
|
@ -62,7 +62,7 @@ public class SpellCheckCollator {
|
||||||
|
|
||||||
int tryNo = 0;
|
int tryNo = 0;
|
||||||
int collNo = 0;
|
int collNo = 0;
|
||||||
PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions());
|
PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxTries, maxEvaluations);
|
||||||
while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
|
while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
|
||||||
|
|
||||||
RankedSpellPossibility possibility = possibilityIter.next();
|
RankedSpellPossibility possibility = possibilityIter.next();
|
||||||
|
|
|
@ -143,7 +143,7 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
|
||||||
|
|
||||||
//Test Expanded Collation Results
|
//Test Expanded Collation Results
|
||||||
query.set(SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, true);
|
query.set(SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, true);
|
||||||
query.set(SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, 5);
|
query.set(SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, 10);
|
||||||
query.set(SpellingParams.SPELLCHECK_MAX_COLLATIONS, 2);
|
query.set(SpellingParams.SPELLCHECK_MAX_COLLATIONS, 2);
|
||||||
request = new QueryRequest(query);
|
request = new QueryRequest(query);
|
||||||
response = request.process(server).getSpellCheckResponse();
|
response = request.process(server).getSpellCheckResponse();
|
||||||
|
|
|
@ -60,8 +60,8 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
|
||||||
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
|
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
|
||||||
params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
|
params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
|
||||||
params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
|
params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
|
||||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5");
|
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10");
|
||||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "2");
|
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10");
|
||||||
params.add(CommonParams.Q, "lowerfilt:(+fauth +home +loane)");
|
params.add(CommonParams.Q, "lowerfilt:(+fauth +home +loane)");
|
||||||
params.add(CommonParams.FQ, "NOT(id:1)");
|
params.add(CommonParams.FQ, "NOT(id:1)");
|
||||||
|
|
||||||
|
@ -77,8 +77,10 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
|
||||||
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
||||||
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
||||||
List<String> collations = suggestions.getAll("collation");
|
List<String> collations = suggestions.getAll("collation");
|
||||||
assertTrue(collations.size() == 1);
|
assertTrue(collations.size() > 0);
|
||||||
assertTrue(collations.get(0).equals("lowerfilt:(+faith +hope +love)"));
|
for(String collation : collations) {
|
||||||
|
assertTrue(!collation.equals("lowerfilt:(+faith +hope +loaves)"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -180,7 +182,7 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
|
||||||
// combination exists.
|
// combination exists.
|
||||||
params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES);
|
params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES);
|
||||||
params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS);
|
params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS);
|
||||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5");
|
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10");
|
||||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "2");
|
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "2");
|
||||||
handler = core.getRequestHandler("spellCheckCompRH");
|
handler = core.getRequestHandler("spellCheckCompRH");
|
||||||
rsp = new SolrQueryResponse();
|
rsp = new SolrQueryResponse();
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.junit.Test;
|
||||||
public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 {
|
public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
private static Map<Token, LinkedHashMap<String, Integer>> suggestions = new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();
|
private static Map<Token, LinkedHashMap<String, Integer>> suggestions = new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();
|
||||||
|
private static Map<Token, LinkedHashMap<String, Integer>> lotsaSuggestions = new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Before
|
@Before
|
||||||
|
@ -72,21 +73,57 @@ public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 {
|
||||||
suggestions.put(new Token("AYE", 0, 2), AYE);
|
suggestions.put(new Token("AYE", 0, 2), AYE);
|
||||||
suggestions.put(new Token("BEE", 0, 2), BEE);
|
suggestions.put(new Token("BEE", 0, 2), BEE);
|
||||||
suggestions.put(new Token("CEE", 0, 2), CEE);
|
suggestions.put(new Token("CEE", 0, 2), CEE);
|
||||||
|
|
||||||
|
lotsaSuggestions.put(new Token("AYE", 0, 2), AYE);
|
||||||
|
lotsaSuggestions.put(new Token("BEE", 0, 2), BEE);
|
||||||
|
lotsaSuggestions.put(new Token("CEE", 0, 2), CEE);
|
||||||
|
|
||||||
|
lotsaSuggestions.put(new Token("AYE1", 0, 3), AYE);
|
||||||
|
lotsaSuggestions.put(new Token("BEE1", 0, 3), BEE);
|
||||||
|
lotsaSuggestions.put(new Token("CEE1", 0, 3), CEE);
|
||||||
|
|
||||||
|
lotsaSuggestions.put(new Token("AYE2", 0, 3), AYE);
|
||||||
|
lotsaSuggestions.put(new Token("BEE2", 0, 3), BEE);
|
||||||
|
lotsaSuggestions.put(new Token("CEE2", 0, 3), CEE);
|
||||||
|
|
||||||
|
lotsaSuggestions.put(new Token("AYE3", 0, 3), AYE);
|
||||||
|
lotsaSuggestions.put(new Token("BEE3", 0, 3), BEE);
|
||||||
|
lotsaSuggestions.put(new Token("CEE3", 0, 3), CEE);
|
||||||
|
|
||||||
|
lotsaSuggestions.put(new Token("AYE4", 0, 3), AYE);
|
||||||
|
lotsaSuggestions.put(new Token("BEE4", 0, 3), BEE);
|
||||||
|
lotsaSuggestions.put(new Token("CEE4", 0, 3), CEE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testScalability() throws Exception {
|
||||||
|
PossibilityIterator iter = new PossibilityIterator(lotsaSuggestions, 1000, 10000);
|
||||||
|
int count = 0;
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
RankedSpellPossibility rsp = iter.next();
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
assertTrue(count==1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSpellPossibilityIterator() throws Exception {
|
public void testSpellPossibilityIterator() throws Exception {
|
||||||
PossibilityIterator iter = new PossibilityIterator(suggestions);
|
PossibilityIterator iter = new PossibilityIterator(suggestions, 1000, 10000);
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
|
|
||||||
iter.next();
|
RankedSpellPossibility rsp = iter.next();
|
||||||
|
if(count==0) {
|
||||||
|
assertTrue("I".equals(rsp.getCorrections().get(0).getCorrection()));
|
||||||
|
assertTrue("alpha".equals(rsp.getCorrections().get(1).getCorrection()));
|
||||||
|
assertTrue("one".equals(rsp.getCorrections().get(2).getCorrection()));
|
||||||
|
}
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
assertTrue(("Three maps (8*9*10) should return 720 iterations but instead returned " + count), count == 720);
|
assertTrue(("Three maps (8*9*10) should return 720 iterations but instead returned " + count), count == 720);
|
||||||
|
|
||||||
suggestions.remove(new Token("CEE", 0, 2));
|
suggestions.remove(new Token("CEE", 0, 2));
|
||||||
iter = new PossibilityIterator(suggestions);
|
iter = new PossibilityIterator(suggestions, 100, 10000);
|
||||||
count = 0;
|
count = 0;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
iter.next();
|
iter.next();
|
||||||
|
@ -95,16 +132,16 @@ public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 {
|
||||||
assertTrue(("Two maps (8*9) should return 72 iterations but instead returned " + count), count == 72);
|
assertTrue(("Two maps (8*9) should return 72 iterations but instead returned " + count), count == 72);
|
||||||
|
|
||||||
suggestions.remove(new Token("BEE", 0, 2));
|
suggestions.remove(new Token("BEE", 0, 2));
|
||||||
iter = new PossibilityIterator(suggestions);
|
iter = new PossibilityIterator(suggestions, 5, 10000);
|
||||||
count = 0;
|
count = 0;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
iter.next();
|
iter.next();
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
assertTrue(("One map of 8 should return 8 iterations but instead returned " + count), count == 8);
|
assertTrue(("We requested 5 suggestions but got " + count), count == 5);
|
||||||
|
|
||||||
suggestions.remove(new Token("AYE", 0, 2));
|
suggestions.remove(new Token("AYE", 0, 2));
|
||||||
iter = new PossibilityIterator(suggestions);
|
iter = new PossibilityIterator(suggestions, Integer.MAX_VALUE, 10000);
|
||||||
count = 0;
|
count = 0;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
iter.next();
|
iter.next();
|
||||||
|
|
Loading…
Reference in New Issue