mirror of https://github.com/apache/lucene.git
SOLR-2762: FSTLookup returns one less suggestion than it should when onlyMorePopular=true
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1172006 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
96c4bb7074
commit
eee77d22ef
|
@ -284,7 +284,7 @@ public class FSTLookup extends Lookup {
|
|||
// traversals and sorting.
|
||||
return lookupSortedAlphabetically(key, num);
|
||||
} else {
|
||||
return lookupSortedByWeight(key, num, true);
|
||||
return lookupSortedByWeight(key, num, false);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// Should never happen, but anyway.
|
||||
|
@ -298,7 +298,7 @@ public class FSTLookup extends Lookup {
|
|||
*/
|
||||
private List<LookupResult> lookupSortedAlphabetically(String key, int num) throws IOException {
|
||||
// Greedily get num results from each weight branch.
|
||||
List<LookupResult> res = lookupSortedByWeight(key, num, false);
|
||||
List<LookupResult> res = lookupSortedByWeight(key, num, true);
|
||||
|
||||
// Sort and trim.
|
||||
Collections.sort(res, new Comparator<LookupResult>() {
|
||||
|
@ -316,11 +316,14 @@ public class FSTLookup extends Lookup {
|
|||
/**
|
||||
* Lookup suggestions sorted by weight (descending order).
|
||||
*
|
||||
* @param greedy If <code>true</code>, the routine terminates immediately when <code>num</code>
|
||||
* @param collectAll If <code>false</code>, the routine terminates immediately when <code>num</code>
|
||||
* suggestions have been collected. If <code>true</code>, it will collect suggestions from
|
||||
* all weight arcs (needed for {@link #lookupSortedAlphabetically}).
|
||||
*/
|
||||
private ArrayList<LookupResult> lookupSortedByWeight(String key, int num, boolean greedy) throws IOException {
|
||||
private ArrayList<LookupResult> lookupSortedByWeight(String key, int num, boolean collectAll) throws IOException {
|
||||
// Don't overallocate the results buffers. This also serves the purpose of allowing
|
||||
// the user of this class to request all matches using Integer.MAX_VALUE as the number
|
||||
// of results.
|
||||
final ArrayList<LookupResult> res = new ArrayList<LookupResult>(Math.min(10, num));
|
||||
final StringBuilder output = new StringBuilder(key);
|
||||
final int matchLength = key.length() - 1;
|
||||
|
@ -338,15 +341,18 @@ public class FSTLookup extends Lookup {
|
|||
// of the key prefix. The arc we're at is the last key's byte,
|
||||
// so we will collect it too.
|
||||
output.setLength(matchLength);
|
||||
if (collect(res, num, weight, output, arc) && greedy) {
|
||||
if (collect(res, num, weight, output, arc) && !collectAll) {
|
||||
// We have enough suggestions to return immediately. Keep on looking for an
|
||||
// exact match, if requested.
|
||||
if (exactMatchFirst) {
|
||||
Float exactMatchWeight = getExactMatchStartingFromRootArc(i, key);
|
||||
if (exactMatchWeight != null) {
|
||||
res.add(0, new LookupResult(key, exactMatchWeight));
|
||||
while (res.size() > num) {
|
||||
res.remove(res.size() - 1);
|
||||
if (!checkExistingAndReorder(res, key)) {
|
||||
Float exactMatchWeight = getExactMatchStartingFromRootArc(i, key);
|
||||
if (exactMatchWeight != null) {
|
||||
// Insert as the first result and truncate at num.
|
||||
while (res.size() >= num) {
|
||||
res.remove(res.size() - 1);
|
||||
}
|
||||
res.add(0, new LookupResult(key, exactMatchWeight));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -357,6 +363,25 @@ public class FSTLookup extends Lookup {
|
|||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the list of {@link LookupResult}s already has a <code>key</code>. If so,
|
||||
* reorders that {@link LookupResult} to the first position.
|
||||
*
|
||||
* @return Returns <code>true<code> if and only if <code>list</code> contained <code>key</code>.
|
||||
*/
|
||||
private boolean checkExistingAndReorder(ArrayList<LookupResult> list, String key) {
|
||||
// We assume list does not have duplicates (because of how the FST is created).
|
||||
for (int i = list.size(); --i >= 0;) {
|
||||
if (key.equals(list.get(i).key)) {
|
||||
// Key found. Unless already at i==0, remove it and push up front so that the ordering
|
||||
// remains identical with the exception of the exact match.
|
||||
list.add(0, list.remove(i));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Descend along the path starting at <code>arc</code> and going through
|
||||
* bytes in <code>utf8</code> argument.
|
||||
|
|
|
@ -62,9 +62,11 @@ public class FSTLookupTest extends LuceneTestCase {
|
|||
tf("threat", 1),
|
||||
tf("three", 1),
|
||||
tf("foundation", 1),
|
||||
tf("fourier", 1),
|
||||
tf("four", 1),
|
||||
tf("fourty", 1),
|
||||
tf("fourblah", 1),
|
||||
tf("fourteen", 1),
|
||||
tf("four", 0.5f),
|
||||
tf("fourier", 0.5f),
|
||||
tf("fourty", 0.5f),
|
||||
tf("xo", 1),
|
||||
};
|
||||
return keys;
|
||||
|
@ -91,6 +93,18 @@ public class FSTLookupTest extends LuceneTestCase {
|
|||
"one/0.0",
|
||||
"oneness/1.0");
|
||||
|
||||
// 'four' is collected in a bucket and then again as an exact match.
|
||||
assertMatchEquals(lookup.lookup("four", true, 2),
|
||||
"four/0.0",
|
||||
"fourblah/1.0");
|
||||
|
||||
// Check reordering of exact matches.
|
||||
assertMatchEquals(lookup.lookup("four", true, 4),
|
||||
"four/0.0",
|
||||
"fourblah/1.0",
|
||||
"fourteen/1.0",
|
||||
"fourier/0.0");
|
||||
|
||||
lookup = new FSTLookup(10, false);
|
||||
lookup.build(new TermFreqArrayIterator(evalKeys()));
|
||||
|
||||
|
|
|
@ -209,6 +209,9 @@ Optimizations
|
|||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
* SOLR-2762: FSTLookup could return duplicate results or one result less
|
||||
than requested. (David Smiley, Dawid Weiss)
|
||||
|
||||
* SOLR-2741: Bugs in facet range display in trunk (janhoy)
|
||||
|
||||
* SOLR-1908: Fixed SignatureUpdateProcessor to fail to initialize on
|
||||
|
|
Loading…
Reference in New Issue