mirror of https://github.com/apache/lucene.git
SOLR-2762: FSTLookup returns one less suggestion than it should when onlyMorePopular=true
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1172006 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
96c4bb7074
commit
eee77d22ef
|
@ -284,7 +284,7 @@ public class FSTLookup extends Lookup {
|
||||||
// traversals and sorting.
|
// traversals and sorting.
|
||||||
return lookupSortedAlphabetically(key, num);
|
return lookupSortedAlphabetically(key, num);
|
||||||
} else {
|
} else {
|
||||||
return lookupSortedByWeight(key, num, true);
|
return lookupSortedByWeight(key, num, false);
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// Should never happen, but anyway.
|
// Should never happen, but anyway.
|
||||||
|
@ -298,7 +298,7 @@ public class FSTLookup extends Lookup {
|
||||||
*/
|
*/
|
||||||
private List<LookupResult> lookupSortedAlphabetically(String key, int num) throws IOException {
|
private List<LookupResult> lookupSortedAlphabetically(String key, int num) throws IOException {
|
||||||
// Greedily get num results from each weight branch.
|
// Greedily get num results from each weight branch.
|
||||||
List<LookupResult> res = lookupSortedByWeight(key, num, false);
|
List<LookupResult> res = lookupSortedByWeight(key, num, true);
|
||||||
|
|
||||||
// Sort and trim.
|
// Sort and trim.
|
||||||
Collections.sort(res, new Comparator<LookupResult>() {
|
Collections.sort(res, new Comparator<LookupResult>() {
|
||||||
|
@ -316,11 +316,14 @@ public class FSTLookup extends Lookup {
|
||||||
/**
|
/**
|
||||||
* Lookup suggestions sorted by weight (descending order).
|
* Lookup suggestions sorted by weight (descending order).
|
||||||
*
|
*
|
||||||
* @param greedy If <code>true</code>, the routine terminates immediately when <code>num</code>
|
* @param collectAll If <code>false</code>, the routine terminates immediately when <code>num</code>
|
||||||
* suggestions have been collected. If <code>false</code>, it will collect suggestions from
|
* suggestions have been collected. If <code>true</code>, it will collect suggestions from
|
||||||
* all weight arcs (needed for {@link #lookupSortedAlphabetically}.
|
* all weight arcs (needed for {@link #lookupSortedAlphabetically}).
|
||||||
*/
|
*/
|
||||||
private ArrayList<LookupResult> lookupSortedByWeight(String key, int num, boolean greedy) throws IOException {
|
private ArrayList<LookupResult> lookupSortedByWeight(String key, int num, boolean collectAll) throws IOException {
|
||||||
|
// Don't overallocate the results buffers. This also serves the purpose of allowing
|
||||||
|
// the user of this class to request all matches using Integer.MAX_VALUE as the number
|
||||||
|
// of results.
|
||||||
final ArrayList<LookupResult> res = new ArrayList<LookupResult>(Math.min(10, num));
|
final ArrayList<LookupResult> res = new ArrayList<LookupResult>(Math.min(10, num));
|
||||||
final StringBuilder output = new StringBuilder(key);
|
final StringBuilder output = new StringBuilder(key);
|
||||||
final int matchLength = key.length() - 1;
|
final int matchLength = key.length() - 1;
|
||||||
|
@ -338,15 +341,18 @@ public class FSTLookup extends Lookup {
|
||||||
// of the key prefix. The arc we're at is the last key's byte,
|
// of the key prefix. The arc we're at is the last key's byte,
|
||||||
// so we will collect it too.
|
// so we will collect it too.
|
||||||
output.setLength(matchLength);
|
output.setLength(matchLength);
|
||||||
if (collect(res, num, weight, output, arc) && greedy) {
|
if (collect(res, num, weight, output, arc) && !collectAll) {
|
||||||
// We have enough suggestions to return immediately. Keep on looking for an
|
// We have enough suggestions to return immediately. Keep on looking for an
|
||||||
// exact match, if requested.
|
// exact match, if requested.
|
||||||
if (exactMatchFirst) {
|
if (exactMatchFirst) {
|
||||||
Float exactMatchWeight = getExactMatchStartingFromRootArc(i, key);
|
if (!checkExistingAndReorder(res, key)) {
|
||||||
if (exactMatchWeight != null) {
|
Float exactMatchWeight = getExactMatchStartingFromRootArc(i, key);
|
||||||
res.add(0, new LookupResult(key, exactMatchWeight));
|
if (exactMatchWeight != null) {
|
||||||
while (res.size() > num) {
|
// Insert as the first result and truncate at num.
|
||||||
res.remove(res.size() - 1);
|
while (res.size() >= num) {
|
||||||
|
res.remove(res.size() - 1);
|
||||||
|
}
|
||||||
|
res.add(0, new LookupResult(key, exactMatchWeight));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -357,6 +363,25 @@ public class FSTLookup extends Lookup {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if the list of {@link LookupResult}s already has a <code>key</code>. If so,
|
||||||
|
* reorders that {@link LookupResult} to the first position.
|
||||||
|
*
|
||||||
|
* @return Returns <code>true</code> if and only if <code>list</code> contained <code>key</code>.
|
||||||
|
*/
|
||||||
|
private boolean checkExistingAndReorder(ArrayList<LookupResult> list, String key) {
|
||||||
|
// We assume list does not have duplicates (because of how the FST is created).
|
||||||
|
for (int i = list.size(); --i >= 0;) {
|
||||||
|
if (key.equals(list.get(i).key)) {
|
||||||
|
// Key found. Unless already at i==0, remove it and push up front so that the ordering
|
||||||
|
// remains identical with the exception of the exact match.
|
||||||
|
list.add(0, list.remove(i));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Descend along the path starting at <code>arc</code> and going through
|
* Descend along the path starting at <code>arc</code> and going through
|
||||||
* bytes in <code>utf8</code> argument.
|
* bytes in <code>utf8</code> argument.
|
||||||
|
|
|
@ -62,9 +62,11 @@ public class FSTLookupTest extends LuceneTestCase {
|
||||||
tf("threat", 1),
|
tf("threat", 1),
|
||||||
tf("three", 1),
|
tf("three", 1),
|
||||||
tf("foundation", 1),
|
tf("foundation", 1),
|
||||||
tf("fourier", 1),
|
tf("fourblah", 1),
|
||||||
tf("four", 1),
|
tf("fourteen", 1),
|
||||||
tf("fourty", 1),
|
tf("four", 0.5f),
|
||||||
|
tf("fourier", 0.5f),
|
||||||
|
tf("fourty", 0.5f),
|
||||||
tf("xo", 1),
|
tf("xo", 1),
|
||||||
};
|
};
|
||||||
return keys;
|
return keys;
|
||||||
|
@ -91,6 +93,18 @@ public class FSTLookupTest extends LuceneTestCase {
|
||||||
"one/0.0",
|
"one/0.0",
|
||||||
"oneness/1.0");
|
"oneness/1.0");
|
||||||
|
|
||||||
|
// 'four' is collected in a bucket and then again as an exact match.
|
||||||
|
assertMatchEquals(lookup.lookup("four", true, 2),
|
||||||
|
"four/0.0",
|
||||||
|
"fourblah/1.0");
|
||||||
|
|
||||||
|
// Check reordering of exact matches.
|
||||||
|
assertMatchEquals(lookup.lookup("four", true, 4),
|
||||||
|
"four/0.0",
|
||||||
|
"fourblah/1.0",
|
||||||
|
"fourteen/1.0",
|
||||||
|
"fourier/0.0");
|
||||||
|
|
||||||
lookup = new FSTLookup(10, false);
|
lookup = new FSTLookup(10, false);
|
||||||
lookup.build(new TermFreqArrayIterator(evalKeys()));
|
lookup.build(new TermFreqArrayIterator(evalKeys()));
|
||||||
|
|
||||||
|
|
|
@ -209,6 +209,9 @@ Optimizations
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
* SOLR-2762: FSTLookup could return duplicate results or one result less
|
||||||
|
than requested. (David Smiley, Dawid Weiss)
|
||||||
|
|
||||||
* SOLR-2741: Bugs in facet range display in trunk (janhoy)
|
* SOLR-2741: Bugs in facet range display in trunk (janhoy)
|
||||||
|
|
||||||
* SOLR-1908: Fixed SignatureUpdateProcessor to fail to initialize on
|
* SOLR-1908: Fixed SignatureUpdateProcessor to fail to initialize on
|
||||||
|
|
Loading…
Reference in New Issue