LUCENE-5120: AnalyzingSuggester might modify it's FST's cached root arc if payloads are used

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1504695 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2013-07-18 22:25:32 +00:00
parent 8928d3d351
commit f0a66e6903
3 changed files with 45 additions and 41 deletions

View File

@ -244,6 +244,10 @@ Bug Fixes
SortedSetDocValuesReaderState and SortedSetDocValuesAccumulator.
(Robert Muir, Mike McCandless)
* LUCENE-5120: AnalyzingSuggester modifed it's FST's cached root arc if payloads
are used and the entire output resided on the root arc on the first access. This
caused subsequent suggest calls to fail. (Simon Willnauer)
Optimizations
* LUCENE-4936: Improve numeric doc values compression in case all values share

View File

@ -624,9 +624,8 @@ public class AnalyzingSuggester extends Lookup {
}
assert sepIndex != -1;
spare.grow(sepIndex);
int payloadLen = output2.length - sepIndex - 1;
output2.length = sepIndex;
UnicodeUtil.UTF8toUTF16(output2, spare);
final int payloadLen = output2.length - sepIndex - 1;
UnicodeUtil.UTF8toUTF16(output2.bytes, output2.offset, sepIndex, spare);
BytesRef payload = new BytesRef(payloadLen);
System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen);
payload.length = payloadLen;

View File

@ -112,44 +112,45 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
suggester.build(new TermFreqPayloadArrayIterator(keys));
for (int i = 0; i < 2; i++) {
// top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2);
assertEquals(1, results.size());
assertEquals("foo", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
assertEquals(new BytesRef("hello"), results.get(0).payload);
// top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2);
assertEquals(1, results.size());
assertEquals("foo", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
assertEquals(new BytesRef("hello"), results.get(0).payload);
// top N of 1 for 'bar': we return this even though
// barbar is higher because exactFirst is enabled:
results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random()), false, 1);
assertEquals(1, results.size());
assertEquals("bar", results.get(0).key.toString());
assertEquals(10, results.get(0).value, 0.01F);
assertEquals(new BytesRef("goodbye"), results.get(0).payload);
// top N of 1 for 'bar': we return this even though
// barbar is higher because exactFirst is enabled:
results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random()), false, 1);
assertEquals(1, results.size());
assertEquals("bar", results.get(0).key.toString());
assertEquals(10, results.get(0).value, 0.01F);
assertEquals(new BytesRef("goodbye"), results.get(0).payload);
// top N Of 2 for 'b'
results = suggester.lookup(_TestUtil.stringToCharSequence("b", random()), false, 2);
assertEquals(2, results.size());
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals(new BytesRef("thank you"), results.get(0).payload);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
assertEquals(new BytesRef("goodbye"), results.get(1).payload);
// top N Of 2 for 'b'
results = suggester.lookup(_TestUtil.stringToCharSequence("b", random()), false, 2);
assertEquals(2, results.size());
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals(new BytesRef("thank you"), results.get(0).payload);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
assertEquals(new BytesRef("goodbye"), results.get(1).payload);
// top N of 3 for 'ba'
results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random()), false, 3);
assertEquals(3, results.size());
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals(new BytesRef("thank you"), results.get(0).payload);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
assertEquals(new BytesRef("goodbye"), results.get(1).payload);
assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F);
assertEquals(new BytesRef("for all the fish"), results.get(2).payload);
// top N of 3 for 'ba'
results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random()), false, 3);
assertEquals(3, results.size());
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals(new BytesRef("thank you"), results.get(0).payload);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
assertEquals(new BytesRef("goodbye"), results.get(1).payload);
assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F);
assertEquals(new BytesRef("for all the fish"), results.get(2).payload);
}
}
// TODO: more tests