allow full binary terms for suggest contexts

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1679392 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-05-14 15:29:41 +00:00
parent 6503e980c3
commit f63253da5f
2 changed files with 64 additions and 11 deletions

View File

@ -84,7 +84,6 @@ import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
// javadocs
// TODO:
// - a PostingsFormat that stores super-high-freq terms as
@ -104,9 +103,8 @@ import org.apache.lucene.util.RamUsageEstimator;
* this suggester best applies when there is a strong
* a-priori ranking of all the suggestions.
*
* <p>This suggester supports contexts, however the
* contexts must be valid utf8 (arbitrary binary terms will
* not work).
* <p>This suggester supports contexts, including arbitrary binary
* terms.
*
* @lucene.experimental */
@ -373,9 +371,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
}
if (contexts != null) {
for(BytesRef context : contexts) {
// TODO: if we had a BinaryTermField we could fix
// this "must be valid ut8f" limitation:
doc.add(new StringField(CONTEXTS_FIELD_NAME, context.utf8ToString(), Field.Store.NO));
doc.add(new StringField(CONTEXTS_FIELD_NAME, context, Field.Store.NO));
doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
}
}
@ -529,10 +525,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
// NOTE: we "should" wrap this in
// ConstantScoreQuery, or maybe send this as a
// Filter instead to search.
// TODO: if we had a BinaryTermField we could fix
// this "must be valid ut8f" limitation:
sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), entry.getValue());
sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey())), entry.getValue());
}
}
}

View File

@ -940,6 +940,15 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
return result;
}
private Set<BytesRef> asSet(byte[]... values) {
HashSet<BytesRef> result = new HashSet<>();
for(byte[] value : values) {
result.add(new BytesRef(value));
}
return result;
}
// LUCENE-5528
public void testBasicContext() throws Exception {
Input keys[] = new Input[] {
@ -1194,5 +1203,56 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
return sb.toString();
}
public void testBinaryContext() throws Exception {
byte[] context1 = new byte[4];
byte[] context2 = new byte[5];
byte[] context3 = new byte[1];
context3[0] = (byte) 0xff;
Input keys[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar"), asSet(context1, context2)),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet(context1, context3))
};
Path tempDir = createTempDir("analyzingInfixContext");
for(int iter=0;iter<2;iter++) {
AnalyzingInfixSuggester suggester;
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
if (iter == 0) {
suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
suggester.build(new InputArrayIterator(keys));
} else {
// Test again, after close/reopen:
suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
}
// Both have context1:
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet(context1), 10, true, true);
assertEquals(2, results.size());
LookupResult result = results.get(0);
assertEquals("a penny saved is a penny earned", result.key);
assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
assertEquals(10, result.value);
assertEquals(new BytesRef("foobaz"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef(context1)));
assertTrue(result.contexts.contains(new BytesRef(context3)));
result = results.get(1);
assertEquals("lend me your ear", result.key);
assertEquals("lend me your <b>ear</b>", result.highlightKey);
assertEquals(8, result.value);
assertEquals(new BytesRef("foobar"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef(context1)));
assertTrue(result.contexts.contains(new BytesRef(context2)));
suggester.close();
a.close();
}
}
}