mirror of https://github.com/apache/lucene.git
LUCENE-6050: allow for MUST and MUST_NOT (in addition to SHOULD) context tags in Analyzing/BlendedInfixSuggester
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1638441 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fe6eff7d1b
commit
95ddd51108
|
@ -91,6 +91,10 @@ New Features
|
||||||
index using it will need to be rebuilt. (Thomas
|
index using it will need to be rebuilt. (Thomas
|
||||||
Neidhart via Mike McCandless)
|
Neidhart via Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-6050: Accept MUST and MUST_NOT (in addition to SHOULD) for
|
||||||
|
each context passed to Analyzing/BlendedInfixSuggester (Arcadius
|
||||||
|
Ahouansou, jane chang via Mike McCandless)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
|
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
|
||||||
|
|
|
@ -23,6 +23,8 @@ import java.io.StringReader;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
@ -390,7 +392,22 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
|
|
||||||
/** Lookup, without any context. */
|
/** Lookup, without any context. */
|
||||||
public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
||||||
return lookup(key, null, num, allTermsRequired, doHighlight);
|
return lookup(key, (Map<BytesRef, BooleanClause.Occur>)null, num, allTermsRequired, doHighlight);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Lookup, with context but without booleans. Context booleans default to SHOULD,
|
||||||
|
* so each suggestion must have at least one of the contexts. */
|
||||||
|
public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
||||||
|
|
||||||
|
if (contexts == null) {
|
||||||
|
return lookup(key, num, allTermsRequired, doHighlight);
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
|
||||||
|
for (BytesRef context : contexts) {
|
||||||
|
contextInfo.put(context, BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
return lookup(key, contextInfo, num, allTermsRequired, doHighlight);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This is called if the last token isn't ended
|
/** This is called if the last token isn't ended
|
||||||
|
@ -408,7 +425,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
/** Retrieve suggestions, specifying whether all terms
|
/** Retrieve suggestions, specifying whether all terms
|
||||||
* must match ({@code allTermsRequired}) and whether the hits
|
* must match ({@code allTermsRequired}) and whether the hits
|
||||||
* should be highlighted ({@code doHighlight}). */
|
* should be highlighted ({@code doHighlight}). */
|
||||||
public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
public List<LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
||||||
|
|
||||||
if (searcherMgr == null) {
|
if (searcherMgr == null) {
|
||||||
throw new IllegalStateException("suggester was not built");
|
throw new IllegalStateException("suggester was not built");
|
||||||
|
@ -469,21 +486,35 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (contexts != null) {
|
if (contextInfo != null) {
|
||||||
BooleanQuery sub = new BooleanQuery();
|
|
||||||
query.add(sub, BooleanClause.Occur.MUST);
|
boolean allMustNot = true;
|
||||||
for(BytesRef context : contexts) {
|
for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
|
||||||
// NOTE: we "should" wrap this in
|
if (entry.getValue() != BooleanClause.Occur.MUST_NOT) {
|
||||||
// ConstantScoreQuery, or maybe send this as a
|
allMustNot = false;
|
||||||
// Filter instead to search, but since all of
|
break;
|
||||||
// these are MUST'd, the change to the score won't
|
}
|
||||||
// affect the overall ranking. Since we indexed
|
}
|
||||||
// as DOCS_ONLY, the perf should be the same
|
|
||||||
// either way (no freq int[] blocks to decode):
|
|
||||||
|
|
||||||
// TODO: if we had a BinaryTermField we could fix
|
// do not make a subquery if all context booleans are must not
|
||||||
// this "must be valid ut8f" limitation:
|
if (allMustNot == true) {
|
||||||
sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.utf8ToString())), BooleanClause.Occur.SHOULD);
|
for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
|
||||||
|
query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), BooleanClause.Occur.MUST_NOT);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
BooleanQuery sub = new BooleanQuery();
|
||||||
|
query.add(sub, BooleanClause.Occur.MUST);
|
||||||
|
|
||||||
|
for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
|
||||||
|
// NOTE: we "should" wrap this in
|
||||||
|
// ConstantScoreQuery, or maybe send this as a
|
||||||
|
// Filter instead to search.
|
||||||
|
|
||||||
|
// TODO: if we had a BinaryTermField we could fix
|
||||||
|
// this "must be valid ut8f" limitation:
|
||||||
|
sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), entry.getValue());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
@ -33,6 +34,7 @@ import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.index.MultiDocValues;
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.FieldDoc;
|
import org.apache.lucene.search.FieldDoc;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.TopFieldDocs;
|
import org.apache.lucene.search.TopFieldDocs;
|
||||||
|
@ -146,6 +148,12 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
|
||||||
return super.lookup(key, contexts, num * numFactor, allTermsRequired, doHighlight);
|
return super.lookup(key, contexts, num * numFactor, allTermsRequired, doHighlight);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Lookup.LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
||||||
|
// here we multiply the number of searched element by the defined factor
|
||||||
|
return super.lookup(key, contextInfo, num * numFactor, allTermsRequired, doHighlight);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected FieldType getTextFieldType() {
|
protected FieldType getTextFieldType() {
|
||||||
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
|
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
|
|
@ -23,8 +23,10 @@ import java.nio.file.Path;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
@ -35,6 +37,7 @@ import org.apache.lucene.analysis.core.StopFilter;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.util.CharArraySet;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.suggest.Input;
|
import org.apache.lucene.search.suggest.Input;
|
||||||
import org.apache.lucene.search.suggest.InputArrayIterator;
|
import org.apache.lucene.search.suggest.InputArrayIterator;
|
||||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||||
|
@ -899,7 +902,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
||||||
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
assertTrue(result.contexts.contains(new BytesRef("bar")));
|
assertTrue(result.contexts.contains(new BytesRef("bar")));
|
||||||
|
|
||||||
// Both suggestions have "foo" context:
|
// Both have "foo" context:
|
||||||
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo"), 10, true, true);
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo"), 10, true, true);
|
||||||
assertEquals(2, results.size());
|
assertEquals(2, results.size());
|
||||||
|
|
||||||
|
@ -934,8 +937,16 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
||||||
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
assertTrue(result.contexts.contains(new BytesRef("bar")));
|
assertTrue(result.contexts.contains(new BytesRef("bar")));
|
||||||
|
|
||||||
// Only one has "baz" context:
|
// None do not have "foo" context:
|
||||||
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("baz"), 10, true, true);
|
Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
|
||||||
|
contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST_NOT);
|
||||||
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
|
||||||
|
assertEquals(0, results.size());
|
||||||
|
|
||||||
|
// Only one does not have "bar" context:
|
||||||
|
contextInfo.clear();
|
||||||
|
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
|
||||||
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
|
||||||
assertEquals(1, results.size());
|
assertEquals(1, results.size());
|
||||||
|
|
||||||
result = results.get(0);
|
result = results.get(0);
|
||||||
|
@ -947,7 +958,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
||||||
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
assertTrue(result.contexts.contains(new BytesRef("baz")));
|
assertTrue(result.contexts.contains(new BytesRef("baz")));
|
||||||
|
|
||||||
// Both have foo or bar:
|
// Both have "foo" or "bar" context:
|
||||||
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo", "bar"), 10, true, true);
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo", "bar"), 10, true, true);
|
||||||
assertEquals(2, results.size());
|
assertEquals(2, results.size());
|
||||||
|
|
||||||
|
@ -969,6 +980,99 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
||||||
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
assertTrue(result.contexts.contains(new BytesRef("bar")));
|
assertTrue(result.contexts.contains(new BytesRef("bar")));
|
||||||
|
|
||||||
|
// Both have "bar" or "baz" context:
|
||||||
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("bar", "baz"), 10, true, true);
|
||||||
|
assertEquals(2, results.size());
|
||||||
|
|
||||||
|
result = results.get(0);
|
||||||
|
assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
|
||||||
|
assertEquals(10, result.value);
|
||||||
|
assertEquals(new BytesRef("foobaz"), result.payload);
|
||||||
|
assertNotNull(result.contexts);
|
||||||
|
assertEquals(2, result.contexts.size());
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("baz")));
|
||||||
|
|
||||||
|
result = results.get(1);
|
||||||
|
assertEquals("lend me your <b>ear</b>", result.key);
|
||||||
|
assertEquals(8, result.value);
|
||||||
|
assertEquals(new BytesRef("foobar"), result.payload);
|
||||||
|
assertNotNull(result.contexts);
|
||||||
|
assertEquals(2, result.contexts.size());
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("bar")));
|
||||||
|
|
||||||
|
// Only one has "foo" and "bar" context:
|
||||||
|
contextInfo.clear();
|
||||||
|
contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST);
|
||||||
|
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST);
|
||||||
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
|
||||||
|
assertEquals(1, results.size());
|
||||||
|
|
||||||
|
result = results.get(0);
|
||||||
|
assertEquals("lend me your <b>ear</b>", result.key);
|
||||||
|
assertEquals(8, result.value);
|
||||||
|
assertEquals(new BytesRef("foobar"), result.payload);
|
||||||
|
assertNotNull(result.contexts);
|
||||||
|
assertEquals(2, result.contexts.size());
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("bar")));
|
||||||
|
|
||||||
|
// None have "bar" and "baz" context:
|
||||||
|
contextInfo.clear();
|
||||||
|
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST);
|
||||||
|
contextInfo.put(new BytesRef("baz"), BooleanClause.Occur.MUST);
|
||||||
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
|
||||||
|
assertEquals(0, results.size());
|
||||||
|
|
||||||
|
// None do not have "foo" and do not have "bar" context:
|
||||||
|
contextInfo.clear();
|
||||||
|
contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST_NOT);
|
||||||
|
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
|
||||||
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
|
||||||
|
assertEquals(0, results.size());
|
||||||
|
|
||||||
|
// Both do not have "bar" and do not have "baz" context:
|
||||||
|
contextInfo.clear();
|
||||||
|
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
|
||||||
|
contextInfo.put(new BytesRef("baz"), BooleanClause.Occur.MUST_NOT);
|
||||||
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("bar", "baz"), 10, true, true);
|
||||||
|
assertEquals(2, results.size());
|
||||||
|
|
||||||
|
result = results.get(0);
|
||||||
|
assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
|
||||||
|
assertEquals(10, result.value);
|
||||||
|
assertEquals(new BytesRef("foobaz"), result.payload);
|
||||||
|
assertNotNull(result.contexts);
|
||||||
|
assertEquals(2, result.contexts.size());
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("baz")));
|
||||||
|
|
||||||
|
result = results.get(1);
|
||||||
|
assertEquals("lend me your <b>ear</b>", result.key);
|
||||||
|
assertEquals(8, result.value);
|
||||||
|
assertEquals(new BytesRef("foobar"), result.payload);
|
||||||
|
assertNotNull(result.contexts);
|
||||||
|
assertEquals(2, result.contexts.size());
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("bar")));
|
||||||
|
|
||||||
|
// Only one has "foo" and does not have "bar" context:
|
||||||
|
contextInfo.clear();
|
||||||
|
contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST);
|
||||||
|
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
|
||||||
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
|
||||||
|
assertEquals(1, results.size());
|
||||||
|
|
||||||
|
result = results.get(0);
|
||||||
|
assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
|
||||||
|
assertEquals(10, result.value);
|
||||||
|
assertEquals(new BytesRef("foobaz"), result.payload);
|
||||||
|
assertNotNull(result.contexts);
|
||||||
|
assertEquals(2, result.contexts.size());
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
|
assertTrue(result.contexts.contains(new BytesRef("baz")));
|
||||||
|
|
||||||
suggester.close();
|
suggester.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -137,12 +137,12 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
|
||||||
|
|
||||||
|
|
||||||
// we don't find it for in the 2 first
|
// we don't find it for in the 2 first
|
||||||
assertEquals(2, suggester.lookup("the", null, 2, true, false).size());
|
assertEquals(2, suggester.lookup("the", 2, true, false).size());
|
||||||
long w0 = getInResults(suggester, "the", ret, 2);
|
long w0 = getInResults(suggester, "the", ret, 2);
|
||||||
assertTrue(w0 < 0);
|
assertTrue(w0 < 0);
|
||||||
|
|
||||||
// but it's there if we search for 3 elements
|
// but it's there if we search for 3 elements
|
||||||
assertEquals(3, suggester.lookup("the", null, 3, true, false).size());
|
assertEquals(3, suggester.lookup("the", 3, true, false).size());
|
||||||
long w1 = getInResults(suggester, "the", ret, 3);
|
long w1 = getInResults(suggester, "the", ret, 3);
|
||||||
assertTrue(w1 > 0);
|
assertTrue(w1 > 0);
|
||||||
|
|
||||||
|
@ -188,7 +188,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
|
||||||
suggester.build(new InputArrayIterator(keys));
|
suggester.build(new InputArrayIterator(keys));
|
||||||
|
|
||||||
|
|
||||||
List<Lookup.LookupResult> responses = suggester.lookup("the", null, 4, true, false);
|
List<Lookup.LookupResult> responses = suggester.lookup("the", 4, true, false);
|
||||||
|
|
||||||
for (Lookup.LookupResult response : responses) {
|
for (Lookup.LookupResult response : responses) {
|
||||||
System.out.println(response);
|
System.out.println(response);
|
||||||
|
@ -199,7 +199,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
|
||||||
|
|
||||||
private static long getInResults(BlendedInfixSuggester suggester, String prefix, BytesRef payload, int num) throws IOException {
|
private static long getInResults(BlendedInfixSuggester suggester, String prefix, BytesRef payload, int num) throws IOException {
|
||||||
|
|
||||||
List<Lookup.LookupResult> responses = suggester.lookup(prefix, null, num, true, false);
|
List<Lookup.LookupResult> responses = suggester.lookup(prefix, num, true, false);
|
||||||
|
|
||||||
for (Lookup.LookupResult response : responses) {
|
for (Lookup.LookupResult response : responses) {
|
||||||
if (response.payload.equals(payload)) {
|
if (response.payload.equals(payload)) {
|
||||||
|
|
Loading…
Reference in New Issue