mirror of https://github.com/apache/lucene.git
LUCENE-6464: add expert AnalyzingInfixSuggester.lookup method that accepts arbitrary BooleanQuery to filter contexts
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1679579 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
73283d1455
commit
213cdf6793
|
@ -98,6 +98,11 @@ New Features
|
||||||
Circle, Path (buffered line string), and Polygon.
|
Circle, Path (buffered line string), and Polygon.
|
||||||
(Karl Wright via David Smiley)
|
(Karl Wright via David Smiley)
|
||||||
|
|
||||||
|
* LUCENE-6464: Add a new expert lookup method to
|
||||||
|
AnalyzingInfixSuggester to accept an arbitrary BooleanQuery to
|
||||||
|
express how contexts should be filtered. (Arcadius Ahouansou via
|
||||||
|
Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if
|
* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if
|
||||||
|
|
|
@ -24,7 +24,6 @@ import java.nio.file.Path;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -61,6 +60,7 @@ import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.index.SortingMergePolicy;
|
import org.apache.lucene.index.SortingMergePolicy;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.Collector;
|
import org.apache.lucene.search.Collector;
|
||||||
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
|
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
|
||||||
|
@ -407,22 +407,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
|
|
||||||
/** Lookup, without any context. */
|
/** Lookup, without any context. */
|
||||||
public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
||||||
return lookup(key, (Map<BytesRef, BooleanClause.Occur>)null, num, allTermsRequired, doHighlight);
|
return lookup(key, (BooleanQuery)null, num, allTermsRequired, doHighlight);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Lookup, with context but without booleans. Context booleans default to SHOULD,
|
/** Lookup, with context but without booleans. Context booleans default to SHOULD,
|
||||||
* so each suggestion must have at least one of the contexts. */
|
* so each suggestion must have at least one of the contexts. */
|
||||||
public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
||||||
|
return lookup(key, toQuery(contexts), num, allTermsRequired, doHighlight);
|
||||||
if (contexts == null) {
|
|
||||||
return lookup(key, num, allTermsRequired, doHighlight);
|
|
||||||
}
|
|
||||||
|
|
||||||
Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
|
|
||||||
for (BytesRef context : contexts) {
|
|
||||||
contextInfo.put(context, BooleanClause.Occur.SHOULD);
|
|
||||||
}
|
|
||||||
return lookup(key, contextInfo, num, allTermsRequired, doHighlight);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This is called if the last token isn't ended
|
/** This is called if the last token isn't ended
|
||||||
|
@ -441,6 +432,66 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
* must match ({@code allTermsRequired}) and whether the hits
|
* must match ({@code allTermsRequired}) and whether the hits
|
||||||
* should be highlighted ({@code doHighlight}). */
|
* should be highlighted ({@code doHighlight}). */
|
||||||
public List<LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
public List<LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
||||||
|
return lookup(key, toQuery(contextInfo), num, allTermsRequired, doHighlight);
|
||||||
|
}
|
||||||
|
|
||||||
|
private BooleanQuery toQuery(Map<BytesRef,BooleanClause.Occur> contextInfo) {
|
||||||
|
if (contextInfo == null || contextInfo.isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
BooleanQuery contextFilter = new BooleanQuery();
|
||||||
|
for (Map.Entry<BytesRef,BooleanClause.Occur> entry : contextInfo.entrySet()) {
|
||||||
|
addContextToQuery(contextFilter, entry.getKey(), entry.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
return contextFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
private BooleanQuery toQuery(Set<BytesRef> contextInfo) {
|
||||||
|
if (contextInfo == null || contextInfo.isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
BooleanQuery contextFilter = new BooleanQuery();
|
||||||
|
for (BytesRef context : contextInfo) {
|
||||||
|
addContextToQuery(contextFilter, context, BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
return contextFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method is handy as we do not need access to internal fields such as CONTEXTS_FIELD_NAME in order to build queries
|
||||||
|
* However, here may not be its best location.
|
||||||
|
*
|
||||||
|
* @param query an instance of @See {@link BooleanQuery}
|
||||||
|
* @param context the context
|
||||||
|
* @param clause one of {@link Occur}
|
||||||
|
*/
|
||||||
|
public void addContextToQuery(BooleanQuery query, BytesRef context, BooleanClause.Occur clause) {
|
||||||
|
// NOTE: we "should" wrap this in
|
||||||
|
// ConstantScoreQuery, or maybe send this as a
|
||||||
|
// Filter instead to search.
|
||||||
|
|
||||||
|
// TODO: if we had a BinaryTermField we could fix
|
||||||
|
// this "must be valid ut8f" limitation:
|
||||||
|
query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context)), clause);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is an advanced method providing the capability to send down to the suggester any
|
||||||
|
* arbitrary lucene query to be used to filter the result of the suggester
|
||||||
|
*
|
||||||
|
* @param key the keyword being looked for
|
||||||
|
* @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
|
||||||
|
* @param num number of items to return
|
||||||
|
* @param allTermsRequired all searched terms must match or not
|
||||||
|
* @param doHighlight if true, the matching term will be highlighted in the search result
|
||||||
|
* @return the result of the suggester
|
||||||
|
* @throws IOException f the is IO exception while reading data from the index
|
||||||
|
*/
|
||||||
|
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
||||||
|
|
||||||
if (searcherMgr == null) {
|
if (searcherMgr == null) {
|
||||||
throw new IllegalStateException("suggester was not built");
|
throw new IllegalStateException("suggester was not built");
|
||||||
|
@ -454,7 +505,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
BooleanQuery query;
|
BooleanQuery query;
|
||||||
Set<String> matchedTokens = new HashSet<>();
|
Set<String> matchedTokens;
|
||||||
String prefixToken = null;
|
String prefixToken = null;
|
||||||
|
|
||||||
try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
|
try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
|
||||||
|
@ -496,37 +547,29 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
matchedTokens.add(lastToken);
|
matchedTokens.add(lastToken);
|
||||||
lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
|
lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lastQuery != null) {
|
if (lastQuery != null) {
|
||||||
query.add(lastQuery, occur);
|
query.add(lastQuery, occur);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (contextInfo != null) {
|
if (contextQuery != null) {
|
||||||
|
|
||||||
boolean allMustNot = true;
|
boolean allMustNot = true;
|
||||||
for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
|
for (BooleanClause clause : contextQuery.clauses()) {
|
||||||
if (entry.getValue() != BooleanClause.Occur.MUST_NOT) {
|
if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
|
||||||
allMustNot = false;
|
allMustNot = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// do not make a subquery if all context booleans are must not
|
if (allMustNot) {
|
||||||
if (allMustNot == true) {
|
//all are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
|
||||||
for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
|
for (BooleanClause clause : contextQuery.clauses()) {
|
||||||
query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), BooleanClause.Occur.MUST_NOT);
|
query.add(clause);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
BooleanQuery sub = new BooleanQuery();
|
//Add contextQuery as sub-query
|
||||||
query.add(sub, BooleanClause.Occur.MUST);
|
query.add(contextQuery, BooleanClause.Occur.MUST);
|
||||||
|
|
||||||
for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
|
|
||||||
// NOTE: we "should" wrap this in
|
|
||||||
// ConstantScoreQuery, or maybe send this as a
|
|
||||||
// Filter instead to search.
|
|
||||||
sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey())), entry.getValue());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.lucene.index.MultiDocValues;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.FieldDoc;
|
import org.apache.lucene.search.FieldDoc;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.TopFieldDocs;
|
import org.apache.lucene.search.TopFieldDocs;
|
||||||
|
@ -156,6 +157,12 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
|
||||||
return super.lookup(key, contextInfo, num * numFactor, allTermsRequired, doHighlight);
|
return super.lookup(key, contextInfo, num * numFactor, allTermsRequired, doHighlight);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Lookup.LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
|
||||||
|
// here we multiply the number of searched element by the defined factor
|
||||||
|
return super.lookup(key, contextQuery, num * numFactor, allTermsRequired, doHighlight);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected FieldType getTextFieldType() {
|
protected FieldType getTextFieldType() {
|
||||||
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
|
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.util.CharArraySet;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.suggest.Input;
|
import org.apache.lucene.search.suggest.Input;
|
||||||
import org.apache.lucene.search.suggest.InputArrayIterator;
|
import org.apache.lucene.search.suggest.InputArrayIterator;
|
||||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||||
|
@ -949,7 +950,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// LUCENE-5528
|
// LUCENE-5528 and LUCENE-6464
|
||||||
public void testBasicContext() throws Exception {
|
public void testBasicContext() throws Exception {
|
||||||
Input keys[] = new Input[] {
|
Input keys[] = new Input[] {
|
||||||
new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")),
|
new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")),
|
||||||
|
@ -1175,6 +1176,14 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
||||||
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
assertTrue(result.contexts.contains(new BytesRef("foo")));
|
||||||
assertTrue(result.contexts.contains(new BytesRef("baz")));
|
assertTrue(result.contexts.contains(new BytesRef("baz")));
|
||||||
|
|
||||||
|
//LUCENE-6464 Using the advanced context filtering by query.
|
||||||
|
//Note that this is just a sanity test as all the above tests run through the filter by query method
|
||||||
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
suggester.addContextToQuery(query, new BytesRef("foo"), BooleanClause.Occur.MUST);
|
||||||
|
suggester.addContextToQuery(query, new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
|
||||||
|
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), query, 10, true, true);
|
||||||
|
assertEquals(1, results.size());
|
||||||
|
|
||||||
suggester.close();
|
suggester.close();
|
||||||
a.close();
|
a.close();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue