From 213cdf67930536921b71ebbc3724c9a3eb034595 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Fri, 15 May 2015 14:58:55 +0000 Subject: [PATCH] LUCENE-6464: add expert AnalyzingInfixSuggester.lookup method that accepts arbitrary BooleanQuery to filter contexts git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1679579 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 + .../analyzing/AnalyzingInfixSuggester.java | 111 ++++++++++++------ .../analyzing/BlendedInfixSuggester.java | 7 ++ .../AnalyzingInfixSuggesterTest.java | 13 +- 4 files changed, 100 insertions(+), 36 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 81a5d60386b..5671dacf483 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -98,6 +98,11 @@ New Features Circle, Path (buffered line string), and Polygon. (Karl Wright via David Smiley) +* LUCENE-6464: Add a new expert lookup method to + AnalyzingInfixSuggester to accept an arbitrary BooleanQuery to + express how contexts should be filtered. (Arcadius Ahouansou via + Mike McCandless) + Optimizations * LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 410c3aeca9d..e3ebd4c91bb 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -24,7 +24,6 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -61,6 +60,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortingMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.EarlyTerminatingSortingCollector; @@ -407,22 +407,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { /** Lookup, without any context. */ public List lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException { - return lookup(key, (Map)null, num, allTermsRequired, doHighlight); + return lookup(key, (BooleanQuery)null, num, allTermsRequired, doHighlight); } /** Lookup, with context but without booleans. Context booleans default to SHOULD, * so each suggestion must have at least one of the contexts. */ public List lookup(CharSequence key, Set contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException { - - if (contexts == null) { - return lookup(key, num, allTermsRequired, doHighlight); - } - - Map contextInfo = new HashMap<>(); - for (BytesRef context : contexts) { - contextInfo.put(context, BooleanClause.Occur.SHOULD); - } - return lookup(key, contextInfo, num, allTermsRequired, doHighlight); + return lookup(key, toQuery(contexts), num, allTermsRequired, doHighlight); } /** This is called if the last token isn't ended @@ -441,6 +432,66 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { * must match ({@code allTermsRequired}) and whether the hits * should be highlighted ({@code doHighlight}). */ public List lookup(CharSequence key, Map contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException { + return lookup(key, toQuery(contextInfo), num, allTermsRequired, doHighlight); + } + + private BooleanQuery toQuery(Map contextInfo) { + if (contextInfo == null || contextInfo.isEmpty()) { + return null; + } + + BooleanQuery contextFilter = new BooleanQuery(); + for (Map.Entry entry : contextInfo.entrySet()) { + addContextToQuery(contextFilter, entry.getKey(), entry.getValue()); + } + + return contextFilter; + } + + private BooleanQuery toQuery(Set contextInfo) { + if (contextInfo == null || contextInfo.isEmpty()) { + return null; + } + + BooleanQuery contextFilter = new BooleanQuery(); + for (BytesRef context : contextInfo) { + addContextToQuery(contextFilter, context, BooleanClause.Occur.SHOULD); + } + return contextFilter; + } + + + /** + * This method is handy as we do not need access to internal fields such as CONTEXTS_FIELD_NAME in order to build queries + * However, here may not be its best location. + * + * @param query an instance of @See {@link BooleanQuery} + * @param context the context + * @param clause one of {@link Occur} + */ + public void addContextToQuery(BooleanQuery query, BytesRef context, BooleanClause.Occur clause) { + // NOTE: we "should" wrap this in + // ConstantScoreQuery, or maybe send this as a + // Filter instead to search. + + // TODO: if we had a BinaryTermField we could fix + // this "must be valid ut8f" limitation: + query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context)), clause); + } + + /** + * This is an advanced method providing the capability to send down to the suggester any + * arbitrary lucene query to be used to filter the result of the suggester + * + * @param key the keyword being looked for + * @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery. + * @param num number of items to return + * @param allTermsRequired all searched terms must match or not + * @param doHighlight if true, the matching term will be highlighted in the search result + * @return the result of the suggester + * @throws IOException f the is IO exception while reading data from the index + */ + public List lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException { if (searcherMgr == null) { throw new IllegalStateException("suggester was not built"); @@ -454,7 +505,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { } BooleanQuery query; - Set matchedTokens = new HashSet<>(); + Set matchedTokens; String prefixToken = null; try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) { @@ -496,41 +547,33 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { matchedTokens.add(lastToken); lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)); } + if (lastQuery != null) { query.add(lastQuery, occur); } } - if (contextInfo != null) { - + if (contextQuery != null) { boolean allMustNot = true; - for (Map.Entry entry : contextInfo.entrySet()) { - if (entry.getValue() != BooleanClause.Occur.MUST_NOT) { + for (BooleanClause clause : contextQuery.clauses()) { + if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) { allMustNot = false; break; } } - - // do not make a subquery if all context booleans are must not - if (allMustNot == true) { - for (Map.Entry entry : contextInfo.entrySet()) { - query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), BooleanClause.Occur.MUST_NOT); + + if (allMustNot) { + //all are MUST_NOT: add the contextQuery to the main query instead (not as sub-query) + for (BooleanClause clause : contextQuery.clauses()) { + query.add(clause); } - } else { - BooleanQuery sub = new BooleanQuery(); - query.add(sub, BooleanClause.Occur.MUST); - - for (Map.Entry entry : contextInfo.entrySet()) { - // NOTE: we "should" wrap this in - // ConstantScoreQuery, or maybe send this as a - // Filter instead to search. - sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey())), entry.getValue()); - } + //Add contextQuery as sub-query + query.add(contextQuery, BooleanClause.Occur.MUST); } } } - + // TODO: we could allow blended sort here, combining // weight w/ score. Now we ignore score and sort only // by weight: @@ -566,7 +609,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { return results; } - + /** * Create the results based on the search hits. * Can be overridden by subclass to add particular behavior (e.g. weight transformation). diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java index 0537fd0a165..ce5edc067bc 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java @@ -35,6 +35,7 @@ import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TopFieldDocs; @@ -156,6 +157,12 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester { return super.lookup(key, contextInfo, num * numFactor, allTermsRequired, doHighlight); } + @Override + public List lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException { + // here we multiply the number of searched element by the defined factor + return super.lookup(key, contextQuery, num * numFactor, allTermsRequired, doHighlight); + } + @Override protected FieldType getTextFieldType() { FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java index 1818006ac9a..32028e432b1 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java @@ -38,6 +38,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.suggest.Input; import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.search.suggest.Lookup.LookupResult; @@ -949,7 +950,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { return result; } - // LUCENE-5528 + // LUCENE-5528 and LUCENE-6464 public void testBasicContext() throws Exception { Input keys[] = new Input[] { new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")), @@ -1174,7 +1175,15 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { assertEquals(2, result.contexts.size()); assertTrue(result.contexts.contains(new BytesRef("foo"))); assertTrue(result.contexts.contains(new BytesRef("baz"))); - + + //LUCENE-6464 Using the advanced context filtering by query. + //Note that this is just a sanity test as all the above tests run through the filter by query method + BooleanQuery query = new BooleanQuery(); + suggester.addContextToQuery(query, new BytesRef("foo"), BooleanClause.Occur.MUST); + suggester.addContextToQuery(query, new BytesRef("bar"), BooleanClause.Occur.MUST_NOT); + results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), query, 10, true, true); + assertEquals(1, results.size()); + suggester.close(); a.close(); }