mirror of https://github.com/apache/lucene.git
[LUCENE-10629]: Add fast match query support to FacetSets (#1015)
This commit is contained in:
parent
f93e52e5bb
commit
7ac75135b9
|
@ -89,7 +89,7 @@ API Changes
|
|||
|
||||
New Features
|
||||
---------------------
|
||||
(No changes)
|
||||
* LUCENE-10629: Support match set filtering with a query in MatchingFacetSetsCounts. (Stefan Vodita, Shai Erera)
|
||||
|
||||
Improvements
|
||||
---------------------
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.demo.facet;
|
|||
import java.io.IOException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.ZoneOffset;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -42,8 +43,12 @@ import org.apache.lucene.index.DirectoryReader;
|
|||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermInSetQuery;
|
||||
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -85,6 +90,7 @@ public class CustomFacetSetExample {
|
|||
new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES),
|
||||
new TemperatureReadingFacetSet(JUNE_SECOND_2022, EIGHTY_DEGREES),
|
||||
new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
|
||||
addFastMatchFields(doc);
|
||||
indexWriter.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
|
@ -95,11 +101,24 @@ public class CustomFacetSetExample {
|
|||
new TemperatureReadingFacetSet(MAY_SECOND_2022, EIGHTY_DEGREES),
|
||||
new TemperatureReadingFacetSet(JUNE_SECOND_2022, HUNDRED_DEGREES),
|
||||
new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
|
||||
addFastMatchFields(doc);
|
||||
indexWriter.addDocument(doc);
|
||||
|
||||
indexWriter.close();
|
||||
}
|
||||
|
||||
private void addFastMatchFields(Document doc) {
|
||||
// day field
|
||||
doc.add(new StringField("day", String.valueOf(MAY_SECOND_2022), Field.Store.NO));
|
||||
doc.add(new StringField("day", String.valueOf(JUNE_SECOND_2022), Field.Store.NO));
|
||||
doc.add(new StringField("day", String.valueOf(JULY_SECOND_2022), Field.Store.NO));
|
||||
|
||||
// temp field
|
||||
doc.add(new StringField("temp", String.valueOf(EIGHTY_DEGREES), Field.Store.NO));
|
||||
doc.add(new StringField("temp", String.valueOf(HUNDRED_DEGREES), Field.Store.NO));
|
||||
doc.add(new StringField("temp", String.valueOf(HUNDRED_TWENTY_DEGREES), Field.Store.NO));
|
||||
}
|
||||
|
||||
/** Counting documents which exactly match a given {@link FacetSet}. */
|
||||
private FacetResult exactMatching() throws IOException {
|
||||
try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
|
||||
|
@ -128,6 +147,59 @@ public class CustomFacetSetExample {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Counting documents which exactly match a given {@link FacetSet}. This example also demonstrates
|
||||
* how to use a fast match query to improve the counting efficiency by skipping over documents
|
||||
* which cannot possibly match a set.
|
||||
*/
|
||||
private FacetResult exactMatchingWithFastMatchQuery() throws IOException {
|
||||
try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
|
||||
// MatchAllDocsQuery is for "browsing" (counts facets
|
||||
// for all non-deleted docs in the index); normally
|
||||
// you'd use a "normal" query:
|
||||
FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
|
||||
|
||||
// Match documents whose "day" field is either "May 2022" or "July 2022"
|
||||
Query dateQuery =
|
||||
new TermInSetQuery(
|
||||
"day",
|
||||
Arrays.asList(
|
||||
new BytesRef(String.valueOf(MAY_SECOND_2022)),
|
||||
new BytesRef(String.valueOf(JULY_SECOND_2022))));
|
||||
// Match documents whose "temp" field is either "80" or "120" degrees
|
||||
Query temperatureQuery =
|
||||
new TermInSetQuery(
|
||||
"temp",
|
||||
Arrays.asList(
|
||||
new BytesRef(String.valueOf(HUNDRED_DEGREES)),
|
||||
new BytesRef(String.valueOf(HUNDRED_TWENTY_DEGREES))));
|
||||
// Documents must match both clauses
|
||||
Query fastMatchQuery =
|
||||
new BooleanQuery.Builder()
|
||||
.add(dateQuery, BooleanClause.Occur.MUST)
|
||||
.add(temperatureQuery, BooleanClause.Occur.MUST)
|
||||
.build();
|
||||
|
||||
// Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions
|
||||
Facets facets =
|
||||
new MatchingFacetSetsCounts(
|
||||
"temperature",
|
||||
fc,
|
||||
TemperatureReadingFacetSet::decodeTemperatureReading,
|
||||
fastMatchQuery,
|
||||
new ExactFacetSetMatcher(
|
||||
"May 2022 (100f)",
|
||||
new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)),
|
||||
new ExactFacetSetMatcher(
|
||||
"July 2022 (120f)",
|
||||
new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
|
||||
|
||||
// Retrieve results
|
||||
return facets.getAllChildren("temperature");
|
||||
}
|
||||
}
|
||||
/** Counting documents which match a certain degrees value for any date. */
|
||||
private FacetResult rangeMatching() throws IOException {
|
||||
try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
|
||||
|
@ -197,6 +269,12 @@ public class CustomFacetSetExample {
|
|||
return exactMatching();
|
||||
}
|
||||
|
||||
/** Runs the exact matching with fast match query example. */
|
||||
public FacetResult runExactMatchingWithFastMatchQuery() throws IOException {
|
||||
index();
|
||||
return exactMatchingWithFastMatchQuery();
|
||||
}
|
||||
|
||||
/** Runs the range matching example. */
|
||||
public FacetResult runRangeMatching() throws IOException {
|
||||
index();
|
||||
|
@ -218,6 +296,11 @@ public class CustomFacetSetExample {
|
|||
FacetResult result = example.runExactMatching();
|
||||
System.out.println("Temperature Reading: " + result);
|
||||
|
||||
System.out.println("Exact Facet Set matching with fast match query example:");
|
||||
System.out.println("-----------------------");
|
||||
result = example.runExactMatchingWithFastMatchQuery();
|
||||
System.out.println("Temperature Reading: " + result);
|
||||
|
||||
System.out.println("Range Facet Set matching example:");
|
||||
System.out.println("-----------------------");
|
||||
result = example.runRangeMatching();
|
||||
|
|
|
@ -36,6 +36,19 @@ public class TestCustomFacetSetExample extends LuceneTestCase {
|
|||
assertEquals(new LabelAndValue("July 2022 (120f)", 2), result.labelValues[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExactMatchingWithFastMatchQuery() throws Exception {
|
||||
FacetResult result = new CustomFacetSetExample().runExactMatchingWithFastMatchQuery();
|
||||
|
||||
assertEquals("temperature", result.dim);
|
||||
assertEquals(0, result.path.length);
|
||||
assertEquals(2, result.value);
|
||||
assertEquals(2, result.childCount);
|
||||
|
||||
assertEquals(new LabelAndValue("May 2022 (100f)", 1), result.labelValues[0]);
|
||||
assertEquals(new LabelAndValue("July 2022 (120f)", 2), result.labelValues[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRangeMatching() throws Exception {
|
||||
FacetResult result = new CustomFacetSetExample().runRangeMatching();
|
||||
|
|
|
@ -118,7 +118,14 @@ off-the-shelf `Int/FloatFacetSet` can use `FacetSetDecoder::decodeInts` and `Lon
|
|||
|
||||
A `Facets` implementation which counts how many documents are matched by each of a list of `FacetSetMatcher`, in
|
||||
addition to counting the total number of documents that were matched. It uses the `FacetSetDecoder` to decode the
|
||||
dimension values.
|
||||
dimension values. When there are many possible facet set combinations, you can improve the counting efficiency by
|
||||
passing a `fastMatchQuery` which will skip over documents which cannot be matched by any of the facet set matchers,
|
||||
e.g. because the target dimension values were not indexed for a document at all. For instance, in the <<movie-actors>>
|
||||
example there are many possible combinations of indexed `genre` and `year` facet sets. If one only wants to count the
|
||||
combinations of "Thriller + 2010", "Drama + 2010" and "Comedy + 2010", one can pass a `fastMatchQuery` which will match
|
||||
documents whose `genre` field contains one of the genres and their `releaseYear` field contains the value "2010". That
|
||||
way, documents which match none of the "genre" values, or which do not match the desired "releaseYear", will not even be evaluated by the
|
||||
facet set matchers.
|
||||
|
||||
== FacetSets Under the Hood
|
||||
|
||||
|
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.search.ConjunctionUtils;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
|
||||
/**
|
||||
* Base class for facet counts. It allows for a query to be passed in to filter the match set.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class FacetCountsWithFilterQuery extends Facets {
|
||||
|
||||
/**
|
||||
* Optional: if specified, we first test this Query to see whether the document should be checked
|
||||
* for matching ranges. If this is null, all documents are checked.
|
||||
*/
|
||||
protected final Query fastMatchQuery;
|
||||
|
||||
/** Create {@code FacetCounts} */
|
||||
protected FacetCountsWithFilterQuery(Query fastMatchQuery) {
|
||||
this.fastMatchQuery = fastMatchQuery;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link DocIdSetIterator} from the provided {@code hits} that relies on {@code
|
||||
* fastMatchQuery} if available for first-pass filtering. If {@code iterators} is not empty then
|
||||
* all iterators are intersected. If any of the iterators is null, it indicates no documents will
|
||||
* be matched by it, and therefore no documents will be matched overall. A null response indicates
|
||||
* no documents will match.
|
||||
*/
|
||||
protected DocIdSetIterator createIterator(
|
||||
FacetsCollector.MatchingDocs hits, DocIdSetIterator... iterators) throws IOException {
|
||||
List<DocIdSetIterator> allIterators = new ArrayList<>();
|
||||
allIterators.add(hits.bits.iterator());
|
||||
allIterators.addAll(Arrays.asList(iterators));
|
||||
if (allIterators.stream().anyMatch(Objects::isNull)) {
|
||||
// if any of the iterators are null, there are no matching docs
|
||||
return null;
|
||||
}
|
||||
|
||||
if (fastMatchQuery != null) {
|
||||
final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(hits.context);
|
||||
final IndexSearcher searcher = new IndexSearcher(topLevelContext);
|
||||
searcher.setQueryCache(null);
|
||||
final Weight fastMatchWeight =
|
||||
searcher.createWeight(searcher.rewrite(fastMatchQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
|
||||
final Scorer s = fastMatchWeight.scorer(hits.context);
|
||||
DocIdSetIterator fastMatchQueryIterator = s.iterator();
|
||||
if (fastMatchQueryIterator == null) {
|
||||
// no matching docs by the fast match query
|
||||
return null;
|
||||
} else {
|
||||
allIterators.add(fastMatchQueryIterator);
|
||||
}
|
||||
}
|
||||
|
||||
if (allIterators.size() == 1) {
|
||||
return allIterators.get(0);
|
||||
} else {
|
||||
return ConjunctionUtils.intersectIterators(allIterators);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -21,14 +21,14 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.facet.FacetCountsWithFilterQuery;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.search.ConjunctionUtils;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
|
@ -36,7 +36,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class MatchingFacetSetsCounts extends Facets {
|
||||
public class MatchingFacetSetsCounts extends FacetCountsWithFilterQuery {
|
||||
|
||||
private final FacetSetMatcher[] facetSetMatchers;
|
||||
private final int[] counts;
|
||||
|
@ -54,6 +54,22 @@ public class MatchingFacetSetsCounts extends Facets {
|
|||
FacetSetDecoder facetSetDecoder,
|
||||
FacetSetMatcher... facetSetMatchers)
|
||||
throws IOException {
|
||||
this(field, hits, facetSetDecoder, null, facetSetMatchers);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new instance of matching facet set counts which calculates the counts for each
|
||||
* given facet set matcher. If {@code fastMatchQuery} is not {@code null}, then only documents
|
||||
* which are matched by it will be counted.
|
||||
*/
|
||||
public MatchingFacetSetsCounts(
|
||||
String field,
|
||||
FacetsCollector hits,
|
||||
FacetSetDecoder facetSetDecoder,
|
||||
Query fastMatchQuery,
|
||||
FacetSetMatcher... facetSetMatchers)
|
||||
throws IOException {
|
||||
super(fastMatchQuery);
|
||||
if (facetSetMatchers == null || facetSetMatchers.length == 0) {
|
||||
throw new IllegalArgumentException("facetSetMatchers cannot be null or empty");
|
||||
}
|
||||
|
@ -76,8 +92,7 @@ public class MatchingFacetSetsCounts extends Facets {
|
|||
|
||||
BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field);
|
||||
|
||||
final DocIdSetIterator it =
|
||||
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), binaryDocValues));
|
||||
final DocIdSetIterator it = createIterator(hits, binaryDocValues);
|
||||
if (it == null) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -17,25 +17,17 @@
|
|||
package org.apache.lucene.facet.range;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.facet.FacetCountsWithFilterQuery;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.search.ConjunctionUtils;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
|
@ -43,19 +35,13 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
abstract class RangeFacetCounts extends Facets {
|
||||
abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
||||
/** Ranges passed to constructor. */
|
||||
protected final Range[] ranges;
|
||||
|
||||
/** Counts, initialized by subclass. */
|
||||
protected final int[] counts;
|
||||
|
||||
/**
|
||||
* Optional: if specified, we first test this Query to see whether the document should be checked
|
||||
* for matching ranges. If this is null, all documents are checked.
|
||||
*/
|
||||
protected final Query fastMatchQuery;
|
||||
|
||||
/** Our field name. */
|
||||
protected final String field;
|
||||
|
||||
|
@ -63,42 +49,13 @@ abstract class RangeFacetCounts extends Facets {
|
|||
protected int totCount;
|
||||
|
||||
/** Create {@code RangeFacetCounts} */
|
||||
protected RangeFacetCounts(String field, Range[] ranges, Query fastMatchQuery)
|
||||
throws IOException {
|
||||
protected RangeFacetCounts(String field, Range[] ranges, Query fastMatchQuery) {
|
||||
super(fastMatchQuery);
|
||||
this.field = field;
|
||||
this.ranges = ranges;
|
||||
this.fastMatchQuery = fastMatchQuery;
|
||||
counts = new int[ranges.length];
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link org.apache.lucene.search.DocIdSetIterator} from the provided {@code hits} that
|
||||
* relies on {@code fastMatchQuery} if available for first-pass filtering. A null response
|
||||
* indicates no documents will match.
|
||||
*/
|
||||
protected DocIdSetIterator createIterator(FacetsCollector.MatchingDocs hits) throws IOException {
|
||||
|
||||
if (fastMatchQuery != null) {
|
||||
|
||||
final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(hits.context);
|
||||
final IndexSearcher searcher = new IndexSearcher(topLevelContext);
|
||||
searcher.setQueryCache(null);
|
||||
final Weight fastMatchWeight =
|
||||
searcher.createWeight(searcher.rewrite(fastMatchQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
|
||||
final Scorer s = fastMatchWeight.scorer(hits.context);
|
||||
if (s == null) {
|
||||
return null; // no hits from the fastMatchQuery; return null
|
||||
} else {
|
||||
DocIdSetIterator fastMatchDocs = s.iterator();
|
||||
return ConjunctionUtils.intersectIterators(
|
||||
Arrays.asList(hits.bits.iterator(), fastMatchDocs));
|
||||
}
|
||||
|
||||
} else {
|
||||
return hits.bits.iterator();
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract LongRange[] getLongRanges();
|
||||
|
||||
protected long mapDocValue(long l) {
|
||||
|
|
|
@ -17,16 +17,25 @@
|
|||
package org.apache.lucene.facet.facetset;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.facet.*;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermInSetQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||
import org.apache.lucene.tests.util.TestUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
public class TestExactFacetSetMatcher extends FacetTestCase {
|
||||
|
||||
|
@ -330,4 +339,93 @@ public class TestExactFacetSetMatcher extends FacetTestCase {
|
|||
r.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
public void testLongFacetSetMatchingWithFastMatchQuery() throws Exception {
|
||||
Directory d = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), d);
|
||||
|
||||
List<LongFacetSet> allSets = new ArrayList<>();
|
||||
for (int manufacturerOrd : MANUFACTURER_ORDS) {
|
||||
for (int year : YEARS) {
|
||||
allSets.add(new LongFacetSet(manufacturerOrd, year));
|
||||
}
|
||||
}
|
||||
|
||||
int numFord2010 = 0;
|
||||
int numChevy2011 = 0;
|
||||
int numMatchingDocs = 0;
|
||||
for (int i = 0; i < 100; i++) {
|
||||
Document doc = new Document();
|
||||
int numSets = TestUtil.nextInt(random(), 1, 4);
|
||||
Collections.shuffle(allSets, random());
|
||||
LongFacetSet[] facetSets = allSets.subList(0, numSets).toArray(LongFacetSet[]::new);
|
||||
boolean matchingDoc = false;
|
||||
for (LongFacetSet facetSet : facetSets) {
|
||||
if (FORD_ORD == facetSet.values[0] && facetSet.values[1] == 2010) {
|
||||
++numFord2010;
|
||||
matchingDoc = true;
|
||||
} else if (CHEVY_ORD == facetSet.values[0] && facetSet.values[1] == 2011) {
|
||||
++numChevy2011;
|
||||
matchingDoc = true;
|
||||
}
|
||||
}
|
||||
numMatchingDocs += matchingDoc ? 1 : 0;
|
||||
doc.add(FacetSetsField.create("field", facetSets)); // field for aggregation
|
||||
// add fields for drill-down + fast matching
|
||||
addFastMatchField("manufacturer", doc, facetSets, 0);
|
||||
addFastMatchField("year", doc, facetSets, 1);
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
IndexSearcher s = newSearcher(r);
|
||||
FacetsCollector fc = s.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
|
||||
|
||||
Query fastMatchQuery =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
createFastMatchQuery("manufacturer", FORD_ORD, CHEVY_ORD), BooleanClause.Occur.MUST)
|
||||
.add(createFastMatchQuery("year", 2010, 2011), BooleanClause.Occur.MUST)
|
||||
.build();
|
||||
Facets facets =
|
||||
new MatchingFacetSetsCounts(
|
||||
"field",
|
||||
fc,
|
||||
FacetSetDecoder::decodeLongs,
|
||||
fastMatchQuery,
|
||||
new ExactFacetSetMatcher("Ford 2010", new LongFacetSet(FORD_ORD, 2010)),
|
||||
new ExactFacetSetMatcher("Chevy 2011", new LongFacetSet(CHEVY_ORD, 2011)));
|
||||
|
||||
FacetResult result = facets.getAllChildren("field");
|
||||
|
||||
assertEquals("field", result.dim);
|
||||
assertEquals(0, result.path.length);
|
||||
assertEquals(numMatchingDocs, result.value);
|
||||
assertEquals(2, result.childCount);
|
||||
|
||||
assertEquals(new LabelAndValue("Ford 2010", numFord2010), result.labelValues[0]);
|
||||
assertEquals(new LabelAndValue("Chevy 2011", numChevy2011), result.labelValues[1]);
|
||||
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
private static Query createFastMatchQuery(String field, int... values) {
|
||||
return new TermInSetQuery(
|
||||
field,
|
||||
Arrays.stream(values)
|
||||
.mapToObj(String::valueOf)
|
||||
.map(BytesRef::new)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
private static void addFastMatchField(
|
||||
String field, Document doc, LongFacetSet[] facetSets, int index) {
|
||||
Arrays.stream(facetSets)
|
||||
.map(facetSet -> facetSet.values[index])
|
||||
.distinct()
|
||||
.forEach(value -> doc.add(new StringField(field, String.valueOf(value), Field.Store.NO)));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,13 +17,19 @@
|
|||
package org.apache.lucene.facet.facetset;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.LongRange;
|
||||
import org.apache.lucene.facet.*;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||
import org.apache.lucene.tests.util.TestUtil;
|
||||
|
@ -389,6 +395,99 @@ public class TestRangeFacetSetMatcher extends FacetTestCase {
|
|||
d.close();
|
||||
}
|
||||
|
||||
public void testLongRangeFacetSetMatchingWithFastMatchQuery() throws Exception {
|
||||
Directory d = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), d);
|
||||
|
||||
List<LongFacetSet> allSets = new ArrayList<>();
|
||||
for (int manufacturerOrd : MANUFACTURER_ORDS) {
|
||||
for (int year : YEARS) {
|
||||
allSets.add(new LongFacetSet(manufacturerOrd, year));
|
||||
}
|
||||
}
|
||||
|
||||
int numFord2011_2013 = 0;
|
||||
int numFord2010_2014 = 0;
|
||||
int numFord2011_2014 = 0;
|
||||
int numFord2010_2013 = 0;
|
||||
int numMatchingDocs = 0;
|
||||
for (int i = 0; i < 100; i++) {
|
||||
Document doc = new Document();
|
||||
int numSets = TestUtil.nextInt(random(), 1, 4);
|
||||
Collections.shuffle(allSets, random());
|
||||
LongFacetSet[] facetSets = allSets.subList(0, numSets).toArray(LongFacetSet[]::new);
|
||||
boolean matchingDoc = false;
|
||||
for (LongFacetSet facetSet : facetSets) {
|
||||
if (FORD_ORD != facetSet.values[0]) {
|
||||
continue;
|
||||
}
|
||||
long year = facetSet.values[1];
|
||||
if (year > 2010 && year < 2014) {
|
||||
++numFord2010_2013;
|
||||
++numFord2010_2014;
|
||||
++numFord2011_2013;
|
||||
++numFord2011_2014;
|
||||
matchingDoc = true;
|
||||
} else if (year == 2014) {
|
||||
++numFord2010_2014;
|
||||
++numFord2011_2014;
|
||||
matchingDoc = true;
|
||||
} else if (year == 2010) {
|
||||
++numFord2010_2014;
|
||||
++numFord2010_2013;
|
||||
matchingDoc = true;
|
||||
}
|
||||
}
|
||||
numMatchingDocs += matchingDoc ? 1 : 0;
|
||||
doc.add(FacetSetsField.create("field", facetSets));
|
||||
// add fields for drill-down + fast matching
|
||||
addFastMatchField("manufacturer", doc, facetSets, 0);
|
||||
addFastMatchField("year", doc, facetSets, 1);
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
IndexSearcher s = newSearcher(r);
|
||||
FacetsCollector fc = s.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
|
||||
|
||||
Query fastMatchQuery =
|
||||
new BooleanQuery.Builder()
|
||||
.add(createFastMatchQuery("manufacturer", FORD_ORD, FORD_ORD), BooleanClause.Occur.MUST)
|
||||
.add(createFastMatchQuery("year", 2010, 2014), BooleanClause.Occur.MUST)
|
||||
.build();
|
||||
Facets facets =
|
||||
new MatchingFacetSetsCounts(
|
||||
"field",
|
||||
fc,
|
||||
FacetSetDecoder::decodeLongs,
|
||||
fastMatchQuery,
|
||||
new RangeFacetSetMatcher(
|
||||
"Ford [2010-2014]", singleLong(FORD_ORD), longRange(2010, true, 2014, true)),
|
||||
new RangeFacetSetMatcher(
|
||||
"Ford (2010-2014]", singleLong(FORD_ORD), longRange(2010, false, 2014, true)),
|
||||
new RangeFacetSetMatcher(
|
||||
"Ford [2010-2014)", singleLong(FORD_ORD), longRange(2010, true, 2014, false)),
|
||||
new RangeFacetSetMatcher(
|
||||
"Ford (2010-2014)", singleLong(FORD_ORD), longRange(2010, false, 2014, false)));
|
||||
|
||||
FacetResult result = facets.getAllChildren("field");
|
||||
|
||||
assertEquals("field", result.dim);
|
||||
assertEquals(0, result.path.length);
|
||||
assertEquals(numMatchingDocs, result.value);
|
||||
assertEquals(4, result.childCount);
|
||||
|
||||
assertEquals(new LabelAndValue("Ford [2010-2014]", numFord2010_2014), result.labelValues[0]);
|
||||
assertEquals(new LabelAndValue("Ford (2010-2014]", numFord2011_2014), result.labelValues[1]);
|
||||
assertEquals(new LabelAndValue("Ford [2010-2014)", numFord2010_2013), result.labelValues[2]);
|
||||
assertEquals(new LabelAndValue("Ford (2010-2014)", numFord2011_2013), result.labelValues[3]);
|
||||
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
private static DimRange singleLong(long value) {
|
||||
return DimRange.fromLongs(value, true, value, true);
|
||||
}
|
||||
|
@ -415,4 +514,17 @@ public class TestRangeFacetSetMatcher extends FacetTestCase {
|
|||
float min, boolean minExclusive, float max, boolean maxExclusive) {
|
||||
return DimRange.fromFloats(min, minExclusive, max, maxExclusive);
|
||||
}
|
||||
|
||||
private static Query createFastMatchQuery(String field, long min, long max) {
|
||||
return LongRange.newIntersectsQuery(field, new long[] {min}, new long[] {max});
|
||||
}
|
||||
|
||||
private static void addFastMatchField(
|
||||
String field, Document doc, LongFacetSet[] facetSets, int index) {
|
||||
long min =
|
||||
Arrays.stream(facetSets).mapToLong(facetSet -> facetSet.values[index]).min().orElseThrow();
|
||||
long max =
|
||||
Arrays.stream(facetSets).mapToLong(facetSet -> facetSet.values[index]).max().orElseThrow();
|
||||
doc.add(new LongPoint(field, min, max));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue