mirror of https://github.com/apache/lucene.git
Add dynamic range facets (#13689)
This commit is contained in:
parent
634eff1851
commit
0d579338ee
|
@ -312,6 +312,13 @@ New Features
|
|||
|
||||
* GITHUB#13678: Add support for JDK 23 to the Panama Vectorization Provider. (Chris Hegarty)
|
||||
|
||||
* GITHUB#13689: Add a new faceting feature, dynamic range facets, which automatically picks a balanced set of numeric
|
||||
ranges based on the distribution of values that occur across all hits. For use cases that have a highly variable
|
||||
numeric doc values field, such as "price" in an e-commerce application, this facet method is powerful as it allows the
|
||||
presented ranges to adapt depending on what hits the query actually matches. This is in contrast to existing range
|
||||
faceting that requires the application to provide the specific fixed ranges up front. (Yuting Gan, Greg Miller,
|
||||
Stefan Vodita)
|
||||
|
||||
Improvements
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -0,0 +1,158 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.demo.facet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsCollectorManager;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.range.DynamicRangeUtil;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.NamedThreadFactory;
|
||||
|
||||
/**
|
||||
* Demo dynamic range faceting.
|
||||
*
|
||||
* <p>The results look like so: min: 63 max: 75 centroid: 69.000000 count: 2 weight: 137 min: 79
|
||||
* max: 96 centroid: 86.000000 count: 3 weight: 83
|
||||
*
|
||||
* <p>We've computed dynamic ranges over popularity weighted by number of books. We can read the
|
||||
* results as so: There are 137 books written by authors in the 63 to 75 popularity range.
|
||||
*
|
||||
* <p>How it works: We collect all the values (popularity) and their weights (book counts). We sort
|
||||
* the values and find the approximate weight per range. In this case the total weight is 220 (total
|
||||
* books by all authors) and we want 2 ranges, so we're aiming for 110 books in each range. We add
|
||||
* Chesterton to the first range, since he is the least popular author. He's written a lot of books,
|
||||
* the range's weight is 90. We add Tolstoy to the first range, since he is next in line of
|
||||
* popularity. He's written another 47 books, which brings the total weight to 137. We're over the
|
||||
* 110 target weight, so we stop and add everyone left to the second range.
|
||||
*/
|
||||
public class DynamicRangeFacetsExample {
|
||||
|
||||
private final Directory indexDir = new ByteBuffersDirectory();
|
||||
private final FacetsConfig config = new FacetsConfig();
|
||||
|
||||
/** Empty constructor */
|
||||
public DynamicRangeFacetsExample() {}
|
||||
|
||||
/** Build the example index. */
|
||||
private void index() throws IOException {
|
||||
IndexWriter indexWriter =
|
||||
new IndexWriter(
|
||||
indexDir,
|
||||
new IndexWriterConfig(new WhitespaceAnalyzer())
|
||||
.setOpenMode(IndexWriterConfig.OpenMode.CREATE));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("Author", "J. R. R. Tolkien", Field.Store.NO));
|
||||
doc.add(new NumericDocValuesField("Popularity", 96));
|
||||
doc.add(new NumericDocValuesField("Books", 24));
|
||||
indexWriter.addDocument(config.build(doc));
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("Author", "C. S. Lewis", Field.Store.NO));
|
||||
doc.add(new NumericDocValuesField("Popularity", 83));
|
||||
doc.add(new NumericDocValuesField("Books", 48));
|
||||
indexWriter.addDocument(config.build(doc));
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("Author", "G. K. Chesterton", Field.Store.NO));
|
||||
doc.add(new NumericDocValuesField("Popularity", 63));
|
||||
doc.add(new NumericDocValuesField("Books", 90));
|
||||
indexWriter.addDocument(config.build(doc));
|
||||
indexWriter.commit();
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("Author", "Fyodor Dostoevsky", Field.Store.NO));
|
||||
doc.add(new NumericDocValuesField("Popularity", 79));
|
||||
doc.add(new NumericDocValuesField("Books", 11));
|
||||
indexWriter.addDocument(config.build(doc));
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("Author", "Leo Tolstoy", Field.Store.NO));
|
||||
doc.add(new NumericDocValuesField("Popularity", 75));
|
||||
doc.add(new NumericDocValuesField("Books", 47));
|
||||
indexWriter.addDocument(config.build(doc));
|
||||
|
||||
indexWriter.close();
|
||||
}
|
||||
|
||||
/** User runs a query and counts facets. */
|
||||
private List<DynamicRangeUtil.DynamicRangeInfo> search() throws IOException {
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
|
||||
LongValuesSource valuesSource = LongValuesSource.fromLongField("Popularity");
|
||||
LongValuesSource weightsSource = LongValuesSource.fromLongField("Books");
|
||||
|
||||
// Aggregates the facet counts
|
||||
FacetsCollectorManager fcm = new FacetsCollectorManager();
|
||||
|
||||
// MatchAllDocsQuery is for "browsing" (counts facets
|
||||
// for all non-deleted docs in the index); normally
|
||||
// you'd use a "normal" query:
|
||||
FacetsCollector fc =
|
||||
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();
|
||||
|
||||
try (ExecutorService executor =
|
||||
Executors.newFixedThreadPool(2, new NamedThreadFactory("dynamic-ranges"))) {
|
||||
// We ask for 2 ranges over popularity weighted by book count
|
||||
return DynamicRangeUtil.computeDynamicRanges(
|
||||
"Books", weightsSource, valuesSource, fc, 2, executor);
|
||||
}
|
||||
}
|
||||
|
||||
/** Runs the search example. */
|
||||
public List<DynamicRangeUtil.DynamicRangeInfo> runSearch() throws IOException {
|
||||
index();
|
||||
return search();
|
||||
}
|
||||
|
||||
/** Runs the search example and prints the results. */
|
||||
public static void main(String[] args) throws Exception {
|
||||
System.out.println("Dynamic range facets example:");
|
||||
System.out.println("-----------------------");
|
||||
DynamicRangeFacetsExample example = new DynamicRangeFacetsExample();
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> results = example.runSearch();
|
||||
for (DynamicRangeUtil.DynamicRangeInfo range : results) {
|
||||
System.out.printf(
|
||||
Locale.ROOT,
|
||||
"min: %d max: %d centroid: %f count: %d weight: %d%n",
|
||||
range.min(),
|
||||
range.max(),
|
||||
range.centroid(),
|
||||
range.count(),
|
||||
range.weight());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -385,6 +385,12 @@
|
|||
* <p>Sampling support is implemented in {@link
|
||||
* org.apache.lucene.facet.RandomSamplingFacetsCollector}.
|
||||
*
|
||||
* <h3 id="drf">Dynamic Range Facets</h3>
|
||||
*
|
||||
* <p>We can build ranges over numeric fields and count the number of values falling in each range.
* The values can be weighted and the number of desired ranges can be specified. To see an example,
* check {@link org.apache.lucene.demo.facet.DynamicRangeFacetsExample}.
|
||||
*
|
||||
* <h2 id="concurrent_indexing_search">Concurrent Indexing and Search</h2>
|
||||
*
|
||||
* <p>Sometimes, indexing is done once, and when the index is fully prepared, searching starts.
|
||||
|
|
|
@ -215,6 +215,17 @@ by score (i.e. relevance).</p>
|
|||
a more complete set of embeddings is needed to get reasonable results.
|
||||
</p>
|
||||
</div>
|
||||
<h2 id="Faceting" class="boxed">Working with facets</h2>
|
||||
<div class="section">
|
||||
<p>Lucene also provides aggregation capabilities over the index, e.g. counting results across a category
|
||||
(<a href="src-html/org/apache/lucene/demo/facet/SimpleFacetsExample.html">SimpleFacetsExample</a>),
|
||||
computing expressions (<a href=
|
||||
"src-html/org/apache/lucene/demo/facet/ExpressionAggregationFacetsExample.html">
|
||||
ExpressionAggregationFacetsExample</a>), and computing dynamic ranges (<a href=
|
||||
"src-html/org/apache/lucene/demo/facet/DynamicRangeFacetsExample.html">DynamicRangeFacetsExample</a>).
|
||||
For more details, see the dedicated
|
||||
<a href="org/apache/lucene/demo/facet/package-summary.html">faceting guide</a>.
|
||||
</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.demo.facet;
|
||||
|
||||
import java.util.List;
|
||||
import org.apache.lucene.facet.range.DynamicRangeUtil;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestDynamicRangeFacetsExample extends LuceneTestCase {
|
||||
@Test
|
||||
public void testExample() throws Exception {
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> res = new DynamicRangeFacetsExample().runSearch();
|
||||
assertEquals(
|
||||
List.of(
|
||||
new DynamicRangeUtil.DynamicRangeInfo(2, 137, 63, 75, 69d),
|
||||
new DynamicRangeUtil.DynamicRangeInfo(3, 83, 79, 96, 86)),
|
||||
res);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,277 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet.range;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Future;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.LongValues;
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
|
||||
/**
|
||||
* Methods to create dynamic ranges for numeric fields.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class DynamicRangeUtil {
|
||||
|
||||
private DynamicRangeUtil() {}
|
||||
|
||||
/**
|
||||
* Construct dynamic ranges using the specified weight field to generate equi-weight range for the
|
||||
* specified numeric bin field
|
||||
*
|
||||
* @param weightFieldName Name of the specified weight field
|
||||
* @param weightValueSource Value source of the weight field
|
||||
* @param fieldValueSource Value source of the value field
|
||||
* @param facetsCollector FacetsCollector
|
||||
* @param topN Number of requested ranges
|
||||
* @param exec An executor service that is used to do the computation
|
||||
* @return A list of DynamicRangeInfo that contains count, relevance, min, max, and centroid for
|
||||
* each range
|
||||
*/
|
||||
public static List<DynamicRangeInfo> computeDynamicRanges(
|
||||
String weightFieldName,
|
||||
LongValuesSource weightValueSource,
|
||||
LongValuesSource fieldValueSource,
|
||||
FacetsCollector facetsCollector,
|
||||
int topN,
|
||||
ExecutorService exec)
|
||||
throws IOException {
|
||||
|
||||
List<FacetsCollector.MatchingDocs> matchingDocsList = facetsCollector.getMatchingDocs();
|
||||
int totalDoc = matchingDocsList.stream().mapToInt(matchingDoc -> matchingDoc.totalHits).sum();
|
||||
long[] values = new long[totalDoc];
|
||||
long[] weights = new long[totalDoc];
|
||||
long totalWeight = 0;
|
||||
int overallLength = 0;
|
||||
|
||||
List<Future<?>> futures = new ArrayList<>();
|
||||
List<SegmentTask> tasks = new ArrayList<>();
|
||||
for (FacetsCollector.MatchingDocs matchingDocs : matchingDocsList) {
|
||||
if (matchingDocs.totalHits > 0) {
|
||||
SegmentOutput segmentOutput = new SegmentOutput(matchingDocs.totalHits);
|
||||
|
||||
// [1] retrieve values and associated weights concurrently
|
||||
SegmentTask task =
|
||||
new SegmentTask(matchingDocs, fieldValueSource, weightValueSource, segmentOutput);
|
||||
tasks.add(task);
|
||||
futures.add(exec.submit(task));
|
||||
}
|
||||
}
|
||||
|
||||
// [2] wait for all segment runs to finish
|
||||
for (Future<?> future : futures) {
|
||||
try {
|
||||
future.get();
|
||||
} catch (InterruptedException ie) {
|
||||
throw new RuntimeException(ie);
|
||||
} catch (ExecutionException ee) {
|
||||
IOUtils.rethrowAlways(ee.getCause());
|
||||
}
|
||||
}
|
||||
|
||||
// [3] merge the segment value and weight arrays into one array respectively and update the
|
||||
// total weights
|
||||
// and valid value length
|
||||
for (SegmentTask task : tasks) {
|
||||
SegmentOutput curSegmentOutput = task.segmentOutput;
|
||||
// if segment total weight overflows, return null
|
||||
if (curSegmentOutput == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
assert curSegmentOutput.values.length == curSegmentOutput.weights.length;
|
||||
|
||||
try {
|
||||
totalWeight = Math.addExact(curSegmentOutput.segmentTotalWeight, totalWeight);
|
||||
} catch (ArithmeticException ae) {
|
||||
throw new IllegalArgumentException(
|
||||
"weight field \"" + weightFieldName + "\": long totalWeight value out of bounds", ae);
|
||||
}
|
||||
|
||||
int currSegmentLen = curSegmentOutput.segmentIdx;
|
||||
System.arraycopy(curSegmentOutput.values, 0, values, overallLength, currSegmentLen);
|
||||
System.arraycopy(curSegmentOutput.weights, 0, weights, overallLength, currSegmentLen);
|
||||
overallLength += currSegmentLen;
|
||||
}
|
||||
return computeDynamicNumericRanges(values, weights, overallLength, totalWeight, topN);
|
||||
}
|
||||
|
||||
private static class SegmentTask implements Callable<Void> {
|
||||
private final FacetsCollector.MatchingDocs matchingDocs;
|
||||
private final DocIdSetIterator matchingParentDocsItr;
|
||||
private final LongValuesSource fieldValueSource;
|
||||
private final LongValuesSource weightValueSource;
|
||||
private SegmentOutput segmentOutput;
|
||||
|
||||
SegmentTask(
|
||||
FacetsCollector.MatchingDocs matchingDocs,
|
||||
LongValuesSource fieldValueSource,
|
||||
LongValuesSource weightValueSource,
|
||||
SegmentOutput segmentOutput)
|
||||
throws IOException {
|
||||
this.matchingDocs = matchingDocs;
|
||||
this.matchingParentDocsItr = matchingDocs.bits.iterator();
|
||||
this.fieldValueSource = fieldValueSource;
|
||||
this.weightValueSource = weightValueSource;
|
||||
this.segmentOutput = segmentOutput;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Void call() throws Exception {
|
||||
LongValues fieldValue = fieldValueSource.getValues(matchingDocs.context, null);
|
||||
LongValues weightValue = weightValueSource.getValues(matchingDocs.context, null);
|
||||
for (int doc = matchingParentDocsItr.nextDoc();
|
||||
doc != DocIdSetIterator.NO_MORE_DOCS;
|
||||
doc = matchingParentDocsItr.nextDoc()) {
|
||||
// If this doc doesn't have a weight, we skip it.
|
||||
if (fieldValue.advanceExact(doc) == false || weightValue.advanceExact(doc) == false) {
|
||||
continue;
|
||||
}
|
||||
|
||||
long curValue = fieldValue.longValue();
|
||||
|
||||
long curWeight = weightValue.longValue();
|
||||
// We skip weights equal to zero, otherwise they can skew the ranges.
|
||||
// Imagine all the weights were zero - any ranges would be valid.
|
||||
if (curWeight == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
segmentOutput.values[segmentOutput.segmentIdx] = curValue;
|
||||
segmentOutput.weights[segmentOutput.segmentIdx] = curWeight;
|
||||
try {
|
||||
segmentOutput.segmentTotalWeight =
|
||||
Math.addExact(segmentOutput.segmentTotalWeight, curWeight);
|
||||
} catch (ArithmeticException ae) {
|
||||
throw new IllegalArgumentException("segment long totalWeight value out of bounds", ae);
|
||||
}
|
||||
segmentOutput.segmentIdx++;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Holds field value array, weight array, totalWeight, valid value index for each segment */
|
||||
private static final class SegmentOutput {
|
||||
private final long[] values;
|
||||
private final long[] weights;
|
||||
private long segmentTotalWeight = 0;
|
||||
private int segmentIdx = 0;
|
||||
|
||||
public SegmentOutput(int hitsLength) {
|
||||
this.values = new long[hitsLength];
|
||||
this.weights = new long[hitsLength];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute dynamic numeric ranges using weights.
|
||||
*
|
||||
* @param values an array that contains the values of matching documents
|
||||
* @param weights an array that contains the weights of matching documents
|
||||
* @param len actual length of values and weights
|
||||
* @param totalWeight the sum of weight values
|
||||
* @param topN the requested top-n parameter
|
||||
* @return A list of DynamicRangeInfo that contains count, relevance, min, max, and centroid
|
||||
* values for each range. The size of dynamic ranges may not be exactly equal to top-N. top-N
|
||||
* is used to compute the equi-weight per bin.
|
||||
*/
|
||||
public static List<DynamicRangeInfo> computeDynamicNumericRanges(
|
||||
long[] values, long[] weights, int len, long totalWeight, int topN) {
|
||||
assert values.length == weights.length && len <= values.length && len >= 0;
|
||||
assert topN >= 0;
|
||||
List<DynamicRangeInfo> dynamicRangeResult = new ArrayList<>();
|
||||
if (len == 0 || topN == 0) {
|
||||
return dynamicRangeResult;
|
||||
}
|
||||
|
||||
new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected int compare(int index1, int index2) {
|
||||
int cmp = Long.compare(values[index1], values[index2]);
|
||||
if (cmp == 0) {
|
||||
// If the values are equal, sort based on the weights.
|
||||
// Any weight order is correct as long as it's deterministic.
|
||||
return Long.compare(weights[index1], weights[index2]);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void swap(int index1, int index2) {
|
||||
long tmp = values[index1];
|
||||
values[index1] = values[index2];
|
||||
values[index2] = tmp;
|
||||
tmp = weights[index1];
|
||||
weights[index1] = weights[index2];
|
||||
weights[index2] = tmp;
|
||||
}
|
||||
}.sort(0, len);
|
||||
|
||||
long accuWeight = 0;
|
||||
long valueSum = 0;
|
||||
int count = 0;
|
||||
int minIdx = 0;
|
||||
|
||||
double rangeWeightTarget = (double) totalWeight / Math.min(topN, len);
|
||||
|
||||
for (int i = 0; i < len; i++) {
|
||||
accuWeight += weights[i];
|
||||
valueSum += values[i];
|
||||
count++;
|
||||
|
||||
if (accuWeight >= rangeWeightTarget) {
|
||||
dynamicRangeResult.add(
|
||||
new DynamicRangeInfo(
|
||||
count, accuWeight, values[minIdx], values[i], (double) valueSum / count));
|
||||
count = 0;
|
||||
accuWeight = 0;
|
||||
valueSum = 0;
|
||||
minIdx = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// capture the remaining values to create the last range
|
||||
if (minIdx < len) {
|
||||
dynamicRangeResult.add(
|
||||
new DynamicRangeInfo(
|
||||
count, accuWeight, values[minIdx], values[len - 1], (double) valueSum / count));
|
||||
}
|
||||
return dynamicRangeResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds parameters of a dynamic numeric range.
|
||||
*
|
||||
* @param count the number of items in the range
|
||||
* @param weight the summed weight of the items in the range
|
||||
* @param min the lower bound of the range (inclusive)
|
||||
* @param max the upper bound of the range (inclusive)
|
||||
* @param centroid the average value in the range
|
||||
*/
|
||||
public record DynamicRangeInfo(int count, long weight, long min, long max, double centroid) {}
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet.range;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
|
||||
public class TestDynamicRangeUtil extends LuceneTestCase {
|
||||
public void testComputeDynamicNumericRangesBasic() {
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>();
|
||||
long[] values = new long[1000];
|
||||
long[] weights = new long[1000];
|
||||
|
||||
long totalWeight = 0;
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
values[i] = i + 1;
|
||||
weights[i] = i;
|
||||
totalWeight += i;
|
||||
}
|
||||
|
||||
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(501, 125250L, 1L, 501L, 251D));
|
||||
expectedRangeInfoList.add(
|
||||
new DynamicRangeUtil.DynamicRangeInfo(207, 125028L, 502L, 708L, 605D));
|
||||
expectedRangeInfoList.add(
|
||||
new DynamicRangeUtil.DynamicRangeInfo(159, 125133L, 709L, 867L, 788D));
|
||||
expectedRangeInfoList.add(
|
||||
new DynamicRangeUtil.DynamicRangeInfo(133, 124089L, 868L, 1000L, 934D));
|
||||
assertDynamicNumericRangeResults(values, weights, 4, totalWeight, expectedRangeInfoList);
|
||||
}
|
||||
|
||||
public void testComputeDynamicNumericRangesWithSameValues() {
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>();
|
||||
long totalWeight = 0;
|
||||
long[] values = new long[100];
|
||||
long[] weights = new long[100];
|
||||
for (int i = 0; i < 100; i++) {
|
||||
values[i] = 50;
|
||||
weights[i] = i;
|
||||
totalWeight += i;
|
||||
}
|
||||
|
||||
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(51, 1275L, 50L, 50L, 50D));
|
||||
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(21, 1281L, 50L, 50L, 50D));
|
||||
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(16, 1272L, 50L, 50L, 50D));
|
||||
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(12, 1122L, 50L, 50L, 50D));
|
||||
|
||||
assertDynamicNumericRangeResults(values, weights, 4, totalWeight, expectedRangeInfoList);
|
||||
}
|
||||
|
||||
public void testComputeDynamicNumericRangesWithOneValue() {
|
||||
long[] values = new long[] {50};
|
||||
long[] weights = new long[] {1};
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>();
|
||||
|
||||
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 1L, 50L, 50L, 50D));
|
||||
assertDynamicNumericRangeResults(values, weights, 4, 1, expectedRangeInfoList);
|
||||
}
|
||||
|
||||
public void testComputeDynamicNumericRangesWithOneLargeWeight() {
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>();
|
||||
long[] values = new long[] {45, 32, 52, 14, 455, 342, 53};
|
||||
long[] weights = new long[] {143, 23, 1, 52343, 53, 12, 2534};
|
||||
|
||||
// value 14 has its own bin since the weight is large, and the rest of values fall the other bin
|
||||
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 52343, 14L, 14L, 14D));
|
||||
expectedRangeInfoList.add(
|
||||
new DynamicRangeUtil.DynamicRangeInfo(6, 2766, 32L, 455L, 163.16666666666666D));
|
||||
assertDynamicNumericRangeResults(values, weights, 4, 55109, expectedRangeInfoList);
|
||||
}
|
||||
|
||||
private static void assertDynamicNumericRangeResults(
|
||||
long[] values,
|
||||
long[] weights,
|
||||
int topN,
|
||||
long totalWeight,
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> expectedDynamicRangeResult) {
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> mockDynamicRangeResult =
|
||||
DynamicRangeUtil.computeDynamicNumericRanges(
|
||||
values, weights, values.length, totalWeight, topN);
|
||||
assertTrue(compareDynamicRangeResult(mockDynamicRangeResult, expectedDynamicRangeResult));
|
||||
}
|
||||
|
||||
private static boolean compareDynamicRangeResult(
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> mockResult,
|
||||
List<DynamicRangeUtil.DynamicRangeInfo> expectedResult) {
|
||||
return mockResult.size() == expectedResult.size() && mockResult.containsAll(expectedResult);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue