From 0d579338eee8ef29ef8a222e87d987ebb8cd7577 Mon Sep 17 00:00:00 2001 From: Stefan Vodita <41467371+stefanvodita@users.noreply.github.com> Date: Tue, 10 Sep 2024 17:24:28 +0100 Subject: [PATCH] Add dynamic range facets (#13689) --- lucene/CHANGES.txt | 7 + .../demo/facet/DynamicRangeFacetsExample.java | 158 ++++++++++ .../lucene/demo/facet/package-info.java | 6 + lucene/demo/src/java/overview.html | 13 +- .../facet/TestDynamicRangeFacetsExample.java | 34 +++ .../lucene/facet/range/DynamicRangeUtil.java | 277 ++++++++++++++++++ .../facet/range/TestDynamicRangeUtil.java | 103 +++++++ 7 files changed, 597 insertions(+), 1 deletion(-) create mode 100644 lucene/demo/src/java/org/apache/lucene/demo/facet/DynamicRangeFacetsExample.java create mode 100644 lucene/demo/src/test/org/apache/lucene/demo/facet/TestDynamicRangeFacetsExample.java create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/range/DynamicRangeUtil.java create mode 100644 lucene/facet/src/test/org/apache/lucene/facet/range/TestDynamicRangeUtil.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 12f7b960ccb..016ac462976 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -312,6 +312,13 @@ New Features * GITHUB#13678: Add support JDK 23 to the Panama Vectorization Provider. (Chris Hegarty) +* GITHUB#13689: Add a new faceting feature, dynamic range facets, which automatically picks a balanced set of numeric + ranges based on the distribution of values that occur across all hits. For use cases that have a highly variable + numeric doc values field, such as "price" in an e-commerce application, this facet method is powerful as it allows the + presented ranges to adapt depending on what hits the query actually matches. This is in contrast to existing range + faceting that requires the application to provide the specific fixed ranges up front. 
(Yuting Gan, Greg Miller, + Stefan Vodita) + Improvements --------------------- diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/DynamicRangeFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/DynamicRangeFacetsExample.java new file mode 100644 index 00000000000..5b188d42927 --- /dev/null +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/DynamicRangeFacetsExample.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.demo.facet; + +import java.io.IOException; +import java.util.List; +import java.util.Locale; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsCollectorManager; +import org.apache.lucene.facet.FacetsConfig; +import org.apache.lucene.facet.range.DynamicRangeUtil; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LongValuesSource; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.NamedThreadFactory; + +/** + * Demo dynamic range faceting. + * + *

The results look like so: min: 63 max: 75 centroid: 69.000000 count: 2 weight: 137 min: 79 + * max: 96 centroid: 86.000000 count: 3 weight: 83 + * + *

We've computed dynamic ranges over popularity weighted by number of books. We can read the + * results as so: There are 137 books written by authors in the 63 to 75 popularity range. + * + *

How it works: We collect all the values (popularity) and their weights (book counts). We sort
+ * the values and find the approximate weight per range. In this case the total weight is 220 (total
+ * books by all authors) and we want 2 ranges, so we're aiming for 110 books in each range. We add
+ * Chesterton to the first range, since he is the least popular author. He's written 90 books, so
+ * the range's weight is now 90. We add Tolstoy to the first range, since he is next in line of
+ * popularity. He's written another 47 books, which brings the total weight to 137. We're over the
+ * 110 target weight, so we stop and add everyone left to the second range.
+ */
+public class DynamicRangeFacetsExample {
+
+  private final Directory indexDir = new ByteBuffersDirectory();
+  private final FacetsConfig config = new FacetsConfig();
+
+  /** Empty constructor */
+  public DynamicRangeFacetsExample() {}
+
+  /**
+   * Build the example index: five authors, each with a "Popularity" and a "Books" numeric doc
+   * values field.
+   */
+  private void index() throws IOException {
+    // try-with-resources closes the writer even when adding a document fails
+    try (IndexWriter indexWriter =
+        new IndexWriter(
+            indexDir,
+            new IndexWriterConfig(new WhitespaceAnalyzer())
+                .setOpenMode(IndexWriterConfig.OpenMode.CREATE))) {
+
+      Document doc = new Document();
+      doc.add(new StringField("Author", "J. R. R. Tolkien", Field.Store.NO));
+      doc.add(new NumericDocValuesField("Popularity", 96));
+      doc.add(new NumericDocValuesField("Books", 24));
+      indexWriter.addDocument(config.build(doc));
+
+      doc = new Document();
+      doc.add(new StringField("Author", "C. S. Lewis", Field.Store.NO));
+      doc.add(new NumericDocValuesField("Popularity", 83));
+      doc.add(new NumericDocValuesField("Books", 48));
+      indexWriter.addDocument(config.build(doc));
+
+      doc = new Document();
+      doc.add(new StringField("Author", "G. K. Chesterton", Field.Store.NO));
+      doc.add(new NumericDocValuesField("Popularity", 63));
+      doc.add(new NumericDocValuesField("Books", 90));
+      indexWriter.addDocument(config.build(doc));
+      // NOTE(review): presumably committed mid-way so the example spans more than one segment
+      indexWriter.commit();
+
+      doc = new Document();
+      doc.add(new StringField("Author", "Fyodor Dostoevsky", Field.Store.NO));
+      doc.add(new NumericDocValuesField("Popularity", 79));
+      doc.add(new NumericDocValuesField("Books", 11));
+      indexWriter.addDocument(config.build(doc));
+
+      doc = new Document();
+      doc.add(new StringField("Author", "Leo Tolstoy", Field.Store.NO));
+      doc.add(new NumericDocValuesField("Popularity", 75));
+      doc.add(new NumericDocValuesField("Books", 47));
+      indexWriter.addDocument(config.build(doc));
+    }
+  }
+
+  /**
+   * User runs a query and counts facets.
+   *
+   * @return the dynamic ranges computed over "Popularity", weighted by "Books"
+   */
+  private List<DynamicRangeUtil.DynamicRangeInfo> search() throws IOException {
+    LongValuesSource valuesSource = LongValuesSource.fromLongField("Popularity");
+    LongValuesSource weightsSource = LongValuesSource.fromLongField("Books");
+
+    // Both the reader and the executor are released when the block exits (executor first).
+    // The returned records only hold primitives, so they stay valid after the reader is closed.
+    try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
+        ExecutorService executor =
+            Executors.newFixedThreadPool(2, new NamedThreadFactory("dynamic-ranges"))) {
+      IndexSearcher searcher = new IndexSearcher(indexReader);
+
+      // Aggregates the facet counts
+      FacetsCollectorManager fcm = new FacetsCollectorManager();
+
+      // MatchAllDocsQuery is for "browsing" (counts facets
+      // for all non-deleted docs in the index); normally
+      // you'd use a "normal" query:
+      FacetsCollector fc =
+          FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm)
+              .facetsCollector();
+
+      // We ask for 2 ranges over popularity weighted by book count
+      return DynamicRangeUtil.computeDynamicRanges(
+          "Books", weightsSource, valuesSource, fc, 2, executor);
+    }
+  }
+
+  /** Runs the search example. */
+  public List<DynamicRangeUtil.DynamicRangeInfo> runSearch() throws IOException {
+    index();
+    return search();
+  }
+
+  /** Runs the search example and prints the results. */
+  public static void main(String[] args) throws Exception {
+    System.out.println("Dynamic range facets example:");
+    System.out.println("-----------------------");
+    DynamicRangeFacetsExample example = new DynamicRangeFacetsExample();
+    List<DynamicRangeUtil.DynamicRangeInfo> results = example.runSearch();
+    for (DynamicRangeUtil.DynamicRangeInfo range : results) {
+      System.out.printf(
+          Locale.ROOT,
+          "min: %d max: %d centroid: %f count: %d weight: %d%n",
+          range.min(),
+          range.max(),
+          range.centroid(),
+          range.count(),
+          range.weight());
+    }
+  }
+}
diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/package-info.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/package-info.java
index 0e0713e18d2..2fbf29999ce 100644
--- a/lucene/demo/src/java/org/apache/lucene/demo/facet/package-info.java
+++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/package-info.java
@@ -385,6 +385,12 @@
 *

Sampling support is implemented in {@link * org.apache.lucene.facet.RandomSamplingFacetsCollector}. * + *

Dynamic Range Facets

+ * + * We can build ranges over numeric fields and count the number of values falling in each range. The + * values can be weighted and the number of desired ranges can be specified. To see an example, + * check {@link org.apache.lucene.demo.facet.DynamicRangeFacetsExample}. + * * * *

Sometimes, indexing is done once, and when the index is fully prepared, searching starts. diff --git a/lucene/demo/src/java/overview.html b/lucene/demo/src/java/overview.html index fd62e2b45f7..f904aca8380 100644 --- a/lucene/demo/src/java/overview.html +++ b/lucene/demo/src/java/overview.html @@ -215,6 +215,17 @@ by score (i.e. relevance).

a more complete set of embeddings is needed to get reasonable results.

+

Working with facets

+
+

Lucene also provides aggregation capabilities over the index, e.g. counting results across a category + (SimpleFacetsExample), + computing expressions ( + ExpressionAggregationFacetsExample), and computing dynamic ranges (DynamicRangeFacetsExample). + For more details, see the dedicated + faceting guide. +

+
- diff --git a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestDynamicRangeFacetsExample.java b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestDynamicRangeFacetsExample.java new file mode 100644 index 00000000000..8724f2dcf27 --- /dev/null +++ b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestDynamicRangeFacetsExample.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.lucene.demo.facet;
+
+import java.util.List;
+import org.apache.lucene.facet.range.DynamicRangeUtil;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.junit.Test;
+
+/** Verifies the ranges computed by {@link DynamicRangeFacetsExample}. */
+public class TestDynamicRangeFacetsExample extends LuceneTestCase {
+  /** Expects two ranges: (count 2, weight 137, 63..75) and (count 3, weight 83, 79..96). */
+  @Test
+  public void testExample() throws Exception {
+    List<DynamicRangeUtil.DynamicRangeInfo> res = new DynamicRangeFacetsExample().runSearch();
+    assertEquals(
+        List.of(
+            new DynamicRangeUtil.DynamicRangeInfo(2, 137, 63, 75, 69d),
+            new DynamicRangeUtil.DynamicRangeInfo(3, 83, 79, 96, 86d)),
+        res);
+  }
+}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DynamicRangeUtil.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DynamicRangeUtil.java
new file mode 100644
index 00000000000..9d55b64eb55
--- /dev/null
+++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DynamicRangeUtil.java
@@ -0,0 +1,277 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.facet.range;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import org.apache.lucene.facet.FacetsCollector;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LongValues;
+import org.apache.lucene.search.LongValuesSource;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.InPlaceMergeSorter;
+
+/**
+ * Methods to create dynamic ranges for numeric fields.
+ *
+ * @lucene.experimental
+ */
+public final class DynamicRangeUtil {
+
+  private DynamicRangeUtil() {}
+
+  /**
+   * Construct dynamic ranges over the specified numeric value field, using the specified weight
+   * field so that each range carries approximately the same total weight.
+   *
+   * <p>Matching documents that have no value or no weight, or whose weight is zero, are ignored.
+   * One task per segment with hits is submitted to {@code exec}; this method blocks until all of
+   * them have finished.
+   *
+   * @param weightFieldName Name of the specified weight field (only used in error messages)
+   * @param weightValueSource Value source of the weight field
+   * @param fieldValueSource Value source of the value field
+   * @param facetsCollector FacetsCollector
+   * @param topN Number of requested ranges
+   * @param exec An executor service that is used to do the computation
+   * @return A list of DynamicRangeInfo that contains count, weight, min, max, and centroid for
+   *     each range
+   * @throws IllegalArgumentException if the summed weights overflow a long
+   */
+  public static List<DynamicRangeInfo> computeDynamicRanges(
+      String weightFieldName,
+      LongValuesSource weightValueSource,
+      LongValuesSource fieldValueSource,
+      FacetsCollector facetsCollector,
+      int topN,
+      ExecutorService exec)
+      throws IOException {
+
+    List<FacetsCollector.MatchingDocs> matchingDocsList = facetsCollector.getMatchingDocs();
+    int totalDoc = matchingDocsList.stream().mapToInt(matchingDoc -> matchingDoc.totalHits).sum();
+    long[] values = new long[totalDoc];
+    long[] weights = new long[totalDoc];
+    long totalWeight = 0;
+    int overallLength = 0;
+
+    List<Future<Void>> futures = new ArrayList<>();
+    List<SegmentTask> tasks = new ArrayList<>();
+    for (FacetsCollector.MatchingDocs matchingDocs : matchingDocsList) {
+      if (matchingDocs.totalHits > 0) {
+        SegmentOutput segmentOutput = new SegmentOutput(matchingDocs.totalHits);
+
+        // [1] retrieve values and associated weights concurrently
+        SegmentTask task =
+            new SegmentTask(matchingDocs, fieldValueSource, weightValueSource, segmentOutput);
+        tasks.add(task);
+        futures.add(exec.submit(task));
+      }
+    }
+
+    // [2] wait for all segment runs to finish
+    for (Future<Void> future : futures) {
+      try {
+        future.get();
+      } catch (InterruptedException ie) {
+        // Restore the interrupt flag so callers further up the stack can still observe it.
+        Thread.currentThread().interrupt();
+        throw new RuntimeException(ie);
+      } catch (ExecutionException ee) {
+        // Surfaces the task's own failure, e.g. the IllegalArgumentException thrown on overflow.
+        IOUtils.rethrowAlways(ee.getCause());
+      }
+    }
+
+    // [3] merge the segment value and weight arrays into one array respectively and update the
+    // total weights and valid value length
+    for (SegmentTask task : tasks) {
+      SegmentOutput curSegmentOutput = task.segmentOutput;
+
+      assert curSegmentOutput.values.length == curSegmentOutput.weights.length;
+
+      try {
+        totalWeight = Math.addExact(curSegmentOutput.segmentTotalWeight, totalWeight);
+      } catch (ArithmeticException ae) {
+        throw new IllegalArgumentException(
+            "weight field \"" + weightFieldName + "\": long totalWeight value out of bounds", ae);
+      }
+
+      // Only the first segmentIdx entries were filled in; skipped docs leave the tail unused.
+      int currSegmentLen = curSegmentOutput.segmentIdx;
+      System.arraycopy(curSegmentOutput.values, 0, values, overallLength, currSegmentLen);
+      System.arraycopy(curSegmentOutput.weights, 0, weights, overallLength, currSegmentLen);
+      overallLength += currSegmentLen;
+    }
+    return computeDynamicNumericRanges(values, weights, overallLength, totalWeight, topN);
+  }
+
+  /** Collects the values and non-zero weights of one segment's matching docs. */
+  private static class SegmentTask implements Callable<Void> {
+    private final FacetsCollector.MatchingDocs matchingDocs;
+    private final DocIdSetIterator matchingParentDocsItr;
+    private final LongValuesSource fieldValueSource;
+    private final LongValuesSource weightValueSource;
+    private final SegmentOutput segmentOutput;
+
+    SegmentTask(
+        FacetsCollector.MatchingDocs matchingDocs,
+        LongValuesSource fieldValueSource,
+        LongValuesSource weightValueSource,
+        SegmentOutput segmentOutput)
+        throws IOException {
+      this.matchingDocs = matchingDocs;
+      this.matchingParentDocsItr = matchingDocs.bits.iterator();
+      this.fieldValueSource = fieldValueSource;
+      this.weightValueSource = weightValueSource;
+      this.segmentOutput = segmentOutput;
+    }
+
+    /**
+     * Fills {@code segmentOutput} with the (value, weight) pair of every matching doc that has
+     * both a value and a non-zero weight.
+     *
+     * @throws IllegalArgumentException if the segment's summed weights overflow a long
+     */
+    @Override
+    public Void call() throws Exception {
+      LongValues fieldValue = fieldValueSource.getValues(matchingDocs.context, null);
+      LongValues weightValue = weightValueSource.getValues(matchingDocs.context, null);
+      for (int doc = matchingParentDocsItr.nextDoc();
+          doc != DocIdSetIterator.NO_MORE_DOCS;
+          doc = matchingParentDocsItr.nextDoc()) {
+        // If this doc doesn't have a value or a weight, we skip it.
+        if (fieldValue.advanceExact(doc) == false || weightValue.advanceExact(doc) == false) {
+          continue;
+        }
+
+        long curValue = fieldValue.longValue();
+
+        long curWeight = weightValue.longValue();
+        // We skip weights equal to zero, otherwise they can skew the ranges.
+        // Imagine all the weights were zero - any ranges would be valid.
+        if (curWeight == 0) {
+          continue;
+        }
+
+        segmentOutput.values[segmentOutput.segmentIdx] = curValue;
+        segmentOutput.weights[segmentOutput.segmentIdx] = curWeight;
+        try {
+          segmentOutput.segmentTotalWeight =
+              Math.addExact(segmentOutput.segmentTotalWeight, curWeight);
+        } catch (ArithmeticException ae) {
+          throw new IllegalArgumentException("segment long totalWeight value out of bounds", ae);
+        }
+        segmentOutput.segmentIdx++;
+      }
+      return null;
+    }
+  }
+
+  /** Holds field value array, weight array, totalWeight, valid value index for each segment */
+  private static final class SegmentOutput {
+    private final long[] values;
+    private final long[] weights;
+    private long segmentTotalWeight = 0;
+    // Number of entries of values/weights that have been filled in so far
+    private int segmentIdx = 0;
+
+    public SegmentOutput(int hitsLength) {
+      this.values = new long[hitsLength];
+      this.weights = new long[hitsLength];
+    }
+  }
+
+  /**
+   * Compute dynamic numeric ranges using weights.
+   *
+   * <p>The first {@code len} entries of {@code values} and {@code weights} are sorted in place (by
+   * value, then by weight), so the caller's arrays are reordered by this call.
+   *
+   * @param values an array that contains the values of matching documents
+   * @param weights an array that contains the weights of matching documents
+   * @param len actual length of values and weights
+   * @param totalWeight the sum of weight values
+   * @param topN the requested top-n parameter
+   * @return A list of DynamicRangeInfo that contains count, weight, min, max, and centroid values
+   *     for each range; empty if {@code len} or {@code topN} is zero. The size of dynamic ranges
+   *     may not be exactly equal to top-N. top-N is used to compute the equi-weight per bin.
+   */
+  public static List<DynamicRangeInfo> computeDynamicNumericRanges(
+      long[] values, long[] weights, int len, long totalWeight, int topN) {
+    assert values.length == weights.length && len <= values.length && len >= 0;
+    assert topN >= 0;
+    List<DynamicRangeInfo> dynamicRangeResult = new ArrayList<>();
+    if (len == 0 || topN == 0) {
+      return dynamicRangeResult;
+    }
+
+    // Sort the two parallel arrays together so that values[i] and weights[i] stay paired.
+    new InPlaceMergeSorter() {
+      @Override
+      protected int compare(int index1, int index2) {
+        int cmp = Long.compare(values[index1], values[index2]);
+        if (cmp == 0) {
+          // If the values are equal, sort based on the weights.
+          // Any weight order is correct as long as it's deterministic.
+          return Long.compare(weights[index1], weights[index2]);
+        }
+        return cmp;
+      }
+
+      @Override
+      protected void swap(int index1, int index2) {
+        long tmp = values[index1];
+        values[index1] = values[index2];
+        values[index2] = tmp;
+        tmp = weights[index1];
+        weights[index1] = weights[index2];
+        weights[index2] = tmp;
+      }
+    }.sort(0, len);
+
+    long accuWeight = 0;
+    long valueSum = 0;
+    int count = 0;
+    int minIdx = 0;
+
+    // Never aim for more ranges than there are entries.
+    double rangeWeightTarget = (double) totalWeight / Math.min(topN, len);
+
+    for (int i = 0; i < len; i++) {
+      accuWeight += weights[i];
+      valueSum += values[i];
+      count++;
+
+      // Close the current range as soon as it reaches the per-range target weight.
+      if (accuWeight >= rangeWeightTarget) {
+        dynamicRangeResult.add(
+            new DynamicRangeInfo(
+                count, accuWeight, values[minIdx], values[i], (double) valueSum / count));
+        count = 0;
+        accuWeight = 0;
+        valueSum = 0;
+        minIdx = i + 1;
+      }
+    }
+
+    // capture the remaining values to create the last range
+    if (minIdx < len) {
+      dynamicRangeResult.add(
+          new DynamicRangeInfo(
+              count, accuWeight, values[minIdx], values[len - 1], (double) valueSum / count));
+    }
+    return dynamicRangeResult;
+  }
+
+  /**
+   * Holds parameters of a dynamic numeric range.
+   *
+   * @param count the number of items in the range
+   * @param weight the summed weight of the items in the range
+   * @param min the lower bound of the range (inclusive)
+   * @param max the upper bound of the range (inclusive)
+   * @param centroid the average value in the range
+   */
+  public record DynamicRangeInfo(int count, long weight, long min, long max, double centroid) {}
+}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/range/TestDynamicRangeUtil.java b/lucene/facet/src/test/org/apache/lucene/facet/range/TestDynamicRangeUtil.java
new file mode 100644
index 00000000000..db78b03e6e3
--- /dev/null
+++ b/lucene/facet/src/test/org/apache/lucene/facet/range/TestDynamicRangeUtil.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.facet.range;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.lucene.tests.util.LuceneTestCase;
+
+/** Tests for {@link DynamicRangeUtil#computeDynamicNumericRanges}. */
+public class TestDynamicRangeUtil extends LuceneTestCase {
+  /** 1000 distinct values with increasing weights, split into 4 ranges. */
+  public void testComputeDynamicNumericRangesBasic() {
+    List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>();
+    long[] values = new long[1000];
+    long[] weights = new long[1000];
+
+    long totalWeight = 0;
+    for (int i = 0; i < 1000; i++) {
+      values[i] = i + 1;
+      weights[i] = i;
+      totalWeight += i;
+    }
+
+    expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(501, 125250L, 1L, 501L, 251D));
+    expectedRangeInfoList.add(
+        new DynamicRangeUtil.DynamicRangeInfo(207, 125028L, 502L, 708L, 605D));
+    expectedRangeInfoList.add(
+        new DynamicRangeUtil.DynamicRangeInfo(159, 125133L, 709L, 867L, 788D));
+    expectedRangeInfoList.add(
+        new DynamicRangeUtil.DynamicRangeInfo(133, 124089L, 868L, 1000L, 934D));
+    assertDynamicNumericRangeResults(values, weights, 4, totalWeight, expectedRangeInfoList);
+  }
+
+  /** All values are identical, so only the weights decide where the ranges are cut. */
+  public void testComputeDynamicNumericRangesWithSameValues() {
+    List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>();
+    long totalWeight = 0;
+    long[] values = new long[100];
+    long[] weights = new long[100];
+    for (int i = 0; i < 100; i++) {
+      values[i] = 50;
+      weights[i] = i;
+      totalWeight += i;
+    }
+
+    expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(51, 1275L, 50L, 50L, 50D));
+    expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(21, 1281L, 50L, 50L, 50D));
+    expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(16, 1272L, 50L, 50L, 50D));
+    expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(12, 1122L, 50L, 50L, 50D));
+
+    assertDynamicNumericRangeResults(values, weights, 4, totalWeight, expectedRangeInfoList);
+  }
+
+  /** A single value yields a single range even though 4 ranges are requested. */
+  public void testComputeDynamicNumericRangesWithOneValue() {
+    long[] values = new long[] {50};
+    long[] weights = new long[] {1};
+    List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>();
+
+    expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 1L, 50L, 50L, 50D));
+    assertDynamicNumericRangeResults(values, weights, 4, 1, expectedRangeInfoList);
+  }
+
+  /** One dominant weight yields fewer ranges than requested. */
+  public void testComputeDynamicNumericRangesWithOneLargeWeight() {
+    List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new ArrayList<>();
+    long[] values = new long[] {45, 32, 52, 14, 455, 342, 53};
+    long[] weights = new long[] {143, 23, 1, 52343, 53, 12, 2534};
+
+    // value 14 has its own bin since the weight is large, and the rest of the values fall into
+    // the other bin
+    expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 52343, 14L, 14L, 14D));
+    expectedRangeInfoList.add(
+        new DynamicRangeUtil.DynamicRangeInfo(6, 2766, 32L, 455L, 163.16666666666666D));
+    assertDynamicNumericRangeResults(values, weights, 4, 55109, expectedRangeInfoList);
+  }
+
+  /**
+   * Runs {@link DynamicRangeUtil#computeDynamicNumericRanges} over the whole of {@code values} and
+   * {@code weights} and checks that it returns exactly the expected ranges, in order.
+   */
+  private static void assertDynamicNumericRangeResults(
+      long[] values,
+      long[] weights,
+      int topN,
+      long totalWeight,
+      List<DynamicRangeUtil.DynamicRangeInfo> expectedDynamicRangeResult) {
+    List<DynamicRangeUtil.DynamicRangeInfo> actualDynamicRangeResult =
+        DynamicRangeUtil.computeDynamicNumericRanges(
+            values, weights, values.length, totalWeight, topN);
+    // List equality checks order and multiplicity and reports both lists on failure.
+    assertEquals(expectedDynamicRangeResult, actualDynamicRangeResult);
+  }
+}