GITHUB#11742: MatchingFacetSetsCounts#getTopChildren now returns top children instead of all children (#11764)

This commit is contained in:
Greg Miller 2022-09-13 06:50:52 -07:00 committed by GitHub
parent e491ef797c
commit 4463a0b271
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 173 additions and 1 deletions

View File

@ -85,6 +85,11 @@ Other
======================== Lucene 9.5.0 =======================
API Changes
---------------------
* GITHUB#11742: MatchingFacetSetsCounts#getTopChildren now properly returns "top" children instead
of all children. (Greg Miller)
Bug Fixes
---------------------

View File

@ -30,6 +30,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
/**
* Returns the counts for each given {@link FacetSet}
@ -156,7 +157,45 @@ public class MatchingFacetSetsCounts extends FacetCountsWithFilterQuery {
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
validateTopN(topN);
return getAllChildren(dim, path);
topN = Math.min(topN, counts.length);
PriorityQueue<Entry> pq =
new PriorityQueue<>(topN, () -> new Entry("", 0)) {
@Override
protected boolean lessThan(Entry a, Entry b) {
return compare(a.count, b.count, a.label, b.label) < 0;
}
};
int childCount = 0;
Entry reuse = pq.top();
for (int i = 0; i < counts.length; i++) {
int count = counts[i];
if (count > 0) {
childCount++;
String label = facetSetMatchers[i].label;
if (compare(reuse.count, count, reuse.label, label) < 0) {
reuse.label = label;
reuse.count = count;
reuse = pq.updateTop();
}
}
}
// Pop off any sentinel values in the case that we had fewer child labels with non-zero
// counts than the requested top-n:
while (childCount < pq.size()) {
pq.pop();
}
LabelAndValue[] labelValues = new LabelAndValue[Math.min(topN, childCount)];
for (int i = pq.size() - 1; i >= 0; i--) {
Entry e = pq.pop();
labelValues[i] = new LabelAndValue(e.label, e.count);
}
return new FacetResult(dim, path, totCount, labelValues, childCount);
}
@Override
@ -176,4 +215,22 @@ public class MatchingFacetSetsCounts extends FacetCountsWithFilterQuery {
return Arrays.stream(facetSetMatchers)
.anyMatch(facetSetMatcher -> facetSetMatcher.dims != dims);
}
private static int compare(int count1, int count2, String label1, String label2) {
int cmp = Integer.compare(count1, count2);
if (cmp == 0) {
cmp = label2.compareTo(label1);
}
return cmp;
}
private static final class Entry {
String label;
int count;
Entry(String label, int count) {
this.label = label;
this.count = count;
}
}
}

View File

@ -16,19 +16,30 @@
*/
package org.apache.lucene.facet.facetset;
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException;
import java.util.Locale;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
public class TestMatchingFacetSetsCounts extends FacetTestCase {
private static final int FORD_ORD = 100;
private static final int TOYOTA_ORD = 101;
private static final int CHEVY_ORD = 102;
private static final int NISSAN_ORD = 103;
private static final int[] MANUFACTURER_ORDS = {FORD_ORD, TOYOTA_ORD, CHEVY_ORD, NISSAN_ORD};
public void testInvalidTopN() throws IOException {
Directory d = newDirectory();
@ -87,4 +98,103 @@ public class TestMatchingFacetSetsCounts extends FacetTestCase {
r.close();
d.close();
}
public void testTopChildren() throws Exception {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);
// As a test scenario, we're faceting on the number of vehicles produced per day/make
// combination over the past 30 days:
final int numBins = 30;
final int[] expectedCounts = new int[numBins * MANUFACTURER_ORDS.length];
FacetSetMatcher[] facetSetMatchers = new FacetSetMatcher[numBins * MANUFACTURER_ORDS.length];
int totalDocs = 0;
int totalNonZeroBins = 0;
int index = 0;
for (int i = 0; i < numBins; i++) {
for (int ord : MANUFACTURER_ORDS) {
facetSetMatchers[index] =
new ExactFacetSetMatcher(
String.format(Locale.ROOT, "%d:%d", i, ord), new LongFacetSet(i, ord));
int carsManufactured = RandomNumbers.randomIntBetween(random(), 0, 100);
for (int k = 0; k < carsManufactured; k++) {
// Create a document for every vehicle produced:
Document doc = new Document();
doc.add(FacetSetsField.create("field", new LongFacetSet(i, ord)));
w.addDocument(doc);
}
if (carsManufactured > 0) {
totalNonZeroBins++;
}
totalDocs += carsManufactured;
expectedCounts[index] = carsManufactured;
index++;
}
}
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
FacetsCollector fc = s.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
Facets facets =
new MatchingFacetSetsCounts("field", fc, FacetSetDecoder::decodeLongs, facetSetMatchers);
// Sort by count (high-to-low) and tie-break on label, same as in
// MatchingFacetCounts#getTopChildren:
final int[] originalIndexes = new int[expectedCounts.length];
for (int i = 0; i < originalIndexes.length; i++) {
originalIndexes[i] = i;
}
new InPlaceMergeSorter() {
@Override
protected int compare(int i, int j) {
int cmp = Integer.compare(expectedCounts[j], expectedCounts[i]);
if (cmp == 0) {
int dayBinI = originalIndexes[i] / MANUFACTURER_ORDS.length;
int dayBinJ = originalIndexes[j] / MANUFACTURER_ORDS.length;
int ordIndexI = originalIndexes[i] % MANUFACTURER_ORDS.length;
int ordIndexJ = originalIndexes[j] % MANUFACTURER_ORDS.length;
String labelI =
String.format(Locale.ROOT, "%d:%d", dayBinI, MANUFACTURER_ORDS[ordIndexI]);
String labelJ =
String.format(Locale.ROOT, "%d:%d", dayBinJ, MANUFACTURER_ORDS[ordIndexJ]);
cmp = new BytesRef(labelI).compareTo(new BytesRef(labelJ));
}
return cmp;
}
@Override
protected void swap(int i, int j) {
int tmp = expectedCounts[i];
expectedCounts[i] = expectedCounts[j];
expectedCounts[j] = tmp;
tmp = originalIndexes[i];
originalIndexes[i] = originalIndexes[j];
originalIndexes[j] = tmp;
}
}.sort(0, expectedCounts.length);
final int topN = 10;
final LabelAndValue[] expected = new LabelAndValue[topN];
for (int i = 0; i < topN; i++) {
int count = expectedCounts[i];
int dayBin = originalIndexes[i] / MANUFACTURER_ORDS.length;
int ordIndex = originalIndexes[i] % MANUFACTURER_ORDS.length;
expected[i] =
new LabelAndValue(
String.format(Locale.ROOT, "%d:%d", dayBin, MANUFACTURER_ORDS[ordIndex]), count);
}
final FacetResult result = facets.getTopChildren(topN, "field");
assertFacetResult(result, "field", new String[0], totalNonZeroBins, totalDocs, expected);
r.close();
d.close();
}
}