mirror of https://github.com/apache/lucene.git
GITHUB#11742: MatchingFacetSetsCounts#getTopChildren now returns top children instead of all children (#11764)
This commit is contained in:
parent
e491ef797c
commit
4463a0b271
|
@ -85,6 +85,11 @@ Other
|
|||
|
||||
======================== Lucene 9.5.0 =======================
|
||||
|
||||
API Changes
|
||||
---------------------
|
||||
* GITHUB#11742: MatchingFacetSetsCounts#getTopChildren now properly returns "top" children instead
|
||||
of all children. (Greg Miller)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* Returns the counts for each given {@link FacetSet}
|
||||
|
@ -156,7 +157,45 @@ public class MatchingFacetSetsCounts extends FacetCountsWithFilterQuery {
|
|||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
validateTopN(topN);
|
||||
return getAllChildren(dim, path);
|
||||
|
||||
topN = Math.min(topN, counts.length);
|
||||
|
||||
PriorityQueue<Entry> pq =
|
||||
new PriorityQueue<>(topN, () -> new Entry("", 0)) {
|
||||
@Override
|
||||
protected boolean lessThan(Entry a, Entry b) {
|
||||
return compare(a.count, b.count, a.label, b.label) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
int childCount = 0;
|
||||
Entry reuse = pq.top();
|
||||
for (int i = 0; i < counts.length; i++) {
|
||||
int count = counts[i];
|
||||
if (count > 0) {
|
||||
childCount++;
|
||||
String label = facetSetMatchers[i].label;
|
||||
if (compare(reuse.count, count, reuse.label, label) < 0) {
|
||||
reuse.label = label;
|
||||
reuse.count = count;
|
||||
reuse = pq.updateTop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pop off any sentinel values in the case that we had fewer child labels with non-zero
|
||||
// counts than the requested top-n:
|
||||
while (childCount < pq.size()) {
|
||||
pq.pop();
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[Math.min(topN, childCount)];
|
||||
for (int i = pq.size() - 1; i >= 0; i--) {
|
||||
Entry e = pq.pop();
|
||||
labelValues[i] = new LabelAndValue(e.label, e.count);
|
||||
}
|
||||
|
||||
return new FacetResult(dim, path, totCount, labelValues, childCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -176,4 +215,22 @@ public class MatchingFacetSetsCounts extends FacetCountsWithFilterQuery {
|
|||
return Arrays.stream(facetSetMatchers)
|
||||
.anyMatch(facetSetMatcher -> facetSetMatcher.dims != dims);
|
||||
}
|
||||
|
||||
private static int compare(int count1, int count2, String label1, String label2) {
|
||||
int cmp = Integer.compare(count1, count2);
|
||||
if (cmp == 0) {
|
||||
cmp = label2.compareTo(label1);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
|
||||
private static final class Entry {
|
||||
String label;
|
||||
int count;
|
||||
|
||||
Entry(String label, int count) {
|
||||
this.label = label;
|
||||
this.count = count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,19 +16,30 @@
|
|||
*/
|
||||
package org.apache.lucene.facet.facetset;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsCollectorManager;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
|
||||
public class TestMatchingFacetSetsCounts extends FacetTestCase {
|
||||
private static final int FORD_ORD = 100;
|
||||
private static final int TOYOTA_ORD = 101;
|
||||
private static final int CHEVY_ORD = 102;
|
||||
private static final int NISSAN_ORD = 103;
|
||||
private static final int[] MANUFACTURER_ORDS = {FORD_ORD, TOYOTA_ORD, CHEVY_ORD, NISSAN_ORD};
|
||||
|
||||
public void testInvalidTopN() throws IOException {
|
||||
Directory d = newDirectory();
|
||||
|
@ -87,4 +98,103 @@ public class TestMatchingFacetSetsCounts extends FacetTestCase {
|
|||
r.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
public void testTopChildren() throws Exception {
|
||||
Directory d = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), d);
|
||||
|
||||
// As a test scenario, we're faceting on the number of vehicles produced per day/make
|
||||
// combination over the past 30 days:
|
||||
final int numBins = 30;
|
||||
|
||||
final int[] expectedCounts = new int[numBins * MANUFACTURER_ORDS.length];
|
||||
FacetSetMatcher[] facetSetMatchers = new FacetSetMatcher[numBins * MANUFACTURER_ORDS.length];
|
||||
|
||||
int totalDocs = 0;
|
||||
int totalNonZeroBins = 0;
|
||||
int index = 0;
|
||||
for (int i = 0; i < numBins; i++) {
|
||||
for (int ord : MANUFACTURER_ORDS) {
|
||||
facetSetMatchers[index] =
|
||||
new ExactFacetSetMatcher(
|
||||
String.format(Locale.ROOT, "%d:%d", i, ord), new LongFacetSet(i, ord));
|
||||
|
||||
int carsManufactured = RandomNumbers.randomIntBetween(random(), 0, 100);
|
||||
for (int k = 0; k < carsManufactured; k++) {
|
||||
// Create a document for every vehicle produced:
|
||||
Document doc = new Document();
|
||||
doc.add(FacetSetsField.create("field", new LongFacetSet(i, ord)));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
if (carsManufactured > 0) {
|
||||
totalNonZeroBins++;
|
||||
}
|
||||
totalDocs += carsManufactured;
|
||||
expectedCounts[index] = carsManufactured;
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
IndexSearcher s = newSearcher(r);
|
||||
FacetsCollector fc = s.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
|
||||
|
||||
Facets facets =
|
||||
new MatchingFacetSetsCounts("field", fc, FacetSetDecoder::decodeLongs, facetSetMatchers);
|
||||
|
||||
// Sort by count (high-to-low) and tie-break on label, same as in
|
||||
// MatchingFacetCounts#getTopChildren:
|
||||
final int[] originalIndexes = new int[expectedCounts.length];
|
||||
for (int i = 0; i < originalIndexes.length; i++) {
|
||||
originalIndexes[i] = i;
|
||||
}
|
||||
new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
int cmp = Integer.compare(expectedCounts[j], expectedCounts[i]);
|
||||
if (cmp == 0) {
|
||||
int dayBinI = originalIndexes[i] / MANUFACTURER_ORDS.length;
|
||||
int dayBinJ = originalIndexes[j] / MANUFACTURER_ORDS.length;
|
||||
int ordIndexI = originalIndexes[i] % MANUFACTURER_ORDS.length;
|
||||
int ordIndexJ = originalIndexes[j] % MANUFACTURER_ORDS.length;
|
||||
String labelI =
|
||||
String.format(Locale.ROOT, "%d:%d", dayBinI, MANUFACTURER_ORDS[ordIndexI]);
|
||||
String labelJ =
|
||||
String.format(Locale.ROOT, "%d:%d", dayBinJ, MANUFACTURER_ORDS[ordIndexJ]);
|
||||
cmp = new BytesRef(labelI).compareTo(new BytesRef(labelJ));
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
int tmp = expectedCounts[i];
|
||||
expectedCounts[i] = expectedCounts[j];
|
||||
expectedCounts[j] = tmp;
|
||||
tmp = originalIndexes[i];
|
||||
originalIndexes[i] = originalIndexes[j];
|
||||
originalIndexes[j] = tmp;
|
||||
}
|
||||
}.sort(0, expectedCounts.length);
|
||||
|
||||
final int topN = 10;
|
||||
final LabelAndValue[] expected = new LabelAndValue[topN];
|
||||
for (int i = 0; i < topN; i++) {
|
||||
int count = expectedCounts[i];
|
||||
int dayBin = originalIndexes[i] / MANUFACTURER_ORDS.length;
|
||||
int ordIndex = originalIndexes[i] % MANUFACTURER_ORDS.length;
|
||||
expected[i] =
|
||||
new LabelAndValue(
|
||||
String.format(Locale.ROOT, "%d:%d", dayBin, MANUFACTURER_ORDS[ordIndex]), count);
|
||||
}
|
||||
|
||||
final FacetResult result = facets.getTopChildren(topN, "field");
|
||||
assertFacetResult(result, "field", new String[0], totalNonZeroBins, totalDocs, expected);
|
||||
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue