Added a top-n range faceting example (#1035)

This commit is contained in:
Yuting Gan 2022-09-08 12:19:42 -07:00 committed by GitHub
parent 09a13aeaf2
commit 49b596ef02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 66 additions and 0 deletions

View File

@ -52,6 +52,8 @@ Improvements
* LUCENE-10614: Properly support getTopChildren in RangeFacetCounts. (Yuting Gan)
* LUCENE-10652: Add a top-n range faceting example to RangeFacetsExample. (Yuting Gan)
Optimizations
---------------------
(No changes)

View File

@ -18,10 +18,13 @@ package org.apache.lucene.demo.facet;
import java.io.Closeable;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetResult;
@ -45,6 +48,7 @@ public class RangeFacetsExample implements Closeable {
private final Directory indexDir = new ByteBuffersDirectory();
private IndexSearcher searcher;
private LongRange[] logTimestampRanges = new LongRange[168];
private final long nowSec = System.currentTimeMillis() / 1000L;
final LongRange PAST_HOUR = new LongRange("Past hour", nowSec - 3600, true, nowSec, true);
@ -73,6 +77,31 @@ public class RangeFacetsExample implements Closeable {
indexWriter.addDocument(doc);
}
// Add documents with a fake timestamp for the past 7 days (24 * 7 = 168 hours), 3600 sec (1
// hour) from "now", 7200 sec (2 hours) from "now", ...:
long startTime = 0;
for (int i = 0; i < 168; i++) {
long endTime = (i + 1) * 3600;
// Choose a relatively large number, e.g., "35", to create variation in count for
// the top n children, so that calling getTopChildren(10) can return top 10 children with
// different counts
for (int j = 0; j < i % 35; j++) {
Document doc = new Document();
Random r = new Random();
// Randomly generate a timestamp within the current range
long randomTimestamp = r.nextLong(1, endTime - startTime) + startTime;
// Add as doc values field, so we can compute range facets:
doc.add(new NumericDocValuesField("error timestamp", randomTimestamp));
doc.add(
new StringField(
"error message", "server encountered error at " + randomTimestamp, Field.Store.NO));
indexWriter.addDocument(doc);
}
logTimestampRanges[i] =
new LongRange("Hour " + i + "-" + (i + 1), startTime, false, endTime, true);
startTime = endTime;
}
// Open near-real-time searcher
searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
indexWriter.close();
@ -97,6 +126,21 @@ public class RangeFacetsExample implements Closeable {
return facets.getAllChildren("timestamp");
}
/**
 * Browses the whole index and asks for the top children of the "error timestamp" dimension,
 * counted against the ranges held in {@code logTimestampRanges}.
 *
 * @return the result of {@code getTopChildren(10, "error timestamp")} on the range facet counts
 * @throws IOException if the underlying search or facet counting fails
 */
public FacetResult searchTopChildren() throws IOException {
  // Collector that gathers the matching docs the facet counts are computed from
  final FacetsCollector collector = new FacetsCollector();

  // A match-all query "browses" the index (every non-deleted doc is counted);
  // a real application would normally pass the user's query here instead.
  final MatchAllDocsQuery browseAll = new MatchAllDocsQuery();
  FacetsCollector.search(searcher, browseAll, 10, collector);

  // Count the collected docs per range, then ask for the top 10 children
  return new LongRangeFacetCounts("error timestamp", collector, logTimestampRanges)
      .getTopChildren(10, "error timestamp");
}
/** User drills down on the specified range. */
public TopDocs drillDown(LongRange range) throws IOException {
@ -152,6 +196,11 @@ public class RangeFacetsExample implements Closeable {
System.out.println("-----------------------");
System.out.println(example.search());
System.out.println("\n");
System.out.println("Facet counting example:");
System.out.println("-----------------------");
System.out.println(example.searchTopChildren());
System.out.println("\n");
System.out.println("Facet drill-down example (timestamp/Past six hours):");
System.out.println("---------------------------------------------");

View File

@ -31,6 +31,21 @@ public class TestRangeFacetsExample extends LuceneTestCase {
assertEquals(
"dim=timestamp path=[] value=87 childCount=3\n Past hour (4)\n Past six hours (22)\n Past day (87)\n",
result.toString());
result = example.searchTopChildren();
assertEquals(
"dim=error timestamp path=[] value=2758 childCount=163\n"
+ " Hour 104-105 (34)\n"
+ " Hour 139-140 (34)\n"
+ " Hour 34-35 (34)\n"
+ " Hour 69-70 (34)\n"
+ " Hour 103-104 (33)\n"
+ " Hour 138-139 (33)\n"
+ " Hour 33-34 (33)\n"
+ " Hour 68-69 (33)\n"
+ " Hour 102-103 (32)\n"
+ " Hour 137-138 (32)\n",
result.toString());
example.close();
}