mirror of https://github.com/apache/lucene.git

LUCENE-7519: add optimized implementations for browse-only facets

parent: b7aa582dff
commit: 0782b09571
CHANGES.txt
@@ -45,6 +45,9 @@ Optimizations
   that have a facet value, so sparse faceting works as expected
   (Adrien Grand via Mike McCandless)
 
+* LUCENE-7519: Add optimized APIs to compute browse-only top level
+  facets (Mike McCandless)
+
 Other
 
 * LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
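As context for this entry, a minimal usage sketch of the browse-only taxonomy API (the wrapper method and variable names are mine; the constructor signature comes from the FastTaxonomyFacetCounts hunk below, and the "Publish Date" dimension from the tests):

    import java.io.IOException;
    import org.apache.lucene.facet.FacetResult;
    import org.apache.lucene.facet.Facets;
    import org.apache.lucene.facet.FacetsConfig;
    import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
    import org.apache.lucene.facet.taxonomy.TaxonomyReader;
    import org.apache.lucene.index.IndexReader;

    static FacetResult browseFacets(IndexReader reader, TaxonomyReader taxoReader,
                                    FacetsConfig config) throws IOException {
      // Counts facets across all non-deleted docs directly, skipping the
      // usual FacetsCollector + MatchAllDocsQuery round trip:
      Facets facets = new FastTaxonomyFacetCounts(
          FacetsConfig.DEFAULT_INDEX_FIELD_NAME, reader, taxoReader, config);
      return facets.getTopChildren(10, "Publish Date");
    }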
DefaultSortedSetDocValuesReaderState.java
@@ -36,7 +36,8 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.BytesRef;
 
 /**
- * Default implementation of {@link SortedSetDocValuesFacetCounts}
+ * Default implementation of {@link SortedSetDocValuesFacetCounts}. You must ensure the original
+ * {@link IndexReader} passed to the constructor is not closed whenever you use this class!
  */
 public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesReaderState {
SortedSetDocValuesFacetCounts.java
@@ -18,6 +18,7 @@ package org.apache.lucene.facet.sortedset;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
@@ -33,11 +34,15 @@ import org.apache.lucene.facet.TopOrdAndIntQueue;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.ConjunctionDISI;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LongValues;
 
@@ -77,6 +82,17 @@ public class SortedSetDocValuesFacetCounts extends Facets {
     count(hits.getMatchingDocs());
   }
 
+  /** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
+  public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state)
+    throws IOException {
+    this.state = state;
+    this.field = state.getField();
+    dv = state.getDocValues();
+    counts = new int[state.getSize()];
+    //System.out.println("field=" + field);
+    countAll();
+  }
+
   @Override
   public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
     if (topN <= 0) {
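A usage sketch for the sorted-set flavor added in this hunk (the wrapper is mine; the single-argument reader-state constructor appears in the test changes below, and the dimension "a" is borrowed from those tests):

    static FacetResult browseSortedSetFacets(IndexReader reader) throws IOException {
      // Build the reader state once per top-level reader and reuse it; per the
      // javadoc above, the reader must stay open while the state is in use:
      SortedSetDocValuesReaderState state =
          new DefaultSortedSetDocValuesReaderState(reader);
      // Tallies every live document directly, with no query or collector:
      Facets facets = new SortedSetDocValuesFacetCounts(state);
      return facets.getTopChildren(10, "a");
    }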
@@ -176,7 +192,8 @@ public class SortedSetDocValuesFacetCounts extends Facets {
         continue;
       }
 
-      DocIdSetIterator docs = hits.bits.iterator();
+      DocIdSetIterator it = ConjunctionDISI.intersectIterators(Arrays.asList(
+          hits.bits.iterator(), segValues));
 
       // TODO: yet another option is to count all segs
       // first, only in seg-ord space, and then do a
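ConjunctionDISI.intersectIterators returns a single iterator over the documents present in all of the given iterators, so the counting loops below walk only hits that actually have a value instead of probing segValues per hit. A rough sketch of the two-iterator leapfrog it generalizes (illustration only, not the real implementation):

    static int nextMatch(DocIdSetIterator a, DocIdSetIterator b) throws IOException {
      int doc = a.nextDoc();
      while (doc != DocIdSetIterator.NO_MORE_DOCS) {
        // Ask b to catch up to a's current candidate:
        int other = b.advance(doc);
        if (other == doc) {
          return doc;  // both iterators sit on the same document: a match
        }
        // b overshot, so now a must catch up to b:
        doc = a.advance(other);
        if (doc == other) {
          return doc;
        }
      }
      return DocIdSetIterator.NO_MORE_DOCS;
    }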
@@ -196,10 +213,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
       if (hits.totalHits < numSegOrds/10) {
         //System.out.println("    remap as-we-go");
         // Remap every ord to global ord as we iterate:
-        int doc;
-        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-          //System.out.println("  doc=" + doc);
-          if (segValues.advanceExact(doc)) {
+        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
           int term = (int) segValues.nextOrd();
           while (term != SortedSetDocValues.NO_MORE_ORDS) {
             //System.out.println("      segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term));
@@ -207,16 +221,12 @@ public class SortedSetDocValuesFacetCounts extends Facets {
             term = (int) segValues.nextOrd();
           }
         }
-        }
       } else {
         //System.out.println("    count in seg ord first");
 
         // First count in seg-ord space:
         final int[] segCounts = new int[numSegOrds];
-        int doc;
-        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-          //System.out.println("  doc=" + doc);
-          if (segValues.advanceExact(doc)) {
+        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
           int term = (int) segValues.nextOrd();
           while (term != SortedSetDocValues.NO_MORE_ORDS) {
             //System.out.println("      ord=" + term);
@@ -224,7 +234,6 @@ public class SortedSetDocValuesFacetCounts extends Facets {
             term = (int) segValues.nextOrd();
           }
         }
-        }
 
         // Then, migrate to global ords:
         for(int ord=0;ord<numSegOrds;ord++) {
@@ -238,9 +247,76 @@ public class SortedSetDocValuesFacetCounts extends Facets {
       } else {
         // No ord mapping (e.g., single segment index):
         // just aggregate directly into counts:
-        int doc;
-        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-          if (segValues.advanceExact(doc)) {
+        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
           int term = (int) segValues.nextOrd();
           while (term != SortedSetDocValues.NO_MORE_ORDS) {
             counts[term]++;
             term = (int) segValues.nextOrd();
           }
-          }
         }
       }
     }
   }
 
+  /** Does all the "real work" of tallying up the counts. */
+  private final void countAll() throws IOException {
+    //System.out.println("ssdv count");
+
+    MultiDocValues.OrdinalMap ordinalMap;
+
+    // TODO: is this right? really, we need a way to
+    // verify that this ordinalMap "matches" the leaves in
+    // matchingDocs...
+    if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
+      ordinalMap = ((MultiSortedSetDocValues) dv).mapping;
+    } else {
+      ordinalMap = null;
+    }
+
+    IndexReader origReader = state.getOrigReader();
+
+    for(LeafReaderContext context : origReader.leaves()) {
+
+      LeafReader reader = context.reader();
+
+      SortedSetDocValues segValues = reader.getSortedSetDocValues(field);
+      if (segValues == null) {
+        continue;
+      }
+
+      Bits liveDocs = reader.getLiveDocs();
+
+      if (ordinalMap != null) {
+        final LongValues ordMap = ordinalMap.getGlobalOrds(context.ord);
+
+        int numSegOrds = (int) segValues.getValueCount();
+
+        // First count in seg-ord space:
+        final int[] segCounts = new int[numSegOrds];
+        int docID;
+        while ((docID = segValues.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+          if (liveDocs == null || liveDocs.get(docID)) {
+            int term = (int) segValues.nextOrd();
+            while (term != SortedSetDocValues.NO_MORE_ORDS) {
+              segCounts[term]++;
+              term = (int) segValues.nextOrd();
+            }
+          }
+        }
+
+        // Then, migrate to global ords:
+        for(int ord=0;ord<numSegOrds;ord++) {
+          int count = segCounts[ord];
+          if (count != 0) {
+            counts[(int) ordMap.get(ord)] += count;
+          }
+        }
+      } else {
+        // No ord mapping (e.g., single segment index):
+        // just aggregate directly into counts:
+        int docID;
+        while ((docID = segValues.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+          if (liveDocs == null || liveDocs.get(docID)) {
+            int term = (int) segValues.nextOrd();
+            while (term != SortedSetDocValues.NO_MORE_ORDS) {
+              counts[term]++;
+              term = (int) segValues.nextOrd();
+            }
+          }
+        }
+      }
+    }
+  }
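For context on the remap step above: MultiDocValues.OrdinalMap translates each segment's local term ordinals into one global ordinal space, which is what lets countAll() tally into a dense per-segment array first and fold the totals in afterwards. A hedged illustration with invented values:

    // Suppose segment 0 contains terms {bar, foo} (segment ords 0, 1) and
    // segment 1 contains {baz, foo} (segment ords 0, 1). The global ord
    // space over the union {bar, baz, foo} is {0, 1, 2}, so:
    //   ordinalMap.getGlobalOrds(0) maps 0 -> 0, 1 -> 2
    //   ordinalMap.getGlobalOrds(1) maps 0 -> 1, 1 -> 2
    LongValues ordMap = ordinalMap.getGlobalOrds(context.ord);
    long globalOrd = ordMap.get(segOrd);  // cheap packed-ints lookup

This pays the remap cost once per unique segment ordinal rather than once per value occurrence, which is why the seg-ord-first strategy wins when many documents share few distinct values.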
FastTaxonomyFacetCounts.java
@@ -24,8 +24,12 @@ import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
 import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.ConjunctionDISI;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
 /** Computes facets counts, assuming the default encoding
@@ -50,6 +54,16 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
     count(fc.getMatchingDocs());
   }
 
+  /** Create {@code FastTaxonomyFacetCounts}, using the
+   *  specified {@code indexFieldName} for ordinals, and
+   *  counting all non-deleted documents in the index. This is
+   *  the same result as searching on {@link MatchAllDocsQuery},
+   *  but faster */
+  public FastTaxonomyFacetCounts(String indexFieldName, IndexReader reader, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
+    super(indexFieldName, taxoReader, config);
+    countAll(reader);
+  }
+
   private final void count(List<MatchingDocs> matchingDocs) throws IOException {
     for(MatchingDocs hits : matchingDocs) {
       BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName);
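A quick sanity sketch of how the two constructors relate (hypothetical test code; searcher, taxoReader, and config are assumed to exist, and the "Author" dimension is borrowed from the tests below). Both paths should report identical counts:

    FacetsCollector fc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), fc);
    Facets viaCollector = new FastTaxonomyFacetCounts(taxoReader, config, fc);

    Facets browseOnly = new FastTaxonomyFacetCounts(
        FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher.getIndexReader(), taxoReader, config);

    assertEquals(viaCollector.getTopChildren(10, "Author").toString(),
                 browseOnly.getTopChildren(10, "Author").toString());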
@@ -82,4 +96,39 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
 
     rollup();
   }
+
+  private final void countAll(IndexReader reader) throws IOException {
+    for(LeafReaderContext context : reader.leaves()) {
+      BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName);
+      if (dv == null) { // this reader does not have DocValues for the requested category list
+        continue;
+      }
+
+      Bits liveDocs = context.reader().getLiveDocs();
+
+      for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
+        if (liveDocs != null && liveDocs.get(doc) == false) {
+          continue;
+        }
+        final BytesRef bytesRef = dv.binaryValue();
+        byte[] bytes = bytesRef.bytes;
+        int end = bytesRef.offset + bytesRef.length;
+        int ord = 0;
+        int offset = bytesRef.offset;
+        int prev = 0;
+        while (offset < end) {
+          byte b = bytes[offset++];
+          if (b >= 0) {
+            prev = ord = ((ord << 7) | b) + prev;
+            ++values[ord];
+            ord = 0;
+          } else {
+            ord = (ord << 7) | (b & 0x7F);
+          }
+        }
+      }
+    }
+
+    rollup();
+  }
 }
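The decode loop in countAll() above inverts the facet ordinal encoding: each document's ordinals are stored sorted, delta-coded against the previous ordinal, and each delta is written as big-endian 7-bit groups where every byte except the last has its high bit set. A hedged sketch of the matching encoder (hypothetical helper, not the actual Lucene encoder; handles deltas below 2^28 only, for brevity):

    import java.io.ByteArrayOutputStream;

    static void encode(int[] sortedOrds, ByteArrayOutputStream out) {
      int prev = 0;
      for (int ord : sortedOrds) {
        int delta = ord - prev;  // ords are sorted, so delta >= 0
        prev = ord;
        // High-order 7-bit groups first, continuation bit (0x80) on all
        // but the final byte; the final byte is therefore >= 0 as a byte:
        if (delta >= (1 << 21)) out.write(((delta >>> 21) & 0x7F) | 0x80);
        if (delta >= (1 << 14)) out.write(((delta >>> 14) & 0x7F) | 0x80);
        if (delta >= (1 << 7))  out.write(((delta >>> 7) & 0x7F) | 0x80);
        out.write(delta & 0x7F);
      }
    }

For example, ordinals {3, 10} encode as bytes [3, 7]: the decoder reads 3 (high bit clear) and counts ord 3, then reads 7 and counts ord 3 + 7 = 10.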
TaxonomyFacets.java
@@ -76,7 +76,7 @@ public abstract class TaxonomyFacets extends Facets {
   protected FacetsConfig.DimConfig verifyDim(String dim) {
     FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
     if (!dimConfig.indexFieldName.equals(indexFieldName)) {
-      throw new IllegalArgumentException("dimension \"" + dim + "\" was not indexed into field \"" + indexFieldName);
+      throw new IllegalArgumentException("dimension \"" + dim + "\" was not indexed into field \"" + indexFieldName + "\"");
     }
     return dimConfig;
   }
TestSortedSetDocValuesFacets.java
@@ -16,6 +16,7 @@
  */
 package org.apache.lucene.facet.sortedset;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -73,11 +74,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
     // Per-top-reader state:
     SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
 
-    FacetsCollector c = new FacetsCollector();
-
-    searcher.search(new MatchAllDocsQuery(), c);
-
-    SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);
+    SortedSetDocValuesFacetCounts facets = getAllFacets(searcher, state);
 
     assertEquals("dim=a path=[] value=4 childCount=3\n  foo (2)\n  bar (1)\n  zoo (1)\n", facets.getTopChildren(10, "a").toString());
     assertEquals("dim=b path=[] value=1 childCount=1\n  baz (1)\n", facets.getTopChildren(10, "b").toString());
@@ -171,9 +168,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
     // Per-top-reader state:
     SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
 
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);
-    SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);
+    SortedSetDocValuesFacetCounts facets = getAllFacets(searcher, state);
 
     // Ask for top 10 labels for any dims that have counts:
     List<FacetResult> results = facets.getAllDims(10);
@@ -215,9 +210,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
     // Per-top-reader state:
     SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
 
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);
-    SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);
+    SortedSetDocValuesFacetCounts facets = getAllFacets(searcher, state);
 
     // Ask for top 10 labels for any dims that have counts:
     assertEquals("dim=a path=[] value=2 childCount=2\n  foo1 (1)\n  foo2 (1)\n", facets.getTopChildren(10, "a").toString());
@@ -312,4 +305,14 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
     w.close();
     IOUtils.close(searcher.getIndexReader(), indexDir, taxoDir);
   }
+
+  private static SortedSetDocValuesFacetCounts getAllFacets(IndexSearcher searcher, SortedSetDocValuesReaderState state) throws IOException {
+    if (random().nextBoolean()) {
+      FacetsCollector c = new FacetsCollector();
+      searcher.search(new MatchAllDocsQuery(), c);
+      return new SortedSetDocValuesFacetCounts(state, c);
+    } else {
+      return new SortedSetDocValuesFacetCounts(state);
+    }
+  }
 }
TestTaxonomyFacetCounts.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.facet.taxonomy;
 
 import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -102,16 +103,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    // Aggregate the facet counts:
-    FacetsCollector c = new FacetsCollector();
-
-    // MatchAllDocsQuery is for "browsing" (counts facets
-    // for all non-deleted docs in the index); normally
-    // you'd use a "normal" query, and use MultiCollector to
-    // wrap collecting the "normal" hits and also facets:
-    searcher.search(new MatchAllDocsQuery(), c);
-
-    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config);
 
     // Retrieve & verify results:
     assertEquals("dim=Publish Date path=[] value=5 childCount=3\n  2010 (2)\n  2012 (2)\n  1999 (1)\n", facets.getTopChildren(10, "Publish Date").toString());
@@ -120,7 +112,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // Now user drills down on Publish Date/2010:
     DrillDownQuery q2 = new DrillDownQuery(config);
     q2.add("Publish Date", "2010");
-    c = new FacetsCollector();
+    FacetsCollector c = new FacetsCollector();
     searcher.search(q2, c);
     facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
     assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", facets.getTopChildren(10, "Author").toString());
@@ -185,10 +177,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);
-
-    Facets facets = getTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config);
 
     // Ask for top 10 labels for any dims that have counts:
     List<FacetResult> results = facets.getAllDims(10);
@@ -301,15 +290,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    // Aggregate the facet counts:
-    FacetsCollector c = new FacetsCollector();
-
-    // MatchAllDocsQuery is for "browsing" (counts facets
-    // for all non-deleted docs in the index); normally
-    // you'd use a "normal" query, and use MultiCollector to
-    // wrap collecting the "normal" hits and also facets:
-    searcher.search(new MatchAllDocsQuery(), c);
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config);
 
     expectThrows(IllegalArgumentException.class, () -> {
       facets.getSpecificValue("a");
@@ -344,10 +325,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config);
 
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, c);
     assertEquals(1, facets.getSpecificValue("dim", "test\u001Fone"));
     assertEquals(1, facets.getSpecificValue("dim", "test\u001Etwo"));
 
@@ -387,11 +366,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
 
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config);
 
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);
-
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, c);
     assertEquals(1, facets.getTopChildren(10, "dim").value);
     assertEquals(1, facets.getTopChildren(10, "dim2").value);
     assertEquals(1, facets.getTopChildren(10, "dim3").value);
@@ -432,15 +408,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    // Aggregate the facet counts:
-    FacetsCollector c = new FacetsCollector();
-
-    // MatchAllDocsQuery is for "browsing" (counts facets
-    // for all non-deleted docs in the index); normally
-    // you'd use a "normal" query, and use MultiCollector to
-    // wrap collecting the "normal" hits and also facets:
-    searcher.search(new MatchAllDocsQuery(), c);
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config);
 
     FacetResult result = facets.getTopChildren(Integer.MAX_VALUE, "dim");
     assertEquals(numLabels, result.labelValues.length);
@@ -544,9 +512,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     DirectoryReader r = DirectoryReader.open(iw);
     DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    FacetsCollector sfc = new FacetsCollector();
-    newSearcher(r).search(new MatchAllDocsQuery(), sfc);
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config);
+
     for (FacetResult result : facets.getAllDims(10)) {
       assertEquals(r.numDocs(), result.value.intValue());
     }
@@ -572,10 +539,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     DirectoryReader r = DirectoryReader.open(iw);
     DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    final FacetsCollector sfc = new FacetsCollector();
-    newSearcher(r).search(new MatchAllDocsQuery(), sfc);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config);
 
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc);
     List<FacetResult> res1 = facets.getAllDims(10);
     List<FacetResult> res2 = facets.getAllDims(10);
     assertEquals("calling getFacetResults twice should return the .equals()=true result", res1, res2);
@@ -601,9 +566,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     DirectoryReader r = DirectoryReader.open(iw);
     DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    FacetsCollector sfc = new FacetsCollector();
-    newSearcher(r).search(new MatchAllDocsQuery(), sfc);
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config);
 
     assertEquals(10, facets.getTopChildren(2, "a").childCount);
@@ -754,4 +717,21 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     w.close();
     IOUtils.close(tw, searcher.getIndexReader(), tr, indexDir, taxoDir);
   }
+
+  private static Facets getAllFacets(String indexFieldName, IndexSearcher searcher, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
+    if (random().nextBoolean()) {
+      // Aggregate the facet counts:
+      FacetsCollector c = new FacetsCollector();
+
+      // MatchAllDocsQuery is for "browsing" (counts facets
+      // for all non-deleted docs in the index); normally
+      // you'd use a "normal" query, and use MultiCollector to
+      // wrap collecting the "normal" hits and also facets:
+      searcher.search(new MatchAllDocsQuery(), c);
+
+      return new FastTaxonomyFacetCounts(taxoReader, config, c);
+    } else {
+      return new FastTaxonomyFacetCounts(indexFieldName, searcher.getIndexReader(), taxoReader, config);
+    }
+  }
 }
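As a follow-up to the comment inside getAllFacets: the non-browsing path it alludes to wraps a "normal" collector and a FacetsCollector together with MultiCollector, so a single search produces both hits and facet counts. A hedged sketch (the query and field name are invented; TopScoreDocCollector, MultiCollector, and TermQuery are standard Lucene APIs, signatures as of this era of Lucene):

    FacetsCollector fc = new FacetsCollector();
    TopScoreDocCollector hits = TopScoreDocCollector.create(10);
    searcher.search(new TermQuery(new Term("contents", "lucene")),
                    MultiCollector.wrap(hits, fc));
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);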