mirror of https://github.com/apache/lucene.git
LUCENE-5735: number/date range faceting on NumberRangePrefixTreeStrategy
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1647727 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7204caba96
commit
fea7e276cf
|
@ -12,6 +12,10 @@ System Requirements
|
|||
|
||||
New Features
|
||||
|
||||
* LUCENE-5735: NumberRangePrefixTreeStrategy now includes interval/range faceting
|
||||
for counting ranges that align with the underlying terms as defined by the
|
||||
NumberRangePrefixTree (e.g. familiar date units like days). (David Smiley)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-3312: The API of oal.document was restructured to
|
||||
|
|
|
@ -17,13 +17,24 @@ package org.apache.lucene.spatial;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.spatial4j.core.shape.Point;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.spatial.prefix.NumberRangePrefixTreeFacets;
|
||||
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
|
||||
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import static org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
|
||||
|
||||
/** A PrefixTree based on Number/Date ranges. This isn't very "spatial" on the surface (to the user) but
|
||||
* it's implemented using spatial so that's why it's here extending a SpatialStrategy. When using this class, you will
|
||||
|
@ -61,4 +72,92 @@ public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy {
|
|||
public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/** Calculates facets between {@code start} and {@code end} to a detail level one greater than that provided by the
|
||||
* arguments. For example providing March to October of 2014 would return facets to the day level of those months.
|
||||
* This is just a convenience method.
|
||||
* @see #calcFacets(IndexReaderContext, Bits, Shape, int)
|
||||
*/
|
||||
public Facets calcFacets(IndexReaderContext context, final Bits acceptDocs, UnitNRShape start, UnitNRShape end)
|
||||
throws IOException {
|
||||
Shape filter = getGrid().toRangeShape(start, end);
|
||||
int detailLevel = Math.max(start.getLevel(), end.getLevel()) + 1;
|
||||
return calcFacets(context, acceptDocs, filter, detailLevel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates facets (aggregated counts) given a range shape (start-end span) and a level, which specifies the detail.
|
||||
* To get the level of an existing shape, say a Calendar, call
|
||||
* {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree#toUnitShape(Object)} then call
|
||||
* {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape#getLevel()}.
|
||||
* Facet computation is implemented by navigating the underlying indexed terms efficiently.
|
||||
*/
|
||||
public Facets calcFacets(IndexReaderContext context, final Bits acceptDocs, Shape facetRange, int level)
|
||||
throws IOException {
|
||||
return NumberRangePrefixTreeFacets.compute(this, context, acceptDocs, facetRange, level);
|
||||
}
|
||||
|
||||
/** Facet response information */
|
||||
public static class Facets {
|
||||
//TODO consider a variable-level structure -- more general purpose.
|
||||
|
||||
public Facets(int detailLevel) {
|
||||
this.detailLevel = detailLevel;
|
||||
}
|
||||
|
||||
/** The bottom-most detail-level counted, as requested. */
|
||||
public final int detailLevel;
|
||||
|
||||
/**
|
||||
* The count of documents with ranges that completely spanned the parents of the detail level. In more technical
|
||||
* terms, this is the count of leaf cells 2 up and higher from the bottom. Usually you only care about counts at
|
||||
* detailLevel, and so you will add this number to all other counts below, including to omitted/implied children
|
||||
* counts of 0. If there are no indexed ranges (just instances, i.e. fully specified dates) then this value will
|
||||
* always be 0.
|
||||
*/
|
||||
public int topLeaves;
|
||||
|
||||
/** Holds all the {@link FacetParentVal} instances in order of the key. This is sparse; there won't be an
|
||||
* instance if it's count and children are all 0. The keys are {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape} shapes, which can be
|
||||
* converted back to the original Object (i.e. a Calendar) via
|
||||
* {@link NumberRangePrefixTree#toObject(org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape)}. */
|
||||
public final SortedMap<UnitNRShape,FacetParentVal> parents = new TreeMap<>();
|
||||
|
||||
/** Holds a block of detailLevel counts aggregated to their parent level. */
|
||||
public static class FacetParentVal {
|
||||
|
||||
/** The count of ranges that span all of the childCounts. In more technical terms, this is the number of leaf
|
||||
* cells found at this parent. Treat this like {@link Facets#topLeaves}. */
|
||||
public int parentLeaves;
|
||||
|
||||
/** The length of {@link #childCounts}. If childCounts is not null then this is childCounts.length, otherwise it
|
||||
* says how long it would have been if it weren't null. */
|
||||
public int childCountsLen;
|
||||
|
||||
/** The detail level counts. It will be null if there are none, and thus they are assumed 0. Most apps, when
|
||||
* presenting the information, will add {@link #topLeaves} and {@link #parentLeaves} to each count. */
|
||||
public int[] childCounts;
|
||||
//assert childCountsLen == childCounts.length
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder buf = new StringBuilder(2048);
|
||||
buf.append("Facets: level=" + detailLevel + " topLeaves=" + topLeaves + " parentCount=" + parents.size());
|
||||
for (Map.Entry<UnitNRShape, FacetParentVal> entry : parents.entrySet()) {
|
||||
buf.append('\n');
|
||||
if (buf.length() > 1000) {
|
||||
buf.append("...");
|
||||
break;
|
||||
}
|
||||
final FacetParentVal pVal = entry.getValue();
|
||||
buf.append(' ').append(entry.getKey()+" leafCount=" + pVal.parentLeaves);
|
||||
if (pVal.childCounts != null) {
|
||||
buf.append(' ').append(Arrays.toString(pVal.childCounts));
|
||||
}
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,215 @@
|
|||
package org.apache.lucene.spatial.prefix;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.spatial.NumberRangePrefixTreeStrategy;
|
||||
import org.apache.lucene.spatial.NumberRangePrefixTreeStrategy.Facets;
|
||||
import org.apache.lucene.spatial.prefix.tree.Cell;
|
||||
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
|
||||
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/**
|
||||
* Computes range facets for {@link NumberRangePrefixTreeStrategy}.
|
||||
*
|
||||
* @see NumberRangePrefixTreeStrategy#calcFacets(IndexReaderContext, Bits, Shape, int)
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class NumberRangePrefixTreeFacets {
|
||||
|
||||
public static Facets compute(NumberRangePrefixTreeStrategy strategy,
|
||||
IndexReaderContext context, final Bits acceptDocs, Shape queryShape, int facetLevel)
|
||||
throws IOException {
|
||||
|
||||
Facets facets = new Facets(facetLevel);
|
||||
|
||||
// TODO should we pre-create all parent buckets? It's not necessary, but the client/user may find it convenient to
|
||||
// have so it needn't do a bunch of calendar work itself to ascertain which buckets are missing. It would
|
||||
// also then easily allow us to have a too-many-facets exception (e.g. you ask for a millisecond bucket for
|
||||
// the entire year). We could do that now but we would only be able to throw if the actual counts get to the
|
||||
// threshold vs. being able to know the possible values consistently a-priori which is much preferred. Now on the
|
||||
// other hand, we can facet over extremely sparse data sets without needless parent buckets.
|
||||
|
||||
//We collect per-leaf
|
||||
final List<LeafReaderContext> leaves = context.leaves();
|
||||
for (final LeafReaderContext leafCtx : leaves) {
|
||||
//determine leaf acceptDocs
|
||||
Bits leafAcceptDocs;
|
||||
if (acceptDocs == null) {
|
||||
leafAcceptDocs = leafCtx.reader().getLiveDocs();
|
||||
} else if (leaves.size() == 1) {
|
||||
leafAcceptDocs = acceptDocs;
|
||||
} else {
|
||||
leafAcceptDocs = new Bits() {//note: it'd be nice if Lucene's BitsSlice was public.
|
||||
|
||||
final int docBase = leafCtx.docBase;
|
||||
|
||||
@Override
|
||||
public boolean get(int index) {
|
||||
return acceptDocs.get(docBase + index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return leafCtx.reader().maxDoc();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
facets = compute(strategy, leafCtx, leafAcceptDocs, queryShape, facets);
|
||||
}
|
||||
return facets;
|
||||
|
||||
}
|
||||
|
||||
public static Facets compute(final NumberRangePrefixTreeStrategy strategy,
|
||||
LeafReaderContext context, Bits acceptDocs, Shape queryShape, final Facets facets)
|
||||
throws IOException {
|
||||
final NumberRangePrefixTree tree = strategy.getGrid();
|
||||
final int scanLevel = tree.getMaxLevels();
|
||||
|
||||
//TODO extract AbstractVisitingPrefixTreeFilter / VisitorTemplate to be generic, not necessarily a Filter/DocIdSet.
|
||||
new AbstractVisitingPrefixTreeFilter(queryShape, strategy.getFieldName(), tree, facets.detailLevel, scanLevel) {
|
||||
|
||||
@Override
|
||||
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
return new VisitorTemplate(context, acceptDocs, !strategy.pointsOnly) {
|
||||
|
||||
Facets.FacetParentVal parentFacet;
|
||||
|
||||
@Override
|
||||
protected void start() throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSet finish() throws IOException {
|
||||
return null;//unused
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean visit(Cell cell) throws IOException {
|
||||
// At facetLevel...
|
||||
if (cell.getLevel() == facets.detailLevel) {
|
||||
//note: parentFacet shouldn't be null if we get here
|
||||
|
||||
// Count docs
|
||||
int count = countDocsAtThisTermInSet(acceptDocs);
|
||||
if (count > 0) {
|
||||
//lazy init childCounts
|
||||
if (parentFacet.childCounts == null) {
|
||||
parentFacet.childCounts = new int[parentFacet.childCountsLen];
|
||||
}
|
||||
UnitNRShape unitShape = (UnitNRShape) cell.getShape();
|
||||
parentFacet.childCounts[unitShape.getValAtLevel(cell.getLevel())] += count;
|
||||
}
|
||||
return false;//don't descend further; this is enough detail
|
||||
}
|
||||
|
||||
parentFacet = null;//reset
|
||||
|
||||
// At parent
|
||||
if (cell.getLevel() == facets.detailLevel - 1) {
|
||||
if (!hasDocsAtThisTermInSet(acceptDocs)) {
|
||||
return false;
|
||||
}
|
||||
//Look for existing parentFacet (from previous segment)
|
||||
UnitNRShape unitShape = (UnitNRShape) cell.getShape();
|
||||
UnitNRShape key = unitShape.clone();
|
||||
parentFacet = facets.parents.get(key);
|
||||
if (parentFacet == null) {//didn't find one; make a new one
|
||||
parentFacet = new Facets.FacetParentVal();
|
||||
parentFacet.childCountsLen = tree.getNumSubCells(unitShape);
|
||||
facets.parents.put(key, parentFacet);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void visitLeaf(Cell cell) throws IOException {
|
||||
final int levelsToGo = facets.detailLevel - cell.getLevel();
|
||||
if (levelsToGo <= 0) {
|
||||
return;//do nothing; we already collected in visit()
|
||||
//note: once we index ranges without direct prefix's of leaves,
|
||||
// we'll need to collect here at levelsToGo==0 too.
|
||||
}
|
||||
int count = countDocsAtThisTermInSet(acceptDocs);
|
||||
if (count == 0) {
|
||||
return;
|
||||
}
|
||||
if (levelsToGo == 1) {
|
||||
// Because all leaves also have an indexed non-leaf, we can be sure we have parentCell set via visit().
|
||||
parentFacet.parentLeaves += count;
|
||||
} else {
|
||||
facets.topLeaves += count;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void visitScanned(Cell cell) throws IOException {
|
||||
//TODO does this belong in superclass? It ignores boolean result from visit(), but that's ok.
|
||||
if (queryShape.relate(cell.getShape()).intersects()) {
|
||||
if (cell.isLeaf()) {
|
||||
visitLeaf(cell);
|
||||
} else {
|
||||
visit(cell);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//TODO These utility methods could move to superclass
|
||||
|
||||
private int countDocsAtThisTermInSet(Bits actualBaseDocs) throws IOException {
|
||||
if (actualBaseDocs == null) {
|
||||
return termsEnum.docFreq();
|
||||
}
|
||||
int count = 0;
|
||||
docsEnum = termsEnum.docs(actualBaseDocs, docsEnum, DocsEnum.FLAG_NONE);
|
||||
while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
private boolean hasDocsAtThisTermInSet(Bits actualBaseDocs) throws IOException {
|
||||
if (actualBaseDocs == null) {
|
||||
return true;
|
||||
}
|
||||
docsEnum = termsEnum.docs(actualBaseDocs, docsEnum, DocsEnum.FLAG_NONE);
|
||||
return (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
}
|
||||
|
||||
}.getDocIdSet();
|
||||
}
|
||||
}.getDocIdSet(context, acceptDocs);
|
||||
|
||||
return facets;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,260 @@
|
|||
package org.apache.lucene.spatial.prefix;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Repeat;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.spatial.NumberRangePrefixTreeStrategy;
|
||||
import org.apache.lucene.spatial.NumberRangePrefixTreeStrategy.Facets;
|
||||
import org.apache.lucene.spatial.StrategyTestCase;
|
||||
import org.apache.lucene.spatial.prefix.tree.Cell;
|
||||
import org.apache.lucene.spatial.prefix.tree.CellIterator;
|
||||
import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
|
||||
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
|
||||
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
|
||||
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
|
||||
|
||||
public class NumberRangeFacetsTest extends StrategyTestCase {
|
||||
|
||||
DateRangePrefixTree tree;
|
||||
|
||||
int randomCalWindowField;
|
||||
long randomCalWindowMs;
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
tree = DateRangePrefixTree.INSTANCE;
|
||||
strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
|
||||
Calendar tmpCal = tree.newCal();
|
||||
randomCalWindowField = randomIntBetween(1, Calendar.ZONE_OFFSET - 1);//we're not allowed to add zone offset
|
||||
tmpCal.add(randomCalWindowField, 2_000);
|
||||
randomCalWindowMs = Math.max(2000L, tmpCal.getTimeInMillis());
|
||||
}
|
||||
|
||||
@Repeat(iterations = 100)
|
||||
@Test
|
||||
public void test() throws IOException {
|
||||
//generate test data
|
||||
List<Shape> indexedShapes = new ArrayList<>();
|
||||
final int numIndexedShapes = random().nextInt(15);
|
||||
for (int i = 0; i < numIndexedShapes; i++) {
|
||||
indexedShapes.add(randomShape());
|
||||
}
|
||||
|
||||
//Main index loop:
|
||||
for (int i = 0; i < indexedShapes.size(); i++) {
|
||||
Shape shape = indexedShapes.get(i);
|
||||
adoc(""+i, shape);
|
||||
|
||||
if (random().nextInt(10) == 0)
|
||||
commit();//intermediate commit, produces extra segments
|
||||
}
|
||||
|
||||
//delete some documents randomly
|
||||
for (int id = 0; id < indexedShapes.size(); id++) {
|
||||
if (random().nextInt(10) == 0) {
|
||||
deleteDoc(""+id);
|
||||
indexedShapes.set(id, null);
|
||||
}
|
||||
}
|
||||
|
||||
commit();
|
||||
|
||||
//Main query loop:
|
||||
for (int queryIdx = 0; queryIdx < 10; queryIdx++) {
|
||||
preQueryHavoc();
|
||||
|
||||
// We need to have a facet range window to do the facets between (a start time & end time). We randomly
|
||||
// pick a date, decide the level we want to facet on, and then pick a right end time that is up to 2 thousand
|
||||
// values later.
|
||||
int calFieldFacet = randomCalWindowField - 1;
|
||||
if (calFieldFacet > 1 && rarely()) {
|
||||
calFieldFacet--;
|
||||
}
|
||||
final Calendar leftCal = randomCalendar();
|
||||
leftCal.add(calFieldFacet, -1 * randomInt(1000));
|
||||
Calendar rightCal = (Calendar) leftCal.clone();
|
||||
rightCal.add(calFieldFacet, randomInt(2000));
|
||||
// Pick facet detail level based on cal field.
|
||||
int detailLevel = tree.getTreeLevelForCalendarField(calFieldFacet);
|
||||
if (detailLevel < 0) {//no exact match
|
||||
detailLevel = -1 * detailLevel;
|
||||
}
|
||||
|
||||
//Randomly pick a filter as Bits/acceptDocs
|
||||
FixedBitSet acceptDocs = null;//the answer
|
||||
List<Integer> acceptFieldIds = new ArrayList<>();
|
||||
if (usually()) {
|
||||
//get all possible IDs into a list, random shuffle it, then randomly choose how many of the first we use to
|
||||
// replace the list.
|
||||
for (int i = 0; i < indexedShapes.size(); i++) {
|
||||
if (indexedShapes.get(i) == null) {
|
||||
continue;
|
||||
}
|
||||
acceptFieldIds.add(i);
|
||||
}
|
||||
Collections.shuffle(acceptFieldIds, random());
|
||||
acceptFieldIds = acceptFieldIds.subList(0, randomInt(acceptFieldIds.size()));
|
||||
acceptDocs = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
|
||||
//query for their Lucene docIds to put into acceptDocs
|
||||
if (!acceptFieldIds.isEmpty()) {
|
||||
BooleanQuery acceptQuery = new BooleanQuery();
|
||||
for (Integer acceptDocId : acceptFieldIds) {
|
||||
acceptQuery.add(new TermQuery(new Term("id", acceptDocId.toString())), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
final TopDocs topDocs = indexSearcher.search(acceptQuery, numIndexedShapes);
|
||||
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
acceptDocs.set(scoreDoc.doc);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//Lets do it!
|
||||
NumberRangePrefixTree.NRShape facetRange = tree.toRangeShape(tree.toShape(leftCal), tree.toShape(rightCal));
|
||||
Facets facets = ((NumberRangePrefixTreeStrategy) strategy)
|
||||
.calcFacets(indexSearcher.getTopReaderContext(), acceptDocs, facetRange, detailLevel);
|
||||
|
||||
//System.out.println("Q: " + queryIdx + " " + facets);
|
||||
|
||||
//Verify results. We do it by looping over indexed shapes and reducing the facet counts.
|
||||
Shape facetShapeRounded = facetRange.roundToLevel(detailLevel);
|
||||
for (int indexedShapeId = 0; indexedShapeId < indexedShapes.size(); indexedShapeId++) {
|
||||
if (acceptDocs != null && !acceptFieldIds.contains(indexedShapeId)) {
|
||||
continue;// this doc was filtered out via acceptDocs
|
||||
}
|
||||
Shape indexedShape = indexedShapes.get(indexedShapeId);
|
||||
if (indexedShape == null) {//was deleted
|
||||
continue;
|
||||
}
|
||||
Shape indexedShapeRounded = ((NumberRangePrefixTree.NRShape) indexedShape).roundToLevel(detailLevel);
|
||||
if (!indexedShapeRounded.relate(facetShapeRounded).intersects()) { // no intersection at all
|
||||
continue;
|
||||
}
|
||||
// walk the cells
|
||||
final CellIterator cellIterator = tree.getTreeCellIterator(indexedShape, detailLevel);
|
||||
while (cellIterator.hasNext()) {
|
||||
Cell cell = cellIterator.next();
|
||||
if (!cell.getShape().relate(facetShapeRounded).intersects()) {
|
||||
cellIterator.remove();//no intersection; prune
|
||||
continue;
|
||||
}
|
||||
assert cell.getLevel() <= detailLevel;
|
||||
|
||||
if (cell.getLevel() == detailLevel) {
|
||||
//count it
|
||||
UnitNRShape shape = (UnitNRShape) cell.getShape();
|
||||
final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1);//get parent
|
||||
final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
|
||||
assertNotNull(facetParentVal);
|
||||
int index = shape.getValAtLevel(shape.getLevel());
|
||||
assertNotNull(facetParentVal.childCounts);
|
||||
assert facetParentVal.childCounts[index] > 0;
|
||||
facetParentVal.childCounts[index]--;
|
||||
|
||||
} else if (cell.isLeaf()) {
|
||||
//count it, and remove/prune.
|
||||
if (cell.getLevel() < detailLevel - 1) {
|
||||
assert facets.topLeaves > 0;
|
||||
facets.topLeaves--;
|
||||
} else {
|
||||
UnitNRShape shape = (UnitNRShape) cell.getShape();
|
||||
final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1);//get parent
|
||||
final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
|
||||
assertNotNull(facetParentVal);
|
||||
assert facetParentVal.parentLeaves > 0;
|
||||
facetParentVal.parentLeaves--;
|
||||
}
|
||||
|
||||
cellIterator.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
// At this point; all counts should be down to zero.
|
||||
assertTrue(facets.topLeaves == 0);
|
||||
for (Facets.FacetParentVal facetParentVal : facets.parents.values()) {
|
||||
assertTrue(facetParentVal.parentLeaves == 0);
|
||||
if (facetParentVal.childCounts != null) {
|
||||
for (int childCount : facetParentVal.childCounts) {
|
||||
assertTrue(childCount == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private void preQueryHavoc() {
|
||||
if (strategy instanceof RecursivePrefixTreeStrategy) {
|
||||
RecursivePrefixTreeStrategy rpts = (RecursivePrefixTreeStrategy) strategy;
|
||||
int scanLevel = randomInt(rpts.getGrid().getMaxLevels());
|
||||
rpts.setPrefixGridScanLevel(scanLevel);
|
||||
}
|
||||
}
|
||||
|
||||
protected Shape randomShape() {
|
||||
Calendar cal1 = randomCalendar();
|
||||
UnitNRShape s1 = tree.toShape(cal1);
|
||||
if (rarely()) {
|
||||
return s1;
|
||||
}
|
||||
try {
|
||||
Calendar cal2 = randomCalendar();
|
||||
UnitNRShape s2 = tree.toShape(cal2);
|
||||
if (cal1.compareTo(cal2) < 0) {
|
||||
return tree.toRangeShape(s1, s2);
|
||||
} else {
|
||||
return tree.toRangeShape(s2, s1);
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
assert e.getMessage().startsWith("Differing precision");
|
||||
return s1;
|
||||
}
|
||||
}
|
||||
|
||||
private Calendar randomCalendar() {
|
||||
Calendar cal = tree.newCal();
|
||||
cal.setTimeInMillis(random().nextLong() % randomCalWindowMs);
|
||||
try {
|
||||
tree.clearFieldsAfter(cal, random().nextInt(Calendar.FIELD_COUNT+1)-1);
|
||||
} catch (AssertionError e) {
|
||||
if (!e.getMessage().equals("Calendar underflow"))
|
||||
throw e;
|
||||
}
|
||||
return cal;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue