Mirror of https://github.com/apache/lucene.git, synced 2025-02-08 02:58:58 +00:00

commit eab3a48ba2
Merge remote-tracking branch 'origin/branch_6x' into branch_6x
@@ -25,10 +25,6 @@ New Features
   input tokens. Useful for normalizing short text in clustering/linking
   tasks. (Mark Harwood, Adrien Grand)
 
-* LUCENE-5735: NumberRangePrefixTreeStrategy now includes interval/range faceting
-  for counting ranges that align with the underlying terms as defined by the
-  NumberRangePrefixTree (e.g. familiar date units like days). (David Smiley)
-
 * LUCENE-6711: Use CollectionStatistics.docCount() for IDF and average field
   length computations, to avoid skew from documents that don't have the field.
   (Ahmet Arslan via Robert Muir)
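
The LUCENE-6711 entry swaps the statistic that feeds IDF and average field length: docCount() counts only documents that actually have the field, where maxDoc would count every document. A minimal sketch of the idea, not Lucene's exact similarity code (the formula shown is the standard BM25-style IDF):

    // Sketch: IDF from docCount rather than maxDoc, so documents that
    // lack the field no longer deflate the statistic.
    static float idf(long docFreq, long docCount) {
      return (float) Math.log(1 + (docCount - docFreq + 0.5D) / (docFreq + 0.5D));
    }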
@@ -159,6 +155,13 @@ Tests
   expression to encapsulate a statement that is expected to throw an exception.
   (Ryan Ernst)
 
+Bug Fixes
+
+* LUCENE-7065: Fix the explain for the global ordinals join query. Previously the
+  explain would incorrectly indicate that non-matching documents matched; on top
+  of that, with score mode average the explain would fail with an NPE.
+  (Martijn van Groningen)
+
 Other
 
 * LUCENE-7035: Upgrade icu4j to 56.1/unicode 8. (Robert Muir)
@@ -666,7 +666,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     // true if this is a 4.9+ index
     final boolean is49Index = MultiFields.getMergedFieldInfos(reader).fieldInfo("dvSortedNumeric") != null;
 
-    assert is40Index; // NOTE: currently we can only do this on trunk!
+    assert is40Index;
 
     final Bits liveDocs = MultiFields.getLiveDocs(reader);
@@ -131,6 +131,13 @@ public abstract class PointWriter implements Closeable {
   /** Default merge implementation to merge incoming points readers by visiting all their points and
    *  adding to this writer */
   public void merge(MergeState mergeState) throws IOException {
+    // check each incoming reader
+    for (PointReader reader : mergeState.pointReaders) {
+      if (reader != null) {
+        reader.checkIntegrity();
+      }
+    }
+    // merge field at a time
     for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
       if (fieldInfo.getPointDimensionCount() != 0) {
         mergeOneField(mergeState, fieldInfo);
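
For context, checkIntegrity() on a concrete PointReader typically validates the reader's data file against its stored checksum, so a corrupt input segment fails the merge up front instead of silently contributing bad points. A minimal sketch, assuming a dataIn IndexInput field that is not part of this diff:

    // Sketch of a typical checkIntegrity() body in a codec reader.
    @Override
    public void checkIntegrity() throws IOException {
      CodecUtil.checksumEntireFile(dataIn); // dataIn: this reader's IndexInput (assumed field)
    }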
@@ -122,6 +122,11 @@ public class Lucene60PointWriter extends PointWriter implements Closeable {
         return;
       }
     }
+    for (PointReader reader : mergeState.pointReaders) {
+      if (reader != null) {
+        reader.checkIntegrity();
+      }
+    }
 
     for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
       if (fieldInfo.getPointDimensionCount() != 0) {
@@ -392,5 +392,10 @@ public abstract class CodecReader extends LeafReader implements Accountable {
     if (getTermVectorsReader() != null) {
       getTermVectorsReader().checkIntegrity();
     }
+
+    // points
+    if (getPointReader() != null) {
+      getPointReader().checkIntegrity();
+    }
   }
 }
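
With points wired into CodecReader.checkIntegrity(), one call per leaf now covers the point files alongside the other per-segment files. A usage sketch, with dir as an assumed Directory (the cast holds for standard segment readers):

    // Sketch: verify every segment of an index in one pass.
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      for (LeafReaderContext leaf : reader.leaves()) {
        ((CodecReader) leaf.reader()).checkIntegrity(); // now also checks point files
      }
    }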
@@ -112,16 +112,29 @@ final class GlobalOrdinalsQuery extends Query {
     @Override
     public Explanation explain(LeafReaderContext context, int doc) throws IOException {
       SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
-      if (values != null) {
-        int segmentOrd = values.getOrd(doc);
-        if (segmentOrd != -1) {
-          BytesRef joinValue = values.lookupOrd(segmentOrd);
-          return Explanation.match(score(), "Score based on join value " + joinValue.utf8ToString());
-        }
-      }
+      if (values == null) {
+        return Explanation.noMatch("Not a match");
+      }
+
+      int segmentOrd = values.getOrd(doc);
+      if (segmentOrd == -1) {
+        return Explanation.noMatch("Not a match");
+      }
+      BytesRef joinValue = values.lookupOrd(segmentOrd);
+
+      int ord;
+      if (globalOrds != null) {
+        ord = (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
+      } else {
+        ord = segmentOrd;
+      }
+      if (foundOrds.get(ord) == false) {
+        return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue));
+      }
+
+      return Explanation.match(score(), "A match, join value " + Term.toString(joinValue));
     }
 
     @Override
     public Scorer scorer(LeafReaderContext context) throws IOException {
       SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
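
The rewritten explain() above mirrors the scorer exactly: missing doc values, a missing segment ordinal, or an ordinal absent from foundOrds each yield a noMatch explanation, where the old code could report a match for non-matching documents. A brief usage sketch (searcher, joinQuery, and docId are assumed):

    // Sketch: after the fix, Explanation agrees with actual matching.
    Explanation exp = searcher.explain(joinQuery, docId);
    if (exp.isMatch()) {
      System.out.println("match: " + exp.getDescription());    // "A match, join value ..."
    } else {
      System.out.println("no match: " + exp.getDescription()); // "Not a match..."
    }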
@@ -120,21 +120,28 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
     @Override
     public Explanation explain(LeafReaderContext context, int doc) throws IOException {
       SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
-      if (values != null) {
+      if (values == null) {
+        return Explanation.noMatch("Not a match");
+      }
+
       int segmentOrd = values.getOrd(doc);
-      if (segmentOrd != -1) {
-        final float score;
-        if (globalOrds != null) {
-          long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
-          score = collector.score((int) globalOrd);
-        } else {
-          score = collector.score(segmentOrd);
+      if (segmentOrd == -1) {
+        return Explanation.noMatch("Not a match");
+      }
       BytesRef joinValue = values.lookupOrd(segmentOrd);
-      return Explanation.match(score, "Score based on join value " + joinValue.utf8ToString());
+
+      int ord;
+      if (globalOrds != null) {
+        ord = (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
+      } else {
+        ord = segmentOrd;
+      }
+      if (collector.match(ord) == false) {
+        return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue));
+      }
-      return Explanation.noMatch("Not a match");
+
+      float score = collector.score(ord);
+      return Explanation.match(score, "A match, join value " + Term.toString(joinValue));
     }
 
     @Override
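
Both explain() implementations translate the per-segment ordinal to a global ordinal before consulting the collector. The translation in isolation, with ordinalMap, leafIndex, and segmentOrd assumed:

    // Sketch: segment-local ordinal -> global ordinal, as done above via
    // globalOrds.getGlobalOrds(context.ord).get(segmentOrd).
    LongValues segmentToGlobal = ordinalMap.getGlobalOrds(leafIndex);
    long globalOrd = segmentToGlobal.get(segmentOrd);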
@@ -48,6 +48,7 @@ import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues;
@@ -297,6 +298,108 @@ public class TestJoinUtil extends LuceneTestCase {
    dir.close();
  }

  public void testOrdinalsJoinExplainNoMatches() throws Exception {
    final String idField = "id";
    final String productIdField = "productId";
    // A field indicating to what type a document belongs, which is then used to distinguish between documents during joining.
    final String typeField = "type";
    // A single sorted doc values field that holds the join values for all document types.
    // Typically during indexing a schema will automatically create this field with the values
    final String joinField = idField + productIdField;

    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(
        dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE)
    );

    // 0
    Document doc = new Document();
    doc.add(new TextField(idField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "random text", Field.Store.NO));
    doc.add(new TextField("name", "name1", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);

    // 1
    doc = new Document();
    doc.add(new TextField(idField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "random text", Field.Store.NO));
    doc.add(new TextField("name", "name2", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);

    // 2
    doc = new Document();
    doc.add(new TextField(productIdField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);

    // 3
    doc = new Document();
    doc.add(new TextField(productIdField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    w.addDocument(doc);

    if (random().nextBoolean()) {
      w.flush();
    }

    // 4
    doc = new Document();
    doc.add(new TextField(productIdField, "3", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "5.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    w.addDocument(doc);

    // 5
    doc = new Document();
    doc.add(new TextField("field", "value", Field.Store.NO));
    w.addDocument(doc);

    IndexReader r = DirectoryReader.open(w);
    IndexSearcher indexSearcher = new IndexSearcher(r);
    SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
    for (int i = 0; i < values.length; i++) {
      LeafReader leafReader = r.leaves().get(i).reader();
      values[i] = DocValues.getSorted(leafReader, joinField);
    }
    MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
        r.getCoreCacheKey(), values, PackedInts.DEFAULT
    );

    Query toQuery = new TermQuery(new Term("price", "5.0"));
    Query fromQuery = new TermQuery(new Term("name", "name2"));

    for (ScoreMode scoreMode : ScoreMode.values()) {
      Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, scoreMode, ordinalMap);
      TopDocs result = indexSearcher.search(joinQuery, 10);
      assertEquals(1, result.totalHits);
      assertEquals(4, result.scoreDocs[0].doc); // doc with price: 5.0
      Explanation explanation = indexSearcher.explain(joinQuery, 4);
      assertTrue(explanation.isMatch());
      assertEquals(explanation.getDescription(), "A match, join value 2");

      explanation = indexSearcher.explain(joinQuery, 3);
      assertFalse(explanation.isMatch());
      assertEquals(explanation.getDescription(), "Not a match, join value 1");

      explanation = indexSearcher.explain(joinQuery, 5);
      assertFalse(explanation.isMatch());
      assertEquals(explanation.getDescription(), "Not a match");
    }

    w.close();
    indexSearcher.getIndexReader().close();
    dir.close();
  }

  public void testRandomOrdinalsJoin() throws Exception {
    IndexIterationContext context = createContext(512, false, true);
    int searchIters = 10;
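
The new test drives JoinUtil.createJoinQuery once per ScoreMode. Stripped to a single call, the query construction looks like this (field names and variables as in the test above):

    // Sketch: join "from" docs (products named name2) to "to" docs
    // (price docs) through the shared sorted-doc-values join field.
    Query joinQuery = JoinUtil.createJoinQuery(
        joinField,                                  // SortedDocValues field holding join keys
        new TermQuery(new Term("name", "name2")),   // fromQuery: selects the "from" side
        new TermQuery(new Term("price", "5.0")),    // toQuery: restricts the "to" side
        indexSearcher, ScoreMode.None, ordinalMap);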
@@ -70,7 +70,7 @@ final class LatLonPointDistanceQuery extends Query {
     final GeoRect box2;
 
     // crosses dateline: split
-    if (box.maxLon < box.minLon) {
+    if (box.crossesDateline()) {
       box1 = new GeoRect(-180.0, box.maxLon, box.minLat, box.maxLat);
       box2 = new GeoRect(box.minLon, 180.0, box.minLat, box.maxLat);
     } else {
@@ -16,11 +16,12 @@
  */
 package org.apache.lucene.search;
 
-import org.apache.lucene.document.LatLonPoint;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.LatLonPoint;
 import org.apache.lucene.spatial.util.BaseGeoPointTestCase;
 import org.apache.lucene.spatial.util.GeoDistanceUtils;
 import org.apache.lucene.spatial.util.GeoRect;
 import org.apache.lucene.spatial.util.GeoUtils;
 
 public class TestLatLonPointQueries extends BaseGeoPointTestCase {
   // TODO: remove this!
@@ -146,4 +147,87 @@ public class TestLatLonPointQueries extends BaseGeoPointTestCase {
    final double d = GeoDistanceUtils.haversin(centerLat, centerLon, pointLat, pointLon);
    return d >= minRadiusMeters && d <= radiusMeters;
  }

  /** Returns a random double from min to max, or up to 1% outside of that range */
  private double randomRangeMaybeSlightlyOutside(double min, double max) {
    return min + (random().nextDouble() + (0.5 - random().nextDouble()) * .02) * (max - min);
  }

  // We rely heavily on GeoUtils.circleToBBox so we test it here:
  public void testRandomCircleToBBox() throws Exception {
    int iters = atLeast(1000);
    for (int iter = 0; iter < iters; iter++) {

      boolean useSmallRanges = random().nextBoolean();

      double radiusMeters;

      double centerLat = randomLat(useSmallRanges);
      double centerLon = randomLon(useSmallRanges);

      if (useSmallRanges) {
        // Approx 4 degrees lon at the equator:
        radiusMeters = random().nextDouble() * 444000;
      } else {
        radiusMeters = random().nextDouble() * 50000000;
      }

      // TODO: randomly quantize radius too, to provoke exact math errors?

      GeoRect bbox = GeoUtils.circleToBBox(centerLon, centerLat, radiusMeters);

      int numPointsToTry = 1000;
      for (int i = 0; i < numPointsToTry; i++) {

        double lat;
        double lon;

        if (random().nextBoolean()) {
          lat = randomLat(useSmallRanges);
          lon = randomLon(useSmallRanges);
        } else {
          // pick a lat/lon within the bbox or "slightly" outside it to try to improve test efficiency
          lat = quantizeLat(GeoUtils.normalizeLat(randomRangeMaybeSlightlyOutside(bbox.minLat, bbox.maxLat)));
          if (bbox.crossesDateline()) {
            if (random().nextBoolean()) {
              lon = quantizeLon(GeoUtils.normalizeLon(randomRangeMaybeSlightlyOutside(bbox.maxLon, -180)));
            } else {
              lon = quantizeLon(GeoUtils.normalizeLon(randomRangeMaybeSlightlyOutside(0, bbox.minLon)));
            }
          } else {
            lon = quantizeLon(GeoUtils.normalizeLon(randomRangeMaybeSlightlyOutside(bbox.minLon, bbox.maxLon)));
          }
        }

        double distanceMeters = GeoDistanceUtils.haversin(centerLat, centerLon, lat, lon);

        // Haversin says it's within the circle:
        boolean haversinSays = distanceMeters <= radiusMeters;

        // BBox says it's within the box:
        boolean bboxSays;
        if (bbox.crossesDateline()) {
          if (lat >= bbox.minLat && lat <= bbox.maxLat) {
            bboxSays = lon <= bbox.maxLon || lon >= bbox.minLon;
          } else {
            bboxSays = false;
          }
        } else {
          bboxSays = lat >= bbox.minLat && lat <= bbox.maxLat && lon >= bbox.minLon && lon <= bbox.maxLon;
        }

        if (haversinSays) {
          if (bboxSays == false) {
            System.out.println("small=" + useSmallRanges + " centerLat=" + centerLat + " centerLon=" + centerLon + " radiusMeters=" + radiusMeters);
            System.out.println("  bbox: lat=" + bbox.minLat + " to " + bbox.maxLat + " lon=" + bbox.minLon + " to " + bbox.maxLon);
            System.out.println("  point: lat=" + lat + " lon=" + lon);
            System.out.println("  haversin: " + distanceMeters);
            fail("point was within the distance according to haversin, but the bbox doesn't contain it");
          }
        } else {
          // it's fine if haversin said it was outside the radius and bbox said it was inside the box
        }
      }
    }
  }
}
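
The property under test is a one-way implication: if the haversine distance puts a point inside the circle, the box from GeoUtils.circleToBBox must contain it; the converse need not hold because the box is a conservative cover of the circle. As a predicate, with bboxContains a hypothetical helper carrying the same dateline-aware logic as bboxSays above:

    // Sketch: the invariant the test enforces.
    boolean invariantHolds = distanceMeters > radiusMeters || bboxContains(bbox, lat, lon);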
@@ -16,22 +16,13 @@
  */
 package org.apache.lucene.spatial.prefix;
 
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-import org.locationtech.spatial4j.shape.Point;
-import org.locationtech.spatial4j.shape.Shape;
 import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.spatial.prefix.tree.Cell;
 import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
-import org.apache.lucene.util.Bits;
-
-import static org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
+import org.locationtech.spatial4j.shape.Point;
+import org.locationtech.spatial4j.shape.Shape;
 
 /** A PrefixTree based on Number/Date ranges. This isn't very "spatial" on the surface (to the user) but
  *  it's implemented using spatial so that's why it's here extending a SpatialStrategy. When using this class, you will
@@ -68,132 +59,4 @@ public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy {
    throw new UnsupportedOperationException();
  }

  /** Calculates facets between {@code start} and {@code end} to a detail level one greater than that provided by the
   * arguments. For example providing March to October of 2014 would return facets to the day level of those months.
   * This is just a convenience method.
   * @see #calcFacets(IndexReaderContext, Bits, Shape, int)
   */
  public Facets calcFacets(IndexReaderContext context, Bits topAcceptDocs, UnitNRShape start, UnitNRShape end)
      throws IOException {
    Shape facetRange = getGrid().toRangeShape(start, end);
    int detailLevel = Math.max(start.getLevel(), end.getLevel()) + 1;
    return calcFacets(context, topAcceptDocs, facetRange, detailLevel);
  }

  /**
   * Calculates facets (aggregated counts) given a range shape (start-end span) and a level, which specifies the detail.
   * To get the level of an existing shape, say a Calendar, call
   * {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree#toUnitShape(Object)} then call
   * {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape#getLevel()}.
   * Facet computation is implemented by navigating the underlying indexed terms efficiently.
   */
  public Facets calcFacets(IndexReaderContext context, Bits topAcceptDocs, Shape facetRange, final int level)
      throws IOException {
    final Facets facets = new Facets(level);
    PrefixTreeFacetCounter.compute(this, context, topAcceptDocs, facetRange, level,
        new PrefixTreeFacetCounter.FacetVisitor() {
          Facets.FacetParentVal parentFacet;
          UnitNRShape parentShape;

          @Override
          public void visit(Cell cell, int count) {
            if (cell.getLevel() < level - 1) { // some ancestor of parent facet level, direct or distant
              parentFacet = null; // reset
              parentShape = null; // reset
              facets.topLeaves += count;
            } else if (cell.getLevel() == level - 1) { // parent
              // set up FacetParentVal
              setupParent((UnitNRShape) cell.getShape());
              parentFacet.parentLeaves += count;
            } else { // at facet level
              UnitNRShape unitShape = (UnitNRShape) cell.getShape();
              UnitNRShape unitShapeParent = unitShape.getShapeAtLevel(unitShape.getLevel() - 1);
              if (parentFacet == null || !parentShape.equals(unitShapeParent)) {
                setupParent(unitShapeParent);
              }
              // lazy init childCounts
              if (parentFacet.childCounts == null) {
                parentFacet.childCounts = new int[parentFacet.childCountsLen];
              }
              parentFacet.childCounts[unitShape.getValAtLevel(cell.getLevel())] += count;
            }
          }

          private void setupParent(UnitNRShape unitShape) {
            parentShape = unitShape.clone();
            // Look for an existing parentFacet (from a previous segment), or create anew if needed
            parentFacet = facets.parents.get(parentShape);
            if (parentFacet == null) { // didn't find one; make a new one
              parentFacet = new Facets.FacetParentVal();
              parentFacet.childCountsLen = getGrid().getNumSubCells(parentShape);
              facets.parents.put(parentShape, parentFacet);
            }
          }
        });
    return facets;
  }

  /** Facet response information */
  public static class Facets {
    //TODO consider a variable-level structure -- more general purpose.

    public Facets(int detailLevel) {
      this.detailLevel = detailLevel;
    }

    /** The bottom-most detail-level counted, as requested. */
    public final int detailLevel;

    /**
     * The count of documents with ranges that completely spanned the parents of the detail level. In more technical
     * terms, this is the count of leaf cells 2 up and higher from the bottom. Usually you only care about counts at
     * detailLevel, and so you will add this number to all other counts below, including to omitted/implied children
     * counts of 0. If there are no indexed ranges (just instances, i.e. fully specified dates) then this value will
     * always be 0.
     */
    public int topLeaves;

    /** Holds all the {@link FacetParentVal} instances in order of the key. This is sparse; there won't be an
     * instance if its count and children are all 0. The keys are {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape} shapes, which can be
     * converted back to the original Object (i.e. a Calendar) via
     * {@link NumberRangePrefixTree#toObject(org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape)}. */
    public final SortedMap<UnitNRShape,FacetParentVal> parents = new TreeMap<>();

    /** Holds a block of detailLevel counts aggregated to their parent level. */
    public static class FacetParentVal {

      /** The count of ranges that span all of the childCounts. In more technical terms, this is the number of leaf
       * cells found at this parent. Treat this like {@link Facets#topLeaves}. */
      public int parentLeaves;

      /** The length of {@link #childCounts}. If childCounts is not null then this is childCounts.length, otherwise it
       * says how long it would have been if it weren't null. */
      public int childCountsLen;

      /** The detail level counts. It will be null if there are none, and thus they are assumed 0. Most apps, when
       * presenting the information, will add {@link #topLeaves} and {@link #parentLeaves} to each count. */
      public int[] childCounts;
      // assert childCountsLen == childCounts.length
    }

    @Override
    public String toString() {
      StringBuilder buf = new StringBuilder(2048);
      buf.append("Facets: level=" + detailLevel + " topLeaves=" + topLeaves + " parentCount=" + parents.size());
      for (Map.Entry<UnitNRShape, FacetParentVal> entry : parents.entrySet()) {
        buf.append('\n');
        if (buf.length() > 1000) {
          buf.append("...");
          break;
        }
        final FacetParentVal pVal = entry.getValue();
        buf.append(' ').append(entry.getKey() + " leafCount=" + pVal.parentLeaves);
        if (pVal.childCounts != null) {
          buf.append(' ').append(Arrays.toString(pVal.childCounts));
        }
      }
      return buf.toString();
    }
  }

}
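
For reference, the facet API shown above is driven as follows; a hedged usage sketch in which the searcher, the two Calendar values, and the field name are assumptions:

    // Sketch: count date-range facets one level below the start/end units.
    DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;
    NumberRangePrefixTreeStrategy strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
    UnitNRShape start = tree.toShape(startCal); // startCal/endCal: java.util.Calendar values
    UnitNRShape end = tree.toShape(endCal);
    Facets facets = strategy.calcFacets(searcher.getTopReaderContext(), null, start, end);
    System.out.println(facets); // topLeaves plus per-parent child counts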
@@ -1,275 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;

import com.carrotsearch.randomizedtesting.annotations.Repeat;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.prefix.NumberRangePrefixTreeStrategy.Facets;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.junit.Before;
import org.junit.Test;

import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;

public class NumberRangeFacetsTest extends StrategyTestCase {

  DateRangePrefixTree tree;

  int randomCalWindowField;
  long randomCalWindowMs;

  @Before
  public void setUp() throws Exception {
    super.setUp();
    tree = DateRangePrefixTree.INSTANCE;
    strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
    Calendar tmpCal = tree.newCal();
    randomCalWindowField = randomIntBetween(1, Calendar.ZONE_OFFSET - 1); // we're not allowed to add zone offset
    tmpCal.add(randomCalWindowField, 2_000);
    randomCalWindowMs = Math.max(2000L, tmpCal.getTimeInMillis());
  }

  @Repeat(iterations = 20)
  @Test
  public void test() throws IOException {
    // generate test data
    List<Shape> indexedShapes = new ArrayList<>();
    final int numIndexedShapes = random().nextInt(15);
    for (int i = 0; i < numIndexedShapes; i++) {
      indexedShapes.add(randomShape());
    }

    // Main index loop:
    for (int i = 0; i < indexedShapes.size(); i++) {
      Shape shape = indexedShapes.get(i);
      adoc("" + i, shape);

      if (random().nextInt(10) == 0)
        commit(); // intermediate commit, produces extra segments
    }

    // delete some documents randomly
    for (int id = 0; id < indexedShapes.size(); id++) {
      if (random().nextInt(10) == 0) {
        deleteDoc("" + id);
        indexedShapes.set(id, null);
      }
    }

    commit();

    // Main query loop:
    for (int queryIdx = 0; queryIdx < 10; queryIdx++) {
      preQueryHavoc();

      // We need to have a facet range window to do the facets between (a start time & end time). We randomly
      // pick a date, decide the level we want to facet on, and then pick a right end time that is up to 2 thousand
      // values later.
      int calFieldFacet = randomCalWindowField - 1;
      if (calFieldFacet > 1 && rarely()) {
        calFieldFacet--;
      }
      final Calendar leftCal = randomCalendar();
      leftCal.add(calFieldFacet, -1 * randomInt(1000));
      Calendar rightCal = (Calendar) leftCal.clone();
      rightCal.add(calFieldFacet, randomInt(2000));
      // Pick facet detail level based on cal field.
      int detailLevel = tree.getTreeLevelForCalendarField(calFieldFacet);
      if (detailLevel < 0) { // no exact match
        detailLevel = -1 * detailLevel;
      }

      // Randomly pick a filter/acceptDocs
      Bits topAcceptDocs = null;
      List<Integer> acceptFieldIds = new ArrayList<>();
      if (usually()) {
        // get all possible IDs into a list, random shuffle it, then randomly choose how many of the first we use to
        // replace the list.
        for (int i = 0; i < indexedShapes.size(); i++) {
          if (indexedShapes.get(i) == null) { // we deleted this one
            continue;
          }
          acceptFieldIds.add(i);
        }
        Collections.shuffle(acceptFieldIds, random());
        acceptFieldIds = acceptFieldIds.subList(0, randomInt(acceptFieldIds.size()));
        if (!acceptFieldIds.isEmpty()) {
          List<Term> terms = new ArrayList<>();
          for (Integer acceptDocId : acceptFieldIds) {
            terms.add(new Term("id", acceptDocId.toString()));
          }

          topAcceptDocs = searchForDocBits(new TermsQuery(terms));
        }
      }

      // Let's do it!
      NumberRangePrefixTree.NRShape facetRange = tree.toRangeShape(tree.toShape(leftCal), tree.toShape(rightCal));
      Facets facets = ((NumberRangePrefixTreeStrategy) strategy)
          .calcFacets(indexSearcher.getTopReaderContext(), topAcceptDocs, facetRange, detailLevel);

      //System.out.println("Q: " + queryIdx + " " + facets);

      // Verify results. We do it by looping over indexed shapes and reducing the facet counts.
      Shape facetShapeRounded = facetRange.roundToLevel(detailLevel);
      for (int indexedShapeId = 0; indexedShapeId < indexedShapes.size(); indexedShapeId++) {
        if (topAcceptDocs != null && !acceptFieldIds.contains(indexedShapeId)) {
          continue; // this doc was filtered out via acceptDocs
        }
        Shape indexedShape = indexedShapes.get(indexedShapeId);
        if (indexedShape == null) { // was deleted
          continue;
        }
        Shape indexedShapeRounded = ((NumberRangePrefixTree.NRShape) indexedShape).roundToLevel(detailLevel);
        if (!indexedShapeRounded.relate(facetShapeRounded).intersects()) { // no intersection at all
          continue;
        }
        // walk the cells
        final CellIterator cellIterator = tree.getTreeCellIterator(indexedShape, detailLevel);
        while (cellIterator.hasNext()) {
          Cell cell = cellIterator.next();
          if (!cell.getShape().relate(facetShapeRounded).intersects()) {
            cellIterator.remove(); // no intersection; prune
            continue;
          }
          assert cell.getLevel() <= detailLevel;

          if (cell.getLevel() == detailLevel) {
            // count it
            UnitNRShape shape = (UnitNRShape) cell.getShape();
            final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1); // get parent
            final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
            assertNotNull(facetParentVal);
            int index = shape.getValAtLevel(shape.getLevel());
            assertNotNull(facetParentVal.childCounts);
            assert facetParentVal.childCounts[index] > 0;
            facetParentVal.childCounts[index]--;

          } else if (cell.isLeaf()) {
            // count it, and remove/prune.
            if (cell.getLevel() < detailLevel - 1) {
              assert facets.topLeaves > 0;
              facets.topLeaves--;
            } else {
              UnitNRShape shape = (UnitNRShape) cell.getShape();
              final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1); // get parent
              final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
              assertNotNull(facetParentVal);
              assert facetParentVal.parentLeaves > 0;
              facetParentVal.parentLeaves--;
            }

            cellIterator.remove();
          }
        }
      }
      // At this point, all counts should be down to zero.
      assertTrue(facets.topLeaves == 0);
      for (Facets.FacetParentVal facetParentVal : facets.parents.values()) {
        assertTrue(facetParentVal.parentLeaves == 0);
        if (facetParentVal.childCounts != null) {
          for (int childCount : facetParentVal.childCounts) {
            assertTrue(childCount == 0);
          }
        }
      }

    }
  }

  private Bits searchForDocBits(Query query) throws IOException {
    FixedBitSet bitSet = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
    indexSearcher.search(query,
        new SimpleCollector() {
          int leafDocBase;

          @Override
          public void collect(int doc) throws IOException {
            bitSet.set(leafDocBase + doc);
          }

          @Override
          protected void doSetNextReader(LeafReaderContext context) throws IOException {
            leafDocBase = context.docBase;
          }

          @Override
          public boolean needsScores() {
            return false;
          }
        });
    return bitSet;
  }

  private void preQueryHavoc() {
    if (strategy instanceof RecursivePrefixTreeStrategy) {
      RecursivePrefixTreeStrategy rpts = (RecursivePrefixTreeStrategy) strategy;
      int scanLevel = randomInt(rpts.getGrid().getMaxLevels());
      rpts.setPrefixGridScanLevel(scanLevel);
    }
  }

  protected Shape randomShape() {
    Calendar cal1 = randomCalendar();
    UnitNRShape s1 = tree.toShape(cal1);
    if (rarely()) {
      return s1;
    }
    try {
      Calendar cal2 = randomCalendar();
      UnitNRShape s2 = tree.toShape(cal2);
      if (cal1.compareTo(cal2) < 0) {
        return tree.toRangeShape(s1, s2);
      } else {
        return tree.toRangeShape(s2, s1);
      }
    } catch (IllegalArgumentException e) {
      assert e.getMessage().startsWith("Differing precision");
      return s1;
    }
  }

  private Calendar randomCalendar() {
    Calendar cal = tree.newCal();
    cal.setTimeInMillis(random().nextLong() % randomCalWindowMs);
    try {
      tree.clearFieldsAfter(cal, random().nextInt(Calendar.FIELD_COUNT + 1) - 1);
    } catch (AssertionError e) {
      if (!e.getMessage().equals("Calendar underflow"))
        throw e;
    }
    return cal;
  }
}
@@ -70,4 +70,9 @@ public class GeoRect {
 
     return b.toString();
   }
+
+  /** Returns true if this bounding box crosses the dateline */
+  public boolean crossesDateline() {
+    return maxLon < minLon;
+  }
 }
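
crossesDateline() reads directly off the class invariant: a GeoRect stored with maxLon < minLon wraps across ±180. A dateline-aware containment check built on it, as a sketch rather than an actual GeoRect method (the same logic the bboxSays computation uses in the test further up):

    // Sketch: point-in-box test; across the dateline the longitude
    // condition becomes a disjunction of the two halves around ±180.
    static boolean contains(GeoRect box, double lat, double lon) {
      if (lat < box.minLat || lat > box.maxLat) {
        return false;
      }
      if (box.crossesDateline()) {
        return lon >= box.minLon || lon <= box.maxLon;
      }
      return lon >= box.minLon && lon <= box.maxLon;
    }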
@@ -262,6 +262,8 @@ Bug Fixes
 * SOLR-8728: ReplicaAssigner throws NPE when only a partial list of nodes is participating in replica
   placement. splitshard should preassign nodes using rules, if rules are present (noble, Shai Erera)
 
+* SOLR-8779: Fix missing InterruptedException handling in ZkStateReader.java (Varun Thacker)
+
 Optimizations
 ----------------------
 * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been
@@ -45,14 +45,16 @@ import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.util.TimeOut;
 import org.apache.zookeeper.KeeperException;
+import org.junit.Ignore;
 import org.junit.Test;
 
 import static org.apache.solr.cloud.ReplicaPropertiesBase.verifyUniqueAcrossCollection;
 
 @LuceneTestCase.Slow
-public class CollectionsAPISolrJTests extends AbstractFullDistribZkTestBase {
+public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase {
 
   @Test
+  @Ignore
   public void test() throws Exception {
     testCreateAndDeleteCollection();
     testCreateAndDeleteShard();
@@ -165,12 +165,10 @@ public class ZkStateReader implements Closeable {
       } else {
         throw new ZooKeeperException(ErrorCode.INVALID_STATE, "No config data found at path: " + path);
       }
-    }
-    catch (KeeperException e) {
+    } catch (KeeperException e) {
       throw new SolrException(ErrorCode.SERVER_ERROR, "Error loading config name for collection " + collection, e);
-    }
-    catch (InterruptedException e) {
-      Thread.interrupted();
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
       throw new SolrException(ErrorCode.SERVER_ERROR, "Error loading config name for collection " + collection, e);
     }
 
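
The one-line change from Thread.interrupted() to Thread.currentThread().interrupt() is the heart of SOLR-8779: interrupted() tests and clears the thread's interrupt flag, while interrupt() restores it so callers further up the stack still observe the interruption. The pattern applied throughout this commit, sketched with a hypothetical zkOperation():

    // Sketch: never swallow InterruptedException; restore the flag
    // before converting to a runtime exception.
    try {
      zkOperation(); // hypothetical blocking ZooKeeper call
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt(); // Thread.interrupted() would clear the flag instead
      throw new SolrException(ErrorCode.SERVER_ERROR, "Interrupted during ZooKeeper operation", e);
    }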
@@ -691,14 +689,17 @@ public class ZkStateReader implements Closeable {
     this.aliases = ClusterState.load(data);
   }
 
-  public Map getClusterProps(){
+  public Map getClusterProps() {
     try {
       if (getZkClient().exists(ZkStateReader.CLUSTER_PROPS, true)) {
         return (Map) Utils.fromJSON(getZkClient().getData(ZkStateReader.CLUSTER_PROPS, null, new Stat(), true));
       } else {
         return new LinkedHashMap();
       }
-    } catch (Exception e) {
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Thread interrupted. Error reading cluster properties", e);
+    } catch (KeeperException e) {
       throw new SolrException(ErrorCode.SERVER_ERROR, "Error reading cluster properties", e);
     }
   }
@@ -741,9 +742,13 @@ public class ZkStateReader implements Closeable {
         LOG.warn("Race condition while trying to set a new cluster prop on current version [{}]", s.getVersion());
         //race condition
         continue;
-      } catch (Exception ex) {
-        LOG.error("Error updating path [{}]", CLUSTER_PROPS, ex);
-        throw new SolrException(ErrorCode.SERVER_ERROR, "Error updating cluster property " + propertyName, ex);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        LOG.error("Thread Interrupted. Error updating path [{}]", CLUSTER_PROPS, e);
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Thread Interrupted. Error updating cluster property " + propertyName, e);
+      } catch (KeeperException e) {
+        LOG.error("Error updating path [{}]", CLUSTER_PROPS, e);
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Error updating cluster property " + propertyName, e);
       }
       break;
     }
@@ -766,8 +771,11 @@ public class ZkStateReader implements Closeable {
           new ConfigData((Map<String, Object>) Utils.fromJSON(data), stat.getVersion()) :
           null;
       }
-    } catch (KeeperException | InterruptedException e) {
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Error reading security properties", e);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Error reading security properties", e);
+    } catch (KeeperException e) {
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Error reading security properties", e);
     }
     return null;
   }