Merge remote-tracking branch 'origin/branch_6x' into branch_6x

commit eab3a48ba2
Noble Paul, 2016-03-05 11:31:05 +05:30
16 changed files with 287 additions and 455 deletions

View File

@@ -25,10 +25,6 @@ New Features
input tokens. Useful for normalizing short text in clustering/linking
tasks. (Mark Harwood, Adrien Grand)
* LUCENE-5735: NumberRangePrefixTreeStrategy now includes interval/range faceting
for counting ranges that align with the underlying terms as defined by the
NumberRangePrefixTree (e.g. familiar date units like days). (David Smiley)
* LUCENE-6711: Use CollectionStatistics.docCount() for IDF and average field
length computations, to avoid skew from documents that don't have the field.
(Ahmet Arslan via Robert Muir)
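For context on LUCENE-6711: Lucene's BM25 similarity derives IDF and average field length from per-field collection statistics, and docCount() (documents that actually contain the field) keeps both ratios meaningful when many documents lack the field, where maxDoc() would skew them. A hedged sketch of the idea, not the verbatim library code:

  // docFreq  = number of documents containing the term
  // docCount = number of documents containing the field (not the index-wide maxDoc)
  static float idf(long docFreq, long docCount) {
    return (float) Math.log(1 + (docCount - docFreq + 0.5d) / (docFreq + 0.5d));
  }
  // average field length over documents that have the field:
  // avgFieldLength = sumTotalTermFreq / docCount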
@@ -159,6 +155,13 @@ Tests
expression to encapsulate a statement that is expected to throw an exception.
(Ryan Ernst)
Bug Fixes
* LUCENE-7065: Fix the explain for the global ordinals join query. Before, the
explain would indicate that non-matching documents matched; on top of that,
with score mode average, the explain would fail with an NPE.
(Martijn van Groningen)
Other
* LUCENE-7035: Upgrade icu4j to 56.1/unicode 8. (Robert Muir)

View File

@@ -666,7 +666,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// true if this is a 4.9+ index
final boolean is49Index = MultiFields.getMergedFieldInfos(reader).fieldInfo("dvSortedNumeric") != null;
assert is40Index; // NOTE: currently we can only do this on trunk!
assert is40Index;
final Bits liveDocs = MultiFields.getLiveDocs(reader);

View File

@@ -131,6 +131,13 @@ public abstract class PointWriter implements Closeable {
/** Default merge implementation to merge incoming points readers by visiting all their points and
* adding to this writer */
public void merge(MergeState mergeState) throws IOException {
// check each incoming reader
for (PointReader reader : mergeState.pointReaders) {
if (reader != null) {
reader.checkIntegrity();
}
}
// merge field at a time
for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
if (fieldInfo.getPointDimensionCount() != 0) {
mergeOneField(mergeState, fieldInfo);
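(Both here and in Lucene60PointWriter below, the new up-front checkIntegrity() pass verifies each incoming reader's checksums before any points are copied, so a corrupt source segment fails the merge fast instead of partway through; the companion CodecReader change wires point readers into the segment-wide integrity check.)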

View File

@@ -122,6 +122,11 @@ public class Lucene60PointWriter extends PointWriter implements Closeable {
return;
}
}
for (PointReader reader : mergeState.pointReaders) {
if (reader != null) {
reader.checkIntegrity();
}
}
for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
if (fieldInfo.getPointDimensionCount() != 0) {

View File

@@ -392,5 +392,10 @@ public abstract class CodecReader extends LeafReader implements Accountable {
if (getTermVectorsReader() != null) {
getTermVectorsReader().checkIntegrity();
}
// points
if (getPointReader() != null) {
getPointReader().checkIntegrity();
}
}
}

View File

@@ -112,14 +112,27 @@ final class GlobalOrdinalsQuery extends Query {
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values != null) {
int segmentOrd = values.getOrd(doc);
if (segmentOrd != -1) {
BytesRef joinValue = values.lookupOrd(segmentOrd);
return Explanation.match(score(), "Score based on join value " + joinValue.utf8ToString());
}
if (values == null) {
return Explanation.noMatch("Not a match");
}
return Explanation.noMatch("Not a match");
int segmentOrd = values.getOrd(doc);
if (segmentOrd == -1) {
return Explanation.noMatch("Not a match");
}
BytesRef joinValue = values.lookupOrd(segmentOrd);
int ord;
if (globalOrds != null) {
ord = (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
} else {
ord = segmentOrd;
}
if (foundOrds.get(ord) == false) {
return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue));
}
return Explanation.match(score(), "A match, join value " + Term.toString(joinValue));
}
@Override
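The restructured explain() walks the non-match cases explicitly: no doc values for the join field, no ordinal for the document, and an ordinal absent from foundOrds each return a no-match explanation, so non-matching documents are no longer reported as matches. The same restructuring, with the score looked up from the collector, follows in GlobalOrdinalsWithScoreQuery in the next file, which also removes the NPE seen with score mode average per the CHANGES entry above.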

View File

@@ -120,21 +120,28 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values != null) {
int segmentOrd = values.getOrd(doc);
if (segmentOrd != -1) {
final float score;
if (globalOrds != null) {
long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
score = collector.score((int) globalOrd);
} else {
score = collector.score(segmentOrd);
}
BytesRef joinValue = values.lookupOrd(segmentOrd);
return Explanation.match(score, "Score based on join value " + joinValue.utf8ToString());
}
if (values == null) {
return Explanation.noMatch("Not a match");
}
return Explanation.noMatch("Not a match");
int segmentOrd = values.getOrd(doc);
if (segmentOrd == -1) {
return Explanation.noMatch("Not a match");
}
BytesRef joinValue = values.lookupOrd(segmentOrd);
int ord;
if (globalOrds != null) {
ord = (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
} else {
ord = segmentOrd;
}
if (collector.match(ord) == false) {
return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue));
}
float score = collector.score(ord);
return Explanation.match(score, "A match, join value " + Term.toString(joinValue));
}
@Override

View File

@@ -48,6 +48,7 @@ import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
@@ -297,6 +298,108 @@ public class TestJoinUtil extends LuceneTestCase {
dir.close();
}
public void testOrdinalsJoinExplainNoMatches() throws Exception {
final String idField = "id";
final String productIdField = "productId";
// A field indicating the type a document belongs to, which is then used to distinguish between documents during joining.
final String typeField = "type";
// A single sorted doc values field that holds the join values for all document types.
// Typically during indexing a schema will automatically create this field with the join values
final String joinField = idField + productIdField;
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(
dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE)
);
// 0
Document doc = new Document();
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
if (random().nextBoolean()) {
w.flush();
}
// 4
doc = new Document();
doc.add(new TextField(productIdField, "3", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "5.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField("field", "value", Field.Store.NO));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
IndexSearcher indexSearcher = new IndexSearcher(r);
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
r.getCoreCacheKey(), values, PackedInts.DEFAULT
);
Query toQuery = new TermQuery(new Term("price", "5.0"));
Query fromQuery = new TermQuery(new Term("name", "name2"));
for (ScoreMode scoreMode : ScoreMode.values()) {
Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, scoreMode, ordinalMap);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(1, result.totalHits);
assertEquals(4, result.scoreDocs[0].doc); // doc with price: 5.0
Explanation explanation = indexSearcher.explain(joinQuery, 4);
assertTrue(explanation.isMatch());
assertEquals(explanation.getDescription(), "A match, join value 2");
explanation = indexSearcher.explain(joinQuery, 3);
assertFalse(explanation.isMatch());
assertEquals(explanation.getDescription(), "Not a match, join value 1");
explanation = indexSearcher.explain(joinQuery, 5);
assertFalse(explanation.isMatch());
assertEquals(explanation.getDescription(), "Not a match");
}
w.close();
indexSearcher.getIndexReader().close();
dir.close();
}
public void testRandomOrdinalsJoin() throws Exception {
IndexIterationContext context = createContext(512, false, true);
int searchIters = 10;

View File

@@ -70,7 +70,7 @@ final class LatLonPointDistanceQuery extends Query {
final GeoRect box2;
// crosses dateline: split
if (box.maxLon < box.minLon) {
if (box.crossesDateline()) {
box1 = new GeoRect(-180.0, box.maxLon, box.minLat, box.maxLat);
box2 = new GeoRect(box.minLon, 180.0, box.minLat, box.maxLat);
} else {

View File

@@ -16,11 +16,12 @@
*/
package org.apache.lucene.search;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.spatial.util.BaseGeoPointTestCase;
import org.apache.lucene.spatial.util.GeoDistanceUtils;
import org.apache.lucene.spatial.util.GeoRect;
import org.apache.lucene.spatial.util.GeoUtils;
public class TestLatLonPointQueries extends BaseGeoPointTestCase {
// TODO: remove this!
@@ -146,4 +147,87 @@ public class TestLatLonPointQueries extends BaseGeoPointTestCase {
final double d = GeoDistanceUtils.haversin(centerLat, centerLon, pointLat, pointLon);
return d >= minRadiusMeters && d <= radiusMeters;
}
/** Returns a random double between min and max, or up to 1% outside that range */
private double randomRangeMaybeSlightlyOutside(double min, double max) {
return min + (random().nextDouble() + (0.5 - random().nextDouble()) * .02) * (max - min);
}
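(A quick check of the arithmetic: random().nextDouble() spans [0, 1) and (0.5 - random().nextDouble()) * .02 spans roughly (-0.01, 0.01], so the combined multiplier covers about (-0.01, 1.01), i.e. up to 1% below min or above max, matching the javadoc.)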
// We rely heavily on GeoUtils.circleToBBox so we test it here:
public void testRandomCircleToBBox() throws Exception {
int iters = atLeast(1000);
for(int iter=0;iter<iters;iter++) {
boolean useSmallRanges = random().nextBoolean();
double radiusMeters;
double centerLat = randomLat(useSmallRanges);
double centerLon = randomLon(useSmallRanges);
if (useSmallRanges) {
// Approx 4 degrees lon at the equator:
radiusMeters = random().nextDouble() * 444000;
} else {
radiusMeters = random().nextDouble() * 50000000;
}
// TODO: randomly quantize radius too, to provoke exact math errors?
GeoRect bbox = GeoUtils.circleToBBox(centerLon, centerLat, radiusMeters);
int numPointsToTry = 1000;
for(int i=0;i<numPointsToTry;i++) {
double lat;
double lon;
if (random().nextBoolean()) {
lat = randomLat(useSmallRanges);
lon = randomLon(useSmallRanges);
} else {
// pick a lat/lon within the bbox or "slightly" outside it to try to improve test efficiency
lat = quantizeLat(GeoUtils.normalizeLat(randomRangeMaybeSlightlyOutside(bbox.minLat, bbox.maxLat)));
if (bbox.crossesDateline()) {
if (random().nextBoolean()) {
lon = quantizeLon(GeoUtils.normalizeLon(randomRangeMaybeSlightlyOutside(bbox.maxLon, -180)));
} else {
lon = quantizeLon(GeoUtils.normalizeLon(randomRangeMaybeSlightlyOutside(0, bbox.minLon)));
}
} else {
lon = quantizeLon(GeoUtils.normalizeLon(randomRangeMaybeSlightlyOutside(bbox.minLon, bbox.maxLon)));
}
}
double distanceMeters = GeoDistanceUtils.haversin(centerLat, centerLon, lat, lon);
// Haversin says it's within the circle:
boolean haversinSays = distanceMeters <= radiusMeters;
// BBox says it's within the box:
boolean bboxSays;
if (bbox.crossesDateline()) {
if (lat >= bbox.minLat && lat <= bbox.maxLat) {
bboxSays = lon <= bbox.maxLon || lon >= bbox.minLon;
} else {
bboxSays = false;
}
} else {
bboxSays = lat >= bbox.minLat && lat <= bbox.maxLat && lon >= bbox.minLon && lon <= bbox.maxLon;
}
if (haversinSays) {
if (bboxSays == false) {
System.out.println("small=" + useSmallRanges + " centerLat=" + centerLat + " cetnerLon=" + centerLon + " radiusMeters=" + radiusMeters);
System.out.println(" bbox: lat=" + bbox.minLat + " to " + bbox.maxLat + " lon=" + bbox.minLon + " to " + bbox.maxLon);
System.out.println(" point: lat=" + lat + " lon=" + lon);
System.out.println(" haversin: " + distanceMeters);
fail("point was within the distance according to haversin, but the bbox doesn't contain it");
}
} else {
// it's fine if haversin said it was outside the radius and bbox said it was inside the box
}
}
}
}
}

View File

@@ -16,22 +16,13 @@
*/
package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import org.apache.lucene.util.Bits;
import static org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
/** A PrefixTree based on Number/Date ranges. This isn't very "spatial" on the surface (to the user), but it's
* implemented using the spatial machinery, which is why it extends a SpatialStrategy. When using this class, you will
@@ -68,132 +59,4 @@ public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy {
throw new UnsupportedOperationException();
}
/** Calculates facets between {@code start} and {@code end} to a detail level one greater than that provided by the
* arguments. For example providing March to October of 2014 would return facets to the day level of those months.
* This is just a convenience method.
* @see #calcFacets(IndexReaderContext, Bits, Shape, int)
*/
public Facets calcFacets(IndexReaderContext context, Bits topAcceptDocs, UnitNRShape start, UnitNRShape end)
throws IOException {
Shape facetRange = getGrid().toRangeShape(start, end);
int detailLevel = Math.max(start.getLevel(), end.getLevel()) + 1;
return calcFacets(context, topAcceptDocs, facetRange, detailLevel);
}
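For reference, a hedged usage sketch of the convenience form above, mirroring the March-to-October example in its javadoc (tree and strategy setup borrowed from the test file deleted later in this commit; the Calendar endpoints are assumed):

  DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;
  NumberRangePrefixTreeStrategy strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
  UnitNRShape start = tree.toShape(marchCal);    // assumed Calendar at month precision
  UnitNRShape end = tree.toShape(octoberCal);    // assumed Calendar at month precision
  // detail level = max(start level, end level) + 1, i.e. day-level facets here
  Facets facets = strategy.calcFacets(indexSearcher.getTopReaderContext(),
      null /* topAcceptDocs: no filter */, start, end);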
/**
* Calculates facets (aggregated counts) given a range shape (start-end span) and a level, which specifies the detail.
* To get the level of an existing shape, say a Calendar, call
* {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree#toUnitShape(Object)} then call
* {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape#getLevel()}.
* Facet computation is implemented by navigating the underlying indexed terms efficiently.
*/
public Facets calcFacets(IndexReaderContext context, Bits topAcceptDocs, Shape facetRange, final int level)
throws IOException {
final Facets facets = new Facets(level);
PrefixTreeFacetCounter.compute(this, context, topAcceptDocs, facetRange, level,
new PrefixTreeFacetCounter.FacetVisitor() {
Facets.FacetParentVal parentFacet;
UnitNRShape parentShape;
@Override
public void visit(Cell cell, int count) {
if (cell.getLevel() < level - 1) {//some ancestor of parent facet level, direct or distant
parentFacet = null;//reset
parentShape = null;//reset
facets.topLeaves += count;
} else if (cell.getLevel() == level - 1) {//parent
//set up FacetParentVal
setupParent((UnitNRShape) cell.getShape());
parentFacet.parentLeaves += count;
} else {//at facet level
UnitNRShape unitShape = (UnitNRShape) cell.getShape();
UnitNRShape unitShapeParent = unitShape.getShapeAtLevel(unitShape.getLevel() - 1);
if (parentFacet == null || !parentShape.equals(unitShapeParent)) {
setupParent(unitShapeParent);
}
//lazy init childCounts
if (parentFacet.childCounts == null) {
parentFacet.childCounts = new int[parentFacet.childCountsLen];
}
parentFacet.childCounts[unitShape.getValAtLevel(cell.getLevel())] += count;
}
}
private void setupParent(UnitNRShape unitShape) {
parentShape = unitShape.clone();
//Look for existing parentFacet (from previous segment), or create anew if needed
parentFacet = facets.parents.get(parentShape);
if (parentFacet == null) {//didn't find one; make a new one
parentFacet = new Facets.FacetParentVal();
parentFacet.childCountsLen = getGrid().getNumSubCells(parentShape);
facets.parents.put(parentShape, parentFacet);
}
}
});
return facets;
}
/** Facet response information */
public static class Facets {
//TODO consider a variable-level structure -- more general purpose.
public Facets(int detailLevel) {
this.detailLevel = detailLevel;
}
/** The bottom-most detail-level counted, as requested. */
public final int detailLevel;
/**
* The count of documents with ranges that completely spanned the parents of the detail level. In more technical
* terms, this is the count of leaf cells two or more levels above the bottom. Usually you only care about counts
* at detailLevel, and so you will add this number to all other counts below, including to omitted/implied child
* counts of 0. If there are no indexed ranges (just instances, i.e. fully specified dates) then this value will
* always be 0.
*/
public int topLeaves;
/** Holds all the {@link FacetParentVal} instances in order of the key. This is sparse; there won't be an
* instance if its count and children are all 0. The keys are {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape} shapes, which can be
* converted back to the original Object (i.e. a Calendar) via
* {@link NumberRangePrefixTree#toObject(org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape)}. */
public final SortedMap<UnitNRShape,FacetParentVal> parents = new TreeMap<>();
/** Holds a block of detailLevel counts aggregated to their parent level. */
public static class FacetParentVal {
/** The count of ranges that span all of the childCounts. In more technical terms, this is the number of leaf
* cells found at this parent. Treat this like {@link Facets#topLeaves}. */
public int parentLeaves;
/** The length of {@link #childCounts}. If childCounts is not null then this equals childCounts.length; otherwise
* it is the length childCounts would have if it were allocated. */
public int childCountsLen;
/** The detail level counts. It will be null if there are none, and thus they are assumed 0. Most apps, when
* presenting the information, will add {@link #topLeaves} and {@link #parentLeaves} to each count. */
public int[] childCounts;
//assert childCountsLen == childCounts.length
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(2048);
buf.append("Facets: level=" + detailLevel + " topLeaves=" + topLeaves + " parentCount=" + parents.size());
for (Map.Entry<UnitNRShape, FacetParentVal> entry : parents.entrySet()) {
buf.append('\n');
if (buf.length() > 1000) {
buf.append("...");
break;
}
final FacetParentVal pVal = entry.getValue();
buf.append(' ').append(entry.getKey()+" leafCount=" + pVal.parentLeaves);
if (pVal.childCounts != null) {
buf.append(' ').append(Arrays.toString(pVal.childCounts));
}
}
return buf.toString();
}
}
}
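Tying the javadocs above together, a hedged sketch of how a consumer might materialize effective per-unit counts from a Facets result (the aggregation rule comes from the topLeaves and parentLeaves docs; the loop names are illustrative):

  for (Map.Entry<UnitNRShape, Facets.FacetParentVal> e : facets.parents.entrySet()) {
    Facets.FacetParentVal p = e.getValue();
    for (int i = 0; i < p.childCountsLen; i++) {
      // omitted/implied child counts are 0; spanning-range leaves are added on top
      int count = facets.topLeaves + p.parentLeaves
          + (p.childCounts == null ? 0 : p.childCounts[i]);
      System.out.println(e.getKey() + "[" + i + "] = " + count);
    }
  }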

View File

@@ -1,275 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.prefix.NumberRangePrefixTreeStrategy.Facets;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.junit.Before;
import org.junit.Test;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
public class NumberRangeFacetsTest extends StrategyTestCase {
DateRangePrefixTree tree;
int randomCalWindowField;
long randomCalWindowMs;
@Before
public void setUp() throws Exception {
super.setUp();
tree = DateRangePrefixTree.INSTANCE;
strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
Calendar tmpCal = tree.newCal();
randomCalWindowField = randomIntBetween(1, Calendar.ZONE_OFFSET - 1);//we're not allowed to add zone offset
tmpCal.add(randomCalWindowField, 2_000);
randomCalWindowMs = Math.max(2000L, tmpCal.getTimeInMillis());
}
@Repeat(iterations = 20)
@Test
public void test() throws IOException {
//generate test data
List<Shape> indexedShapes = new ArrayList<>();
final int numIndexedShapes = random().nextInt(15);
for (int i = 0; i < numIndexedShapes; i++) {
indexedShapes.add(randomShape());
}
//Main index loop:
for (int i = 0; i < indexedShapes.size(); i++) {
Shape shape = indexedShapes.get(i);
adoc(""+i, shape);
if (random().nextInt(10) == 0)
commit();//intermediate commit, produces extra segments
}
//delete some documents randomly
for (int id = 0; id < indexedShapes.size(); id++) {
if (random().nextInt(10) == 0) {
deleteDoc(""+id);
indexedShapes.set(id, null);
}
}
commit();
//Main query loop:
for (int queryIdx = 0; queryIdx < 10; queryIdx++) {
preQueryHavoc();
// We need a facet range window (a start time and an end time) to compute facets over. We randomly
// pick a date, decide the level we want to facet on, and then pick an end time up to 2,000 units later.
int calFieldFacet = randomCalWindowField - 1;
if (calFieldFacet > 1 && rarely()) {
calFieldFacet--;
}
final Calendar leftCal = randomCalendar();
leftCal.add(calFieldFacet, -1 * randomInt(1000));
Calendar rightCal = (Calendar) leftCal.clone();
rightCal.add(calFieldFacet, randomInt(2000));
// Pick facet detail level based on cal field.
int detailLevel = tree.getTreeLevelForCalendarField(calFieldFacet);
if (detailLevel < 0) {//no exact match
detailLevel = -1 * detailLevel;
}
//Randomly pick a filter/acceptDocs
Bits topAcceptDocs = null;
List<Integer> acceptFieldIds = new ArrayList<>();
if (usually()) {
//get all possible IDs into a list, shuffle it randomly, then keep a random-length prefix as the accept list.
for (int i = 0; i < indexedShapes.size(); i++) {
if (indexedShapes.get(i) == null) { // we deleted this one
continue;
}
acceptFieldIds.add(i);
}
Collections.shuffle(acceptFieldIds, random());
acceptFieldIds = acceptFieldIds.subList(0, randomInt(acceptFieldIds.size()));
if (!acceptFieldIds.isEmpty()) {
List<Term> terms = new ArrayList<>();
for (Integer acceptDocId : acceptFieldIds) {
terms.add(new Term("id", acceptDocId.toString()));
}
topAcceptDocs = searchForDocBits(new TermsQuery(terms));
}
}
//Let's do it!
NumberRangePrefixTree.NRShape facetRange = tree.toRangeShape(tree.toShape(leftCal), tree.toShape(rightCal));
Facets facets = ((NumberRangePrefixTreeStrategy) strategy)
.calcFacets(indexSearcher.getTopReaderContext(), topAcceptDocs, facetRange, detailLevel);
//System.out.println("Q: " + queryIdx + " " + facets);
//Verify results. We do it by looping over indexed shapes and reducing the facet counts.
Shape facetShapeRounded = facetRange.roundToLevel(detailLevel);
for (int indexedShapeId = 0; indexedShapeId < indexedShapes.size(); indexedShapeId++) {
if (topAcceptDocs != null && !acceptFieldIds.contains(indexedShapeId)) {
continue;// this doc was filtered out via acceptDocs
}
Shape indexedShape = indexedShapes.get(indexedShapeId);
if (indexedShape == null) {//was deleted
continue;
}
Shape indexedShapeRounded = ((NumberRangePrefixTree.NRShape) indexedShape).roundToLevel(detailLevel);
if (!indexedShapeRounded.relate(facetShapeRounded).intersects()) { // no intersection at all
continue;
}
// walk the cells
final CellIterator cellIterator = tree.getTreeCellIterator(indexedShape, detailLevel);
while (cellIterator.hasNext()) {
Cell cell = cellIterator.next();
if (!cell.getShape().relate(facetShapeRounded).intersects()) {
cellIterator.remove();//no intersection; prune
continue;
}
assert cell.getLevel() <= detailLevel;
if (cell.getLevel() == detailLevel) {
//count it
UnitNRShape shape = (UnitNRShape) cell.getShape();
final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1);//get parent
final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
assertNotNull(facetParentVal);
int index = shape.getValAtLevel(shape.getLevel());
assertNotNull(facetParentVal.childCounts);
assert facetParentVal.childCounts[index] > 0;
facetParentVal.childCounts[index]--;
} else if (cell.isLeaf()) {
//count it, and remove/prune.
if (cell.getLevel() < detailLevel - 1) {
assert facets.topLeaves > 0;
facets.topLeaves--;
} else {
UnitNRShape shape = (UnitNRShape) cell.getShape();
final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1);//get parent
final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
assertNotNull(facetParentVal);
assert facetParentVal.parentLeaves > 0;
facetParentVal.parentLeaves--;
}
cellIterator.remove();
}
}
}
// At this point, all counts should be down to zero.
assertTrue(facets.topLeaves == 0);
for (Facets.FacetParentVal facetParentVal : facets.parents.values()) {
assertTrue(facetParentVal.parentLeaves == 0);
if (facetParentVal.childCounts != null) {
for (int childCount : facetParentVal.childCounts) {
assertTrue(childCount == 0);
}
}
}
}
}
private Bits searchForDocBits(Query query) throws IOException {
FixedBitSet bitSet = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
indexSearcher.search(query,
new SimpleCollector() {
int leafDocBase;
@Override
public void collect(int doc) throws IOException {
bitSet.set(leafDocBase + doc);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
leafDocBase = context.docBase;
}
@Override
public boolean needsScores() {
return false;
}
});
return bitSet;
}
private void preQueryHavoc() {
if (strategy instanceof RecursivePrefixTreeStrategy) {
RecursivePrefixTreeStrategy rpts = (RecursivePrefixTreeStrategy) strategy;
int scanLevel = randomInt(rpts.getGrid().getMaxLevels());
rpts.setPrefixGridScanLevel(scanLevel);
}
}
protected Shape randomShape() {
Calendar cal1 = randomCalendar();
UnitNRShape s1 = tree.toShape(cal1);
if (rarely()) {
return s1;
}
try {
Calendar cal2 = randomCalendar();
UnitNRShape s2 = tree.toShape(cal2);
if (cal1.compareTo(cal2) < 0) {
return tree.toRangeShape(s1, s2);
} else {
return tree.toRangeShape(s2, s1);
}
} catch (IllegalArgumentException e) {
assert e.getMessage().startsWith("Differing precision");
return s1;
}
}
private Calendar randomCalendar() {
Calendar cal = tree.newCal();
cal.setTimeInMillis(random().nextLong() % randomCalWindowMs);
try {
tree.clearFieldsAfter(cal, random().nextInt(Calendar.FIELD_COUNT+1)-1);
} catch (AssertionError e) {
if (!e.getMessage().equals("Calendar underflow"))
throw e;
}
return cal;
}
}

View File

@@ -70,4 +70,9 @@ public class GeoRect {
return b.toString();
}
/** Returns true if this bounding box crosses the dateline */
public boolean crossesDateline() {
return maxLon < minLon;
}
}
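A small illustration of the new predicate (values assumed; constructor argument order minLon, maxLon, minLat, maxLat, as used by LatLonPointDistanceQuery above):

  GeoRect wraps = new GeoRect(170.0, -170.0, -10.0, 10.0);  // spans the antimeridian
  assert wraps.crossesDateline();                           // maxLon < minLon, so callers split it at +/-180
  GeoRect plain = new GeoRect(-10.0, 10.0, -10.0, 10.0);
  assert plain.crossesDateline() == false;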

View File

@@ -262,6 +262,8 @@ Bug Fixes
* SOLR-8728: ReplicaAssigner throws NPE when only a partial list of nodes is participating in replica
placement. splitshard should preassign nodes using rules, if rules are present (noble, Shai Erera)
* SOLR-8779: Fix missing InterruptedException handling in ZkStateReader.java (Varun Thacker)
Optimizations
----------------------
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been

View File

@@ -45,14 +45,16 @@ import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException;
import org.junit.Ignore;
import org.junit.Test;
import static org.apache.solr.cloud.ReplicaPropertiesBase.verifyUniqueAcrossCollection;
@LuceneTestCase.Slow
public class CollectionsAPISolrJTests extends AbstractFullDistribZkTestBase {
public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase {
@Test
@Ignore
public void test() throws Exception {
testCreateAndDeleteCollection();
testCreateAndDeleteShard();

View File

@@ -165,12 +165,10 @@ public class ZkStateReader implements Closeable {
} else {
throw new ZooKeeperException(ErrorCode.INVALID_STATE, "No config data found at path: " + path);
}
}
catch (KeeperException e) {
} catch (KeeperException e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Error loading config name for collection " + collection, e);
}
catch (InterruptedException e) {
Thread.interrupted();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new SolrException(ErrorCode.SERVER_ERROR, "Error loading config name for collection " + collection, e);
}
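The substantive change in this hunk: Thread.interrupted() tests and clears the calling thread's interrupt flag, while Thread.currentThread().interrupt() re-asserts it, so code further up the stack can still observe the interruption. A minimal sketch of the pattern now used throughout this file (path and message are illustrative):

  try {
    byte[] data = getZkClient().getData(path, null, new Stat(), true);
    // ... use data ...
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();  // restore the flag before rethrowing
    throw new SolrException(ErrorCode.SERVER_ERROR, "Interrupted reading " + path, e);
  }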
@@ -691,14 +689,17 @@
this.aliases = ClusterState.load(data);
}
public Map getClusterProps(){
public Map getClusterProps() {
try {
if (getZkClient().exists(ZkStateReader.CLUSTER_PROPS, true)) {
return (Map) Utils.fromJSON(getZkClient().getData(ZkStateReader.CLUSTER_PROPS, null, new Stat(), true)) ;
} else {
return new LinkedHashMap();
}
} catch (Exception e) {
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new SolrException(ErrorCode.SERVER_ERROR, "Thread interrupted. Error reading cluster properties", e);
} catch (KeeperException e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Error reading cluster properties", e);
}
}
@@ -741,9 +742,13 @@
LOG.warn("Race condition while trying to set a new cluster prop on current version [{}]", s.getVersion());
//race condition
continue;
} catch (Exception ex) {
LOG.error("Error updating path [{}]", CLUSTER_PROPS, ex);
throw new SolrException(ErrorCode.SERVER_ERROR, "Error updating cluster property " + propertyName, ex);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
LOG.error("Thread Interrupted. Error updating path [{}]", CLUSTER_PROPS, e);
throw new SolrException(ErrorCode.SERVER_ERROR, "Thread Interrupted. Error updating cluster property " + propertyName, e);
} catch (KeeperException e) {
LOG.error("Error updating path [{}]", CLUSTER_PROPS, e);
throw new SolrException(ErrorCode.SERVER_ERROR, "Error updating cluster property " + propertyName, e);
}
break;
}
@@ -766,8 +771,11 @@
new ConfigData((Map<String, Object>) Utils.fromJSON(data), stat.getVersion()) :
null;
}
} catch (KeeperException | InterruptedException e) {
throw new SolrException(ErrorCode.SERVER_ERROR,"Error reading security properties",e) ;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new SolrException(ErrorCode.SERVER_ERROR,"Error reading security properties", e) ;
} catch (KeeperException e) {
throw new SolrException(ErrorCode.SERVER_ERROR,"Error reading security properties", e) ;
}
return null;
}