LUCENE-5579: CompositeSpatialStrategy (RPT + SDV) with optimized Intersect

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1672736 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2015-04-10 19:51:58 +00:00
parent 94ad565747
commit 489f56def8
12 changed files with 823 additions and 39 deletions

View File

@ -54,6 +54,10 @@ New Features
dictionary, exposed as AutoPrefixPostingsFormat (Adrien Grand,
Uwe Schindler, Robert Muir, Mike McCandless)
* LUCENE-5579: New CompositeSpatialStrategy combines speed of RPT with
accuracy of SDV. Includes optimized Intersect predicate to avoid many
geometry checks. Uses TwoPhaseIterator. (David Smiley)
Optimizations
* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if

View File

@ -27,15 +27,19 @@ doc.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialDocMaker
#spatial.geo=true
#spatial.distCalculator=haversine
#spatial.worldBounds=...
# Spatial Strategy: composite | rpt | ...
spatial.strategy=rpt
# Spatial Grid: (PrefixTree) see SpatialPrefixTreeFactory.makeSPT
#spatial.prefixTree=geohash (or quad)
spatial.prefixTree=quad
#spatial.maxLevels=11
#27 quad or 11 geohash are very close
spatial.maxLevels=27
#spatial.maxDistErr (in degrees) to compute maxLevels -- defaults to 1 meter's worth
# RecursivePrefixTreeStrategy:
spatial.docPointsOnly=true
#spatial.distErrPct=.25
#spatial.prefixGridScanLevel=-4
#spatial.docPointsOnly=true
#spatial.distErrPct=0.025
#spatial.pruneLeafyBranches=true
#codec.postingsFormat=Memory or (Direct)
### Source & Doc
@ -54,7 +58,6 @@ directory=FSDirectory
compound=false
merge.factor=10
ram.flush.mb=64
concurrent.merge.scheduler.max.thread.count=2
### Query
query.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialFileQueryMaker
@ -66,10 +69,13 @@ query.spatial.radiusDegrees=0
query.spatial.radiusDegreesRandPlusMinus=3
query.spatial.bbox=false
#query.spatial.prefixGridScanLevel=scanLevel:-1:-2:-3:-4
#query.spatial.composite.optimizePredicates=optPred:true:false
query.spatial.score=false
#query.spatial.predicate=Intersects
# (defaults to spatial.distErrPct)
query.spatial.distErrPct=qDistErrPct:0.0:0.025:0.1:0.5
#query.spatial.distErrPct=0.0
### Misc
@ -104,7 +110,7 @@ CloseReader
CloseReader
NewRound
} : 4
} : 1
#RepSumByName

View File

@ -17,6 +17,12 @@ package org.apache.lucene.benchmark.byTask.feeds;
* limitations under the License.
*/
import java.util.AbstractMap;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
import com.spatial4j.core.shape.Point;
@ -25,15 +31,11 @@ import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.composite.CompositeSpatialStrategy;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
import java.util.AbstractMap;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.spatial.serialized.SerializedDVStrategy;
/**
* Indexes spatial data according to a configured {@link SpatialStrategy} with optional
@ -87,17 +89,27 @@ public class SpatialDocMaker extends DocMaker {
SpatialContext ctx = SpatialContextFactory.makeSpatialContext(configMap, null);
//Some day the strategy might be initialized with a factory but such a factory
// is non-existent.
return makeSpatialStrategy(config, configMap, ctx);
}
/**
 * Builds the strategy selected by the {@code spatial.strategy} config property, which defaults
 * to {@code "rpt"} when unset.
 *
 * @throws IllegalStateException if the configured name is neither {@code "rpt"} nor {@code "composite"}
 */
protected SpatialStrategy makeSpatialStrategy(final Config config, Map<String, String> configMap,
                                              SpatialContext ctx) {
  //TODO once strategies have factories, we could use them here.
  final String requested = config.get("spatial.strategy", "rpt");
  if (requested.equals("rpt")) {
    return makeRPTStrategy(SPATIAL_FIELD, config, configMap, ctx);
  }
  if (requested.equals("composite")) {
    return makeCompositeStrategy(config, configMap, ctx);
  }
  //TODO add more as-needed
  throw new IllegalStateException("Unknown spatial.strategy: " + requested);
}
protected RecursivePrefixTreeStrategy makeRPTStrategy(String spatialField, Config config,
Map<String, String> configMap, SpatialContext ctx) {
//A factory for the prefix tree grid
SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx);
RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD);
RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, spatialField);
strategy.setPointsOnly(config.get("spatial.docPointsOnly", false));
strategy.setPruneLeafyBranches(config.get("spatial.pruneLeafyBranches", true));
@ -111,6 +123,20 @@ public class SpatialDocMaker extends DocMaker {
return strategy;
}
/**
 * Creates a {@link SerializedDVStrategy} on {@code spatialField} using the given context.
 * The {@code config} and {@code configMap} arguments are accepted but not consulted here.
 */
protected SerializedDVStrategy makeSerializedDVStrategy(String spatialField, Config config,
                                                        Map<String, String> configMap, SpatialContext ctx) {
  final SerializedDVStrategy sdv = new SerializedDVStrategy(ctx, spatialField);
  return sdv;
}
/**
 * Creates a {@link CompositeSpatialStrategy} whose RPT part indexes {@code SPATIAL_FIELD + "_rpt"}
 * and whose serialized-DV part indexes {@code SPATIAL_FIELD + "_sdv"}. Predicate optimization is
 * toggled by the {@code query.spatial.composite.optimizePredicates} config property (default true).
 */
protected SpatialStrategy makeCompositeStrategy(Config config, Map<String, String> configMap, SpatialContext ctx) {
  // The RPT strategy is built first, then the SDV one, as in the original argument order.
  final RecursivePrefixTreeStrategy rptPart = makeRPTStrategy(SPATIAL_FIELD + "_rpt", config, configMap, ctx);
  final SerializedDVStrategy sdvPart = makeSerializedDVStrategy(SPATIAL_FIELD + "_sdv", config, configMap, ctx);
  final CompositeSpatialStrategy composite = new CompositeSpatialStrategy(SPATIAL_FIELD, rptPart, sdvPart);
  final boolean optimize = config.get("query.spatial.composite.optimizePredicates", true);
  composite.setOptimizePredicates(optimize);
  return composite;
}
@Override
public void setConfig(Config config, ContentSource source) {
super.setConfig(config, source);

View File

@ -0,0 +1,141 @@
package org.apache.lucene.spatial.composite;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.document.Field;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.apache.lucene.spatial.serialized.SerializedDVStrategy;
import org.apache.lucene.spatial.util.ShapePredicateValueSource;
/**
* A composite {@link SpatialStrategy} based on {@link RecursivePrefixTreeStrategy} (RPT) and
* {@link SerializedDVStrategy} (SDV).
* RPT acts as an index to the precision available in SDV, and in some circumstances can avoid geometry lookups based
* on where a cell is in relation to the query shape. Currently the only predicate optimized like this is Intersects.
* All predicates are supported except for the BBox* ones, and Disjoint.
*
* @lucene.experimental
*/
public class CompositeSpatialStrategy extends SpatialStrategy {
//TODO support others? (BBox)
private final RecursivePrefixTreeStrategy indexStrategy;
/** Has the geometry. */ // TODO support others?
private final SerializedDVStrategy geometryStrategy;
private boolean optimizePredicates = true;
public CompositeSpatialStrategy(String fieldName,
RecursivePrefixTreeStrategy indexStrategy, SerializedDVStrategy geometryStrategy) {
super(indexStrategy.getSpatialContext(), fieldName);//field name; unused
this.indexStrategy = indexStrategy;
this.geometryStrategy = geometryStrategy;
}
/** Set to false to NOT use optimized search predicates that avoid checking the geometry sometimes. Only useful for
* benchmarking. */
public void setOptimizePredicates(boolean optimizePredicates) {
this.optimizePredicates = optimizePredicates;
}
@Override
public Field[] createIndexableFields(Shape shape) {
List<Field> fields = new ArrayList<>();
Collections.addAll(fields, indexStrategy.createIndexableFields(shape));
Collections.addAll(fields, geometryStrategy.createIndexableFields(shape));
return fields.toArray(new Field[fields.size()]);
}
@Override
public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
//TODO consider indexing center-point in DV? Guarantee contained by the shape, which could then be used for
// other purposes like faster WITHIN predicate?
throw new UnsupportedOperationException();
}
@Override
public Query makeQuery(SpatialArgs args) {
final SpatialOperation pred = args.getOperation();
if (pred == SpatialOperation.BBoxIntersects || pred == SpatialOperation.BBoxWithin) {
throw new UnsupportedSpatialOperation(pred);
}
if (pred == SpatialOperation.IsDisjointTo) {
// final Query intersectQuery = makeQuery(new SpatialArgs(SpatialOperation.Intersects, args.getShape()));
// DocValues.getDocsWithField(reader, geometryStrategy.getFieldName());
//TODO resurrect Disjoint spatial query utility accepting a field name known to have DocValues.
// update class docs when it's added.
throw new UnsupportedSpatialOperation(pred);
}
final ShapePredicateValueSource predicateValueSource =
new ShapePredicateValueSource(geometryStrategy.makeShapeValueSource(), pred, args.getShape());
//System.out.println("PredOpt: " + optimizePredicates);
if (pred == SpatialOperation.Intersects && optimizePredicates) {
// We have a smart Intersects impl
final SpatialPrefixTree grid = indexStrategy.getGrid();
final int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, 0.0));//default to max precision
return new IntersectsRPTVerifyQuery(args.getShape(), indexStrategy.getFieldName(), grid,
detailLevel, indexStrategy.getPrefixGridScanLevel(), predicateValueSource);
} else {
//The general path; all index matches get verified
SpatialArgs indexArgs;
if (pred == SpatialOperation.Contains) {
// note: we could map IsWithin as well but it's pretty darned slow since it touches all world grids
indexArgs = args;
} else {
//TODO add args.clone method with new predicate? Or simply make non-final?
indexArgs = new SpatialArgs(SpatialOperation.Intersects, args.getShape());
indexArgs.setDistErr(args.getDistErr());
indexArgs.setDistErrPct(args.getDistErrPct());
}
if (indexArgs.getDistErr() == null && indexArgs.getDistErrPct() == null) {
indexArgs.setDistErrPct(0.10);
}
final Query indexQuery = indexStrategy.makeQuery(indexArgs);
return new CompositeVerifyQuery(indexQuery, predicateValueSource);
}
}
@Override
public Filter makeFilter(SpatialArgs args) {
//note: Filters are being deprecated in LUCENE-6301
return new QueryWrapperFilter(makeQuery(args));
}
}

View File

@ -0,0 +1,123 @@
package org.apache.lucene.spatial.composite;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
/**
 * A Query that treats an "indexQuery" as an approximation: each of its hits is confirmed by
 * calling {@link FunctionValues#boolVal(int)} on a {@link ValueSource}, from within
 * {@link TwoPhaseIterator#matches()}.
 *
 * @lucene.experimental
 */
public class CompositeVerifyQuery extends Query {
  final Query indexQuery;//approximation (matches more than needed)
  final ValueSource predicateValueSource;//we call boolVal(doc)

  public CompositeVerifyQuery(Query indexQuery, ValueSource predicateValueSource) {
    this.indexQuery = indexQuery;
    this.predicateValueSource = predicateValueSource;
  }

  /** Rewrites the wrapped index query; returns {@code this} when that rewrite is a no-op. */
  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    final Query rewrittenIndexQuery = indexQuery.rewrite(reader);
    return (rewrittenIndexQuery == indexQuery)
        ? this
        : new CompositeVerifyQuery(rewrittenIndexQuery, predicateValueSource);
  }

  /** Delegates to the wrapped index query. */
  @Override
  public void extractTerms(Set<Term> terms) {
    indexQuery.extractTerms(terms);
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass() || !super.equals(o)) {
      return false;
    }
    final CompositeVerifyQuery other = (CompositeVerifyQuery) o;
    return indexQuery.equals(other.indexQuery)
        && predicateValueSource.equals(other.predicateValueSource);
  }

  @Override
  public int hashCode() {
    // Same 31-based fold as before: super, then indexQuery, then predicateValueSource.
    return 31 * (31 * super.hashCode() + indexQuery.hashCode()) + predicateValueSource.hashCode();
  }

  @Override
  public String toString(String field) {
    //TODO verify this looks good
    final StringBuilder text = new StringBuilder(getClass().getSimpleName());
    text.append("(").append(indexQuery.toString(field));
    text.append(", ").append(predicateValueSource).append(")");
    return text.toString();
  }

  /**
   * Creates a constant-score weight whose scorers iterate the index query's hits as the
   * approximation and verify each one with the predicate value source.
   */
  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    // The index query is always asked for a non-scoring weight; scores aren't supported.
    final Weight approximationWeight = indexQuery.createWeight(searcher, false);
    final Map vsContext = ValueSource.newContext(searcher);

    return new ConstantScoreWeight(this) {
      @Override
      protected Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException {
        //pass acceptDocs through
        final Scorer approximation = approximationWeight.scorer(context, acceptDocs);
        if (approximation == null) {
          return null;
        }

        final FunctionValues verifier = predicateValueSource.getValues(vsContext, context);

        final TwoPhaseIterator verifyingIterator = new TwoPhaseIterator(approximation) {
          @Override
          public boolean matches() throws IOException {
            final int candidateDoc = approximation.docID();
            return verifier.boolVal(candidateDoc);
          }
        };

        return new ConstantScoreScorer(this, score, verifyingIterator);
      }
    };
  }
}

View File

@ -0,0 +1,90 @@
package org.apache.lucene.spatial.composite;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
/**
 * A constant-scoring {@link Scorer}. It wraps either a plain {@link DocIdSetIterator} or a
 * {@link TwoPhaseIterator}, and reports the same score and a frequency of 1 for every document.
 *
 * @lucene.internal
 */
public final class ConstantScoreScorer extends Scorer {
  // TODO refactor CSQ's Scorer to be re-usable and look like this

  // note: the Weight is handed to the superclass constructor; no private copy is kept here
  // because nothing in this class ever read it.
  private final float score;
  /** Null when this scorer was built from a plain {@link DocIdSetIterator}. */
  private final TwoPhaseIterator twoPhaseIterator;
  /** The iterator all doc-id navigation is delegated to. */
  private final DocIdSetIterator disi;

  /**
   * Creates a scorer over a plain iterator; {@link #asTwoPhaseIterator()} will return null.
   *
   * @param weight the weight passed to the {@link Scorer} superclass
   * @param score the constant score returned for every document
   * @param disi the documents to iterate
   */
  public ConstantScoreScorer(Weight weight, float score, DocIdSetIterator disi) {
    super(weight);
    this.score = score;
    this.twoPhaseIterator = null;
    this.disi = disi;
  }

  /**
   * Creates a scorer over a two-phase iterator. Doc-id navigation goes through
   * {@link TwoPhaseIterator#asDocIdSetIterator(TwoPhaseIterator)}.
   *
   * @param weight the weight passed to the {@link Scorer} superclass
   * @param score the constant score returned for every document
   * @param twoPhaseIterator the approximation plus verification to expose
   */
  protected ConstantScoreScorer(Weight weight, float score, TwoPhaseIterator twoPhaseIterator) {
    super(weight);
    this.score = score;
    this.twoPhaseIterator = twoPhaseIterator;
    this.disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
  }

  /** Returns the two-phase iterator given at construction, or null if there was none. */
  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
    return twoPhaseIterator;
  }

  @Override
  public float score() throws IOException {
    return score;
  }

  @Override
  public int freq() throws IOException {
    return 1;
  }

  @Override
  public int docID() {
    return disi.docID();
  }

  @Override
  public int nextDoc() throws IOException {
    return disi.nextDoc();
  }

  @Override
  public int advance(int target) throws IOException {
    return disi.advance(target);
  }

  @Override
  public long cost() {
    return disi.cost();
  }
}

View File

@ -0,0 +1,213 @@
package org.apache.lucene.spatial.composite;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.SpatialRelation;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.spatial.prefix.AbstractVisitingPrefixTreeFilter;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
/**
* A spatial Intersects predicate that distinguishes an approximated match from an exact match based on which cells
* are within the query shape. It exposes a {@link TwoPhaseIterator} that will verify a match with a provided
* predicate in the form of a {@link ValueSource} by calling {@link FunctionValues#boolVal(int)}.
*
* @lucene.internal
*/
public class IntersectsRPTVerifyQuery extends Query {
// Computes, per index segment, the approximate doc set and the subset known to match exactly.
private final IntersectsDifferentiatingFilter intersectsDiffFilter;
private final ValueSource predicateValueSource; // we call FunctionValues.boolVal(doc)
/**
* Builds the query. All arguments except {@code predicateValueSource} are forwarded unchanged to the
* internal {@link IntersectsDifferentiatingFilter}; {@code predicateValueSource} is kept for verifying
* the documents that are only approximate matches.
*/
public IntersectsRPTVerifyQuery(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel,
int prefixGridScanLevel, ValueSource predicateValueSource) {
this.predicateValueSource = predicateValueSource;
this.intersectsDiffFilter = new IntersectsDifferentiatingFilter(queryShape, fieldName, grid, detailLevel,
prefixGridScanLevel);
}
// NOTE(review): the name printed here is the 'field' argument of toString, not the fieldName given to
// the constructor -- confirm that is intended.
@Override
public String toString(String field) {
return "IntersectsVerified(fieldName=" + field + ")";
}
// Equality is based on the superclass, the internal filter and the predicate value source.
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof IntersectsRPTVerifyQuery)) return false;
if (!super.equals(o)) return false;
IntersectsRPTVerifyQuery that = (IntersectsRPTVerifyQuery) o;
if (!intersectsDiffFilter.equals(that.intersectsDiffFilter)) return false;
return predicateValueSource.equals(that.predicateValueSource);
}
// Combines the same three components that equals() compares.
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + intersectsDiffFilter.hashCode();
result = 31 * result + predicateValueSource.hashCode();
return result;
}
/**
* Creates a constant-score weight. The {@code needsScores} argument is not consulted.
* The value source context is created once here and shared by every scorer of the returned weight.
*/
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
final Map valueSourceContext = ValueSource.newContext(searcher);
return new ConstantScoreWeight(this) {
@Override
protected Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException {
// Compute approx & exact
final IntersectsDifferentiatingFilter.IntersectsDifferentiatingVisitor result =
intersectsDiffFilter.compute(context, acceptDocs);
// No approximate set at all means no scorer for this segment.
if (result.approxDocIdSet == null) {
return null;
}
final DocIdSetIterator approxDISI = result.approxDocIdSet.iterator();
if (approxDISI == null) {
return null;
}
// Bits of the docs known to match exactly, or null when there are none.
final Bits exactDocBits;
if (result.exactDocIdSet != null) {
// If both sets are the same, there's nothing to verify; we needn't return a TwoPhaseIterator
// (finish() assigns the very same instance to both fields when nothing approximate was collected)
if (result.approxDocIdSet.equals(result.exactDocIdSet)) {
return new ConstantScoreScorer(this, score, approxDISI);
}
exactDocBits = result.exactDocIdSet.bits();
assert exactDocBits != null;
} else {
exactDocBits = null;
}
final FunctionValues predFuncValues = predicateValueSource.getValues(valueSourceContext, context);
final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approxDISI) {
@Override
public boolean matches() throws IOException {
// Docs already in the exact set are accepted without evaluating the predicate.
if (exactDocBits != null && exactDocBits.get(approxDISI.docID())) {
return true;
}
// Everything else is verified against the predicate value source.
return predFuncValues.boolVal(approxDISI.docID());
}
};
return new ConstantScoreScorer(this, score, twoPhaseIterator);
}
};
}
//This is a "Filter" but we don't use it as-such; the caller calls the constructor and then compute() and examines
// the results which consists of two parts -- the approximated results, and a subset of exact matches. The
// difference needs to be verified.
// TODO refactor AVPTF to not be a Query/Filter?
private static class IntersectsDifferentiatingFilter extends AbstractVisitingPrefixTreeFilter {
public IntersectsDifferentiatingFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid,
int detailLevel, int prefixGridScanLevel) {
super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel);
}
// Creates a visitor for the given segment, runs it, and returns the visitor itself so the caller can read
// its approxDocIdSet / exactDocIdSet fields.
IntersectsDifferentiatingFilter.IntersectsDifferentiatingVisitor compute(LeafReaderContext context, Bits acceptDocs) throws IOException {
final IntersectsDifferentiatingFilter.IntersectsDifferentiatingVisitor result = new IntersectsDifferentiatingFilter.IntersectsDifferentiatingVisitor(context, acceptDocs);
// The return value is ignored (finish() returns null); presumably this call drives the traversal that
// ends in finish() -- confirm against VisitorTemplate.
result.getDocIdSet();//computes
return result;
}
// TODO consider if IntersectsPrefixTreeFilter should simply do this and provide both sets
class IntersectsDifferentiatingVisitor extends VisitorTemplate {
// Docs collected from cells that are not WITHIN the query shape; these need verification.
BitDocIdSet.Builder approxBuilder = new BitDocIdSet.Builder(maxDoc);
// Docs collected from cells whose relation to the query shape is WITHIN.
BitDocIdSet.Builder exactBuilder = new BitDocIdSet.Builder(maxDoc);
// Results, assigned in finish().
BitDocIdSet exactDocIdSet;
BitDocIdSet approxDocIdSet;
public IntersectsDifferentiatingVisitor(LeafReaderContext context, Bits acceptDocs) throws IOException {
super(context, acceptDocs);
}
// Nothing to prepare.
@Override
protected void start() throws IOException {
}
// Builds the two result sets. approxDocIdSet ends up holding the exact docs as well, so it can serve as
// the approximation for the whole result.
@Override
protected DocIdSet finish() throws IOException {
exactDocIdSet = exactBuilder.build();
if (approxBuilder.isDefinitelyEmpty()) {
approxDocIdSet = exactDocIdSet;//optimization
} else {
if (exactDocIdSet != null) {
approxBuilder.or(exactDocIdSet.iterator());
}
approxDocIdSet = approxBuilder.build();
}
return null;//unused in this weird re-use of AVPTF
}
// Returns false after collecting the current term's docs (WITHIN cells go to the exact set, cells at
// detailLevel to the approximate set) and true otherwise; presumably true means "descend further".
@Override
protected boolean visitPrefix(Cell cell) throws IOException {
if (cell.getShapeRel() == SpatialRelation.WITHIN) {
collectDocs(exactBuilder);//note: we'll add exact to approx on finish()
return false;
} else if (cell.getLevel() == detailLevel) {
collectDocs(approxBuilder);
return false;
}
return true;
}
// Leaf cells: WITHIN goes to the exact set, anything else to the approximate set.
@Override
protected void visitLeaf(Cell cell) throws IOException {
if (cell.getShapeRel() == SpatialRelation.WITHIN) {
collectDocs(exactBuilder);//note: we'll add exact to approx on finish()
} else {
collectDocs(approxBuilder);
}
}
}
// Not usable as a regular Filter; callers must go through compute().
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
throw new IllegalStateException();
}
// Not supported either; always throws.
@Override
public String toString(String field) {
throw new IllegalStateException();
}
}
}

View File

@ -0,0 +1,19 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Composite strategies. */
package org.apache.lucene.spatial.composite;

View File

@ -20,13 +20,14 @@ package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Filter;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
@ -98,6 +99,11 @@ public abstract class AbstractPrefixTreeFilter extends Filter {
bitSet.or(postingsEnum);
}
/**
* Fetches the postings of the term {@code termsEnum} is currently on (requesting
* {@link PostingsEnum#NONE}, honoring {@code acceptDocs}) and ORs them into {@code bitSetBuilder}.
* The {@code postingsEnum} field is passed in for reuse and then reassigned to the result.
*/
protected void collectDocs(BitDocIdSet.Builder bitSetBuilder) throws IOException {
assert termsEnum != null;
postingsEnum = termsEnum.postings(acceptDocs, postingsEnum, PostingsEnum.NONE);
bitSetBuilder.or(postingsEnum);
}
}
}

View File

@ -17,9 +17,13 @@ package org.apache.lucene.spatial;
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collection;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.spatial.bbox.BBoxStrategy;
import org.apache.lucene.spatial.composite.CompositeSpatialStrategy;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
@ -32,25 +36,33 @@ import org.apache.lucene.spatial.vector.PointVectorStrategy;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collection;
public class QueryEqualsHashCodeTest extends LuceneTestCase {
private final SpatialContext ctx = SpatialContext.GEO;
private SpatialOperation predicate;
@Test
public void testEqualsHashCode() {
switch (random().nextInt(4)) {//0-3
case 0: predicate = SpatialOperation.Contains; break;
case 1: predicate = SpatialOperation.IsWithin; break;
default: predicate = SpatialOperation.Intersects; break;
}
final SpatialPrefixTree gridQuad = new QuadPrefixTree(ctx,10);
final SpatialPrefixTree gridGeohash = new GeohashPrefixTree(ctx,10);
Collection<SpatialStrategy> strategies = new ArrayList<>();
strategies.add(new RecursivePrefixTreeStrategy(gridGeohash, "recursive_geohash"));
RecursivePrefixTreeStrategy recursive_geohash = new RecursivePrefixTreeStrategy(gridGeohash, "recursive_geohash");
strategies.add(recursive_geohash);
strategies.add(new TermQueryPrefixTreeStrategy(gridQuad, "termquery_quad"));
strategies.add(new PointVectorStrategy(ctx, "pointvector"));
strategies.add(new BBoxStrategy(ctx, "bbox"));
strategies.add(new SerializedDVStrategy(ctx, "serialized"));
final SerializedDVStrategy serialized = new SerializedDVStrategy(ctx, "serialized");
strategies.add(serialized);
strategies.add(new CompositeSpatialStrategy("composite", recursive_geohash, serialized));
for (SpatialStrategy strategy : strategies) {
testEqualsHashcode(strategy);
}
@ -91,20 +103,20 @@ public class QueryEqualsHashCodeTest extends LuceneTestCase {
Object second = generator.gen(args1);//should be the same
assertEquals(first, second);
assertEquals(first.hashCode(), second.hashCode());
assertNotSame(args1, args2);
assertTrue(args1.equals(args2) == false);
second = generator.gen(args2);//now should be different
assertNotSame(first, second);
assertNotSame(first.hashCode(), second.hashCode());
assertTrue(first.equals(second) == false);
assertTrue(first.hashCode() != second.hashCode());
}
private SpatialArgs makeArgs1() {
final Shape shape1 = ctx.makeRectangle(0, 0, 10, 10);
return new SpatialArgs(SpatialOperation.Intersects, shape1);
return new SpatialArgs(predicate, shape1);
}
private SpatialArgs makeArgs2() {
final Shape shape2 = ctx.makeRectangle(0, 0, 20, 20);
return new SpatialArgs(SpatialOperation.Intersects, shape2);
return new SpatialArgs(predicate, shape2);
}
interface ObjGenerator {

View File

@ -0,0 +1,143 @@
package org.apache.lucene.spatial.composite;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.impl.RectangleImpl;
import org.apache.lucene.spatial.prefix.RandomSpatialOpStrategyTestCase;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.serialized.SerializedDVStrategy;
import org.junit.Test;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomDouble;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
/**
 * Randomized test that runs random circles and rectangles, both as indexed and as
 * query shapes, through a {@code CompositeSpatialStrategy} assembled from a
 * {@link RecursivePrefixTreeStrategy} and a {@link SerializedDVStrategy}.
 */
public class CompositeStrategyTest extends RandomSpatialOpStrategyTestCase {

  /** Prefix tree backing the RPT half of the composite; assigned by the setup*Grid methods. */
  private SpatialPrefixTree grid;

  /** RPT strategy created over {@link #grid}; assigned by the setup*Grid methods. */
  private RecursivePrefixTreeStrategy rptStrategy;

  /**
   * Configures a non-geo context with a quad prefix tree.
   *
   * @param maxLevels depth of the tree, or -1 to pick a random depth in [1, 8]
   */
  private void setupQuadGrid(int maxLevels) {
    // Non-geospatial keeps the test simpler (in gridSnap). World bounds on 2^X
    // boundaries provoke the edge conditions we want to exercise and keep the
    // numbers free of decimals.
    final SpatialContextFactory ctxFactory = new SpatialContextFactory();
    ctxFactory.geo = false;
    ctxFactory.worldBounds = new RectangleImpl(0, 256, -128, 128, null);
    this.ctx = ctxFactory.newSpatialContext();

    // Keep the grid fairly shallow: at most 64k cells (4^8), also 256*256.
    final int levels = (maxLevels == -1) ? randomIntBetween(1, 8) : maxLevels;
    this.grid = new QuadPrefixTree(ctx, levels);
    this.rptStrategy = newRPT();
  }

  /**
   * Configures the geo context ({@link SpatialContext#GEO}) with a geohash prefix tree.
   *
   * @param maxLevels depth of the tree, or -1 to pick a random depth in [1, 3]
   */
  private void setupGeohashGrid(int maxLevels) {
    this.ctx = SpatialContext.GEO;

    // Keep the grid fairly shallow: at most 16k cells (32^3).
    final int levels = (maxLevels == -1) ? randomIntBetween(1, 3) : maxLevels;
    this.grid = new GeohashPrefixTree(ctx, levels);
    this.rptStrategy = newRPT();
  }

  /**
   * Creates the RPT strategy over the current {@link #grid}.
   *
   * @return a new strategy whose field name is this class's simple name plus "_rpt"
   */
  protected RecursivePrefixTreeStrategy newRPT() {
    final String fieldName = getClass().getSimpleName() + "_rpt";
    final RecursivePrefixTreeStrategy result = new RecursivePrefixTreeStrategy(this.grid, fieldName);
    result.setDistErrPct(0.10);//not too many cells
    return result;
  }

  /**
   * Picks a random grid type, builds the composite strategy and runs the random-shape
   * check for every operation that is not skipped, clearing the index after each one.
   */
  @Test
  @Repeat(iterations = 20)
  public void testOperations() throws IOException {
    //setup
    final boolean useQuad = randomBoolean();
    if (!useQuad) {
      setupGeohashGrid(-1);
    } else {
      setupQuadGrid(-1);
    }
    final SerializedDVStrategy sdvStrategy =
        new SerializedDVStrategy(ctx, getClass().getSimpleName() + "_sdv");
    this.strategy = new CompositeSpatialStrategy(
        "composite_" + getClass().getSimpleName(), rptStrategy, sdvStrategy);

    //Do it!
    for (SpatialOperation pred : SpatialOperation.values()) {
      if (!isSkipped(pred)) {
        testOperationRandomShapes(pred);
        deleteAll();
        commit();
      }
    }
  }

  /** Tells whether {@link #testOperations()} leaves the given operation out. */
  private static boolean isSkipped(SpatialOperation pred) {
    final boolean bboxVariant =
        pred == SpatialOperation.BBoxIntersects || pred == SpatialOperation.BBoxWithin;
    //TODO IsDisjointTo is not exercised yet
    return bboxVariant || pred == SpatialOperation.IsDisjointTo;
  }

  @Override
  protected boolean needsDocValues() {
    //due to SerializedDVStrategy
    return true;
  }

  @Override
  protected Shape randomIndexedShape() {
    return randomShape();
  }

  @Override
  protected Shape randomQueryShape() {
    return randomShape();
  }

  /** Returns either a random circle or a random rectangle, chosen by a coin flip. */
  private Shape randomShape() {
    if (random().nextBoolean()) {
      return randomCircle();
    }
    return randomRectangle();
  }

  //TODO move up
  /**
   * Creates a circle around a random point. In a geo context the radius is a random
   * value below 100; otherwise it is a random fraction of the distance from the
   * center to the closest world-bounds edge.
   */
  private Shape randomCircle() {
    final Point center = randomPoint();
    //TODO pick using gaussian
    final double radius;
    if (!ctx.isGeo()) {
      //find distance to closest edge
      final Rectangle world = ctx.getWorldBounds();
      final double x = center.getX();
      final double y = center.getY();
      final double gapX = Math.min(x - world.getMinX(), world.getMaxX() - x);
      final double gapY = Math.min(y - world.getMinY(), world.getMaxY() - y);
      radius = randomDouble() * Math.min(gapX, gapY);
    } else {
      radius = randomDouble() * 100;
    }
    return ctx.makeCircle(center, radius);
  }
}

View File

@ -17,18 +17,18 @@ package org.apache.lucene.spatial.prefix;
* limitations under the License.
*/
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
@ -40,12 +40,6 @@ public abstract class RandomSpatialOpStrategyTestCase extends StrategyTestCase {
//Note: this is partially redundant with StrategyTestCase.runTestQuery & testOperation
protected void testOperationRandomShapes(final SpatialOperation operation) throws IOException {
//first show that when there's no data, a query will result in no results
{
Query query = strategy.makeQuery(new SpatialArgs(operation, randomQueryShape()));
SearchResults searchResults = executeQuery(query, 1);
assertEquals(0, searchResults.numFound);
}
final int numIndexedShapes = randomIntBetween(1, 6);
List<Shape> indexedShapes = new ArrayList<>(numIndexedShapes);
@ -64,6 +58,13 @@ public abstract class RandomSpatialOpStrategyTestCase extends StrategyTestCase {
protected void testOperation(final SpatialOperation operation,
List<Shape> indexedShapes, List<Shape> queryShapes, boolean havoc) throws IOException {
//first show that when there's no data, a query will result in no results
{
Query query = strategy.makeQuery(new SpatialArgs(operation, randomQueryShape()));
SearchResults searchResults = executeQuery(query, 1);
assertEquals(0, searchResults.numFound);
}
//Main index loop:
for (int i = 0; i < indexedShapes.size(); i++) {
Shape shape = indexedShapes.get(i);