LUCENE-6190: PrefixTreeStrategy pointsOnly bug: don't switch predicate

Also refactored placement of hasIndexedLeaves, and fixed a minor equals/hashcode bug.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1656473 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2015-02-02 14:31:01 +00:00
parent 5422ecba4e
commit 4ee47bdcde
8 changed files with 86 additions and 62 deletions

View File

@ -31,6 +31,11 @@ API Changes
======================= Lucene 5.1.0 =======================
Bug Fixes
* Spatial pointsOnly flag on PrefixTreeStrategy shouldn't switch all predicates to
Intersects. (David Smiley)
Optimizations
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields

View File

@ -53,27 +53,27 @@ public abstract class AbstractVisitingPrefixTreeFilter extends AbstractPrefixTre
// least it would just make things more complicated.
protected final int prefixGridScanLevel;//at least one less than grid.getMaxLevels()
protected final boolean hasIndexedLeaves;
public AbstractVisitingPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid,
int detailLevel, int prefixGridScanLevel) {
int detailLevel, int prefixGridScanLevel, boolean hasIndexedLeaves) {
super(queryShape, fieldName, grid, detailLevel);
this.prefixGridScanLevel = Math.max(0, Math.min(prefixGridScanLevel, grid.getMaxLevels() - 1));
this.hasIndexedLeaves = hasIndexedLeaves;
assert detailLevel <= grid.getMaxLevels();
}
@Override
public boolean equals(Object o) {
if (!super.equals(o)) return false;//checks getClass == o.getClass & instanceof
return super.equals(o);//checks getClass == o.getClass & instanceof
//Ignore hasIndexedLeaves as it's fixed for a specific field, which super.equals compares
//Ignore prefixGridScanLevel as it is merely a tuning parameter.
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
return result;
return super.hashCode();
}
/**
@ -124,18 +124,14 @@ public abstract class AbstractVisitingPrefixTreeFilter extends AbstractPrefixTre
// TODO MAJOR REFACTOR SIMPLIFICATION BASED ON TreeCellIterator TODO
//
protected final boolean hasIndexedLeaves;//if false then we can skip looking for them
private VNode curVNode;//current pointer, derived from query shape
private BytesRef curVNodeTerm = new BytesRef();//curVNode.cell's term, without leaf
private Cell scanCell;
private BytesRef thisTerm;//the result of termsEnum.term()
public VisitorTemplate(LeafReaderContext context, Bits acceptDocs,
boolean hasIndexedLeaves) throws IOException {
public VisitorTemplate(LeafReaderContext context, Bits acceptDocs) throws IOException {
super(context, acceptDocs);
this.hasIndexedLeaves = hasIndexedLeaves;
}
public DocIdSet getDocIdSet() throws IOException {

View File

@ -37,18 +37,10 @@ import org.apache.lucene.util.FixedBitSet;
*/
public class IntersectsPrefixTreeFilter extends AbstractVisitingPrefixTreeFilter {
private final boolean hasIndexedLeaves;
public IntersectsPrefixTreeFilter(Shape queryShape, String fieldName,
SpatialPrefixTree grid, int detailLevel,
int prefixGridScanLevel, boolean hasIndexedLeaves) {
super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel);
this.hasIndexedLeaves = hasIndexedLeaves;
}
@Override
public boolean equals(Object o) {
return super.equals(o) && hasIndexedLeaves == ((IntersectsPrefixTreeFilter)o).hasIndexedLeaves;
super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel, hasIndexedLeaves);
}
@Override
@ -62,7 +54,7 @@ public class IntersectsPrefixTreeFilter extends AbstractVisitingPrefixTreeFilter
* Point query shape optimization when the only indexed data is a point (no leaves). Result is a term query.
*/
return new VisitorTemplate(context, acceptDocs, hasIndexedLeaves) {
return new VisitorTemplate(context, acceptDocs) {
private FixedBitSet results;
@Override

View File

@ -114,15 +114,14 @@ public class PrefixTreeFacetCounter {
//AbstractVisitingPrefixTreeFilter is a Lucene Filter. We don't need a filter; we use it for its great prefix-tree
// traversal code. TODO consider refactoring if/when it makes sense (more use cases than this)
new AbstractVisitingPrefixTreeFilter(queryShape, strategy.getFieldName(), tree, facetLevel, scanLevel) {
new AbstractVisitingPrefixTreeFilter(queryShape, strategy.getFieldName(), tree, facetLevel, scanLevel,
!strategy.isPointsOnly()) {
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
assert facetLevel == super.detailLevel;//same thing, FYI. (constant)
final boolean hasIndexedLeaves = !strategy.isPointsOnly();
return new VisitorTemplate(context, acceptDocs, hasIndexedLeaves) {
return new VisitorTemplate(context, acceptDocs) {
@Override
protected void start() throws IOException {

View File

@ -172,12 +172,12 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
Shape shape = args.getShape();
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
if (pointsOnly || op == SpatialOperation.Intersects) {
if (op == SpatialOperation.Intersects) {
return new IntersectsPrefixTreeFilter(
shape, getFieldName(), grid, detailLevel, prefixGridScanLevel, !pointsOnly);
} else if (op == SpatialOperation.IsWithin) {
return new WithinPrefixTreeFilter(
shape, getFieldName(), grid, detailLevel, prefixGridScanLevel,
shape, getFieldName(), grid, detailLevel, prefixGridScanLevel, !pointsOnly,
-1);//-1 flag is slower but ensures correct results
} else if (op == SpatialOperation.Contains) {
return new ContainsPrefixTreeFilter(shape, getFieldName(), grid, detailLevel,

View File

@ -60,18 +60,35 @@ public class WithinPrefixTreeFilter extends AbstractVisitingPrefixTreeFilter {
private final Shape bufferedQueryShape;//if null then the whole world
/**
* See {@link AbstractVisitingPrefixTreeFilter#AbstractVisitingPrefixTreeFilter(com.spatial4j.core.shape.Shape, String, org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree, int, int)}.
* See {@link AbstractVisitingPrefixTreeFilter#AbstractVisitingPrefixTreeFilter(com.spatial4j.core.shape.Shape, String, org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree, int, int, boolean)}.
* {@code queryBuffer} is the (minimum) distance beyond the query shape edge
* where non-matching documents are looked for so they can be excluded. If
* -1 is used then the whole world is examined (a good default for correctness).
*/
public WithinPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid,
int detailLevel, int prefixGridScanLevel, double queryBuffer) {
super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel);
if (queryBuffer == -1)
this.bufferedQueryShape = null;
else
this.bufferedQueryShape = bufferShape(queryShape, queryBuffer);
int detailLevel, int prefixGridScanLevel, boolean hasIndexedLeaves,
double queryBuffer) {
super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel, hasIndexedLeaves);
this.bufferedQueryShape = queryBuffer == -1 ? null : bufferShape(queryShape, queryBuffer);
}
@Override
public boolean equals(Object o) {
if (!super.equals(o)) return false;//checks getClass == o.getClass & instanceof
WithinPrefixTreeFilter that = (WithinPrefixTreeFilter) o;
if (bufferedQueryShape != null ? !bufferedQueryShape.equals(that.bufferedQueryShape) : that.bufferedQueryShape != null)
return false;
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (bufferedQueryShape != null ? bufferedQueryShape.hashCode() : 0);
return result;
}
/** Returns a new shape that is larger than shape by at distErr.
@ -121,7 +138,7 @@ public class WithinPrefixTreeFilter extends AbstractVisitingPrefixTreeFilter {
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
return new VisitorTemplate(context, acceptDocs, true) {
return new VisitorTemplate(context, acceptDocs) {
private FixedBitSet inside;
private FixedBitSet outside;
private SpatialRelation visitRelation;

View File

@ -18,6 +18,18 @@ package org.apache.lucene.spatial;
* limitations under the License.
*/
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.document.Document;
@ -36,18 +48,6 @@ import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialArgsParser;
import org.apache.lucene.spatial.query.SpatialOperation;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
public abstract class StrategyTestCase extends SpatialTestCase {
public static final String DATA_SIMPLE_BBOX = "simple-bbox.txt";
@ -62,7 +62,7 @@ public abstract class StrategyTestCase extends SpatialTestCase {
public static final String QTEST_Cities_Intersects_BBox = "cities-Intersects-BBox.txt";
public static final String QTEST_Simple_Queries_BBox = "simple-Queries-BBox.txt";
private Logger log = Logger.getLogger(getClass().getName());
protected Logger log = Logger.getLogger(getClass().getName());
protected final SpatialArgsParser argsParser = new SpatialArgsParser();

View File

@ -17,6 +17,18 @@ package org.apache.lucene.spatial.prefix;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
@ -41,18 +53,6 @@ import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
@ -80,8 +80,11 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase {
//((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid
((RecursivePrefixTreeStrategy)strategy).setPruneLeafyBranches(randomBoolean());
if (maxLevels == -1 && rarely()) {
((PrefixTreeStrategy) strategy).setPointsOnly(true);
}
System.out.println("Strategy: " + strategy.toString());
log.info("Strategy: " + strategy.toString());
}
private void setupCtx2D(SpatialContext ctx) {
@ -238,13 +241,14 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase {
Map<String, Shape> indexedShapesGS = new LinkedHashMap<>();//grid snapped
final int numIndexedShapes = randomIntBetween(1, 6);
boolean indexedAtLeastOneShapePair = false;
final boolean pointsOnly = ((PrefixTreeStrategy) strategy).isPointsOnly();
for (int i = 0; i < numIndexedShapes; i++) {
String id = "" + i;
Shape indexedShape;
int R = random().nextInt(12);
if (R == 0) {//1 in 12
indexedShape = null;
} else if (R == 1) {//1 in 12
} else if (R == 1 || pointsOnly) {//1 in 12
indexedShape = randomPoint();//just one point
} else if (R <= 4) {//3 in 12
//comprised of more than one shape
@ -292,6 +296,17 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase {
// queryShape = randomShapePairRect(!biasContains);//invert biasContains for query side
// break;
// }
case 4:
//choose an existing indexed shape
if (!indexedShapes.isEmpty()) {
Shape tmp = indexedShapes.values().iterator().next();
if (tmp instanceof Point || tmp instanceof Rectangle) {//avoids null and shapePair
queryShape = tmp;
break;
}
}//else fall-through
default: queryShape = randomRectangle();
}
final Shape queryShapeGS = gridSnap(queryShape);