LUCENE-8755: Spatial-extras quad and packed-quad trees now index

points a little faster, and also fix an edge case bug.
 Fixes #824
This commit is contained in:
nppoly 2019-08-07 16:35:55 +08:00 committed by David Smiley
parent 8725944fba
commit 26628b2717
7 changed files with 194 additions and 34 deletions

View File

@ -64,6 +64,11 @@ API Changes
* LUCENE-8909: IndexWriter#getFieldNames() method is used to get fields present in index. After LUCENE-8316, this
method is no longer required. Hence, deprecate IndexWriter#getFieldNames() method. (Adrien Grand, Munendra S N)
* LUCENE-8755: SpatialPrefixTreeFactory now consumes the "version" parsed with Lucene's Version class. The quad
and packed quad prefix trees are sensitive to this. It's recommended to pass the version like you
should do likewise for analysis components for tokenized text, or else changes to the encoding in future versions
may be incompatible with older indexes. (Chongchen Chen, David Smiley)
New Features
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
@ -108,6 +113,14 @@ the total hits is not requested.
* LUCENE-8941: Matches on wildcard queries will defer building their full
disjunction until a MatchesIterator is pulled (Alan Woodward)
* LUCENE-8755: spatial-extras quad and packed quad prefix trees now index points faster.
(Chongchen Chen, David Smiley)
Bug Fixes
* LUCENE-8755: spatial-extras quad and packed quad prefix trees could throw a
NullPointerException for certain cell edge coordinates (Chongchen Chen, David Smiley)
Other
* LUCENE-8778 LUCENE-8911: Define analyzer SPI names as static final fields and document the names in Javadocs.

View File

@ -22,6 +22,7 @@ import java.util.List;
import java.util.NoSuchElementException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
@ -60,7 +61,9 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
public static class Factory extends QuadPrefixTree.Factory {
@Override
protected SpatialPrefixTree newSPT() {
return new PackedQuadPrefixTree(ctx, maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
PackedQuadPrefixTree tree = new PackedQuadPrefixTree(ctx, maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
tree.robust = getVersion().onOrAfter(Version.LUCENE_8_3_0);
return tree;
}
}
@ -83,24 +86,65 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
@Override
public Cell getCell(Point p, int level) {
if (!robust) { // old method
List<Cell> cells = new ArrayList<>(1);
build(xmid, ymid, 0, cells, 0x0L, ctx.makePoint(p.getX(), p.getY()), level);
buildNotRobustly(xmid, ymid, 0, cells, 0x0L, ctx.makePoint(p.getX(), p.getY()), level);
if (!cells.isEmpty()) {
return cells.get(0);//note cells could be longer if p on edge
}
}
protected void build(double x, double y, int level, List<Cell> matches, long term, Shape shape, int maxLevel) {
double currentXmid = xmid;
double currentYmid = ymid;
double xp = p.getX();
double yp = p.getY();
long term = 0L;
int levelLimit = level > maxLevels ? maxLevels : level;
SpatialRelation rel = SpatialRelation.CONTAINS;
for (int lvl = 0; lvl < levelLimit; lvl++){
int quad = battenberg(currentXmid, currentYmid, xp, yp);
double halfWidth = levelW[lvl + 1];
double halfHeight = levelH[lvl + 1];
switch(quad){
case 0:
currentXmid -= halfWidth;
currentYmid += halfHeight;
break;
case 1:
currentXmid += halfWidth;
currentYmid += halfHeight;
break;
case 2:
currentXmid -= halfWidth;
currentYmid -= halfHeight;
break;
case 3:
currentXmid += halfWidth;
currentYmid -= halfHeight;
break;
default:
}
// set bits for next level
term |= (((long)(quad))<<(64-((lvl + 1)<<1)));
// increment level
term = ((term>>>1)+1)<<1;
}
return new PackedQuadCell(term, rel);
}
protected void buildNotRobustly(double x, double y, int level, List<Cell> matches, long term, Shape shape, int maxLevel) {
double w = levelW[level] / 2;
double h = levelH[level] / 2;
// Z-Order
// http://en.wikipedia.org/wiki/Z-order_%28curve%29
checkBattenberg(QUAD[0], x - w, y + h, level, matches, term, shape, maxLevel);
checkBattenberg(QUAD[1], x + w, y + h, level, matches, term, shape, maxLevel);
checkBattenberg(QUAD[2], x - w, y - h, level, matches, term, shape, maxLevel);
checkBattenberg(QUAD[3], x + w, y - h, level, matches, term, shape, maxLevel);
checkBattenbergNotRobustly(QUAD[0], x - w, y + h, level, matches, term, shape, maxLevel);
checkBattenbergNotRobustly(QUAD[1], x + w, y + h, level, matches, term, shape, maxLevel);
checkBattenbergNotRobustly(QUAD[2], x - w, y - h, level, matches, term, shape, maxLevel);
checkBattenbergNotRobustly(QUAD[3], x + w, y - h, level, matches, term, shape, maxLevel);
}
protected void checkBattenberg(byte quad, double cx, double cy, int level, List<Cell> matches,
protected void checkBattenbergNotRobustly(byte quad, double cx, double cy, int level, List<Cell> matches,
long term, Shape shape, int maxLevel) {
// short-circuit if we find a match for the point (no need to continue recursion)
if (shape instanceof Point && !matches.isEmpty())
@ -122,7 +166,7 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
if (SpatialRelation.CONTAINS == v || (level >= maxLevel)) {
matches.add(new PackedQuadCell(term, v.transpose()));
} else {// SpatialRelation.WITHIN, SpatialRelation.INTERSECTS
build(cx, cy, level, matches, term, shape, maxLevel);
buildNotRobustly(cx, cy, level, matches, term, shape, maxLevel);
}
}

View File

@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.util.Version;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
@ -43,17 +44,17 @@ public class QuadPrefixTree extends LegacyPrefixTree {
* Factory for creating {@link QuadPrefixTree} instances with useful defaults
*/
public static class Factory extends SpatialPrefixTreeFactory {
@Override
protected int getLevelForDistance(double degrees) {
QuadPrefixTree grid = new QuadPrefixTree(ctx, MAX_LEVELS_POSSIBLE);
return grid.getLevelForDistance(degrees);
return newSPT().getLevelForDistance(degrees);
}
@Override
protected SpatialPrefixTree newSPT() {
return new QuadPrefixTree(ctx,
QuadPrefixTree tree = new QuadPrefixTree(ctx,
maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
tree.robust = getVersion().onOrAfter(Version.LUCENE_8_3_0);
return tree;
}
}
@ -75,6 +76,8 @@ public class QuadPrefixTree extends LegacyPrefixTree {
final int[] levelS; // side
final int[] levelN; // number
protected boolean robust = true; // for backward compatibility, use the old method if user specified old version.
public QuadPrefixTree(
SpatialContext ctx, Rectangle bounds, int maxLevels) {
super(ctx, maxLevels);
@ -83,10 +86,10 @@ public class QuadPrefixTree extends LegacyPrefixTree {
this.ymin = bounds.getMinY();
this.ymax = bounds.getMaxY();
levelW = new double[maxLevels];
levelH = new double[maxLevels];
levelS = new int[maxLevels];
levelN = new int[maxLevels];
levelW = new double[maxLevels + 1];
levelH = new double[maxLevels + 1];
levelS = new int[maxLevels + 1];
levelN = new int[maxLevels + 1];
gridW = xmax - xmin;
gridH = ymax - ymin;
@ -146,12 +149,50 @@ public class QuadPrefixTree extends LegacyPrefixTree {
@Override
public Cell getCell(Point p, int level) {
if (!robust) { // old method
List<Cell> cells = new ArrayList<>(1);
build(xmid, ymid, 0, cells, new BytesRef(maxLevels+1), ctx.makePoint(p.getX(),p.getY()), level);
buildNotRobustly(xmid, ymid, 0, cells, new BytesRef(maxLevels+1), ctx.makePoint(p.getX(),p.getY()), level);
if (!cells.isEmpty()) {
return cells.get(0);//note cells could be longer if p on edge
}
}
private void build(
double currentXmid = xmid;
double currentYmid = ymid;
double xp = p.getX();
double yp = p.getY();
BytesRef str = new BytesRef(maxLevels+1);
int levelLimit = level > maxLevels ? maxLevels : level;
SpatialRelation rel = SpatialRelation.CONTAINS;
for (int lvl = 0; lvl < levelLimit; lvl++){
int c = battenberg(currentXmid, currentYmid, xp, yp);
double halfWidth = levelW[lvl + 1];
double halfHeight = levelH[lvl + 1];
switch(c){
case 0:
currentXmid -= halfWidth;
currentYmid += halfHeight;
break;
case 1:
currentXmid += halfWidth;
currentYmid += halfHeight;
break;
case 2:
currentXmid -= halfWidth;
currentYmid -= halfHeight;
break;
case 3:
currentXmid += halfWidth;
currentYmid -= halfHeight;
break;
default:
}
str.bytes[str.length++] = (byte)('A' + c);
}
return new QuadCell(str, rel);
}
private void buildNotRobustly(
double x,
double y,
int level,
@ -165,10 +206,10 @@ public class QuadPrefixTree extends LegacyPrefixTree {
// Z-Order
// http://en.wikipedia.org/wiki/Z-order_%28curve%29
checkBattenberg('A', x - w, y + h, level, matches, str, shape, maxLevel);
checkBattenberg('B', x + w, y + h, level, matches, str, shape, maxLevel);
checkBattenberg('C', x - w, y - h, level, matches, str, shape, maxLevel);
checkBattenberg('D', x + w, y - h, level, matches, str, shape, maxLevel);
checkBattenbergNotRobustly('A', x - w, y + h, level, matches, str, shape, maxLevel);
checkBattenbergNotRobustly('B', x + w, y + h, level, matches, str, shape, maxLevel);
checkBattenbergNotRobustly('C', x - w, y - h, level, matches, str, shape, maxLevel);
checkBattenbergNotRobustly('D', x + w, y - h, level, matches, str, shape, maxLevel);
// possibly consider hilbert curve
// http://en.wikipedia.org/wiki/Hilbert_curve
@ -176,7 +217,7 @@ public class QuadPrefixTree extends LegacyPrefixTree {
// if we actually use the range property in the query, this could be useful
}
protected void checkBattenberg(
protected void checkBattenbergNotRobustly(
char c,
double cx,
double cy,
@ -207,12 +248,32 @@ public class QuadPrefixTree extends LegacyPrefixTree {
//str.append(SpatialPrefixGrid.INTERSECTS);
matches.add(new QuadCell(BytesRef.deepCopyOf(str), v.transpose()));
} else {
build(cx, cy, nextLevel, matches, str, shape, maxLevel);
buildNotRobustly(cx, cy, nextLevel, matches, str, shape, maxLevel);
}
}
str.length = strlen;
}
/** Returns a Z-Order quadrant [0-3]. */
protected int battenberg(double xmid, double ymid, double xp, double yp){
// http://en.wikipedia.org/wiki/Z-order_%28curve%29
if (ymid <= yp) {
if (xmid >= xp) {
return 0;
}
return 1;
} else {
if (xmid >= xp) {
return 2;
}
return 3;
}
// possibly consider hilbert curve
// http://en.wikipedia.org/wiki/Hilbert_curve
// http://blog.notdot.net/2009/11/Damn-Cool-Algorithms-Spatial-indexing-with-Quadtrees-and-Hilbert-Curves
// if we actually use the range property in the query, this could be useful
}
protected class QuadCell extends LegacyCell {
QuadCell(byte[] bytes, int off, int len) {

View File

@ -16,8 +16,10 @@
*/
package org.apache.lucene.spatial.prefix.tree;
import java.text.ParseException;
import java.util.Map;
import org.apache.lucene.util.Version;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils;
@ -33,14 +35,17 @@ public abstract class SpatialPrefixTreeFactory {
public static final String PREFIX_TREE = "prefixTree";
public static final String MAX_LEVELS = "maxLevels";
public static final String MAX_DIST_ERR = "maxDistErr";
public static final String VERSION = "version";
protected Map<String, String> args;
protected SpatialContext ctx;
protected Integer maxLevels;
private Version version;
/**
* The factory is looked up via "prefixTree" in args, expecting "geohash" or "quad".
* If it's neither of these, then "geohash" is chosen for a geo context, otherwise "quad" is chosen.
* The "version" arg, if present, is parsed with {@link Version} and the prefix tree might be sensitive to it.
*/
public static SpatialPrefixTree makeSPT(Map<String,String> args, ClassLoader classLoader, SpatialContext ctx) {
//TODO refactor to use Java SPI like how Lucene already does for codecs/postingsFormats, etc
@ -64,16 +69,26 @@ public abstract class SpatialPrefixTreeFactory {
throw new RuntimeException(e);
}
}
instance.init(args,ctx);
instance.init(args, ctx);
return instance.newSPT();
}
protected void init(Map<String, String> args, SpatialContext ctx) {
this.args = args;
this.ctx = ctx;
initVersion();
initMaxLevels();
}
protected void initVersion() {
String versionStr = args.get(VERSION);
try {
setVersion(versionStr == null ? Version.LATEST : Version.parseLeniently(versionStr));
} catch (ParseException e) {
throw new RuntimeException(e);
}
}
protected void initMaxLevels() {
String mlStr = args.get(MAX_LEVELS);
if (mlStr != null) {
@ -94,6 +109,20 @@ public abstract class SpatialPrefixTreeFactory {
maxLevels = getLevelForDistance(degrees);
}
/**
* Set the version of Lucene this tree should mimic the behavior for for analysis.
*/
public void setVersion(Version v) {
version = v;
}
/**
* Return the version of Lucene this tree will mimic the behavior of for analysis.
*/
public Version getVersion() {
return version;
}
/** Calls {@link SpatialPrefixTree#getLevelForDistance(double)}. */
protected abstract int getLevelForDistance(double degrees);

View File

@ -49,6 +49,8 @@ public abstract class AbstractSpatialPrefixTreeFieldType<T extends PrefixTreeStr
protected void init(IndexSchema schema, Map<String, String> args) {
super.init(schema, args);
args.putIfAbsent(SpatialPrefixTreeFactory.VERSION, schema.getDefaultLuceneMatchVersion().toString());
// Convert the maxDistErr to degrees (based on distanceUnits) since Lucene spatial layer depends on degrees
if(args.containsKey(SpatialPrefixTreeFactory.MAX_DIST_ERR)) {
double maxDistErrOriginal = Double.parseDouble(args.get(SpatialPrefixTreeFactory.MAX_DIST_ERR));

View File

@ -72,9 +72,13 @@
numberType="pdouble" distanceUnits="degrees" storeSubFields="false"/>
<fieldType name="llp" class="solr.LatLonPointSpatialField" distanceUnits="degrees" multiValued="true" />
<fieldType name="oslocation" class="solr.SpatialRecursivePrefixTreeFieldType" geo="false" maxDistErr="0.000009"
worldBounds="ENVELOPE(0,700000,1300000,0)" distErrPct="0.15"/>
<fieldType name="oslocationold" class="solr.SpatialRecursivePrefixTreeFieldType" geo="false" maxDistErr="0.000009"
worldBounds="ENVELOPE(0,700000,1300000,0)" distErrPct="0.15" version="8.2.0"/>
<field name="id" type="string" required="true"/>
<field name="oslocation" type="oslocation" />
<field name="oslocationold" type="oslocationold" />
<field name="srpt_geohash" type="srpt_geohash" multiValued="true"/>
<field name="srpt_quad" type="srpt_quad" multiValued="true"/>
<field name="srpt_packedquad" type="srpt_packedquad" multiValued="true"/>

View File

@ -61,6 +61,13 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
RetrievalCombo.idCounter = 0;
}
@Test
public void testQuadTreeRobustness() {
assertU(adoc("id", "0", "oslocation", "244502.06 639062.07"));
// old (pre 8.3.0) still works
assertU(adoc("id", "0", "oslocationold", "244502.06 639062.07"));
}
@Test
public void testBBox() throws Exception {
String fieldName = random().nextBoolean() ? "bbox" : "bboxD_dynamic";