mirror of https://github.com/apache/lucene.git
LUCENE-8755: Spatial-extras quad and packed-quad trees now index
points a little faster, and also fix an edge case bug. Fixes #824
This commit is contained in:
parent
8725944fba
commit
26628b2717
|
@ -64,6 +64,11 @@ API Changes
|
|||
* LUCENE-8909: IndexWriter#getFieldNames() method is used to get fields present in index. After LUCENE-8316, this
|
||||
method is no longer required. Hence, deprecate IndexWriter#getFieldNames() method. (Adrien Grand, Munendra S N)
|
||||
|
||||
* LUCENE-8755: SpatialPrefixTreeFactory now consumes the "version" parsed with Lucene's Version class. The quad
|
||||
and packed quad prefix trees are sensitive to this. It's recommended to pass the version like you
|
||||
should do likewise for analysis components for tokenized text, or else changes to the encoding in future versions
|
||||
may be incompatible with older indexes. (Chongchen Chen, David Smiley)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
|
||||
|
@ -108,6 +113,14 @@ the total hits is not requested.
|
|||
* LUCENE-8941: Matches on wildcard queries will defer building their full
|
||||
disjunction until a MatchesIterator is pulled (Alan Woodward)
|
||||
|
||||
* LUCENE-8755: spatial-extras quad and packed quad prefix trees now index points faster.
|
||||
(Chongchen Chen, David Smiley)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-8755: spatial-extras quad and packed quad prefix trees could throw a
|
||||
NullPointerException for certain cell edge coordinates (Chongchen Chen, David Smiley)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-8778 LUCENE-8911: Define analyzer SPI names as static final fields and document the names in Javadocs.
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.List;
|
|||
import java.util.NoSuchElementException;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
import org.locationtech.spatial4j.shape.Rectangle;
|
||||
|
@ -60,7 +61,9 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
|
|||
public static class Factory extends QuadPrefixTree.Factory {
|
||||
@Override
|
||||
protected SpatialPrefixTree newSPT() {
|
||||
return new PackedQuadPrefixTree(ctx, maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
|
||||
PackedQuadPrefixTree tree = new PackedQuadPrefixTree(ctx, maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
|
||||
tree.robust = getVersion().onOrAfter(Version.LUCENE_8_3_0);
|
||||
return tree;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -83,25 +86,66 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
|
|||
|
||||
@Override
|
||||
public Cell getCell(Point p, int level) {
|
||||
List<Cell> cells = new ArrayList<>(1);
|
||||
build(xmid, ymid, 0, cells, 0x0L, ctx.makePoint(p.getX(), p.getY()), level);
|
||||
return cells.get(0);//note cells could be longer if p on edge
|
||||
if (!robust) { // old method
|
||||
List<Cell> cells = new ArrayList<>(1);
|
||||
buildNotRobustly(xmid, ymid, 0, cells, 0x0L, ctx.makePoint(p.getX(), p.getY()), level);
|
||||
if (!cells.isEmpty()) {
|
||||
return cells.get(0);//note cells could be longer if p on edge
|
||||
}
|
||||
}
|
||||
|
||||
double currentXmid = xmid;
|
||||
double currentYmid = ymid;
|
||||
double xp = p.getX();
|
||||
double yp = p.getY();
|
||||
long term = 0L;
|
||||
int levelLimit = level > maxLevels ? maxLevels : level;
|
||||
SpatialRelation rel = SpatialRelation.CONTAINS;
|
||||
for (int lvl = 0; lvl < levelLimit; lvl++){
|
||||
int quad = battenberg(currentXmid, currentYmid, xp, yp);
|
||||
double halfWidth = levelW[lvl + 1];
|
||||
double halfHeight = levelH[lvl + 1];
|
||||
switch(quad){
|
||||
case 0:
|
||||
currentXmid -= halfWidth;
|
||||
currentYmid += halfHeight;
|
||||
break;
|
||||
case 1:
|
||||
currentXmid += halfWidth;
|
||||
currentYmid += halfHeight;
|
||||
break;
|
||||
case 2:
|
||||
currentXmid -= halfWidth;
|
||||
currentYmid -= halfHeight;
|
||||
break;
|
||||
case 3:
|
||||
currentXmid += halfWidth;
|
||||
currentYmid -= halfHeight;
|
||||
break;
|
||||
default:
|
||||
}
|
||||
// set bits for next level
|
||||
term |= (((long)(quad))<<(64-((lvl + 1)<<1)));
|
||||
// increment level
|
||||
term = ((term>>>1)+1)<<1;
|
||||
}
|
||||
return new PackedQuadCell(term, rel);
|
||||
}
|
||||
|
||||
protected void build(double x, double y, int level, List<Cell> matches, long term, Shape shape, int maxLevel) {
|
||||
protected void buildNotRobustly(double x, double y, int level, List<Cell> matches, long term, Shape shape, int maxLevel) {
|
||||
double w = levelW[level] / 2;
|
||||
double h = levelH[level] / 2;
|
||||
|
||||
// Z-Order
|
||||
// http://en.wikipedia.org/wiki/Z-order_%28curve%29
|
||||
checkBattenberg(QUAD[0], x - w, y + h, level, matches, term, shape, maxLevel);
|
||||
checkBattenberg(QUAD[1], x + w, y + h, level, matches, term, shape, maxLevel);
|
||||
checkBattenberg(QUAD[2], x - w, y - h, level, matches, term, shape, maxLevel);
|
||||
checkBattenberg(QUAD[3], x + w, y - h, level, matches, term, shape, maxLevel);
|
||||
checkBattenbergNotRobustly(QUAD[0], x - w, y + h, level, matches, term, shape, maxLevel);
|
||||
checkBattenbergNotRobustly(QUAD[1], x + w, y + h, level, matches, term, shape, maxLevel);
|
||||
checkBattenbergNotRobustly(QUAD[2], x - w, y - h, level, matches, term, shape, maxLevel);
|
||||
checkBattenbergNotRobustly(QUAD[3], x + w, y - h, level, matches, term, shape, maxLevel);
|
||||
}
|
||||
|
||||
protected void checkBattenberg(byte quad, double cx, double cy, int level, List<Cell> matches,
|
||||
long term, Shape shape, int maxLevel) {
|
||||
protected void checkBattenbergNotRobustly(byte quad, double cx, double cy, int level, List<Cell> matches,
|
||||
long term, Shape shape, int maxLevel) {
|
||||
// short-circuit if we find a match for the point (no need to continue recursion)
|
||||
if (shape instanceof Point && !matches.isEmpty())
|
||||
return;
|
||||
|
@ -122,7 +166,7 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
|
|||
if (SpatialRelation.CONTAINS == v || (level >= maxLevel)) {
|
||||
matches.add(new PackedQuadCell(term, v.transpose()));
|
||||
} else {// SpatialRelation.WITHIN, SpatialRelation.INTERSECTS
|
||||
build(cx, cy, level, matches, term, shape, maxLevel);
|
||||
buildNotRobustly(cx, cy, level, matches, term, shape, maxLevel);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collection;
|
|||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
import org.locationtech.spatial4j.shape.Rectangle;
|
||||
|
@ -43,17 +44,17 @@ public class QuadPrefixTree extends LegacyPrefixTree {
|
|||
* Factory for creating {@link QuadPrefixTree} instances with useful defaults
|
||||
*/
|
||||
public static class Factory extends SpatialPrefixTreeFactory {
|
||||
|
||||
@Override
|
||||
protected int getLevelForDistance(double degrees) {
|
||||
QuadPrefixTree grid = new QuadPrefixTree(ctx, MAX_LEVELS_POSSIBLE);
|
||||
return grid.getLevelForDistance(degrees);
|
||||
return newSPT().getLevelForDistance(degrees);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SpatialPrefixTree newSPT() {
|
||||
return new QuadPrefixTree(ctx,
|
||||
QuadPrefixTree tree = new QuadPrefixTree(ctx,
|
||||
maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
|
||||
tree.robust = getVersion().onOrAfter(Version.LUCENE_8_3_0);
|
||||
return tree;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,6 +76,8 @@ public class QuadPrefixTree extends LegacyPrefixTree {
|
|||
final int[] levelS; // side
|
||||
final int[] levelN; // number
|
||||
|
||||
protected boolean robust = true; // for backward compatibility, use the old method if user specified old version.
|
||||
|
||||
public QuadPrefixTree(
|
||||
SpatialContext ctx, Rectangle bounds, int maxLevels) {
|
||||
super(ctx, maxLevels);
|
||||
|
@ -83,10 +86,10 @@ public class QuadPrefixTree extends LegacyPrefixTree {
|
|||
this.ymin = bounds.getMinY();
|
||||
this.ymax = bounds.getMaxY();
|
||||
|
||||
levelW = new double[maxLevels];
|
||||
levelH = new double[maxLevels];
|
||||
levelS = new int[maxLevels];
|
||||
levelN = new int[maxLevels];
|
||||
levelW = new double[maxLevels + 1];
|
||||
levelH = new double[maxLevels + 1];
|
||||
levelS = new int[maxLevels + 1];
|
||||
levelN = new int[maxLevels + 1];
|
||||
|
||||
gridW = xmax - xmin;
|
||||
gridH = ymax - ymin;
|
||||
|
@ -146,12 +149,50 @@ public class QuadPrefixTree extends LegacyPrefixTree {
|
|||
|
||||
@Override
|
||||
public Cell getCell(Point p, int level) {
|
||||
List<Cell> cells = new ArrayList<>(1);
|
||||
build(xmid, ymid, 0, cells, new BytesRef(maxLevels+1), ctx.makePoint(p.getX(),p.getY()), level);
|
||||
return cells.get(0);//note cells could be longer if p on edge
|
||||
if (!robust) { // old method
|
||||
List<Cell> cells = new ArrayList<>(1);
|
||||
buildNotRobustly(xmid, ymid, 0, cells, new BytesRef(maxLevels+1), ctx.makePoint(p.getX(),p.getY()), level);
|
||||
if (!cells.isEmpty()) {
|
||||
return cells.get(0);//note cells could be longer if p on edge
|
||||
}
|
||||
}
|
||||
|
||||
double currentXmid = xmid;
|
||||
double currentYmid = ymid;
|
||||
double xp = p.getX();
|
||||
double yp = p.getY();
|
||||
BytesRef str = new BytesRef(maxLevels+1);
|
||||
int levelLimit = level > maxLevels ? maxLevels : level;
|
||||
SpatialRelation rel = SpatialRelation.CONTAINS;
|
||||
for (int lvl = 0; lvl < levelLimit; lvl++){
|
||||
int c = battenberg(currentXmid, currentYmid, xp, yp);
|
||||
double halfWidth = levelW[lvl + 1];
|
||||
double halfHeight = levelH[lvl + 1];
|
||||
switch(c){
|
||||
case 0:
|
||||
currentXmid -= halfWidth;
|
||||
currentYmid += halfHeight;
|
||||
break;
|
||||
case 1:
|
||||
currentXmid += halfWidth;
|
||||
currentYmid += halfHeight;
|
||||
break;
|
||||
case 2:
|
||||
currentXmid -= halfWidth;
|
||||
currentYmid -= halfHeight;
|
||||
break;
|
||||
case 3:
|
||||
currentXmid += halfWidth;
|
||||
currentYmid -= halfHeight;
|
||||
break;
|
||||
default:
|
||||
}
|
||||
str.bytes[str.length++] = (byte)('A' + c);
|
||||
}
|
||||
return new QuadCell(str, rel);
|
||||
}
|
||||
|
||||
private void build(
|
||||
private void buildNotRobustly(
|
||||
double x,
|
||||
double y,
|
||||
int level,
|
||||
|
@ -165,10 +206,10 @@ public class QuadPrefixTree extends LegacyPrefixTree {
|
|||
|
||||
// Z-Order
|
||||
// http://en.wikipedia.org/wiki/Z-order_%28curve%29
|
||||
checkBattenberg('A', x - w, y + h, level, matches, str, shape, maxLevel);
|
||||
checkBattenberg('B', x + w, y + h, level, matches, str, shape, maxLevel);
|
||||
checkBattenberg('C', x - w, y - h, level, matches, str, shape, maxLevel);
|
||||
checkBattenberg('D', x + w, y - h, level, matches, str, shape, maxLevel);
|
||||
checkBattenbergNotRobustly('A', x - w, y + h, level, matches, str, shape, maxLevel);
|
||||
checkBattenbergNotRobustly('B', x + w, y + h, level, matches, str, shape, maxLevel);
|
||||
checkBattenbergNotRobustly('C', x - w, y - h, level, matches, str, shape, maxLevel);
|
||||
checkBattenbergNotRobustly('D', x + w, y - h, level, matches, str, shape, maxLevel);
|
||||
|
||||
// possibly consider hilbert curve
|
||||
// http://en.wikipedia.org/wiki/Hilbert_curve
|
||||
|
@ -176,7 +217,7 @@ public class QuadPrefixTree extends LegacyPrefixTree {
|
|||
// if we actually use the range property in the query, this could be useful
|
||||
}
|
||||
|
||||
protected void checkBattenberg(
|
||||
protected void checkBattenbergNotRobustly(
|
||||
char c,
|
||||
double cx,
|
||||
double cy,
|
||||
|
@ -207,12 +248,32 @@ public class QuadPrefixTree extends LegacyPrefixTree {
|
|||
//str.append(SpatialPrefixGrid.INTERSECTS);
|
||||
matches.add(new QuadCell(BytesRef.deepCopyOf(str), v.transpose()));
|
||||
} else {
|
||||
build(cx, cy, nextLevel, matches, str, shape, maxLevel);
|
||||
buildNotRobustly(cx, cy, nextLevel, matches, str, shape, maxLevel);
|
||||
}
|
||||
}
|
||||
str.length = strlen;
|
||||
}
|
||||
|
||||
/** Returns a Z-Order quadrant [0-3]. */
|
||||
protected int battenberg(double xmid, double ymid, double xp, double yp){
|
||||
// http://en.wikipedia.org/wiki/Z-order_%28curve%29
|
||||
if (ymid <= yp) {
|
||||
if (xmid >= xp) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
} else {
|
||||
if (xmid >= xp) {
|
||||
return 2;
|
||||
}
|
||||
return 3;
|
||||
}
|
||||
// possibly consider hilbert curve
|
||||
// http://en.wikipedia.org/wiki/Hilbert_curve
|
||||
// http://blog.notdot.net/2009/11/Damn-Cool-Algorithms-Spatial-indexing-with-Quadtrees-and-Hilbert-Curves
|
||||
// if we actually use the range property in the query, this could be useful
|
||||
}
|
||||
|
||||
protected class QuadCell extends LegacyCell {
|
||||
|
||||
QuadCell(byte[] bytes, int off, int len) {
|
||||
|
|
|
@ -16,8 +16,10 @@
|
|||
*/
|
||||
package org.apache.lucene.spatial.prefix.tree;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
|
||||
|
@ -33,14 +35,17 @@ public abstract class SpatialPrefixTreeFactory {
|
|||
public static final String PREFIX_TREE = "prefixTree";
|
||||
public static final String MAX_LEVELS = "maxLevels";
|
||||
public static final String MAX_DIST_ERR = "maxDistErr";
|
||||
public static final String VERSION = "version";
|
||||
|
||||
protected Map<String, String> args;
|
||||
protected SpatialContext ctx;
|
||||
protected Integer maxLevels;
|
||||
private Version version;
|
||||
|
||||
/**
|
||||
* The factory is looked up via "prefixTree" in args, expecting "geohash" or "quad".
|
||||
* The factory is looked up via "prefixTree" in args, expecting "geohash" or "quad".
|
||||
* If it's neither of these, then "geohash" is chosen for a geo context, otherwise "quad" is chosen.
|
||||
* The "version" arg, if present, is parsed with {@link Version} and the prefix tree might be sensitive to it.
|
||||
*/
|
||||
public static SpatialPrefixTree makeSPT(Map<String,String> args, ClassLoader classLoader, SpatialContext ctx) {
|
||||
//TODO refactor to use Java SPI like how Lucene already does for codecs/postingsFormats, etc
|
||||
|
@ -64,16 +69,26 @@ public abstract class SpatialPrefixTreeFactory {
|
|||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
instance.init(args,ctx);
|
||||
instance.init(args, ctx);
|
||||
return instance.newSPT();
|
||||
}
|
||||
|
||||
protected void init(Map<String, String> args, SpatialContext ctx) {
|
||||
this.args = args;
|
||||
this.ctx = ctx;
|
||||
initVersion();
|
||||
initMaxLevels();
|
||||
}
|
||||
|
||||
protected void initVersion() {
|
||||
String versionStr = args.get(VERSION);
|
||||
try {
|
||||
setVersion(versionStr == null ? Version.LATEST : Version.parseLeniently(versionStr));
|
||||
} catch (ParseException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
protected void initMaxLevels() {
|
||||
String mlStr = args.get(MAX_LEVELS);
|
||||
if (mlStr != null) {
|
||||
|
@ -94,6 +109,20 @@ public abstract class SpatialPrefixTreeFactory {
|
|||
maxLevels = getLevelForDistance(degrees);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the version of Lucene this tree should mimic the behavior for for analysis.
|
||||
*/
|
||||
public void setVersion(Version v) {
|
||||
version = v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the version of Lucene this tree will mimic the behavior of for analysis.
|
||||
*/
|
||||
public Version getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
/** Calls {@link SpatialPrefixTree#getLevelForDistance(double)}. */
|
||||
protected abstract int getLevelForDistance(double degrees);
|
||||
|
||||
|
|
|
@ -49,6 +49,8 @@ public abstract class AbstractSpatialPrefixTreeFieldType<T extends PrefixTreeStr
|
|||
protected void init(IndexSchema schema, Map<String, String> args) {
|
||||
super.init(schema, args);
|
||||
|
||||
args.putIfAbsent(SpatialPrefixTreeFactory.VERSION, schema.getDefaultLuceneMatchVersion().toString());
|
||||
|
||||
// Convert the maxDistErr to degrees (based on distanceUnits) since Lucene spatial layer depends on degrees
|
||||
if(args.containsKey(SpatialPrefixTreeFactory.MAX_DIST_ERR)) {
|
||||
double maxDistErrOriginal = Double.parseDouble(args.get(SpatialPrefixTreeFactory.MAX_DIST_ERR));
|
||||
|
|
|
@ -72,9 +72,13 @@
|
|||
numberType="pdouble" distanceUnits="degrees" storeSubFields="false"/>
|
||||
|
||||
<fieldType name="llp" class="solr.LatLonPointSpatialField" distanceUnits="degrees" multiValued="true" />
|
||||
|
||||
<fieldType name="oslocation" class="solr.SpatialRecursivePrefixTreeFieldType" geo="false" maxDistErr="0.000009"
|
||||
worldBounds="ENVELOPE(0,700000,1300000,0)" distErrPct="0.15"/>
|
||||
<fieldType name="oslocationold" class="solr.SpatialRecursivePrefixTreeFieldType" geo="false" maxDistErr="0.000009"
|
||||
worldBounds="ENVELOPE(0,700000,1300000,0)" distErrPct="0.15" version="8.2.0"/>
|
||||
<field name="id" type="string" required="true"/>
|
||||
|
||||
<field name="oslocation" type="oslocation" />
|
||||
<field name="oslocationold" type="oslocationold" />
|
||||
<field name="srpt_geohash" type="srpt_geohash" multiValued="true"/>
|
||||
<field name="srpt_quad" type="srpt_quad" multiValued="true"/>
|
||||
<field name="srpt_packedquad" type="srpt_packedquad" multiValued="true"/>
|
||||
|
|
|
@ -61,6 +61,13 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
|
|||
RetrievalCombo.idCounter = 0;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuadTreeRobustness() {
|
||||
assertU(adoc("id", "0", "oslocation", "244502.06 639062.07"));
|
||||
// old (pre 8.3.0) still works
|
||||
assertU(adoc("id", "0", "oslocationold", "244502.06 639062.07"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBBox() throws Exception {
|
||||
String fieldName = random().nextBoolean() ? "bbox" : "bboxD_dynamic";
|
||||
|
|
Loading…
Reference in New Issue