diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 872c7377c01..7c6c143228b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -73,6 +73,11 @@ New Features * LUCENE-6423: New LimitTokenOffsetFilter that limits tokens to those before a configured maximum start offset. (David Smiley) +* LUCENE-6422: New spatial PackedQuadPrefixTree, a generally more efficient +
 choice than QuadPrefixTree, especially for high precision shapes. +
 When used, you should typically disable RPT's pruneLeafyBranches option.
 + (Nick Knize, David Smiley) + Optimizations * LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java index 02fdcc5067b..b3491590d9c 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java @@ -33,6 +33,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.spatial.SpatialStrategy; import org.apache.lucene.spatial.composite.CompositeSpatialStrategy; import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory; import org.apache.lucene.spatial.serialized.SerializedDVStrategy; @@ -111,7 +112,13 @@ public class SpatialDocMaker extends DocMaker { RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, spatialField); strategy.setPointsOnly(config.get("spatial.docPointsOnly", false)); - strategy.setPruneLeafyBranches(config.get("spatial.pruneLeafyBranches", true)); + final boolean pruneLeafyBranches = config.get("spatial.pruneLeafyBranches", true); + if (grid instanceof PackedQuadPrefixTree) { + ((PackedQuadPrefixTree) grid).setPruneLeafyBranches(pruneLeafyBranches); + strategy.setPruneLeafyBranches(false);//always leave it to packed grid, even though it isn't the same + } else { + strategy.setPruneLeafyBranches(pruneLeafyBranches); + } int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4); if (prefixGridScanLevel < 0) diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java index a301a9fd527..0209b7d12f6 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java @@ -92,9 +92,14 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy { return pruneLeafyBranches; } - /** An optional hint affecting non-point shapes: it will - * simplify/aggregate sets of complete leaves in a cell to its parent, resulting in ~20-25% - * fewer indexed cells. However, it will likely be removed in the future. (default=true) + /** + * An optional hint affecting non-point shapes: it will + * prune away a complete set sibling leaves to their parent (recursively), resulting in ~20-50% + * fewer indexed cells, and consequently that much less disk and that much faster indexing. + * So if it's a quad tree and all 4 sub-cells are there marked as a leaf, then they will be + * removed (pruned) and the parent is marked as a leaf instead. This occurs recursively on up. Unfortunately, the + * current implementation will buffer all cells to do this, so consider disabling for high precision (low distErrPct) + * shapes. (default=true) */ public void setPruneLeafyBranches(boolean pruneLeafyBranches) { this.pruneLeafyBranches = pruneLeafyBranches; diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java index 4539c8ebdf6..42e02595cc3 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java @@ -65,7 +65,7 @@ public abstract class CellIterator implements Iterator { } @Override - public final Cell next() { + public Cell next() { if (nextCell == null) { if (!hasNext()) throw new NoSuchElementException(); diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java index ee804dbbd9e..f6a4770345f 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java @@ -36,9 +36,9 @@ public abstract class LegacyCell implements Cell { private static final byte LEAF_BYTE = '+';//NOTE: must sort before letters & numbers //Arguably we could simply use a BytesRef, using an extra Object. - private byte[] bytes;//generally bigger to potentially hold a leaf - private int b_off; - private int b_len;//doesn't reflect leaf; same as getLevel() + protected byte[] bytes;//generally bigger to potentially hold a leaf + protected int b_off; + protected int b_len;//doesn't reflect leaf; same as getLevel() protected boolean isLeaf; @@ -68,7 +68,7 @@ public abstract class LegacyCell implements Cell { readLeafAdjust(); } - private void readLeafAdjust() { + protected void readLeafAdjust() { isLeaf = (b_len > 0 && bytes[b_off + b_len - 1] == LEAF_BYTE); if (isLeaf) b_len--; @@ -76,18 +76,6 @@ public abstract class LegacyCell implements Cell { isLeaf = true; } -// @Override -// public void copyFrom(Cell source) { -// LegacyCell src = (LegacyCell) source; -// shapeRel = src.shapeRel; -// shape = src.shape; -// isLeaf = src.isLeaf; -// //we don't actually copy the bytes because in LegacyCell the bytes aren't modified. (leaf byte doesn't count) -// bytes = src.bytes; -// b_off = src.b_off; -// b_len = src.b_len; -// } - protected abstract SpatialPrefixTree getGrid(); protected abstract int getMaxLevels(); @@ -214,7 +202,7 @@ public abstract class LegacyCell implements Cell { /** Copied from {@link BytesRef#compareTo(BytesRef)}. * This is to avoid creating a BytesRef. */ - private static int compare(byte[] aBytes, int aUpto, int a_length, byte[] bBytes, int bUpto, int b_length) { + protected static int compare(byte[] aBytes, int aUpto, int a_length, byte[] bBytes, int bUpto, int b_length) { final int aStop = aUpto + Math.min(a_length, b_length); while(aUpto < aStop) { int aByte = aBytes[aUpto++] & 0xff; diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java new file mode 100644 index 00000000000..71508cc5075 --- /dev/null +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java @@ -0,0 +1,460 @@ +package org.apache.lucene.spatial.prefix.tree; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.NoSuchElementException; + +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Rectangle; +import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.SpatialRelation; +import com.spatial4j.core.shape.impl.RectangleImpl; +import org.apache.lucene.util.BytesRef; + +/** + * Uses a compact binary representation of 8 bytes to encode a spatial quad trie. + * + * The binary representation is as follows: + *
+ * CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDL
+ *
+ * Where C = Cell bits (2 per quad)
+ *       D = Depth bits (5 with max of 29 levels)
+ *       L = isLeaf bit
+ * 
+ * + * It includes a built-in "pruneLeafyBranches" setting (true by default) similar to + * {@link org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy#setPruneLeafyBranches(boolean)} although + * this one only prunes at the target detail level (where it has the most effect). Usually you should disable RPT's + * prune, since it is very memory in-efficient. + * + * @lucene.experimental + */ +public class PackedQuadPrefixTree extends QuadPrefixTree { + public static final int MAX_LEVELS_POSSIBLE = 29; + protected static final byte[] QUAD = new byte[] {0x00, 0x01, 0x02, 0x03}; + + protected boolean leafyPrune = true; + + /** + * Factory for creating {@link PackedQuadPrefixTree} instances with useful defaults. + */ + public static class Factory extends QuadPrefixTree.Factory { + @Override + protected SpatialPrefixTree newSPT() { + return new PackedQuadPrefixTree(ctx, maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE); + } + } + + public PackedQuadPrefixTree(SpatialContext ctx, int maxLevels) { + super(ctx, maxLevels); + if (maxLevels > MAX_LEVELS_POSSIBLE) { + throw new IllegalArgumentException("maxLevels of " + maxLevels + " exceeds limit of " + MAX_LEVELS_POSSIBLE); + } + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(maxLevels:" + maxLevels + ",ctx:" + ctx + ",prune:" + leafyPrune + ")"; + } + + @Override + public Cell getWorldCell() { + return new PackedQuadCell(0x0L); + } + + @Override + public Cell getCell(Point p, int level) { + List cells = new ArrayList<>(1); + build(xmid, ymid, 0, cells, 0x0L, ctx.makePoint(p.getX(), p.getY()), level); + return cells.get(0);//note cells could be longer if p on edge + } + + protected void build(double x, double y, int level, List matches, long term, Shape shape, int maxLevel) { + double w = levelW[level] / 2; + double h = levelH[level] / 2; + + // Z-Order + // http://en.wikipedia.org/wiki/Z-order_%28curve%29 + checkBattenberg(QUAD[0], x - w, y + h, level, matches, term, shape, maxLevel); + checkBattenberg(QUAD[1], x + w, y + h, level, matches, term, shape, maxLevel); + checkBattenberg(QUAD[2], x - w, y - h, level, matches, term, shape, maxLevel); + checkBattenberg(QUAD[3], x + w, y - h, level, matches, term, shape, maxLevel); + } + + protected void checkBattenberg(byte quad, double cx, double cy, int level, List matches, + long term, Shape shape, int maxLevel) { + // short-circuit if we find a match for the point (no need to continue recursion) + if (shape instanceof Point && !matches.isEmpty()) + return; + double w = levelW[level] / 2; + double h = levelH[level] / 2; + + SpatialRelation v = shape.relate(ctx.makeRectangle(cx - w, cx + w, cy - h, cy + h)); + + if (SpatialRelation.DISJOINT == v) { + return; + } + + // set bits for next level + term |= (((long)(quad))<<(64-(++level<<1))); + // increment level + term = ((term>>>1)+1)<<1; + + if (SpatialRelation.CONTAINS == v || (level >= maxLevel)) { + matches.add(new PackedQuadCell(term, v.transpose())); + } else {// SpatialRelation.WITHIN, SpatialRelation.INTERSECTS + build(cx, cy, level, matches, term, shape, maxLevel); + } + } + + @Override + public Cell readCell(BytesRef term, Cell scratch) { + PackedQuadCell cell = (PackedQuadCell) scratch; + if (cell == null) + cell = (PackedQuadCell) getWorldCell(); + cell.readCell(term); + return cell; + } + + @Override + public CellIterator getTreeCellIterator(Shape shape, int detailLevel) { + if (detailLevel > maxLevels) { + throw new IllegalArgumentException("detailLevel:" + detailLevel +" exceed max: " + maxLevels); + } + return new PrefixTreeIterator(shape, (short) detailLevel); + } + + public boolean isPruneLeafyBranches() { + return leafyPrune; + } + + /** Like {@link org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy#setPruneLeafyBranches(boolean)} + * but more memory efficient and only applies to the detailLevel, where it has the most effect. */ + public void setPruneLeafyBranches( boolean pruneLeafyBranches ) { + this.leafyPrune = pruneLeafyBranches; + } + + /** See binary representation in the javadocs of {@link PackedQuadPrefixTree}. */ + protected class PackedQuadCell extends QuadCell { + private long term; + + PackedQuadCell(long term) { + super(null, 0, 0); + this.term = term; + this.b_off = 0; + this.bytes = longToByteArray(this.term); + this.b_len = 8; + readLeafAdjust(); + } + + PackedQuadCell(long term, SpatialRelation shapeRel) { + this(term); + this.shapeRel = shapeRel; + } + + @Override + protected void readCell(BytesRef bytes) { + shapeRel = null; + shape = null; + this.bytes = bytes.bytes; + this.b_off = bytes.offset; + this.b_len = (short) bytes.length; + this.term = longFromByteArray(this.bytes, bytes.offset); + readLeafAdjust(); + } + + private final int getShiftForLevel(final int level) { + return 64 - (level<<1); + } + + public boolean isEnd(final int level, final int shift) { + return (term != 0x0L && ((((0x1L<<(level<<1))-1)-(term>>>shift)) == 0x0L)); + } + + /** + * Get the next cell in the tree without using recursion. descend parameter requests traversal to the child nodes, + * setting this to false will step to the next sibling. + * Note: This complies with lexicographical ordering, once you've moved to the next sibling there is no backtracking. + */ + public PackedQuadCell nextCell(boolean descend) { + final int level = getLevel(); + final int shift = getShiftForLevel(level); + // base case: can't go further + if ( (!descend && isEnd(level, shift)) || isEnd(maxLevels, getShiftForLevel(maxLevels))) { + return null; + } + long newTerm; + final boolean isLeaf = (term&0x1L)==0x1L; + // if descend requested && we're not at the maxLevel + if ((descend && !isLeaf && (level != maxLevels)) || level == 0) { + // simple case: increment level bits (next level) + newTerm = ((term>>>1)+0x1L)<<1; + } else { // we're not descending or we can't descend + newTerm = term + (0x1L<>>shift)&0x3L) == 0x3L) { + // adjust level for number popping up + newTerm = ((newTerm>>>1) - (Long.numberOfTrailingZeros(newTerm>>>shift)>>>1))<<1; + } + } + return new PackedQuadCell(newTerm); + } + + @Override + protected void readLeafAdjust() { + isLeaf = ((0x1L)&term) == 0x1L; + if (getLevel() == getMaxLevels()) { + isLeaf = true; + } + } + + @Override + public BytesRef getTokenBytesWithLeaf(BytesRef result) { + if (isLeaf) { + term |= 0x1L; + } + return getTokenBytesNoLeaf(result); + } + + @Override + public BytesRef getTokenBytesNoLeaf(BytesRef result) { + if (result == null) + return new BytesRef(bytes, b_off, b_len); + result.bytes = longToByteArray(this.term); + result.offset = 0; + result.length = result.bytes.length; + return result; + } + + @Override + public int compareToNoLeaf(Cell fromCell) { + PackedQuadCell b = (PackedQuadCell) fromCell; + final long thisTerm = (((0x1L)&term) == 0x1L) ? term-1 : term; + final long fromTerm = (((0x1L)&b.term) == 0x1L) ? b.term-1 : b.term; + final int result = Long.compareUnsigned(thisTerm, fromTerm); + assert Math.signum(result) + == Math.signum(compare(longToByteArray(thisTerm), 0, 8, longToByteArray(fromTerm), 0, 8)); // TODO remove + return result; + } + + @Override + public int getLevel() { + int l = (int)((term >>> 1)&0x1FL); + return l; + } + + @Override + protected Collection getSubCells() { + List cells = new ArrayList<>(4); + PackedQuadCell pqc = (new PackedQuadCell(((term&0x1)==0x1) ? this.term-1 : this.term)) + .nextCell(true); + cells.add(pqc); + cells.add((pqc = pqc.nextCell(false))); + cells.add((pqc = pqc.nextCell(false))); + cells.add(pqc.nextCell(false)); + return cells; + } + + @Override + protected QuadCell getSubCell(Point p) { + return (PackedQuadCell) PackedQuadPrefixTree.this.getCell(p, getLevel() + 1);//not performant! + } + + @Override + public boolean isPrefixOf(Cell c) { + PackedQuadCell cell = (PackedQuadCell)c; + return (this.term == 0x0L) || isInternalPrefix(cell); + } + + protected boolean isInternalPrefix(PackedQuadCell c) { + final int shift = 64 - (getLevel()<<1); + return ((term>>>shift)-(c.term>>>shift)) == 0x0L; + } + + protected long concat(byte postfix) { + // extra leaf bit + return this.term | (((long)(postfix))<<((getMaxLevels()-getLevel()<<1)+6)); + } + + /** + * Constructs a bounding box shape out of the encoded cell + */ + @Override + protected Rectangle makeShape() { + double xmin = PackedQuadPrefixTree.this.xmin; + double ymin = PackedQuadPrefixTree.this.ymin; + int level = getLevel(); + + byte b; + for (short l=0, i=1; l>>(64-(i<<1))) & 0x3L); + + switch (b) { + case 0x00: + ymin += levelH[l]; + break; + case 0x01: + xmin += levelW[l]; + ymin += levelH[l]; + break; + case 0x02: + break;//nothing really + case 0x03: + xmin += levelW[l]; + break; + default: + throw new RuntimeException("unexpected quadrant"); + } + } + + double width, height; + if (level > 0) { + width = levelW[level - 1]; + height = levelH[level - 1]; + } else { + width = gridW; + height = gridH; + } + return new RectangleImpl(xmin, xmin + width, ymin, ymin + height, ctx); + } + + private long fromBytes(byte b1, byte b2, byte b3, byte b4, byte b5, byte b6, byte b7, byte b8) { + return ((long)b1 & 255L) << 56 | ((long)b2 & 255L) << 48 | ((long)b3 & 255L) << 40 + | ((long)b4 & 255L) << 32 | ((long)b5 & 255L) << 24 | ((long)b6 & 255L) << 16 + | ((long)b7 & 255L) << 8 | (long)b8 & 255L; + } + + private byte[] longToByteArray(long value) { + byte[] result = new byte[8]; + for(int i = 7; i >= 0; --i) { + result[i] = (byte)((int)(value & 255L)); + value >>= 8; + } + return result; + } + + private long longFromByteArray(byte[] bytes, int ofs) { + assert bytes.length >= 8; + return fromBytes(bytes[0+ofs], bytes[1+ofs], bytes[2+ofs], bytes[3+ofs], + bytes[4+ofs], bytes[5+ofs], bytes[6+ofs], bytes[7+ofs]); + } + + /** + * Used for debugging, this will print the bits of the cell + */ + @Override + public String toString() { + StringBuilder s = new StringBuilder(64); + final int numberOfLeadingZeros = Long.numberOfLeadingZeros(term); + for (int i = 0; i < numberOfLeadingZeros; i++) { + s.append('0'); + } + if (term != 0) + s.append(Long.toBinaryString(term)); + return s.toString(); + } + } // PackedQuadCell + + /** This is a streamlined version of TreeCellIterator, with built-in support to prune at detailLevel + * (but not recursively upwards). */ + protected class PrefixTreeIterator extends CellIterator { + private Shape shape; + private PackedQuadCell thisCell; + private PackedQuadCell nextCell; + + private short level; + private final short detailLevel; + private CellIterator pruneIter; + + PrefixTreeIterator(Shape shape, short detailLevel) { + this.shape = shape; + this.thisCell = ((PackedQuadCell)(getWorldCell())).nextCell(true); + this.detailLevel = detailLevel; + this.nextCell = null; + } + + @Override + public boolean hasNext() { + if (nextCell != null) { + return true; + } + SpatialRelation rel; + // loop until we're at the end of the quad tree or we hit a relation + while (thisCell != null) { + rel = thisCell.getShape().relate(shape); + if (rel == SpatialRelation.DISJOINT) { + thisCell = thisCell.nextCell(false); + } else { // within || intersects || contains + thisCell.setShapeRel(rel); + nextCell = thisCell; + if (rel == SpatialRelation.WITHIN) { + thisCell.setLeaf(); + thisCell = thisCell.nextCell(false); + } else { // intersects || contains + level = (short) (thisCell.getLevel()); + if (level == detailLevel || pruned(rel)) { + thisCell.setLeaf(); + if (shape instanceof Point) { + thisCell.setShapeRel(SpatialRelation.WITHIN); + thisCell = null; + } else { + thisCell = thisCell.nextCell(false); + } + break; + } + thisCell = thisCell.nextCell(true); + } + break; + } + } + return nextCell != null; + } + + private boolean pruned(SpatialRelation rel) { + int leaves; + if (rel == SpatialRelation.INTERSECTS && leafyPrune && level == detailLevel - 1) { + for (leaves=0, pruneIter=thisCell.getNextLevelCells(shape); pruneIter.hasNext(); pruneIter.next(), ++leaves); + return leaves == 4; + } + return false; + } + + @Override + public Cell next() { + if (nextCell == null) { + if (!hasNext()) { + throw new NoSuchElementException(); + } + } + // overriding since this implementation sets thisCell in hasNext + Cell temp = nextCell; + nextCell = null; + return temp; + } + + @Override + public void remove() { + //no-op + } + } +} diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java index 1b6bd2245fd..99204abbfa0 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java @@ -62,14 +62,14 @@ public class QuadPrefixTree extends LegacyPrefixTree { public static final int MAX_LEVELS_POSSIBLE = 50;//not really sure how big this should be public static final int DEFAULT_MAX_LEVELS = 12; - private final double xmin; - private final double xmax; - private final double ymin; - private final double ymax; - private final double xmid; - private final double ymid; + protected final double xmin; + protected final double xmax; + protected final double ymin; + protected final double ymax; + protected final double xmid; + protected final double ymid; - private final double gridW; + protected final double gridW; public final double gridH; final double[] levelW; @@ -178,7 +178,7 @@ public class QuadPrefixTree extends LegacyPrefixTree { // if we actually use the range property in the query, this could be useful } - private void checkBattenberg( + protected void checkBattenberg( char c, double cx, double cy, @@ -215,7 +215,7 @@ public class QuadPrefixTree extends LegacyPrefixTree { str.length = strlen; } - private class QuadCell extends LegacyCell { + protected class QuadCell extends LegacyCell { QuadCell(byte[] bytes, int off, int len) { super(bytes, off, len); @@ -244,7 +244,7 @@ public class QuadPrefixTree extends LegacyPrefixTree { return cells; } - private BytesRef concat(BytesRef source, byte b) { + protected BytesRef concat(BytesRef source, byte b) { //+2 for new char + potential leaf final byte[] buffer = Arrays.copyOfRange(source.bytes, source.offset, source.offset + source.length + 2); BytesRef target = new BytesRef(buffer); @@ -270,7 +270,7 @@ public class QuadPrefixTree extends LegacyPrefixTree { return shape; } - private Rectangle makeShape() { + protected Rectangle makeShape() { BytesRef token = getTokenBytesNoLeaf(null); double xmin = QuadPrefixTree.this.xmin; double ymin = QuadPrefixTree.this.ymin; diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java index 51ba2aa83d8..dd9aa1fb914 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java @@ -17,11 +17,11 @@ package org.apache.lucene.spatial.prefix.tree; +import java.util.Map; + import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.distance.DistanceUtils; -import java.util.Map; - /** * Abstract Factory for creating {@link SpatialPrefixTree} instances with useful * defaults and passed on configurations defined in a Map. @@ -52,6 +52,8 @@ public abstract class SpatialPrefixTreeFactory { instance = new GeohashPrefixTree.Factory(); else if ("quad".equalsIgnoreCase(cname)) instance = new QuadPrefixTree.Factory(); + else if ("packedQuad".equalsIgnoreCase(cname)) + instance = new PackedQuadPrefixTree.Factory(); else { try { Class c = classLoader.loadClass(cname); diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java index 06966c7223b..109ba20f534 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java @@ -22,22 +22,23 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Shape; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.spatial.bbox.BBoxStrategy; import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy; import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; +import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.spatial.serialized.SerializedDVStrategy; import org.apache.lucene.spatial.vector.PointVectorStrategy; import org.junit.Test; -import com.carrotsearch.randomizedtesting.annotations.Name; -import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; -import com.spatial4j.core.context.SpatialContext; -import com.spatial4j.core.shape.Point; -import com.spatial4j.core.shape.Shape; public class DistanceStrategyTest extends StrategyTestCase { @@ -57,6 +58,10 @@ public class DistanceStrategyTest extends StrategyTestCase { strategy = new TermQueryPrefixTreeStrategy(grid, "termquery_geohash"); ctorArgs.add(new Object[]{new Param(strategy)}); + grid = new PackedQuadPrefixTree(ctx,25); + strategy = new RecursivePrefixTreeStrategy(grid, "recursive_packedquad"); + ctorArgs.add(new Object[]{new Param(strategy)}); + strategy = new PointVectorStrategy(ctx, "pointvector"); ctorArgs.add(new Object[]{new Param(strategy)}); diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java index f815ec1b3f9..16ff56f2700 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java @@ -47,6 +47,7 @@ import org.apache.lucene.spatial.StrategyTestCase; import org.apache.lucene.spatial.prefix.tree.Cell; import org.apache.lucene.spatial.prefix.tree.CellIterator; import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; +import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.spatial.query.SpatialArgs; @@ -72,14 +73,17 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { public void setupGrid(int maxLevels) throws IOException { if (randomBoolean()) - setupQuadGrid(maxLevels); + setupQuadGrid(maxLevels, randomBoolean()); else setupGeohashGrid(maxLevels); setupCtx2D(ctx); - //((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid - + // set prune independently on strategy & grid randomly; should work ((RecursivePrefixTreeStrategy)strategy).setPruneLeafyBranches(randomBoolean()); + if (this.grid instanceof PackedQuadPrefixTree) { + ((PackedQuadPrefixTree) this.grid).setPruneLeafyBranches(randomBoolean()); + } + if (maxLevels == -1 && rarely()) { ((PrefixTreeStrategy) strategy).setPointsOnly(true); } @@ -97,7 +101,7 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { ctx2D = ctxFactory.newSpatialContext(); } - private void setupQuadGrid(int maxLevels) { + private void setupQuadGrid(int maxLevels, boolean packedQuadPrefixTree) { //non-geospatial makes this test a little easier (in gridSnap), and using boundary values 2^X raises // the prospect of edge conditions we want to test, plus makes for simpler numbers (no decimals). SpatialContextFactory factory = new SpatialContextFactory(); @@ -107,7 +111,11 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { //A fairly shallow grid, and default 2.5% distErrPct if (maxLevels == -1) maxLevels = randomIntBetween(1, 8);//max 64k cells (4^8), also 256*256 - this.grid = new QuadPrefixTree(ctx, maxLevels); + if (packedQuadPrefixTree) { + this.grid = new PackedQuadPrefixTree(ctx, maxLevels); + } else { + this.grid = new QuadPrefixTree(ctx, maxLevels); + } this.strategy = newRPT(); } @@ -148,7 +156,7 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { /** See LUCENE-5062, {@link ContainsPrefixTreeFilter#multiOverlappingIndexedShapes}. */ @Test public void testContainsPairOverlap() throws IOException { - setupQuadGrid(3); + setupQuadGrid(3, randomBoolean()); adoc("0", new ShapePair(ctx.makeRectangle(0, 33, -128, 128), ctx.makeRectangle(33, 128, -128, 128), true)); commit(); Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.Contains, @@ -159,7 +167,7 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { @Test public void testWithinDisjointParts() throws IOException { - setupQuadGrid(7); + setupQuadGrid(7, randomBoolean()); //one shape comprised of two parts, quite separated apart adoc("0", new ShapePair(ctx.makeRectangle(0, 10, -120, -100), ctx.makeRectangle(220, 240, 110, 125), false)); commit(); @@ -173,7 +181,7 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { @Test /** LUCENE-4916 */ public void testWithinLeafApproxRule() throws IOException { - setupQuadGrid(2);//4x4 grid + setupQuadGrid(2, randomBoolean());//4x4 grid //indexed shape will simplify to entire right half (2 top cells) adoc("0", ctx.makeRectangle(192, 204, -128, 128)); commit(); diff --git a/solr/core/src/java/org/apache/solr/schema/SpatialRecursivePrefixTreeFieldType.java b/solr/core/src/java/org/apache/solr/schema/SpatialRecursivePrefixTreeFieldType.java index 2b951ff92db..4e6df22f474 100644 --- a/solr/core/src/java/org/apache/solr/schema/SpatialRecursivePrefixTreeFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/SpatialRecursivePrefixTreeFieldType.java @@ -17,10 +17,11 @@ package org.apache.solr.schema; * limitations under the License. */ -import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; - import java.util.Map; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree; + /** * @see RecursivePrefixTreeStrategy * @lucene.experimental @@ -45,6 +46,11 @@ public class SpatialRecursivePrefixTreeFieldType extends AbstractSpatialPrefixTr RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, fieldName); if (prefixGridScanLevel != null) strategy.setPrefixGridScanLevel(prefixGridScanLevel); + if (grid instanceof PackedQuadPrefixTree) { + // This grid has a (usually) better prune leafy branch implementation + ((PackedQuadPrefixTree) grid).setPruneLeafyBranches(true); + strategy.setPruneLeafyBranches(false); + } return strategy; } } diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml index 5f5d856ca22..3635dc182d9 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml @@ -35,6 +35,9 @@ + @@ -58,6 +61,7 @@ + diff --git a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial.java b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial.java index fbcb0eaf2e6..85874391802 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial.java @@ -26,7 +26,6 @@ import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.distance.DistanceUtils; import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Rectangle; - import org.apache.lucene.spatial.bbox.BBoxStrategy; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; @@ -54,7 +53,7 @@ public class TestSolr4Spatial extends SolrTestCaseJ4 { @ParametersFactory public static Iterable parameters() { return Arrays.asList(new Object[][]{ - {"srpt_geohash"}, {"srpt_quad"}, {"stqpt_geohash"}, {"pointvector"}, {"bbox"} + {"srpt_geohash"}, {"srpt_quad"}, {"srpt_packedquad"}, {"stqpt_geohash"}, {"pointvector"}, {"bbox"} }); }