mirror of https://github.com/apache/lucene.git
LUCENE-4585: PrefixTree bugs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1418006 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ffb1558e91
commit
4eb1c502c9
|
@ -69,6 +69,12 @@ Changes in backwards compatibility policy
|
||||||
previous two methods returned by calling parents(), children() or siblings()
|
previous two methods returned by calling parents(), children() or siblings()
|
||||||
on the returned ParallelTaxonomyArrays. (Shai Erera)
|
on the returned ParallelTaxonomyArrays. (Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-4585: Spatial PrefixTree based Strategies (either TermQuery or
|
||||||
|
RecursivePrefix based) MAY want to re-index if used for point data. If a
|
||||||
|
re-index is not done, then an indexed point is ~1/2 the smallest grid cell
|
||||||
|
larger and as such is slightly more likely to match a query shape.
|
||||||
|
(David Smiley)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
||||||
* LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
|
* LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
|
||||||
|
@ -207,16 +213,6 @@ Bug Fixes
|
||||||
* LUCENE-4009: Improve TermsFilter.toString (Tim Costermans via Chris
|
* LUCENE-4009: Improve TermsFilter.toString (Tim Costermans via Chris
|
||||||
Male, Mike McCandless)
|
Male, Mike McCandless)
|
||||||
|
|
||||||
* LUCENE-4588: Benchmark's EnwikiContentSource was discarding last wiki
|
|
||||||
document and had leaking threads in 'forever' mode. (Doron Cohen)
|
|
||||||
|
|
||||||
Changes in Runtime Behavior
|
|
||||||
|
|
||||||
* LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.
|
|
||||||
This only affects requests with depth>1. If you execute such requests and
|
|
||||||
rely on the facet results being returned flat (i.e. no hierarchy), you should
|
|
||||||
set the ResultMode to GLOBAL_FLAT. (Shai Erera, Gilad Barkai)
|
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-2221: oal.util.BitUtil was modified to use Long.bitCount and
|
* LUCENE-2221: oal.util.BitUtil was modified to use Long.bitCount and
|
||||||
|
@ -269,9 +265,6 @@ Optimizations
|
||||||
Users of this API can now simply obtain an instance via DocValues#getDirectSource per thread.
|
Users of this API can now simply obtain an instance via DocValues#getDirectSource per thread.
|
||||||
(Simon Willnauer)
|
(Simon Willnauer)
|
||||||
|
|
||||||
* LUCENE-4580: DrillDown.query variants return a ConstantScoreQuery with boost set to 0.0f
|
|
||||||
so that documents scores are not affected by running a drill-down query. (Shai Erera)
|
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
|
|
||||||
* LUCENE-4483: Refer to BytesRef.deepCopyOf in Term's constructor that takes BytesRef.
|
* LUCENE-4483: Refer to BytesRef.deepCopyOf in Term's constructor that takes BytesRef.
|
||||||
|
@ -287,10 +280,6 @@ Build
|
||||||
RandomizedContext.contexts static map. Upgrade randomized testing
|
RandomizedContext.contexts static map. Upgrade randomized testing
|
||||||
to version 2.0.2 (Mike McCandless, Dawid Weiss)
|
to version 2.0.2 (Mike McCandless, Dawid Weiss)
|
||||||
|
|
||||||
* LUCENE-4589: Upgraded benchmark module's Nekohtml dependency to version
|
|
||||||
1.9.17, removing the workaround in Lucene's HTML parser for the
|
|
||||||
Turkish locale. (Uwe Schindler)
|
|
||||||
|
|
||||||
|
|
||||||
======================= Lucene 4.0.0 =======================
|
======================= Lucene 4.0.0 =======================
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,8 @@ public class PointPrefixTreeFieldCacheProvider extends ShapeFieldCacheProvider<P
|
||||||
@Override
|
@Override
|
||||||
protected Point readShape(BytesRef term) {
|
protected Point readShape(BytesRef term) {
|
||||||
scanCell = grid.getNode(term.bytes, term.offset, term.length, scanCell);
|
scanCell = grid.getNode(term.bytes, term.offset, term.length, scanCell);
|
||||||
return scanCell.isLeaf() ? scanCell.getShape().getCenter() : null;
|
if (scanCell.getLevel() == grid.getMaxLevels() && !scanCell.isLeaf())
|
||||||
|
return scanCell.getCenter();
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,11 @@ package org.apache.lucene.spatial.prefix;
|
||||||
|
|
||||||
import com.spatial4j.core.shape.Shape;
|
import com.spatial4j.core.shape.Shape;
|
||||||
import com.spatial4j.core.shape.SpatialRelation;
|
import com.spatial4j.core.shape.SpatialRelation;
|
||||||
import org.apache.lucene.index.*;
|
import org.apache.lucene.index.AtomicReader;
|
||||||
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.DocIdSet;
|
import org.apache.lucene.search.DocIdSet;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.Filter;
|
import org.apache.lucene.search.Filter;
|
||||||
|
@ -110,23 +114,28 @@ RE "scan" threshold:
|
||||||
while(!cells.isEmpty()) {
|
while(!cells.isEmpty()) {
|
||||||
final Node cell = cells.removeFirst();
|
final Node cell = cells.removeFirst();
|
||||||
final BytesRef cellTerm = new BytesRef(cell.getTokenBytes());
|
final BytesRef cellTerm = new BytesRef(cell.getTokenBytes());
|
||||||
TermsEnum.SeekStatus seekStat = termsEnum.seekCeil(cellTerm);
|
if (!termsEnum.seekExact(cellTerm, true))
|
||||||
if (seekStat == TermsEnum.SeekStatus.END)
|
|
||||||
break;
|
|
||||||
if (seekStat == TermsEnum.SeekStatus.NOT_FOUND)
|
|
||||||
continue;
|
continue;
|
||||||
if (cell.getLevel() == detailLevel || cell.isLeaf()) {
|
if (cell.getLevel() == detailLevel || cell.isLeaf()) {
|
||||||
docsEnum = termsEnum.docs(acceptDocs, docsEnum, 0);
|
docsEnum = termsEnum.docs(acceptDocs, docsEnum, 0);
|
||||||
addDocs(docsEnum,bits);
|
addDocs(docsEnum,bits);
|
||||||
} else {//any other intersection
|
} else {//any other intersection
|
||||||
//If the next indexed term is the leaf marker, then add all of them
|
assert cell.getLevel() < detailLevel; //assertions help clarify logic
|
||||||
|
assert !cell.isLeaf();
|
||||||
|
//If the next indexed term just adds a leaf marker ('+') to cell,
|
||||||
|
// then add all of those docs
|
||||||
BytesRef nextCellTerm = termsEnum.next();
|
BytesRef nextCellTerm = termsEnum.next();
|
||||||
|
if (nextCellTerm == null)
|
||||||
|
break;
|
||||||
assert StringHelper.startsWith(nextCellTerm, cellTerm);
|
assert StringHelper.startsWith(nextCellTerm, cellTerm);
|
||||||
scanCell = grid.getNode(nextCellTerm.bytes, nextCellTerm.offset, nextCellTerm.length, scanCell);
|
scanCell = grid.getNode(nextCellTerm.bytes, nextCellTerm.offset, nextCellTerm.length, scanCell);
|
||||||
if (scanCell.isLeaf()) {
|
if (scanCell.getLevel() == cell.getLevel() && scanCell.isLeaf()) {
|
||||||
docsEnum = termsEnum.docs(acceptDocs, docsEnum, 0);
|
docsEnum = termsEnum.docs(acceptDocs, docsEnum, 0);
|
||||||
addDocs(docsEnum,bits);
|
addDocs(docsEnum,bits);
|
||||||
termsEnum.next();//move pointer to avoid potential redundant addDocs() below
|
//increment pointer to avoid potential redundant addDocs() below
|
||||||
|
nextCellTerm = termsEnum.next();
|
||||||
|
if (nextCellTerm == null)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Decide whether to continue to divide & conquer, or whether it's time to scan through terms beneath this cell.
|
//Decide whether to continue to divide & conquer, or whether it's time to scan through terms beneath this cell.
|
||||||
|
@ -144,8 +153,13 @@ RE "scan" threshold:
|
||||||
if (termLevel > detailLevel)
|
if (termLevel > detailLevel)
|
||||||
continue;
|
continue;
|
||||||
if (termLevel == detailLevel || scanCell.isLeaf()) {
|
if (termLevel == detailLevel || scanCell.isLeaf()) {
|
||||||
//TODO should put more thought into implications of box vs point
|
Shape cShape;
|
||||||
Shape cShape = termLevel == grid.getMaxLevels() ? scanCell.getCenter() : scanCell.getShape();
|
//if this cell represents a point, use the cell center vs the box
|
||||||
|
// (points never have isLeaf())
|
||||||
|
if (termLevel == grid.getMaxLevels() && !scanCell.isLeaf())
|
||||||
|
cShape = scanCell.getCenter();
|
||||||
|
else
|
||||||
|
cShape = scanCell.getShape();
|
||||||
if(queryShape.relate(cShape) == SpatialRelation.DISJOINT)
|
if(queryShape.relate(cShape) == SpatialRelation.DISJOINT)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
|
@ -101,11 +101,11 @@ public class GeohashPrefixTree extends SpatialPrefixTree {
|
||||||
|
|
||||||
class GhCell extends Node {
|
class GhCell extends Node {
|
||||||
GhCell(String token) {
|
GhCell(String token) {
|
||||||
super(GeohashPrefixTree.this, token);
|
super(token);
|
||||||
}
|
}
|
||||||
|
|
||||||
GhCell(byte[] bytes, int off, int len) {
|
GhCell(byte[] bytes, int off, int len) {
|
||||||
super(GeohashPrefixTree.this, bytes, off, len);
|
super(bytes, off, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -44,11 +44,14 @@ public abstract class Node implements Comparable<Node> {
|
||||||
|
|
||||||
private String token;//this is the only part of equality
|
private String token;//this is the only part of equality
|
||||||
|
|
||||||
protected SpatialRelation shapeRel;//set in getSubCells(filter), and via setLeaf().
|
/** When set via getSubCells(filter), it is the relationship between this
|
||||||
private SpatialPrefixTree spatialPrefixTree;
|
* cell and the given shape filter. If set via setLeaf() (to WITHIN), it is
|
||||||
|
* meant to indicate no further sub-cells are going to be provided because
|
||||||
|
* maxLevels or a detailLevel is hit. It's always null for points.
|
||||||
|
*/
|
||||||
|
protected SpatialRelation shapeRel;
|
||||||
|
|
||||||
protected Node(SpatialPrefixTree spatialPrefixTree, String token) {
|
protected Node(String token) {
|
||||||
this.spatialPrefixTree = spatialPrefixTree;
|
|
||||||
this.token = token;
|
this.token = token;
|
||||||
if (token.length() > 0 && token.charAt(token.length() - 1) == (char) LEAF_BYTE) {
|
if (token.length() > 0 && token.charAt(token.length() - 1) == (char) LEAF_BYTE) {
|
||||||
this.token = token.substring(0, token.length() - 1);
|
this.token = token.substring(0, token.length() - 1);
|
||||||
|
@ -59,8 +62,7 @@ public abstract class Node implements Comparable<Node> {
|
||||||
getShape();//ensure any lazy instantiation completes to make this threadsafe
|
getShape();//ensure any lazy instantiation completes to make this threadsafe
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Node(SpatialPrefixTree spatialPrefixTree, byte[] bytes, int off, int len) {
|
protected Node(byte[] bytes, int off, int len) {
|
||||||
this.spatialPrefixTree = spatialPrefixTree;
|
|
||||||
this.bytes = bytes;
|
this.bytes = bytes;
|
||||||
this.b_off = off;
|
this.b_off = off;
|
||||||
this.b_len = len;
|
this.b_len = len;
|
||||||
|
@ -78,11 +80,10 @@ public abstract class Node implements Comparable<Node> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void b_fixLeaf() {
|
private void b_fixLeaf() {
|
||||||
|
//note that non-point shapes always have the maxLevels cell set with setLeaf
|
||||||
if (bytes[b_off + b_len - 1] == LEAF_BYTE) {
|
if (bytes[b_off + b_len - 1] == LEAF_BYTE) {
|
||||||
b_len--;
|
b_len--;
|
||||||
setLeaf();
|
setLeaf();
|
||||||
} else if (getLevel() == spatialPrefixTree.getMaxLevels()) {
|
|
||||||
setLeaf();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,6 +91,10 @@ public abstract class Node implements Comparable<Node> {
|
||||||
return shapeRel;
|
return shapeRel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For points, this is always false. Otherwise this is true if there are no
|
||||||
|
* further cells with this prefix for the shape (always true at maxLevels).
|
||||||
|
*/
|
||||||
public boolean isLeaf() {
|
public boolean isLeaf() {
|
||||||
return shapeRel == SpatialRelation.WITHIN;
|
return shapeRel == SpatialRelation.WITHIN;
|
||||||
}
|
}
|
||||||
|
@ -133,8 +138,14 @@ public abstract class Node implements Comparable<Node> {
|
||||||
//public Cell getParent();
|
//public Cell getParent();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Like {@link #getSubCells()} but with the results filtered by a shape. If that shape is a {@link com.spatial4j.core.shape.Point} then it
|
* Like {@link #getSubCells()} but with the results filtered by a shape. If
|
||||||
* must call {@link #getSubCell(com.spatial4j.core.shape.Point)};
|
* that shape is a {@link com.spatial4j.core.shape.Point} then it
|
||||||
|
* must call {@link #getSubCell(com.spatial4j.core.shape.Point)}.
|
||||||
|
* The returned cells should have their {@link Node#shapeRel} set to their
|
||||||
|
* relation with {@code shapeFilter} for non-point. As such,
|
||||||
|
* {@link org.apache.lucene.spatial.prefix.tree.Node#isLeaf()} should be
|
||||||
|
* accurate.
|
||||||
|
* <p/>
|
||||||
* Precondition: Never called when getLevel() == maxLevel.
|
* Precondition: Never called when getLevel() == maxLevel.
|
||||||
*
|
*
|
||||||
* @param shapeFilter an optional filter for the returned cells.
|
* @param shapeFilter an optional filter for the returned cells.
|
||||||
|
|
|
@ -228,16 +228,16 @@ public class QuadPrefixTree extends SpatialPrefixTree {
|
||||||
class QuadCell extends Node {
|
class QuadCell extends Node {
|
||||||
|
|
||||||
public QuadCell(String token) {
|
public QuadCell(String token) {
|
||||||
super(QuadPrefixTree.this, token);
|
super(token);
|
||||||
}
|
}
|
||||||
|
|
||||||
public QuadCell(String token, SpatialRelation shapeRel) {
|
public QuadCell(String token, SpatialRelation shapeRel) {
|
||||||
super(QuadPrefixTree.this, token);
|
super(token);
|
||||||
this.shapeRel = shapeRel;
|
this.shapeRel = shapeRel;
|
||||||
}
|
}
|
||||||
|
|
||||||
QuadCell(byte[] bytes, int off, int len) {
|
QuadCell(byte[] bytes, int off, int len) {
|
||||||
super(QuadPrefixTree.this, bytes, off, len);
|
super(bytes, off, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -156,7 +156,7 @@ public abstract class SpatialPrefixTree {
|
||||||
}
|
}
|
||||||
final Collection<Node> subCells = node.getSubCells(shape);
|
final Collection<Node> subCells = node.getSubCells(shape);
|
||||||
if (node.getLevel() == detailLevel - 1) {
|
if (node.getLevel() == detailLevel - 1) {
|
||||||
if (subCells.size() < node.getSubCellsSize()) {
|
if (subCells.size() < node.getSubCellsSize() || node.getLevel() == 0) {
|
||||||
if (inclParents)
|
if (inclParents)
|
||||||
result.add(node);
|
result.add(node);
|
||||||
for (Node subCell : subCells) {
|
for (Node subCell : subCells) {
|
||||||
|
@ -164,7 +164,7 @@ public abstract class SpatialPrefixTree {
|
||||||
}
|
}
|
||||||
result.addAll(subCells);
|
result.addAll(subCells);
|
||||||
} else {//a bottom level (i.e. detail level) optimization where all boxes intersect, so use parent cell.
|
} else {//a bottom level (i.e. detail level) optimization where all boxes intersect, so use parent cell.
|
||||||
node.setLeaf();
|
node.setLeaf();//the cell may not be strictly within but its close
|
||||||
result.add(node);
|
result.add(node);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in New Issue