From 84c5a4addeb8a160ea19c6e0e42d861f97f5f358 Mon Sep 17 00:00:00 2001 From: David Wayne Smiley Date: Mon, 24 Sep 2012 04:46:41 +0000 Subject: [PATCH] Document the SpatialStrategies consistently and more thoroughly. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1389204 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/spatial/SpatialStrategy.java | 14 ++--- .../lucene/spatial/bbox/BBoxStrategy.java | 24 +++++++- .../spatial/prefix/PrefixTreeStrategy.java | 57 +++++++++++++++++-- .../prefix/RecursivePrefixTreeFilter.java | 18 +++--- .../prefix/RecursivePrefixTreeStrategy.java | 13 ++++- .../prefix/TermQueryPrefixTreeStrategy.java | 16 +++--- .../prefix/tree/SpatialPrefixTree.java | 11 ++-- .../spatial/vector/PointVectorStrategy.java | 29 ++++++++-- 8 files changed, 140 insertions(+), 42 deletions(-) diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/SpatialStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/SpatialStrategy.java index 2fb173518fc..c5c33db8047 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/SpatialStrategy.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/SpatialStrategy.java @@ -40,12 +40,12 @@ import org.apache.lucene.spatial.query.SpatialArgs; *
  • What types of query shapes can be used?
  • *
  • What types of query operations are supported? * This might vary per shape.
  • - *
  • Are there caches? Under what circumstances are they used? - * Roughly how big are they? Is it segmented by Lucene segments, such as is - * done by the Lucene {@link org.apache.lucene.search.FieldCache} and - * {@link org.apache.lucene.index.DocValues} (ideal) or is it for the entire - * index? + *
  • Does it use the {@link org.apache.lucene.search.FieldCache}, {@link + * org.apache.lucene.index.DocValues} or some other type of cache? When? * + * If a strategy only supports certain shapes at index or query time, then in + * general it will throw an exception if given an incompatible one. It will not + * be coerced into compatibility. *

    * Note that a SpatialStrategy is not involved with the Lucene stored field * values of shapes, which is immaterial to indexing & search. @@ -85,7 +85,7 @@ public abstract class SpatialStrategy { } /** - * Returns the IndexableField(s) from the shape that are to be + * Returns the IndexableField(s) from the {@code shape} that are to be * added to the {@link org.apache.lucene.document.Document}. These fields * are expected to be marked as indexed and not stored. *

    @@ -139,7 +139,7 @@ public abstract class SpatialStrategy { /** * Returns a ValueSource with values ranging from 1 to 0, depending inversely * on the distance from {@link #makeDistanceValueSource(com.spatial4j.core.shape.Point)}. - * The formula is c/(d + c) where 'd' is the distance and 'c' is + * The formula is {@code c/(d + c)} where 'd' is the distance and 'c' is * one tenth the distance to the farthest edge from the center. Thus the * scores will be 1 for indexed points at the center of the query shape and as * low as ~0.1 at its furthest edges. diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java index e723890efc4..7b2204ff478 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java @@ -43,8 +43,28 @@ import org.apache.lucene.spatial.query.UnsupportedSpatialOperation; /** - * Based on GeoPortal's - * SpatialClauseAdapter. + * A SpatialStrategy for indexing and searching Rectangles by storing its + * coordinates in numeric fields. It supports all {@link SpatialOperation}s and + * has a custom overlap relevancy. It is based on GeoPortal's SpatialClauseAdapter. + * + *

    Characteristics:

    + * + * + *

    Implementation:

    + * This uses 4 double fields for minX, maxX, minY, maxY + * and a boolean to mark a dateline cross. Depending on the particular {@link + * SpatialOperation}s, there is a variety of {@link NumericRangeQuery}s to be + * done. + * The {@link #makeBBoxAreaSimilarityValueSource(com.spatial4j.core.shape.Rectangle)} + * works by calculating the query bbox overlap percentage against the indexed + * shape overlap percentage. The indexed shape's coordinates are retrieved from + * the {@link org.apache.lucene.search.FieldCache}. * * @lucene.experimental */ diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java index 7c31d3e2926..60525604dee 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java @@ -37,8 +37,41 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; /** - * Abstract SpatialStrategy which provides common functionality for those - * Strategys which use {@link SpatialPrefixTree}s + * An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two + * subclasses are {@link RecursivePrefixTreeStrategy} and {@link + * TermQueryPrefixTreeStrategy}. This strategy is most effective as a fast + * approximate spatial search filter. + * + *

    Characteristics:

    + * + * + *

    Implementation:

    + * The {@link SpatialPrefixTree} does most of the work, for example returning + * a list of terms representing grids of various sizes for a supplied shape. + * An important + * configuration item is {@link #setDistErrPct(double)} which balances + * shape precision against scalability. See those javadocs. + * * @lucene.internal */ public abstract class PrefixTreeStrategy extends SpatialStrategy { @@ -52,7 +85,12 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy { this.grid = grid; } - /** Used in the in-memory ValueSource as a default ArrayList length for this field's array of values, per doc. */ + /** + * A memory hint used by {@link #makeDistanceValueSource(com.spatial4j.core.shape.Point)} + * for how big the initial size of each Document's array should be. The + * default is 2. Set this to slightly more than the default expected number + * of points per document. + */ public void setDefaultFieldValuesArrayLen(int defaultFieldValuesArrayLen) { this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen; } @@ -62,8 +100,14 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy { } /** - * The default measure of shape precision affecting indexed and query shapes. - * Specific shapes at index and query time can use something different. + * The default measure of shape precision affecting shapes at index and query + * times. Points don't use this as they are always indexed at the configured + * maximum precision ({@link org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree#getMaxLevels()}); + * this applies to all other shapes. Specific shapes at index and query time + * can use something different than this default value. If you don't set a + * default then the default is {@link SpatialArgs#DEFAULT_DISTERRPCT} -- + * 2.5%. + * * @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct() */ public void setDistErrPct(double distErrPct) { @@ -81,7 +125,8 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy { List cells = grid.getNodes(shape, detailLevel, true);//true=intermediates cells //If shape isn't a point, add a full-resolution center-point so that // PointPrefixTreeFieldCacheProvider has the center-points. - // TODO index each center of a multi-point? Yes/no? + //TODO index each point of a multi-point or other aggregate. + //TODO remove this once support for a distance ValueSource is removed. if (!(shape instanceof Point)) { Point ctr = shape.getCenter(); //TODO should be smarter; don't index 2 tokens for this in CellTokenStream. Harmless though. diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java index 92ac72a36a8..3023070ad8c 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java @@ -34,13 +34,15 @@ import java.io.IOException; import java.util.LinkedList; /** - * Performs a spatial intersection filter between a query shape and a field indexed with {@link SpatialPrefixTree}, a Trie. - * SPT yields terms (grids) at length 1 and at greater lengths corresponding to greater precisions. - * This filter recursively traverses each grid length and uses methods on {@link Shape} to efficiently know - * that all points at a prefix fit in the shape or not to either short-circuit unnecessary traversals or to efficiently - * load all enclosed points. If no indexed data lies in a portion of the shape - * then that portion of the query shape is quickly passed over without - * decomposing the shape unnecessarily. + * Performs a spatial intersection filter between a query shape and a field + * indexed with {@link SpatialPrefixTree}, a Trie. SPT yields terms (grids) at + * length 1 (aka "Level 1") and at greater lengths corresponding to greater + * precisions. This filter recursively traverses each grid length and uses + * methods on {@link Shape} to efficiently know that all points at a prefix fit + * in the shape or not to either short-circuit unnecessary traversals or to + * efficiently load all enclosed points. If no indexed data lies in a portion + * of the shape then that portion of the query shape is quickly passed over + * without decomposing the shape unnecessarily. * * @lucene.internal */ @@ -167,7 +169,7 @@ RE "scan" threshold: @Override public String toString() { - return "GeoFilter{fieldName='" + fieldName + '\'' + ", shape=" + queryShape + '}'; + return getClass().getSimpleName()+"{fieldName='" + fieldName + '\'' + ", shape=" + queryShape + '}'; } @Override diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java index dcbb21525b4..1615b457fd2 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java @@ -25,7 +25,11 @@ import org.apache.lucene.spatial.query.SpatialOperation; import org.apache.lucene.spatial.query.UnsupportedSpatialOperation; /** - * Based on {@link RecursivePrefixTreeFilter}. + * A {@link PrefixTreeStrategy} which uses {@link RecursivePrefixTreeFilter}. + * This strategy has support for searching non-point shapes (note: not tested). + * Even a query shape with distErrPct=0 (fully precise to the grid) should have + * good performance for typical data, unless there is a lot of indexed data + * coincident with the shape's edge. * * @lucene.experimental */ @@ -38,6 +42,13 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy { prefixGridScanLevel = grid.getMaxLevels() - 4;//TODO this default constant is dependent on the prefix grid size } + /** + * Sets the grid level [1-maxLevels] at which indexed terms are scanned brute-force + * instead of by grid decomposition. By default this is maxLevels - 4. The + * final level, maxLevels, is always scanned. + * + * @param prefixGridScanLevel 1 to maxLevels + */ public void setPrefixGridScanLevel(int prefixGridScanLevel) { //TODO if negative then subtract from maxlevels this.prefixGridScanLevel = prefixGridScanLevel; diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java index c2f260c0e67..8eb14eee00a 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java @@ -30,14 +30,14 @@ import org.apache.lucene.spatial.query.UnsupportedSpatialOperation; import java.util.List; /** - * A basic implementation of {@link PrefixTreeStrategy} using a large - * {@link TermsFilter} of all the nodes from - * {@link SpatialPrefixTree#getNodes(com.spatial4j.core.shape.Shape, int, boolean)}. - * It only supports the search of indexed Point shapes. - *

    - * The precision of query shapes is an important factor in using this Strategy. - * If the precision is too precise then it will result in many terms which will - * amount to a slower query. + * A basic implementation of {@link PrefixTreeStrategy} using a large {@link + * TermsFilter} of all the nodes from {@link SpatialPrefixTree#getNodes(com.spatial4j.core.shape.Shape, + * int, boolean)}. It only supports the search of indexed Point shapes. + *

    + * The precision of query shapes (distErrPct) is an important factor in using + * this Strategy. If the precision is too precise then it will result in many + * terms which will amount to a slower query. + * * @lucene.experimental */ public class TermQueryPrefixTreeStrategy extends PrefixTreeStrategy { diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java index fd35c8081cc..4e80364c2ca 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java @@ -28,10 +28,13 @@ import java.util.Collections; import java.util.List; /** - * A spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings at variable lengths corresponding to - * variable precision. Each string corresponds to a spatial region. - * - * Implementations of this class should be thread-safe and immutable once initialized. + * A spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings + * at variable lengths corresponding to variable precision. Each string + * corresponds to a rectangular spatial region. This approach is + * also referred to "Grids", "Tiles", and "Spatial Tiers". + *

    + * Implementations of this class should be thread-safe and immutable once + * initialized. * * @lucene.experimental */ diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java index 19b0721577a..9126226a6c8 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java @@ -44,14 +44,31 @@ import org.apache.lucene.spatial.util.CachingDoubleValueSource; import org.apache.lucene.spatial.util.ValueSourceFilter; /** - * Simple {@link SpatialStrategy} which represents Points in two numeric {@link DoubleField}s. + * Simple {@link SpatialStrategy} which represents Points in two numeric {@link + * DoubleField}s. The Strategy's best feature is decent distance sort. * - * Note, currently only Points can be indexed by this Strategy. At query time, the bounding - * box of the given Shape is used to create {@link NumericRangeQuery}s to efficiently - * find Points within the Shape. + *

    Characteristics:

    + *
      + *
    • Only indexes points; just one per field value.
    • + *
    • Can query by a rectangle or circle.
    • + *
    • {@link + * org.apache.lucene.spatial.query.SpatialOperation#Intersects} and {@link + * SpatialOperation#IsWithin} is supported.
    • + *
    • Uses the FieldCache for + * {@link #makeDistanceValueSource(com.spatial4j.core.shape.Point)} and for + * searching with a Circle.
    • + *
    * - * Due to the simple use of numeric fields, this Strategy provides support for sorting by - * distance through {@link DistanceValueSource} + *

    Implementation:

    + * This is a simple Strategy. Search works with {@link NumericRangeQuery}s on + * an x & y pair of fields. A Circle query does the same bbox query but adds a + * ValueSource filter on + * {@link #makeDistanceValueSource(com.spatial4j.core.shape.Point)}. + *

    + * One performance shortcoming with this strategy is that a scenario involving + * both a search using a Circle and sort will result in calculations for the + * spatial distance being done twice -- once for the filter and second for the + * sort. * * @lucene.experimental */