LUCENE-4223 spatial docs: overview.html, SpatialStrategy, and added SpatialExample.java sample

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364782 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2012-07-23 20:19:02 +00:00
parent c0f8cd69a8
commit 618c38fd98
3 changed files with 241 additions and 4 deletions

View File

@ -28,10 +28,26 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.query.SpatialArgs;
/**
* The SpatialStrategy encapsulates an approach to indexing and searching based on shapes.
* The SpatialStrategy encapsulates an approach to indexing and searching based
* on shapes.
* <p/>
* Note that a SpatialStrategy is not involved with the Lucene stored field values of shapes, which is
* immaterial to indexing & search.
* Different implementations will support different features. A strategy should
* document these common elements:
* <ul>
* <li>Can it index more than one shape per field?</li>
* <li>What types of shapes can be indexed?</li>
* <li>What types of query shapes can be used?</li>
* <li>What types of query operations are supported?
* This might vary per shape.</li>
* <li>Are there caches? Under what circumstances are they used?
* Roughly how big are they? Is it segmented by Lucene segments, such as is
* done by the Lucene {@link org.apache.lucene.search.FieldCache} and
* {@link org.apache.lucene.index.DocValues} (ideal) or is it for the entire
* index?
* </ul>
* <p/>
* Note that a SpatialStrategy is not involved with the Lucene stored field
* values of shapes, which is immaterial to indexing & search.
* <p/>
* Thread-safe.
*

View File

@ -16,8 +16,49 @@
-->
<html>
<head>
<title>Apache Lucene Spatial Strategies</title>
<title>Apache Lucene Spatial Module</title>
</head>
<body>
<h1>The Spatial Module for Apache Lucene</h1>
<p>
The spatial module is new is Lucene 4, replacing the old contrib module
that came before it. The principle interface to the module is
a {@link org.apache.lucene.spatial.SpatialStrategy}
which encapsulates an approach to indexing and searching
based on shapes. Different Strategies have different features and
performance profiles, which are documented at each Strategy class level.
</p>
<p>
For some sample code showing how to use the API, see SpatialExample.java in
the tests.
</p>
<p>
The spatial module uses
<a href="https://github.com/spatial4j/spatial4j">Spatial4j</a>
heavily. Spatial4j is an ASL licensed library with these capabilities:
<ul>
<li>Provides shape implementations, namely point, rectangle,
and circle. Both geospatial contexts and plain 2D Euclidean/Cartesian contexts
are supported.
With an additional dependency, it adds polygon and other geometry shape
support via integration with
<a href="http://sourceforge.net/projects/jts-topo-suite/">JTS Topology Suite</a>.
This includes dateline wrap support.</li>
<li>Shape parsing and serialization, including
<a href="http://en.wikipedia.org/wiki/Well-known_text">Well-Known Text (WKT)</a>
(via JTS).</li>
<li>Distance and other spatial related math calculations.</li>
</ul>
</p>
<p>
Historical note: The new spatial module was once known as
Lucene Spatial Playground (LSP) as an external project. In ~March 2012, LSP
split into this new module as part of Lucene and Spatial4j externally. A
large chunk of the LSP implementation originated as SOLR-2155 which uses
trie/prefix-tree algorithms with a geohash encoding.
</p>
</body>
</html>

View File

@ -0,0 +1,180 @@
package org.apache.lucene.spatial;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.carrotsearch.randomizedtesting.RandomizedTest;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.simple.SimpleSpatialContext;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialArgsParser;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import java.io.IOException;
/**
* This class serves as example code to show how to use the Lucene spatial
* module.
*/
public class SpatialExample extends RandomizedTest {
public static void main(String[] args) throws IOException {
new SpatialExample().test();
}
public void test() throws IOException {
init();
indexPoints();
search();
}
/**
* The Spatial4j {@link SpatialContext} is a sort of global-ish singleton
* needed by Lucene spatial. It's a facade to the rest of Spatial4j, acting
* as a factory for {@link Shape}s and provides access to reading and writing
* them from Strings.
*/
private SpatialContext ctx;//"ctx" is the conventional variable name
/**
* The Lucene spatial {@link SpatialStrategy} encapsulates an approach to
* indexing and searching shapes, and providing relevancy scores for them.
* It's a simple API to unify different approaches.
* <p />
* Note that these are initialized with a field name.
*/
private SpatialStrategy strategy;
private Directory directory;
protected void init() {
//Typical geospatial context with kilometer units.
// These can also be constructed from a factory: SpatialContextFactory
this.ctx = SimpleSpatialContext.GEO_KM;
int maxLevels = 10;//results in sub-meter precision for geohash
//TODO demo lookup by detail distance
// This can also be constructed from a factory: SpatialPrefixTreeFactory
SpatialPrefixTree grid = new GeohashPrefixTree(ctx, maxLevels);
this.strategy = new RecursivePrefixTreeStrategy(grid, "myGeoField");
this.directory = new RAMDirectory();
}
private void indexPoints() throws IOException {
IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_50,null);
IndexWriter indexWriter = new IndexWriter(directory, iwConfig);
//Spatial4j is x-y order for arguments
indexWriter.addDocument(newSampleDocument(
2, ctx.makePoint(-80.93, 33.77)));
//When parsing a string to a shape, the presence of a comma means it's y-x
// order (lon, lat)
indexWriter.addDocument(newSampleDocument(
4, ctx.readShape("-50.7693246, 60.9289094")));
indexWriter.addDocument(newSampleDocument(
20, ctx.makePoint(0.1,0.1), ctx.makePoint(0, 0)));
indexWriter.close();
}
private Document newSampleDocument(int id, Shape... shapes) {
Document doc = new Document();
doc.add(new IntField("id", id, Field.Store.YES));
//Potentially more than one shape in this field is supported by some
// strategies; see the javadocs of the SpatialStrategy impl to see.
for (Shape shape : shapes) {
for (IndexableField f : strategy.createIndexableFields(shape)) {
doc.add(f);
}
//store it too; the format is up to you
doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
}
return doc;
}
private void search() throws IOException {
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
Sort idSort = new Sort(new SortField("id", SortField.Type.INT));
//--Filter by circle (<= distance from a point)
{
//Search with circle
//note: SpatialArgs can be parsed from a string
SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
ctx.makeCircle(-80.0, 33.0, 200));//200km (since km == ctx.getDistanceUnits
Filter filter = strategy.makeFilter(args);
TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), filter, 10, idSort);
assertDocMatchedIds(indexSearcher, docs, 2);
}
//--Match all, order by distance
{
SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,//doesn't matter
ctx.makePoint(60, -50));
ValueSource valueSource = strategy.makeValueSource(args);//the distance
Sort reverseDistSort = new Sort(valueSource.getSortField(false)).rewrite(indexSearcher);//true=asc dist
TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), 10, reverseDistSort);
assertDocMatchedIds(indexSearcher, docs, 4, 20, 2);
}
//demo arg parsing
{
SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
ctx.makeCircle(-80.0, 33.0, 200));
SpatialArgs args2 = new SpatialArgsParser().parse("Intersects(Circle(33,-80 d=200))", ctx);
assertEquals(args.toString(),args2.toString());
}
indexReader.close();
}
private void assertDocMatchedIds(IndexSearcher indexSearcher, TopDocs docs, int... ids) throws IOException {
int[] gotIds = new int[docs.totalHits];
for (int i = 0; i < gotIds.length; i++) {
gotIds[i] = indexSearcher.doc(docs.scoreDocs[i].doc).getField("id").numericValue().intValue();
}
assertArrayEquals(ids,gotIds);
}
}