SOLR-7379: Spatial RptWithGeometrySpatialField (based on CompositeSpatialStrategy)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680862 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2015-05-21 13:04:33 +00:00
parent f5cf37e4f3
commit 961960f1c2
7 changed files with 375 additions and 11 deletions

View File

@ -62,6 +62,18 @@ public class CompositeSpatialStrategy extends SpatialStrategy {
this.geometryStrategy = geometryStrategy;
}
public RecursivePrefixTreeStrategy getIndexStrategy() {
return indexStrategy;
}
public SerializedDVStrategy getGeometryStrategy() {
return geometryStrategy;
}
public boolean isOptimizePredicates() {
return optimizePredicates;
}
/** Set to false to NOT use optimized search predicates that avoid checking the geometry sometimes. Only useful for
* benchmarking. */
public void setOptimizePredicates(boolean optimizePredicates) {

View File

@ -212,6 +212,10 @@ New Features
* SOLR-7274: Pluggable authentication module in Solr. This defines an interface and a mechanism to create,
load, and use an Authentication plugin. (Noble Paul, Ishan Chattopadhyaya, Gregory Chanan, Anshum Gupta)
* SOLR-7379: (experimental) New spatial RptWithGeometrySpatialField, based on CompositeSpatialStrategy,
which blends RPT indexes for speed with serialized geometry for accuracy. Includes a Lucene segment based
in-memory shape cache. (David Smiley)
Bug Fixes
----------------------

View File

@ -47,10 +47,12 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.AbstractSpatialPrefixTreeFieldType;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.RptWithGeometrySpatialField;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.SpatialRecursivePrefixTreeFieldType;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.util.DistanceUnits;
import org.apache.solr.util.SpatialUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -75,13 +77,24 @@ public class SpatialHeatmapFacets {
//get the strategy from the field type
final SchemaField schemaField = rb.req.getSchema().getField(fieldName);
final FieldType type = schemaField.getType();
if (!(type instanceof AbstractSpatialPrefixTreeFieldType)) {
final PrefixTreeStrategy strategy;
final DistanceUnits distanceUnits;
// note: the two instanceof conditions is not ideal, versus one. If we start needing to add more then refactor.
if ((type instanceof AbstractSpatialPrefixTreeFieldType)) {
AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
distanceUnits = rptType.getDistanceUnits();
} else if (type instanceof RptWithGeometrySpatialField) {
RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type;
strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy();
distanceUnits = rptSdvType.getDistanceUnits();
} else {
//FYI we support the term query one too but few people use that one
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "heatmap field needs to be of type "
+ SpatialRecursivePrefixTreeFieldType.class);
+ SpatialRecursivePrefixTreeFieldType.class + " or " + RptWithGeometrySpatialField.class);
}
AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
final PrefixTreeStrategy strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
final SpatialContext ctx = strategy.getSpatialContext();
//get the bbox (query Rectangle)
@ -106,7 +119,7 @@ public class SpatialHeatmapFacets {
final Double distErrObj = params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR);
if (distErrObj != null) {
// convert distErr units based on configured units
spatialArgs.setDistErr(distErrObj * rptType.getDistanceUnits().multiplierFromThisUnitToDegrees());
spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees());
}
spatialArgs.setDistErrPct(params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT));
double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);

View File

@ -0,0 +1,249 @@
package org.apache.solr.schema;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.Map;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.jts.JtsGeometry;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.spatial.composite.CompositeSpatialStrategy;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.query.SpatialArgsParser;
import org.apache.lucene.spatial.serialized.SerializedDVStrategy;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.search.SolrCache;
/** A Solr Spatial FieldType based on {@link CompositeSpatialStrategy}.
* @lucene.experimental */
public class RptWithGeometrySpatialField extends AbstractSpatialFieldType<CompositeSpatialStrategy> {
public static final String DEFAULT_DIST_ERR_PCT = "0.15";
private SpatialRecursivePrefixTreeFieldType rptFieldType;
private SolrCore core;
@Override
protected void init(IndexSchema schema, Map<String, String> args) {
// Do NOT call super.init(); instead we delegate to an RPT field. Admittedly this is error prone.
//TODO Move this check to a call from AbstractSpatialFieldType.createFields() so the type can declare
// if it supports multi-valued or not. It's insufficient here; we can't see if you set multiValued on the field.
if (isMultiValued()) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Not capable of multiValued: " + getTypeName());
}
// Choose a better default distErrPct if not configured
if (args.containsKey(SpatialArgsParser.DIST_ERR_PCT) == false) {
args.put(SpatialArgsParser.DIST_ERR_PCT, DEFAULT_DIST_ERR_PCT);
}
rptFieldType = new SpatialRecursivePrefixTreeFieldType();
rptFieldType.setTypeName(getTypeName());
rptFieldType.properties = properties;
rptFieldType.init(schema, args);
rptFieldType.argsParser = argsParser = newSpatialArgsParser();
this.ctx = rptFieldType.ctx;
this.distanceUnits = rptFieldType.distanceUnits;
this.units = rptFieldType.units;
}
@Override
protected CompositeSpatialStrategy newSpatialStrategy(String fieldName) {
// We use the same field name for both sub-strategies knowing there will be no conflict for these two
RecursivePrefixTreeStrategy rptStrategy = rptFieldType.newSpatialStrategy(fieldName);
SerializedDVStrategy geomStrategy = new CachingSerializedDVStrategy(ctx, fieldName);
return new CompositeSpatialStrategy(fieldName, rptStrategy, geomStrategy);
}
@Override
public Analyzer getQueryAnalyzer() {
return rptFieldType.getQueryAnalyzer();
}
@Override
public Analyzer getIndexAnalyzer() {
return rptFieldType.getIndexAnalyzer();
}
// Most of the complexity of this field type is below, which is all about caching the shapes in a SolrCache
private static class CachingSerializedDVStrategy extends SerializedDVStrategy {
public CachingSerializedDVStrategy(SpatialContext ctx, String fieldName) {
super(ctx, fieldName);
}
@Override
public ValueSource makeShapeValueSource() {
return new CachingShapeValuesource(super.makeShapeValueSource(), getFieldName());
}
}
private static class CachingShapeValuesource extends ValueSource {
private final ValueSource targetValueSource;
private final String fieldName;
private CachingShapeValuesource(ValueSource targetValueSource, String fieldName) {
this.targetValueSource = targetValueSource;
this.fieldName = fieldName;
}
@Override
public String description() {
return "cache(" + targetValueSource.description() + ")";
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
CachingShapeValuesource that = (CachingShapeValuesource) o;
if (!targetValueSource.equals(that.targetValueSource)) return false;
return fieldName.equals(that.fieldName);
}
@Override
public int hashCode() {
int result = targetValueSource.hashCode();
result = 31 * result + fieldName.hashCode();
return result;
}
@SuppressWarnings("unchecked")
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
final FunctionValues targetFuncValues = targetValueSource.getValues(context, readerContext);
// The key is a pair of leaf reader with a docId relative to that reader. The value is a Map from field to Shape.
final SolrCache<PerSegCacheKey,Shape> cache =
SolrRequestInfo.getRequestInfo().getReq().getSearcher().getCache(CACHE_KEY_PREFIX + fieldName);
if (cache == null) {
return targetFuncValues; // no caching; no configured cache
}
return new FunctionValues() {
int docId = -1;
Shape shape = null;
private void setShapeFromDoc(int doc) {
if (docId == doc) {
return;
}
docId = doc;
//lookup in cache
PerSegCacheKey key = new PerSegCacheKey(readerContext.reader().getCoreCacheKey(), doc);
shape = cache.get(key);
if (shape == null) {
shape = (Shape) targetFuncValues.objectVal(doc);
if (shape != null) {
cache.put(key, shape);
}
} else {
//optimize shape on a cache hit if possible. This must be thread-safe and it is.
if (shape instanceof JtsGeometry) {
((JtsGeometry) shape).index(); // TODO would be nice if some day we didn't have to cast
}
}
}
// Use the cache for exists & objectVal;
@Override
public boolean exists(int doc) {
setShapeFromDoc(doc);
return shape != null;
}
@Override
public Object objectVal(int doc) {
setShapeFromDoc(doc);
return shape;
}
@Override
public Explanation explain(int doc) {
return targetFuncValues.explain(doc);
}
@Override
public String toString(int doc) {
return targetFuncValues.toString(doc);
}
};
}
}
public static final String CACHE_KEY_PREFIX = "perSegSpatialFieldCache_";//then field name
// Used in a SolrCache for the key
private static class PerSegCacheKey {
final WeakReference<Object> segCoreKeyRef;
final int docId;
final int hashCode;//cached because we can't necessarily compute after construction
private PerSegCacheKey(Object segCoreKey, int docId) {
this.segCoreKeyRef = new WeakReference<>(segCoreKey);
this.docId = docId;
this.hashCode = segCoreKey.hashCode() * 31 + docId;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
PerSegCacheKey that = (PerSegCacheKey) o;
if (docId != that.docId) return false;
//compare by referent not reference
Object segCoreKey = segCoreKeyRef.get();
if (segCoreKey == null) {
return false;
}
return segCoreKey.equals(that.segCoreKeyRef.get());
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public String toString() {
return "Key{seg=" + segCoreKeyRef.get() + ", docId=" + docId + '}';
}
}
}

View File

@ -50,6 +50,8 @@
<fieldType name="pointvector" class="solr.SpatialPointVectorFieldType"
numberType="tdouble" distanceUnits="degrees"/>
<fieldType name="srptgeom" class="solr.RptWithGeometrySpatialField" />
<fieldType name="bbox" class="solr.BBoxField"
numberType="tdoubleDV" distanceUnits="degrees" storeSubFields="false"/>
</types>
@ -64,6 +66,7 @@
<field name="srpt_packedquad" type="srpt_packedquad" multiValued="true" />
<field name="stqpt_geohash" type="stqpt_geohash" multiValued="true" />
<field name="pointvector" type="pointvector" />
<field name="srptgeom" type="srptgeom" />
<field name="bbox" type="bbox" />
<dynamicField name="bboxD_*" type="bbox" indexed="true" />

View File

@ -0,0 +1,36 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- a basic solrconfig that tests NoOpRegenerator -->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<dataDir>${solr.data.dir:}</dataDir>
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<requestHandler name="standard" class="solr.StandardRequestHandler" />
<requestHandler name="/update" class="solr.UpdateRequestHandler" />
<query>
<cache name="perSegSpatialFieldCache_srptgeom"
class="solr.LRUCache"
size="3"
initialSize="0"
autowarmCount="100%"
regenerator="solr.NoOpRegenerator"/>
</query>
</config>

View File

@ -19,6 +19,8 @@ package org.apache.solr.search;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@ -28,7 +30,7 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml", "schema-spatial.xml");
initCore("solrconfig-spatial.xml", "schema-spatial.xml");
}
@Override
@ -72,19 +74,19 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
);
//area2D
assertJQ(req("q", "{!field f="+fieldName+" filter=false score=area2D}" +
assertJQ(req("q", "{!field f=" + fieldName + " filter=false score=area2D}" +
"Intersects(ENVELOPE(0,0,12,12))",//pt
"fl", "id,score",
"debug", "results"),//explain info
"/response/docs/[0]/id=='1'" ,
"/response/docs/[0]/id=='1'",
"/response/docs/[0]/score==" + (30f * 5f) + "]"//150
);
//area (not 2D)
assertJQ(req("q", "{!field f="+fieldName+" filter=false score=area}" +
assertJQ(req("q", "{!field f=" + fieldName + " filter=false score=area}" +
"Intersects(ENVELOPE(0,0,12,12))",//pt
"fl", "id,score",
"debug", "results"),//explain info
"/response/docs/[0]/id=='1'" ,
"/response/docs/[0]/id=='1'",
"/response/docs/[0]/score==" + 146.39793f + "]"//a bit less than 150
);
}
@ -94,8 +96,53 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
String fieldName = "bbox";
assertQEx("expect friendly error message",
"area2D",
req("{!field f="+fieldName+" filter=false score=bogus}Intersects(ENVELOPE(0,0,12,12))"),
req("{!field f=" + fieldName + " filter=false score=bogus}Intersects(ENVELOPE(0,0,12,12))"),
SolrException.ErrorCode.BAD_REQUEST);
}
@Test
public void testRptWithGeometryField() throws Exception {
String fieldName = "srptgeom"; //note: fails with "srpt_geohash" because it's not as precise
assertU(adoc("id", "0", fieldName, "ENVELOPE(-10, 20, 15, 10)"));
assertU(adoc("id", "1", fieldName, "BUFFER(POINT(-10 15), 5)"));//circle at top-left corner
assertU(optimize());// one segment.
assertU(commit());
// Search to the edge but not quite touching the indexed envelope of id=0. It requires geom validation to
// eliminate id=0. id=1 is found and doesn't require validation. cache=false means no query cache.
final SolrQueryRequest sameReq = req(
"q", "{!cache=false field f=" + fieldName + "}Intersects(ENVELOPE(-20, -10.0001, 30, 15.0001))",
"sort", "id asc");
assertJQ(sameReq, "/response/numFound==1", "/response/docs/[0]/id=='1'");
// The tricky thing is verifying the cache works correctly...
SolrCache cache = (SolrCache) h.getCore().getInfoRegistry().get("perSegSpatialFieldCache_srptgeom");
assertEquals("1", cache.getStatistics().get("cumulative_inserts").toString());
assertEquals("0", cache.getStatistics().get("cumulative_hits").toString());
// Repeat the query earlier
assertJQ(sameReq, "/response/numFound==1", "/response/docs/[0]/id=='1'");
assertEquals("1", cache.getStatistics().get("cumulative_hits").toString());
assertEquals("1 segment",
1, ((SolrIndexSearcher) h.getCore().getInfoRegistry().get("searcher")).getRawReader().leaves().size());
// add new segment
assertU(adoc("id", "3"));
assertU(commit()); // sometimes merges (to one seg), sometimes won't
boolean newSeg =
(((SolrIndexSearcher)h.getCore().getInfoRegistry().get("searcher")).getRawReader().leaves().size() > 1);
// can still find the same document
assertJQ(sameReq, "/response/numFound==1", "/response/docs/[0]/id=='1'");
// when there are new segments, we accumulate another hit. This tests the cache was not blown away on commit.
assertEquals(newSeg ? "2" : "1", cache.getStatistics().get("cumulative_hits").toString());
// Now try to see if heatmaps work:
assertJQ(req("q", "*:*", "facet", "true", FacetParams.FACET_HEATMAP, fieldName, "json.nl", "map"),
"/facet_counts/facet_heatmaps/" + fieldName + "/minX==-180.0");
}
}