SOLR-11731: LatLonPointSpatialField now supports docValue retrieval

Closes #323
This commit is contained in:
David Smiley 2018-03-17 12:21:53 -04:00
parent be8dca3c7b
commit 4b08efcf1c
5 changed files with 176 additions and 17 deletions

View File

@ -68,6 +68,9 @@ Optimizations
differential fetching now speeds up recovery times when full index replication is needed, but only
a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
* SOLR-11731: LatLonPointSpatialField can now decode points from docValues when stored=false docValues=true,
albeit with maximum precision of 1.33cm (Karthik Ramachandran, David Smiley)
Other Changes
----------------------

View File

@ -18,11 +18,13 @@
package org.apache.solr.schema;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.Objects;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.ValueSource;
@ -46,6 +48,8 @@ import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;
import static java.math.RoundingMode.CEILING;
/**
* A spatial implementation based on Lucene's {@code LatLonPoint} and {@code LatLonDocValuesField}. The
* first is based on Lucene's "Points" API, which is a BKD Index. This field type is strictly limited to
@ -71,6 +75,26 @@ public class LatLonPointSpatialField extends AbstractSpatialFieldType implements
SchemaField schemaField = schema.getField(fieldName); // TODO change AbstractSpatialFieldType so we get schemaField?
return new LatLonPointSpatialStrategy(ctx, fieldName, schemaField.indexed(), schemaField.hasDocValues());
}
/**
* Decodes the docValues number into latitude and longitude components, formatting as "lat,lon".
* The encoding is governed by {@code LatLonDocValuesField}. The decimal output representation is reflective
* of the available precision.
* @param value Non-null; stored location field data
* @return Non-null; "lat, lon" with 6 decimal point precision
*/
public static String decodeDocValueToString(long value) {
final double latDouble = GeoEncodingUtils.decodeLatitude((int) (value >> 32));
final double lonDouble = GeoEncodingUtils.decodeLongitude((int) (value & 0xFFFFFFFFL));
// 7 decimal places maximizes our available precision to just over a centimeter; we have a test for it.
// CEILING round-trips (decode then re-encode then decode to get identical results). Others did not. It also
// reverses the "floor" that occurs when we encode.
BigDecimal latitudeDecoded = BigDecimal.valueOf(latDouble).setScale(7, CEILING);
BigDecimal longitudeDecoded = BigDecimal.valueOf(lonDouble).setScale(7, CEILING);
return latitudeDecoded.stripTrailingZeros().toPlainString() + ","
+ longitudeDecoded.stripTrailingZeros().toPlainString();
// return ((float)latDouble) + "," + ((float)lonDouble); crude but not quite as accurate
}
// TODO move to Lucene-spatial-extras once LatLonPoint & LatLonDocValuesField moves out of sandbox
public static class LatLonPointSpatialStrategy extends SpatialStrategy {

View File

@ -56,6 +56,7 @@ import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.LatLonPointSpatialField;
import org.apache.solr.schema.AbstractEnumField;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
@ -490,8 +491,16 @@ public class SolrDocumentFetcher {
long number = numericDv.nextValue();
Object value = decodeNumberFromDV(schemaField, number, true);
// return immediately if the number is not decodable, hence won't return an empty list.
if (value == null) return null;
else outValues.add(value);
if (value == null) {
return null;
}
// normally never true but LatLonPointSpatialField uses SORTED_NUMERIC even when single valued
else if (schemaField.multiValued() == false) {
return value;
}
else {
outValues.add(value);
}
}
assert outValues.size() > 0;
return outValues;
@ -515,6 +524,12 @@ public class SolrDocumentFetcher {
}
private Object decodeNumberFromDV(SchemaField schemaField, long value, boolean sortableNumeric) {
// note: This special-case is unfortunate; if we have to add any more than perhaps the fieldType should
// have this method so that specific field types can customize it.
if (schemaField.getType() instanceof LatLonPointSpatialField) {
return LatLonPointSpatialField.decodeDocValueToString(value);
}
if (schemaField.getType().getNumberType() == null) {
log.warn("Couldn't decode docValues for field: [{}], schemaField: [{}], numberType is unknown",
schemaField.getName(), schemaField);

View File

@ -86,8 +86,11 @@
<field name="llp_idx" type="llp" indexed="true" docValues="false" />
<field name="llp_dv" type="llp" indexed="false" docValues="true" />
<field name="llp_1_dv_st" type="llp" indexed="false" docValues="true" stored="true" multiValued="false"/>
<field name="llp_N_dv_st" type="llp" indexed="false" docValues="true" stored="true" multiValued="true"/>
<field name="llp_1_dv" type="llp" indexed="false" docValues="true" stored="false" multiValued="false" useDocValuesAsStored="false"/>
<field name="llp_N_dv" type="llp" indexed="false" docValues="true" stored="false" multiValued="true" useDocValuesAsStored="false"/>
<field name="llp_1_dv_dvasst" type="llp" indexed="false" docValues="true" stored="false" multiValued="false" useDocValuesAsStored="true"/>
<field name="llp_N_dv_dvasst" type="llp" indexed="false" docValues="true" stored="false" multiValued="true" useDocValuesAsStored="true"/>
<dynamicField name="bboxD_*" type="bbox" indexed="true"/>
<dynamicField name="str_*" type="string" indexed="true" stored="true"/>

View File

@ -16,15 +16,31 @@
*/
package org.apache.solr.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import org.apache.lucene.geo.GeoTestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.metrics.MetricsMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.SpatialUtils;
import org.apache.solr.util.TestUtils;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils;
import org.locationtech.spatial4j.shape.Point;
//Unlike TestSolr4Spatial, not parametrized / not generic.
public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
@ -117,24 +133,122 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
"q", "{!cache=false field f=" + fieldName + "}Intersects(" + polygonWKT + ")",
"sort", "id asc"), "/response/numFound==2");
}
@Test @Repeat(iterations = 10)
public void testLLPDecodeIsStableAndPrecise() throws Exception {
// test that LatLonPointSpatialField decode of docValue will round-trip (re-index then re-decode) to the same value
@SuppressWarnings({"resource", "IOResourceOpenedButNotSafelyClosed"})
SolrClient client = new EmbeddedSolrServer(h.getCore());// do NOT close it; it will close Solr
final String fld = "llp_1_dv_dvasst";
String ptOrig = GeoTestUtil.nextLatitude() + "," + GeoTestUtil.nextLongitude();
assertU(adoc("id", "0", fld, ptOrig));
assertU(commit());
// retrieve it (probably less precision
String ptDecoded1 = (String) client.query(params("q", "id:0")).getResults().get(0).get(fld);
// now write it back
assertU(adoc("id", "0", fld, ptDecoded1));
assertU(commit());
// retrieve it and hopefully the same
String ptDecoded2 = (String) client.query(params("q", "id:0")).getResults().get(0).get(fld);
assertEquals("orig:" + ptOrig, ptDecoded1, ptDecoded2);
// test that the representation is pretty accurate
final Point ptOrigObj = SpatialUtils.parsePoint(ptOrig, SpatialContext.GEO);
final Point ptDecodedObj = SpatialUtils.parsePoint(ptDecoded1, SpatialContext.GEO);
double deltaCentimeters = SpatialContext.GEO.calcDistance(ptOrigObj, ptDecodedObj) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
// //See javadocs of LatLonDocValuesField
// final Point absErrorPt = SpatialContext.GEO.getShapeFactory().pointXY(8.381903171539307E-8, 4.190951585769653E-8);
// double deltaCentimetersMax
// = SpatialContext.GEO.calcDistance(absErrorPt, 0,0) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
// // equals 1.0420371840922256 which is a bit lower than what we're able to do
assertTrue("deltaCm too high: " + deltaCentimeters, deltaCentimeters < 1.33);
}
@Test
public void testLatLonRetrieval() throws Exception {
assertU(adoc("id", "0",
"llp_1_dv_st", "-75,41",
"llp_1_dv", "-80,20",
"llp_1_dv_dvasst", "10,-30"));
final String ptHighPrecision = "40.2996543270,-74.0824956673";
final String ptLossOfPrecision = "40.2996544,-74.0824957"; // rounded version of the one above, losing precision
// "_1" is single, "_N" is multiValued
// "_dv" is docValues (otherwise not), "_dvasst" is useDocValuesAsStored (otherwise not)
// "_st" is stored" (otherwise not)
List<RetrievalCombo> combos = Arrays.asList(
new RetrievalCombo("llp_1_dv_st", ptHighPrecision),
new RetrievalCombo("llp_N_dv_st", Arrays.asList("-40,40", "-45,45")),
new RetrievalCombo("llp_N_dv_st", Arrays.asList("-40,40")), // multiValued but 1 value
new RetrievalCombo("llp_1_dv_dvasst", ptHighPrecision, ptLossOfPrecision),
// this one comes back in a different order since it gets sorted low to high
new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40", "-45,45"), Arrays.asList("-45,45", "-40,40")),
new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40")), // multiValued but 1 value
// edge cases. (note we sorted it as Lucene will internally)
new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList(
"-90,180", "-90,-180",
"0,0", "0,180", "0,-180",
"90,0", "90,180", "90,-180")),
new RetrievalCombo("llp_1_dv", ptHighPrecision, ptLossOfPrecision),
new RetrievalCombo("llp_N_dv", Arrays.asList("-45,45", "-40,40"))
);
Collections.shuffle(combos, random());
// add and commit
for (RetrievalCombo combo : combos) {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", "" + combo.id);
for (String indexValue : combo.indexValues) {
doc.addField(combo.fieldName, indexValue);
}
assertU(adoc(doc));
if (TestUtils.rarely()) { // induce segments to potentially change internal behavior
assertU(commit());
}
}
assertU(commit());
assertJQ(req("q","*:*", "fl","*"),
"response/docs/[0]/llp_1_dv_st=='-75,41'",
// Right now we do not support decoding point value from dv field
"!response/docs/[0]/llp_1_dv=='-80,20'",
"!response/docs/[0]/llp_1_dv_dvasst=='10,-30'");
assertJQ(req("q","*:*", "fl","llp_1_dv_st, llp_1_dv, llp_1_dv_dvasst"),
"response/docs/[0]/llp_1_dv_st=='-75,41'",
// Even when these fields are specified, we won't return them
"!response/docs/[0]/llp_1_dv=='-80,20'",
"!response/docs/[0]/llp_1_dv_dvasst=='10,-30'");
// create an assertJQ assertion string, once for fl=*, another for when the field is listed
List<String> assertJQsFlListed = new ArrayList<>();
List<String> assertJQsFlStar = new ArrayList<>();
for (RetrievalCombo combo : combos) {
String expect = "response/docs/[" + combo.id + "]/" + combo.fieldName + "==" + combo.expectReturnJSON;
assertJQsFlListed.add(expect);
if (combo.fieldName.endsWith("_dv")) {
expect = "response/docs/[" + combo.id + "]=={'id':'" + combo.id + "'}"; // only the id, nothing else
}
assertJQsFlStar.add(expect);
}
// check
assertJQ(req("q","*:*", "sort", "id asc",
"fl","*"),
assertJQsFlStar.toArray(new String[0]));
assertJQ(req("q","*:*", "sort", "id asc",
"fl", "id," + combos.stream().map(c -> c.fieldName).collect(Collectors.joining(","))),
assertJQsFlListed.toArray(new String[0]));
}
private static class RetrievalCombo {
static int idCounter = 0;
final int id = idCounter++;
final String fieldName;
final List<String> indexValues;
final String expectReturnJSON; //or null if not expected in response
RetrievalCombo(String fieldName, List<String> indexValues) { this(fieldName, indexValues, indexValues);}
RetrievalCombo(String fieldName, List<String> indexValues, List<String> returnValues) {
this.fieldName = fieldName;
this.indexValues = indexValues;
this.expectReturnJSON = returnValues.stream().collect(Collectors.joining("', '", "['", "']"));
}
RetrievalCombo(String fieldName, String indexValue) { this(fieldName, indexValue, indexValue); }
RetrievalCombo(String fieldName, String indexValue, String returnValue) {
this.fieldName = fieldName;
this.indexValues = Collections.singletonList(indexValue);
this.expectReturnJSON = "'" + returnValue + "'";
}
}
private void testRptWithGeometryField(String fieldName) throws Exception {