SOLR-11731: LatLonPointSpatialField now supports docValue retrieval

Closes #323
This commit is contained in:
David Smiley 2018-03-17 12:21:53 -04:00
parent be8dca3c7b
commit 4b08efcf1c
5 changed files with 176 additions and 17 deletions

View File

@ -68,6 +68,9 @@ Optimizations
differential fetching now speeds up recovery times when full index replication is needed, but only differential fetching now speeds up recovery times when full index replication is needed, but only
a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher) a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
* SOLR-11731: LatLonPointSpatialField can now decode points from docValues when stored=false docValues=true,
albeit with maximum precision of 1.33cm (Karthik Ramachandran, David Smiley)
Other Changes Other Changes
---------------------- ----------------------

View File

@ -18,11 +18,13 @@
package org.apache.solr.schema; package org.apache.solr.schema;
import java.io.IOException; import java.io.IOException;
import java.math.BigDecimal;
import java.util.Objects; import java.util.Objects;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.LatLonDocValuesField; import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.LatLonPoint; import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
@ -46,6 +48,8 @@ import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle; import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape; import org.locationtech.spatial4j.shape.Shape;
import static java.math.RoundingMode.CEILING;
/** /**
* A spatial implementation based on Lucene's {@code LatLonPoint} and {@code LatLonDocValuesField}. The * A spatial implementation based on Lucene's {@code LatLonPoint} and {@code LatLonDocValuesField}. The
* first is based on Lucene's "Points" API, which is a BKD Index. This field type is strictly limited to * first is based on Lucene's "Points" API, which is a BKD Index. This field type is strictly limited to
@ -71,6 +75,26 @@ public class LatLonPointSpatialField extends AbstractSpatialFieldType implements
SchemaField schemaField = schema.getField(fieldName); // TODO change AbstractSpatialFieldType so we get schemaField? SchemaField schemaField = schema.getField(fieldName); // TODO change AbstractSpatialFieldType so we get schemaField?
return new LatLonPointSpatialStrategy(ctx, fieldName, schemaField.indexed(), schemaField.hasDocValues()); return new LatLonPointSpatialStrategy(ctx, fieldName, schemaField.indexed(), schemaField.hasDocValues());
} }
/**
 * Turns a packed docValues number into a human readable "lat,lon" string.
 * The upper 32 bits of {@code value} are decoded as the latitude and the lower 32 bits as the longitude,
 * following the encoding used by {@code LatLonDocValuesField}. Each component is written as a plain decimal
 * (never scientific notation), rounded to at most 7 decimal places, with trailing zeros removed.
 * @param value the packed docValues number of a single point
 * @return Non-null; "lat,lon" (no space after the comma) with up to 7 decimal places per component
 */
public static String decodeDocValueToString(long value) {
  // 7 decimal places maximizes the available precision to just over a centimeter; there is a test for it.
  final int decimalPlaces = 7;
  final int encodedLat = (int) (value >> 32);
  final int encodedLon = (int) (value & 0xFFFFFFFFL);
  // CEILING is used because it round-trips: decoding, re-encoding and decoding again yields identical
  // results, which other rounding modes did not. It also reverses the "floor" applied at encode time.
  // Simply casting each double to float would be cruder and not quite as accurate.
  final String latText = BigDecimal.valueOf(GeoEncodingUtils.decodeLatitude(encodedLat))
      .setScale(decimalPlaces, CEILING)
      .stripTrailingZeros()
      .toPlainString();
  final String lonText = BigDecimal.valueOf(GeoEncodingUtils.decodeLongitude(encodedLon))
      .setScale(decimalPlaces, CEILING)
      .stripTrailingZeros()
      .toPlainString();
  return latText + "," + lonText;
}
// TODO move to Lucene-spatial-extras once LatLonPoint & LatLonDocValuesField moves out of sandbox // TODO move to Lucene-spatial-extras once LatLonPoint & LatLonDocValuesField moves out of sandbox
public static class LatLonPointSpatialStrategy extends SpatialStrategy { public static class LatLonPointSpatialStrategy extends SpatialStrategy {

View File

@ -56,6 +56,7 @@ import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrDocumentBase; import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrConfig;
import org.apache.solr.schema.BoolField; import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.LatLonPointSpatialField;
import org.apache.solr.schema.AbstractEnumField; import org.apache.solr.schema.AbstractEnumField;
import org.apache.solr.schema.NumberType; import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
@ -490,8 +491,16 @@ public class SolrDocumentFetcher {
long number = numericDv.nextValue(); long number = numericDv.nextValue();
Object value = decodeNumberFromDV(schemaField, number, true); Object value = decodeNumberFromDV(schemaField, number, true);
// return immediately if the number is not decodable, hence won't return an empty list. // return immediately if the number is not decodable, hence won't return an empty list.
if (value == null) return null; if (value == null) {
else outValues.add(value); return null;
}
// normally never true but LatLonPointSpatialField uses SORTED_NUMERIC even when single valued
else if (schemaField.multiValued() == false) {
return value;
}
else {
outValues.add(value);
}
} }
assert outValues.size() > 0; assert outValues.size() > 0;
return outValues; return outValues;
@ -515,6 +524,12 @@ public class SolrDocumentFetcher {
} }
private Object decodeNumberFromDV(SchemaField schemaField, long value, boolean sortableNumeric) { private Object decodeNumberFromDV(SchemaField schemaField, long value, boolean sortableNumeric) {
// note: This special-case is unfortunate; if we have to add any more, then perhaps the fieldType should
// have this method so that specific field types can customize it.
if (schemaField.getType() instanceof LatLonPointSpatialField) {
return LatLonPointSpatialField.decodeDocValueToString(value);
}
if (schemaField.getType().getNumberType() == null) { if (schemaField.getType().getNumberType() == null) {
log.warn("Couldn't decode docValues for field: [{}], schemaField: [{}], numberType is unknown", log.warn("Couldn't decode docValues for field: [{}], schemaField: [{}], numberType is unknown",
schemaField.getName(), schemaField); schemaField.getName(), schemaField);

View File

@ -86,8 +86,11 @@
<field name="llp_idx" type="llp" indexed="true" docValues="false" /> <field name="llp_idx" type="llp" indexed="true" docValues="false" />
<field name="llp_dv" type="llp" indexed="false" docValues="true" /> <field name="llp_dv" type="llp" indexed="false" docValues="true" />
<field name="llp_1_dv_st" type="llp" indexed="false" docValues="true" stored="true" multiValued="false"/> <field name="llp_1_dv_st" type="llp" indexed="false" docValues="true" stored="true" multiValued="false"/>
<field name="llp_N_dv_st" type="llp" indexed="false" docValues="true" stored="true" multiValued="true"/>
<field name="llp_1_dv" type="llp" indexed="false" docValues="true" stored="false" multiValued="false" useDocValuesAsStored="false"/> <field name="llp_1_dv" type="llp" indexed="false" docValues="true" stored="false" multiValued="false" useDocValuesAsStored="false"/>
<field name="llp_N_dv" type="llp" indexed="false" docValues="true" stored="false" multiValued="true" useDocValuesAsStored="false"/>
<field name="llp_1_dv_dvasst" type="llp" indexed="false" docValues="true" stored="false" multiValued="false" useDocValuesAsStored="true"/> <field name="llp_1_dv_dvasst" type="llp" indexed="false" docValues="true" stored="false" multiValued="false" useDocValuesAsStored="true"/>
<field name="llp_N_dv_dvasst" type="llp" indexed="false" docValues="true" stored="false" multiValued="true" useDocValuesAsStored="true"/>
<dynamicField name="bboxD_*" type="bbox" indexed="true"/> <dynamicField name="bboxD_*" type="bbox" indexed="true"/>
<dynamicField name="str_*" type="string" indexed="true" stored="true"/> <dynamicField name="str_*" type="string" indexed="true" stored="true"/>

View File

@ -16,15 +16,31 @@
*/ */
package org.apache.solr.search; package org.apache.solr.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import org.apache.lucene.geo.GeoTestUtil;
import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.metrics.MetricsMap; import org.apache.solr.metrics.MetricsMap;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.SpatialUtils;
import org.apache.solr.util.TestUtils;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils;
import org.locationtech.spatial4j.shape.Point;
//Unlike TestSolr4Spatial, not parametrized / not generic. //Unlike TestSolr4Spatial, not parametrized / not generic.
public class TestSolr4Spatial2 extends SolrTestCaseJ4 { public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
@ -117,24 +133,122 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
"q", "{!cache=false field f=" + fieldName + "}Intersects(" + polygonWKT + ")", "q", "{!cache=false field f=" + fieldName + "}Intersects(" + polygonWKT + ")",
"sort", "id asc"), "/response/numFound==2"); "sort", "id asc"), "/response/numFound==2");
} }
@Test @Repeat(iterations = 10)
public void testLLPDecodeIsStableAndPrecise() throws Exception {
  // Checks two properties of the LatLonPointSpatialField docValue decoding:
  //  (1) stability: a decoded value that is re-indexed and decoded again comes back unchanged;
  //  (2) precision: the decoded point lies within 1.33cm of the point originally indexed.
  @SuppressWarnings({"resource", "IOResourceOpenedButNotSafelyClosed"})
  SolrClient solr = new EmbeddedSolrServer(h.getCore()); // intentionally never closed; closing it would close Solr
  final String fieldName = "llp_1_dv_dvasst";

  // index a random point
  final double randomLat = GeoTestUtil.nextLatitude();
  final double randomLon = GeoTestUtil.nextLongitude();
  final String original = randomLat + "," + randomLon;
  assertU(adoc("id", "0", fieldName, original));
  assertU(commit());

  // first read: the value comes back from docValues (probably with less precision)
  final String firstDecode = (String) solr.query(params("q", "id:0")).getResults().get(0).get(fieldName);

  // overwrite the document using the decoded value
  assertU(adoc("id", "0", fieldName, firstDecode));
  assertU(commit());

  // second read: must be identical to the first one
  final String secondDecode = (String) solr.query(params("q", "id:0")).getResults().get(0).get(fieldName);
  assertEquals("orig:" + original, firstDecode, secondDecode);

  // precision: measure how far the decoded point is from the original, in centimeters
  final Point originalPoint = SpatialUtils.parsePoint(original, SpatialContext.GEO);
  final Point decodedPoint = SpatialUtils.parsePoint(firstDecode, SpatialContext.GEO);
  final double distanceDegrees = SpatialContext.GEO.calcDistance(originalPoint, decodedPoint);
  final double distanceCm = distanceDegrees * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
  // For reference: computing the same distance from the absolute error documented in the javadocs of
  // LatLonDocValuesField (x=8.381903171539307E-8, y=4.190951585769653E-8) to (0,0) gives
  // 1.0420371840922256 cm, which is a bit lower than what we're able to do here.
  assertTrue("deltaCm too high: " + distanceCm, distanceCm < 1.33);
}
@Test @Test
public void testLatLonRetrieval() throws Exception { public void testLatLonRetrieval() throws Exception {
assertU(adoc("id", "0", final String ptHighPrecision = "40.2996543270,-74.0824956673";
"llp_1_dv_st", "-75,41", final String ptLossOfPrecision = "40.2996544,-74.0824957"; // rounded version of the one above, losing precision
"llp_1_dv", "-80,20",
"llp_1_dv_dvasst", "10,-30")); // "_1" is single, "_N" is multiValued
// "_dv" is docValues (otherwise not), "_dvasst" is useDocValuesAsStored (otherwise not)
// "_st" is stored (otherwise not)
List<RetrievalCombo> combos = Arrays.asList(
new RetrievalCombo("llp_1_dv_st", ptHighPrecision),
new RetrievalCombo("llp_N_dv_st", Arrays.asList("-40,40", "-45,45")),
new RetrievalCombo("llp_N_dv_st", Arrays.asList("-40,40")), // multiValued but 1 value
new RetrievalCombo("llp_1_dv_dvasst", ptHighPrecision, ptLossOfPrecision),
// this one comes back in a different order since it gets sorted low to high
new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40", "-45,45"), Arrays.asList("-45,45", "-40,40")),
new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40")), // multiValued but 1 value
// edge cases. (note we sorted it as Lucene will internally)
new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList(
"-90,180", "-90,-180",
"0,0", "0,180", "0,-180",
"90,0", "90,180", "90,-180")),
new RetrievalCombo("llp_1_dv", ptHighPrecision, ptLossOfPrecision),
new RetrievalCombo("llp_N_dv", Arrays.asList("-45,45", "-40,40"))
);
Collections.shuffle(combos, random());
// add and commit
for (RetrievalCombo combo : combos) {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", "" + combo.id);
for (String indexValue : combo.indexValues) {
doc.addField(combo.fieldName, indexValue);
}
assertU(adoc(doc));
if (TestUtils.rarely()) { // induce segments to potentially change internal behavior
assertU(commit());
}
}
assertU(commit()); assertU(commit());
assertJQ(req("q","*:*", "fl","*"),
"response/docs/[0]/llp_1_dv_st=='-75,41'", // create an assertJQ assertion string, once for fl=*, another for when the field is listed
// Right now we do not support decoding point value from dv field List<String> assertJQsFlListed = new ArrayList<>();
"!response/docs/[0]/llp_1_dv=='-80,20'", List<String> assertJQsFlStar = new ArrayList<>();
"!response/docs/[0]/llp_1_dv_dvasst=='10,-30'"); for (RetrievalCombo combo : combos) {
assertJQ(req("q","*:*", "fl","llp_1_dv_st, llp_1_dv, llp_1_dv_dvasst"), String expect = "response/docs/[" + combo.id + "]/" + combo.fieldName + "==" + combo.expectReturnJSON;
"response/docs/[0]/llp_1_dv_st=='-75,41'", assertJQsFlListed.add(expect);
// Even when these fields are specified, we won't return them if (combo.fieldName.endsWith("_dv")) {
"!response/docs/[0]/llp_1_dv=='-80,20'", expect = "response/docs/[" + combo.id + "]=={'id':'" + combo.id + "'}"; // only the id, nothing else
"!response/docs/[0]/llp_1_dv_dvasst=='10,-30'"); }
assertJQsFlStar.add(expect);
}
// check
assertJQ(req("q","*:*", "sort", "id asc",
"fl","*"),
assertJQsFlStar.toArray(new String[0]));
assertJQ(req("q","*:*", "sort", "id asc",
"fl", "id," + combos.stream().map(c -> c.fieldName).collect(Collectors.joining(","))),
assertJQsFlListed.toArray(new String[0]));
}
/**
 * One retrieval scenario for a single field: the value(s) to index into {@link #fieldName} and the JSON
 * literal the field is expected to have when the document is returned.
 * Each instance takes the next value of a static counter as its document id.
 */
private static class RetrievalCombo {
// Shared id source; incremented once per constructed instance.
// NOTE(review): nothing visible here resets it, so ids keep growing across instances -- confirm this is
// acceptable when the using test runs more than once in the same JVM.
static int idCounter = 0;
// Document id of this scenario, assigned at construction time.
final int id = idCounter++;
// Name of the schema field that receives the indexed value(s).
final String fieldName;
// Raw "lat,lon" strings added to the document, one field value each.
final List<String> indexValues;
// Expected value as a single-quoted JSON literal: a string for the single-value constructors,
// an array of strings for the list constructors. Both constructors always assign a non-null value.
final String expectReturnJSON;
/** Multi-valued scenario where the returned values are expected to equal the indexed values, in the same order. */
RetrievalCombo(String fieldName, List<String> indexValues) { this(fieldName, indexValues, indexValues);}
/** Multi-valued scenario with an explicit expectation; {@code returnValues} is rendered as {@code ['a', 'b']}. */
RetrievalCombo(String fieldName, List<String> indexValues, List<String> returnValues) {
this.fieldName = fieldName;
this.indexValues = indexValues;
this.expectReturnJSON = returnValues.stream().collect(Collectors.joining("', '", "['", "']"));
}
/** Single-valued scenario where the returned value is expected to equal the indexed value. */
RetrievalCombo(String fieldName, String indexValue) { this(fieldName, indexValue, indexValue); }
/** Single-valued scenario with an explicit expectation; {@code returnValue} is rendered as {@code 'a'}. */
RetrievalCombo(String fieldName, String indexValue, String returnValue) {
this.fieldName = fieldName;
this.indexValues = Collections.singletonList(indexValue);
this.expectReturnJSON = "'" + returnValue + "'";
}
} }
private void testRptWithGeometryField(String fieldName) throws Exception { private void testRptWithGeometryField(String fieldName) throws Exception {