mirror of https://github.com/apache/lucene.git
SOLR-11731: LatLonPointSpatialField now supports docValue retrieval
Closes #323
This commit is contained in:
parent
be8dca3c7b
commit
4b08efcf1c
|
@ -68,6 +68,9 @@ Optimizations
|
|||
differential fetching now speeds up recovery times when full index replication is needed, but only
|
||||
a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
|
||||
|
||||
* SOLR-11731: LatLonPointSpatialField can now decode points from docValues when stored=false docValues=true,
|
||||
albeit with maximum precision of 1.33cm (Karthik Ramachandran, David Smiley)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -18,11 +18,13 @@
|
|||
package org.apache.solr.schema;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LatLonDocValuesField;
|
||||
import org.apache.lucene.document.LatLonPoint;
|
||||
import org.apache.lucene.geo.GeoEncodingUtils;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
@ -46,6 +48,8 @@ import org.locationtech.spatial4j.shape.Point;
|
|||
import org.locationtech.spatial4j.shape.Rectangle;
|
||||
import org.locationtech.spatial4j.shape.Shape;
|
||||
|
||||
import static java.math.RoundingMode.CEILING;
|
||||
|
||||
/**
|
||||
* A spatial implementation based on Lucene's {@code LatLonPoint} and {@code LatLonDocValuesField}. The
|
||||
* first is based on Lucene's "Points" API, which is a BKD Index. This field type is strictly limited to
|
||||
|
@ -71,6 +75,26 @@ public class LatLonPointSpatialField extends AbstractSpatialFieldType implements
|
|||
SchemaField schemaField = schema.getField(fieldName); // TODO change AbstractSpatialFieldType so we get schemaField?
|
||||
return new LatLonPointSpatialStrategy(ctx, fieldName, schemaField.indexed(), schemaField.hasDocValues());
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes the docValues number into latitude and longitude components, formatting as "lat,lon".
|
||||
* The encoding is governed by {@code LatLonDocValuesField}. The decimal output representation is reflective
|
||||
* of the available precision.
|
||||
* @param value Non-null; stored location field data
|
||||
* @return Non-null; "lat, lon" with 6 decimal point precision
|
||||
*/
|
||||
public static String decodeDocValueToString(long value) {
|
||||
final double latDouble = GeoEncodingUtils.decodeLatitude((int) (value >> 32));
|
||||
final double lonDouble = GeoEncodingUtils.decodeLongitude((int) (value & 0xFFFFFFFFL));
|
||||
// 7 decimal places maximizes our available precision to just over a centimeter; we have a test for it.
|
||||
// CEILING round-trips (decode then re-encode then decode to get identical results). Others did not. It also
|
||||
// reverses the "floor" that occurs when we encode.
|
||||
BigDecimal latitudeDecoded = BigDecimal.valueOf(latDouble).setScale(7, CEILING);
|
||||
BigDecimal longitudeDecoded = BigDecimal.valueOf(lonDouble).setScale(7, CEILING);
|
||||
return latitudeDecoded.stripTrailingZeros().toPlainString() + ","
|
||||
+ longitudeDecoded.stripTrailingZeros().toPlainString();
|
||||
// return ((float)latDouble) + "," + ((float)lonDouble); crude but not quite as accurate
|
||||
}
|
||||
|
||||
// TODO move to Lucene-spatial-extras once LatLonPoint & LatLonDocValuesField moves out of sandbox
|
||||
public static class LatLonPointSpatialStrategy extends SpatialStrategy {
|
||||
|
|
|
@ -56,6 +56,7 @@ import org.apache.lucene.util.NumericUtils;
|
|||
import org.apache.solr.common.SolrDocumentBase;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.schema.BoolField;
|
||||
import org.apache.solr.schema.LatLonPointSpatialField;
|
||||
import org.apache.solr.schema.AbstractEnumField;
|
||||
import org.apache.solr.schema.NumberType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
|
@ -490,8 +491,16 @@ public class SolrDocumentFetcher {
|
|||
long number = numericDv.nextValue();
|
||||
Object value = decodeNumberFromDV(schemaField, number, true);
|
||||
// return immediately if the number is not decodable, hence won't return an empty list.
|
||||
if (value == null) return null;
|
||||
else outValues.add(value);
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
// normally never true but LatLonPointSpatialField uses SORTED_NUMERIC even when single valued
|
||||
else if (schemaField.multiValued() == false) {
|
||||
return value;
|
||||
}
|
||||
else {
|
||||
outValues.add(value);
|
||||
}
|
||||
}
|
||||
assert outValues.size() > 0;
|
||||
return outValues;
|
||||
|
@ -515,6 +524,12 @@ public class SolrDocumentFetcher {
|
|||
}
|
||||
|
||||
private Object decodeNumberFromDV(SchemaField schemaField, long value, boolean sortableNumeric) {
|
||||
// note: This special-case is unfortunate; if we have to add any more than perhaps the fieldType should
|
||||
// have this method so that specific field types can customize it.
|
||||
if (schemaField.getType() instanceof LatLonPointSpatialField) {
|
||||
return LatLonPointSpatialField.decodeDocValueToString(value);
|
||||
}
|
||||
|
||||
if (schemaField.getType().getNumberType() == null) {
|
||||
log.warn("Couldn't decode docValues for field: [{}], schemaField: [{}], numberType is unknown",
|
||||
schemaField.getName(), schemaField);
|
||||
|
|
|
@ -86,8 +86,11 @@
|
|||
<field name="llp_idx" type="llp" indexed="true" docValues="false" />
|
||||
<field name="llp_dv" type="llp" indexed="false" docValues="true" />
|
||||
<field name="llp_1_dv_st" type="llp" indexed="false" docValues="true" stored="true" multiValued="false"/>
|
||||
<field name="llp_N_dv_st" type="llp" indexed="false" docValues="true" stored="true" multiValued="true"/>
|
||||
<field name="llp_1_dv" type="llp" indexed="false" docValues="true" stored="false" multiValued="false" useDocValuesAsStored="false"/>
|
||||
<field name="llp_N_dv" type="llp" indexed="false" docValues="true" stored="false" multiValued="true" useDocValuesAsStored="false"/>
|
||||
<field name="llp_1_dv_dvasst" type="llp" indexed="false" docValues="true" stored="false" multiValued="false" useDocValuesAsStored="true"/>
|
||||
<field name="llp_N_dv_dvasst" type="llp" indexed="false" docValues="true" stored="false" multiValued="true" useDocValuesAsStored="true"/>
|
||||
|
||||
<dynamicField name="bboxD_*" type="bbox" indexed="true"/>
|
||||
<dynamicField name="str_*" type="string" indexed="true" stored="true"/>
|
||||
|
|
|
@ -16,15 +16,31 @@
|
|||
*/
|
||||
package org.apache.solr.search;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Repeat;
|
||||
import org.apache.lucene.geo.GeoTestUtil;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.metrics.MetricsMap;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.util.SpatialUtils;
|
||||
import org.apache.solr.util.TestUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
|
||||
//Unlike TestSolr4Spatial, not parametrized / not generic.
|
||||
public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
|
||||
|
@ -117,24 +133,122 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
|
|||
"q", "{!cache=false field f=" + fieldName + "}Intersects(" + polygonWKT + ")",
|
||||
"sort", "id asc"), "/response/numFound==2");
|
||||
}
|
||||
|
||||
|
||||
@Test @Repeat(iterations = 10)
|
||||
public void testLLPDecodeIsStableAndPrecise() throws Exception {
|
||||
// test that LatLonPointSpatialField decode of docValue will round-trip (re-index then re-decode) to the same value
|
||||
@SuppressWarnings({"resource", "IOResourceOpenedButNotSafelyClosed"})
|
||||
SolrClient client = new EmbeddedSolrServer(h.getCore());// do NOT close it; it will close Solr
|
||||
|
||||
final String fld = "llp_1_dv_dvasst";
|
||||
String ptOrig = GeoTestUtil.nextLatitude() + "," + GeoTestUtil.nextLongitude();
|
||||
assertU(adoc("id", "0", fld, ptOrig));
|
||||
assertU(commit());
|
||||
// retrieve it (probably less precision
|
||||
String ptDecoded1 = (String) client.query(params("q", "id:0")).getResults().get(0).get(fld);
|
||||
// now write it back
|
||||
assertU(adoc("id", "0", fld, ptDecoded1));
|
||||
assertU(commit());
|
||||
// retrieve it and hopefully the same
|
||||
String ptDecoded2 = (String) client.query(params("q", "id:0")).getResults().get(0).get(fld);
|
||||
assertEquals("orig:" + ptOrig, ptDecoded1, ptDecoded2);
|
||||
|
||||
// test that the representation is pretty accurate
|
||||
final Point ptOrigObj = SpatialUtils.parsePoint(ptOrig, SpatialContext.GEO);
|
||||
final Point ptDecodedObj = SpatialUtils.parsePoint(ptDecoded1, SpatialContext.GEO);
|
||||
double deltaCentimeters = SpatialContext.GEO.calcDistance(ptOrigObj, ptDecodedObj) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
|
||||
// //See javadocs of LatLonDocValuesField
|
||||
// final Point absErrorPt = SpatialContext.GEO.getShapeFactory().pointXY(8.381903171539307E-8, 4.190951585769653E-8);
|
||||
// double deltaCentimetersMax
|
||||
// = SpatialContext.GEO.calcDistance(absErrorPt, 0,0) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
|
||||
// // equals 1.0420371840922256 which is a bit lower than what we're able to do
|
||||
|
||||
assertTrue("deltaCm too high: " + deltaCentimeters, deltaCentimeters < 1.33);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLatLonRetrieval() throws Exception {
|
||||
assertU(adoc("id", "0",
|
||||
"llp_1_dv_st", "-75,41",
|
||||
"llp_1_dv", "-80,20",
|
||||
"llp_1_dv_dvasst", "10,-30"));
|
||||
final String ptHighPrecision = "40.2996543270,-74.0824956673";
|
||||
final String ptLossOfPrecision = "40.2996544,-74.0824957"; // rounded version of the one above, losing precision
|
||||
|
||||
// "_1" is single, "_N" is multiValued
|
||||
// "_dv" is docValues (otherwise not), "_dvasst" is useDocValuesAsStored (otherwise not)
|
||||
// "_st" is stored" (otherwise not)
|
||||
|
||||
List<RetrievalCombo> combos = Arrays.asList(
|
||||
new RetrievalCombo("llp_1_dv_st", ptHighPrecision),
|
||||
new RetrievalCombo("llp_N_dv_st", Arrays.asList("-40,40", "-45,45")),
|
||||
new RetrievalCombo("llp_N_dv_st", Arrays.asList("-40,40")), // multiValued but 1 value
|
||||
|
||||
new RetrievalCombo("llp_1_dv_dvasst", ptHighPrecision, ptLossOfPrecision),
|
||||
// this one comes back in a different order since it gets sorted low to high
|
||||
new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40", "-45,45"), Arrays.asList("-45,45", "-40,40")),
|
||||
new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40")), // multiValued but 1 value
|
||||
// edge cases. (note we sorted it as Lucene will internally)
|
||||
new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList(
|
||||
"-90,180", "-90,-180",
|
||||
"0,0", "0,180", "0,-180",
|
||||
"90,0", "90,180", "90,-180")),
|
||||
|
||||
new RetrievalCombo("llp_1_dv", ptHighPrecision, ptLossOfPrecision),
|
||||
new RetrievalCombo("llp_N_dv", Arrays.asList("-45,45", "-40,40"))
|
||||
|
||||
);
|
||||
Collections.shuffle(combos, random());
|
||||
|
||||
// add and commit
|
||||
for (RetrievalCombo combo : combos) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("id", "" + combo.id);
|
||||
for (String indexValue : combo.indexValues) {
|
||||
doc.addField(combo.fieldName, indexValue);
|
||||
}
|
||||
assertU(adoc(doc));
|
||||
if (TestUtils.rarely()) { // induce segments to potentially change internal behavior
|
||||
assertU(commit());
|
||||
}
|
||||
}
|
||||
assertU(commit());
|
||||
assertJQ(req("q","*:*", "fl","*"),
|
||||
"response/docs/[0]/llp_1_dv_st=='-75,41'",
|
||||
// Right now we do not support decoding point value from dv field
|
||||
"!response/docs/[0]/llp_1_dv=='-80,20'",
|
||||
"!response/docs/[0]/llp_1_dv_dvasst=='10,-30'");
|
||||
assertJQ(req("q","*:*", "fl","llp_1_dv_st, llp_1_dv, llp_1_dv_dvasst"),
|
||||
"response/docs/[0]/llp_1_dv_st=='-75,41'",
|
||||
// Even when these fields are specified, we won't return them
|
||||
"!response/docs/[0]/llp_1_dv=='-80,20'",
|
||||
"!response/docs/[0]/llp_1_dv_dvasst=='10,-30'");
|
||||
|
||||
// create an assertJQ assertion string, once for fl=*, another for when the field is listed
|
||||
List<String> assertJQsFlListed = new ArrayList<>();
|
||||
List<String> assertJQsFlStar = new ArrayList<>();
|
||||
for (RetrievalCombo combo : combos) {
|
||||
String expect = "response/docs/[" + combo.id + "]/" + combo.fieldName + "==" + combo.expectReturnJSON;
|
||||
assertJQsFlListed.add(expect);
|
||||
if (combo.fieldName.endsWith("_dv")) {
|
||||
expect = "response/docs/[" + combo.id + "]=={'id':'" + combo.id + "'}"; // only the id, nothing else
|
||||
}
|
||||
assertJQsFlStar.add(expect);
|
||||
}
|
||||
// check
|
||||
assertJQ(req("q","*:*", "sort", "id asc",
|
||||
"fl","*"),
|
||||
assertJQsFlStar.toArray(new String[0]));
|
||||
assertJQ(req("q","*:*", "sort", "id asc",
|
||||
"fl", "id," + combos.stream().map(c -> c.fieldName).collect(Collectors.joining(","))),
|
||||
assertJQsFlListed.toArray(new String[0]));
|
||||
}
|
||||
|
||||
private static class RetrievalCombo {
|
||||
static int idCounter = 0;
|
||||
final int id = idCounter++;
|
||||
final String fieldName;
|
||||
final List<String> indexValues;
|
||||
final String expectReturnJSON; //or null if not expected in response
|
||||
|
||||
RetrievalCombo(String fieldName, List<String> indexValues) { this(fieldName, indexValues, indexValues);}
|
||||
RetrievalCombo(String fieldName, List<String> indexValues, List<String> returnValues) {
|
||||
this.fieldName = fieldName;
|
||||
this.indexValues = indexValues;
|
||||
this.expectReturnJSON = returnValues.stream().collect(Collectors.joining("', '", "['", "']"));
|
||||
}
|
||||
RetrievalCombo(String fieldName, String indexValue) { this(fieldName, indexValue, indexValue); }
|
||||
RetrievalCombo(String fieldName, String indexValue, String returnValue) {
|
||||
this.fieldName = fieldName;
|
||||
this.indexValues = Collections.singletonList(indexValue);
|
||||
this.expectReturnJSON = "'" + returnValue + "'";
|
||||
}
|
||||
}
|
||||
|
||||
private void testRptWithGeometryField(String fieldName) throws Exception {
|
||||
|
|
Loading…
Reference in New Issue