LUCENE-5648: DateRangePrefixTree and NumberRangePrefixTreeStrategy

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1600555 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2014-06-05 01:43:12 +00:00
parent 19da2d5f1e
commit bb267a143e
8 changed files with 1778 additions and 13 deletions

View File

@ -21,6 +21,13 @@ New Features
PushPostingsWriterBase for single-pass push of docs/positions to the
postings format. (Mike McCandless)
* LUCENE-5648: Index and search date ranges, particularly multi-valued ones. It's
implemented in the spatial module as DateRangePrefixTree used with
NumberRangePrefixTreeStrategy. (David Smiley)
* LUCENE-4175: Index and search rectangles with spatial BBoxSpatialStrategy.
Sort documents by relative overlap of query areas. (Ryan McKinley)
API Changes
* LUCENE-4535: oal.util.FilterIterator is now an internal API.

View File

@ -0,0 +1,79 @@
package org.apache.lucene.spatial;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import java.text.ParseException;
/** A PrefixTree based on Number/Date ranges. This isn't very "spatial" on the surface (to the user) but
* it's implemented using spatial so that's why it's here extending a SpatialStrategy.
*
* @lucene.experimental
*/
public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy {

  /**
   * Builds a strategy around the given number/date range tree and applies this class's defaults:
   * leafy-branch pruning off, prefix grid scan level two below the tree's max levels,
   * points-only off and a distance error percentage of zero.
   *
   * @param prefixTree the range tree to index and search with
   * @param fieldName the name of the indexed field
   */
  public NumberRangePrefixTreeStrategy(NumberRangePrefixTree prefixTree, String fieldName) {
    super(prefixTree, fieldName);
    setPruneLeafyBranches(false);
    final int scanLevel = prefixTree.getMaxLevels() - 2;
    setPrefixGridScanLevel(scanLevel);//user might want to change, however
    setPointsOnly(false);
    setDistErrPct(0);
  }

  /** Returns the grid of the parent strategy, narrowed to {@link NumberRangePrefixTree}. */
  @Override
  public NumberRangePrefixTree getGrid() {
    return (NumberRangePrefixTree) super.getGrid();
  }

  /**
   * Creates the single indexable field for {@code shape}.
   *
   * @param shape the shape to index
   * @return a one-element array holding the token-stream backed field
   */
  @Override
  public Field[] createIndexableFields(Shape shape) {
    //The level passed doesn't actually matter; NumberRange based Shapes have their own "level".
    final int detailLevel = grid.getMaxLevels();
    final TokenStream tokens = createTokenStream(shape, detailLevel);
    final Field[] fields = new Field[1];
    fields[0] = new Field(getFieldName(), tokens, FIELD_TYPE);
    return fields;
  }

  /** For a Date based tree, pass in a Calendar, with unspecified fields marked as cleared.
   * Delegates to {@link NumberRangePrefixTree#toShape(Object)}. */
  public Shape toShape(Object value) {
    final NumberRangePrefixTree tree = getGrid();
    return tree.toShape(value);
  }

  /** Delegates to {@link NumberRangePrefixTree#toRangeShape(Shape, Shape)}. */
  public Shape toRangeShape(Shape min, Shape max) {
    final NumberRangePrefixTree tree = getGrid();
    return tree.toRangeShape(min, max);
  }

  /** Delegates to {@link NumberRangePrefixTree#parseShape(String)}. */
  public Shape parseShape(String str) throws ParseException {
    final NumberRangePrefixTree tree = getGrid();
    return tree.parseShape(str);
  }

  /** Unsupported; always throws {@link UnsupportedOperationException}. */
  @Override
  public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
    throw new UnsupportedOperationException();
  }
}

View File

@ -0,0 +1,428 @@
package org.apache.lucene.spatial.prefix.tree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.spatial4j.core.shape.Shape;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Locale;
import java.util.TimeZone;
/**
* A PrefixTree for date ranges in which the levels of the tree occur at natural periods of time (e.g. years,
* months, ...). You pass in {@link Calendar} objects with the desired fields set and the unspecified
* fields unset, which conveys the precision. The implementation tries to be generic to the Calendar
* abstraction, making some optimizations when a Gregorian is used, but no others have been tested.
* <p/>
* Warning: If you construct a Calendar and then get something from the object like a field (e.g. year) or
* milliseconds, then every field is fully set by side-effect. So after setting the fields, pass it to this
* API first.
* @lucene.experimental
*/
public class DateRangePrefixTree extends NumberRangePrefixTree {
/*
WARNING java.util.Calendar is tricky to work with:
* If you "get" any field value, every field becomes "set". This can introduce a Heisenbug effect,
in some cases when in a debugger. Fortunately, Calendar.toString() doesn't trigger this.
* Beware Calendar underflow of the underlying long. If you create a Calendar from Long.MIN_VALUE and clear
a field, it will underflow and appear close to Long.MAX_VALUE (BC to AD).
There are no doubt other pitfalls, but those two were hard-fought lessons here.
TODO Improvements:
* Make max precision configurable (i.e. to SECOND).
* Make min & max year span configurable. Use that to remove pointless top levels of the SPT.
If year span is > 10k, then add 1k year level. If year span is > 10k of 1k levels, add 1M level.
* NumberRangePrefixTree: override getTreeCellIterator for optimized case where the shape isn't a date span; use
FilterCellIterator of the cell stack.
*/
// All calendars created by this class use UTC.
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
// Template calendar (UTC, ROOT locale, every field cleared); it is cloned rather than handed out.
private static Calendar CAL_TMP;//template
static {
CAL_TMP = Calendar.getInstance(UTC, Locale.ROOT);
CAL_TMP.clear();
}
// Calendars positioned at the smallest and largest representable instants (Long.MIN_VALUE / Long.MAX_VALUE millis).
private static final Calendar MINCAL = (Calendar) CAL_TMP.clone();
private static final Calendar MAXCAL = (Calendar) CAL_TMP.clone();
static {
MINCAL.setTimeInMillis(Long.MIN_VALUE);
MAXCAL.setTimeInMillis(Long.MAX_VALUE);
}
//BC years are decreasing, remember. Yet ActualMaximum is the numerically high value, ActualMinimum is 1.
private static final int BC_FIRSTYEAR = MINCAL.getActualMaximum(Calendar.YEAR);
private static final int BC_LASTYEAR = MINCAL.getActualMinimum(Calendar.YEAR);//1
// Number of distinct BC years.
private static final int BC_YEARS = BC_FIRSTYEAR - BC_LASTYEAR + 1;
private static final int AD_FIRSTYEAR = MAXCAL.getActualMinimum(Calendar.YEAR);//1
private static final int AD_LASTYEAR = MAXCAL.getActualMaximum(Calendar.YEAR);
// BC_YEARS rounded up to a whole number of millions. toShape(Calendar) adds this offset to AD years
// (and subtracts BC years from it) so that every year maps to a non-negative "adjusted year".
private static final int AD_YEAR_BASE = (((BC_YEARS-1) / 1000_000)+1) * 1000_000;
static { assert BC_LASTYEAR == 1 && AD_FIRSTYEAR == 1; }
//how many million years are there?
// Passed to the super constructor as the sub-cell count of the top (million-year) level.
private static final int NUM_MYEARS = (AD_YEAR_BASE + AD_LASTYEAR) / 1000_000;
/** Returns how many distinct values the given Calendar field can take on the template calendar,
 * i.e. {@code getMaximum(field) - getMinimum(field) + 1}. */
private static int calFieldLen(int field) {
  final int highest = CAL_TMP.getMaximum(field);
  final int lowest = CAL_TMP.getMinimum(field);
  final int span = highest - lowest;
  return span + 1;
}
// Maps a tree level to the Calendar field that level represents. Level 0 is unused and levels 1 and 2
// (the million-year and thousand-year levels) have no Calendar field of their own, hence -1.
private static final int[] FIELD_BY_LEVEL = {
-1/*unused*/, -1, -1, Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH,
Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND};
// The level whose value completes the year (index of Calendar.YEAR in FIELD_BY_LEVEL).
private static final int yearLevel = 3;
/** The shared instance; the constructor is private. */
public static final DateRangePrefixTree INSTANCE = new DateRangePrefixTree();
// Levelled values of MINCAL and MAXCAL, i.e. the edges of the representable range.
private final LevelledValue minLV, maxLV;
// Levelled value of the Gregorian change date, or null when MAXCAL is not a GregorianCalendar.
private final LevelledValue gregorianChangeDateLV;
/** Builds the tree: declares the maximum number of sub-cells for each level, then pre-computes the
 * levelled values of the min/max calendars and, for a GregorianCalendar, of the Gregorian change date. */
private DateRangePrefixTree() {
super(new int[]{//sublevels by level
NUM_MYEARS,
1000,//1 thousand thousand-years in a million years
1000,//1 thousand years in a thousand-year
calFieldLen(Calendar.MONTH),
calFieldLen(Calendar.DAY_OF_MONTH),
calFieldLen(Calendar.HOUR_OF_DAY),
calFieldLen(Calendar.MINUTE),
calFieldLen(Calendar.SECOND),
calFieldLen(Calendar.MILLISECOND),
});
// Clones are passed because toShape(Calendar) reads (and then re-clears) fields of its argument.
maxLV = (LevelledValue) toShape((Calendar)MAXCAL.clone());
minLV = (LevelledValue) toShape((Calendar)MINCAL.clone());
if (MAXCAL instanceof GregorianCalendar) {
//TODO this should be a configurable param by passing a Calendar serving as a template.
GregorianCalendar gCal = (GregorianCalendar)MAXCAL;
gregorianChangeDateLV = (LevelledValue) toShape(gCal.getGregorianChange());
} else {
gregorianChangeDateLV = null;
}
}
/**
 * Returns the number of sub-cells directly beneath {@code lv}.
 * <p>
 * Three cases are handled:
 * <ul>
 *   <li>{@code lv} lies on the maximum edge of the tree (it is a prefix of the max value): the
 *   count is the max value's cell number at the next level <em>plus one</em>, since cell numbers
 *   are 0-based.</li>
 *   <li>{@code lv} is at or after the Gregorian change date: a calendar-free fast path is used
 *   (cross-checked against the slow path when assertions are enabled).</li>
 *   <li>Otherwise the count is derived from a Calendar.</li>
 * </ul>
 *
 * @param lv the cell value whose children are being counted; must not be past the max value
 * @return the number of sub-cells
 */
@Override
protected int getNumSubCells(LevelledValue lv) {
  int cmp = comparePrefixLV(lv, maxLV);
  assert cmp <= 0;
  if (cmp == 0) {//edge case (literally!)
    // getValAtLevel yields the LAST valid (0-based) cell number, so the count is one more than that.
    return maxLV.getValAtLevel(lv.getLevel()+1) + 1;
  }
  // if using GregorianCalendar and we're after the "Gregorian change date" then we'll compute
  // the sub-cells ourselves more efficiently without the need to construct a Calendar.
  cmp = gregorianChangeDateLV != null ? comparePrefixLV(lv, gregorianChangeDateLV) : -1;
  //TODO consider also doing fast-path if field is <= hours even if before greg change date
  if (cmp >= 0) {
    int result = fastSubCells(lv);
    assert result == slowSubCells(lv) : "fast/slow numSubCells inconsistency";
    return result;
  } else {
    return slowSubCells(lv);
  }
}
/** Computes the number of sub-cells without constructing a Calendar, using Gregorian rules.
 * Only a month-level value needs special treatment (days in that month); every other level
 * defers to the super class. */
private int fastSubCells(LevelledValue lv) {
  final int level = lv.getLevel();
  if (level != yearLevel+1) {//typical:
    return super.getNumSubCells(lv);
  }
  //lv is a month, so the answer is the number of days in it
  final int month = lv.getValAtLevel(level);
  if (month == Calendar.FEBRUARY) {
    //get the year (negative numbers for BC)
    int adjustedYear = lv.getValAtLevel(1) * 1_000_000;
    adjustedYear += lv.getValAtLevel(2) * 1000;
    adjustedYear += lv.getValAtLevel(3);
    final int year = adjustedYear - AD_YEAR_BASE;
    final boolean nonLeapCentury = year % 100 == 0 && year % 400 != 0;
    final boolean leapYear = year % 4 == 0 && !nonLeapCentury;
    return leapYear ? 29 : 28;
  }
  final boolean thirtyDayMonth = month == Calendar.SEPTEMBER
      || month == Calendar.APRIL
      || month == Calendar.JUNE
      || month == Calendar.NOVEMBER;
  return thirtyDayMonth ? 30 : 31;
}
/** Computes the number of sub-cells of {@code lv}, consulting a Calendar when the size of the
 * next level's field can vary. */
private int slowSubCells(LevelledValue lv) {
  final int subField = FIELD_BY_LEVEL[lv.getLevel()+1];
  //short-circuit optimization (GregorianCalendar assumptions)
  final boolean fixedSize = subField == -1
      || subField == Calendar.YEAR
      || subField >= Calendar.HOUR_OF_DAY;//TODO make configurable
  if (fixedSize) {
    return super.getNumSubCells(lv);
  }
  //somewhat heavyweight op; ideally should be stored on LevelledValue somehow
  final Calendar cal = toCalendarLV(lv);
  final int highest = cal.getActualMaximum(subField);
  final int lowest = cal.getActualMinimum(subField);
  return highest - lowest + 1;
}
/** Calendar utility method:
 * Hands out a fresh copy of the template {@link Calendar}: UTC TimeZone, ROOT Locale, and
 * all fields cleared.
 * @return a new calendar instance, never null */
public Calendar newCal() {
  final Object copy = CAL_TMP.clone();
  return (Calendar) copy;
}
/** Calendar utility method:
 * Finds the precision of {@code cal}: the Calendar field code of the last set field before the
 * first unset one, looking only at the fields this prefix tree cares about (year through
 * millisecond, in that order).
 * @param cal the calendar to inspect; it is only queried with {@link Calendar#isSet(int)}
 * @return the field code, or -1 if not even the year is set */
public int getCalPrecisionField(Calendar cal) {
  int precisionField = -1;
  int level = yearLevel;
  while (level < FIELD_BY_LEVEL.length && cal.isSet(FIELD_BY_LEVEL[level])) {
    precisionField = FIELD_BY_LEVEL[level];
    level++;
  }
  return precisionField;
}
/** Calendar utility method:
* Calls {@link Calendar#clear(int)} for every field after {@code field}. Beware of Calendar underflow.
* Passing -1 clears the whole calendar.
* @param cal the calendar to modify in place
* @param field a Calendar field code; all fields with a higher code are cleared */
public void clearFieldsAfter(Calendar cal, int field) {
if (field == -1) {
cal.clear();
return;
}
int assertEra = -1;
// The era is read from a clone so that the getter doesn't mark the fields of 'cal' itself as set.
assert (assertEra = (((Calendar)cal.clone()).get(Calendar.ERA))) >= 0;//a trick to only get this if assert enabled
for (int f = field+1; f < Calendar.FIELD_COUNT; f++) {
cal.clear(f);
}
// If clearing flipped the era, the underlying time value wrapped around.
assert ((Calendar)cal.clone()).get(Calendar.ERA) == assertEra : "Calendar underflow";
}
/** Converts {@code value} from a {@link Calendar} or {@link Date} to a {@link Shape}. Other arguments,
 * including {@code null}, result in a {@link java.lang.IllegalArgumentException}.
 * <p>
 * A Date is first loaded into a fresh calendar from {@link #newCal()}, so it is converted at full
 * precision.
 * @param value a Calendar or a Date
 * @return the corresponding shape
 * @throws IllegalArgumentException if {@code value} is null or of any other type
 */
@Override
public Shape toShape(Object value) {
  if (value instanceof Calendar) {
    return toShape((Calendar) value);
  } else if (value instanceof Date) {
    Calendar cal = newCal();
    cal.setTime((Date)value);
    return toShape(cal);
  }
  // Guard against null so the caller gets the documented exception rather than a NullPointerException.
  throw new IllegalArgumentException("Expecting Calendar or Date but got: "
      + (value == null ? "null" : value.getClass()));
}
/** Converts the Calendar into a Shape.
* The isSet() state of the Calendar is re-instated when done.
* The precision of the result follows the calendar: only fields up to the last set one
* (see {@link #getCalPrecisionField(Calendar)}) contribute a level.
* @param cal the calendar to convert; its fields are read and then cleared again past its precision
* @return the shape for the calendar's value at the calendar's precision */
public Shape toShape(Calendar cal) {
// Convert a Calendar into a stack of cell numbers
final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields
try {
int[] valStack = new int[maxLevels];//starts at level 1, not 0
int len = 0;
if (calPrecField >= Calendar.YEAR) {//year or better precision
int year = cal.get(Calendar.YEAR);
// Era 0 is BC: BC years count down from AD_YEAR_BASE (1 BC maps to AD_YEAR_BASE itself); AD years count up from it.
int yearAdj = cal.get(Calendar.ERA) == 0 ? AD_YEAR_BASE - (year - 1) : AD_YEAR_BASE + year;
// Split the adjusted year into three levels: millions, thousands within the million, years within the thousand.
valStack[len++] = yearAdj / 1000_000;
yearAdj -= valStack[len-1] * 1000_000;
valStack[len++] = yearAdj / 1000;
yearAdj -= valStack[len-1] * 1000;
valStack[len++] = yearAdj;
for (int level = yearLevel+1; level < FIELD_BY_LEVEL.length; level++) {
int field = FIELD_BY_LEVEL[level];
if (field > calPrecField)
break;
// Cell numbers are 0-based, so subtract the field's minimum (e.g. day-of-month starts at 1).
valStack[len++] = cal.get(field) - cal.getActualMinimum(field);
}
}
return toShape(valStack, len);
} finally {
clearFieldsAfter(cal, calPrecField);//restore precision state modified by get()
}
}
/** Converts a shape produced by this tree back into a {@link Calendar}.
 * @param shape a levelled value (not a range)
 * @return the calendar for that value
 * @throws IllegalArgumentException if {@code shape} is not a levelled value */
public Calendar toCalendar(Shape shape) {
  if (!(shape instanceof LevelledValue)) {
    throw new IllegalArgumentException("Can't be converted to Calendar: "+shape);
  }
  final LevelledValue lv = (LevelledValue) shape;
  return toCalendarLV(lv);
}
/** Converts a levelled value into a Calendar; this is the inverse of the year/field encoding done in
* {@link #toShape(Calendar)}. A level-0 value yields a fully cleared calendar, and a value at or
* before the minimum yields a full-precision clone of the minimum calendar. */
private Calendar toCalendarLV(LevelledValue lv) {
if (lv.getLevel() == 0)
return newCal();
if (comparePrefixLV(lv, minLV) <= 0) {//shouldn't typically happen; sometimes in a debugger
return (Calendar) MINCAL.clone();//full precision; truncation would cause underflow
}
assert comparePrefixLV(lv, maxLV) <= 0;
Calendar cal = newCal();
// Rebuild the adjusted year from up to three levels: millions, thousands, years.
int yearAdj = lv.getValAtLevel(1) * 1_000_000;
if (lv.getLevel() > 1) {
yearAdj += lv.getValAtLevel(2) * 1000;
if (lv.getLevel() > 2) {
yearAdj += lv.getValAtLevel(3);
}
}
// Adjusted years above AD_YEAR_BASE are AD (era 1); the rest are BC (era 0), counting down.
if (yearAdj > AD_YEAR_BASE) {
cal.set(Calendar.ERA, 1);
cal.set(Calendar.YEAR, yearAdj - AD_YEAR_BASE);//setting the year resets the era
} else {
cal.set(Calendar.ERA, 0);//we assert this "sticks" at the end
cal.set(Calendar.YEAR, (AD_YEAR_BASE - yearAdj) + 1);
}
// Remaining levels map directly to Calendar fields; add the field minimum back since cell numbers are 0-based.
for (int level = yearLevel+1; level <= lv.getLevel(); level++) {
int field = FIELD_BY_LEVEL[level];
cal.set(field, lv.getValAtLevel(level) + cal.getActualMinimum(field));
}
// Checked on a clone so the getter doesn't mark the fields of 'cal' as set.
assert yearAdj > AD_YEAR_BASE || ((Calendar)cal.clone()).get(Calendar.ERA) == 0 : "ERA / YEAR underflow";
return cal;
}
/** Renders a levelled value as a date string by converting it to a Calendar first. */
@Override
protected String toStringLV(LevelledValue lv) {
  final Calendar asCalendar = toCalendarLV(lv);
  return toString(asCalendar);
}
/** Calendar utility method:
* Converts the calendar to ISO-8601, to include proper BC handling (1BC is "0000", 2BC is "-0001", etc.);
* and WITHOUT a trailing 'Z'.
* A fully cleared calendar will yield the string "*".
* The output is truncated to the calendar's precision (e.g. only "yyyy-MM" if the month is the last set field).
* The isSet() state of the Calendar is re-instated when done.
* @param cal the calendar to format; temporarily modified for BC dates and restored afterwards
* @return the formatted string, or "*" */
@SuppressWarnings("fallthrough")
public String toString(Calendar cal) {
final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields
if (calPrecField == -1)
return "*";
try {
//TODO not fully optimized because I only expect this to be used in tests / debugging.
// Borrow code from Solr DateUtil, and have it reference this back?
String pattern = "yyyy-MM-dd'T'HH:mm:ss.SSS";
// Accumulate how many leading characters of the pattern apply to this precision.
int ptnLen = 0;
switch (calPrecField) {//switch fall-through is deliberate
case Calendar.MILLISECOND: ptnLen += 4;
case Calendar.SECOND: ptnLen += 3;
case Calendar.MINUTE: ptnLen += 3;
case Calendar.HOUR_OF_DAY: ptnLen += 5;
case Calendar.DAY_OF_MONTH: ptnLen += 3;
case Calendar.MONTH: ptnLen += 3;
case Calendar.YEAR: ptnLen += 4;
break;
default: throw new IllegalStateException(""+calPrecField);
}
pattern = pattern.substring(0, ptnLen);
SimpleDateFormat format = new SimpleDateFormat(pattern, Locale.ROOT);
format.setTimeZone(cal.getTimeZone());
if (cal.get(Calendar.ERA) == 0) {//BC
//SDF doesn't do this properly according to ISO-8601
// Example: 1BC == "0000" (actually 0 AD), 2BC == "-0001", 3BC == "-0002", ...
final int yearOrig = cal.get(Calendar.YEAR);
// Format with the year shifted by one, then put the calendar back exactly as it was.
cal.set(Calendar.YEAR, yearOrig-1);
String str;
try {
str = format.format(cal.getTime());
} finally {
//reset to what it was
cal.set(Calendar.ERA, 0);//necessary!
cal.set(Calendar.YEAR, yearOrig);
}
if (yearOrig > 1)
return "-" + str;
else
// 1BC: overwrite the four formatted year digits with "0000".
return "0000" + str.substring(4);
}
return format.format(cal.getTime());
} finally {
clearFieldsAfter(cal, calPrecField);//restore precision state modified by get()
}
}
/** Parses a date string via {@link #parseCalendar(String)} and converts the result to a levelled value. */
@Override
protected LevelledValue parseShapeLV(String str) throws ParseException {
  final Calendar parsed = parseCalendar(str);
  final Shape shape = toShape(parsed);
  return (LevelledValue) shape;
}
/** Calendar utility method:
 * The reverse of {@link #toString(java.util.Calendar)}. Only the fields present in the string are
 * set; the rest stay un-set, which conveys the precision. A leading '-' or '+' is optional
 * (positive assumed), and a trailing 'Z' is also optional. The string "*" yields a fully cleared
 * calendar.
 * @param str not null and not empty
 * @return not null
 * @throws IllegalArgumentException if {@code str} is null or empty
 * @throws ParseException if {@code str} isn't a properly formatted date
 */
public Calendar parseCalendar(String str) throws ParseException {
  // example: +2014-10-23T21:22:33.159Z
  if (str == null || str.isEmpty()) {
    throw new IllegalArgumentException("str is null or blank");
  }
  final Calendar result = newCal();
  if (str.equals("*")) {
    return result;
  }
  int pos = 0;//read position; reported as the error offset
  try {
    //year & era:
    final int strLen = str.length();
    final int end = str.charAt(strLen-1) == 'Z' ? strLen - 1 : strLen;
    int yearEnd = str.indexOf('-', 1);//look past possible leading hyphen
    if (yearEnd < 0) {
      yearEnd = end;
    }
    final int isoYear = Integer.parseInt(str.substring(pos, yearEnd));
    if (isoYear <= 0) {//ISO year 0 is 1BC, -1 is 2BC, ...
      result.set(Calendar.ERA, 0);
      result.set(Calendar.YEAR, -1*isoYear + 1);
    } else {
      result.set(Calendar.ERA, 1);
      result.set(Calendar.YEAR, isoYear);
    }
    pos = yearEnd + 1;
    if (end < pos) {
      return result;
    }

    //NOTE: We aren't validating separator chars, and we unintentionally accept leading +/-.
    // The str.substring()'s hopefully get optimized to be stack-allocated.

    //month, day, hour, minute, second: two digits each followed by one separator char
    final int[] twoDigitFields = {
        Calendar.MONTH, Calendar.DAY_OF_MONTH, Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND};
    for (int calField : twoDigitFields) {
      final int parsed = Integer.parseInt(str.substring(pos, pos+2));
      //Calendar months start at 0
      result.set(calField, calField == Calendar.MONTH ? parsed - 1 : parsed);
      pos += 3;
      if (end < pos) {
        return result;
      }
    }

    //ms:
    result.set(Calendar.MILLISECOND, Integer.parseInt(str.substring(pos, pos+3)));
    pos += 3;//last one, move to next char
    if (end == pos) {
      return result;
    }
  } catch (Exception e) {
    final ParseException wrapped = new ParseException("Improperly formatted date: "+str, pos);
    wrapped.initCause(e);
    throw wrapped;
  }
  //trailing characters remain after the milliseconds
  throw new ParseException("Improperly formatted date: "+str, pos);
}
}

View File

@ -0,0 +1,811 @@
package org.apache.lucene.spatial.prefix.tree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.SpatialRelation;
import com.spatial4j.core.shape.impl.RectangleImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
import java.text.ParseException;
/**
* A special SpatialPrefixTree for single-dimensional number ranges of integral values. It's based
* on a stack of integers, and thus it's not limited to a long.
* @see <a href="https://issues.apache.org/jira/browse/LUCENE-5648">LUCENE-5648</a>
* @lucene.experimental
*/
public abstract class NumberRangePrefixTree extends SpatialPrefixTree {
//
// Dummy SpatialContext
//
// A non-geo SpatialContext passed to the SpatialPrefixTree super constructor.
private static final SpatialContext DUMMY_CTX;
static {
SpatialContextFactory factory = new SpatialContextFactory();
factory.geo = false;
// NOTE(review): presumably RectangleImpl's arguments are (minX, maxX, minY, maxY, ctx), giving an
// unbounded x range on a single y value -- confirm against the spatial4j API.
factory.worldBounds = new RectangleImpl(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0L, 0L, null);
DUMMY_CTX = factory.newSpatialContext();
}
//
// LevelledValue
//
/** A value implemented as a stack of numbers. Spatially speaking, it's
* analogous to a Point but 1D yet has some precision width.
* @lucene.internal */
protected static interface LevelledValue extends Shape {
/** The number of levels (the precision) of this value. */
int getLevel();//0 means the world (universe).
/** The number at the given level of the stack. */
int getValAtLevel(int level);//level >= 0 && <= getLevel()
/** The levelled value at the given level; toRangeShape uses it to reduce a value to a coarser level. */
LevelledValue getLVAtLevel(int level);
}
/** Compares a to b, returning less than 0, 0, or greater than 0, if a is less than, equal to, or
 * greater than b, respectively. Only min(a.levels,b.levels) are compared, so a value compares as
 * equal (0) to any value it is a prefix of.
 * <p>
 * Callers must rely only on the sign of the result, not its magnitude.
 * @param a the left-hand value
 * @param b the right-hand value
 * @return a negative number, zero, or a positive number
 * @lucene.internal */
protected static int comparePrefixLV(LevelledValue a, LevelledValue b) {
  final int minLevel = Math.min(a.getLevel(), b.getLevel());
  for (int level = 1; level <= minLevel; level++) {
    // Integer.compare instead of subtraction: immune to overflow regardless of the value range.
    final int cmp = Integer.compare(a.getValAtLevel(level), b.getValAtLevel(level));
    if (cmp != 0)
      return cmp;
  }
  return 0;
}
/** Renders the value as a bracketed, comma separated list of its numbers from level 1 to its own
 * level, e.g. {@code [3,14,15]}. A level-0 value (the world) renders as {@code []}.
 * @param lv the value to render
 * @return the string form */
protected String toStringLV(LevelledValue lv) {
  final StringBuilder buf = new StringBuilder();
  buf.append('[');
  for (int level = 1; level <= lv.getLevel(); level++) {
    // Separator goes BEFORE every element but the first; nothing needs chopping afterwards, so a
    // level-0 value can no longer lose its opening bracket.
    if (level > 1)
      buf.append(',');
    buf.append(lv.getValAtLevel(level));
  }
  buf.append(']');
  return buf.toString();
}
//
// NRShape
//
/** Number Range Shape; a range delimited by a pair of {@link LevelledValue}s.
 * Spatially speaking, it's analogous to a Rectangle but 1D.
 * @lucene.internal */
protected class NRShape implements Shape {

  private final LevelledValue minLV, maxLV;

  /** Don't call directly; see {@link #toRangeShape(com.spatial4j.core.shape.Shape, com.spatial4j.core.shape.Shape)}. */
  private NRShape(LevelledValue minLV, LevelledValue maxLV) {
    this.minLV = minLV;
    this.maxLV = maxLV;
  }

  /** The lower end of the range. */
  public LevelledValue getMinLV() {
    return minLV;
  }

  /** The upper end of the range. */
  public LevelledValue getMaxLV() {
    return maxLV;
  }

  /** Relates this range to another shape. Another range is handled here; anything else is asked to
   * relate itself to this range and the answer is transposed. */
  @Override
  public SpatialRelation relate(Shape shape) {
//    if (shape instanceof LevelledValue)
//      return relate((LevelledValue)shape);
    if (!(shape instanceof NRShape)) {
      return shape.relate(this).transpose();//probably a LevelledValue
    }
    return relate((NRShape) shape);
  }

  /** Relates this range to the range {@code ext}; ties on a shared prefix are broken by level. */
  public SpatialRelation relate(NRShape ext) {
    //This logic somewhat mirrors RectangleImpl.relate_range()
    final LevelledValue extMin = ext.getMinLV();
    final LevelledValue extMax = ext.getMaxLV();
    if (comparePrefixLV(extMin, getMaxLV()) > 0) {
      return SpatialRelation.DISJOINT;//ext starts after this ends
    }
    if (comparePrefixLV(extMax, getMinLV()) < 0) {
      return SpatialRelation.DISJOINT;//ext ends before this starts
    }
    final int minCmp = comparePrefixLV(extMin, getMinLV());
    final int maxCmp = comparePrefixLV(extMax, getMaxLV());

    final boolean extMinNotBefore =
        minCmp > 0 || (minCmp == 0 && extMin.getLevel() >= getMinLV().getLevel());
    final boolean extMaxNotAfter =
        maxCmp < 0 || (maxCmp == 0 && extMax.getLevel() >= getMaxLV().getLevel());
    if (extMinNotBefore && extMaxNotAfter) {
      return SpatialRelation.CONTAINS;
    }

    final boolean extMinNotAfter =
        minCmp < 0 || (minCmp == 0 && extMin.getLevel() <= getMinLV().getLevel());
    final boolean extMaxNotBefore =
        maxCmp > 0 || (maxCmp == 0 && extMax.getLevel() <= getMaxLV().getLevel());
    if (extMinNotAfter && extMaxNotBefore) {
      return SpatialRelation.WITHIN;
    }

    return SpatialRelation.INTERSECTS;
  }

  /** Unsupported. */
  @Override
  public Rectangle getBoundingBox() {
    throw new UnsupportedOperationException();
  }

  @Override
  public boolean hasArea() {
    return true;
  }

  /** Unsupported. */
  @Override
  public double getArea(SpatialContext spatialContext) {
    throw new UnsupportedOperationException();
  }

  /** Unsupported. */
  @Override
  public Point getCenter() {
    throw new UnsupportedOperationException();
  }

  /** Unsupported. */
  @Override
  public Shape getBuffered(double v, SpatialContext spatialContext) {
    throw new UnsupportedOperationException();
  }

  @Override
  public boolean isEmpty() {
    return false;
  }

  /** Formats as {@code [min TO max]}, which is the range syntax understood by parseShape. */
  @Override
  public String toString() {
    final String lower = toStringLV(minLV);
    final String upper = toStringLV(maxLV);
    return "[" + lower + " TO " + upper + "]";
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    final NRShape other = (NRShape) o;
    return maxLV.equals(other.maxLV) && minLV.equals(other.minLV);
  }

  @Override
  public int hashCode() {
    return 31 * minLV.hashCode() + maxLV.hashCode();
  }
}// class NRShape
/** Converts the value to a shape (usually not a range). If it's a JDK object (e.g. Number, Calendar)
* that could be parsed from a String, this class won't do it; you must parse it.
* @param value the object to convert; the accepted types are defined by the implementation
* @return the shape for the value */
public abstract Shape toShape(Object value);
/** Parses either a single value or a range. A string starting with '[' is treated as a range
 * matching this BNF:
 * <pre>
 *   '[' + parseShapeLV + ' TO ' + parseShapeLV + ']'
 * </pre>
 * Anything else is parsed as one shape via {@link #parseShapeLV(String)}, except that a leading
 * '{' (an exclusive range) is rejected.
 * The range syntax is the same thing as the toString() of the range shape, notwithstanding range
 * optimization.
 * @param str not null or empty
 * @return not null
 * @throws java.text.ParseException If there is a problem
 */
public Shape parseShape(String str) throws ParseException {
  if (str == null || str.isEmpty()) {
    throw new IllegalArgumentException("str is null or blank");
  }
  final char first = str.charAt(0);
  if (first == '{') {
    throw new ParseException("Exclusive ranges not supported; got "+str, 0);
  }
  if (first != '[') {
    return parseShapeLV(str);
  }
  //it's a range
  final int lastIdx = str.length()-1;
  if (str.charAt(lastIdx) != ']') {
    throw new ParseException("If starts with [ must end with ]; got "+str, lastIdx);
  }
  final String separator = " TO ";
  final int sepIdx = str.indexOf(separator);
  if (sepIdx < 0) {
    throw new ParseException("If starts with [ must contain ' TO '; got "+str, -1);
  }
  final String lowerStr = str.substring(1, sepIdx);
  final String upperStr = str.substring(sepIdx + separator.length(), lastIdx);
  return toRangeShape(parseShapeLV(lowerStr), parseShapeLV(upperStr));
}
/** Parse a String to a LevelledValue. "*" should be the full-range.
* @param str the text of a single (non-range) value
* @return the parsed value
* @throws ParseException if {@code str} can't be parsed */
protected abstract LevelledValue parseShapeLV(String str) throws ParseException;
/** Returns a shape that represents the continuous range between {@code start} and {@code end}. It will
 * be optimized: each end is reduced to the coarsest level that still denotes the same boundary, and
 * if both ends then denote the very same value, that single value is returned instead of a range.
 * @param start the lower end; must be a levelled value produced by this tree
 * @param end the upper end; must be a levelled value produced by this tree
 * @return the range, or a single levelled value when the range collapses to one
 * @throws IllegalArgumentException if either argument is null or not a levelled value, or if the
 * arguments are in the wrong order
 */
public Shape toRangeShape(Shape start, Shape end) {
  // Validate each argument on its own so that the message names the argument that is actually wrong,
  // and so that a null argument gets the documented exception rather than a NullPointerException.
  if (!(start instanceof LevelledValue))
    throw new IllegalArgumentException("Must pass "+LevelledValue.class+" but got "
        + (start == null ? "null" : start.getClass()));
  if (!(end instanceof LevelledValue))
    throw new IllegalArgumentException("Must pass "+LevelledValue.class+" but got "
        + (end == null ? "null" : end.getClass()));
  LevelledValue minLV = (LevelledValue) start;
  LevelledValue maxLV = (LevelledValue) end;
  if (minLV.equals(maxLV))
    return minLV;
  //Optimize precision of the range, e.g. April 1st to April 30th is April.
  minLV = minLV.getLVAtLevel(truncateStartVals(minLV, 0));
  maxLV = maxLV.getLVAtLevel(truncateEndVals(maxLV, 0));
  int cmp = comparePrefixLV(minLV, maxLV);
  if (cmp > 0) {
    throw new IllegalArgumentException("Wrong order: "+start+" TO "+end);
  }
  if (cmp == 0 && minLV.getLevel() == maxLV.getLevel())
    return minLV;
  return new NRShape(minLV, maxLV);
}
/** Walks from lv.getLevel() towards the root and returns the first level whose value is not 0,
 * i.e. the coarsest level that still denotes the same start. Levels at or above {@code endLevel}
 * are not examined; {@code endLevel} is returned if nothing is found. */
private int truncateStartVals(LevelledValue lv, int endLevel) {
  int level = lv.getLevel();
  while (level > endLevel) {
    if (lv.getValAtLevel(level) != 0) {
      return level;
    }
    level--;
  }
  return endLevel;
}
/** The counterpart of truncateStartVals for the end of a range: walks from lv.getLevel() towards
 * the root and returns the first level whose value is not the last sub-cell of its parent.
 * Levels at or above {@code endLevel} are not examined; {@code endLevel} is returned if nothing
 * is found. */
private int truncateEndVals(LevelledValue lv, int endLevel) {
  int level = lv.getLevel();
  while (level > endLevel) {
    final LevelledValue parent = lv.getLVAtLevel(level-1);
    final int lastCellNum = getNumSubCells(parent) - 1;
    if (lv.getValAtLevel(level) != lastCellNum) {
      return level;
    }
    level--;
  }
  return endLevel;
}
//
// NumberRangePrefixTree
//
// Maximum number of sub-cells of a cell, indexed by the parent's level (so index 0 is for the world cell).
protected final int[] maxSubCellsByLevel;
// Term length in bytes (excluding any leaf byte) of a cell at each level; index 0 is 0.
protected final int[] termLenByLevel;
// Reverse lookup of termLenByLevel: term length (excluding any leaf byte) to level, or -1 if no level has that length.
protected final int[] levelByTermLen;
protected final int maxTermLen; // how long could cell.getToken... (that is a leaf) possibly be?
/** Sets up the level/term-length lookup tables. Each level is encoded in one byte, or in two bytes
* when it can have 256 or more sub-cells.
* @param maxSubCellsByLevel the maximum number of sub-cells per level; its length is the number of
* levels. The array is kept by reference, not copied.
* @throws IllegalArgumentException if any entry is less than 2 or is 2^15 or more */
protected NumberRangePrefixTree(int[] maxSubCellsByLevel) {
super(DUMMY_CTX, maxSubCellsByLevel.length);
this.maxSubCellsByLevel = maxSubCellsByLevel;
// Fill termLenByLevel
this.termLenByLevel = new int[maxLevels + 1];
termLenByLevel[0] = 0;
final int MAX_STATES = 1 << 15;//1 bit less than 2 bytes
for (int level = 1; level <= maxLevels; level++) {
final int states = maxSubCellsByLevel[level - 1];
if (states >= MAX_STATES || states <= 1) {
throw new IllegalArgumentException("Max states is "+MAX_STATES+", given "+states+" at level "+level);
}
boolean twoBytes = states >= 256;
termLenByLevel[level] = termLenByLevel[level-1] + (twoBytes ? 2 : 1);
}
maxTermLen = termLenByLevel[maxLevels] + 1;// + 1 for leaf byte
// Fill levelByTermLen
levelByTermLen = new int[maxTermLen];
levelByTermLen[0] = 0;
for (int level = 1; level < termLenByLevel.length; level++) {
int termLen = termLenByLevel[level];
int prevTermLen = termLenByLevel[level-1];
if (termLen - prevTermLen == 2) {//2 byte delta
//if the term doesn't completely cover this cell then it must be a leaf of the prior.
levelByTermLen[termLen-1] = -1;//won't be used; otherwise erroneous
levelByTermLen[termLen] = level;
} else {//1 byte delta
assert termLen - prevTermLen == 1;
levelByTermLen[termLen] = level;
}
}
}
/** Returns the simple class name of the concrete tree. */
@Override
public String toString() {
return getClass().getSimpleName();
}
/** Always returns the maximum number of levels; the {@code dist} argument is ignored. */
@Override
public int getLevelForDistance(double dist) {
return maxLevels;
}
/** Not applicable to this tree; always throws {@link UnsupportedOperationException}. */
@Override
public double getDistanceForLevel(int level) {
throw new UnsupportedOperationException("Not applicable.");
}
/** Builds a levelled value from a stack of cell numbers.
 * @param valStack the cell number for each level; index 0 holds the number for level 1
 * @param len how many entries of {@code valStack} to use, which is also the level of the result
 * @return the cell at level {@code len} of a freshly created cell stack */
protected Shape toShape(int[] valStack, int len) {
  final NRCell[] stack = newCellStack(len);
  for (int level = 1; level <= len; level++) {
    stack[level].resetCellWithCellNum(valStack[level-1]);
  }
  return stack[len];
}
/** Creates a new full-depth cell stack and returns its level-0 (world) cell. */
@Override
public Cell getWorldCell() {
  final NRCell[] stack = newCellStack(maxLevels);
  return stack[0];
}
/** Creates a stack of cells for levels 0 through {@code levels}. Every cell references the array
 * it lives in and they all share one term {@link BytesRef}.
 * @param levels the deepest level to create
 * @return the new array, indexed by level */
protected NRCell[] newCellStack(int levels) {
  final NRCell[] stack = new NRCell[levels + 1];
  final BytesRef sharedTerm = new BytesRef(maxTermLen);
  int level = 0;
  while (level < stack.length) {
    stack[level] = new NRCell(stack, sharedTerm, level);
    level++;
  }
  return stack;
}
/** Positions a cell on the given term without copying its bytes and without decoding cell numbers.
* @param term the indexed term; a trailing 0 byte marks a leaf
* @param scratch a cell from a previous call to re-use its cell stack, or null to create a new stack
* @return the cell of the stack at the level implied by the term's length */
@Override
public Cell readCell(BytesRef term, Cell scratch) {
if (scratch == null)
scratch = getWorldCell();
//We decode level, leaf, and populate bytes.
//reverse lookup term length to the level and hence the cell
NRCell[] cellsByLevel = ((NRCell) scratch).cellsByLevel;
boolean isLeaf = term.bytes[term.offset + term.length - 1] == 0;
int lenNoLeaf = isLeaf ? term.length - 1 : term.length;
NRCell result = cellsByLevel[levelByTermLen[lenNoLeaf]];
// Stash the stack's own buffer on cell 0 before pointing the shared term at the caller's bytes.
if (cellsByLevel[0].termBuf == null)
cellsByLevel[0].termBuf = result.term.bytes;//a kluge; see cell.ensureOwnTermBytes()
result.term.bytes = term.bytes;
result.term.offset = term.offset;
result.term.length = lenNoLeaf;//technically this isn't used but may help debugging
result.reset();
if (isLeaf)
result.setLeaf();
result.cellNumber = -1;//lazy decode flag
return result;
}
/** Returns the number of sub-cells beneath {@code lv}. This default answers with the configured
* maximum for lv's level. */
protected int getNumSubCells(LevelledValue lv) {
return maxSubCellsByLevel[lv.getLevel()];
}
//
// NRCell
//
/** Most of the PrefixTree implementation is in this one class, which is both
* the Cell, the CellIterator, and the Shape to reduce object allocation. It's implemented as a re-used array/stack
* of Cells at adjacent levels, that all have a reference back to the cell array to traverse. They also share a common
* BytesRef for the term.
* @lucene.internal */
protected class NRCell extends CellIterator implements Cell, LevelledValue {
//Shared: (TODO put this in a new class)
final NRCell[] cellsByLevel;
final BytesRef term;//AKA the token
byte[] termBuf;// see ensureOwnTermBytes(), only for cell0
//Cell state...
final int cellLevel; // assert levelStack[cellLevel] == this
int cellNumber; //relative to parent cell. It's unused for level 0. Starts at 0.
SpatialRelation cellShapeRel;
boolean cellIsLeaf;
//CellIterator state is defined further below
/**
 * Creates the cell for one level of a cell stack. The caller is expected to store the new cell at
 * {@code cellsByLevel[cellLevel]}, which must still be empty at this point.
 *
 * @param cellsByLevel the stack this cell belongs to
 * @param term the term buffer shared with the rest of the stack
 * @param cellLevel this cell's level, i.e. its index in the stack
 */
NRCell(NRCell[] cellsByLevel, BytesRef term, int cellLevel) {
  this.cellLevel = cellLevel;
  this.term = term;
  this.cellsByLevel = cellsByLevel;
  this.cellIsLeaf = false;
  if (cellLevel == 0) {
    this.cellNumber = 0;
  } else {
    this.cellNumber = -1;
  }
  assert cellsByLevel[cellLevel] == null;
}
/**
 * Makes sure {@code term.bytes} is an array that is safe to modify. A non-null {@code termBuf} on the
 * level 0 cell signals that {@code term.bytes} currently belongs to someone else; in that case the
 * term's content is copied into that spare buffer, the term is switched over to it, and the spare is
 * cleared. When the spare is null the bytes are already our own and nothing happens.
 */
void ensureOwnTermBytes() {
  final NRCell root = cellsByLevel[0];
  final byte[] spare = root.termBuf;
  if (spare != null) {
    System.arraycopy(term.bytes, term.offset, spare, 0, term.length);
    term.bytes = spare;
    term.offset = 0;
    root.termBuf = null;
  }
}
/** Clears the per-visit state of this cell: no shape relation and not a leaf. */
private void reset() {
  cellShapeRel = null;
  cellIsLeaf = false;
}
/**
 * Resets this cell and assigns it a new cell number. For a valid (non-negative) number the bytes of
 * this level are also written into the shared term and the term is truncated to end at this level.
 * {@code ensureDecoded()} performs the reverse, from bytes back to numbers.
 *
 * @param cellNumber the new number relative to the parent cell; a negative value skips the byte update
 */
private void resetCellWithCellNum(int cellNumber) {
  reset();
  final boolean valid = cellNumber >= 0;
  if (valid) {
    ensureOwnTermBytes();
    final int level = getLevel();
    final int end = termLenByLevel[level];
    final int bytesAtThisLevel = end - termLenByLevel[level - 1];
    if (bytesAtThisLevel > 1) {
      //low byte holds the right 7 bits, plus 1 (may overflow to the 8th bit which is okay)
      term.bytes[end - 2] = (byte) (cellNumber >> 7);
      term.bytes[end - 1] = (byte) ((cellNumber & 0x7F) + 1);
    } else {
      term.bytes[end - 1] = (byte) (cellNumber + 1);
    }
    assert term.bytes[end - 1] != 0;
    term.length = end;
  }
  this.cellNumber = cellNumber;
}
/**
 * Decodes cell numbers from the term bytes if this cell has not been decoded yet (negative
 * {@code cellNumber}). Every level from 1 up to this cell's level is decoded into its cell of the
 * stack. This is the inverse of {@code resetCellWithCellNum()}.
 */
private void ensureDecoded() {
  if (cellNumber >= 0) {
    return;//already decoded
  }
  final int myLevel = getLevel();
  int level = 1;
  while (level <= myLevel) {
    final NRCell target = cellsByLevel[level];
    final int end = termLenByLevel[level];
    final int bytesAtThisLevel = end - termLenByLevel[level - 1];
    if (bytesAtThisLevel > 1) {
      final int high = term.bytes[term.offset + end - 2] & 0xFF;
      final int low = term.bytes[term.offset + end - 1] & 0xFF;
      assert low - 1 < (1 << 7);
      target.cellNumber = (high << 7) + (low - 1);
      assert target.cellNumber < 1 << 15;
    } else {
      target.cellNumber = (term.bytes[term.offset + end - 1] & 0xFF) - 1;
      assert target.cellNumber < 255;
    }
    assert target.cellNumber >= 0;
    level++;
  }
}
/** Returns the level of this cell, which is also its index in the cell stack. */
@Override // for Cell & for LevelledValue
public int getLevel() {
  return this.cellLevel;
}
/** Returns the shape relation last stored on this cell, or null if none has been set since the last reset. */
@Override
public SpatialRelation getShapeRel() {
  return this.cellShapeRel;
}
/** Stores the given shape relation on this cell. */
@Override
public void setShapeRel(SpatialRelation rel) {
  this.cellShapeRel = rel;
}
/** Returns whether this cell is currently flagged as a leaf. */
@Override
public boolean isLeaf() {
  return this.cellIsLeaf;
}
/** Flags this cell as a leaf. */
@Override
public void setLeaf() {
  this.cellIsLeaf = true;
}
/** Returns this cell as its own shape, after making sure its cell numbers have been decoded. */
@Override
public Shape getShape() {
  ensureDecoded();
  return this;
}
/**
 * Points a BytesRef at this cell's token, without any leaf byte. The bytes are shared with the
 * cell's term, not copied.
 *
 * @param result the BytesRef to fill in, or null to have a new one allocated
 * @return the filled-in BytesRef, whose length is the term length for this cell's level
 */
@Override
public BytesRef getTokenBytesNoLeaf(BytesRef result) {
  final BytesRef out = (result != null) ? result : new BytesRef();
  out.bytes = term.bytes;
  out.offset = term.offset;
  out.length = termLenByLevel[cellLevel];
  assert out.length <= term.length;
  return out;
}
/**
 * Like {@link #getTokenBytesNoLeaf(BytesRef)}, but when this cell is a leaf a zero byte is appended
 * to the token. Since that writes into the term's byte array, ownership of the array is ensured first.
 *
 * @param result the BytesRef to fill in, or null to have a new one allocated
 * @return the filled-in BytesRef
 */
@Override
public BytesRef getTokenBytesWithLeaf(BytesRef result) {
  ensureOwnTermBytes();//normally shouldn't do anything
  final BytesRef out = getTokenBytesNoLeaf(result);
  if (!isLeaf()) {
    return out;
  }
  out.bytes[out.length++] = 0;
  return out;
}
/**
 * Tests whether this cell's token is a prefix of the other cell's token. Both terms are temporarily
 * shortened to their level's length for the comparison and then restored.
 *
 * @param c the other cell; must be an NRCell from a different cell stack
 * @return true if the other cell's token starts with this cell's token
 */
@Override
public boolean isPrefixOf(Cell c) {
  final NRCell that = (NRCell) c;
  assert term != that.term;
  //borrow both terms for the comparison; the lengths are put back afterwards
  final int mySavedLen = term.length;
  term.length = termLenByLevel[getLevel()];
  final int thatSavedLen = that.term.length;
  that.term.length = termLenByLevel[that.getLevel()];
  final boolean isPrefix = StringHelper.startsWith(that.term, term);
  term.length = mySavedLen;
  that.term.length = thatSavedLen;
  return isPrefix;
}
/**
 * Compares this cell's token with another cell's token, ignoring leaf bytes. Both terms are
 * temporarily shortened to their level's length for the comparison and then restored.
 *
 * @param fromCell the other cell; must be an NRCell from a different cell stack
 * @return the result of comparing the two shortened terms
 */
@Override
public int compareToNoLeaf(Cell fromCell) {
  final NRCell that = (NRCell) fromCell;
  assert term != that.term;
  //borrow both terms for the comparison; the lengths are put back afterwards
  final int mySavedLen = term.length;
  final int thatSavedLen = that.term.length;
  term.length = termLenByLevel[getLevel()];
  that.term.length = termLenByLevel[that.getLevel()];
  final int cmp = term.compareTo(that.term);
  term.length = mySavedLen;
  that.term.length = thatSavedLen;
  return cmp;
}
/**
 * Returns an iterator over this cell's sub-cells. The iterator is the next-level cell of the same
 * stack, re-initialized with the given filter, so no new object is allocated.
 *
 * @param shapeFilter the filter to hand to the sub-cell iterator
 * @return the cell one level below this one, acting as the iterator
 */
@Override
public CellIterator getNextLevelCells(Shape shapeFilter) {
  ensureDecoded();
  final NRCell child = cellsByLevel[cellLevel + 1];
  child.initIter(shapeFilter);
  return child;
}
//----------- CellIterator
Shape iterFilter;//LevelledValue or NRShape
boolean iterFirstIsIntersects;
boolean iterLastIsIntersects;
int iterFirstCellNumber;
int iterLastCellNumber;
/**
 * Prepares this cell to iterate over the sub-cells of its parent (the cell one level up in the stack).
 * It sets the iter* fields: the first and last cell numbers to visit, and for each of those two
 * whether that cell is flagged as "intersects" (hasNext() then does not mark it as a leaf).
 * A last cell number of -1 results in no cells being iterated.
 *
 * @param filter the shape to restrict iteration to; null, or a LevelledValue at level 0, means no filter
 */
private void initIter(Shape filter) {
cellNumber = -1;
if (filter instanceof LevelledValue && ((LevelledValue)filter).getLevel() == 0)
filter = null;//world means everything -- no filter
iterFilter = filter;
NRCell parent = getLVAtLevel(getLevel()-1);
// Initialize iter* members.
//no filter means all subcells
if (filter == null) {
iterFirstCellNumber = 0;
iterFirstIsIntersects = false;
iterLastCellNumber = getNumSubCells(parent) - 1;
iterLastIsIntersects = false;
return;
}
// the filter is either a range (NRShape) with distinct ends, or a single value used as both ends
final LevelledValue minLV;
final LevelledValue maxLV;
if (filter instanceof NRShape) {
NRShape nrShape = (NRShape) iterFilter;
minLV = nrShape.getMinLV();
maxLV = nrShape.getMaxLV();
} else {
minLV = (LevelledValue)iterFilter;
maxLV = minLV;
}
//fast path check when using same filter
if (iterFilter == parent.iterFilter) {
// the parent's own iteration state says whether it sits on the filter's first and/or last edge
if (parent.iterFirstIsIntersects && parent.cellNumber == parent.iterFirstCellNumber
&& minLV.getLevel() >= getLevel()) {
iterFirstCellNumber = minLV.getValAtLevel(getLevel());
iterFirstIsIntersects = (minLV.getLevel() > getLevel());
} else {
iterFirstCellNumber = 0;
iterFirstIsIntersects = false;
}
if (parent.iterLastIsIntersects && parent.cellNumber == parent.iterLastCellNumber
&& maxLV.getLevel() >= getLevel()) {
iterLastCellNumber = maxLV.getValAtLevel(getLevel());
iterLastIsIntersects = (maxLV.getLevel() > getLevel());
} else {
iterLastCellNumber = getNumSubCells(parent) - 1;
iterLastIsIntersects = false;
}
// when first and last are the same cell, make both flags agree
if (iterFirstCellNumber == iterLastCellNumber) {
if (iterLastIsIntersects)
iterFirstIsIntersects = true;
else if (iterFirstIsIntersects)
iterLastIsIntersects = true;
}
return;
}
//uncommon to get here, except for level 1 which always happens
int startCmp = comparePrefixLV(minLV, parent);
if (startCmp > 0) {//start comes after this cell
iterFirstCellNumber = 0;
iterFirstIsIntersects = false;
iterLastCellNumber = -1;//so ends early (no cells)
iterLastIsIntersects = false;
return;
}
int endCmp = comparePrefixLV(maxLV, parent);//compare to end cell
if (endCmp < 0) {//end comes before this cell
iterFirstCellNumber = 0;
iterFirstIsIntersects = false;
iterLastCellNumber = -1;//so ends early (no cells)
iterLastIsIntersects = false;
return;
}
if (startCmp < 0 || minLV.getLevel() < getLevel()) {
//start comes before...
iterFirstCellNumber = 0;
iterFirstIsIntersects = false;
} else {
iterFirstCellNumber = minLV.getValAtLevel(getLevel());
iterFirstIsIntersects = (minLV.getLevel() > getLevel());
}
if (endCmp > 0 || maxLV.getLevel() < getLevel()) {
//end comes after...
iterLastCellNumber = getNumSubCells(parent) - 1;
iterLastIsIntersects = false;
} else {
iterLastCellNumber = maxLV.getValAtLevel(getLevel());
iterLastIsIntersects = (maxLV.getLevel() > getLevel());
}
// NOTE(review): unlike the fast path above, this path does not reconcile the two flags when first == last -- confirm that is intended
}
/**
 * Advances this cell to the next cell number of the iteration, if there is one, and sets its leaf flag
 * and shape relation. A cell on an "intersects" edge of the iteration is related as CONTAINS when the
 * iteration spans a single cell number and as INTERSECTS otherwise; every other cell becomes a leaf
 * related as WITHIN.
 *
 * @return true if a next cell is available (this same object), false when the iteration is exhausted
 */
@Override
public boolean hasNext() {
  thisCell = null;
  if (nextCell != null) {
    return true;//calling hasNext twice in a row
  }
  if (cellNumber >= iterLastCellNumber) {
    return false;
  }

  final int advanceTo;
  if (cellNumber < iterFirstCellNumber) {
    advanceTo = iterFirstCellNumber;
  } else {
    advanceTo = cellNumber + 1;
  }
  resetCellWithCellNum(advanceTo);

  final boolean onFirstEdge = cellNumber == iterFirstCellNumber && iterFirstIsIntersects;
  final boolean onLastEdge = cellNumber == iterLastCellNumber && iterLastIsIntersects;
  if (onFirstEdge || onLastEdge) {
    if (iterFirstCellNumber == iterLastCellNumber) {
      setShapeRel(SpatialRelation.CONTAINS);
    } else {
      setShapeRel(SpatialRelation.INTERSECTS);
    }
  } else {
    setLeaf();
    setShapeRel(SpatialRelation.WITHIN);
  }
  nextCell = this;
  return true;
}
//TODO override nextFrom to be more efficient
//----------- LevelledValue / Shape
/**
 * Returns the cell number stored at the given level of this cell's stack.
 *
 * @param level the level to read
 * @return that level's cell number, which must already be initialized (non-negative)
 */
@Override
public int getValAtLevel(int level) {
  final NRCell cellAtLevel = cellsByLevel[level];
  final int cellNum = cellAtLevel.cellNumber;
  assert cellNum >= 0;//initialized
  return cellNum;
}
/**
 * Returns the cell of this stack at the given level.
 *
 * @param level a level no deeper than this cell's own level
 * @return the cell at that level
 */
@Override
public NRCell getLVAtLevel(int level) {
  assert level <= cellLevel;
  final NRCell ancestor = cellsByLevel[level];
  return ancestor;
}
/**
 * Relates this cell to an arbitrary shape. If the shape is the very filter this cell was iterated
 * with and a relation was already recorded, that recorded relation is returned. Otherwise the call is
 * dispatched on the shape's type; an unknown shape type is asked to relate to this cell and the
 * answer is transposed.
 *
 * @param shape the shape to relate to
 * @return the relation of this cell to {@code shape}
 */
@Override
public SpatialRelation relate(Shape shape) {
  ensureDecoded();
  final boolean alreadyKnown = (shape == iterFilter) && (cellShapeRel != null);
  if (alreadyKnown) {
    return cellShapeRel;
  }
  if (shape instanceof LevelledValue) {
    return relate((LevelledValue) shape);
  } else if (shape instanceof NRShape) {
    return relate((NRShape) shape);
  } else {
    return shape.relate(this).transpose();
  }
}
/**
 * Relates this cell to a single levelled value. The result is DISJOINT when the prefix comparison is
 * non-zero, WITHIN (or equal) when this cell is at a deeper level than {@code lv}, and CONTAINS
 * otherwise. INTERSECTS is never returned.
 *
 * @param lv the value to relate to
 * @return DISJOINT, WITHIN or CONTAINS
 */
public SpatialRelation relate(LevelledValue lv) {
  ensureDecoded();
  if (comparePrefixLV(this, lv) != 0) {
    return SpatialRelation.DISJOINT;
  }
  final boolean deeperThanOther = getLevel() > lv.getLevel();
  return deeperThanOther
      ? SpatialRelation.WITHIN//or equals
      : SpatialRelation.CONTAINS;
}
/**
 * Relates this cell to a range shape by comparing the range's two ends against this cell's prefix.
 *
 * @param nrShape the range to relate to
 * @return DISJOINT if the range starts after or ends before this cell; WITHIN (or equal) if both ends
 *         cover this cell; CONTAINS if both ends share this cell's prefix at this level or deeper;
 *         INTERSECTS otherwise
 */
public SpatialRelation relate(NRShape nrShape) {
  ensureDecoded();
  final int minCmp = comparePrefixLV(nrShape.getMinLV(), this);
  if (minCmp > 0) {
    return SpatialRelation.DISJOINT;//range starts after this cell
  }
  final int maxCmp = comparePrefixLV(nrShape.getMaxLV(), this);
  if (maxCmp < 0) {
    return SpatialRelation.DISJOINT;//range ends before this cell
  }

  final boolean startCovers =
      minCmp < 0 || (minCmp == 0 && nrShape.getMinLV().getLevel() <= getLevel());
  if (startCovers) {
    final boolean endCovers =
        maxCmp > 0 || (maxCmp == 0 && nrShape.getMaxLV().getLevel() <= getLevel());
    if (endCovers) {
      return SpatialRelation.WITHIN;//or equals
    }
  }

  if (minCmp == 0 && maxCmp == 0
      && nrShape.getMinLV().getLevel() >= getLevel()
      && nrShape.getMaxLV().getLevel() >= getLevel()) {
    return SpatialRelation.CONTAINS;
  }
  return SpatialRelation.INTERSECTS;
}
/** Not supported by this shape; always throws UnsupportedOperationException. */
@Override
public Rectangle getBoundingBox() {
throw new UnsupportedOperationException();
}
/** Always reports true. */
@Override
public boolean hasArea() {
return true;
}
/** Not supported by this shape; always throws UnsupportedOperationException. */
@Override
public double getArea(SpatialContext ctx) {
throw new UnsupportedOperationException();
}
/** Not supported by this shape; always throws UnsupportedOperationException. */
@Override
public Point getCenter() {
throw new UnsupportedOperationException();
}
/** Not supported by this shape; always throws UnsupportedOperationException. */
@Override
public Shape getBuffered(double distance, SpatialContext ctx) { throw new UnsupportedOperationException(); }
/** Always reports false. */
@Override
public boolean isEmpty() {
return false;
}
//------- Object
/**
 * Two cells are equal when they are at the same level and their tokens (without leaf byte) have the
 * same bytes. The other cell is expected to come from a different cell stack.
 *
 * @param obj the object to compare with
 * @return true if {@code obj} is an NRCell at the same level with an equal token
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof NRCell)) {
    return false;
  }
  final NRCell that = (NRCell) obj;
  assert term != that.term;
  if (getLevel() != that.getLevel()) {
    return false;
  }
  //borrow both terms for the comparison; the lengths are put back afterwards
  final int mySavedLen = term.length;
  final int thatSavedLen = that.term.length;
  final BytesRef myToken = getTokenBytesNoLeaf(term);
  final BytesRef thatToken = that.getTokenBytesNoLeaf(that.term);
  final boolean same = myToken.equals(thatToken);
  term.length = mySavedLen;
  that.term.length = thatSavedLen;
  return same;
}
/** Hashes this cell's token (without leaf byte); the term's length is restored afterwards. */
@Override
public int hashCode() {
  //borrow the term for hashing; the length is put back afterwards
  final int savedLen = term.length;
  final BytesRef token = getTokenBytesNoLeaf(term);
  final int hash = token.hashCode();
  term.length = savedLen;
  return hash;
}
/**
 * Returns the textual form of this cell as produced by the enclosing tree's {@code toStringLV}, with
 * a bullet appended when the cell is a leaf so that leaf and non-leaf cells can be told apart.
 */
@Override
public String toString() {
  ensureDecoded();
  String str = NumberRangePrefixTree.this.toStringLV(this);
  if (isLeaf()) {
    //bullet (won't be confused with textual representation); written as an escape so that it
    //survives regardless of the source file's encoding
    str += "\u2022";
  }
  return str;
}
} // END OF NRCell
}

View File

@ -234,19 +234,20 @@ public abstract class StrategyTestCase extends SpatialTestCase {
CheckHits.checkExplanations(q, "", indexSearcher);
}
protected void assertOperation(Map<String,Shape> indexedDocs,
SpatialOperation operation, Shape queryShape) {
//Generate truth via brute force
Set<String> expectedIds = new HashSet<>();
for (Map.Entry<String, Shape> stringShapeEntry : indexedDocs.entrySet()) {
if (operation.evaluate(stringShapeEntry.getValue(), queryShape))
expectedIds.add(stringShapeEntry.getKey());
}
SpatialTestQuery testQuery = new SpatialTestQuery();
testQuery.args = new SpatialArgs(operation, queryShape);
testQuery.ids = new ArrayList<>(expectedIds);
runTestQuery(SpatialMatchConcern.FILTER, testQuery);
/**
 * Indexes a single shape as document "0", runs one query against it, and fails unless the document
 * was found exactly when {@code match} says it should be. The index is emptied again afterwards.
 *
 * @param indexedShape the shape to index
 * @param operation the predicate to query with
 * @param queryShape the shape to query with
 * @param match whether the indexed shape is expected to be found
 * @throws IOException if indexing or searching fails
 */
protected void testOperation(Shape indexedShape, SpatialOperation operation,
                             Shape queryShape, boolean match) throws IOException {
  //the stated expectation must agree with brute-force evaluation, except that equal shapes are
  //allowed either way for Contains and IsWithin
  final boolean expectationIsSane =
      (operation.evaluate(indexedShape, queryShape) == match)
      || (indexedShape.equals(queryShape)
          && (operation == SpatialOperation.Contains || operation == SpatialOperation.IsWithin));
  assertTrue("Faulty test", expectationIsSane);

  adoc("0", indexedShape);
  commit();

  final SpatialArgs args = new SpatialArgs(operation, queryShape);
  Query query = strategy.makeQuery(args);
  final SearchResults got = executeQuery(query, 1);
  assert got.numFound <= 1 : "unclean test env";
  final boolean found = (got.numFound == 1);
  if (found != match) {
    fail(operation+" I:" + indexedShape + " Q:" + queryShape);
  }
  deleteAll();//clean up after ourselves
}
}

View File

@ -0,0 +1,140 @@
package org.apache.lucene.spatial.prefix;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
/** Base test harness, ideally for SpatialStrategy impls that have exact results
 * (not grid approximated), hence "not fuzzy".
 */
public abstract class BaseNonFuzzySpatialOpStrategyTest extends StrategyTestCase {

  //TODO this is partially redundant with StrategyTestCase.runTestQuery & testOperation

  /**
   * Checks an operation against randomly generated shapes: first that an empty index yields no hits,
   * then that 1 to 6 random indexed shapes queried with at least 20 random query shapes match exactly
   * what brute-force evaluation says, with "havoc" enabled.
   *
   * @param operation the predicate under test
   * @throws IOException if indexing or searching fails
   */
  protected void testOperationRandomShapes(final SpatialOperation operation) throws IOException {
    //first show that when there's no data, a query will result in no results
    final SpatialArgs emptyIndexArgs = new SpatialArgs(operation, randomQueryShape());
    Query emptyIndexQuery = strategy.makeQuery(emptyIndexArgs);
    final SearchResults emptyIndexResults = executeQuery(emptyIndexQuery, 1);
    assertEquals(0, emptyIndexResults.numFound);

    final int numIndexedShapes = randomIntBetween(1, 6);
    final List<Shape> indexedShapes = new ArrayList<>(numIndexedShapes);
    while (indexedShapes.size() < numIndexedShapes) {
      indexedShapes.add(randomIndexedShape());
    }

    final int numQueryShapes = atLeast(20);
    final List<Shape> queryShapes = new ArrayList<>(numQueryShapes);
    while (queryShapes.size() < numQueryShapes) {
      queryShapes.add(randomQueryShape());
    }

    testOperation(operation, indexedShapes, queryShapes, true/*havoc*/);
  }

  /**
   * Indexes the given shapes (document id = list position), then runs every query shape and verifies
   * that the hits are exactly the documents for which {@code operation.evaluate} is true.
   *
   * @param operation the predicate under test
   * @param indexedShapes the shapes to index; entries of documents deleted by havoc are set to null
   * @param queryShapes the shapes to query with
   * @param havoc whether to randomly commit while indexing, randomly delete documents, and call
   *              {@link #preQueryHavoc()} before each query
   * @throws IOException if indexing or searching fails
   */
  protected void testOperation(final SpatialOperation operation,
                               List<Shape> indexedShapes, List<Shape> queryShapes, boolean havoc) throws IOException {
    final int numDocs = indexedShapes.size();

    //Main index loop:
    for (int docNum = 0; docNum < numDocs; docNum++) {
      adoc(Integer.toString(docNum), indexedShapes.get(docNum));
      if (havoc && random().nextInt(10) == 0) {
        commit();//intermediate commit, produces extra segments
      }
    }

    if (havoc) {
      //delete some documents randomly
      for (int docNum = 0; docNum < numDocs; docNum++) {
        if (random().nextInt(10) != 0) {
          continue;
        }
        deleteDoc(Integer.toString(docNum));
        indexedShapes.set(docNum, null);
      }
    }
    commit();

    //Main query loop:
    for (final Shape queryShape : queryShapes) {
      if (havoc) {
        preQueryHavoc();
      }

      //Generate truth via brute force:
      // We ensure true-positive matches (if the predicate on the raw shapes match
      // then the search should find those same matches).
      final Set<String> stillExpectedIds = new LinkedHashSet<>();//true-positives not yet seen
      for (int docNum = 0; docNum < numDocs; docNum++) {
        final Shape indexedShape = indexedShapes.get(docNum);
        if (indexedShape != null && operation.evaluate(indexedShape, queryShape)) {
          stillExpectedIds.add(Integer.toString(docNum));
        }
      }

      //Search and verify results
      final SpatialArgs args = new SpatialArgs(operation, queryShape);
      Query query = strategy.makeQuery(args);
      final SearchResults got = executeQuery(query, 100);
      for (SearchResult result : got.results) {
        final String hitId = result.getId();
        final boolean wasExpected = stillExpectedIds.remove(hitId);
        if (!wasExpected) {
          fail("Shouldn't match", hitId, indexedShapes, queryShape);
        }
      }
      if (!stillExpectedIds.isEmpty()) {
        final String missedId = stillExpectedIds.iterator().next();
        fail("Should have matched", missedId, indexedShapes, queryShape);
      }
    }
  }

  /** Fails the test with a message naming the offending document, its indexed shape and the query shape. */
  private void fail(String label, String id, List<Shape> indexedShapes, Shape queryShape) {
    final Shape indexedShape = indexedShapes.get(Integer.parseInt(id));
    fail(label + " I#" + id + ":" + indexedShape + " Q:" + queryShape);
  }

  /**
   * Hook run before each query when havoc is on. For a RecursivePrefixTreeStrategy it picks a random
   * prefix grid scan level between 0 and the grid's max levels.
   */
  protected void preQueryHavoc() {
    if (!(strategy instanceof RecursivePrefixTreeStrategy)) {
      return;
    }
    final RecursivePrefixTreeStrategy rpts = (RecursivePrefixTreeStrategy) strategy;
    final int scanLevel = randomInt(rpts.getGrid().getMaxLevels());
    rpts.setPrefixGridScanLevel(scanLevel);
  }

  /** Supplies a random shape to index. */
  protected abstract Shape randomIndexedShape();

  /** Supplies a random shape to query with. */
  protected abstract Shape randomQueryShape();
}

View File

@ -0,0 +1,130 @@
package org.apache.lucene.spatial.prefix;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.spatial.NumberRangePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import java.io.IOException;
import java.util.Calendar;
/**
 * Tests {@link NumberRangePrefixTreeStrategy} with the {@link DateRangePrefixTree}, using random
 * dates and date ranges that all fall within one randomly chosen era and year per test.
 */
public class DateNRStrategyTest extends BaseNonFuzzySpatialOpStrategyTest {

  static final int ITERATIONS = 10;

  DateRangePrefixTree tree;

  /** Calendar.ERA value shared by all random calendars of a test; 0 or 1. */
  int era;
  /** Calendar.YEAR value shared by all random calendars of a test; 1 to 2,000,000. */
  int year;

  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    tree = DateRangePrefixTree.INSTANCE;
    strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
    era = random().nextBoolean() ? 0 : 1;
    year = 1 + random().nextInt(2_000_000);
  }

  @Test
  @Repeat(iterations = ITERATIONS)
  public void testIntersects() throws IOException {
    testOperationRandomShapes(SpatialOperation.Intersects);
  }

  @Test
  @Repeat(iterations = ITERATIONS)
  public void testWithin() throws IOException {
    testOperationRandomShapes(SpatialOperation.IsWithin);
  }

  @Test
  @Repeat(iterations = ITERATIONS)
  public void testContains() throws IOException {
    testOperationRandomShapes(SpatialOperation.Contains);
  }

  @Test @Ignore("see LUCENE-5692")
  @Repeat(iterations = ITERATIONS)
  public void testDisjoint() throws IOException {
    testOperationRandomShapes(SpatialOperation.IsDisjointTo);
  }

  @Test
  public void testWithinSame() throws IOException {
    final Calendar cal = tree.newCal();
    cal.set(Calendar.ERA, era);
    cal.set(Calendar.YEAR, year);
    testOperation(
        tree.toShape(cal),
        SpatialOperation.IsWithin,
        tree.toShape(cal), true);//is within itself
  }

  @Test
  public void testWorld() throws IOException {
    testOperation(
        tree.toShape(tree.newCal()),//world matches everything
        SpatialOperation.Contains,
        tree.toShape(randomCalendar()), true);
  }

  /**
   * Returns a range between two random calendars, ordered so that the earlier one is the start.
   * When the tree rejects the pair with a "Differing precision" IllegalArgumentException, the shape
   * of the first calendar alone is returned instead.
   */
  @Override
  protected Shape randomIndexedShape() {
    Calendar cal1 = randomCalendar();
    Shape s1 = tree.toShape(cal1);
    try {
      Calendar cal2 = randomCalendar();
      Shape s2 = tree.toShape(cal2);
      if (cal1.compareTo(cal2) < 0) {
        return tree.toRangeShape(s1, s2);
      } else {
        return tree.toRangeShape(s2, s1);
      }
    } catch (IllegalArgumentException e) {
      //Only the known "Differing precision" rejection is tolerated. Anything else (including an
      //exception without a message) is rethrown so that it is neither swallowed when assertions
      //are disabled nor masked by a NullPointerException.
      final String message = e.getMessage();
      if (message == null || !message.startsWith("Differing precision")) {
        throw e;
      }
      return s1;
    }
  }

  /**
   * Returns a calendar set to a random instant, forced into this test's era and year, and then
   * truncated after a randomly chosen field (or not at all). A "Calendar underflow" assertion from
   * the truncation is tolerated.
   */
  private Calendar randomCalendar() {
    Calendar cal = tree.newCal();
    cal.setTimeInMillis(random().nextLong());
    cal.set(Calendar.ERA, era);
    cal.set(Calendar.YEAR, year);
    try {
      tree.clearFieldsAfter(cal, random().nextInt(Calendar.FIELD_COUNT+1)-1);
    } catch (AssertionError e) {
      //null-safe: an AssertionError without a message must be rethrown as-is, not turned into an NPE
      if (!"Calendar underflow".equals(e.getMessage())) {
        throw e;
      }
    }
    return cal;
  }

  @Override
  protected Shape randomQueryShape() {
    return randomIndexedShape();
  }
}

View File

@ -0,0 +1,169 @@
package org.apache.lucene.spatial.prefix.tree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.SpatialRelation;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Calendar;
import java.util.GregorianCalendar;
/** Tests {@link DateRangePrefixTree}: string/calendar/shape/term round trips, shape relations and range parsing. */
public class DateRangePrefixTreeTest extends LuceneTestCase {

  private final DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;

  public void testRoundTrip() throws Exception {
    Calendar cal = tree.newCal();

    assertEquals("*", tree.toString(cal));

    //test no underflow
    assertTrue(tree.toShape(new int[]{0}, 1).toString().startsWith("-"));

    //Some arbitrary date
    cal.set(2014, Calendar.MAY, 9);
    roundTrip(cal);
    assertEquals("2014-05-09",tree.toString(cal));

    //Earliest date
    cal.setTimeInMillis(Long.MIN_VALUE);
    roundTrip(cal);

    //Farthest date
    cal.setTimeInMillis(Long.MAX_VALUE);
    roundTrip(cal);

    //1BC is "0000".
    cal.clear();
    cal.set(Calendar.ERA, GregorianCalendar.BC);
    cal.set(Calendar.YEAR, 1);
    roundTrip(cal);
    assertEquals("0000", tree.toString(cal));
    //adding a "+" parses to the same; and a trailing 'Z' is fine too
    assertEquals(cal, tree.parseCalendar("+0000Z"));

    //2BC is "-0001"
    cal.clear();
    cal.set(Calendar.ERA, GregorianCalendar.BC);
    cal.set(Calendar.YEAR, 2);
    roundTrip(cal);
    assertEquals("-0001", tree.toString(cal));

    //1AD is "0001"
    cal.clear();
    cal.set(Calendar.YEAR, 1);
    roundTrip(cal);
    assertEquals("0001", tree.toString(cal));

    //test random
    cal.setTimeInMillis(random().nextLong());
    roundTrip(cal);
  }

  //copies from DateRangePrefixTree
  private static final int[] CAL_FIELDS = {
      Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH,
      Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND};

  /**
   * Round-trips the calendar through its string form, its shape and its indexed term, then repeats
   * with one less field of precision until nothing is left (the world cell) or the calendar
   * underflows.
   *
   * @param calOrig the calendar to start from; it is cloned, not modified
   */
  private void roundTrip(Calendar calOrig) throws ParseException {
    Calendar cal = (Calendar) calOrig.clone();
    String lastString = null;
    while (true) {
      String calString = tree.toString(cal);
      //each pass has less precision and so must print shorter
      assert lastString == null || calString.length() < lastString.length();
      //test parseCalendar
      assertEquals(cal, tree.parseCalendar(calString));

      //to Shape and back to Cal
      Shape shape = tree.toShape(cal);
      Calendar cal2 = tree.toCalendar(shape);
      assertEquals(calString, tree.toString(cal2));

      if (!calString.equals("*")) {//not world cell
        //to Term and back to Cell
        Cell cell = (Cell) shape;
        BytesRef term = cell.getTokenBytesNoLeaf(null);
        Cell cell2 = tree.readCell(BytesRef.deepCopyOf(term), null);
        assertEquals(calString, cell, cell2);
        Calendar cal3 = tree.toCalendar(cell2.getShape());
        assertEquals(calString, tree.toString(cal3));

        // setLeaf comparison
        cell2.setLeaf();
        BytesRef termLeaf = cell2.getTokenBytesWithLeaf(null);
        assertTrue(term.compareTo(termLeaf) < 0);
        //expected value first, so that a failure message reads the right way around
        assertEquals(term.length + 1, termLeaf.length);
        assertEquals(0, termLeaf.bytes[termLeaf.offset + termLeaf.length - 1]);
        assertTrue(cell.isPrefixOf(cell2));
      }

      //end of loop; decide if should loop again with lower precision
      final int calPrecField = tree.getCalPrecisionField(cal);
      if (calPrecField == -1)
        break;
      int fieldIdx = Arrays.binarySearch(CAL_FIELDS, calPrecField);
      assert fieldIdx >= 0;
      int prevPrecField = (fieldIdx == 0 ? -1 : CAL_FIELDS[--fieldIdx]);
      try {
        tree.clearFieldsAfter(cal, prevPrecField);
      } catch (AssertionError e) {
        //null-safe: an AssertionError without a message must be rethrown as-is, not turned into an NPE
        if ("Calendar underflow".equals(e.getMessage()))
          return;
        throw e;
      }
      lastString = calString;
    }
  }

  public void testShapeRelations() throws ParseException {
    Shape shapeA = tree.parseShape("[3122-01-23 TO 3122-11-27]");
    Shape shapeB = tree.parseShape("[3122-08 TO 3122-11]");
    assertEquals(SpatialRelation.INTERSECTS, shapeA.relate(shapeB));

    shapeA = tree.parseShape("3122");
    shapeB = tree.parseShape("[* TO 3122-10-31]");
    assertEquals(SpatialRelation.INTERSECTS, shapeA.relate(shapeB));

    shapeA = tree.parseShape("[3122-05-28 TO 3122-06-29]");
    shapeB = tree.parseShape("[3122 TO 3122-04]");
    assertEquals(SpatialRelation.DISJOINT, shapeA.relate(shapeB));
  }

  public void testShapeRangeOptimizer() throws ParseException {
    assertEquals("[2014-08 TO 2014-09]", tree.parseShape("[2014-08-01 TO 2014-09-30]").toString());

    assertEquals("2014", tree.parseShape("[2014-01-01 TO 2014-12-31]").toString());

    assertEquals("2014", tree.parseShape("[2014-01 TO 2014]").toString());
    assertEquals("[2014 TO 2014-04-06]", tree.parseShape("[2014-01 TO 2014-04-06]").toString());

    assertEquals("*", tree.parseShape("[* TO *]").toString());

    assertEquals("2014-08-01", tree.parseShape("[2014-08-01 TO 2014-08-01]").toString());

    assertEquals("[2014 TO 2014-09-15]", tree.parseShape("[2014 TO 2014-09-15]").toString());

    assertEquals("[* TO 2014-09-15]", tree.parseShape("[* TO 2014-09-15]").toString());
  }
}