mirror of https://github.com/apache/lucene.git
LUCENE-5648: DateRangePrefixTree and NumberRangePrefixTreeStrategy
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1600555 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
19da2d5f1e
commit
bb267a143e
|
@ -21,6 +21,13 @@ New Features
|
|||
PushPostingsWriterBase for single-pass push of docs/positions to the
|
||||
postings format. (Mike McCandless)
|
||||
|
||||
* LUCENE-5648: Index and search date ranges, particularly multi-valued ones. It's
|
||||
implemented in the spatial module as DateRangePrefixTree used with
|
||||
NumberRangePrefixTreeStrategy. (David Smiley)
|
||||
|
||||
* LUCENE-4175: Index and search rectangles with spatial BBoxSpatialStrategy.
|
||||
Sort documents by relative overlap of query areas. (Ryan McKinley)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-4535: oal.util.FilterIterator is now an internal API.
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
package org.apache.lucene.spatial;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.spatial4j.core.shape.Point;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
|
||||
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
|
||||
|
||||
import java.text.ParseException;
|
||||
|
||||
/** A PrefixTree based on Number/Date ranges. This isn't very "spatial" on the surface (to the user) but
|
||||
* it's implemented using spatial so that's why it's here extending a SpatialStrategy.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy {
|
||||
|
||||
public NumberRangePrefixTreeStrategy(NumberRangePrefixTree prefixTree, String fieldName) {
|
||||
super(prefixTree, fieldName);
|
||||
setPruneLeafyBranches(false);
|
||||
setPrefixGridScanLevel(prefixTree.getMaxLevels()-2);//user might want to change, however
|
||||
setPointsOnly(false);
|
||||
setDistErrPct(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumberRangePrefixTree getGrid() {
|
||||
return (NumberRangePrefixTree) super.getGrid();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Field[] createIndexableFields(Shape shape) {
|
||||
//levels doesn't actually matter; NumberRange based Shapes have their own "level".
|
||||
TokenStream tokenStream = createTokenStream(shape, grid.getMaxLevels());
|
||||
Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE);
|
||||
return new Field[]{field};
|
||||
}
|
||||
|
||||
/** For a Date based tree, pass in a Calendar, with unspecified fields marked as cleared.
|
||||
* See {@link NumberRangePrefixTree#toShape(Object)}. */
|
||||
public Shape toShape(Object value) {
|
||||
return getGrid().toShape(value);
|
||||
}
|
||||
|
||||
/** See {@link NumberRangePrefixTree#toRangeShape(Shape, Shape)}. */
|
||||
public Shape toRangeShape(Shape min, Shape max) {
|
||||
return getGrid().toRangeShape(min, max);
|
||||
}
|
||||
|
||||
/** See {@link NumberRangePrefixTree#parseShape(String)}. */
|
||||
public Shape parseShape(String str) throws ParseException {
|
||||
return getGrid().parseShape(str);
|
||||
}
|
||||
|
||||
/** Unsupported. */
|
||||
@Override
|
||||
public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,428 @@
|
|||
package org.apache.lucene.spatial.prefix.tree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.util.Locale;
|
||||
import java.util.TimeZone;
|
||||
|
||||
/**
|
||||
* A PrefixTree for date ranges in which the levels of the tree occur at natural periods of time (e.g. years,
|
||||
* months, ...). You pass in {@link Calendar} objects with the desired fields set and the unspecified
|
||||
* fields unset, which conveys the precision. The implementation tries to be generic to the Calendar
|
||||
* abstraction, making some optimizations when a Gregorian is used, but no others have been tested.
|
||||
* <p/>
|
||||
* Warning: If you construct a Calendar and then get something from the object like a field (e.g. year) or
|
||||
* milliseconds, then every field is fully set by side-effect. So after setting the fields, pass it to this
|
||||
* API first.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DateRangePrefixTree extends NumberRangePrefixTree {
|
||||
|
||||
/*
|
||||
WARNING java.util.Calendar is tricky to work with:
|
||||
* If you "get" any field value, every fields because "set". This can introduce a Heisenbug effect,
|
||||
when in a debugger in some cases. Fortunately, Calendar.toString() doesn't apply.
|
||||
* Beware Calendar underflow of the underlying long. If you create a Calendar from LONG.MIN_VALUE, and clear
|
||||
a field, it will underflow and appear close to LONG.MAX_VALUE (BC to AD).
|
||||
|
||||
There are no doubt other reasons but those two were hard fought lessons here.
|
||||
|
||||
TODO Improvements:
|
||||
* Make max precision configurable (i.e. to SECOND).
|
||||
* Make min & max year span configurable. Use that to remove pointless top levels of the SPT.
|
||||
If year span is > 10k, then add 1k year level. If year span is > 10k of 1k levels, add 1M level.
|
||||
* NumberRangePrefixTree: override getTreeCellIterator for optimized case where the shape isn't a date span; use
|
||||
FilterCellIterator of the cell stack.
|
||||
|
||||
*/
|
||||
|
||||
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
|
||||
private static Calendar CAL_TMP;//template
|
||||
static {
|
||||
CAL_TMP = Calendar.getInstance(UTC, Locale.ROOT);
|
||||
CAL_TMP.clear();
|
||||
}
|
||||
|
||||
private static final Calendar MINCAL = (Calendar) CAL_TMP.clone();
|
||||
private static final Calendar MAXCAL = (Calendar) CAL_TMP.clone();
|
||||
static {
|
||||
MINCAL.setTimeInMillis(Long.MIN_VALUE);
|
||||
MAXCAL.setTimeInMillis(Long.MAX_VALUE);
|
||||
}
|
||||
//BC years are decreasing, remember. Yet ActualMaximum is the numerically high value, ActualMinimum is 1.
|
||||
private static final int BC_FIRSTYEAR = MINCAL.getActualMaximum(Calendar.YEAR);
|
||||
private static final int BC_LASTYEAR = MINCAL.getActualMinimum(Calendar.YEAR);//1
|
||||
private static final int BC_YEARS = BC_FIRSTYEAR - BC_LASTYEAR + 1;
|
||||
private static final int AD_FIRSTYEAR = MAXCAL.getActualMinimum(Calendar.YEAR);//1
|
||||
private static final int AD_LASTYEAR = MAXCAL.getActualMaximum(Calendar.YEAR);
|
||||
private static final int AD_YEAR_BASE = (((BC_YEARS-1) / 1000_000)+1) * 1000_000;
|
||||
static { assert BC_LASTYEAR == 1 && AD_FIRSTYEAR == 1; }
|
||||
|
||||
//how many million years are there?
|
||||
private static final int NUM_MYEARS = (AD_YEAR_BASE + AD_LASTYEAR) / 1000_000;
|
||||
|
||||
private static int calFieldLen(int field) {
|
||||
return CAL_TMP.getMaximum(field) - CAL_TMP.getMinimum(field) + 1;
|
||||
}
|
||||
|
||||
private static final int[] FIELD_BY_LEVEL = {
|
||||
-1/*unused*/, -1, -1, Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH,
|
||||
Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND};
|
||||
private static final int yearLevel = 3;
|
||||
|
||||
public static final DateRangePrefixTree INSTANCE = new DateRangePrefixTree();
|
||||
|
||||
private final LevelledValue minLV, maxLV;
|
||||
private final LevelledValue gregorianChangeDateLV;
|
||||
|
||||
private DateRangePrefixTree() {
|
||||
super(new int[]{//sublevels by level
|
||||
NUM_MYEARS,
|
||||
1000,//1 thousand thousand-years in a million years
|
||||
1000,//1 thousand years in a thousand-year
|
||||
calFieldLen(Calendar.MONTH),
|
||||
calFieldLen(Calendar.DAY_OF_MONTH),
|
||||
calFieldLen(Calendar.HOUR_OF_DAY),
|
||||
calFieldLen(Calendar.MINUTE),
|
||||
calFieldLen(Calendar.SECOND),
|
||||
calFieldLen(Calendar.MILLISECOND),
|
||||
});
|
||||
maxLV = (LevelledValue) toShape((Calendar)MAXCAL.clone());
|
||||
minLV = (LevelledValue) toShape((Calendar)MINCAL.clone());
|
||||
if (MAXCAL instanceof GregorianCalendar) {
|
||||
//TODO this should be a configurable param by passing a Calendar surving as a template.
|
||||
GregorianCalendar gCal = (GregorianCalendar)MAXCAL;
|
||||
gregorianChangeDateLV = (LevelledValue) toShape(gCal.getGregorianChange());
|
||||
} else {
|
||||
gregorianChangeDateLV = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getNumSubCells(LevelledValue lv) {
|
||||
int cmp = comparePrefixLV(lv, maxLV);
|
||||
assert cmp <= 0;
|
||||
if (cmp == 0)//edge case (literally!)
|
||||
return maxLV.getValAtLevel(lv.getLevel()+1);
|
||||
|
||||
// if using GregorianCalendar and we're after the "Gregorian change date" then we'll compute
|
||||
// the sub-cells ourselves more efficiently without the need to construct a Calendar.
|
||||
cmp = gregorianChangeDateLV != null ? comparePrefixLV(lv, gregorianChangeDateLV) : -1;
|
||||
//TODO consider also doing fast-path if field is <= hours even if before greg change date
|
||||
if (cmp >= 0) {
|
||||
int result = fastSubCells(lv);
|
||||
assert result == slowSubCells(lv) : "fast/slow numSubCells inconsistency";
|
||||
return result;
|
||||
} else {
|
||||
return slowSubCells(lv);
|
||||
}
|
||||
}
|
||||
|
||||
private int fastSubCells(LevelledValue lv) {
|
||||
if (lv.getLevel() == yearLevel+1) {//month
|
||||
switch (lv.getValAtLevel(lv.getLevel())) {
|
||||
case Calendar.SEPTEMBER:
|
||||
case Calendar.APRIL:
|
||||
case Calendar.JUNE:
|
||||
case Calendar.NOVEMBER:
|
||||
return 30;
|
||||
case Calendar.FEBRUARY:
|
||||
//get the year (negative numbers for BC)
|
||||
int yearAdj = lv.getValAtLevel(1) * 1_000_000;
|
||||
yearAdj += lv.getValAtLevel(2) * 1000;
|
||||
yearAdj += lv.getValAtLevel(3);
|
||||
int year = yearAdj - AD_YEAR_BASE;
|
||||
if (year % 4 == 0 && !(year % 100 == 0 && year % 400 != 0) )//leap year
|
||||
return 29;
|
||||
else
|
||||
return 28;
|
||||
default:
|
||||
return 31;
|
||||
}
|
||||
} else {//typical:
|
||||
return super.getNumSubCells(lv);
|
||||
}
|
||||
}
|
||||
|
||||
private int slowSubCells(LevelledValue lv) {
|
||||
int field = FIELD_BY_LEVEL[lv.getLevel()+1];
|
||||
//short-circuit optimization (GregorianCalendar assumptions)
|
||||
if (field == -1 || field == Calendar.YEAR || field >= Calendar.HOUR_OF_DAY)//TODO make configurable
|
||||
return super.getNumSubCells(lv);
|
||||
Calendar cal = toCalendarLV(lv);//somewhat heavyweight op; ideally should be stored on LevelledValue somehow
|
||||
return cal.getActualMaximum(field) - cal.getActualMinimum(field) + 1;
|
||||
}
|
||||
|
||||
/** Calendar utility method:
|
||||
* Returns a new {@link Calendar} in UTC TimeZone, ROOT Locale, with all fields cleared. */
|
||||
public Calendar newCal() {
|
||||
return (Calendar) CAL_TMP.clone();
|
||||
}
|
||||
|
||||
/** Calendar utility method:
|
||||
* Gets the Calendar field code of the last field that is set prior to an unset field. It only
|
||||
* examines fields relevant to the prefix tree. If no fields are set, it returns -1. */
|
||||
public int getCalPrecisionField(Calendar cal) {
|
||||
int lastField = -1;
|
||||
for (int level = yearLevel; level < FIELD_BY_LEVEL.length; level++) {
|
||||
int field = FIELD_BY_LEVEL[level];
|
||||
if (!cal.isSet(field))
|
||||
break;
|
||||
lastField = field;
|
||||
}
|
||||
return lastField;
|
||||
}
|
||||
|
||||
/** Calendar utility method:
|
||||
* Calls {@link Calendar#clear(int)} for every field after {@code field}. Beware of Calendar underflow. */
|
||||
public void clearFieldsAfter(Calendar cal, int field) {
|
||||
if (field == -1) {
|
||||
cal.clear();
|
||||
return;
|
||||
}
|
||||
int assertEra = -1;
|
||||
assert (assertEra = (((Calendar)cal.clone()).get(Calendar.ERA))) >= 0;//a trick to only get this if assert enabled
|
||||
for (int f = field+1; f < Calendar.FIELD_COUNT; f++) {
|
||||
cal.clear(f);
|
||||
}
|
||||
assert ((Calendar)cal.clone()).get(Calendar.ERA) == assertEra : "Calendar underflow";
|
||||
}
|
||||
|
||||
/** Converts {@code value} from a {@link Calendar} or {@link Date} to a {@link Shape}. Other arguments
|
||||
* result in a {@link java.lang.IllegalArgumentException}.
|
||||
*/
|
||||
@Override
|
||||
public Shape toShape(Object value) {
|
||||
if (value instanceof Calendar) {
|
||||
return toShape((Calendar) value);
|
||||
} else if (value instanceof Date) {
|
||||
Calendar cal = newCal();
|
||||
cal.setTime((Date)value);
|
||||
return toShape(cal);
|
||||
}
|
||||
throw new IllegalArgumentException("Expecting Calendar or Date but got: "+value.getClass());
|
||||
}
|
||||
|
||||
/** Converts the Calendar into a Shape.
|
||||
* The isSet() state of the Calendar is re-instated when done. */
|
||||
public Shape toShape(Calendar cal) {
|
||||
// Convert a Calendar into a stack of cell numbers
|
||||
final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields
|
||||
try {
|
||||
int[] valStack = new int[maxLevels];//starts at level 1, not 0
|
||||
int len = 0;
|
||||
if (calPrecField >= Calendar.YEAR) {//year or better precision
|
||||
int year = cal.get(Calendar.YEAR);
|
||||
int yearAdj = cal.get(Calendar.ERA) == 0 ? AD_YEAR_BASE - (year - 1) : AD_YEAR_BASE + year;
|
||||
|
||||
valStack[len++] = yearAdj / 1000_000;
|
||||
yearAdj -= valStack[len-1] * 1000_000;
|
||||
valStack[len++] = yearAdj / 1000;
|
||||
yearAdj -= valStack[len-1] * 1000;
|
||||
valStack[len++] = yearAdj;
|
||||
for (int level = yearLevel+1; level < FIELD_BY_LEVEL.length; level++) {
|
||||
int field = FIELD_BY_LEVEL[level];
|
||||
if (field > calPrecField)
|
||||
break;
|
||||
valStack[len++] = cal.get(field) - cal.getActualMinimum(field);
|
||||
}
|
||||
}
|
||||
|
||||
return toShape(valStack, len);
|
||||
} finally {
|
||||
clearFieldsAfter(cal, calPrecField);//restore precision state modified by get()
|
||||
}
|
||||
}
|
||||
|
||||
public Calendar toCalendar(Shape shape) {
|
||||
if (shape instanceof LevelledValue)
|
||||
return toCalendarLV((LevelledValue) shape);
|
||||
throw new IllegalArgumentException("Can't be converted to Calendar: "+shape);
|
||||
}
|
||||
|
||||
private Calendar toCalendarLV(LevelledValue lv) {
|
||||
if (lv.getLevel() == 0)
|
||||
return newCal();
|
||||
if (comparePrefixLV(lv, minLV) <= 0) {//shouldn't typically happen; sometimes in a debugger
|
||||
return (Calendar) MINCAL.clone();//full precision; truncation would cause underflow
|
||||
}
|
||||
assert comparePrefixLV(lv, maxLV) <= 0;
|
||||
Calendar cal = newCal();
|
||||
|
||||
int yearAdj = lv.getValAtLevel(1) * 1_000_000;
|
||||
if (lv.getLevel() > 1) {
|
||||
yearAdj += lv.getValAtLevel(2) * 1000;
|
||||
if (lv.getLevel() > 2) {
|
||||
yearAdj += lv.getValAtLevel(3);
|
||||
}
|
||||
}
|
||||
if (yearAdj > AD_YEAR_BASE) {
|
||||
cal.set(Calendar.ERA, 1);
|
||||
cal.set(Calendar.YEAR, yearAdj - AD_YEAR_BASE);//setting the year resets the era
|
||||
} else {
|
||||
cal.set(Calendar.ERA, 0);//we assert this "sticks" at the end
|
||||
cal.set(Calendar.YEAR, (AD_YEAR_BASE - yearAdj) + 1);
|
||||
}
|
||||
for (int level = yearLevel+1; level <= lv.getLevel(); level++) {
|
||||
int field = FIELD_BY_LEVEL[level];
|
||||
cal.set(field, lv.getValAtLevel(level) + cal.getActualMinimum(field));
|
||||
}
|
||||
assert yearAdj > AD_YEAR_BASE || ((Calendar)cal.clone()).get(Calendar.ERA) == 0 : "ERA / YEAR underflow";
|
||||
return cal;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String toStringLV(LevelledValue lv) {
|
||||
return toString(toCalendarLV(lv));
|
||||
}
|
||||
|
||||
/** Calendar utility method:
|
||||
* Converts to calendar to ISO-8601, to include proper BC handling (1BC is "0000", 2BC is "-0001", etc.);
|
||||
* and WITHOUT a trailing 'Z'.
|
||||
* A fully cleared calendar will yield the string "*".
|
||||
* The isSet() state of the Calendar is re-instated when done. */
|
||||
@SuppressWarnings("fallthrough")
|
||||
public String toString(Calendar cal) {
|
||||
final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields
|
||||
if (calPrecField == -1)
|
||||
return "*";
|
||||
try {
|
||||
//TODO not fully optimized because I only expect this to be used in tests / debugging.
|
||||
// Borrow code from Solr DateUtil, and have it reference this back?
|
||||
String pattern = "yyyy-MM-dd'T'HH:mm:ss.SSS";
|
||||
int ptnLen = 0;
|
||||
switch (calPrecField) {//switch fall-through is deliberate
|
||||
case Calendar.MILLISECOND: ptnLen += 4;
|
||||
case Calendar.SECOND: ptnLen += 3;
|
||||
case Calendar.MINUTE: ptnLen += 3;
|
||||
case Calendar.HOUR_OF_DAY: ptnLen += 5;
|
||||
case Calendar.DAY_OF_MONTH: ptnLen += 3;
|
||||
case Calendar.MONTH: ptnLen += 3;
|
||||
case Calendar.YEAR: ptnLen += 4;
|
||||
break;
|
||||
default: throw new IllegalStateException(""+calPrecField);
|
||||
}
|
||||
pattern = pattern.substring(0, ptnLen);
|
||||
SimpleDateFormat format = new SimpleDateFormat(pattern, Locale.ROOT);
|
||||
format.setTimeZone(cal.getTimeZone());
|
||||
if (cal.get(Calendar.ERA) == 0) {//BC
|
||||
//SDF doesn't do this properly according to ISO-8601
|
||||
// Example: 1BC == "0000" (actually 0 AD), 2BC == "-0001", 3BC == "-0002", ...
|
||||
final int yearOrig = cal.get(Calendar.YEAR);
|
||||
cal.set(Calendar.YEAR, yearOrig-1);
|
||||
String str;
|
||||
try {
|
||||
str = format.format(cal.getTime());
|
||||
} finally {
|
||||
//reset to what it was
|
||||
cal.set(Calendar.ERA, 0);//necessary!
|
||||
cal.set(Calendar.YEAR, yearOrig);
|
||||
}
|
||||
if (yearOrig > 1)
|
||||
return "-" + str;
|
||||
else
|
||||
return "0000" + str.substring(4);
|
||||
}
|
||||
return format.format(cal.getTime());
|
||||
} finally {
|
||||
clearFieldsAfter(cal, calPrecField);//restore precision state modified by get()
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected LevelledValue parseShapeLV(String str) throws ParseException {
|
||||
return (LevelledValue) toShape(parseCalendar(str));
|
||||
}
|
||||
|
||||
/** Calendar utility method:
|
||||
* The reverse of {@link #toString(java.util.Calendar)}. It will only set the fields found, leaving
|
||||
* the remainder in an un-set state. A leading '-' or '+' is optional (positive assumed), and a
|
||||
* trailing 'Z' is also optional.
|
||||
* @param str not null and not empty
|
||||
* @return not null
|
||||
*/
|
||||
public Calendar parseCalendar(String str) throws ParseException {
|
||||
// example: +2014-10-23T21:22:33.159Z
|
||||
if (str == null || str.isEmpty())
|
||||
throw new IllegalArgumentException("str is null or blank");
|
||||
Calendar cal = newCal();
|
||||
if (str.equals("*"))
|
||||
return cal;
|
||||
int offset = 0;//a pointer
|
||||
try {
|
||||
//year & era:
|
||||
int lastOffset = str.charAt(str.length()-1) == 'Z' ? str.length() - 1 : str.length();
|
||||
int hyphenIdx = str.indexOf('-', 1);//look past possible leading hyphen
|
||||
if (hyphenIdx < 0)
|
||||
hyphenIdx = lastOffset;
|
||||
int year = Integer.parseInt(str.substring(offset, hyphenIdx));
|
||||
cal.set(Calendar.ERA, year <= 0 ? 0 : 1);
|
||||
cal.set(Calendar.YEAR, year <= 0 ? -1*year + 1 : year);
|
||||
offset = hyphenIdx + 1;
|
||||
if (lastOffset < offset)
|
||||
return cal;
|
||||
|
||||
//NOTE: We aren't validating separator chars, and we unintentionally accept leading +/-.
|
||||
// The str.substring()'s hopefully get optimized to be stack-allocated.
|
||||
|
||||
//month:
|
||||
cal.set(Calendar.MONTH, Integer.parseInt(str.substring(offset, offset+2)) - 1);//starts at 0
|
||||
offset += 3;
|
||||
if (lastOffset < offset)
|
||||
return cal;
|
||||
//day:
|
||||
cal.set(Calendar.DAY_OF_MONTH, Integer.parseInt(str.substring(offset, offset+2)));
|
||||
offset += 3;
|
||||
if (lastOffset < offset)
|
||||
return cal;
|
||||
//hour:
|
||||
cal.set(Calendar.HOUR_OF_DAY, Integer.parseInt(str.substring(offset, offset+2)));
|
||||
offset += 3;
|
||||
if (lastOffset < offset)
|
||||
return cal;
|
||||
//minute:
|
||||
cal.set(Calendar.MINUTE, Integer.parseInt(str.substring(offset, offset+2)));
|
||||
offset += 3;
|
||||
if (lastOffset < offset)
|
||||
return cal;
|
||||
//second:
|
||||
cal.set(Calendar.SECOND, Integer.parseInt(str.substring(offset, offset+2)));
|
||||
offset += 3;
|
||||
if (lastOffset < offset)
|
||||
return cal;
|
||||
//ms:
|
||||
cal.set(Calendar.MILLISECOND, Integer.parseInt(str.substring(offset, offset+3)));
|
||||
offset += 3;//last one, move to next char
|
||||
if (lastOffset == offset)
|
||||
return cal;
|
||||
} catch (Exception e) {
|
||||
ParseException pe = new ParseException("Improperly formatted date: "+str, offset);
|
||||
pe.initCause(e);
|
||||
throw pe;
|
||||
}
|
||||
throw new ParseException("Improperly formatted date: "+str, offset);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,811 @@
|
|||
package org.apache.lucene.spatial.prefix.tree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.spatial4j.core.context.SpatialContext;
|
||||
import com.spatial4j.core.context.SpatialContextFactory;
|
||||
import com.spatial4j.core.shape.Point;
|
||||
import com.spatial4j.core.shape.Rectangle;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import com.spatial4j.core.shape.SpatialRelation;
|
||||
import com.spatial4j.core.shape.impl.RectangleImpl;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
import java.text.ParseException;
|
||||
|
||||
/**
|
||||
* A special SpatialPrefixTree for single-dimensional number ranges of integral values. It's based
|
||||
* on a stack of integers, and thus it's not limited to a long.
|
||||
* @see <a href="https://issues.apache.org/jira/browse/LUCENE-5648">LUCENE-5648</a>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class NumberRangePrefixTree extends SpatialPrefixTree {
|
||||
|
||||
//
|
||||
// Dummy SpatialContext
|
||||
//
|
||||
|
||||
private static final SpatialContext DUMMY_CTX;
|
||||
static {
|
||||
SpatialContextFactory factory = new SpatialContextFactory();
|
||||
factory.geo = false;
|
||||
factory.worldBounds = new RectangleImpl(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0L, 0L, null);
|
||||
DUMMY_CTX = factory.newSpatialContext();
|
||||
}
|
||||
|
||||
//
|
||||
// LevelledValue
|
||||
//
|
||||
|
||||
/** A value implemented as a stack of numbers. Spatially speaking, it's
|
||||
* analogous to a Point but 1D yet has some precision width.
|
||||
* @lucene.internal */
|
||||
protected static interface LevelledValue extends Shape {
|
||||
int getLevel();//0 means the world (universe).
|
||||
int getValAtLevel(int level);//level >= 0 && <= getLevel()
|
||||
LevelledValue getLVAtLevel(int level);
|
||||
}
|
||||
|
||||
/** Compares a to b, returning less than 0, 0, or greater than 0, if a is less than, equal to, or
|
||||
* greater than b, respectively. Only min(a.levels,b.levels) are compared.
|
||||
* @lucene.internal */
|
||||
protected static int comparePrefixLV(LevelledValue a, LevelledValue b) {
|
||||
int minLevel = Math.min(a.getLevel(), b.getLevel());
|
||||
for (int level = 1; level <= minLevel; level++) {
|
||||
int diff = a.getValAtLevel(level) - b.getValAtLevel(level);
|
||||
if (diff != 0)
|
||||
return diff;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
protected String toStringLV(LevelledValue lv) {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buf.append('[');
|
||||
for (int level = 1; level <= lv.getLevel(); level++) {
|
||||
buf.append(lv.getValAtLevel(level)).append(',');
|
||||
}
|
||||
buf.setLength(buf.length()-1);//chop off ','
|
||||
buf.append(']');
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
//
|
||||
// NRShape
|
||||
//
|
||||
|
||||
/** Number Range Shape; based on a pair of {@link LevelledValue}.
|
||||
* Spatially speaking, it's analogous to a Rectangle but 1D.
|
||||
* @lucene.internal */
|
||||
protected class NRShape implements Shape {
|
||||
|
||||
private final LevelledValue minLV, maxLV;
|
||||
|
||||
/** Don't call directly; see {@link #toRangeShape(com.spatial4j.core.shape.Shape, com.spatial4j.core.shape.Shape)}. */
|
||||
private NRShape(LevelledValue minLV, LevelledValue maxLV) {
|
||||
this.minLV = minLV;
|
||||
this.maxLV = maxLV;
|
||||
}
|
||||
|
||||
public LevelledValue getMinLV() { return minLV; }
|
||||
|
||||
public LevelledValue getMaxLV() { return maxLV; }
|
||||
|
||||
@Override
|
||||
public SpatialRelation relate(Shape shape) {
|
||||
// if (shape instanceof LevelledValue)
|
||||
// return relate((LevelledValue)shape);
|
||||
if (shape instanceof NRShape)
|
||||
return relate((NRShape) shape);
|
||||
return shape.relate(this).transpose();//probably a LevelledValue
|
||||
}
|
||||
|
||||
public SpatialRelation relate(NRShape ext) {
|
||||
//This logic somewhat mirrors RectangleImpl.relate_range()
|
||||
int extMin_intMax = comparePrefixLV(ext.getMinLV(), getMaxLV());
|
||||
if (extMin_intMax > 0)
|
||||
return SpatialRelation.DISJOINT;
|
||||
int extMax_intMin = comparePrefixLV(ext.getMaxLV(), getMinLV());
|
||||
if (extMax_intMin < 0)
|
||||
return SpatialRelation.DISJOINT;
|
||||
int extMin_intMin = comparePrefixLV(ext.getMinLV(), getMinLV());
|
||||
int extMax_intMax = comparePrefixLV(ext.getMaxLV(), getMaxLV());
|
||||
if ((extMin_intMin > 0 || extMin_intMin == 0 && ext.getMinLV().getLevel() >= getMinLV().getLevel())
|
||||
&& (extMax_intMax < 0 || extMax_intMax == 0 && ext.getMaxLV().getLevel() >= getMaxLV().getLevel()))
|
||||
return SpatialRelation.CONTAINS;
|
||||
if ((extMin_intMin < 0 || extMin_intMin == 0 && ext.getMinLV().getLevel() <= getMinLV().getLevel())
|
||||
&& (extMax_intMax > 0 || extMax_intMax == 0 && ext.getMaxLV().getLevel() <= getMaxLV().getLevel()))
|
||||
return SpatialRelation.WITHIN;
|
||||
return SpatialRelation.INTERSECTS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Rectangle getBoundingBox() { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public boolean hasArea() { return true; }
|
||||
|
||||
@Override
|
||||
public double getArea(SpatialContext spatialContext) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public Point getCenter() { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public Shape getBuffered(double v, SpatialContext spatialContext) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() { return false; }
|
||||
|
||||
@Override
|
||||
public String toString() { return "[" + toStringLV(minLV) + " TO " + toStringLV(maxLV) + "]"; }
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
NRShape nrShape = (NRShape) o;
|
||||
|
||||
if (!maxLV.equals(nrShape.maxLV)) return false;
|
||||
if (!minLV.equals(nrShape.minLV)) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = minLV.hashCode();
|
||||
result = 31 * result + maxLV.hashCode();
|
||||
return result;
|
||||
}
|
||||
}// class NRShapeImpl
|
||||
|
||||
/** Converts the value to a shape (usually not a range). If it's a JDK object (e.g. Number, Calendar)
|
||||
* that could be parsed from a String, this class won't do it; you must parse it. */
|
||||
public abstract Shape toShape(Object value);
|
||||
|
||||
/** Detects a range pattern and parses it, otherwise it's parsed as one shape via
|
||||
* {@link #parseShapeLV(String)}. The range pattern looks like this BNF:
|
||||
* <pre>
|
||||
* '[' + parseShapeLV + ' TO ' + parseShapeLV + ']'
|
||||
* </pre>
|
||||
* It's the same thing as the toString() of the range shape, notwithstanding range optimization.
|
||||
* @param str not null or empty
|
||||
* @return not null
|
||||
* @throws java.text.ParseException If there is a problem
|
||||
*/
|
||||
public Shape parseShape(String str) throws ParseException {
|
||||
if (str == null || str.isEmpty())
|
||||
throw new IllegalArgumentException("str is null or blank");
|
||||
if (str.charAt(0) == '[') {
|
||||
if (str.charAt(str.length()-1) != ']')
|
||||
throw new ParseException("If starts with [ must end with ]; got "+str, str.length()-1);
|
||||
int middle = str.indexOf(" TO ");
|
||||
if (middle < 0)
|
||||
throw new ParseException("If starts with [ must contain ' TO '; got "+str, -1);
|
||||
String leftStr = str.substring(1, middle);
|
||||
String rightStr = str.substring(middle + " TO ".length(), str.length()-1);
|
||||
return toRangeShape(parseShapeLV(leftStr), parseShapeLV(rightStr));
|
||||
} else if (str.charAt(0) == '{') {
|
||||
throw new ParseException("Exclusive ranges not supported; got "+str, 0);
|
||||
} else {
|
||||
return parseShapeLV(str);
|
||||
}
|
||||
}
|
||||
|
||||
/** Parse a String to a LevelledValue. "*" should be the full-range. */
|
||||
protected abstract LevelledValue parseShapeLV(String str) throws ParseException;
|
||||
|
||||
/** Returns a shape that represents the continuous range between {@code start} and {@code end}. It will
|
||||
* be optimized.
|
||||
* @throws IllegalArgumentException if the arguments are in the wrong order, or if either contains the other.
|
||||
*/
|
||||
public Shape toRangeShape(Shape start, Shape end) {
|
||||
if (!(start instanceof LevelledValue && end instanceof LevelledValue))
|
||||
throw new IllegalArgumentException("Must pass "+LevelledValue.class+" but got "+start.getClass());
|
||||
LevelledValue minLV = (LevelledValue) start;
|
||||
LevelledValue maxLV = (LevelledValue) end;
|
||||
if (minLV.equals(maxLV))
|
||||
return minLV;
|
||||
//Optimize precision of the range, e.g. April 1st to April 30th is April.
|
||||
minLV = minLV.getLVAtLevel(truncateStartVals(minLV, 0));
|
||||
maxLV = maxLV.getLVAtLevel(truncateEndVals(maxLV, 0));
|
||||
int cmp = comparePrefixLV(minLV, maxLV);
|
||||
if (cmp > 0) {
|
||||
throw new IllegalArgumentException("Wrong order: "+start+" TO "+end);
|
||||
}
|
||||
if (cmp == 0 && minLV.getLevel() == maxLV.getLevel())
|
||||
return minLV;
|
||||
return new NRShape(minLV, maxLV);
|
||||
}
|
||||
|
||||
/** From lv.getLevel on up, it returns the first Level seen with val != 0. It doesn't check past endLevel. */
|
||||
private int truncateStartVals(LevelledValue lv, int endLevel) {
|
||||
for (int level = lv.getLevel(); level > endLevel; level--) {
|
||||
if (lv.getValAtLevel(level) != 0)
|
||||
return level;
|
||||
}
|
||||
return endLevel;
|
||||
}
|
||||
|
||||
private int truncateEndVals(LevelledValue lv, int endLevel) {
|
||||
for (int level = lv.getLevel(); level > endLevel; level--) {
|
||||
int max = getNumSubCells(lv.getLVAtLevel(level-1)) - 1;
|
||||
if (lv.getValAtLevel(level) != max)
|
||||
return level;
|
||||
}
|
||||
return endLevel;
|
||||
}
|
||||
|
||||
//
|
||||
// NumberRangePrefixTree
|
||||
//
|
||||
|
||||
protected final int[] maxSubCellsByLevel;
|
||||
protected final int[] termLenByLevel;
|
||||
protected final int[] levelByTermLen;
|
||||
protected final int maxTermLen; // how long could cell.getToken... (that is a leaf) possibly be?
|
||||
|
||||
protected NumberRangePrefixTree(int[] maxSubCellsByLevel) {
|
||||
super(DUMMY_CTX, maxSubCellsByLevel.length);
|
||||
this.maxSubCellsByLevel = maxSubCellsByLevel;
|
||||
|
||||
// Fill termLenByLevel
|
||||
this.termLenByLevel = new int[maxLevels + 1];
|
||||
termLenByLevel[0] = 0;
|
||||
final int MAX_STATES = 1 << 15;//1 bit less than 2 bytes
|
||||
for (int level = 1; level <= maxLevels; level++) {
|
||||
final int states = maxSubCellsByLevel[level - 1];
|
||||
if (states >= MAX_STATES || states <= 1) {
|
||||
throw new IllegalArgumentException("Max states is "+MAX_STATES+", given "+states+" at level "+level);
|
||||
}
|
||||
boolean twoBytes = states >= 256;
|
||||
termLenByLevel[level] = termLenByLevel[level-1] + (twoBytes ? 2 : 1);
|
||||
}
|
||||
maxTermLen = termLenByLevel[maxLevels] + 1;// + 1 for leaf byte
|
||||
|
||||
// Fill levelByTermLen
|
||||
levelByTermLen = new int[maxTermLen];
|
||||
levelByTermLen[0] = 0;
|
||||
for (int level = 1; level < termLenByLevel.length; level++) {
|
||||
int termLen = termLenByLevel[level];
|
||||
int prevTermLen = termLenByLevel[level-1];
|
||||
if (termLen - prevTermLen == 2) {//2 byte delta
|
||||
//if the term doesn't completely cover this cell then it must be a leaf of the prior.
|
||||
levelByTermLen[termLen-1] = -1;//won't be used; otherwise erroneous
|
||||
levelByTermLen[termLen] = level;
|
||||
} else {//1 byte delta
|
||||
assert termLen - prevTermLen == 1;
|
||||
levelByTermLen[termLen] = level;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getLevelForDistance(double dist) {
|
||||
return maxLevels;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getDistanceForLevel(int level) {
|
||||
throw new UnsupportedOperationException("Not applicable.");
|
||||
}
|
||||
|
||||
protected Shape toShape(int[] valStack, int len) {
|
||||
final NRCell[] cellStack = newCellStack(len);
|
||||
for (int i = 0; i < len; i++) {
|
||||
cellStack[i+1].resetCellWithCellNum(valStack[i]);
|
||||
}
|
||||
return cellStack[len];
|
||||
}
|
||||
|
||||
@Override
|
||||
public Cell getWorldCell() {
|
||||
return newCellStack(maxLevels)[0];
|
||||
}
|
||||
|
||||
protected NRCell[] newCellStack(int levels) {
|
||||
final NRCell[] cellsByLevel = new NRCell[levels + 1];
|
||||
final BytesRef term = new BytesRef(maxTermLen);
|
||||
for (int level = 0; level <= levels; level++) {
|
||||
cellsByLevel[level] = new NRCell(cellsByLevel,term,level);
|
||||
}
|
||||
return cellsByLevel;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Cell readCell(BytesRef term, Cell scratch) {
|
||||
if (scratch == null)
|
||||
scratch = getWorldCell();
|
||||
|
||||
//We decode level, leaf, and populate bytes.
|
||||
|
||||
//reverse lookup term length to the level and hence the cell
|
||||
NRCell[] cellsByLevel = ((NRCell) scratch).cellsByLevel;
|
||||
boolean isLeaf = term.bytes[term.offset + term.length - 1] == 0;
|
||||
int lenNoLeaf = isLeaf ? term.length - 1 : term.length;
|
||||
|
||||
NRCell result = cellsByLevel[levelByTermLen[lenNoLeaf]];
|
||||
if (cellsByLevel[0].termBuf == null)
|
||||
cellsByLevel[0].termBuf = result.term.bytes;//a kluge; see cell.ensureOwnTermBytes()
|
||||
result.term.bytes = term.bytes;
|
||||
result.term.offset = term.offset;
|
||||
result.term.length = lenNoLeaf;//technically this isn't used but may help debugging
|
||||
result.reset();
|
||||
if (isLeaf)
|
||||
result.setLeaf();
|
||||
|
||||
result.cellNumber = -1;//lazy decode flag
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
protected int getNumSubCells(LevelledValue lv) {
|
||||
return maxSubCellsByLevel[lv.getLevel()];
|
||||
}
|
||||
|
||||
//
|
||||
// NRCell
|
||||
//
|
||||
|
||||
/** Most of the PrefixTree implementation is in this one class, which is both
|
||||
* the Cell, the CellIterator, and the Shape to reduce object allocation. It's implemented as a re-used array/stack
|
||||
* of Cells at adjacent levels, that all have a reference back to the cell array to traverse. They also share a common
|
||||
* BytesRef for the term.
|
||||
* @lucene.internal */
|
||||
protected class NRCell extends CellIterator implements Cell, LevelledValue {
|
||||
|
||||
//Shared: (TODO put this in a new class)
|
||||
final NRCell[] cellsByLevel;
|
||||
final BytesRef term;//AKA the token
|
||||
byte[] termBuf;// see ensureOwnTermBytes(), only for cell0
|
||||
|
||||
//Cell state...
|
||||
final int cellLevel; // assert levelStack[cellLevel] == this
|
||||
int cellNumber; //relative to parent cell. It's unused for level 0. Starts at 0.
|
||||
|
||||
SpatialRelation cellShapeRel;
|
||||
boolean cellIsLeaf;
|
||||
|
||||
//CellIterator state is defined further below
|
||||
|
||||
NRCell(NRCell[] cellsByLevel, BytesRef term, int cellLevel) {
|
||||
this.cellsByLevel = cellsByLevel;
|
||||
this.term = term;
|
||||
this.cellLevel = cellLevel;
|
||||
this.cellNumber = cellLevel == 0 ? 0 : -1;
|
||||
this.cellIsLeaf = false;
|
||||
assert cellsByLevel[cellLevel] == null;
|
||||
}
|
||||
|
||||
/** Ensure we own term.bytes so that it's safe to modify. We detect via a kluge in which cellsByLevel[0].termBuf
|
||||
* is non-null, which is a pre-allocated for use to replace term.bytes. */
|
||||
void ensureOwnTermBytes() {
|
||||
NRCell cell0 = cellsByLevel[0];
|
||||
if (cell0.termBuf == null)
|
||||
return;//we already own the bytes
|
||||
System.arraycopy(term.bytes, term.offset, cell0.termBuf, 0, term.length);
|
||||
term.bytes = cell0.termBuf;
|
||||
term.offset = 0;
|
||||
cell0.termBuf = null;
|
||||
}
|
||||
|
||||
private void reset() {
|
||||
this.cellIsLeaf = false;
|
||||
this.cellShapeRel = null;
|
||||
}
|
||||
|
||||
private void resetCellWithCellNum(int cellNumber) {
|
||||
reset();
|
||||
|
||||
//update bytes
|
||||
// note: see lazyInitCellNumsFromBytes() for the reverse
|
||||
if (cellNumber >= 0) {//valid
|
||||
ensureOwnTermBytes();
|
||||
int termLen = termLenByLevel[getLevel()];
|
||||
boolean twoBytes = (termLen - termLenByLevel[getLevel()-1]) > 1;
|
||||
if (twoBytes) {
|
||||
//right 7 bits, plus 1 (may overflow to 8th bit which is okay)
|
||||
term.bytes[termLen-2] = (byte) (cellNumber >> 7);
|
||||
term.bytes[termLen-1] = (byte) ((cellNumber & 0x7F) + 1);
|
||||
} else {
|
||||
term.bytes[termLen-1] = (byte) (cellNumber+1);
|
||||
}
|
||||
assert term.bytes[termLen-1] != 0;
|
||||
term.length = termLen;
|
||||
}
|
||||
this.cellNumber = cellNumber;
|
||||
}
|
||||
|
||||
private void ensureDecoded() {
|
||||
if (cellNumber >= 0)
|
||||
return;
|
||||
//Decode cell numbers from bytes. This is the inverse of resetCellWithCellNum().
|
||||
for (int level = 1; level <= getLevel(); level++) {
|
||||
NRCell cell = cellsByLevel[level];
|
||||
int termLen = termLenByLevel[level];
|
||||
boolean twoBytes = (termLen - termLenByLevel[level-1]) > 1;
|
||||
if (twoBytes) {
|
||||
int byteH = (term.bytes[term.offset + termLen - 2] & 0xFF);
|
||||
int byteL = (term.bytes[term.offset + termLen - 1] & 0xFF);
|
||||
assert byteL - 1 < (1<<7);
|
||||
cell.cellNumber = (byteH << 7) + (byteL-1);
|
||||
assert cell.cellNumber < 1<<15;
|
||||
} else {
|
||||
cell.cellNumber = (term.bytes[term.offset + termLen - 1] & 0xFF) - 1;
|
||||
assert cell.cellNumber < 255;
|
||||
}
|
||||
assert cell.cellNumber >= 0;
|
||||
}
|
||||
}
|
||||
|
||||
@Override // for Cell & for LevelledValue
|
||||
public int getLevel() {
|
||||
return cellLevel;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpatialRelation getShapeRel() {
|
||||
return cellShapeRel;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setShapeRel(SpatialRelation rel) {
|
||||
cellShapeRel = rel;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isLeaf() {
|
||||
return cellIsLeaf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setLeaf() {
|
||||
cellIsLeaf = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Shape getShape() {
|
||||
ensureDecoded(); return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getTokenBytesNoLeaf(BytesRef result) {
|
||||
if (result == null)
|
||||
result = new BytesRef();
|
||||
result.bytes = term.bytes;
|
||||
result.offset = term.offset;
|
||||
result.length = termLenByLevel[cellLevel];
|
||||
assert result.length <= term.length;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getTokenBytesWithLeaf(BytesRef result) {
|
||||
ensureOwnTermBytes();//normally shouldn't do anything
|
||||
result = getTokenBytesNoLeaf(result);
|
||||
if (isLeaf()) {
|
||||
result.bytes[result.length++] = 0;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPrefixOf(Cell c) {
|
||||
NRCell otherCell = (NRCell) c;
|
||||
assert term != otherCell.term;
|
||||
//trick to re-use bytesref; provided that we re-instate it
|
||||
int myLastLen = term.length;
|
||||
term.length = termLenByLevel[getLevel()];
|
||||
int otherLastLen = otherCell.term.length;
|
||||
otherCell.term.length = termLenByLevel[otherCell.getLevel()];
|
||||
boolean answer = StringHelper.startsWith(otherCell.term, term);
|
||||
term.length = myLastLen;
|
||||
otherCell.term.length = otherLastLen;
|
||||
return answer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareToNoLeaf(Cell fromCell) {
|
||||
final NRCell nrCell = (NRCell) fromCell;
|
||||
assert term != nrCell.term;
|
||||
//trick to re-use bytesref; provided that we re-instate it
|
||||
int myLastLen = term.length;
|
||||
int otherLastLen = nrCell.term.length;
|
||||
term.length = termLenByLevel[getLevel()];
|
||||
nrCell.term.length = termLenByLevel[nrCell.getLevel()];
|
||||
int answer = term.compareTo(nrCell.term);
|
||||
term.length = myLastLen;
|
||||
nrCell.term.length = otherLastLen;
|
||||
return answer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CellIterator getNextLevelCells(Shape shapeFilter) {
|
||||
ensureDecoded();
|
||||
NRCell subCell = cellsByLevel[cellLevel + 1];
|
||||
subCell.initIter(shapeFilter);
|
||||
return subCell;
|
||||
}
|
||||
|
||||
//----------- CellIterator
|
||||
|
||||
Shape iterFilter;//LevelledValue or NRShape
|
||||
boolean iterFirstIsIntersects;
|
||||
boolean iterLastIsIntersects;
|
||||
int iterFirstCellNumber;
|
||||
int iterLastCellNumber;
|
||||
|
||||
private void initIter(Shape filter) {
|
||||
cellNumber = -1;
|
||||
if (filter instanceof LevelledValue && ((LevelledValue)filter).getLevel() == 0)
|
||||
filter = null;//world means everything -- no filter
|
||||
iterFilter = filter;
|
||||
|
||||
NRCell parent = getLVAtLevel(getLevel()-1);
|
||||
|
||||
// Initialize iter* members.
|
||||
|
||||
//no filter means all subcells
|
||||
if (filter == null) {
|
||||
iterFirstCellNumber = 0;
|
||||
iterFirstIsIntersects = false;
|
||||
iterLastCellNumber = getNumSubCells(parent) - 1;
|
||||
iterLastIsIntersects = false;
|
||||
return;
|
||||
}
|
||||
|
||||
final LevelledValue minLV;
|
||||
final LevelledValue maxLV;
|
||||
if (filter instanceof NRShape) {
|
||||
NRShape nrShape = (NRShape) iterFilter;
|
||||
minLV = nrShape.getMinLV();
|
||||
maxLV = nrShape.getMaxLV();
|
||||
} else {
|
||||
minLV = (LevelledValue)iterFilter;
|
||||
maxLV = minLV;
|
||||
}
|
||||
|
||||
//fast path check when using same filter
|
||||
if (iterFilter == parent.iterFilter) {
|
||||
if (parent.iterFirstIsIntersects && parent.cellNumber == parent.iterFirstCellNumber
|
||||
&& minLV.getLevel() >= getLevel()) {
|
||||
iterFirstCellNumber = minLV.getValAtLevel(getLevel());
|
||||
iterFirstIsIntersects = (minLV.getLevel() > getLevel());
|
||||
} else {
|
||||
iterFirstCellNumber = 0;
|
||||
iterFirstIsIntersects = false;
|
||||
}
|
||||
if (parent.iterLastIsIntersects && parent.cellNumber == parent.iterLastCellNumber
|
||||
&& maxLV.getLevel() >= getLevel()) {
|
||||
iterLastCellNumber = maxLV.getValAtLevel(getLevel());
|
||||
iterLastIsIntersects = (maxLV.getLevel() > getLevel());
|
||||
} else {
|
||||
iterLastCellNumber = getNumSubCells(parent) - 1;
|
||||
iterLastIsIntersects = false;
|
||||
}
|
||||
if (iterFirstCellNumber == iterLastCellNumber) {
|
||||
if (iterLastIsIntersects)
|
||||
iterFirstIsIntersects = true;
|
||||
else if (iterFirstIsIntersects)
|
||||
iterLastIsIntersects = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
//uncommon to get here, except for level 1 which always happens
|
||||
|
||||
int startCmp = comparePrefixLV(minLV, parent);
|
||||
if (startCmp > 0) {//start comes after this cell
|
||||
iterFirstCellNumber = 0;
|
||||
iterFirstIsIntersects = false;
|
||||
iterLastCellNumber = -1;//so ends early (no cells)
|
||||
iterLastIsIntersects = false;
|
||||
return;
|
||||
}
|
||||
int endCmp = comparePrefixLV(maxLV, parent);//compare to end cell
|
||||
if (endCmp < 0) {//end comes before this cell
|
||||
iterFirstCellNumber = 0;
|
||||
iterFirstIsIntersects = false;
|
||||
iterLastCellNumber = -1;//so ends early (no cells)
|
||||
iterLastIsIntersects = false;
|
||||
return;
|
||||
}
|
||||
if (startCmp < 0 || minLV.getLevel() < getLevel()) {
|
||||
//start comes before...
|
||||
iterFirstCellNumber = 0;
|
||||
iterFirstIsIntersects = false;
|
||||
} else {
|
||||
iterFirstCellNumber = minLV.getValAtLevel(getLevel());
|
||||
iterFirstIsIntersects = (minLV.getLevel() > getLevel());
|
||||
}
|
||||
if (endCmp > 0 || maxLV.getLevel() < getLevel()) {
|
||||
//end comes after...
|
||||
iterLastCellNumber = getNumSubCells(parent) - 1;
|
||||
iterLastIsIntersects = false;
|
||||
} else {
|
||||
iterLastCellNumber = maxLV.getValAtLevel(getLevel());
|
||||
iterLastIsIntersects = (maxLV.getLevel() > getLevel());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
thisCell = null;
|
||||
if (nextCell != null)//calling hasNext twice in a row
|
||||
return true;
|
||||
|
||||
if (cellNumber >= iterLastCellNumber)
|
||||
return false;
|
||||
|
||||
resetCellWithCellNum(cellNumber < iterFirstCellNumber ? iterFirstCellNumber : cellNumber + 1);
|
||||
|
||||
boolean hasChildren =
|
||||
(cellNumber == iterFirstCellNumber && iterFirstIsIntersects)
|
||||
|| (cellNumber == iterLastCellNumber && iterLastIsIntersects);
|
||||
|
||||
if (!hasChildren) {
|
||||
setLeaf();
|
||||
setShapeRel(SpatialRelation.WITHIN);
|
||||
} else if (iterFirstCellNumber == iterLastCellNumber) {
|
||||
setShapeRel(SpatialRelation.CONTAINS);
|
||||
} else {
|
||||
setShapeRel(SpatialRelation.INTERSECTS);
|
||||
}
|
||||
|
||||
nextCell = this;
|
||||
return true;
|
||||
}
|
||||
|
||||
//TODO override nextFrom to be more efficient
|
||||
|
||||
//----------- LevelledValue / Shape
|
||||
|
||||
@Override
|
||||
public int getValAtLevel(int level) {
|
||||
final int result = cellsByLevel[level].cellNumber;
|
||||
assert result >= 0;//initialized
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public NRCell getLVAtLevel(int level) {
|
||||
assert level <= cellLevel;
|
||||
return cellsByLevel[level];
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpatialRelation relate(Shape shape) {
|
||||
ensureDecoded();
|
||||
if (shape == iterFilter && cellShapeRel != null)
|
||||
return cellShapeRel;
|
||||
if (shape instanceof LevelledValue)
|
||||
return relate((LevelledValue)shape);
|
||||
if (shape instanceof NRShape)
|
||||
return relate((NRShape)shape);
|
||||
return shape.relate(this).transpose();
|
||||
}
|
||||
|
||||
public SpatialRelation relate(LevelledValue lv) {
|
||||
ensureDecoded();
|
||||
int cmp = comparePrefixLV(this, lv);
|
||||
if (cmp != 0)
|
||||
return SpatialRelation.DISJOINT;
|
||||
if (getLevel() > lv.getLevel())
|
||||
return SpatialRelation.WITHIN;//or equals
|
||||
return SpatialRelation.CONTAINS;
|
||||
//no INTERSECTS; that won't happen.
|
||||
}
|
||||
|
||||
public SpatialRelation relate(NRShape nrShape) {
|
||||
ensureDecoded();
|
||||
int startCmp = comparePrefixLV(nrShape.getMinLV(), this);
|
||||
if (startCmp > 0) {//start comes after this cell
|
||||
return SpatialRelation.DISJOINT;
|
||||
}
|
||||
int endCmp = comparePrefixLV(nrShape.getMaxLV(), this);
|
||||
if (endCmp < 0) {//end comes before this cell
|
||||
return SpatialRelation.DISJOINT;
|
||||
}
|
||||
if ((startCmp < 0 || startCmp == 0 && nrShape.getMinLV().getLevel() <= getLevel())
|
||||
&& (endCmp > 0 || endCmp == 0 && nrShape.getMaxLV().getLevel() <= getLevel()))
|
||||
return SpatialRelation.WITHIN;//or equals
|
||||
if (startCmp == 0 && endCmp == 0
|
||||
&& nrShape.getMinLV().getLevel() >= getLevel() && nrShape.getMaxLV().getLevel() >= getLevel())
|
||||
return SpatialRelation.CONTAINS;
|
||||
return SpatialRelation.INTERSECTS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Rectangle getBoundingBox() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArea() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getArea(SpatialContext ctx) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Point getCenter() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Shape getBuffered(double distance, SpatialContext ctx) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
//------- Object
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (!(obj instanceof NRCell)) {
|
||||
return false;
|
||||
}
|
||||
if (this == obj)
|
||||
return true;
|
||||
NRCell nrCell = (NRCell) obj;
|
||||
assert term != nrCell.term;
|
||||
if (getLevel() != nrCell.getLevel())
|
||||
return false;
|
||||
//trick to re-use bytesref; provided that we re-instate it
|
||||
int myLastLen = term.length;
|
||||
int otherLastLen = nrCell.term.length;
|
||||
boolean answer = getTokenBytesNoLeaf(term).equals(nrCell.getTokenBytesNoLeaf(nrCell.term));
|
||||
term.length = myLastLen;
|
||||
nrCell.term.length = otherLastLen;
|
||||
return answer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
//trick to re-use bytesref; provided that we re-instate it
|
||||
int myLastLen = term.length;
|
||||
int result = getTokenBytesNoLeaf(term).hashCode();
|
||||
term.length = myLastLen;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
ensureDecoded();
|
||||
String str = NumberRangePrefixTree.this.toStringLV(this);
|
||||
if (isLeaf())
|
||||
str += "•";//bullet (won't be confused with textual representation)
|
||||
return str;
|
||||
}
|
||||
} // END OF NRCell
|
||||
|
||||
}
|
|
@ -234,19 +234,20 @@ public abstract class StrategyTestCase extends SpatialTestCase {
|
|||
CheckHits.checkExplanations(q, "", indexSearcher);
|
||||
}
|
||||
|
||||
protected void assertOperation(Map<String,Shape> indexedDocs,
|
||||
SpatialOperation operation, Shape queryShape) {
|
||||
//Generate truth via brute force
|
||||
Set<String> expectedIds = new HashSet<>();
|
||||
for (Map.Entry<String, Shape> stringShapeEntry : indexedDocs.entrySet()) {
|
||||
if (operation.evaluate(stringShapeEntry.getValue(), queryShape))
|
||||
expectedIds.add(stringShapeEntry.getKey());
|
||||
}
|
||||
|
||||
SpatialTestQuery testQuery = new SpatialTestQuery();
|
||||
testQuery.args = new SpatialArgs(operation, queryShape);
|
||||
testQuery.ids = new ArrayList<>(expectedIds);
|
||||
runTestQuery(SpatialMatchConcern.FILTER, testQuery);
|
||||
protected void testOperation(Shape indexedShape, SpatialOperation operation,
|
||||
Shape queryShape, boolean match) throws IOException {
|
||||
assertTrue("Faulty test",
|
||||
operation.evaluate(indexedShape, queryShape) == match ||
|
||||
indexedShape.equals(queryShape) &&
|
||||
(operation == SpatialOperation.Contains || operation == SpatialOperation.IsWithin));
|
||||
adoc("0", indexedShape);
|
||||
commit();
|
||||
Query query = strategy.makeQuery(new SpatialArgs(operation, queryShape));
|
||||
SearchResults got = executeQuery(query, 1);
|
||||
assert got.numFound <= 1 : "unclean test env";
|
||||
if ((got.numFound == 1) != match)
|
||||
fail(operation+" I:" + indexedShape + " Q:" + queryShape);
|
||||
deleteAll();//clean up after ourselves
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,140 @@
|
|||
package org.apache.lucene.spatial.prefix;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.spatial.StrategyTestCase;
|
||||
import org.apache.lucene.spatial.query.SpatialArgs;
|
||||
import org.apache.lucene.spatial.query.SpatialOperation;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
|
||||
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
|
||||
|
||||
/** Base test harness, ideally for SpatialStrategy impls that have exact results
|
||||
* (not grid approximated), hence "not fuzzy".
|
||||
*/
|
||||
public abstract class BaseNonFuzzySpatialOpStrategyTest extends StrategyTestCase {
|
||||
|
||||
//TODO this is partially redundant with StrategyTestCase.runTestQuery & testOperation
|
||||
|
||||
protected void testOperationRandomShapes(final SpatialOperation operation) throws IOException {
|
||||
//first show that when there's no data, a query will result in no results
|
||||
{
|
||||
Query query = strategy.makeQuery(new SpatialArgs(operation, randomQueryShape()));
|
||||
SearchResults searchResults = executeQuery(query, 1);
|
||||
assertEquals(0, searchResults.numFound);
|
||||
}
|
||||
|
||||
final int numIndexedShapes = randomIntBetween(1, 6);
|
||||
List<Shape> indexedShapes = new ArrayList<>(numIndexedShapes);
|
||||
for (int i = 0; i < numIndexedShapes; i++) {
|
||||
indexedShapes.add(randomIndexedShape());
|
||||
}
|
||||
|
||||
final int numQueryShapes = atLeast(20);
|
||||
List<Shape> queryShapes = new ArrayList<>(numQueryShapes);
|
||||
for (int i = 0; i < numQueryShapes; i++) {
|
||||
queryShapes.add(randomQueryShape());
|
||||
}
|
||||
|
||||
testOperation(operation, indexedShapes, queryShapes, true/*havoc*/);
|
||||
}
|
||||
|
||||
protected void testOperation(final SpatialOperation operation,
|
||||
List<Shape> indexedShapes, List<Shape> queryShapes, boolean havoc) throws IOException {
|
||||
//Main index loop:
|
||||
for (int i = 0; i < indexedShapes.size(); i++) {
|
||||
Shape shape = indexedShapes.get(i);
|
||||
adoc(""+i, shape);
|
||||
|
||||
if (havoc && random().nextInt(10) == 0)
|
||||
commit();//intermediate commit, produces extra segments
|
||||
}
|
||||
if (havoc) {
|
||||
//delete some documents randomly
|
||||
for (int id = 0; id < indexedShapes.size(); id++) {
|
||||
if (random().nextInt(10) == 0) {
|
||||
deleteDoc(""+id);
|
||||
indexedShapes.set(id, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
commit();
|
||||
|
||||
//Main query loop:
|
||||
for (int queryIdx = 0; queryIdx < queryShapes.size(); queryIdx++) {
|
||||
final Shape queryShape = queryShapes.get(queryIdx);
|
||||
|
||||
if (havoc)
|
||||
preQueryHavoc();
|
||||
|
||||
//Generate truth via brute force:
|
||||
// We ensure true-positive matches (if the predicate on the raw shapes match
|
||||
// then the search should find those same matches).
|
||||
Set<String> expectedIds = new LinkedHashSet<>();//true-positives
|
||||
for (int id = 0; id < indexedShapes.size(); id++) {
|
||||
Shape indexedShape = indexedShapes.get(id);
|
||||
if (indexedShape == null)
|
||||
continue;
|
||||
if (operation.evaluate(indexedShape, queryShape)) {
|
||||
expectedIds.add(""+id);
|
||||
}
|
||||
}
|
||||
|
||||
//Search and verify results
|
||||
SpatialArgs args = new SpatialArgs(operation, queryShape);
|
||||
Query query = strategy.makeQuery(args);
|
||||
SearchResults got = executeQuery(query, 100);
|
||||
Set<String> remainingExpectedIds = new LinkedHashSet<>(expectedIds);
|
||||
for (SearchResult result : got.results) {
|
||||
String id = result.getId();
|
||||
if (!remainingExpectedIds.remove(id)) {
|
||||
fail("Shouldn't match", id, indexedShapes, queryShape);
|
||||
}
|
||||
}
|
||||
if (!remainingExpectedIds.isEmpty()) {
|
||||
String id = remainingExpectedIds.iterator().next();
|
||||
fail("Should have matched", id, indexedShapes, queryShape);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void fail(String label, String id, List<Shape> indexedShapes, Shape queryShape) {
|
||||
fail(label + " I#" + id + ":" + indexedShapes.get(Integer.parseInt(id)) + " Q:" + queryShape);
|
||||
}
|
||||
|
||||
protected void preQueryHavoc() {
|
||||
if (strategy instanceof RecursivePrefixTreeStrategy) {
|
||||
RecursivePrefixTreeStrategy rpts = (RecursivePrefixTreeStrategy) strategy;
|
||||
int scanLevel = randomInt(rpts.getGrid().getMaxLevels());
|
||||
rpts.setPrefixGridScanLevel(scanLevel);
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract Shape randomIndexedShape();
|
||||
|
||||
protected abstract Shape randomQueryShape();
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
package org.apache.lucene.spatial.prefix;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Repeat;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import org.apache.lucene.spatial.NumberRangePrefixTreeStrategy;
|
||||
import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
|
||||
import org.apache.lucene.spatial.query.SpatialOperation;
|
||||
import org.junit.Before;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Calendar;
|
||||
|
||||
public class DateNRStrategyTest extends BaseNonFuzzySpatialOpStrategyTest {
|
||||
|
||||
static final int ITERATIONS = 10;
|
||||
|
||||
DateRangePrefixTree tree;
|
||||
|
||||
int era;
|
||||
int year;
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
tree = DateRangePrefixTree.INSTANCE;
|
||||
strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
|
||||
era = random().nextBoolean() ? 0 : 1;
|
||||
year = 1 + random().nextInt(2_000_000);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Repeat(iterations = ITERATIONS)
|
||||
public void testIntersects() throws IOException {
|
||||
testOperationRandomShapes(SpatialOperation.Intersects);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Repeat(iterations = ITERATIONS)
|
||||
public void testWithin() throws IOException {
|
||||
testOperationRandomShapes(SpatialOperation.IsWithin);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Repeat(iterations = ITERATIONS)
|
||||
public void testContains() throws IOException {
|
||||
testOperationRandomShapes(SpatialOperation.Contains);
|
||||
}
|
||||
|
||||
@Test @Ignore("see LUCENE-5692")
|
||||
@Repeat(iterations = ITERATIONS)
|
||||
public void testDisjoint() throws IOException {
|
||||
testOperationRandomShapes(SpatialOperation.IsDisjointTo);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithinSame() throws IOException {
|
||||
final Calendar cal = tree.newCal();
|
||||
cal.set(Calendar.ERA, era);
|
||||
cal.set(Calendar.YEAR, year);
|
||||
|
||||
testOperation(
|
||||
tree.toShape(cal),
|
||||
SpatialOperation.IsWithin,
|
||||
tree.toShape(cal), true);//is within itself
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWorld() throws IOException {
|
||||
testOperation(
|
||||
tree.toShape(tree.newCal()),//world matches everything
|
||||
SpatialOperation.Contains,
|
||||
tree.toShape(randomCalendar()), true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Shape randomIndexedShape() {
|
||||
Calendar cal1 = randomCalendar();
|
||||
Shape s1 = tree.toShape(cal1);
|
||||
try {
|
||||
Calendar cal2 = randomCalendar();
|
||||
Shape s2 = tree.toShape(cal2);
|
||||
if (cal1.compareTo(cal2) < 0) {
|
||||
return tree.toRangeShape(s1, s2);
|
||||
} else {
|
||||
return tree.toRangeShape(s2, s1);
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
assert e.getMessage().startsWith("Differing precision");
|
||||
return s1;
|
||||
}
|
||||
}
|
||||
|
||||
private Calendar randomCalendar() {
|
||||
Calendar cal = tree.newCal();
|
||||
cal.setTimeInMillis(random().nextLong());
|
||||
cal.set(Calendar.ERA, era);
|
||||
cal.set(Calendar.YEAR, year);
|
||||
try {
|
||||
tree.clearFieldsAfter(cal, random().nextInt(Calendar.FIELD_COUNT+1)-1);
|
||||
} catch (AssertionError e) {
|
||||
if (!e.getMessage().equals("Calendar underflow"))
|
||||
throw e;
|
||||
}
|
||||
return cal;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Shape randomQueryShape() {
|
||||
return randomIndexedShape();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,169 @@
|
|||
package org.apache.lucene.spatial.prefix.tree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import com.spatial4j.core.shape.SpatialRelation;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Calendar;
|
||||
import java.util.GregorianCalendar;
|
||||
|
||||
public class DateRangePrefixTreeTest extends LuceneTestCase {
|
||||
|
||||
private DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;
|
||||
|
||||
public void testRoundTrip() throws Exception {
|
||||
Calendar cal = tree.newCal();
|
||||
|
||||
assertEquals("*", tree.toString(cal));
|
||||
|
||||
//test no underflow
|
||||
assertTrue(tree.toShape(new int[]{0}, 1).toString().startsWith("-"));
|
||||
|
||||
//Some arbitrary date
|
||||
cal.set(2014, Calendar.MAY, 9);
|
||||
roundTrip(cal);
|
||||
assertEquals("2014-05-09",tree.toString(cal));
|
||||
|
||||
//Earliest date
|
||||
cal.setTimeInMillis(Long.MIN_VALUE);
|
||||
roundTrip(cal);
|
||||
|
||||
//Farthest date
|
||||
cal.setTimeInMillis(Long.MAX_VALUE);
|
||||
roundTrip(cal);
|
||||
|
||||
//1BC is "0000".
|
||||
cal.clear();
|
||||
cal.set(Calendar.ERA, GregorianCalendar.BC);
|
||||
cal.set(Calendar.YEAR, 1);
|
||||
roundTrip(cal);
|
||||
assertEquals("0000", tree.toString(cal));
|
||||
//adding a "+" parses to the same; and a trailing 'Z' is fine too
|
||||
assertEquals(cal, tree.parseCalendar("+0000Z"));
|
||||
|
||||
//2BC is "-0001"
|
||||
cal.clear();
|
||||
cal.set(Calendar.ERA, GregorianCalendar.BC);
|
||||
cal.set(Calendar.YEAR, 2);
|
||||
roundTrip(cal);
|
||||
assertEquals("-0001", tree.toString(cal));
|
||||
|
||||
//1AD is "0001"
|
||||
cal.clear();
|
||||
cal.set(Calendar.YEAR, 1);
|
||||
roundTrip(cal);
|
||||
assertEquals("0001", tree.toString(cal));
|
||||
|
||||
//test random
|
||||
cal.setTimeInMillis(random().nextLong());
|
||||
roundTrip(cal);
|
||||
}
|
||||
|
||||
//copies from DateRangePrefixTree
|
||||
private static final int[] CAL_FIELDS = {
|
||||
Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH,
|
||||
Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND};
|
||||
|
||||
private void roundTrip(Calendar calOrig) throws ParseException {
|
||||
Calendar cal = (Calendar) calOrig.clone();
|
||||
String lastString = null;
|
||||
while (true) {
|
||||
String calString = tree.toString(cal);
|
||||
assert lastString == null || calString.length() < lastString.length();
|
||||
//test parseCalendar
|
||||
assertEquals(cal, tree.parseCalendar(calString));
|
||||
|
||||
//to Shape and back to Cal
|
||||
Shape shape = tree.toShape(cal);
|
||||
Calendar cal2 = tree.toCalendar(shape);
|
||||
assertEquals(calString, tree.toString(cal2));
|
||||
|
||||
if (!calString.equals("*")) {//not world cell
|
||||
//to Term and back to Cell
|
||||
Cell cell = (Cell) shape;
|
||||
BytesRef term = cell.getTokenBytesNoLeaf(null);
|
||||
Cell cell2 = tree.readCell(BytesRef.deepCopyOf(term), null);
|
||||
assertEquals(calString, cell, cell2);
|
||||
Calendar cal3 = tree.toCalendar(cell2.getShape());
|
||||
assertEquals(calString, tree.toString(cal3));
|
||||
|
||||
// setLeaf comparison
|
||||
cell2.setLeaf();
|
||||
BytesRef termLeaf = cell2.getTokenBytesWithLeaf(null);
|
||||
assertTrue(term.compareTo(termLeaf) < 0);
|
||||
assertEquals(termLeaf.length, term.length + 1);
|
||||
assertEquals(0, termLeaf.bytes[termLeaf.offset + termLeaf.length - 1]);
|
||||
assertTrue(cell.isPrefixOf(cell2));
|
||||
}
|
||||
|
||||
//end of loop; decide if should loop again with lower precision
|
||||
final int calPrecField = tree.getCalPrecisionField(cal);
|
||||
if (calPrecField == -1)
|
||||
break;
|
||||
int fieldIdx = Arrays.binarySearch(CAL_FIELDS, calPrecField);
|
||||
assert fieldIdx >= 0;
|
||||
int prevPrecField = (fieldIdx == 0 ? -1 : CAL_FIELDS[--fieldIdx]);
|
||||
try {
|
||||
tree.clearFieldsAfter(cal, prevPrecField);
|
||||
} catch (AssertionError e) {
|
||||
if (e.getMessage().equals("Calendar underflow"))
|
||||
return;
|
||||
throw e;
|
||||
}
|
||||
lastString = calString;
|
||||
}
|
||||
}
|
||||
|
||||
public void testShapeRelations() throws ParseException {
|
||||
Shape shapeA = tree.parseShape("[3122-01-23 TO 3122-11-27]");
|
||||
Shape shapeB = tree.parseShape("[3122-08 TO 3122-11]");
|
||||
assertEquals(SpatialRelation.INTERSECTS, shapeA.relate(shapeB));
|
||||
|
||||
shapeA = tree.parseShape("3122");
|
||||
shapeB = tree.parseShape("[* TO 3122-10-31]");
|
||||
assertEquals(SpatialRelation.INTERSECTS, shapeA.relate(shapeB));
|
||||
|
||||
shapeA = tree.parseShape("[3122-05-28 TO 3122-06-29]");
|
||||
shapeB = tree.parseShape("[3122 TO 3122-04]");
|
||||
assertEquals(SpatialRelation.DISJOINT, shapeA.relate(shapeB));
|
||||
}
|
||||
|
||||
public void testShapeRangeOptimizer() throws ParseException {
|
||||
assertEquals("[2014-08 TO 2014-09]", tree.parseShape("[2014-08-01 TO 2014-09-30]").toString());
|
||||
|
||||
assertEquals("2014", tree.parseShape("[2014-01-01 TO 2014-12-31]").toString());
|
||||
|
||||
assertEquals("2014", tree.parseShape("[2014-01 TO 2014]").toString());
|
||||
|
||||
assertEquals("[2014 TO 2014-04-06]", tree.parseShape("[2014-01 TO 2014-04-06]").toString());
|
||||
|
||||
assertEquals("*", tree.parseShape("[* TO *]").toString());
|
||||
|
||||
assertEquals("2014-08-01", tree.parseShape("[2014-08-01 TO 2014-08-01]").toString());
|
||||
|
||||
assertEquals("[2014 TO 2014-09-15]", tree.parseShape("[2014 TO 2014-09-15]").toString());
|
||||
|
||||
assertEquals("[* TO 2014-09-15]", tree.parseShape("[* TO 2014-09-15]").toString());
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue