From bb267a143e009f7b1375f7fa9755b76fcc82ec4f Mon Sep 17 00:00:00 2001 From: David Wayne Smiley Date: Thu, 5 Jun 2014 01:43:12 +0000 Subject: [PATCH] LUCENE-5648: DateRangePrefixTree and NumberRangePrefixTreeStrategy git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1600555 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 7 + .../NumberRangePrefixTreeStrategy.java | 79 ++ .../prefix/tree/DateRangePrefixTree.java | 428 +++++++++ .../prefix/tree/NumberRangePrefixTree.java | 811 ++++++++++++++++++ .../lucene/spatial/StrategyTestCase.java | 27 +- .../BaseNonFuzzySpatialOpStrategyTest.java | 140 +++ .../spatial/prefix/DateNRStrategyTest.java | 130 +++ .../prefix/tree/DateRangePrefixTreeTest.java | 169 ++++ 8 files changed, 1778 insertions(+), 13 deletions(-) create mode 100644 lucene/spatial/src/java/org/apache/lucene/spatial/NumberRangePrefixTreeStrategy.java create mode 100644 lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java create mode 100644 lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java create mode 100644 lucene/spatial/src/test/org/apache/lucene/spatial/prefix/BaseNonFuzzySpatialOpStrategyTest.java create mode 100644 lucene/spatial/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java create mode 100644 lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 457c7d60aa5..25207373c08 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -21,6 +21,13 @@ New Features PushPostingsWriterBase for single-pass push of docs/positions to the postings format. (Mike McCandless) +* LUCENE-5648: Index and search date ranges, particularly multi-valued ones. It's + implemented in the spatial module as DateRangePrefixTree used with + NumberRangePrefixTreeStrategy. (David Smiley) + +* LUCENE-4175: Index and search rectangles with spatial BBoxSpatialStrategy. 
+ Sort documents by relative overlap of query areas. (Ryan McKinley) + API Changes * LUCENE-4535: oal.util.FilterIterator is now an internal API. diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/NumberRangePrefixTreeStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/NumberRangePrefixTreeStrategy.java new file mode 100644 index 00000000000..273e893bf82 --- /dev/null +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/NumberRangePrefixTreeStrategy.java @@ -0,0 +1,79 @@ +package org.apache.lucene.spatial; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Field; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree; + +import java.text.ParseException; + +/** A PrefixTree based on Number/Date ranges. This isn't very "spatial" on the surface (to the user) but + * it's implemented using spatial so that's why it's here extending a SpatialStrategy. 
+ * + * @lucene.experimental + */ +public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy { + + public NumberRangePrefixTreeStrategy(NumberRangePrefixTree prefixTree, String fieldName) { + super(prefixTree, fieldName); + setPruneLeafyBranches(false); + setPrefixGridScanLevel(prefixTree.getMaxLevels()-2);//user might want to change, however + setPointsOnly(false); + setDistErrPct(0); + } + + @Override + public NumberRangePrefixTree getGrid() { + return (NumberRangePrefixTree) super.getGrid(); + } + + @Override + public Field[] createIndexableFields(Shape shape) { + //levels doesn't actually matter; NumberRange based Shapes have their own "level". + TokenStream tokenStream = createTokenStream(shape, grid.getMaxLevels()); + Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE); + return new Field[]{field}; + } + + /** For a Date based tree, pass in a Calendar, with unspecified fields marked as cleared. + * See {@link NumberRangePrefixTree#toShape(Object)}. */ + public Shape toShape(Object value) { + return getGrid().toShape(value); + } + + /** See {@link NumberRangePrefixTree#toRangeShape(Shape, Shape)}. */ + public Shape toRangeShape(Shape min, Shape max) { + return getGrid().toRangeShape(min, max); + } + + /** See {@link NumberRangePrefixTree#parseShape(String)}. */ + public Shape parseShape(String str) throws ParseException { + return getGrid().parseShape(str); + } + + /** Unsupported. 
*/ + @Override + public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) { + throw new UnsupportedOperationException(); + } +} diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java new file mode 100644 index 00000000000..ad951981b8f --- /dev/null +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java @@ -0,0 +1,428 @@ +package org.apache.lucene.spatial.prefix.tree; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.spatial4j.core.shape.Shape; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.Locale; +import java.util.TimeZone; + +/** + * A PrefixTree for date ranges in which the levels of the tree occur at natural periods of time (e.g. years, + * months, ...). You pass in {@link Calendar} objects with the desired fields set and the unspecified + * fields unset, which conveys the precision. 
The implementation tries to be generic to the Calendar + * abstraction, making some optimizations when a Gregorian is used, but no others have been tested. + *
<p>
+ * Warning: If you construct a Calendar and then get something from the object like a field (e.g. year) or + * milliseconds, then every field is fully set by side-effect. So after setting the fields, pass it to this + * API first. + * @lucene.experimental + */ +public class DateRangePrefixTree extends NumberRangePrefixTree { + + /* + WARNING java.util.Calendar is tricky to work with: + * If you "get" any field value, every field becomes "set". This can introduce a Heisenbug effect, + when in a debugger in some cases. Fortunately, Calendar.toString() doesn't apply. + * Beware Calendar underflow of the underlying long. If you create a Calendar from Long.MIN_VALUE, and clear + a field, it will underflow and appear close to Long.MAX_VALUE (BC to AD). + + There are no doubt other reasons but those two were hard fought lessons here. + + TODO Improvements: + * Make max precision configurable (i.e. to SECOND). + * Make min & max year span configurable. Use that to remove pointless top levels of the SPT. + If year span is > 10k, then add 1k year level. If year span is > 10k of 1k levels, add 1M level. + * NumberRangePrefixTree: override getTreeCellIterator for optimized case where the shape isn't a date span; use + FilterCellIterator of the cell stack. + + */ + + private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); + private static Calendar CAL_TMP;//template + static { + CAL_TMP = Calendar.getInstance(UTC, Locale.ROOT); + CAL_TMP.clear(); + } + + private static final Calendar MINCAL = (Calendar) CAL_TMP.clone(); + private static final Calendar MAXCAL = (Calendar) CAL_TMP.clone(); + static { + MINCAL.setTimeInMillis(Long.MIN_VALUE); + MAXCAL.setTimeInMillis(Long.MAX_VALUE); + } + //BC years are decreasing, remember. Yet ActualMaximum is the numerically high value, ActualMinimum is 1. 
+ private static final int BC_FIRSTYEAR = MINCAL.getActualMaximum(Calendar.YEAR); + private static final int BC_LASTYEAR = MINCAL.getActualMinimum(Calendar.YEAR);//1 + private static final int BC_YEARS = BC_FIRSTYEAR - BC_LASTYEAR + 1; + private static final int AD_FIRSTYEAR = MAXCAL.getActualMinimum(Calendar.YEAR);//1 + private static final int AD_LASTYEAR = MAXCAL.getActualMaximum(Calendar.YEAR); + private static final int AD_YEAR_BASE = (((BC_YEARS-1) / 1000_000)+1) * 1000_000; + static { assert BC_LASTYEAR == 1 && AD_FIRSTYEAR == 1; } + + //how many million years are there? + private static final int NUM_MYEARS = (AD_YEAR_BASE + AD_LASTYEAR) / 1000_000; + + private static int calFieldLen(int field) { + return CAL_TMP.getMaximum(field) - CAL_TMP.getMinimum(field) + 1; + } + + private static final int[] FIELD_BY_LEVEL = { + -1/*unused*/, -1, -1, Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH, + Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND}; + private static final int yearLevel = 3; + + public static final DateRangePrefixTree INSTANCE = new DateRangePrefixTree(); + + private final LevelledValue minLV, maxLV; + private final LevelledValue gregorianChangeDateLV; + + private DateRangePrefixTree() { + super(new int[]{//sublevels by level + NUM_MYEARS, + 1000,//1 thousand thousand-years in a million years + 1000,//1 thousand years in a thousand-year + calFieldLen(Calendar.MONTH), + calFieldLen(Calendar.DAY_OF_MONTH), + calFieldLen(Calendar.HOUR_OF_DAY), + calFieldLen(Calendar.MINUTE), + calFieldLen(Calendar.SECOND), + calFieldLen(Calendar.MILLISECOND), + }); + maxLV = (LevelledValue) toShape((Calendar)MAXCAL.clone()); + minLV = (LevelledValue) toShape((Calendar)MINCAL.clone()); + if (MAXCAL instanceof GregorianCalendar) { + //TODO this should be a configurable param by passing a Calendar surving as a template. 
+ GregorianCalendar gCal = (GregorianCalendar)MAXCAL; + gregorianChangeDateLV = (LevelledValue) toShape(gCal.getGregorianChange()); + } else { + gregorianChangeDateLV = null; + } + } + + @Override + protected int getNumSubCells(LevelledValue lv) { + int cmp = comparePrefixLV(lv, maxLV); + assert cmp <= 0; + if (cmp == 0)//edge case (literally!) + return maxLV.getValAtLevel(lv.getLevel()+1); + + // if using GregorianCalendar and we're after the "Gregorian change date" then we'll compute + // the sub-cells ourselves more efficiently without the need to construct a Calendar. + cmp = gregorianChangeDateLV != null ? comparePrefixLV(lv, gregorianChangeDateLV) : -1; + //TODO consider also doing fast-path if field is <= hours even if before greg change date + if (cmp >= 0) { + int result = fastSubCells(lv); + assert result == slowSubCells(lv) : "fast/slow numSubCells inconsistency"; + return result; + } else { + return slowSubCells(lv); + } + } + + private int fastSubCells(LevelledValue lv) { + if (lv.getLevel() == yearLevel+1) {//month + switch (lv.getValAtLevel(lv.getLevel())) { + case Calendar.SEPTEMBER: + case Calendar.APRIL: + case Calendar.JUNE: + case Calendar.NOVEMBER: + return 30; + case Calendar.FEBRUARY: + //get the year (negative numbers for BC) + int yearAdj = lv.getValAtLevel(1) * 1_000_000; + yearAdj += lv.getValAtLevel(2) * 1000; + yearAdj += lv.getValAtLevel(3); + int year = yearAdj - AD_YEAR_BASE; + if (year % 4 == 0 && !(year % 100 == 0 && year % 400 != 0) )//leap year + return 29; + else + return 28; + default: + return 31; + } + } else {//typical: + return super.getNumSubCells(lv); + } + } + + private int slowSubCells(LevelledValue lv) { + int field = FIELD_BY_LEVEL[lv.getLevel()+1]; + //short-circuit optimization (GregorianCalendar assumptions) + if (field == -1 || field == Calendar.YEAR || field >= Calendar.HOUR_OF_DAY)//TODO make configurable + return super.getNumSubCells(lv); + Calendar cal = toCalendarLV(lv);//somewhat heavyweight op; ideally 
should be stored on LevelledValue somehow + return cal.getActualMaximum(field) - cal.getActualMinimum(field) + 1; + } + + /** Calendar utility method: + * Returns a new {@link Calendar} in UTC TimeZone, ROOT Locale, with all fields cleared. */ + public Calendar newCal() { + return (Calendar) CAL_TMP.clone(); + } + + /** Calendar utility method: + * Gets the Calendar field code of the last field that is set prior to an unset field. It only + * examines fields relevant to the prefix tree. If no fields are set, it returns -1. */ + public int getCalPrecisionField(Calendar cal) { + int lastField = -1; + for (int level = yearLevel; level < FIELD_BY_LEVEL.length; level++) { + int field = FIELD_BY_LEVEL[level]; + if (!cal.isSet(field)) + break; + lastField = field; + } + return lastField; + } + + /** Calendar utility method: + * Calls {@link Calendar#clear(int)} for every field after {@code field}. Beware of Calendar underflow. */ + public void clearFieldsAfter(Calendar cal, int field) { + if (field == -1) { + cal.clear(); + return; + } + int assertEra = -1; + assert (assertEra = (((Calendar)cal.clone()).get(Calendar.ERA))) >= 0;//a trick to only get this if assert enabled + for (int f = field+1; f < Calendar.FIELD_COUNT; f++) { + cal.clear(f); + } + assert ((Calendar)cal.clone()).get(Calendar.ERA) == assertEra : "Calendar underflow"; + } + + /** Converts {@code value} from a {@link Calendar} or {@link Date} to a {@link Shape}. Other arguments + * result in a {@link java.lang.IllegalArgumentException}. + */ + @Override + public Shape toShape(Object value) { + if (value instanceof Calendar) { + return toShape((Calendar) value); + } else if (value instanceof Date) { + Calendar cal = newCal(); + cal.setTime((Date)value); + return toShape(cal); + } + throw new IllegalArgumentException("Expecting Calendar or Date but got: "+value.getClass()); + } + + /** Converts the Calendar into a Shape. + * The isSet() state of the Calendar is re-instated when done. 
*/ + public Shape toShape(Calendar cal) { + // Convert a Calendar into a stack of cell numbers + final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields + try { + int[] valStack = new int[maxLevels];//starts at level 1, not 0 + int len = 0; + if (calPrecField >= Calendar.YEAR) {//year or better precision + int year = cal.get(Calendar.YEAR); + int yearAdj = cal.get(Calendar.ERA) == 0 ? AD_YEAR_BASE - (year - 1) : AD_YEAR_BASE + year; + + valStack[len++] = yearAdj / 1000_000; + yearAdj -= valStack[len-1] * 1000_000; + valStack[len++] = yearAdj / 1000; + yearAdj -= valStack[len-1] * 1000; + valStack[len++] = yearAdj; + for (int level = yearLevel+1; level < FIELD_BY_LEVEL.length; level++) { + int field = FIELD_BY_LEVEL[level]; + if (field > calPrecField) + break; + valStack[len++] = cal.get(field) - cal.getActualMinimum(field); + } + } + + return toShape(valStack, len); + } finally { + clearFieldsAfter(cal, calPrecField);//restore precision state modified by get() + } + } + + public Calendar toCalendar(Shape shape) { + if (shape instanceof LevelledValue) + return toCalendarLV((LevelledValue) shape); + throw new IllegalArgumentException("Can't be converted to Calendar: "+shape); + } + + private Calendar toCalendarLV(LevelledValue lv) { + if (lv.getLevel() == 0) + return newCal(); + if (comparePrefixLV(lv, minLV) <= 0) {//shouldn't typically happen; sometimes in a debugger + return (Calendar) MINCAL.clone();//full precision; truncation would cause underflow + } + assert comparePrefixLV(lv, maxLV) <= 0; + Calendar cal = newCal(); + + int yearAdj = lv.getValAtLevel(1) * 1_000_000; + if (lv.getLevel() > 1) { + yearAdj += lv.getValAtLevel(2) * 1000; + if (lv.getLevel() > 2) { + yearAdj += lv.getValAtLevel(3); + } + } + if (yearAdj > AD_YEAR_BASE) { + cal.set(Calendar.ERA, 1); + cal.set(Calendar.YEAR, yearAdj - AD_YEAR_BASE);//setting the year resets the era + } else { + cal.set(Calendar.ERA, 0);//we assert this "sticks" at the end + 
cal.set(Calendar.YEAR, (AD_YEAR_BASE - yearAdj) + 1); + } + for (int level = yearLevel+1; level <= lv.getLevel(); level++) { + int field = FIELD_BY_LEVEL[level]; + cal.set(field, lv.getValAtLevel(level) + cal.getActualMinimum(field)); + } + assert yearAdj > AD_YEAR_BASE || ((Calendar)cal.clone()).get(Calendar.ERA) == 0 : "ERA / YEAR underflow"; + return cal; + } + + @Override + protected String toStringLV(LevelledValue lv) { + return toString(toCalendarLV(lv)); + } + + /** Calendar utility method: + * Converts the calendar to ISO-8601, to include proper BC handling (1BC is "0000", 2BC is "-0001", etc.); + * and WITHOUT a trailing 'Z'. + * A fully cleared calendar will yield the string "*". + * The isSet() state of the Calendar is re-instated when done. */ + @SuppressWarnings("fallthrough") + public String toString(Calendar cal) { + final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields + if (calPrecField == -1) + return "*"; + try { + //TODO not fully optimized because I only expect this to be used in tests / debugging. + // Borrow code from Solr DateUtil, and have it reference this back? + String pattern = "yyyy-MM-dd'T'HH:mm:ss.SSS"; + int ptnLen = 0; + switch (calPrecField) {//switch fall-through is deliberate + case Calendar.MILLISECOND: ptnLen += 4; + case Calendar.SECOND: ptnLen += 3; + case Calendar.MINUTE: ptnLen += 3; + case Calendar.HOUR_OF_DAY: ptnLen += 5; + case Calendar.DAY_OF_MONTH: ptnLen += 3; + case Calendar.MONTH: ptnLen += 3; + case Calendar.YEAR: ptnLen += 4; + break; + default: throw new IllegalStateException(""+calPrecField); + } + pattern = pattern.substring(0, ptnLen); + SimpleDateFormat format = new SimpleDateFormat(pattern, Locale.ROOT); + format.setTimeZone(cal.getTimeZone()); + if (cal.get(Calendar.ERA) == 0) {//BC + //SDF doesn't do this properly according to ISO-8601 + // Example: 1BC == "0000" (actually 0 AD), 2BC == "-0001", 3BC == "-0002", ... 
+ final int yearOrig = cal.get(Calendar.YEAR); + cal.set(Calendar.YEAR, yearOrig-1); + String str; + try { + str = format.format(cal.getTime()); + } finally { + //reset to what it was + cal.set(Calendar.ERA, 0);//necessary! + cal.set(Calendar.YEAR, yearOrig); + } + if (yearOrig > 1) + return "-" + str; + else + return "0000" + str.substring(4); + } + return format.format(cal.getTime()); + } finally { + clearFieldsAfter(cal, calPrecField);//restore precision state modified by get() + } + } + + @Override + protected LevelledValue parseShapeLV(String str) throws ParseException { + return (LevelledValue) toShape(parseCalendar(str)); + } + + /** Calendar utility method: + * The reverse of {@link #toString(java.util.Calendar)}. It will only set the fields found, leaving + * the remainder in an un-set state. A leading '-' or '+' is optional (positive assumed), and a + * trailing 'Z' is also optional. + * @param str not null and not empty + * @return not null + */ + public Calendar parseCalendar(String str) throws ParseException { + // example: +2014-10-23T21:22:33.159Z + if (str == null || str.isEmpty()) + throw new IllegalArgumentException("str is null or blank"); + Calendar cal = newCal(); + if (str.equals("*")) + return cal; + int offset = 0;//a pointer + try { + //year & era: + int lastOffset = str.charAt(str.length()-1) == 'Z' ? str.length() - 1 : str.length(); + int hyphenIdx = str.indexOf('-', 1);//look past possible leading hyphen + if (hyphenIdx < 0) + hyphenIdx = lastOffset; + int year = Integer.parseInt(str.substring(offset, hyphenIdx)); + cal.set(Calendar.ERA, year <= 0 ? 0 : 1); + cal.set(Calendar.YEAR, year <= 0 ? -1*year + 1 : year); + offset = hyphenIdx + 1; + if (lastOffset < offset) + return cal; + + //NOTE: We aren't validating separator chars, and we unintentionally accept leading +/-. + // The str.substring()'s hopefully get optimized to be stack-allocated. 
+ + //month: + cal.set(Calendar.MONTH, Integer.parseInt(str.substring(offset, offset+2)) - 1);//starts at 0 + offset += 3; + if (lastOffset < offset) + return cal; + //day: + cal.set(Calendar.DAY_OF_MONTH, Integer.parseInt(str.substring(offset, offset+2))); + offset += 3; + if (lastOffset < offset) + return cal; + //hour: + cal.set(Calendar.HOUR_OF_DAY, Integer.parseInt(str.substring(offset, offset+2))); + offset += 3; + if (lastOffset < offset) + return cal; + //minute: + cal.set(Calendar.MINUTE, Integer.parseInt(str.substring(offset, offset+2))); + offset += 3; + if (lastOffset < offset) + return cal; + //second: + cal.set(Calendar.SECOND, Integer.parseInt(str.substring(offset, offset+2))); + offset += 3; + if (lastOffset < offset) + return cal; + //ms: + cal.set(Calendar.MILLISECOND, Integer.parseInt(str.substring(offset, offset+3))); + offset += 3;//last one, move to next char + if (lastOffset == offset) + return cal; + } catch (Exception e) { + ParseException pe = new ParseException("Improperly formatted date: "+str, offset); + pe.initCause(e); + throw pe; + } + throw new ParseException("Improperly formatted date: "+str, offset); + } + +} diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java new file mode 100644 index 00000000000..83a093ab345 --- /dev/null +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java @@ -0,0 +1,811 @@ +package org.apache.lucene.spatial.prefix.tree; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.context.SpatialContextFactory; +import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Rectangle; +import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.SpatialRelation; +import com.spatial4j.core.shape.impl.RectangleImpl; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; + +import java.text.ParseException; + +/** + * A special SpatialPrefixTree for single-dimensional number ranges of integral values. It's based + * on a stack of integers, and thus it's not limited to a long. + * @see LUCENE-5648 + * @lucene.experimental + */ +public abstract class NumberRangePrefixTree extends SpatialPrefixTree { + + // + // Dummy SpatialContext + // + + private static final SpatialContext DUMMY_CTX; + static { + SpatialContextFactory factory = new SpatialContextFactory(); + factory.geo = false; + factory.worldBounds = new RectangleImpl(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0L, 0L, null); + DUMMY_CTX = factory.newSpatialContext(); + } + + // + // LevelledValue + // + + /** A value implemented as a stack of numbers. Spatially speaking, it's + * analogous to a Point but 1D yet has some precision width. + * @lucene.internal */ + protected static interface LevelledValue extends Shape { + int getLevel();//0 means the world (universe). 
+ int getValAtLevel(int level);//level >= 0 && <= getLevel() + LevelledValue getLVAtLevel(int level); + } + + /** Compares a to b, returning less than 0, 0, or greater than 0, if a is less than, equal to, or + * greater than b, respectively. Only min(a.levels,b.levels) are compared. + * @lucene.internal */ + protected static int comparePrefixLV(LevelledValue a, LevelledValue b) { + int minLevel = Math.min(a.getLevel(), b.getLevel()); + for (int level = 1; level <= minLevel; level++) { + int diff = a.getValAtLevel(level) - b.getValAtLevel(level); + if (diff != 0) + return diff; + } + return 0; + } + + protected String toStringLV(LevelledValue lv) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + for (int level = 1; level <= lv.getLevel(); level++) { + buf.append(lv.getValAtLevel(level)).append(','); + } + buf.setLength(buf.length()-1);//chop off ',' + buf.append(']'); + return buf.toString(); + } + + // + // NRShape + // + + /** Number Range Shape; based on a pair of {@link LevelledValue}. + * Spatially speaking, it's analogous to a Rectangle but 1D. + * @lucene.internal */ + protected class NRShape implements Shape { + + private final LevelledValue minLV, maxLV; + + /** Don't call directly; see {@link #toRangeShape(com.spatial4j.core.shape.Shape, com.spatial4j.core.shape.Shape)}. 
*/ + private NRShape(LevelledValue minLV, LevelledValue maxLV) { + this.minLV = minLV; + this.maxLV = maxLV; + } + + public LevelledValue getMinLV() { return minLV; } + + public LevelledValue getMaxLV() { return maxLV; } + + @Override + public SpatialRelation relate(Shape shape) { +// if (shape instanceof LevelledValue) +// return relate((LevelledValue)shape); + if (shape instanceof NRShape) + return relate((NRShape) shape); + return shape.relate(this).transpose();//probably a LevelledValue + } + + public SpatialRelation relate(NRShape ext) { + //This logic somewhat mirrors RectangleImpl.relate_range() + int extMin_intMax = comparePrefixLV(ext.getMinLV(), getMaxLV()); + if (extMin_intMax > 0) + return SpatialRelation.DISJOINT; + int extMax_intMin = comparePrefixLV(ext.getMaxLV(), getMinLV()); + if (extMax_intMin < 0) + return SpatialRelation.DISJOINT; + int extMin_intMin = comparePrefixLV(ext.getMinLV(), getMinLV()); + int extMax_intMax = comparePrefixLV(ext.getMaxLV(), getMaxLV()); + if ((extMin_intMin > 0 || extMin_intMin == 0 && ext.getMinLV().getLevel() >= getMinLV().getLevel()) + && (extMax_intMax < 0 || extMax_intMax == 0 && ext.getMaxLV().getLevel() >= getMaxLV().getLevel())) + return SpatialRelation.CONTAINS; + if ((extMin_intMin < 0 || extMin_intMin == 0 && ext.getMinLV().getLevel() <= getMinLV().getLevel()) + && (extMax_intMax > 0 || extMax_intMax == 0 && ext.getMaxLV().getLevel() <= getMaxLV().getLevel())) + return SpatialRelation.WITHIN; + return SpatialRelation.INTERSECTS; + } + + @Override + public Rectangle getBoundingBox() { throw new UnsupportedOperationException(); } + + @Override + public boolean hasArea() { return true; } + + @Override + public double getArea(SpatialContext spatialContext) { throw new UnsupportedOperationException(); } + + @Override + public Point getCenter() { throw new UnsupportedOperationException(); } + + @Override + public Shape getBuffered(double v, SpatialContext spatialContext) { throw new 
UnsupportedOperationException(); } + + @Override + public boolean isEmpty() { return false; } + + @Override + public String toString() { return "[" + toStringLV(minLV) + " TO " + toStringLV(maxLV) + "]"; } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + NRShape nrShape = (NRShape) o; + + if (!maxLV.equals(nrShape.maxLV)) return false; + if (!minLV.equals(nrShape.minLV)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = minLV.hashCode(); + result = 31 * result + maxLV.hashCode(); + return result; + } + }// class NRShapeImpl + + /** Converts the value to a shape (usually not a range). If it's a JDK object (e.g. Number, Calendar) + * that could be parsed from a String, this class won't do it; you must parse it. */ + public abstract Shape toShape(Object value); + + /** Detects a range pattern and parses it, otherwise it's parsed as one shape via + * {@link #parseShapeLV(String)}. The range pattern looks like this BNF: + *

<pre>
+   *   '[' + parseShapeLV + ' TO ' + parseShapeLV + ']'
+   * </pre>
+   * It's the same thing as the toString() of the range shape, notwithstanding range optimization.
+   * @param str not null or empty
+   * @return not null
+   * @throws java.text.ParseException If there is a problem
+   */
+  public Shape parseShape(String str) throws ParseException {
+    if (str == null || str.isEmpty())
+      throw new IllegalArgumentException("str is null or blank");
+    if (str.charAt(0) == '[') {//inclusive range syntax: "[left TO right]"
+      if (str.charAt(str.length()-1) != ']')
+        throw new ParseException("If starts with [ must end with ]; got "+str, str.length()-1);//error offset: the last char
+      int middle = str.indexOf(" TO ");
+      if (middle < 0)
+        throw new ParseException("If starts with [ must contain ' TO '; got "+str, -1);//-1: no particular error offset
+      String leftStr = str.substring(1, middle);//the text between '[' and " TO "
+      String rightStr = str.substring(middle + " TO ".length(), str.length()-1);//the text between " TO " and ']'
+      return toRangeShape(parseShapeLV(leftStr), parseShapeLV(rightStr));
+    } else if (str.charAt(0) == '{') {
+      throw new ParseException("Exclusive ranges not supported; got "+str, 0);
+    } else {
+      return parseShapeLV(str);//a single value, not a range
+    }
+  }
+
+  /** Parse a String to a LevelledValue. "*" should be the full-range. */
+  protected abstract LevelledValue parseShapeLV(String str) throws ParseException;
+
+  /** Returns a shape that represents the continuous range between {@code start} and {@code end}. It will
+   * be optimized: equal ends, or ends that truncate to the same value, are returned as a single LevelledValue.
+   * @throws IllegalArgumentException if either argument isn't a LevelledValue, or if they are in the wrong order.
+   */
+  public Shape toRangeShape(Shape start, Shape end) {
+    if (!(start instanceof LevelledValue && end instanceof LevelledValue))
+      throw new IllegalArgumentException("Must pass "+LevelledValue.class+" but got "+start.getClass());//NOTE(review): names start's class even when end is the offender
+    LevelledValue minLV = (LevelledValue) start;
+    LevelledValue maxLV = (LevelledValue) end;
+    if (minLV.equals(maxLV))
+      return minLV;
+    //Optimize precision of the range, e.g. April 1st to April 30th is April.
+    minLV = minLV.getLVAtLevel(truncateStartVals(minLV, 0));//drop trailing levels of the start whose value is 0
+    maxLV = maxLV.getLVAtLevel(truncateEndVals(maxLV, 0));//drop trailing levels of the end that are the last sub-cell
+    int cmp = comparePrefixLV(minLV, maxLV);
+    if (cmp > 0) {
+      throw new IllegalArgumentException("Wrong order: "+start+" TO "+end);
+    }
+    if (cmp == 0 && minLV.getLevel() == maxLV.getLevel())//both ends collapsed to the very same value
+      return minLV;
+    return new NRShape(minLV, maxLV);
+  }
+
+  /** Walks from lv.getLevel() toward endLevel and returns the first level whose val != 0; endLevel if there is none. */
+  private int truncateStartVals(LevelledValue lv, int endLevel) {
+    for (int level = lv.getLevel(); level > endLevel; level--) {
+      if (lv.getValAtLevel(level) != 0)
+        return level;
+    }
+    return endLevel;
+  }
+
+  private int truncateEndVals(LevelledValue lv, int endLevel) {//like truncateStartVals, but tests for "not the last sub-cell"
+    for (int level = lv.getLevel(); level > endLevel; level--) {
+      int max = getNumSubCells(lv.getLVAtLevel(level-1)) - 1;//highest cell number under the parent of this level
+      if (lv.getValAtLevel(level) != max)
+        return level;
+    }
+    return endLevel;
+  }
+
+  //
+  // NumberRangePrefixTree
+  //
+
+  protected final int[] maxSubCellsByLevel;//constructor argument; indexed by the parent's level (see getNumSubCells)
+  protected final int[] termLenByLevel;//term length in bytes (without leaf byte) of a cell at each level; [0] is 0
+  protected final int[] levelByTermLen;//reverse lookup of termLenByLevel; -1 for a length that ends inside a 2-byte level
+  protected final int maxTermLen; // how long could cell.getToken... (that is a leaf) possibly be?
+
+  protected NumberRangePrefixTree(int[] maxSubCellsByLevel) {
+    super(DUMMY_CTX, maxSubCellsByLevel.length);
+    this.maxSubCellsByLevel = maxSubCellsByLevel;
+
+    // Fill termLenByLevel
+    this.termLenByLevel = new int[maxLevels + 1];
+    termLenByLevel[0] = 0;
+    final int MAX_STATES = 1 << 15;//1 bit less than 2 bytes
+    for (int level = 1; level <= maxLevels; level++) {
+      final int states = maxSubCellsByLevel[level - 1];//number of cells under a parent at level-1
+      if (states >= MAX_STATES || states <= 1) {
+        throw new IllegalArgumentException("Max states is "+MAX_STATES+", given "+states+" at level "+level);
+      }
+      boolean twoBytes = states >= 256;//256 or more sub-cells are encoded in two bytes, otherwise one
+      termLenByLevel[level] = termLenByLevel[level-1] + (twoBytes ? 2 : 1);
+    }
+    maxTermLen = termLenByLevel[maxLevels] + 1;// + 1 for leaf byte
+
+    // Fill levelByTermLen
+    levelByTermLen = new int[maxTermLen];
+    levelByTermLen[0] = 0;
+    for (int level = 1; level < termLenByLevel.length; level++) {
+      int termLen = termLenByLevel[level];
+      int prevTermLen = termLenByLevel[level-1];
+      if (termLen - prevTermLen == 2) {//2 byte delta
+        //if the term doesn't completely cover this cell then it must be a leaf of the prior.
+        levelByTermLen[termLen-1] = -1;//won't be used; otherwise erroneous
+        levelByTermLen[termLen] = level;
+      } else {//1 byte delta
+        assert termLen - prevTermLen == 1;
+        levelByTermLen[termLen] = level;
+      }
+    }
+
+  }
+
+  @Override
+  public String toString() {
+    return getClass().getSimpleName();
+  }
+
+  @Override
+  public int getLevelForDistance(double dist) {
+    return maxLevels;//dist is ignored; always the deepest level
+  }
+
+  @Override
+  public double getDistanceForLevel(int level) {
+    throw new UnsupportedOperationException("Not applicable.");
+  }
+
+  protected Shape toShape(int[] valStack, int len) {//valStack[i] is the cell number at level i+1; returns the cell at level len
+    final NRCell[] cellStack = newCellStack(len);
+    for (int i = 0; i < len; i++) {
+      cellStack[i+1].resetCellWithCellNum(valStack[i]);
+    }
+    return cellStack[len];
+  }
+
+  @Override
+  public Cell getWorldCell() {
+    return newCellStack(maxLevels)[0];//level 0 of a newly allocated full-depth stack
+  }
+
+  protected NRCell[] newCellStack(int levels) {//one NRCell per level 0..levels, all sharing the array and one term BytesRef
+    final NRCell[] cellsByLevel = new NRCell[levels + 1];
+    final BytesRef term = new BytesRef(maxTermLen);
+    for (int level = 0; level <= levels; level++) {
+      cellsByLevel[level] = new NRCell(cellsByLevel,term,level);
+    }
+    return cellsByLevel;
+  }
+
+  @Override
+  public Cell readCell(BytesRef term, Cell scratch) {
+    if (scratch == null)
+      scratch = getWorldCell();
+
+    //We decode level, leaf, and populate bytes.
+
+    //reverse lookup term length to the level and hence the cell
+    NRCell[] cellsByLevel = ((NRCell) scratch).cellsByLevel;
+    boolean isLeaf = term.bytes[term.offset + term.length - 1] == 0;//a trailing 0 byte marks a leaf; NOTE(review): assumes term.length > 0
+    int lenNoLeaf = isLeaf ? term.length - 1 : term.length;
+
+    NRCell result = cellsByLevel[levelByTermLen[lenNoLeaf]];
+    if (cellsByLevel[0].termBuf == null)
+      cellsByLevel[0].termBuf = result.term.bytes;//a kluge; see cell.ensureOwnTermBytes()
+    result.term.bytes = term.bytes;//point at the caller's bytes; nothing is copied here
+    result.term.offset = term.offset;
+    result.term.length = lenNoLeaf;//technically this isn't used but may help debugging
+    result.reset();
+    if (isLeaf)
+      result.setLeaf();
+
+    result.cellNumber = -1;//lazy decode flag
+
+    return result;
+  }
+
+  protected int getNumSubCells(LevelledValue lv) {
+    return maxSubCellsByLevel[lv.getLevel()];//the same count for every cell at lv's level
+  }
+
+  //
+  // NRCell
+  //
+
+  /** Most of the PrefixTree implementation is in this one class, which is both
+   * the Cell, the CellIterator, and the Shape to reduce object allocation. It's implemented as a re-used array/stack
+   * of Cells at adjacent levels, that all have a reference back to the cell array to traverse. They also share a common
+   * BytesRef for the term.
+   * @lucene.internal */
+  protected class NRCell extends CellIterator implements Cell, LevelledValue {
+
+    //Shared: (TODO put this in a new class)
+    final NRCell[] cellsByLevel;
+    final BytesRef term;//AKA the token
+    byte[] termBuf;// see ensureOwnTermBytes(), only for cell0
+
+    //Cell state...
+    final int cellLevel; // assert levelStack[cellLevel] == this
+    int cellNumber; //relative to parent cell. It's unused for level 0. Starts at 0. Negative means not set / not yet decoded.
+
+    SpatialRelation cellShapeRel;
+    boolean cellIsLeaf;
+
+    //CellIterator state is defined further below
+
+    NRCell(NRCell[] cellsByLevel, BytesRef term, int cellLevel) {
+      this.cellsByLevel = cellsByLevel;
+      this.term = term;
+      this.cellLevel = cellLevel;
+      this.cellNumber = cellLevel == 0 ? 0 : -1;
+      this.cellIsLeaf = false;
+      assert cellsByLevel[cellLevel] == null;//the caller stores this cell into the slot after construction
+    }
+
+    /** Ensure we own term.bytes so that it's safe to modify. We detect via a kluge in which cellsByLevel[0].termBuf
+     * is non-null, which is a buffer pre-allocated for use as the replacement for term.bytes.
*/
+    void ensureOwnTermBytes() {
+      NRCell cell0 = cellsByLevel[0];//termBuf is only kept on the level-0 cell
+      if (cell0.termBuf == null)
+        return;//we already own the bytes
+      System.arraycopy(term.bytes, term.offset, cell0.termBuf, 0, term.length);//copy the borrowed bytes into our buffer
+      term.bytes = cell0.termBuf;
+      term.offset = 0;
+      cell0.termBuf = null;//null again signals that term.bytes is ours
+    }
+
+    private void reset() {//clears the leaf flag and the cached relation only
+      this.cellIsLeaf = false;
+      this.cellShapeRel = null;
+    }
+
+    private void resetCellWithCellNum(int cellNumber) {
+      reset();
+
+      //update bytes
+      // note: see ensureDecoded() for the reverse
+      if (cellNumber >= 0) {//valid
+        ensureOwnTermBytes();
+        int termLen = termLenByLevel[getLevel()];
+        boolean twoBytes = (termLen - termLenByLevel[getLevel()-1]) > 1;
+        if (twoBytes) {
+          //right 7 bits, plus 1 (may overflow to 8th bit which is okay)
+          term.bytes[termLen-2] = (byte) (cellNumber >> 7);//the remaining high bits
+          term.bytes[termLen-1] = (byte) ((cellNumber & 0x7F) + 1);
+        } else {
+          term.bytes[termLen-1] = (byte) (cellNumber+1);
+        }
+        assert term.bytes[termLen-1] != 0;//a 0 byte would read back as the leaf marker
+        term.length = termLen;
+      }
+      this.cellNumber = cellNumber;
+    }
+
+    private void ensureDecoded() {
+      if (cellNumber >= 0)
+        return;//already decoded
+      //Decode cell numbers from bytes. This is the inverse of resetCellWithCellNum().
+      for (int level = 1; level <= getLevel(); level++) {//fills in this cell and every ancestor below level 0
+        NRCell cell = cellsByLevel[level];
+        int termLen = termLenByLevel[level];
+        boolean twoBytes = (termLen - termLenByLevel[level-1]) > 1;
+        if (twoBytes) {
+          int byteH = (term.bytes[term.offset + termLen - 2] & 0xFF);
+          int byteL = (term.bytes[term.offset + termLen - 1] & 0xFF);
+          assert byteL - 1 < (1<<7);
+          cell.cellNumber = (byteH << 7) + (byteL-1);
+          assert cell.cellNumber < 1<<15;
+        } else {
+          cell.cellNumber = (term.bytes[term.offset + termLen - 1] & 0xFF) - 1;
+          assert cell.cellNumber < 255;
+        }
+        assert cell.cellNumber >= 0;
+      }
+    }
+
+    @Override // for Cell & for LevelledValue
+    public int getLevel() {
+      return cellLevel;
+    }
+
+    @Override
+    public SpatialRelation getShapeRel() {
+      return cellShapeRel;
+    }
+
+    @Override
+    public void setShapeRel(SpatialRelation rel) {
+      cellShapeRel = rel;
+    }
+
+    @Override
+    public boolean isLeaf() {
+      return cellIsLeaf;
+    }
+
+    @Override
+    public void setLeaf() {
+      cellIsLeaf = true;
+    }
+
+    @Override
+    public Shape getShape() {
+      ensureDecoded(); return this;//the cell is its own Shape
+    }
+
+    @Override
+    public BytesRef getTokenBytesNoLeaf(BytesRef result) {
+      if (result == null)
+        result = new BytesRef();
+      result.bytes = term.bytes;//aliases the shared term bytes; nothing is copied
+      result.offset = term.offset;
+      result.length = termLenByLevel[cellLevel];
+      assert result.length <= term.length;
+      return result;
+    }
+
+    @Override
+    public BytesRef getTokenBytesWithLeaf(BytesRef result) {
+      ensureOwnTermBytes();//normally shouldn't do anything
+      result = getTokenBytesNoLeaf(result);
+      if (isLeaf()) {
+        result.bytes[result.length++] = 0;//append the leaf marker; this writes into the shared term bytes
+      }
+      return result;
+    }
+
+    @Override
+    public boolean isPrefixOf(Cell c) {
+      NRCell otherCell = (NRCell) c;
+      assert term != otherCell.term;//cells of one stack share a term, so the length swap below needs two stacks
+      //trick to re-use bytesref; provided that we re-instate it
+      int myLastLen = term.length;
+      term.length = termLenByLevel[getLevel()];
+      int otherLastLen = otherCell.term.length;
+      otherCell.term.length = termLenByLevel[otherCell.getLevel()];
+      boolean answer = StringHelper.startsWith(otherCell.term, term);
+      term.length = myLastLen;
+      otherCell.term.length = otherLastLen;
+      return answer;
+    }
+
+    @Override
+    public int compareToNoLeaf(Cell fromCell) {
+      final NRCell nrCell = (NRCell) fromCell;
+      assert term != nrCell.term;//same two-stack requirement as isPrefixOf
+      //trick to re-use bytesref; provided that we re-instate it
+      int myLastLen = term.length;
+      int otherLastLen = nrCell.term.length;
+      term.length = termLenByLevel[getLevel()];
+      nrCell.term.length = termLenByLevel[nrCell.getLevel()];
+      int answer = term.compareTo(nrCell.term);
+      term.length = myLastLen;
+      nrCell.term.length = otherLastLen;
+      return answer;
+    }
+
+    @Override
+    public CellIterator getNextLevelCells(Shape shapeFilter) {
+      ensureDecoded();
+      NRCell subCell = cellsByLevel[cellLevel + 1];//the next level's cell object doubles as the iterator
+      subCell.initIter(shapeFilter);
+      return subCell;
+    }
+
+    //----------- CellIterator
+
+    Shape iterFilter;//LevelledValue or NRShape
+    boolean iterFirstIsIntersects;//when true, hasNext() does not mark the first sub-cell as a leaf
+    boolean iterLastIsIntersects;//when true, hasNext() does not mark the last sub-cell as a leaf
+    int iterFirstCellNumber;
+    int iterLastCellNumber;//inclusive; -1 means there is nothing to iterate
+
+    private void initIter(Shape filter) {
+      cellNumber = -1;//before the first sub-cell; hasNext() advances from here
+      if (filter instanceof LevelledValue && ((LevelledValue)filter).getLevel() == 0)
+        filter = null;//world means everything -- no filter
+      iterFilter = filter;
+
+      NRCell parent = getLVAtLevel(getLevel()-1);
+
+      // Initialize iter* members.
+
+      //no filter means all subcells
+      if (filter == null) {
+        iterFirstCellNumber = 0;
+        iterFirstIsIntersects = false;
+        iterLastCellNumber = getNumSubCells(parent) - 1;
+        iterLastIsIntersects = false;
+        return;
+      }
+
+      final LevelledValue minLV;
+      final LevelledValue maxLV;
+      if (filter instanceof NRShape) {
+        NRShape nrShape = (NRShape) iterFilter;
+        minLV = nrShape.getMinLV();
+        maxLV = nrShape.getMaxLV();
+      } else {
+        minLV = (LevelledValue)iterFilter;//a single value is treated as a range with identical ends
+        maxLV = minLV;
+      }
+
+      //fast path check when using same filter
+      if (iterFilter == parent.iterFilter) {
+        if (parent.iterFirstIsIntersects && parent.cellNumber == parent.iterFirstCellNumber
+            && minLV.getLevel() >= getLevel()) {
+          iterFirstCellNumber = minLV.getValAtLevel(getLevel());
+          iterFirstIsIntersects = (minLV.getLevel() > getLevel());
+        } else {
+          iterFirstCellNumber = 0;
+          iterFirstIsIntersects = false;
+        }
+        if (parent.iterLastIsIntersects && parent.cellNumber == parent.iterLastCellNumber
+            && maxLV.getLevel() >= getLevel()) {
+          iterLastCellNumber = maxLV.getValAtLevel(getLevel());
+          iterLastIsIntersects = (maxLV.getLevel() > getLevel());
+        } else {
+          iterLastCellNumber = getNumSubCells(parent) - 1;
+          iterLastIsIntersects = false;
+        }
+        if (iterFirstCellNumber == iterLastCellNumber) {//a single sub-cell: keep the two flags in agreement
+          if (iterLastIsIntersects)
+            iterFirstIsIntersects = true;
+          else if (iterFirstIsIntersects)
+            iterLastIsIntersects = true;
+        }
+        return;
+      }
+
+      //uncommon to get here, except for level 1 which always happens
+
+      int startCmp = comparePrefixLV(minLV, parent);
+      if (startCmp > 0) {//start comes after this cell
+        iterFirstCellNumber = 0;
+        iterFirstIsIntersects = false;
+        iterLastCellNumber = -1;//so ends early (no cells)
+        iterLastIsIntersects = false;
+        return;
+      }
+      int endCmp = comparePrefixLV(maxLV, parent);//compare to end cell
+      if (endCmp < 0) {//end comes before this cell
+        iterFirstCellNumber = 0;
+        iterFirstIsIntersects = false;
+        iterLastCellNumber = -1;//so ends early (no cells)
+        iterLastIsIntersects = false;
+        return;
+      }
+      if (startCmp < 0 || minLV.getLevel() < getLevel()) {
+        //start comes before...
+        iterFirstCellNumber = 0;
+        iterFirstIsIntersects = false;
+      } else {
+        iterFirstCellNumber = minLV.getValAtLevel(getLevel());
+        iterFirstIsIntersects = (minLV.getLevel() > getLevel());
+      }
+      if (endCmp > 0 || maxLV.getLevel() < getLevel()) {
+        //end comes after...
+        iterLastCellNumber = getNumSubCells(parent) - 1;
+        iterLastIsIntersects = false;
+      } else {
+        iterLastCellNumber = maxLV.getValAtLevel(getLevel());
+        iterLastIsIntersects = (maxLV.getLevel() > getLevel());
+      }
+    }
+
+    @Override
+    public boolean hasNext() {
+      thisCell = null;
+      if (nextCell != null)//calling hasNext twice in a row
+        return true;
+
+      if (cellNumber >= iterLastCellNumber)
+        return false;//already on (or past) the last sub-cell
+
+      resetCellWithCellNum(cellNumber < iterFirstCellNumber ? iterFirstCellNumber : cellNumber + 1);//jump to the first, else step by one
+
+      boolean hasChildren =
+          (cellNumber == iterFirstCellNumber && iterFirstIsIntersects)
+              || (cellNumber == iterLastCellNumber && iterLastIsIntersects);
+
+      if (!hasChildren) {
+        setLeaf();
+        setShapeRel(SpatialRelation.WITHIN);
+      } else if (iterFirstCellNumber == iterLastCellNumber) {
+        setShapeRel(SpatialRelation.CONTAINS);
+      } else {
+        setShapeRel(SpatialRelation.INTERSECTS);
+      }
+
+      nextCell = this;//this same object, now repositioned, is the element handed out
+      return true;
+    }
+
+    //TODO override nextFrom to be more efficient
+
+    //----------- LevelledValue / Shape
+
+    @Override
+    public int getValAtLevel(int level) {
+      final int result = cellsByLevel[level].cellNumber;
+      assert result >= 0;//initialized
+      return result;
+    }
+
+    @Override
+    public NRCell getLVAtLevel(int level) {
+      assert level <= cellLevel;
+      return cellsByLevel[level];//the ancestor (or this cell) from the shared stack
+    }
+
+    @Override
+    public SpatialRelation relate(Shape shape) {
+      ensureDecoded();
+      if (shape == iterFilter && cellShapeRel != null)
+        return cellShapeRel;//reuse the relation recorded for this very filter
+      if (shape instanceof LevelledValue)
+        return relate((LevelledValue)shape);
+      if (shape instanceof NRShape)
+        return relate((NRShape)shape);
+      return shape.relate(this).transpose();
+    }
+
+    public SpatialRelation relate(LevelledValue lv) {
+      ensureDecoded();
+      int cmp = comparePrefixLV(this, lv);
+      if (cmp != 0)
+        return SpatialRelation.DISJOINT;
+      if (getLevel() > lv.getLevel())
+        return SpatialRelation.WITHIN;//or equals
+      return SpatialRelation.CONTAINS;//NOTE(review): equal levels also end up here, not at WITHIN
+      //no INTERSECTS; that won't happen.
+    }
+
+    public SpatialRelation relate(NRShape nrShape) {
+      ensureDecoded();
+      int startCmp = comparePrefixLV(nrShape.getMinLV(), this);
+      if (startCmp > 0) {//start comes after this cell
+        return SpatialRelation.DISJOINT;
+      }
+      int endCmp = comparePrefixLV(nrShape.getMaxLV(), this);
+      if (endCmp < 0) {//end comes before this cell
+        return SpatialRelation.DISJOINT;
+      }
+      if ((startCmp < 0 || startCmp == 0 && nrShape.getMinLV().getLevel() <= getLevel())
+          && (endCmp > 0 || endCmp == 0 && nrShape.getMaxLV().getLevel() <= getLevel()))
+        return SpatialRelation.WITHIN;//or equals
+      if (startCmp == 0 && endCmp == 0
+          && nrShape.getMinLV().getLevel() >= getLevel() && nrShape.getMaxLV().getLevel() >= getLevel())
+        return SpatialRelation.CONTAINS;
+      return SpatialRelation.INTERSECTS;
+    }
+
+    @Override
+    public Rectangle getBoundingBox() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean hasArea() {
+      return true;
+    }
+
+    @Override
+    public double getArea(SpatialContext ctx) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Point getCenter() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Shape getBuffered(double distance, SpatialContext ctx) { throw new UnsupportedOperationException(); }
+
+    @Override
+    public boolean isEmpty() {
+      return false;
+    }
+
+    //------- Object
+
+    @Override
+    public boolean equals(Object obj) {
+      if (!(obj instanceof NRCell)) {
+        return false;
+      }
+      if (this == obj)
+        return true;
+      NRCell nrCell = (NRCell) obj;
+      assert term != nrCell.term;
+      if (getLevel() != nrCell.getLevel())
+        return false;
+      //trick to re-use bytesref; provided that we re-instate it
+      int myLastLen = term.length;
+      int otherLastLen = nrCell.term.length;
+      boolean answer = getTokenBytesNoLeaf(term).equals(nrCell.getTokenBytesNoLeaf(nrCell.term));//compares the no-leaf token bytes; the leaf flag is ignored
+      term.length = myLastLen;
+      nrCell.term.length = otherLastLen;
+      return answer;
+    }
+
+    @Override
+    public int hashCode() {
+      //trick to re-use bytesref; provided that we re-instate it
+      int myLastLen = term.length;
+      int result = getTokenBytesNoLeaf(term).hashCode();//hashes the same bytes that equals() compares
+      term.length = myLastLen;
+      return result;
+    }
+
+    @Override
+    public String toString() {
+      ensureDecoded();
+      String str = NumberRangePrefixTree.this.toStringLV(this);
+      if (isLeaf())
+        str += "•";//bullet (won't be confused with textual representation)
+      return str;
+    }
+  } // END OF NRCell
+
+}
diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java b/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java
index 52b0882fe86..6d1129066e6 100644
--- a/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java
+++ b/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java
@@ -234,19 +234,20 @@ public abstract class StrategyTestCase extends SpatialTestCase {
     CheckHits.checkExplanations(q, "", indexSearcher);
   }
 
-  protected void assertOperation(Map indexedDocs,
-                                 SpatialOperation operation, Shape queryShape) {
-    //Generate truth via brute force
-    Set expectedIds = new HashSet<>();
-    for (Map.Entry stringShapeEntry : indexedDocs.entrySet()) {
-      if (operation.evaluate(stringShapeEntry.getValue(), queryShape))
-        expectedIds.add(stringShapeEntry.getKey());
-    }
-
-    SpatialTestQuery testQuery = new SpatialTestQuery();
-    testQuery.args = new SpatialArgs(operation, queryShape);
-    testQuery.ids = new ArrayList<>(expectedIds);
-    runTestQuery(SpatialMatchConcern.FILTER, testQuery);
+  protected void testOperation(Shape indexedShape, SpatialOperation operation,//index one shape, run one query, expect hit iff match
+                               Shape queryShape, boolean match) throws IOException {
+    assertTrue("Faulty test",//sanity check of the caller's expectation against the in-memory predicate
+        operation.evaluate(indexedShape, queryShape) == match ||
+            indexedShape.equals(queryShape) &&
+              (operation == SpatialOperation.Contains || operation == SpatialOperation.IsWithin));
+    adoc("0", indexedShape);
+    commit();
+    Query query = strategy.makeQuery(new SpatialArgs(operation, queryShape));
+    SearchResults got = executeQuery(query, 1);
+    assert got.numFound <= 1 : "unclean test env";
+    if ((got.numFound == 1) != match)
+      fail(operation+" I:" + indexedShape + " Q:" + queryShape);
+    deleteAll();//clean up after ourselves
   }
 
 }
diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/BaseNonFuzzySpatialOpStrategyTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/BaseNonFuzzySpatialOpStrategyTest.java
new file mode 100644
index 00000000000..2990e79c51e
--- /dev/null
+++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/BaseNonFuzzySpatialOpStrategyTest.java
@@ -0,0 +1,140 @@
+package org.apache.lucene.spatial.prefix;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.spatial.StrategyTestCase;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialOperation;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
+
+/** Base test harness, ideally for SpatialStrategy impls that have exact results
+ * (not grid approximated), hence "not fuzzy". Subclasses supply the random indexed and query shapes.
+ */
+public abstract class BaseNonFuzzySpatialOpStrategyTest extends StrategyTestCase {
+
+  //TODO this is partially redundant with StrategyTestCase.runTestQuery & testOperation
+
+  protected void testOperationRandomShapes(final SpatialOperation operation) throws IOException {
+    //first show that when there's no data, a query will result in no results
+    {
+      Query query = strategy.makeQuery(new SpatialArgs(operation, randomQueryShape()));
+      SearchResults searchResults = executeQuery(query, 1);
+      assertEquals(0, searchResults.numFound);
+    }
+
+    final int numIndexedShapes = randomIntBetween(1, 6);
+    List<Shape> indexedShapes = new ArrayList<>(numIndexedShapes);
+    for (int i = 0; i < numIndexedShapes; i++) {
+      indexedShapes.add(randomIndexedShape());
+    }
+
+    final int numQueryShapes = atLeast(20);
+    List<Shape> queryShapes = new ArrayList<>(numQueryShapes);
+    for (int i = 0; i < numQueryShapes; i++) {
+      queryShapes.add(randomQueryShape());
+    }
+
+    testOperation(operation, indexedShapes, queryShapes, true/*havoc*/);
+  }
+
+  protected void testOperation(final SpatialOperation operation,//NOTE: with havoc, deleted entries of indexedShapes are set to null
+      List<Shape> indexedShapes, List<Shape> queryShapes, boolean havoc) throws IOException {
+    //Main index loop:
+    for (int i = 0; i < indexedShapes.size(); i++) {
+      Shape shape = indexedShapes.get(i);
+      adoc(""+i, shape);//the document id is the shape's list index
+
+      if (havoc && random().nextInt(10) == 0)
+        commit();//intermediate commit, produces extra segments
+    }
+    if (havoc) {
+      //delete some documents randomly
+      for (int id = 0; id < indexedShapes.size(); id++) {
+        if (random().nextInt(10) == 0) {
+          deleteDoc(""+id);
+          indexedShapes.set(id, null);//null marks the slot as deleted for the truth computation below
+        }
+      }
+    }
+
+    commit();
+
+    //Main query loop:
+    for (int queryIdx = 0; queryIdx < queryShapes.size(); queryIdx++) {
+      final Shape queryShape = queryShapes.get(queryIdx);
+
+      if (havoc)
+        preQueryHavoc();
+
+      //Generate truth via brute force:
+      // We ensure true-positive matches (if the predicate on the raw shapes match
+      // then the search should find those same matches).
+      Set<String> expectedIds = new LinkedHashSet<>();//true-positives
+      for (int id = 0; id < indexedShapes.size(); id++) {
+        Shape indexedShape = indexedShapes.get(id);
+        if (indexedShape == null)
+          continue;//deleted above
+        if (operation.evaluate(indexedShape, queryShape)) {
+          expectedIds.add(""+id);
+        }
+      }
+
+      //Search and verify results
+      SpatialArgs args = new SpatialArgs(operation, queryShape);
+      Query query = strategy.makeQuery(args);
+      SearchResults got = executeQuery(query, 100);
+      Set<String> remainingExpectedIds = new LinkedHashSet<>(expectedIds);
+      for (SearchResult result : got.results) {
+        String id = result.getId();
+        if (!remainingExpectedIds.remove(id)) {//a hit that brute force did not predict
+          fail("Shouldn't match", id, indexedShapes, queryShape);
+        }
+      }
+      if (!remainingExpectedIds.isEmpty()) {//a predicted match that the search missed
+        String id = remainingExpectedIds.iterator().next();
+        fail("Should have matched", id, indexedShapes, queryShape);
+      }
+    }
+  }
+
+  private void fail(String label, String id, List<Shape> indexedShapes, Shape queryShape) {//id must be the decimal list index
+    fail(label + " I#" + id + ":" + indexedShapes.get(Integer.parseInt(id)) + " Q:" + queryShape);
+  }
+
+  protected void preQueryHavoc() {//randomizes the prefix grid scan level when the strategy is a RecursivePrefixTreeStrategy
+    if (strategy instanceof RecursivePrefixTreeStrategy) {
+      RecursivePrefixTreeStrategy rpts = (RecursivePrefixTreeStrategy) strategy;
+      int scanLevel = randomInt(rpts.getGrid().getMaxLevels());
+      rpts.setPrefixGridScanLevel(scanLevel);
+    }
+  }
+
+  protected abstract Shape randomIndexedShape();
+
+  protected abstract Shape randomQueryShape();
+}
diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java
new file mode 100644
index 00000000000..8808da9ba71
--- /dev/null
+++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java
@@ -0,0 +1,130 @@
+package org.apache.lucene.spatial.prefix;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.carrotsearch.randomizedtesting.annotations.Repeat;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.spatial.NumberRangePrefixTreeStrategy;
+import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
+import org.apache.lucene.spatial.query.SpatialOperation;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Calendar;
+
+public class DateNRStrategyTest extends BaseNonFuzzySpatialOpStrategyTest {
+
+  static final int ITERATIONS = 10;//repeat count for each randomized test method
+
+  DateRangePrefixTree tree;
+
+  int era;//fixed per test in setUp(); every random calendar uses it
+  int year;//fixed per test in setUp(); every random calendar uses it
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    tree = DateRangePrefixTree.INSTANCE;
+    strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
+    era = random().nextBoolean() ? 0 : 1;
+    year = 1 + random().nextInt(2_000_000);
+  }
+
+  @Test
+  @Repeat(iterations = ITERATIONS)
+  public void testIntersects() throws IOException {
+    testOperationRandomShapes(SpatialOperation.Intersects);
+  }
+
+  @Test
+  @Repeat(iterations = ITERATIONS)
+  public void testWithin() throws IOException {
+    testOperationRandomShapes(SpatialOperation.IsWithin);
+  }
+
+  @Test
+  @Repeat(iterations = ITERATIONS)
+  public void testContains() throws IOException {
+    testOperationRandomShapes(SpatialOperation.Contains);
+  }
+
+  @Test @Ignore("see LUCENE-5692")
+  @Repeat(iterations = ITERATIONS)
+  public void testDisjoint() throws IOException {
+    testOperationRandomShapes(SpatialOperation.IsDisjointTo);
+  }
+
+  @Test
+  public void testWithinSame() throws IOException {
+    final Calendar cal = tree.newCal();
+    cal.set(Calendar.ERA, era);
+    cal.set(Calendar.YEAR, year);
+
+    testOperation(
+        tree.toShape(cal),
+        SpatialOperation.IsWithin,
+        tree.toShape(cal), true);//is within itself
+  }
+
+  @Test
+  public void testWorld() throws IOException {
+    testOperation(
+        tree.toShape(tree.newCal()),//world matches everything
+        SpatialOperation.Contains,
+        tree.toShape(randomCalendar()), true);
+  }
+
+  @Override
+  protected Shape randomIndexedShape() {//a range between two random calendars, ordered by Calendar.compareTo
+    Calendar cal1 = randomCalendar();
+    Shape s1 = tree.toShape(cal1);
+    try {
+      Calendar cal2 = randomCalendar();
+      Shape s2 = tree.toShape(cal2);
+      if (cal1.compareTo(cal2) < 0) {
+        return tree.toRangeShape(s1, s2);
+      } else {
+        return tree.toRangeShape(s2, s1);
+      }
+    } catch (IllegalArgumentException e) {
+      assert e.getMessage().startsWith("Differing precision");//NOTE(review): confirm toRangeShape still produces this message
+      return s1;//fall back to the single value
+    }
+  }
+
+  private Calendar randomCalendar() {//random instant in the fixed era/year, truncated to a random precision
+    Calendar cal = tree.newCal();
+    cal.setTimeInMillis(random().nextLong());
+    cal.set(Calendar.ERA, era);
+    cal.set(Calendar.YEAR, year);
+    try {
+      tree.clearFieldsAfter(cal, random().nextInt(Calendar.FIELD_COUNT+1)-1);//field argument ranges over -1..FIELD_COUNT-1
+    } catch (AssertionError e) {
+      if (!"Calendar underflow".equals(e.getMessage()))//constant-first: getMessage() is null for a bare assert
+        throw e;
+    }
+    return cal;
+  }
+
+  @Override
+  protected Shape randomQueryShape() {
+    return randomIndexedShape();//queries are drawn from the same distribution as indexed shapes
+  }
+}
diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java
new file mode 100644
index 00000000000..e377eb6ff77
--- /dev/null
+++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java
@@ -0,0 +1,169 @@
+package org.apache.lucene.spatial.prefix.tree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.shape.Shape;
+import com.spatial4j.core.shape.SpatialRelation;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+import java.text.ParseException;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
+
+public class DateRangePrefixTreeTest extends LuceneTestCase {
+
+  private DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;
+
+  public void testRoundTrip() throws Exception {
+    Calendar cal = tree.newCal();
+
+    assertEquals("*", tree.toString(cal));//a new calendar from the tree prints as the world "*"
+
+    //test no underflow
+    assertTrue(tree.toShape(new int[]{0}, 1).toString().startsWith("-"));
+
+    //Some arbitrary date
+    cal.set(2014, Calendar.MAY, 9);
+    roundTrip(cal);
+    assertEquals("2014-05-09",tree.toString(cal));
+
+    //Earliest date
+    cal.setTimeInMillis(Long.MIN_VALUE);
+    roundTrip(cal);
+
+    //Farthest date
+    cal.setTimeInMillis(Long.MAX_VALUE);
+    roundTrip(cal);
+
+    //1BC is "0000".
+    cal.clear();
+    cal.set(Calendar.ERA, GregorianCalendar.BC);
+    cal.set(Calendar.YEAR, 1);
+    roundTrip(cal);
+    assertEquals("0000", tree.toString(cal));
+    //adding a "+" parses to the same; and a trailing 'Z' is fine too
+    assertEquals(cal, tree.parseCalendar("+0000Z"));
+
+    //2BC is "-0001"
+    cal.clear();
+    cal.set(Calendar.ERA, GregorianCalendar.BC);
+    cal.set(Calendar.YEAR, 2);
+    roundTrip(cal);
+    assertEquals("-0001", tree.toString(cal));
+
+    //1AD is "0001"
+    cal.clear();
+    cal.set(Calendar.YEAR, 1);
+    roundTrip(cal);
+    assertEquals("0001", tree.toString(cal));
+
+    //test random
+    cal.setTimeInMillis(random().nextLong());
+    roundTrip(cal);
+  }
+
+  //copies from DateRangePrefixTree; kept in ascending constant order because roundTrip() binary-searches it
+  private static final int[] CAL_FIELDS = {
+      Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH,
+      Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND};
+
+  private void roundTrip(Calendar calOrig) throws ParseException {//checks string/shape/term conversions at each precision, finest first
+    Calendar cal = (Calendar) calOrig.clone();//work on a copy; the caller's calendar is not modified
+    String lastString = null;
+    while (true) {
+      String calString = tree.toString(cal);
+      assert lastString == null || calString.length() < lastString.length();//lower precision must print shorter
+      //test parseCalendar
+      assertEquals(cal, tree.parseCalendar(calString));
+
+      //to Shape and back to Cal
+      Shape shape = tree.toShape(cal);
+      Calendar cal2 = tree.toCalendar(shape);
+      assertEquals(calString, tree.toString(cal2));
+
+      if (!calString.equals("*")) {//not world cell
+        //to Term and back to Cell
+        Cell cell = (Cell) shape;
+        BytesRef term = cell.getTokenBytesNoLeaf(null);
+        Cell cell2 = tree.readCell(BytesRef.deepCopyOf(term), null);
+        assertEquals(calString, cell, cell2);
+        Calendar cal3 = tree.toCalendar(cell2.getShape());
+        assertEquals(calString, tree.toString(cal3));
+
+        // setLeaf comparison
+        cell2.setLeaf();
+        BytesRef termLeaf = cell2.getTokenBytesWithLeaf(null);
+        assertTrue(term.compareTo(termLeaf) < 0);
+        assertEquals(termLeaf.length, term.length + 1);//the leaf form is exactly one byte longer
+        assertEquals(0, termLeaf.bytes[termLeaf.offset + termLeaf.length - 1]);//and that extra byte is 0
+        assertTrue(cell.isPrefixOf(cell2));
+      }
+
+      //end of loop; decide if should loop again with lower precision
+      final int calPrecField = tree.getCalPrecisionField(cal);
+      if (calPrecField == -1)
+        break;
+      int fieldIdx = Arrays.binarySearch(CAL_FIELDS, calPrecField);
+      assert fieldIdx >= 0;
+      int prevPrecField = (fieldIdx == 0 ? -1 : CAL_FIELDS[--fieldIdx]);//next coarser field, or -1 when already at YEAR
+      try {
+        tree.clearFieldsAfter(cal, prevPrecField);
+      } catch (AssertionError e) {
+        if ("Calendar underflow".equals(e.getMessage()))//constant-first: getMessage() is null for a bare assert
+          return;
+        throw e;
+      }
+      lastString = calString;
+    }
+  }
+
+  public void testShapeRelations() throws ParseException {
+    Shape shapeA = tree.parseShape("[3122-01-23 TO 3122-11-27]");
+    Shape shapeB = tree.parseShape("[3122-08 TO 3122-11]");
+    assertEquals(SpatialRelation.INTERSECTS, shapeA.relate(shapeB));
+
+    shapeA = tree.parseShape("3122");
+    shapeB = tree.parseShape("[* TO 3122-10-31]");
+    assertEquals(SpatialRelation.INTERSECTS, shapeA.relate(shapeB));
+
+    shapeA = tree.parseShape("[3122-05-28 TO 3122-06-29]");
+    shapeB = tree.parseShape("[3122 TO 3122-04]");
+    assertEquals(SpatialRelation.DISJOINT, shapeA.relate(shapeB));
+  }
+
+  public void testShapeRangeOptimizer() throws ParseException {//each case: expected optimized string vs. parsed range
+    assertEquals("[2014-08 TO 2014-09]", tree.parseShape("[2014-08-01 TO 2014-09-30]").toString());
+
+    assertEquals("2014", tree.parseShape("[2014-01-01 TO 2014-12-31]").toString());
+
+    assertEquals("2014", tree.parseShape("[2014-01 TO 2014]").toString());
+
+    assertEquals("[2014 TO 2014-04-06]", tree.parseShape("[2014-01 TO 2014-04-06]").toString());
+
+    assertEquals("*", tree.parseShape("[* TO *]").toString());
+
+    assertEquals("2014-08-01", tree.parseShape("[2014-08-01 TO 2014-08-01]").toString());
+
+    assertEquals("[2014 TO 2014-09-15]", tree.parseShape("[2014 TO 2014-09-15]").toString());
+
+    assertEquals("[* TO 2014-09-15]", tree.parseShape("[* TO 2014-09-15]").toString());
+  }
+
+}
\ No newline at end of file