diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 94909bed99f..331c65e9f65 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -49,6 +49,9 @@ New Features * LUCENE-6879: Allow to define custom CharTokenizer instances without subclassing using Java 8 lambdas or method references. (Uwe Schindler) +* LUCENE-6881: Cutover all BKD implementations to dimensional values + (Mike McCandless) + API Changes * LUCENE-3312: The API of oal.document was restructured to diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java index 31d807caa43..22ede84b06f 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java @@ -148,7 +148,7 @@ class SimpleTextDimensionalWriter extends DimensionalWriter { @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } }); indexFPs.put(fieldInfo.name, writer.finish(dataOut)); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java index dc86ab2777d..b572144f61b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java @@ -77,7 +77,7 @@ public abstract class DimensionalWriter implements Closeable { @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { // Forces this segment's DimensionalReader to always visit all docs + values: - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } }); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalWriter.java index d0989f77da6..73608cafd31 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalWriter.java @@ -1,6 +1,5 @@ package org.apache.lucene.codecs.lucene60; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -97,7 +96,7 @@ public class Lucene60DimensionalWriter extends DimensionalWriter implements Clos @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } }); diff --git a/lucene/core/src/java/org/apache/lucene/document/DimensionalField.java b/lucene/core/src/java/org/apache/lucene/document/DimensionalField.java index 52bb3b556c0..5e95d010afd 100644 --- a/lucene/core/src/java/org/apache/lucene/document/DimensionalField.java +++ b/lucene/core/src/java/org/apache/lucene/document/DimensionalField.java @@ -18,6 +18,8 @@ package org.apache.lucene.document; */ import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.bkd.BKDUtil; /** A field that is indexed dimensionally such that finding * all documents within an N-dimensional at search time is @@ -65,6 +67,9 @@ public final class DimensionalField extends Field { if (point.length == 0) { throw new IllegalArgumentException("point cannot be 0 dimensions"); } + if (point.length == 1) { + return new BytesRef(point[0]); + } int bytesPerDim = -1; for(byte[] dim : point) { if (dim == null) { @@ -86,19 +91,20 @@ public final class DimensionalField extends Field { return new BytesRef(packed); } - /** Sugar API: indexes a one-dimensional point */ - public DimensionalField(String name, byte[] dim1) { - super(name, dim1, getType(1, dim1.length)); - } + private static BytesRef pack(long... point) { + if (point == null) { + throw new IllegalArgumentException("point cannot be null"); + } + if (point.length == 0) { + throw new IllegalArgumentException("point cannot be 0 dimensions"); + } + byte[] packed = new byte[point.length * RamUsageEstimator.NUM_BYTES_LONG]; + + for(int dim=0;dim= leafNodeOffset) { + //System.out.println("FILTER"); // Leaf node; scan and filter all points in this block: int count = readDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.scratchDocIDs); diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java index 11251680a51..eeadd016d8e 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java @@ -20,14 +20,14 @@ package org.apache.lucene.util.bkd; import java.math.BigInteger; import java.util.Arrays; -/** Utility methods to convert to/from N-dimensional packed byte[] as numbers */ +/** Utility methods to convert to/from N-dimensional packed byte[] as unsigned numbers */ public final class BKDUtil { private BKDUtil() { // No instance } - /** result = a - b, where a >= b */ + /** Result = a - b, where a >= b, else {@code IllegalArgumentException} is thrown. */ public static void subtract(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) { int start = dim * bytesPerDim; int end = start + bytesPerDim; @@ -43,10 +43,30 @@ public final class BKDUtil { result[i-start] = (byte) diff; } if (borrow != 0) { - throw new IllegalArgumentException("a < b?"); + throw new IllegalArgumentException("a < b"); } } - + + /** Result = a + b, where a and b are unsigned. If there is an overflow, {@code IllegalArgumentException} is thrown. 
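The "unsigned numbers" framing in the BKDUtil class javadoc above is the invariant behind both this new add and the longToBytes/bytesToLong pair later in this hunk: longs are written big-endian with the sign bit flipped, so unsigned byte-wise comparison agrees with signed numeric order. A standalone sketch of that property (plain Java with illustrative helper names, not the Lucene code):

```java
// Standalone check of the invariant behind BKDUtil's long encoding: flip the
// sign bit, write big-endian, and unsigned byte-wise order == signed long order.
public class SortableLongDemo {
  static byte[] encode(long v) {
    v ^= 0x8000000000000000L;             // flip sign bit so negatives sort first
    byte[] b = new byte[8];
    for (int i = 0; i < 8; i++) {
      b[i] = (byte) (v >>> (56 - 8 * i)); // big-endian byte order
    }
    return b;
  }

  static int compareUnsigned(byte[] a, byte[] b) {
    for (int i = 0; i < 8; i++) {
      int cmp = (a[i] & 0xff) - (b[i] & 0xff);
      if (cmp != 0) {
        return cmp;
      }
    }
    return 0;
  }

  public static void main(String[] args) {
    long[] values = {Long.MIN_VALUE, -1L, 0L, 1L, Long.MAX_VALUE};
    for (int i = 0; i < values.length - 1; i++) {
      boolean ordered = compareUnsigned(encode(values[i]), encode(values[i + 1])) < 0;
      System.out.println(values[i] + " < " + values[i + 1] + " as bytes: " + ordered); // always true
    }
  }
}
```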
*/ + public static void add(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) { + int start = dim * bytesPerDim; + int end = start + bytesPerDim; + int carry = 0; + for(int i=end-1;i>=start;i--) { + int digitSum = (a[i]&0xff) + (b[i]&0xff) + carry; + if (digitSum > 255) { + digitSum -= 256; + carry = 1; + } else { + carry = 0; + } + result[i-start] = (byte) digitSum; + } + if (carry != 0) { + throw new IllegalArgumentException("a + b overflows bytesPerDim=" + bytesPerDim); + } + } + /** Returns positive int if a > b, negative int if a < b and 0 if a == b */ public static int compare(int bytesPerDim, byte[] a, int aIndex, byte[] b, int bIndex) { for(int i=0;i> 56); + bytes[offset+1] = (byte) (v >> 48); + bytes[offset+2] = (byte) (v >> 40); + bytes[offset+3] = (byte) (v >> 32); + bytes[offset+4] = (byte) (v >> 24); + bytes[offset+5] = (byte) (v >> 16); + bytes[offset+6] = (byte) (v >> 8); + bytes[offset+7] = (byte) v; + } + + public static long bytesToLong(byte[] bytes, int index) { + int offset = 8 * index; + long v = ((bytes[offset] & 0xffL) << 56) | + ((bytes[offset+1] & 0xffL) << 48) | + ((bytes[offset+2] & 0xffL) << 40) | + ((bytes[offset+3] & 0xffL) << 32) | + ((bytes[offset+4] & 0xffL) << 24) | + ((bytes[offset+5] & 0xffL) << 16) | + ((bytes[offset+6] & 0xffL) << 8) | + (bytes[offset+7] & 0xffL); + + // Flip the sign bit back + v ^= 0x8000000000000000L; + return v; + } + public static void sortableBigIntBytes(byte[] bytes) { bytes[0] ^= 0x80; for(int i=1;i= maxPointsInLeafNode, so we better be in heap at this point: HeapPointWriter heapSource = (HeapPointWriter) source.writer; - // Sort by docID in the leaf so we can delta-vInt encode: - sortHeapPointWriter(heapSource, Math.toIntExact(source.start), Math.toIntExact(source.count), -1); - // Save the block file pointer: leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java b/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java index d460a08c743..1bb6da1b6cb 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java @@ -77,7 +77,7 @@ public class TestDimensionalValues extends LuceneTestCase { new IntersectVisitor() { @Override public Relation compare(byte[] minPacked, byte[] maxPacked) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } public void visit(int docID) { throw new IllegalStateException(); @@ -119,7 +119,7 @@ public class TestDimensionalValues extends LuceneTestCase { new IntersectVisitor() { @Override public Relation compare(byte[] minPacked, byte[] maxPacked) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } public void visit(int docID) { throw new IllegalStateException(); @@ -164,7 +164,7 @@ public class TestDimensionalValues extends LuceneTestCase { new IntersectVisitor() { @Override public Relation compare(byte[] minPacked, byte[] maxPacked) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } public void visit(int docID) { throw new IllegalStateException(); @@ -411,14 +411,14 @@ public class TestDimensionalValues extends LuceneTestCase { assert max.compareTo(min) >= 0; if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) { - return Relation.QUERY_OUTSIDE_CELL; + return Relation.CELL_OUTSIDE_QUERY; } else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) { crosses = 
true; } } if (crosses) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } else { return Relation.CELL_INSIDE_QUERY; } @@ -1079,7 +1079,7 @@ public class TestDimensionalValues extends LuceneTestCase { if (BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMin[dim], 0) < 0 || BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMax[dim], 0) > 0) { //System.out.println(" query_outside_cell"); - return Relation.QUERY_OUTSIDE_CELL; + return Relation.CELL_OUTSIDE_QUERY; } else if (BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMin[dim], 0) < 0 || BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMax[dim], 0) > 0) { crosses = true; @@ -1088,7 +1088,7 @@ public class TestDimensionalValues extends LuceneTestCase { if (crosses) { //System.out.println(" query_crosses_cell"); - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } else { //System.out.println(" cell_inside_query"); return Relation.CELL_INSIDE_QUERY; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDimensionalRangeQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestDimensionalRangeQuery.java new file mode 100644 index 00000000000..435009f0077 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestDimensionalRangeQuery.java @@ -0,0 +1,1029 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DimensionalFormat; +import org.apache.lucene.codecs.DimensionalReader; +import org.apache.lucene.codecs.DimensionalWriter; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.lucene60.Lucene60DimensionalReader; +import org.apache.lucene.codecs.lucene60.Lucene60DimensionalWriter; +import org.apache.lucene.document.DimensionalField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; +import org.apache.lucene.util.bkd.BKDUtil; +import org.junit.BeforeClass; + +public class TestDimensionalRangeQuery extends LuceneTestCase { + + // Controls what range of values we randomly generate, so we sometimes test narrow ranges: + static long valueMid; + static int valueRange; + + @BeforeClass + public static void beforeClass() { + if (random().nextBoolean()) { + valueMid = random().nextLong(); + if (random().nextBoolean()) { + // Wide range + valueRange = TestUtil.nextInt(random(), 1, Integer.MAX_VALUE); + } else { + // Narrow range + valueRange = TestUtil.nextInt(random(), 1, 100000); + } + if (VERBOSE) { + System.out.println("TEST: will generate long values " + valueMid + " +/- " + valueRange); + } + } else { + // All longs + valueRange = 0; + if (VERBOSE) { + System.out.println("TEST: will generate all long values"); + } + } + } + + public void testAllEqual() throws Exception { + int numValues = atLeast(10000); + long value = randomValue(false); + long[] values = new long[numValues]; + FixedBitSet missing = new FixedBitSet(numValues); + + if (VERBOSE) { + System.out.println("TEST: use same value=" + value); + } + + for(int docID=0;docID 0 && random().nextInt(100) < sameValuePct) { + // Identical to old value + docValues[ord] = docValues[random().nextInt(ord)]; + } else { + // Make a new random value + byte[][] values = new byte[numDims][]; + for(int dim=0;dim 100000) { + dir = noVirusChecker(newFSDirectory(createTempDir("TestDimensionalRangeQuery"))); + } else { + dir = getDirectory(); + } + + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numValues = docValues.length; + if (VERBOSE) { + System.out.println("TEST: numValues=" + numValues + " numDims=" + numDims + " numBytesPerDim=" + numBytesPerDim); + } + + int missingPct = random().nextInt(100); + int deletedPct = random().nextInt(100); + if (VERBOSE) { + 
System.out.println(" missingPct=" + missingPct); + System.out.println(" deletedPct=" + deletedPct); + } + + BitSet missing = new BitSet(); + BitSet deleted = new BitSet(); + + Document doc = null; + int lastID = -1; + + for(int ord=0;ord 0) { + byte[] x = lower[dim]; + lower[dim] = upper[dim]; + upper[dim] = x; + } + + includeLower[dim] = random().nextBoolean(); + includeUpper[dim] = random().nextBoolean(); + } + + if (VERBOSE) { + System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter); + for(int dim=0;dim= lower) && (upper == null || value <= upper); + } + + static String bytesToString(byte[] bytes) { + if (bytes == null) { + return "null"; + } + return new BytesRef(bytes).toString(); + } + + private static boolean matches(int bytesPerDim, byte[][] lower, boolean[] includeLower, byte[][] upper, boolean[] includeUpper, byte[][] value) { + int numDims = lower.length; + for(int dim=0;dim 0 || (cmp == 0 && includeUpper[dim] == false)) { + // Value is above the upper bound, on this dim + return false; + } + } + + return true; + } + + private static Long randomValue(boolean allowNull) { + if (valueRange == 0) { + if (allowNull && random().nextInt(10) == 1) { + return null; + } else { + return random().nextLong(); + } + } else { + return valueMid + TestUtil.nextInt(random(), -valueRange, valueRange); + } + } + + public void testMinMaxLong() throws Exception { + Directory dir = getDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + doc.add(new DimensionalField("value", Long.MIN_VALUE)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new DimensionalField("value", Long.MAX_VALUE)); + w.addDocument(doc); + + IndexReader r = w.getReader(); + + IndexSearcher s = newSearcher(r); + + assertEquals(1, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, true, 0L, true))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", 0L, true, Long.MAX_VALUE, true))); + assertEquals(2, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true))); + + IOUtils.close(r, w, dir); + } + + private static byte[] toUTF8(String s) { + return s.getBytes(StandardCharsets.UTF_8); + } + + // Right zero pads: + private static byte[] toUTF8(String s, int length) { + byte[] bytes = s.getBytes(StandardCharsets.UTF_8); + if (length < bytes.length) { + throw new IllegalArgumentException("length=" + length + " but string's UTF8 bytes has length=" + bytes.length); + } + byte[] result = new byte[length]; + System.arraycopy(bytes, 0, result, 0, bytes.length); + return result; + } + + public void testBasicSortedSet() throws Exception { + Directory dir = getDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + doc.add(new DimensionalField("value", toUTF8("abc"))); + w.addDocument(doc); + doc = new Document(); + doc.add(new DimensionalField("value", toUTF8("def"))); + w.addDocument(doc); + + IndexReader r = w.getReader(); + + IndexSearcher s = newSearcher(r); + + assertEquals(1, s.count(new DimensionalRangeQuery("value", + toUTF8("aaa"), + true, + toUTF8("bbb"), + true))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", + toUTF8("c", 3), + true, + toUTF8("e", 3), + true))); + assertEquals(2, s.count(new DimensionalRangeQuery("value", + toUTF8("a", 3), + true, + 
toUTF8("z", 3), + true))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", + null, + true, + toUTF8("abc"), + true))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", + toUTF8("a", 3), + true, + toUTF8("abc"), + true))); + assertEquals(0, s.count(new DimensionalRangeQuery("value", + toUTF8("a", 3), + true, + toUTF8("abc"), + false))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", + toUTF8("def"), + true, + null, + false))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", + toUTF8(("def")), + true, + toUTF8("z", 3), + true))); + assertEquals(0, s.count(new DimensionalRangeQuery("value", + toUTF8("def"), + false, + toUTF8("z", 3), + true))); + + IOUtils.close(r, w, dir); + } + + public void testLongMinMaxNumeric() throws Exception { + Directory dir = getDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + doc.add(new DimensionalField("value", Long.MIN_VALUE)); + w.addDocument(doc); + doc = new Document(); + doc.add(new DimensionalField("value", Long.MAX_VALUE)); + w.addDocument(doc); + + IndexReader r = w.getReader(); + + IndexSearcher s = newSearcher(r); + + assertEquals(2, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, false))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, true))); + assertEquals(0, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, false))); + + assertEquals(2, s.count(new DimensionalRangeQuery("value", (byte[]) null, true, null, true))); + + IOUtils.close(r, w, dir); + } + + public void testLongMinMaxSortedSet() throws Exception { + Directory dir = getDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + doc.add(new DimensionalField("value", Long.MIN_VALUE)); + w.addDocument(doc); + doc = new Document(); + doc.add(new DimensionalField("value", Long.MAX_VALUE)); + w.addDocument(doc); + + IndexReader r = w.getReader(); + + // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat: + IndexSearcher s = newSearcher(r, false); + + assertEquals(2, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, false))); + assertEquals(1, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, true))); + assertEquals(0, s.count(new DimensionalRangeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, false))); + + assertEquals(2, s.count(new DimensionalRangeQuery("value", (Long) null, true, null, true))); + + IOUtils.close(r, w, dir); + } + + public void testSortedSetNoOrdsMatch() throws Exception { + Directory dir = getDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + doc.add(new DimensionalField("value", toUTF8("a"))); + w.addDocument(doc); + doc = new Document(); + doc.add(new DimensionalField("value", toUTF8("z"))); + w.addDocument(doc); + + IndexReader r = w.getReader(); + + IndexSearcher s = 
newSearcher(r); + assertEquals(0, s.count(new DimensionalRangeQuery("value", toUTF8("m"), true, toUTF8("n"), false))); + + assertEquals(2, s.count(new DimensionalRangeQuery("value", (byte[]) null, true, null, true))); + + IOUtils.close(r, w, dir); + } + + public void testNumericNoValuesMatch() throws Exception { + Directory dir = getDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + doc.add(new SortedNumericDocValuesField("value", 17)); + w.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("value", 22)); + w.addDocument(doc); + + IndexReader r = w.getReader(); + + IndexSearcher s = new IndexSearcher(r); + assertEquals(0, s.count(new DimensionalRangeQuery("value", 17L, true, 13L, false))); + + IOUtils.close(r, w, dir); + } + + public void testNoDocs() throws Exception { + Directory dir = getDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + w.addDocument(new Document()); + + IndexReader r = w.getReader(); + + IndexSearcher s = newSearcher(r); + assertEquals(0, s.count(new DimensionalRangeQuery("value", 17L, true, 13L, false))); + + IOUtils.close(r, w, dir); + } + + public void testWrongNumDims() throws Exception { + Directory dir = getDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + doc.add(new DimensionalField("value", Long.MIN_VALUE)); + w.addDocument(doc); + + IndexReader r = w.getReader(); + + // no wrapping, else the exc might happen in executor thread: + IndexSearcher s = new IndexSearcher(r); + byte[][] point = new byte[2][]; + try { + s.count(new DimensionalRangeQuery("value", point, new boolean[] {true, true}, point, new boolean[] {true, true})); + } catch (IllegalArgumentException iae) { + assertEquals("field=\"value\" was indexed with numDims=1 but this query has numDims=2", iae.getMessage()); + } + + IOUtils.close(r, w, dir); + } + + public void testWrongNumBytes() throws Exception { + Directory dir = getDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + doc.add(new DimensionalField("value", Long.MIN_VALUE)); + w.addDocument(doc); + + IndexReader r = w.getReader(); + + // no wrapping, else the exc might happen in executor thread: + IndexSearcher s = new IndexSearcher(r); + byte[][] point = new byte[1][]; + point[0] = new byte[10]; + try { + s.count(new DimensionalRangeQuery("value", point, new boolean[] {true}, point, new boolean[] {true})); + } catch (IllegalArgumentException iae) { + assertEquals("field=\"value\" was indexed with bytesPerDim=8 but this query has bytesPerDim=10", iae.getMessage()); + } + + IOUtils.close(r, w, dir); + } + + private static Directory noVirusChecker(Directory dir) { + if (dir instanceof MockDirectoryWrapper) { + ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); + } + return dir; + } + + private static Directory getDirectory() { + return noVirusChecker(newDirectory()); + } + + private static Codec getCodec() { + if (Codec.getDefault().getName().equals("Lucene60")) { + int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048); + double maxMBSortInHeap = 2.0 + 
(3*random().nextDouble()); + if (VERBOSE) { + System.out.println("TEST: using Lucene60DimensionalFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap); + } + + return new FilterCodec("Lucene60", Codec.getDefault()) { + @Override + public DimensionalFormat dimensionalFormat() { + return new DimensionalFormat() { + @Override + public DimensionalWriter fieldsWriter(SegmentWriteState writeState) throws IOException { + return new Lucene60DimensionalWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap); + } + + @Override + public DimensionalReader fieldsReader(SegmentReadState readState) throws IOException { + return new Lucene60DimensionalReader(readState); + } + }; + } + }; + } else { + return Codec.getDefault(); + } + } +} diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java index 446462f4838..68fbb081a2d 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java @@ -91,11 +91,11 @@ public class TestBKD extends LuceneTestCase { } if (max < queryMin || min > queryMax) { - return Relation.QUERY_OUTSIDE_CELL; + return Relation.CELL_OUTSIDE_QUERY; } else if (min >= queryMin && max <= queryMax) { return Relation.CELL_INSIDE_QUERY; } else { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } } }); @@ -198,14 +198,14 @@ public class TestBKD extends LuceneTestCase { assert max >= min; if (max < queryMin[dim] || min > queryMax[dim]) { - return Relation.QUERY_OUTSIDE_CELL; + return Relation.CELL_OUTSIDE_QUERY; } else if (min < queryMin[dim] || max > queryMax[dim]) { crosses = true; } } if (crosses) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } else { return Relation.CELL_INSIDE_QUERY; } @@ -319,14 +319,14 @@ public class TestBKD extends LuceneTestCase { assert max.compareTo(min) >= 0; if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) { - return Relation.QUERY_OUTSIDE_CELL; + return Relation.CELL_OUTSIDE_QUERY; } else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) { crosses = true; } } if (crosses) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } else { return Relation.CELL_INSIDE_QUERY; } @@ -517,6 +517,87 @@ public class TestBKD extends LuceneTestCase { verify(docValuesArray, docIDsArray, numDims, numBytesPerDim); } + public void testBKDUtilAdd() throws Exception { + int iters = atLeast(10000); + int numBytes = TestUtil.nextInt(random(), 1, 100); + for(int iter=0;iter 0) { - return Relation.QUERY_OUTSIDE_CELL; + return Relation.CELL_OUTSIDE_QUERY; } else if (BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMin[dim], 0) < 0 || BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMax[dim], 0) > 0) { crosses = true; @@ -635,7 +716,7 @@ public class TestBKD extends LuceneTestCase { } if (crosses) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } else { return Relation.CELL_INSIDE_QUERY; } diff --git a/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java b/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java index 802cc30ad19..c341db8ae02 100644 --- a/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java +++ b/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java @@ -32,8 +32,8 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import 
org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.DimensionalField; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; @@ -395,7 +395,7 @@ public abstract class SorterTestBase extends LuceneTestCase { @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - return Relation.QUERY_CROSSES_CELL; + return Relation.CELL_CROSSES_QUERY; } }); } diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointField.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointField.java deleted file mode 100644 index 8cc1f5b2fa3..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointField.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.DocValuesType; - -/** Add this to a document to index lat/lon point, but be sure to use {@link BKDTreeDocValuesFormat} for the field. 
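For readers skimming the deleted sandbox code below: BKDPointField stored a lat/lon pair as a single long, latitude in the high 32 bits and longitude in the low 32 bits, and BKDTreeDocValuesConsumer later unpacks it the same way. A minimal round-trip sketch of that packing (standalone, illustrative values):

```java
// Round-trip of the lat/lon packing used by the (now deleted) BKDPointField:
// two 32-bit encoded ints share one long, lat in the high half, lon in the low.
public class LatLonPackDemo {
  public static void main(String[] args) {
    int latEnc = -123456789;          // any int, e.g. a quantized latitude
    int lonEnc = 987654321;           // a quantized longitude
    long packed = ((long) latEnc << 32) | (lonEnc & 0xffffffffL);
    // The 0xffffffffL mask matters: without it, a negative lonEnc would
    // sign-extend and clobber the latitude bits.
    int latBack = (int) (packed >> 32);
    int lonBack = (int) packed;
    System.out.println(latBack == latEnc && lonBack == lonEnc); // true
  }
}
```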
*/ -public final class BKDPointField extends Field { - - public static final FieldType TYPE = new FieldType(); - static { - TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC); - TYPE.freeze(); - } - - /** - * Creates a new BKDPointField field with the specified lat and lon - * @param name field name - * @param lat double latitude - * @param lon double longitude - * @throws IllegalArgumentException if the field name is null or lat or lon are out of bounds - */ - public BKDPointField(String name, double lat, double lon) { - super(name, TYPE); - if (BKDTreeWriter.validLat(lat) == false) { - throw new IllegalArgumentException("invalid lat (" + lat + "): must be -90 to 90"); - } - if (BKDTreeWriter.validLon(lon) == false) { - throw new IllegalArgumentException("invalid lon (" + lon + "): must be -180 to 180"); - } - fieldsData = Long.valueOf(((long) BKDTreeWriter.encodeLat(lat) << 32) | (BKDTreeWriter.encodeLon(lon) & 0xffffffffL)); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeDocValuesConsumer.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeDocValuesConsumer.java deleted file mode 100644 index 2260b129b26..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeDocValuesConsumer.java +++ /dev/null @@ -1,138 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.Closeable; -import java.io.IOException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; - -class BKDTreeDocValuesConsumer extends DocValuesConsumer implements Closeable { - final DocValuesConsumer delegate; - final int maxPointsInLeafNode; - final int maxPointsSortInHeap; - final IndexOutput out; - final Map fieldIndexFPs = new HashMap<>(); - final SegmentWriteState state; - final Directory tempDir; - final String tempFileNamePrefix; - - public BKDTreeDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException { - BKDTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap); - this.tempDir = tempDir; - this.tempFileNamePrefix = tempFileNamePrefix; - this.delegate = delegate; - this.maxPointsInLeafNode = maxPointsInLeafNode; - this.maxPointsSortInHeap = maxPointsSortInHeap; - this.state = state; - String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.DATA_EXTENSION); - out = state.directory.createOutput(datFileName, state.context); - CodecUtil.writeIndexHeader(out, BKDTreeDocValuesFormat.DATA_CODEC_NAME, BKDTreeDocValuesFormat.DATA_VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - } - - @Override - public void close() throws IOException { - boolean success = false; - try { - CodecUtil.writeFooter(out); - success = true; - } finally { - if (success) { - IOUtils.close(delegate, out); - } else { - IOUtils.closeWhileHandlingException(delegate, out); - } - } - - String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.META_EXTENSION); - IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context); - success = false; - try { - CodecUtil.writeIndexHeader(metaOut, BKDTreeDocValuesFormat.META_CODEC_NAME, BKDTreeDocValuesFormat.META_VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - metaOut.writeVInt(fieldIndexFPs.size()); - for(Map.Entry ent : fieldIndexFPs.entrySet()) { - metaOut.writeVInt(ent.getKey()); - metaOut.writeVLong(ent.getValue()); - } - CodecUtil.writeFooter(metaOut); - success = true; - } finally { - if (success) { - IOUtils.close(metaOut); - } else { - IOUtils.closeWhileHandlingException(metaOut); - } - } - } - - @Override - public void addSortedNumericField(FieldInfo field, Iterable docToValueCount, Iterable values) throws IOException { - delegate.addSortedNumericField(field, docToValueCount, values); - BKDTreeWriter writer = new BKDTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap); - Iterator valueIt = values.iterator(); - Iterator valueCountIt = docToValueCount.iterator(); - for (int docID=0;docID> 32); - int lonEnc = (int) (value & 0xffffffff); - writer.add(latEnc, lonEnc, docID); - } - } - - long indexStartFP = writer.finish(out); - - fieldIndexFPs.put(field.number, indexStartFP); - } - - @Override - public void addNumericField(FieldInfo field, Iterable values) throws IOException 
{ - throw new UnsupportedOperationException(); - } - - @Override - public void addBinaryField(FieldInfo field, Iterable values) { - throw new UnsupportedOperationException(); - } - - @Override - public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) { - throw new UnsupportedOperationException(); - } - - @Override - public void addSortedSetField(FieldInfo field, Iterable values, Iterable docToOrdCount, Iterable ords) { - throw new UnsupportedOperationException(); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeDocValuesFormat.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeDocValuesFormat.java deleted file mode 100644 index 9f0dcd6ddbf..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeDocValuesFormat.java +++ /dev/null @@ -1,109 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; - -/** - * A {@link DocValuesFormat} to efficiently index geo-spatial lat/lon points - * from {@link BKDPointField} for fast bounding-box ({@link BKDPointInBBoxQuery}) - * and polygon ({@link BKDPointInPolygonQuery}) queries. - * - *

This wraps {@link Lucene54DocValuesFormat}, but saves its own BKD tree - * structures to disk for fast query-time intersection. See this paper - * for details. - * - *

The BKD tree slices up 2D (lat/lon) space into smaller and - * smaller rectangles, until the smallest rectangles have approximately - * between X/2 and X (X default is 1024) points in them, at which point - * such leaf cells are written as a block to disk, while the index tree - * structure, which records how space was sub-divided, is loaded into heap - * at search time. The tree is then recursed based on whether - * the left or right child overlaps with the query shape, and once - * a leaf block is reached, all documents in that leaf block are collected - * if the cell is fully enclosed by the query shape, or filtered and then - * collected, if not. - * - *
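This paragraph is also the best place to see why the Relation constants were renamed throughout this patch (CELL_INSIDE_QUERY, CELL_CROSSES_QUERY, CELL_OUTSIDE_QUERY): they are the three possible outcomes of comparing a cell against the query. A toy one-dimensional sketch of the recursion just described, self-contained and purely illustrative (the real BKDReader works on packed byte[] cells and on-disk leaf blocks):

```java
import java.util.ArrayList;
import java.util.List;

// Toy 1-D version of the cell/query recursion: prune cells outside the query,
// bulk-collect cells inside it, and filter point-by-point only on crossings.
public class ToyIntersect {
  enum Relation { CELL_INSIDE_QUERY, CELL_CROSSES_QUERY, CELL_OUTSIDE_QUERY }

  static Relation compare(int cellMin, int cellMax, int qMin, int qMax) {
    if (cellMax < qMin || cellMin > qMax) return Relation.CELL_OUTSIDE_QUERY;
    if (cellMin >= qMin && cellMax <= qMax) return Relation.CELL_INSIDE_QUERY;
    return Relation.CELL_CROSSES_QUERY;
  }

  static void intersect(int cellMin, int cellMax, int qMin, int qMax,
                        int[] values, List<Integer> hits) {
    switch (compare(cellMin, cellMax, qMin, qMax)) {
      case CELL_OUTSIDE_QUERY:
        return; // prune: nothing in this cell can match
      case CELL_INSIDE_QUERY:
        // collect everything in the cell, no per-value query check:
        for (int v : values) if (v >= cellMin && v <= cellMax) hits.add(v);
        return;
      case CELL_CROSSES_QUERY:
        if (cellMax - cellMin <= 1) { // "leaf" cell: filter each value
          for (int v : values) {
            if (v >= cellMin && v <= cellMax && v >= qMin && v <= qMax) hits.add(v);
          }
        } else {                      // inner cell: recurse into both halves
          int mid = cellMin + (cellMax - cellMin) / 2;
          intersect(cellMin, mid, qMin, qMax, values, hits);
          intersect(mid + 1, cellMax, qMin, qMax, values, hits);
        }
    }
  }

  public static void main(String[] args) {
    int[] values = {2, 3, 5, 8, 13, 21, 34};
    List<Integer> hits = new ArrayList<>();
    intersect(0, 63, 4, 20, values, hits);
    System.out.println(hits); // [5, 8, 13]
  }
}
```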

The index is also quite compact, because docs only appear once in - * the tree (no "prefix terms"). - * - *
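For contrast with "docs only appear once": the legacy numeric trie indexed one term per precision level, so a single long value expanded into many prefix terms. A quick back-of-the-envelope (the terms-per-value figure is the standard trie math for an example precisionStep, not something stated in this patch):

```java
// Why "docs only appear once" matters for index size: a numeric trie indexes
// one term per precision level, while a BKD tree records each point exactly
// once in one leaf block.
public class TermCountDemo {
  public static void main(String[] args) {
    int bitsPerValue = 64;
    int precisionStep = 4; // example step; smaller steps mean more terms
    int trieTermsPerValue = (bitsPerValue + precisionStep - 1) / precisionStep;
    System.out.println("trie terms per value: " + trieTermsPerValue); // 16
    System.out.println("BKD entries per value: 1");
  }
}
```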

In addition to the files written by {@link Lucene54DocValuesFormat}, this format writes: - *   1. .kdd: BKD leaf data and index - *   2. .kdm: BKD metadata - * - *

The disk format is experimental and free to change suddenly, and this code likely has new and exciting bugs! - * - * @lucene.experimental */ - -public class BKDTreeDocValuesFormat extends DocValuesFormat { - - static final String DATA_CODEC_NAME = "BKDData"; - static final int DATA_VERSION_START = 0; - static final int DATA_VERSION_CURRENT = DATA_VERSION_START; - static final String DATA_EXTENSION = "kdd"; - - static final String META_CODEC_NAME = "BKDMeta"; - static final int META_VERSION_START = 0; - static final int META_VERSION_CURRENT = META_VERSION_START; - static final String META_EXTENSION = "kdm"; - - private final int maxPointsInLeafNode; - private final int maxPointsSortInHeap; - - private final DocValuesFormat delegate = new Lucene54DocValuesFormat(); - - /** Default constructor */ - public BKDTreeDocValuesFormat() { - this(BKDTreeWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDTreeWriter.DEFAULT_MAX_POINTS_SORT_IN_HEAP); - } - - /** Creates this with custom configuration. - * - * @param maxPointsInLeafNode Maximum number of points in each leaf cell. Smaller values create a deeper tree with larger in-heap index and possibly - * faster searching. The default is 1024. - * @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used. When the number of points exceeds this, a (slower) - * offline sort is used. The default is 128 * 1024. - * - * @lucene.experimental */ - public BKDTreeDocValuesFormat(int maxPointsInLeafNode, int maxPointsSortInHeap) { - super("BKDTree"); - BKDTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap); - this.maxPointsInLeafNode = maxPointsInLeafNode; - this.maxPointsSortInHeap = maxPointsSortInHeap; - } - - @Override - public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException { - return new BKDTreeDocValuesConsumer(state.directory, state.segmentInfo.name, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap); - } - - @Override - public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { - return new BKDTreeDocValuesProducer(delegate.fieldsProducer(state), state); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeDocValuesProducer.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeDocValuesProducer.java deleted file mode 100644 index ce16150d0fa..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeDocValuesProducer.java +++ /dev/null @@ -1,175 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.store.ChecksumIndexInput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.RamUsageEstimator; - -class BKDTreeDocValuesProducer extends DocValuesProducer { - - private final Map treeReaders = new HashMap<>(); - private final Map fieldToIndexFPs = new HashMap<>(); - - private final IndexInput datIn; - private final AtomicLong ramBytesUsed; - private final int maxDoc; - private final DocValuesProducer delegate; - private final boolean merging; - - public BKDTreeDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException { - String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.META_EXTENSION); - ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context); - CodecUtil.checkIndexHeader(metaIn, BKDTreeDocValuesFormat.META_CODEC_NAME, BKDTreeDocValuesFormat.META_VERSION_START, BKDTreeDocValuesFormat.META_VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - int fieldCount = metaIn.readVInt(); - for(int i=0;i getChildResources() { - List resources = new ArrayList<>(); - for(Map.Entry ent : treeReaders.entrySet()) { - resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue())); - } - resources.add(Accountables.namedAccountable("delegate", delegate)); - - return resources; - } - - @Override - public synchronized DocValuesProducer getMergeInstance() throws IOException { - return new BKDTreeDocValuesProducer(this); - } - - @Override - public long ramBytesUsed() { - return ramBytesUsed.get() + delegate.ramBytesUsed(); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java deleted file mode 100644 index f117d0a1026..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java +++ /dev/null @@ -1,379 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.DocIdSetBuilder; -import org.apache.lucene.util.RamUsageEstimator; - -/** Handles intersection of a shape with a BKD tree previously written with {@link BKDTreeWriter}. - * - * @lucene.experimental */ - -final class BKDTreeReader implements Accountable { - final private int[] splitValues; - final private int leafNodeOffset; - final private long[] leafBlockFPs; - final int maxDoc; - final IndexInput in; - - enum Relation {CELL_INSIDE_SHAPE, SHAPE_CROSSES_CELL, SHAPE_OUTSIDE_CELL}; - - interface LatLonFilter { - // TODO: move DVs/encoding out on top: this method should just take a docID - boolean accept(double lat, double lon); - // TODO: move DVs/encoding out on top: this method should take ints and do its own decode - Relation compare(double latMin, double latMax, double lonMin, double lonMax); - } - - public BKDTreeReader(IndexInput in, int maxDoc) throws IOException { - - // Read index: - int numLeaves = in.readVInt(); - leafNodeOffset = numLeaves; - - // Tree is fully balanced binary tree, so number of nodes = numLeaves-1, except our nodeIDs are 1-based (splitValues[0] is unused): - splitValues = new int[numLeaves]; - for(int i=0;i 1.52 sec for 225 OSM London queries: - if (state.latLonFilter != null) { - - // Don't check the filter if the current cell fully contains the query bbox (just keep recursing in that case): - if (cellLatMinEnc > state.latMinEnc || cellLatMaxEnc < state.latMaxEnc || - cellLonMinEnc > state.lonMinEnc || cellLonMaxEnc < state.lonMaxEnc) { - - Relation r = state.latLonFilter.compare(BKDTreeWriter.decodeLat(cellLatMinEnc), - BKDTreeWriter.decodeLat(cellLatMaxEnc), - BKDTreeWriter.decodeLon(cellLonMinEnc), - BKDTreeWriter.decodeLon(cellLonMaxEnc)); - // System.out.println("BKD.intersect cellLat=" + BKDTreeWriter.decodeLat(cellLatMinEnc) + " TO " + BKDTreeWriter.decodeLat(cellLatMaxEnc) + ", cellLon=" + BKDTreeWriter.decodeLon(cellLonMinEnc) + " TO " + BKDTreeWriter.decodeLon(cellLonMaxEnc) + " compare=" + r); - if (r == Relation.SHAPE_OUTSIDE_CELL) { - // This cell is fully outside of the query shape: stop recursing - return 0; - } else if (r == Relation.CELL_INSIDE_SHAPE) { - // This cell is fully inside of the query shape: recursively add all points in this cell without filtering - return addAll(state, nodeID); - } else { - // The cell crosses the shape boundary, so we fall through and do full filtering - } - } else { - //System.out.println(" straight recurse"); - } - // TODO: clean this up: the bbox case should also just be a filter, and we should assert filter != null at the start - } else if (state.latMinEnc <= cellLatMinEnc && state.latMaxEnc >= cellLatMaxEnc && state.lonMinEnc <= cellLonMinEnc && state.lonMaxEnc >= cellLonMaxEnc) { - // Bbox query: optimize the case when the query fully contains this cell: we can - // recursively add all points 
without checking if they match the query: - return addAll(state, nodeID); - } - - long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc; - long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc; - - int dim; - if (latRange >= lonRange) { - dim = 0; - } else { - dim = 1; - } - - //System.out.println("\nintersect node=" + nodeID + " vs " + leafNodeOffset); - - if (nodeID >= leafNodeOffset) { - - // Leaf node; scan and filter all points in this block: - //System.out.println(" intersect leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + leafBlockFPs[nodeID-leafNodeOffset]); - int hitCount = 0; - - long fp = leafBlockFPs[nodeID-leafNodeOffset]; - //System.out.println(" intersect leaf fp=" + fp); - if (fp == 0) { - // Dead end node (adversary case): - //System.out.println(" dead-end leaf"); - return 0; - } - - /* - System.out.println("I: " + BKDTreeWriter.decodeLat(cellLatMinEnc) - + " " + BKDTreeWriter.decodeLat(cellLatMaxEnc) - + " " + BKDTreeWriter.decodeLon(cellLonMinEnc) - + " " + BKDTreeWriter.decodeLon(cellLonMaxEnc)); - */ - - state.in.seek(fp); - - // How many points are stored in this leaf cell: - int count = state.in.readVInt(); - - state.docs.grow(count); - for(int i=0;i= splitValue) { - //System.out.println(" recurse right"); - count += intersect(state, - 2*nodeID+1, - splitValue, cellLatMaxEnc, cellLonMinEnc, cellLonMaxEnc); - } else { - //System.out.println(" no recurse right"); - } - - } else { - // Inner node split on lon: - assert dim == 1; - - //System.out.println(" split on lon=" + BKDTreeWriter.decodeLon(splitValue)); - - // Left node: - if (state.lonMinEnc < splitValue) { - //System.out.println(" recurse left"); - count += intersect(state, - 2*nodeID, - cellLatMinEnc, cellLatMaxEnc, cellLonMinEnc, splitValue); - } else { - //System.out.println(" no recurse left"); - } - - // Right node: - if (state.lonMaxEnc >= splitValue) { - //System.out.println(" recurse right"); - count += intersect(state, - 2*nodeID+1, - cellLatMinEnc, cellLatMaxEnc, splitValue, cellLonMaxEnc); - } else { - //System.out.println(" no recurse right"); - } - } - //System.out.println(" return nodeID=" + nodeID); - return count; - } - } - - @Override - public long ramBytesUsed() { - return splitValues.length * RamUsageEstimator.NUM_BYTES_INT + - leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG; - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeSortedNumericDocValues.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeSortedNumericDocValues.java deleted file mode 100644 index 1a2c1796623..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeSortedNumericDocValues.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.SortedNumericDocValues; - -class BKDTreeSortedNumericDocValues extends SortedNumericDocValues { - final BKDTreeReader bkdTreeReader; - final SortedNumericDocValues delegate; - - public BKDTreeSortedNumericDocValues(BKDTreeReader bkdTreeReader, SortedNumericDocValues delegate) { - this.bkdTreeReader = bkdTreeReader; - this.delegate = delegate; - } - - public BKDTreeReader getBKDTreeReader() { - return bkdTreeReader; - } - - @Override - public void setDocument(int doc) { - delegate.setDocument(doc); - } - - @Override - public long valueAt(int index) { - return delegate.valueAt(index); - } - - @Override - public int count() { - return delegate.count(); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeWriter.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeWriter.java deleted file mode 100644 index 40ef6e858bf..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeWriter.java +++ /dev/null @@ -1,882 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Arrays; -import java.util.Comparator; - -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.InPlaceMergeSorter; -import org.apache.lucene.util.LongBitSet; -import org.apache.lucene.util.OfflineSorter; -import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter; -import org.apache.lucene.util.RamUsageEstimator; - -// TODO -// - could we just "use postings" to map leaf -> docIDs? -// - the polygon query really should be 2-phase -// - if we could merge trees, we could drop delegating to wrapped DV? -// - we could also index "auto-prefix terms" here, and use better compression, and maybe only use for the "fully contained" case so we'd -// only index docIDs -// - the index could be efficiently encoded as an FST, so we don't have wasteful -// (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... 
but then -// the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points -// per leaf, and you can reduce that by putting more points per leaf -// - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf -// - we could use threads while building; the higher nodes are very parallelizable -// - generalize to N dimensions? I think there are reasonable use cases here, e.g. -// 2 dimensional points to store houses, plus e.g. 3rd dimension for "household income" -// - geo3d integration should be straightforward? better accuracy, faster performance for small-poly-with-bbox cases? right now the poly -// check is very costly... - -/** Recursively builds a BKD tree to assign all incoming points to smaller - * and smaller rectangles until the number of points in a given - * rectangle is <= the maxPointsInLeafNode. The tree is - * fully balanced, which means the leaf nodes will have between 50% and 100% of - * the requested maxPointsInLeafNode, except for the adversarial case - * of indexing exactly the same point many times. - * - *
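To make the balance claim above concrete, here is a minimal sketch of how the leaf count falls out of pointCount and maxPointsInLeafNode (a hypothetical helper that mirrors the countPerLeaf/innerNodeCount loop in finish() further down; it is not part of the deleted class):

static int numLeaves(long pointCount, int maxPointsInLeafNode) {
  long countPerLeaf = pointCount;
  long leaves = 1;
  while (countPerLeaf > maxPointsInLeafNode) {
    countPerLeaf = (countPerLeaf + 1) / 2; // each level halves the points per node
    leaves *= 2;                           // and doubles the number of leaves
  }
  // e.g. pointCount=1_000_000, maxPointsInLeafNode=1024 gives 1024 leaves of
  // ~977 points each, i.e. between 50% and 100% of the requested leaf size
  return (int) leaves;
}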

- * See the bkd-tree paper for details. - *

This consumes heap during writing: it allocates a LongBitSet(numPoints), - * and for any nodes with fewer than maxPointsSortInHeap points, it holds - * the points in memory as simple Java arrays. - * - *
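As a rough cross-check of the "~10 MB peak heap" figure quoted just below for the default sort-in-heap size: this back-of-the-envelope arithmetic is mine, not the source's, and it assumes roughly four point slices are alive at once while the tree is built:

long points = 128 * 1024;                    // DEFAULT_MAX_POINTS_SORT_IN_HEAP
long bytesPerPoint = 3 * 4 + 8;              // latEnc, lonEnc, docID (3 ints) + ord (1 long) = 20 bytes
long peakBytes = 4 * points * bytesPerPoint; // 10,485,760 bytes, i.e. ~10 MB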

- * NOTE: This can write at most Integer.MAX_VALUE * maxPointsInLeafNode total points. - * - * @lucene.experimental */ - -class BKDTreeWriter { - - // latEnc (int) + lonEnc (int) + ord (long) + docID (int) - static final int BYTES_PER_DOC = RamUsageEstimator.NUM_BYTES_LONG + 3 * RamUsageEstimator.NUM_BYTES_INT; - - //static final boolean DEBUG = false; - - public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024; - - /** This works out to max of ~10 MB peak heap tied up during writing: */ - public static final int DEFAULT_MAX_POINTS_SORT_IN_HEAP = 128*1024;; - - private final byte[] scratchBytes = new byte[BYTES_PER_DOC]; - private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes); - - private final Directory tempDir; - private final String tempFileNamePrefix; - - private OfflineSorter.ByteSequencesWriter offlineWriter; - private GrowingHeapLatLonWriter heapWriter; - - private IndexOutput tempInput; - private final int maxPointsInLeafNode; - private final int maxPointsSortInHeap; - - private long pointCount; - - public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException { - this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP); - } - - // TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious: - public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException { - verifyParams(maxPointsInLeafNode, maxPointsSortInHeap); - this.tempDir = tempDir; - this.tempFileNamePrefix = tempFileNamePrefix; - this.maxPointsInLeafNode = maxPointsInLeafNode; - this.maxPointsSortInHeap = maxPointsSortInHeap; - - // We write first maxPointsSortInHeap in heap, then cutover to offline for additional points: - heapWriter = new GrowingHeapLatLonWriter(maxPointsSortInHeap); - } - - public static void verifyParams(int maxPointsInLeafNode, int maxPointsSortInHeap) { - if (maxPointsInLeafNode <= 0) { - throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode); - } - if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) { - throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode); - } - if (maxPointsSortInHeap < maxPointsInLeafNode) { - throw new IllegalArgumentException("maxPointsSortInHeap must be >= maxPointsInLeafNode; got " + maxPointsSortInHeap + " vs maxPointsInLeafNode="+ maxPointsInLeafNode); - } - if (maxPointsSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) { - throw new IllegalArgumentException("maxPointsSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsSortInHeap); - } - } - - public void add(double lat, double lon, int docID) throws IOException { - if (validLat(lat) == false) { - throw new IllegalArgumentException("invalid lat: " + lat); - } - if (validLon(lon) == false) { - throw new IllegalArgumentException("invalid lon: " + lon); - } - - // Quantize to 32 bit precision, which is plenty: ~.0093 meter precision (longitude) at the equator - add(encodeLat(lat), encodeLon(lon), docID); - } - - /** If the current segment has too many points then we switchover to temp files / offline sort. 
*/ - private void switchToOffline() throws IOException { - - // For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree: - tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT); - offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput); - for(int i=0;i Integer.MIN_VALUE; - assert latEnc < Integer.MAX_VALUE; - assert lonEnc > Integer.MIN_VALUE; - assert lonEnc < Integer.MAX_VALUE; - - if (pointCount >= maxPointsSortInHeap) { - if (offlineWriter == null) { - switchToOffline(); - } - scratchBytesOutput.reset(scratchBytes); - scratchBytesOutput.writeInt(latEnc); - scratchBytesOutput.writeInt(lonEnc); - scratchBytesOutput.writeVInt(docID); - scratchBytesOutput.writeVLong(pointCount); - offlineWriter.write(scratchBytes, 0, scratchBytes.length); - } else { - // Not too many points added yet, continue using heap: - heapWriter.append(latEnc, lonEnc, pointCount, docID); - } - - pointCount++; - } - - /** Changes incoming {@link ByteSequencesWriter} file to to fixed-width-per-entry file, because we need to be able to slice - * as we recurse in {@link #build}. */ - private LatLonWriter convertToFixedWidth(String in) throws IOException { - BytesRefBuilder scratch = new BytesRefBuilder(); - scratch.grow(BYTES_PER_DOC); - BytesRef bytes = scratch.get(); - ByteArrayDataInput dataReader = new ByteArrayDataInput(); - - OfflineSorter.ByteSequencesReader reader = null; - LatLonWriter sortedWriter = null; - boolean success = false; - try { - reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE)); - sortedWriter = getWriter(pointCount); - for (long i=0;i= 0: "docID=" + docID; - assert latEnc > Integer.MIN_VALUE; - assert latEnc < Integer.MAX_VALUE; - assert lonEnc > Integer.MIN_VALUE; - assert lonEnc < Integer.MAX_VALUE; - sortedWriter.append(latEnc, lonEnc, ord, docID); - } - success = true; - } finally { - if (success) { - IOUtils.close(sortedWriter, reader); - } else { - IOUtils.closeWhileHandlingException(sortedWriter, reader); - try { - sortedWriter.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - } - } - - return sortedWriter; - } - - private LatLonWriter sort(boolean lon) throws IOException { - if (heapWriter != null) { - // All buffered points are still in heap - - assert pointCount < Integer.MAX_VALUE; - - new InPlaceMergeSorter() { - @Override - protected void swap(int i, int j) { - int docID = heapWriter.docIDs[i]; - heapWriter.docIDs[i] = heapWriter.docIDs[j]; - heapWriter.docIDs[j] = docID; - - long ord = heapWriter.ords[i]; - heapWriter.ords[i] = heapWriter.ords[j]; - heapWriter.ords[j] = ord; - - int latEnc = heapWriter.latEncs[i]; - heapWriter.latEncs[i] = heapWriter.latEncs[j]; - heapWriter.latEncs[j] = latEnc; - - int lonEnc = heapWriter.lonEncs[i]; - heapWriter.lonEncs[i] = heapWriter.lonEncs[j]; - heapWriter.lonEncs[j] = lonEnc; - } - - @Override - protected int compare(int i, int j) { - int cmp; - if (lon) { - cmp = Integer.compare(heapWriter.lonEncs[i], heapWriter.lonEncs[j]); - } else { - cmp = Integer.compare(heapWriter.latEncs[i], heapWriter.latEncs[j]); - } - if (cmp != 0) { - return cmp; - } - - // Tie-break - cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]); - if (cmp != 0) { - return cmp; - } - - return Long.compare(heapWriter.ords[i], heapWriter.ords[j]); - } - }.sort(0, (int) pointCount); - - HeapLatLonWriter sorted = new HeapLatLonWriter((int) pointCount); - for(int i=0;i cmp = new Comparator() { 
- private final ByteArrayDataInput readerB = new ByteArrayDataInput(); - - @Override - public int compare(BytesRef a, BytesRef b) { - reader.reset(a.bytes, a.offset, a.length); - final int latAEnc = reader.readInt(); - final int lonAEnc = reader.readInt(); - final int docIDA = reader.readVInt(); - final long ordA = reader.readVLong(); - - reader.reset(b.bytes, b.offset, b.length); - final int latBEnc = reader.readInt(); - final int lonBEnc = reader.readInt(); - final int docIDB = reader.readVInt(); - final long ordB = reader.readVLong(); - - int cmp; - if (lon) { - cmp = Integer.compare(lonAEnc, lonBEnc); - } else { - cmp = Integer.compare(latAEnc, latBEnc); - } - if (cmp != 0) { - return cmp; - } - - // Tie-break - cmp = Integer.compare(docIDA, docIDB); - if (cmp != 0) { - return cmp; - } - - return Long.compare(ordA, ordB); - } - }; - - - boolean success = false; - OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp); - String sortedFileName = sorter.sort(tempInput.getName()); - try { - LatLonWriter writer = convertToFixedWidth(sortedFileName); - success = true; - return writer; - } finally { - if (success) { - tempDir.deleteFile(sortedFileName); - } else { - IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName); - } - } - } - } - - /** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */ - public long finish(IndexOutput out) throws IOException { - //System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter); - - if (offlineWriter != null) { - // This also closes the temp file output: - offlineWriter.close(); - } - - LongBitSet bitSet = new LongBitSet(pointCount); - - long countPerLeaf = pointCount; - long innerNodeCount = 1; - - while (countPerLeaf > maxPointsInLeafNode) { - countPerLeaf = (countPerLeaf+1)/2; - innerNodeCount *= 2; - } - - //System.out.println("innerNodeCount=" + innerNodeCount); - - if (1+2*innerNodeCount >= Integer.MAX_VALUE) { - throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex"); - } - - innerNodeCount--; - - int numLeaves = (int) (innerNodeCount+1); - - // Indexed by nodeID, but first (root) nodeID is 1 - int[] splitValues = new int[numLeaves]; - - // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 
7) - long[] leafBlockFPs = new long[numLeaves]; - - // Make sure the math above "worked": - assert pointCount / splitValues.length <= maxPointsInLeafNode: "pointCount=" + pointCount + " splitValues.length=" + splitValues.length + " maxPointsInLeafNode=" + maxPointsInLeafNode; - //System.out.println(" avg pointsPerLeaf=" + (pointCount/splitValues.length)); - - // Sort all docs once by lat, once by lon: - LatLonWriter latSortedWriter = null; - LatLonWriter lonSortedWriter = null; - - boolean success = false; - try { - lonSortedWriter = sort(true); - latSortedWriter = sort(false); - heapWriter = null; - - build(1, numLeaves, new PathSlice(latSortedWriter, 0, pointCount), - new PathSlice(lonSortedWriter, 0, pointCount), - bitSet, out, - Integer.MIN_VALUE, Integer.MAX_VALUE, - Integer.MIN_VALUE, Integer.MAX_VALUE, - //encodeLat(-90.0), encodeLat(Math.nextAfter(90.0, Double.POSITIVE_INFINITY)), - //encodeLon(-180.0), encodeLon(Math.nextAfter(180.0, Double.POSITIVE_INFINITY)), - splitValues, - leafBlockFPs); - success = true; - } finally { - if (success) { - latSortedWriter.destroy(); - lonSortedWriter.destroy(); - if (tempInput != null) { - tempDir.deleteFile(tempInput.getName()); - } - } else { - try { - latSortedWriter.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - try { - lonSortedWriter.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - if (tempInput != null) { - IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName()); - } - } - } - - //System.out.println("Total nodes: " + innerNodeCount); - - // Write index: - long indexFP = out.getFilePointer(); - out.writeVInt(numLeaves); - - // NOTE: splitValues[0] is unused, because nodeID is 1-based: - for (int i=0;i= minLatEnc && latSplitEnc < maxLatEnc: "latSplitEnc=" + latSplitEnc + " minLatEnc=" + minLatEnc + " maxLatEnc=" + maxLatEnc; - - int lonSplitEnc = reader.lonEnc(); - assert lonSplitEnc >= minLonEnc && lonSplitEnc < maxLonEnc: "lonSplitEnc=" + lonSplitEnc + " minLonEnc=" + minLonEnc + " maxLonEnc=" + maxLonEnc; - - if (splitDim == 0) { - splitValue = latSplitEnc; - //if (DEBUG) System.out.println(" splitValue=" + decodeLat(splitValue)); - } else { - splitValue = lonSplitEnc; - //if (DEBUG) System.out.println(" splitValue=" + decodeLon(splitValue)); - } - success = true; - } finally { - if (success) { - IOUtils.close(reader); - } else { - IOUtils.closeWhileHandlingException(reader); - } - } - - splitValueRet[0] = splitValue; - - // Mark ords that fall into the left half, and also handle the == boundary case: - assert bitSet.cardinality() == 0: "cardinality=" + bitSet.cardinality(); - - success = false; - reader = source.writer.getReader(source.start); - try { - int lastValue = Integer.MIN_VALUE; - for (int i=0;i= lastValue; - lastValue = value; - - if (value == splitValue) { - // TODO: we could simplify this, by allowing splitValue to be on either side? 
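To make the boundary handling that follows concrete, a worked case with invented values, assuming the usual median split:

// With sorted dim values {3, 7, 7, 7, 9}, the median split lands on a 7, so
// splitValue == 7. The marking loop then breaks at i == 1, giving
// leftCount == 1: only the 3 is marked left, and the duplicate 7s (plus the
// 9) all fall into the right subtree, consistent with the ">= splitValue"
// side chosen during recursion in build().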
- // If we have identical points at the split, we move the count back to before the identical points: - leftCount = i; - break; - } - assert value < splitValue: "i=" + i + " value=" + value + " vs splitValue=" + splitValue; - long ord = reader.ord(); - int docID = reader.docID(); - assert docID >= 0: "docID=" + docID + " reader=" + reader; - - // We should never see dup ords: - assert bitSet.get(ord) == false; - bitSet.set(ord); - } - success = true; - } finally { - if (success) { - IOUtils.close(reader); - } else { - IOUtils.closeWhileHandlingException(reader); - } - } - - assert leftCount == bitSet.cardinality(): "leftCount=" + leftCount + " cardinality=" + bitSet.cardinality(); - - return leftCount; - } - - /** The incoming PathSlice for the dim we will split is already partitioned/sorted. */ - private void build(int nodeID, int leafNodeOffset, - PathSlice lastLatSorted, - PathSlice lastLonSorted, - LongBitSet bitSet, - IndexOutput out, - int minLatEnc, int maxLatEnc, int minLonEnc, int maxLonEnc, - int[] splitValues, - long[] leafBlockFPs) throws IOException { - - PathSlice source; - PathSlice nextSource; - - long latRange = (long) maxLatEnc - (long) minLatEnc; - long lonRange = (long) maxLonEnc - (long) minLonEnc; - - assert lastLatSorted.count == lastLonSorted.count; - - // Compute which dim we should split on at this level: - int splitDim; - if (latRange >= lonRange) { - // Split by lat: - splitDim = 0; - source = lastLatSorted; - nextSource = lastLonSorted; - } else { - // Split by lon: - splitDim = 1; - source = lastLonSorted; - nextSource = lastLatSorted; - } - - long count = source.count; - - //if (DEBUG) System.out.println("\nBUILD: nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset + " splitDim=" + splitDim + "\n lastLatSorted=" + lastLatSorted + "\n lastLonSorted=" + lastLonSorted + "\n count=" + count + " lat=" + decodeLat(minLatEnc) + " TO " + decodeLat(maxLatEnc) + " lon=" + decodeLon(minLonEnc) + " TO " + decodeLon(maxLonEnc)); - - if (count == 0) { - // Dead end in the tree, due to adversary cases, e.g. many identical points: - if (nodeID < splitValues.length) { - // Sentinel used to mark that the tree is dead under here: - splitValues[nodeID] = Integer.MAX_VALUE; - } - //if (DEBUG) System.out.println(" dead-end sub-tree"); - return; - } - - if (nodeID >= leafNodeOffset) { - // Leaf node: write block - //if (DEBUG) System.out.println(" leaf"); - assert maxLatEnc > minLatEnc; - assert maxLonEnc > minLonEnc; - - //System.out.println("\nleaf:\n lat range: " + ((long) maxLatEnc-minLatEnc)); - //System.out.println(" lon range: " + ((long) maxLonEnc-minLonEnc)); - - // Sort by docID in the leaf so we get sequentiality at search time (may not matter?): - LatLonReader reader = source.writer.getReader(source.start); - - // TODO: we can reuse this - int[] docIDs = new int[(int) count]; - - boolean success = false; - try { - for (int i=0;i 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec. 
- // I think if we also indexed prefix terms here we could do less costly compression - // on those lists: - int docID = docIDs[i]; - if (docID != lastDocID) { - //System.out.println(" docID=" + docID); - out.writeInt(docID); - lastDocID = docID; - } - } - //long endFP = out.getFilePointer(); - //System.out.println(" bytes/doc: " + ((endFP - startFP) / count)); - } else { - // Inner node: partition/recurse - - assert nodeID < splitValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitValues.length; - - int[] splitValueArray = new int[1]; - - long leftCount = markLeftTree(splitDim, source, bitSet, splitValueArray, - minLatEnc, maxLatEnc, minLonEnc, maxLonEnc); - int splitValue = splitValueArray[0]; - - // TODO: we could save split value in here so we don't have to re-open file later: - - // Partition nextSource into sorted left and right sets, so we can recurse. This is somewhat hairy: we partition the next lon set - // according to how we had just partitioned the lat set, and vice/versa: - - LatLonWriter leftWriter = null; - LatLonWriter rightWriter = null; - LatLonReader reader = null; - - boolean success = false; - - int nextLeftCount = 0; - - try { - leftWriter = getWriter(leftCount); - rightWriter = getWriter(count - leftCount); - - //if (DEBUG) System.out.println(" partition:\n splitValueEnc=" + splitValue + "\n " + nextSource + "\n --> leftSorted=" + leftWriter + "\n --> rightSorted=" + rightWriter + ")"); - reader = nextSource.writer.getReader(nextSource.start); - - // TODO: we could compute the split value here for each sub-tree and save an O(N) pass on recursion, but makes code hairier and only - // changes the constant factor of building, not the big-oh: - for (int i=0;i= 0: "docID=" + docID + " reader=" + reader; - if (bitSet.get(ord)) { - if (splitDim == 0) { - assert latEnc < splitValue: "latEnc=" + latEnc + " splitValue=" + splitValue; - } else { - assert lonEnc < splitValue: "lonEnc=" + lonEnc + " splitValue=" + splitValue; - } - leftWriter.append(latEnc, lonEnc, ord, docID); - nextLeftCount++; - } else { - if (splitDim == 0) { - assert latEnc >= splitValue: "latEnc=" + latEnc + " splitValue=" + splitValue; - } else { - assert lonEnc >= splitValue: "lonEnc=" + lonEnc + " splitValue=" + splitValue; - } - rightWriter.append(latEnc, lonEnc, ord, docID); - } - } - bitSet.clear(0, pointCount); - success = true; - } finally { - if (success) { - IOUtils.close(reader, leftWriter, rightWriter); - } else { - IOUtils.closeWhileHandlingException(reader, leftWriter, rightWriter); - } - } - - assert leftCount == nextLeftCount: "leftCount=" + leftCount + " nextLeftCount=" + nextLeftCount; - - success = false; - try { - if (splitDim == 0) { - //if (DEBUG) System.out.println(" recurse left"); - build(2*nodeID, leafNodeOffset, - new PathSlice(source.writer, source.start, leftCount), - new PathSlice(leftWriter, 0, leftCount), - bitSet, - out, - minLatEnc, splitValue, minLonEnc, maxLonEnc, - splitValues, leafBlockFPs); - leftWriter.destroy(); - - //if (DEBUG) System.out.println(" recurse right"); - build(2*nodeID+1, leafNodeOffset, - new PathSlice(source.writer, source.start+leftCount, count-leftCount), - new PathSlice(rightWriter, 0, count - leftCount), - bitSet, - out, - splitValue, maxLatEnc, minLonEnc, maxLonEnc, - splitValues, leafBlockFPs); - rightWriter.destroy(); - } else { - //if (DEBUG) System.out.println(" recurse left"); - build(2*nodeID, leafNodeOffset, - new PathSlice(leftWriter, 0, leftCount), - new PathSlice(source.writer, source.start, leftCount), - bitSet, - out, - 
minLatEnc, maxLatEnc, minLonEnc, splitValue, - splitValues, leafBlockFPs); - - leftWriter.destroy(); - - //if (DEBUG) System.out.println(" recurse right"); - build(2*nodeID+1, leafNodeOffset, - new PathSlice(rightWriter, 0, count-leftCount), - new PathSlice(source.writer, source.start+leftCount, count-leftCount), - bitSet, - out, - minLatEnc, maxLatEnc, splitValue, maxLonEnc, - splitValues, leafBlockFPs); - rightWriter.destroy(); - } - success = true; - } finally { - if (success == false) { - try { - leftWriter.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - try { - rightWriter.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - } - } - - splitValues[nodeID] = splitValue; - } - } - - LatLonWriter getWriter(long count) throws IOException { - if (count < maxPointsSortInHeap) { - return new HeapLatLonWriter((int) count); - } else { - return new OfflineLatLonWriter(tempDir, tempFileNamePrefix, count); - } - } - - // TODO: move/share all this into GeoUtils - - // We allow one iota over the true max: - static final double MAX_LAT_INCL = Math.nextAfter(90.0D, Double.POSITIVE_INFINITY); - static final double MAX_LON_INCL = Math.nextAfter(180.0D, Double.POSITIVE_INFINITY); - static final double MIN_LAT_INCL = -90.0D; - static final double MIN_LON_INCL = -180.0D; - - static boolean validLat(double lat) { - return Double.isNaN(lat) == false && lat >= MIN_LAT_INCL && lat <= MAX_LAT_INCL; - } - - static boolean validLon(double lon) { - return Double.isNaN(lon) == false && lon >= MIN_LON_INCL && lon <= MAX_LON_INCL; - } - - private static final int BITS = 32; - - // -3 so valid lat/lon never hit the Integer.MIN_VALUE nor Integer.MAX_VALUE: - private static final double LON_SCALE = ((0x1L< Integer.MIN_VALUE: "lat=" + lat + " mapped to Integer.MIN_VALUE"; - return (int) x; - } - - /** Quantizes double (64 bit) longitude into 32 bits */ - static int encodeLon(double lon) { - assert validLon(lon): "lon=" + lon; - long x = (long) (lon * LON_SCALE); - // We use Integer.MAX_VALUE as a sentinel: - assert x < Integer.MAX_VALUE; - assert x > Integer.MIN_VALUE; - return (int) x; - } - - /** Turns quantized value from {@link #encodeLat} back into a double. */ - static double decodeLat(int x) { - return x / LAT_SCALE; - } - - /** Turns quantized value from {@link #encodeLon} back into a double. */ - static double decodeLon(int x) { - return x / LON_SCALE; - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/GrowingHeapLatLonWriter.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/GrowingHeapLatLonWriter.java deleted file mode 100644 index 742fc4f8321..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/GrowingHeapLatLonWriter.java +++ /dev/null @@ -1,88 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -final class GrowingHeapLatLonWriter implements LatLonWriter { - int[] latEncs; - int[] lonEncs; - int[] docIDs; - long[] ords; - private int nextWrite; - final int maxSize; - - public GrowingHeapLatLonWriter(int maxSize) { - latEncs = new int[16]; - lonEncs = new int[16]; - docIDs = new int[16]; - ords = new long[16]; - this.maxSize = maxSize; - } - - private int[] growExact(int[] arr, int size) { - assert size > arr.length; - int[] newArr = new int[size]; - System.arraycopy(arr, 0, newArr, 0, arr.length); - return newArr; - } - - private long[] growExact(long[] arr, int size) { - assert size > arr.length; - long[] newArr = new long[size]; - System.arraycopy(arr, 0, newArr, 0, arr.length); - return newArr; - } - - @Override - public void append(int latEnc, int lonEnc, long ord, int docID) { - assert ord == nextWrite; - if (latEncs.length == nextWrite) { - int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT)); - assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite; - latEncs = growExact(latEncs, nextSize); - lonEncs = growExact(lonEncs, nextSize); - ords = growExact(ords, nextSize); - docIDs = growExact(docIDs, nextSize); - } - latEncs[nextWrite] = latEnc; - lonEncs[nextWrite] = lonEnc; - ords[nextWrite] = ord; - docIDs[nextWrite] = docID; - nextWrite++; - } - - @Override - public LatLonReader getReader(long start) { - return new HeapLatLonReader(latEncs, lonEncs, ords, docIDs, (int) start, nextWrite); - } - - @Override - public void close() { - } - - @Override - public void destroy() { - } - - @Override - public String toString() { - return "GrowingHeapLatLonWriter(count=" + nextWrite + " alloc=" + latEncs.length + ")"; - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/HeapLatLonReader.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/HeapLatLonReader.java deleted file mode 100644 index 67940f6bbea..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/HeapLatLonReader.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -final class HeapLatLonReader implements LatLonReader { - private int curRead; - final int[] latEncs; - final int[] lonEncs; - final long[] ords; - final int[] docIDs; - final int end; - - HeapLatLonReader(int[] latEncs, int[] lonEncs, long[] ords, int[] docIDs, int start, int end) { - this.latEncs = latEncs; - this.lonEncs = lonEncs; - this.ords = ords; - this.docIDs = docIDs; - curRead = start-1; - this.end = end; - } - - @Override - public boolean next() { - curRead++; - return curRead < end; - } - - @Override - public int latEnc() { - return latEncs[curRead]; - } - - @Override - public int lonEnc() { - return lonEncs[curRead]; - } - - @Override - public int docID() { - return docIDs[curRead]; - } - - @Override - public long ord() { - return ords[curRead]; - } - - @Override - public void close() { - } -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/HeapLatLonWriter.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/HeapLatLonWriter.java deleted file mode 100644 index cb32d3caa4c..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/HeapLatLonWriter.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -final class HeapLatLonWriter implements LatLonWriter { - final int[] latEncs; - final int[] lonEncs; - final int[] docIDs; - final long[] ords; - private int nextWrite; - private boolean closed; - - public HeapLatLonWriter(int count) { - latEncs = new int[count]; - lonEncs = new int[count]; - docIDs = new int[count]; - ords = new long[count]; - } - - @Override - public void append(int latEnc, int lonEnc, long ord, int docID) { - latEncs[nextWrite] = latEnc; - lonEncs[nextWrite] = lonEnc; - ords[nextWrite] = ord; - docIDs[nextWrite] = docID; - nextWrite++; - } - - @Override - public LatLonReader getReader(long start) { - assert closed; - return new HeapLatLonReader(latEncs, lonEncs, ords, docIDs, (int) start, latEncs.length); - } - - @Override - public void close() { - closed = true; - if (nextWrite != latEncs.length) { - throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + latEncs.length); - } - } - - @Override - public void destroy() { - } - - @Override - public String toString() { - return "HeapLatLonWriter(count=" + latEncs.length + ")"; - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/LatLonReader.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/LatLonReader.java deleted file mode 100644 index aadfc7fe7c3..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/LatLonReader.java +++ /dev/null @@ -1,31 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.Closeable; -import java.io.IOException; - -/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */ -interface LatLonReader extends Closeable { - boolean next() throws IOException; - int latEnc(); - int lonEnc(); - long ord(); - int docID(); -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/LatLonWriter.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/LatLonWriter.java deleted file mode 100644 index 161fe9caec7..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/LatLonWriter.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.Closeable; -import java.io.IOException; - -/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */ -interface LatLonWriter extends Closeable { - void append(int latEnc, int lonEnc, long ord, int docID) throws IOException; - LatLonReader getReader(long start) throws IOException; - void destroy() throws IOException; -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/OfflineLatLonReader.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/OfflineLatLonReader.java deleted file mode 100644 index 50a4e44278b..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/OfflineLatLonReader.java +++ /dev/null @@ -1,78 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; - -final class OfflineLatLonReader implements LatLonReader { - final IndexInput in; - long countLeft; - private int latEnc; - private int lonEnc; - private long ord; - private int docID; - - OfflineLatLonReader(Directory tempDir, String tempFileName, long start, long count) throws IOException { - in = tempDir.openInput(tempFileName, IOContext.READONCE); - in.seek(start * BKDTreeWriter.BYTES_PER_DOC); - this.countLeft = count; - } - - @Override - public boolean next() throws IOException { - if (countLeft == 0) { - return false; - } - countLeft--; - latEnc = in.readInt(); - lonEnc = in.readInt(); - ord = in.readLong(); - docID = in.readInt(); - return true; - } - - @Override - public int latEnc() { - return latEnc; - } - - @Override - public int lonEnc() { - return lonEnc; - } - - @Override - public long ord() { - return ord; - } - - @Override - public int docID() { - return docID; - } - - @Override - public void close() throws IOException { - in.close(); - } -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/OfflineLatLonWriter.java b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/OfflineLatLonWriter.java deleted file mode 100644 index e6758832a6c..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/OfflineLatLonWriter.java +++ /dev/null @@ -1,77 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; - -final class OfflineLatLonWriter implements LatLonWriter { - - final Directory tempDir; - final byte[] scratchBytes = new byte[BKDTreeWriter.BYTES_PER_DOC]; - final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes); - final IndexOutput out; - final long count; - private long countWritten; - private boolean closed; - - public OfflineLatLonWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException { - this.tempDir = tempDir; - out = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT); - this.count = count; - } - - @Override - public void append(int latEnc, int lonEnc, long ord, int docID) throws IOException { - out.writeInt(latEnc); - out.writeInt(lonEnc); - out.writeLong(ord); - out.writeInt(docID); - countWritten++; - } - - @Override - public LatLonReader getReader(long start) throws IOException { - assert closed; - return new OfflineLatLonReader(tempDir, out.getName(), start, count-start); - } - - @Override - public void close() throws IOException { - closed = true; - out.close(); - if (count != countWritten) { - throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count); - } - } - - @Override - public void destroy() throws IOException { - tempDir.deleteFile(out.getName()); - } - - @Override - public String toString() { - return "OfflineLatLonWriter(count=" + count + " tempFileName=" + out.getName() + ")"; - } -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/package.html b/lucene/sandbox/src/java/org/apache/lucene/bkdtree/package.html deleted file mode 100644 index 90bf356fb9b..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/package.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - - -This package contains a BKD spatial tree implementation for indexing lat/lon points and fast shape searching. - - diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/DimensionalLatLonField.java b/lucene/sandbox/src/java/org/apache/lucene/document/DimensionalLatLonField.java new file mode 100644 index 00000000000..32fd102a139 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/document/DimensionalLatLonField.java @@ -0,0 +1,88 @@ +package org.apache.lucene.document; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.GeoUtils; +import org.apache.lucene.util.bkd.BKDUtil; + +/** Add this to a document to index lat/lon point dimensionally */ +public final class DimensionalLatLonField extends Field { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDimensions(2, 4); + TYPE.freeze(); + } + + /** + * Creates a new DimensionalLatLonField with the specified lat and lon + * @param name field name + * @param lat double latitude + * @param lon double longitude + * @throws IllegalArgumentException if the field name is null or lat or lon are out of bounds + */ + public DimensionalLatLonField(String name, double lat, double lon) { + super(name, TYPE); + if (GeoUtils.isValidLat(lat) == false) { + throw new IllegalArgumentException("invalid lat (" + lat + "): must be -90 to 90"); + } + if (GeoUtils.isValidLon(lon) == false) { + throw new IllegalArgumentException("invalid lon (" + lon + "): must be -180 to 180"); + } + byte[] bytes = new byte[8]; + BKDUtil.intToBytes(encodeLat(lat), bytes, 0); + BKDUtil.intToBytes(encodeLon(lon), bytes, 1); + fieldsData = new BytesRef(bytes); + } + + public static final double TOLERANCE = 1E-7; + + private static final int BITS = 32; + + private static final double LON_SCALE = (0x1L< Integer.MIN_VALUE: "lat=" + lat + " mapped to Integer.MIN_VALUE"; + return (int) x; + } + + /** Quantizes double (64 bit) longitude into 32 bits */ + public static int encodeLon(double lon) { + assert GeoUtils.isValidLon(lon): "lon=" + lon; + long x = (long) (lon * LON_SCALE); + assert x < Integer.MAX_VALUE; + assert x > Integer.MIN_VALUE; + return (int) x; + } + + /** Turns quantized value from {@link #encodeLat} back into a double. */ + public static double decodeLat(int x) { + return x / LAT_SCALE; + } + + /** Turns quantized value from {@link #encodeLon} back into a double. */ + public static double decodeLon(int x) { + return x / LON_SCALE; + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/GrowingHeapSliceWriter.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/GrowingHeapSliceWriter.java deleted file mode 100644 index f1fe7ac80f0..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/GrowingHeapSliceWriter.java +++ /dev/null @@ -1,84 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
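As a quick sanity check of the 32-bit quantization used by encodeLat/encodeLon in DimensionalLatLonField above: the arithmetic here is mine, assuming the ((0x1L << 32) - 3) / 360.0 longitude scale noted in the deleted BKDTreeWriter and a ~40,075 km equator:

double lonScale = ((double) (0x1L << 32) - 3) / 360.0;    // ~1.19e7 units per degree
double degreesPerUnit = 1.0 / lonScale;                   // ~8.4e-8 degrees of resolution
double metersPerDegree = 40_075_000.0 / 360.0;            // ~111 km per degree at the equator
double worstCaseError = degreesPerUnit * metersPerDegree; // ~0.0093 m, matching the "~.0093 meter" comment above

A point would then be indexed as, e.g., new DimensionalLatLonField("location", 40.7128, -74.0060) (field name invented), which packs the two encoded ints into the single 8-byte BytesRef built in the constructor.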
- */ - -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -final class GrowingHeapSliceWriter implements SliceWriter { - long[] values; - int[] docIDs; - long[] ords; - private int nextWrite; - final int maxSize; - - public GrowingHeapSliceWriter(int maxSize) { - values = new long[16]; - docIDs = new int[16]; - ords = new long[16]; - this.maxSize = maxSize; - } - - private int[] growExact(int[] arr, int size) { - assert size > arr.length; - int[] newArr = new int[size]; - System.arraycopy(arr, 0, newArr, 0, arr.length); - return newArr; - } - - private long[] growExact(long[] arr, int size) { - assert size > arr.length; - long[] newArr = new long[size]; - System.arraycopy(arr, 0, newArr, 0, arr.length); - return newArr; - } - - @Override - public void append(long value, long ord, int docID) { - assert ord == nextWrite; - if (values.length == nextWrite) { - int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT)); - assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite; - values = growExact(values, nextSize); - ords = growExact(ords, nextSize); - docIDs = growExact(docIDs, nextSize); - } - values[nextWrite] = value; - ords[nextWrite] = ord; - docIDs[nextWrite] = docID; - nextWrite++; - } - - @Override - public SliceReader getReader(long start) { - return new HeapSliceReader(values, ords, docIDs, (int) start, nextWrite); - } - - @Override - public void close() { - } - - @Override - public void destroy() { - } - - @Override - public String toString() { - return "GrowingHeapSliceWriter(count=" + nextWrite + " alloc=" + values.length + ")"; - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceReader.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceReader.java deleted file mode 100644 index fff15eba1b3..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceReader.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -final class HeapSliceReader implements SliceReader { - private int curRead; - final long[] values; - final long[] ords; - final int[] docIDs; - final int end; - - HeapSliceReader(long[] values, long[] ords, int[] docIDs, int start, int end) { - this.values = values; - this.ords = ords; - this.docIDs = docIDs; - curRead = start-1; - this.end = end; - } - - @Override - public boolean next() { - curRead++; - return curRead < end; - } - - @Override - public long value() { - return values[curRead]; - } - - @Override - public int docID() { - return docIDs[curRead]; - } - - @Override - public long ord() { - return ords[curRead]; - } - - @Override - public void close() { - } -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceWriter.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceWriter.java deleted file mode 100644 index fef37f4a8f3..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceWriter.java +++ /dev/null @@ -1,63 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -final class HeapSliceWriter implements SliceWriter { - final long[] values; - final int[] docIDs; - final long[] ords; - private int nextWrite; - private boolean closed; - - public HeapSliceWriter(int count) { - values = new long[count]; - docIDs = new int[count]; - ords = new long[count]; - } - - @Override - public void append(long value, long ord, int docID) { - values[nextWrite] = value; - ords[nextWrite] = ord; - docIDs[nextWrite] = docID; - nextWrite++; - } - - @Override - public SliceReader getReader(long start) { - assert closed; - return new HeapSliceReader(values, ords, docIDs, (int) start, values.length); - } - - @Override - public void close() { - closed = true; - if (nextWrite != values.length) { - throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + values.length); - } - } - - @Override - public void destroy() { - } - - @Override - public String toString() { - return "HeapSliceWriter(count=" + values.length + ")"; - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/NumericRangeTreeQuery.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/NumericRangeTreeQuery.java deleted file mode 100644 index 34723a0fe3a..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/NumericRangeTreeQuery.java +++ /dev/null @@ -1,157 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; - -import java.io.IOException; - -/** Finds all previously indexed long values that fall within the specified range. - * - *
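A hypothetical usage sketch of the constructor declared below (field name and bounds invented); null bounds are open-ended, and createWeight() normalizes the exclusive flags into an inclusive [minBoundIncl, maxBoundIncl] range:

// Matches 5 < value <= 100; createWeight() turns this into the inclusive range [6, 100]:
Query range = new NumericRangeTreeQuery("price", 5L, false, 100L, true);
// Open-ended upper bound: matches value >= 42; toString() renders the range as "[42 TO *]":
Query atLeast = new NumericRangeTreeQuery("price", 42L, true, null, true);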

The field must be indexed with {@link RangeTreeDocValuesFormat}, and {@link SortedNumericDocValuesField} added per document. - * - * @lucene.experimental */ - -public class NumericRangeTreeQuery extends Query { - final String field; - final Long minValue; - final Long maxValue; - final boolean minInclusive; - final boolean maxInclusive; - - // TODO: sugar for all numeric conversions? - - /** Matches all values in the specified long range. */ - public NumericRangeTreeQuery(String field, Long minValue, boolean minInclusive, Long maxValue, boolean maxInclusive) { - this.field = field; - this.minInclusive = minInclusive; - this.minValue = minValue; - this.maxInclusive = maxInclusive; - this.maxValue = maxValue; - } - - @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - - // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be - // used in the first pass: - - return new ConstantScoreWeight(this) { - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); - SortedNumericDocValues sdv = reader.getSortedNumericDocValues(field); - if (sdv == null) { - // No docs in this segment had this field - return null; - } - - if (sdv instanceof RangeTreeSortedNumericDocValues == false) { - throw new IllegalStateException("field \"" + field + "\" was not indexed with RangeTreeDocValuesFormat: got: " + sdv); - } - RangeTreeSortedNumericDocValues treeDV = (RangeTreeSortedNumericDocValues) sdv; - RangeTreeReader tree = treeDV.getRangeTreeReader(); - - // lower - long minBoundIncl = (minValue == null) ? Long.MIN_VALUE : minValue.longValue(); - - if (minInclusive == false && minValue != null) { - if (minBoundIncl == Long.MAX_VALUE) { - return null; - } - minBoundIncl++; - } - - // upper - long maxBoundIncl = (maxValue == null) ? Long.MAX_VALUE : maxValue.longValue(); - if (maxInclusive == false && maxValue != null) { - if (maxBoundIncl == Long.MIN_VALUE) { - return null; - } - maxBoundIncl--; - } - - if (maxBoundIncl < minBoundIncl) { - return null; - } - - DocIdSet result = tree.intersect(minBoundIncl, maxBoundIncl, treeDV.delegate, context.reader().maxDoc()); - - final DocIdSetIterator disi = result.iterator(); - - return new ConstantScoreScorer(this, score(), disi); - } - }; - } - - @Override - public int hashCode() { - int hash = super.hashCode(); - if (minValue != null) hash += minValue.hashCode()^0x14fa55fb; - if (maxValue != null) hash += maxValue.hashCode()^0x733fa5fe; - return hash + - (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+ - (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe); - } - - @Override - public boolean equals(Object other) { - if (super.equals(other)) { - final NumericRangeTreeQuery q = (NumericRangeTreeQuery) other; - return ( - (q.minValue == null ? minValue == null : q.minValue.equals(minValue)) && - (q.maxValue == null ? maxValue == null : q.maxValue.equals(maxValue)) && - minInclusive == q.minInclusive && - maxInclusive == q.maxInclusive - ); - } - - return false; - } - - @Override - public String toString(String field) { - final StringBuilder sb = new StringBuilder(); - sb.append(getClass().getSimpleName()); - sb.append(':'); - if (this.field.equals(field) == false) { - sb.append("field="); - sb.append(this.field); - sb.append(':'); - } - - return sb.append(minInclusive ? '[' : '{') - .append((minValue == null) ? 
"*" : minValue.toString()) - .append(" TO ") - .append((maxValue == null) ? "*" : maxValue.toString()) - .append(maxInclusive ? ']' : '}') - .toString(); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceReader.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceReader.java deleted file mode 100644 index 0f82c49d358..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceReader.java +++ /dev/null @@ -1,71 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; - -final class OfflineSliceReader implements SliceReader { - final IndexInput in; - private long countLeft; - private long value; - private long ord; - private int docID; - - OfflineSliceReader(Directory tempDir, String tempFileName, long start, long count) throws IOException { - in = tempDir.openInput(tempFileName, IOContext.READONCE); - in.seek(start * RangeTreeWriter.BYTES_PER_DOC); - this.countLeft = count; - } - - @Override - public boolean next() throws IOException { - if (countLeft == 0) { - return false; - } - countLeft--; - value = in.readLong(); - ord = in.readLong(); - docID = in.readInt(); - return true; - } - - @Override - public long value() { - return value; - } - - @Override - public long ord() { - return ord; - } - - @Override - public int docID() { - return docID; - } - - @Override - public void close() throws IOException { - in.close(); - } -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceWriter.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceWriter.java deleted file mode 100644 index 7e0b19a2266..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceWriter.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; - -final class OfflineSliceWriter implements SliceWriter { - - final Directory tempDir; - final byte[] scratchBytes = new byte[RangeTreeWriter.BYTES_PER_DOC]; - final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes); - final IndexOutput tempFile; - final long count; - private boolean closed; - private long countWritten; - - public OfflineSliceWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException { - this.tempDir = tempDir; - tempFile = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT); - this.count = count; - } - - @Override - public void append(long value, long ord, int docID) throws IOException { - tempFile.writeLong(value); - tempFile.writeLong(ord); - tempFile.writeInt(docID); - countWritten++; - } - - @Override - public SliceReader getReader(long start) throws IOException { - assert closed; - return new OfflineSliceReader(tempDir, tempFile.getName(), start, count-start); - } - - @Override - public void close() throws IOException { - closed = true; - tempFile.close(); - if (count != countWritten) { - throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count); - } - } - - @Override - public void destroy() throws IOException { - tempDir.deleteFile(tempFile.getName()); - } - - @Override - public String toString() { - return "OfflineSliceWriter(count=" + count + " tempFileName=" + tempFile.getName() + ")"; - } -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesConsumer.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesConsumer.java deleted file mode 100644 index 21e189376ab..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesConsumer.java +++ /dev/null @@ -1,148 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.Closeable; -import java.io.IOException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; - -class RangeTreeDocValuesConsumer extends DocValuesConsumer implements Closeable { - final DocValuesConsumer delegate; - final int maxPointsInLeafNode; - final int maxPointsSortInHeap; - final IndexOutput out; - final Map fieldIndexFPs = new HashMap<>(); - final SegmentWriteState state; - - public RangeTreeDocValuesConsumer(DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException { - RangeTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap); - this.delegate = delegate; - this.maxPointsInLeafNode = maxPointsInLeafNode; - this.maxPointsSortInHeap = maxPointsSortInHeap; - this.state = state; - String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.DATA_EXTENSION); - out = state.directory.createOutput(datFileName, state.context); - CodecUtil.writeIndexHeader(out, RangeTreeDocValuesFormat.DATA_CODEC_NAME, RangeTreeDocValuesFormat.DATA_VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - } - - @Override - public void close() throws IOException { - boolean success = false; - try { - CodecUtil.writeFooter(out); - success = true; - } finally { - if (success) { - IOUtils.close(delegate, out); - } else { - IOUtils.closeWhileHandlingException(delegate, out); - } - } - - String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.META_EXTENSION); - IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context); - success = false; - try { - CodecUtil.writeIndexHeader(metaOut, RangeTreeDocValuesFormat.META_CODEC_NAME, RangeTreeDocValuesFormat.META_VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - metaOut.writeVInt(fieldIndexFPs.size()); - for(Map.Entry ent : fieldIndexFPs.entrySet()) { - metaOut.writeVInt(ent.getKey()); - metaOut.writeVLong(ent.getValue()); - } - CodecUtil.writeFooter(metaOut); - success = true; - } finally { - if (success) { - IOUtils.close(metaOut); - } else { - IOUtils.closeWhileHandlingException(metaOut); - } - } - } - - @Override - public void addSortedNumericField(FieldInfo field, Iterable docToValueCount, Iterable values) throws IOException { - delegate.addSortedNumericField(field, docToValueCount, values); - RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap); - Iterator valueIt = values.iterator(); - Iterator valueCountIt = docToValueCount.iterator(); - //System.out.println("\nSNF: field=" + field.name); - for (int docID=0;docID values) throws IOException { - throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField"); - } - - @Override - public void addBinaryField(FieldInfo field, Iterable values) { - throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField"); - } - - @Override - public void addSortedField(FieldInfo field, Iterable values, Iterable 
docToOrd) { - throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField"); - } - - @Override - public void addSortedSetField(FieldInfo field, Iterable values, Iterable docToOrdCount, Iterable ords) throws IOException { - delegate.addSortedSetField(field, values, docToOrdCount, ords); - RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap); - Iterator docToOrdCountIt = docToOrdCount.iterator(); - Iterator ordsIt = ords.iterator(); - //System.out.println("\nSSF: field=" + field.name); - for (int docID=0;docIDThis wraps {@link Lucene54DocValuesFormat}, but saves its own numeric tree - * structures to disk for fast query-time intersection. See this paper - * for details. - * - *
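For context on how this now-removed format was wired in: a minimal sketch, assuming the usual per-field codec hook (Lucene60Codec#getDocValuesFormatForField, the same pattern the deleted TestBKDTree at the end of this patch relies on); the field name "size" is illustrative:

  import org.apache.lucene.codecs.Codec;
  import org.apache.lucene.codecs.DocValuesFormat;
  import org.apache.lucene.codecs.lucene60.Lucene60Codec;
  import org.apache.lucene.rangetree.RangeTreeDocValuesFormat;

  // Route one field onto the sandbox format, leave the rest on the default:
  Codec codec = new Lucene60Codec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      if ("size".equals(field)) {   // illustrative field name
        return new RangeTreeDocValuesFormat();
      }
      return super.getDocValuesFormatForField(field);
    }
  };
  // Set this codec on the IndexWriterConfig, then add a
  // SortedNumericDocValuesField("size", value) per document.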

The numeric tree slices up 1D space into smaller and - * smaller ranges, until the smallest ranges have approximately - * between X/2 and X (X default is 1024) values in them, at which point - * such leaf cells are written as a block to disk, while the index tree - * structure, which records how space was sub-divided, is loaded into heap - * at search time. The tree is then recursed based on whether - * the left or right child overlaps with the query range, and once - * a leaf block is reached, all documents in that leaf block are collected - * if the cell is fully enclosed by the query shape, or filtered and then - * collected, if not. - * - *
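As a reading aid, a minimal sketch of the recursion just described; isLeaf, splitValue, addAll and addSome are hypothetical helpers (the shipped implementation is the block-based RangeTreeReader.intersect further down):

  // Prune disjoint nodes, bulk-collect fully enclosed leaves, and
  // filter boundary leaves value-by-value:
  void recurse(int nodeID, long nodeMin, long nodeMax,
               long qMin, long qMax) throws IOException {
    if (nodeMax < qMin || nodeMin > qMax) {
      return;                        // node's range misses the query entirely
    }
    if (isLeaf(nodeID)) {
      if (qMin <= nodeMin && nodeMax <= qMax) {
        addAll(nodeID);              // cell fully enclosed: collect every doc
      } else {
        addSome(nodeID);             // boundary cell: test each value
      }
    } else {
      long split = splitValue(nodeID);
      recurse(2 * nodeID, nodeMin, split, qMin, qMax);
      recurse(2 * nodeID + 1, split, nodeMax, qMin, qMax);
    }
  }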

The index is also quite compact, because docs only appear once in - * the tree (no "prefix terms"). - * - *

In addition to the files written by {@link Lucene54DocValuesFormat}, this format writes: - *

- *   1. .ndd: numeric tree leaf data and index
- *   2. .ndm: numeric tree metadata
- *

The disk format is experimental and free to change suddenly, and this code likely has new and exciting bugs! - * - * @lucene.experimental */ - -public class RangeTreeDocValuesFormat extends DocValuesFormat { - - static final String DATA_CODEC_NAME = "RangeTreeData"; - static final int DATA_VERSION_START = 0; - static final int DATA_VERSION_CURRENT = DATA_VERSION_START; - static final String DATA_EXTENSION = "ndd"; - - static final String META_CODEC_NAME = "RangeTreeMeta"; - static final int META_VERSION_START = 0; - static final int META_VERSION_CURRENT = META_VERSION_START; - static final String META_EXTENSION = "ndm"; - - private final int maxPointsInLeafNode; - private final int maxPointsSortInHeap; - - private final DocValuesFormat delegate = new Lucene54DocValuesFormat(); - - /** Default constructor */ - public RangeTreeDocValuesFormat() { - this(RangeTreeWriter.DEFAULT_MAX_VALUES_IN_LEAF_NODE, RangeTreeWriter.DEFAULT_MAX_VALUES_SORT_IN_HEAP); - } - - /** Creates this with custom configuration. - * - * @param maxPointsInLeafNode Maximum number of points in each leaf cell. Smaller values create a deeper tree with larger in-heap index and possibly - * faster searching. The default is 1024. - * @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used. When the number of points exceeds this, a (slower) - * offline sort is used. The default is 128 * 1024. - * - * @lucene.experimental */ - public RangeTreeDocValuesFormat(int maxPointsInLeafNode, int maxPointsSortInHeap) { - super("RangeTree"); - RangeTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap); - this.maxPointsInLeafNode = maxPointsInLeafNode; - this.maxPointsSortInHeap = maxPointsSortInHeap; - } - - @Override - public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException { - return new RangeTreeDocValuesConsumer(delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap); - } - - @Override - public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { - return new RangeTreeDocValuesProducer(delegate.fieldsProducer(state), state); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesProducer.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesProducer.java deleted file mode 100644 index a2e6fc3a5da..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesProducer.java +++ /dev/null @@ -1,196 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.store.ChecksumIndexInput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.RamUsageEstimator; - -class RangeTreeDocValuesProducer extends DocValuesProducer { - - private final Map treeReaders = new HashMap<>(); - private final Map fieldToIndexFPs = new HashMap<>(); - - private final IndexInput datIn; - private final AtomicLong ramBytesUsed; - private final int maxDoc; - private final DocValuesProducer delegate; - private final boolean merging; - - public RangeTreeDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException { - String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.META_EXTENSION); - ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context); - CodecUtil.checkIndexHeader(metaIn, RangeTreeDocValuesFormat.META_CODEC_NAME, RangeTreeDocValuesFormat.META_VERSION_START, RangeTreeDocValuesFormat.META_VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - int fieldCount = metaIn.readVInt(); - for(int i=0;i getChildResources() { - List resources = new ArrayList<>(); - for(Map.Entry ent : treeReaders.entrySet()) { - resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue())); - } - resources.add(Accountables.namedAccountable("delegate", delegate)); - - return resources; - } - - @Override - public synchronized DocValuesProducer getMergeInstance() throws IOException { - return new RangeTreeDocValuesProducer(this); - } - - @Override - public long ramBytesUsed() { - return ramBytesUsed.get() + delegate.ramBytesUsed(); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeReader.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeReader.java deleted file mode 100644 index 5b4b31831f2..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeReader.java +++ /dev/null @@ -1,202 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.DocIdSetBuilder; -import org.apache.lucene.util.RamUsageEstimator; - -import java.io.IOException; -import java.util.Arrays; - -/** Handles intersection of a range with a numeric tree previously written with {@link RangeTreeWriter}. - * - * @lucene.experimental */ - -final class RangeTreeReader implements Accountable { - final private long[] blockFPs; - final private long[] blockMinValues; - final IndexInput in; - final long globalMaxValue; - final int approxDocsPerBlock; - - public RangeTreeReader(IndexInput in) throws IOException { - - // Read index: - int numLeaves = in.readVInt(); - approxDocsPerBlock = in.readVInt(); - - blockMinValues = new long[numLeaves]; - for(int i=0;i maxIncl) { - return DocIdSet.EMPTY; - } - - if (minIncl > globalMaxValue || maxIncl < blockMinValues[0]) { - return DocIdSet.EMPTY; - } - - QueryState state = new QueryState(in.clone(), maxDoc, - minIncl, maxIncl, - sndv); - - int startBlockIncl = Arrays.binarySearch(blockMinValues, minIncl); - if (startBlockIncl >= 0) { - // There can be dups here, when the same value is added many - // times. Also, we need the first block whose min is < minIncl: - while (startBlockIncl > 0 && blockMinValues[startBlockIncl] == minIncl) { - startBlockIncl--; - } - } else { - startBlockIncl = Math.max(-startBlockIncl-2, 0); - } - - int endBlockIncl = Arrays.binarySearch(blockMinValues, maxIncl); - if (endBlockIncl >= 0) { - // There can be dups here, when the same value is added many - // times. Also, we need the first block whose max is > minIncl: - while (endBlockIncl < blockMinValues.length-1 && blockMinValues[endBlockIncl] == maxIncl) { - endBlockIncl++; - } - } else { - endBlockIncl = Math.max(-endBlockIncl-2, 0); - } - - assert startBlockIncl <= endBlockIncl; - - state.in.seek(blockFPs[startBlockIncl]); - - //System.out.println("startBlockIncl=" + startBlockIncl + " endBlockIncl=" + endBlockIncl); - - // Rough estimate of how many hits we'll see. Note that in the degenerate case - // (index same value many times) this could be a big over-estimate, but in the typical - // case it's good: - state.docs.grow(approxDocsPerBlock * (endBlockIncl - startBlockIncl + 1)); - - int hitCount = 0; - for (int block=startBlockIncl;block<=endBlockIncl;block++) { - boolean doFilter = blockMinValues[block] <= minIncl || block == blockMinValues.length-1 || blockMinValues[block+1] >= maxIncl; - //System.out.println(" block=" + block + " min=" + blockMinValues[block] + " doFilter=" + doFilter); - - int newCount; - if (doFilter) { - // We must filter each hit: - newCount = addSome(state); - } else { - newCount = addAll(state); - } - - hitCount += newCount; - } - - // NOTE: hitCount is an over-estimate in the multi-valued case: - return state.docs.build(hitCount); - } - - /** Adds all docs from the current block. 
*/ - private int addAll(QueryState state) throws IOException { - // How many values are stored in this leaf cell: - int count = state.in.readVInt(); - state.docs.grow(count); - for(int i=0;i= state.minValueIncl && value <= state.maxValueIncl) { - state.docs.add(docID); - hitCount++; - - // Stop processing values for this doc: - break; - } - } - } - - return hitCount; - } - - @Override - public long ramBytesUsed() { - return blockMinValues.length * RamUsageEstimator.NUM_BYTES_LONG + - blockFPs.length * RamUsageEstimator.NUM_BYTES_LONG; - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedNumericDocValues.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedNumericDocValues.java deleted file mode 100644 index a5cbd15a6c1..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedNumericDocValues.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.SortedNumericDocValues; - -class RangeTreeSortedNumericDocValues extends SortedNumericDocValues { - final RangeTreeReader rangeTreeReader; - final SortedNumericDocValues delegate; - - public RangeTreeSortedNumericDocValues(RangeTreeReader rangeTreeReader, SortedNumericDocValues delegate) { - this.rangeTreeReader = rangeTreeReader; - this.delegate = delegate; - } - - public RangeTreeReader getRangeTreeReader() { - return rangeTreeReader; - } - - @Override - public void setDocument(int doc) { - delegate.setDocument(doc); - } - - @Override - public long valueAt(int index) { - return delegate.valueAt(index); - } - - @Override - public int count() { - return delegate.count(); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedSetDocValues.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedSetDocValues.java deleted file mode 100644 index b881a897a07..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedSetDocValues.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.util.BytesRef; - -class RangeTreeSortedSetDocValues extends SortedSetDocValues { - final RangeTreeReader rangeTreeReader; - final SortedSetDocValues delegate; - - public RangeTreeSortedSetDocValues(RangeTreeReader rangeTreeReader, SortedSetDocValues delegate) { - this.rangeTreeReader = rangeTreeReader; - this.delegate = delegate; - } - - public RangeTreeReader getRangeTreeReader() { - return rangeTreeReader; - } - - @Override - public long nextOrd() { - return delegate.nextOrd(); - } - - @Override - public void setDocument(int doc) { - delegate.setDocument(doc); - } - - @Override - public BytesRef lookupOrd(long ord) { - return delegate.lookupOrd(ord); - } - - @Override - public long getValueCount() { - return delegate.getValueCount(); - } - - @Override - public long lookupTerm(BytesRef key) { - return delegate.lookupTerm(key); - } - - @Override - public TermsEnum termsEnum() { - return delegate.termsEnum(); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeWriter.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeWriter.java deleted file mode 100644 index dc4fde2398e..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeWriter.java +++ /dev/null @@ -1,580 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Arrays; -import java.util.Comparator; - -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.InPlaceMergeSorter; -import org.apache.lucene.util.OfflineSorter; -import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter; -import org.apache.lucene.util.RamUsageEstimator; - -// TODO -// - could we just "use postings" to map leaf -> docIDs? 
-// - we could also index "auto-prefix terms" here, and use better compression -// - the index could be efficiently encoded as an FST, so we don't have wasteful -// (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then -// the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points -// per leaf, and you can reduce that by putting more points per leaf -// - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf - -/** Recursively builds a 1d BKD tree to assign all incoming {@code long} values to smaller - * and smaller ranges until the number of points in a given - * range is <= the maxPointsInLeafNode. The tree is - * fully balanced, which means the leaf nodes will have between 50% and 100% of - * the requested maxPointsInLeafNode, except for the adversarial case - * of indexing exactly the same value many times. - * - *
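The 50%-100% balance claim follows from how finish() (further down) sizes the tree. A worked run of that exact loop, assuming the 60M-value corpus and the default 1024 values per leaf mentioned above:

  long valueCount = 60_000_000L;            // e.g. the OSM corpus cited above
  int maxValuesInLeafNode = 1024;
  long countPerLeaf = valueCount;
  long numLeaves = 1;
  while (countPerLeaf > maxValuesInLeafNode) {
    countPerLeaf = (countPerLeaf + 1) / 2;  // halve, rounding up
    numLeaves *= 2;
  }
  // numLeaves == 65536, countPerLeaf == 916: each leaf holds ~89% of
  // maxValuesInLeafNode, inside the promised 50%-100% band.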

- * See this paper for details. - * - *

This consumes heap during writing: for any node with fewer than maxPointsSortInHeap points, it holds - * the points in memory as simple java arrays. - * - *
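To put a rough number on that heap use, based on the BYTES_PER_DOC layout defined just below (value long + ord long + docID int):

  int bytesPerDoc = 8 + 8 + 4;                // mirrors BYTES_PER_DOC below
  long rawBytes = 128L * 1024 * bytesPerDoc;  // 2,621,440 bytes at the default cutover
  // ~2.6 MB of raw buffered entries; the "~10 MB peak heap" note below
  // presumably also counts sorting and copying overhead.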

- * NOTE: This can write at most Integer.MAX_VALUE * maxPointsInLeafNode total values, - * which should be plenty since a Lucene index can have at most Integer.MAX_VALUE-1 documents. - * - * @lucene.experimental */ - -class RangeTreeWriter { - - // value (long) + ord (long) + docID (int) - static final int BYTES_PER_DOC = 2 * RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT; - - public static final int DEFAULT_MAX_VALUES_IN_LEAF_NODE = 1024; - - /** This works out to max of ~10 MB peak heap tied up during writing: */ - public static final int DEFAULT_MAX_VALUES_SORT_IN_HEAP = 128*1024;; - - private final byte[] scratchBytes = new byte[BYTES_PER_DOC]; - private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes); - - private final Directory tempDir; - private final String tempFileNamePrefix; - - private OfflineSorter.ByteSequencesWriter offlineWriter; - private GrowingHeapSliceWriter heapWriter; - - private IndexOutput tempInput; - private final int maxValuesInLeafNode; - private final int maxValuesSortInHeap; - - private long valueCount; - private long globalMinValue = Long.MAX_VALUE; - private long globalMaxValue = Long.MIN_VALUE; - - public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException { - this(tempDir, tempFileNamePrefix, DEFAULT_MAX_VALUES_IN_LEAF_NODE, DEFAULT_MAX_VALUES_SORT_IN_HEAP); - } - - // TODO: instead of maxValuesSortInHeap, change to maxMBHeap ... the mapping is non-obvious: - public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxValuesInLeafNode, int maxValuesSortInHeap) throws IOException { - verifyParams(maxValuesInLeafNode, maxValuesSortInHeap); - this.tempDir = tempDir; - this.tempFileNamePrefix = tempFileNamePrefix; - this.maxValuesInLeafNode = maxValuesInLeafNode; - this.maxValuesSortInHeap = maxValuesSortInHeap; - - // We write first maxValuesSortInHeap in heap, then cutover to offline for additional points: - heapWriter = new GrowingHeapSliceWriter(maxValuesSortInHeap); - } - - public static void verifyParams(int maxValuesInLeafNode, int maxValuesSortInHeap) { - if (maxValuesInLeafNode <= 0) { - throw new IllegalArgumentException("maxValuesInLeafNode must be > 0; got " + maxValuesInLeafNode); - } - if (maxValuesInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) { - throw new IllegalArgumentException("maxValuesInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxValuesInLeafNode); - } - if (maxValuesSortInHeap < maxValuesInLeafNode) { - throw new IllegalArgumentException("maxValuesSortInHeap must be >= maxValuesInLeafNode; got " + maxValuesSortInHeap + " vs maxValuesInLeafNode="+ maxValuesInLeafNode); - } - if (maxValuesSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) { - throw new IllegalArgumentException("maxValuesSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxValuesSortInHeap); - } - } - - /** If the current segment has too many points then we switchover to temp files / offline sort. 
*/ - private void switchToOffline() throws IOException { - - // For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree: - tempInput = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT); - offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput); - for(int i=0;i= maxValuesSortInHeap) { - if (offlineWriter == null) { - switchToOffline(); - } - scratchBytesOutput.reset(scratchBytes); - scratchBytesOutput.writeLong(value); - scratchBytesOutput.writeVInt(docID); - scratchBytesOutput.writeVLong(valueCount); - offlineWriter.write(scratchBytes, 0, scratchBytes.length); - } else { - // Not too many points added yet, continue using heap: - heapWriter.append(value, valueCount, docID); - } - - valueCount++; - globalMaxValue = Math.max(value, globalMaxValue); - globalMinValue = Math.min(value, globalMinValue); - } - - /** Changes incoming {@link ByteSequencesWriter} file to to fixed-width-per-entry file, because we need to be able to slice - * as we recurse in {@link #build}. */ - private SliceWriter convertToFixedWidth(String in) throws IOException { - BytesRefBuilder scratch = new BytesRefBuilder(); - scratch.grow(BYTES_PER_DOC); - BytesRef bytes = scratch.get(); - ByteArrayDataInput dataReader = new ByteArrayDataInput(); - - OfflineSorter.ByteSequencesReader reader = null; - SliceWriter sortedWriter = null; - boolean success = false; - try { - reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE)); - sortedWriter = getWriter(valueCount); - for (long i=0;i= 0: "docID=" + docID; - long ord = dataReader.readVLong(); - sortedWriter.append(value, ord, docID); - } - success = true; - } finally { - if (success) { - IOUtils.close(sortedWriter, reader); - } else { - IOUtils.closeWhileHandlingException(sortedWriter, reader); - try { - sortedWriter.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - } - } - - return sortedWriter; - } - - private SliceWriter sort() throws IOException { - if (heapWriter != null) { - - assert valueCount < Integer.MAX_VALUE; - - // All buffered points are still in heap - new InPlaceMergeSorter() { - @Override - protected void swap(int i, int j) { - int docID = heapWriter.docIDs[i]; - heapWriter.docIDs[i] = heapWriter.docIDs[j]; - heapWriter.docIDs[j] = docID; - - long ord = heapWriter.ords[i]; - heapWriter.ords[i] = heapWriter.ords[j]; - heapWriter.ords[j] = ord; - - long value = heapWriter.values[i]; - heapWriter.values[i] = heapWriter.values[j]; - heapWriter.values[j] = value; - } - - @Override - protected int compare(int i, int j) { - int cmp = Long.compare(heapWriter.values[i], heapWriter.values[j]); - if (cmp != 0) { - return cmp; - } - - // Tie-break - cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]); - if (cmp != 0) { - return cmp; - } - - return Long.compare(heapWriter.ords[i], heapWriter.ords[j]); - } - }.sort(0, (int) valueCount); - - HeapSliceWriter sorted = new HeapSliceWriter((int) valueCount); - for(int i=0;i cmp = new Comparator() { - private final ByteArrayDataInput readerB = new ByteArrayDataInput(); - - @Override - public int compare(BytesRef a, BytesRef b) { - reader.reset(a.bytes, a.offset, a.length); - final long valueA = reader.readLong(); - final int docIDA = reader.readVInt(); - final long ordA = reader.readVLong(); - - reader.reset(b.bytes, b.offset, b.length); - final long valueB = reader.readLong(); - final int docIDB = reader.readVInt(); - final long ordB = 
reader.readVLong(); - - int cmp = Long.compare(valueA, valueB); - if (cmp != 0) { - return cmp; - } - - // Tie-break - cmp = Integer.compare(docIDA, docIDB); - if (cmp != 0) { - return cmp; - } - - return Long.compare(ordA, ordB); - } - }; - - boolean success = false; - OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp); - String sortedFileName = sorter.sort(tempInput.getName()); - try { - SliceWriter writer = convertToFixedWidth(sortedFileName); - success = true; - return writer; - } finally { - if (success) { - tempDir.deleteFile(sortedFileName); - } else { - IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName); - } - } - } - } - - /** Writes the 1d BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */ - public long finish(IndexOutput out) throws IOException { - - if (offlineWriter != null) { - offlineWriter.close(); - } - - if (valueCount == 0) { - throw new IllegalStateException("at least one value must be indexed"); - } - - // TODO: we should use in-memory sort here, if number of points is small enough: - - long countPerLeaf = valueCount; - long innerNodeCount = 1; - - while (countPerLeaf > maxValuesInLeafNode) { - countPerLeaf = (countPerLeaf+1)/2; - innerNodeCount *= 2; - } - - //System.out.println("innerNodeCount=" + innerNodeCount); - - if (1+2*innerNodeCount >= Integer.MAX_VALUE) { - throw new IllegalStateException("too many nodes; increase maxValuesInLeafNode (currently " + maxValuesInLeafNode + ") and reindex"); - } - - innerNodeCount--; - - int numLeaves = (int) (innerNodeCount+1); - - // Indexed by nodeID, but first (root) nodeID is 1 - long[] blockMinValues = new long[numLeaves]; - - // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7) - long[] leafBlockFPs = new long[numLeaves]; - - // Make sure the math above "worked": - assert valueCount / blockMinValues.length <= maxValuesInLeafNode: "valueCount=" + valueCount + " blockMinValues.length=" + blockMinValues.length + " maxValuesInLeafNode=" + maxValuesInLeafNode; - //System.out.println(" avg pointsPerLeaf=" + (valueCount/blockMinValues.length)); - - // Sort all docs by value: - SliceWriter sortedWriter = null; - - boolean success = false; - try { - sortedWriter = sort(); - heapWriter = null; - - build(1, numLeaves, - new PathSlice(sortedWriter, 0, valueCount), - out, - globalMinValue, globalMaxValue, - blockMinValues, - leafBlockFPs); - success = true; - } finally { - if (success) { - sortedWriter.destroy(); - if (tempInput != null) { - tempDir.deleteFile(tempInput.getName()); - } - } else { - try { - sortedWriter.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - if (tempInput != null) { - IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName()); - } - } - } - - //System.out.println("Total nodes: " + innerNodeCount); - - // Write index: - long indexFP = out.getFilePointer(); - out.writeVInt(numLeaves); - out.writeVInt((int) (valueCount / numLeaves)); - - for (int i=0;i= minValue && splitValue <= maxValue: "splitValue=" + splitValue + " minValue=" + minValue + " maxValue=" + maxValue + " reader=" + reader; - success = true; - } finally { - if (success) { - IOUtils.close(reader); - } else { - IOUtils.closeWhileHandlingException(reader); - } - } - - return splitValue; - } - - /** The incoming PathSlice for the dim we will split is already partitioned/sorted. 
*/ - private void build(int nodeID, int leafNodeOffset, - PathSlice source, - IndexOutput out, - long minValue, long maxValue, - long[] blockMinValues, - long[] leafBlockFPs) throws IOException { - - long count = source.count; - - if (source.writer instanceof OfflineSliceWriter && count <= maxValuesSortInHeap) { - // Cutover to heap: - SliceWriter writer = new HeapSliceWriter((int) count); - SliceReader reader = source.writer.getReader(source.start); - try { - for(int i=0;i 0; - - if (nodeID >= leafNodeOffset) { - // Leaf node: write block - assert maxValue >= minValue; - - //System.out.println("\nleaf:\n lat range: " + ((long) maxLatEnc-minLatEnc)); - //System.out.println(" lon range: " + ((long) maxLonEnc-minLonEnc)); - - // Sort by docID in the leaf so we can .or(DISI) at search time: - SliceReader reader = source.writer.getReader(source.start); - - int[] docIDs = new int[(int) count]; - - boolean success = false; - try { - for (int i=0;i 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec. - // I think if we also indexed prefix terms here we could do less costly compression - // on those lists: - int docID = docIDs[i]; - if (docID != lastDocID) { - out.writeInt(docID); - lastDocID = docID; - } - } - //long endFP = out.getFilePointer(); - //System.out.println(" bytes/doc: " + ((endFP - startFP) / count)); - } else { - // Inner node: sort, partition/recurse - - assert nodeID < blockMinValues.length: "nodeID=" + nodeID + " blockMinValues.length=" + blockMinValues.length; - - assert source.count == count; - - long leftCount = source.count / 2; - - // NOTE: we don't tweak leftCount for the boundary cases, which means at search time if we are looking for exactly splitValue then we - // must search both left and right trees: - long splitValue = getSplitValue(source, leftCount, minValue, maxValue); - - build(2*nodeID, leafNodeOffset, - new PathSlice(source.writer, source.start, leftCount), - out, - minValue, splitValue, - blockMinValues, leafBlockFPs); - - build(2*nodeID+1, leafNodeOffset, - new PathSlice(source.writer, source.start+leftCount, count-leftCount), - out, - splitValue, maxValue, - blockMinValues, leafBlockFPs); - } - } - - SliceWriter getWriter(long count) throws IOException { - if (count < maxValuesSortInHeap) { - return new HeapSliceWriter((int) count); - } else { - return new OfflineSliceWriter(tempDir, tempFileNamePrefix, count); - } - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceReader.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceReader.java deleted file mode 100644 index 3256fee6b2e..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceReader.java +++ /dev/null @@ -1,31 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.Closeable; -import java.io.IOException; - -/** Iterates over one slice of the sorted values. This abstracts away whether - * OfflineSorter or simple arrays in heap are used. */ -interface SliceReader extends Closeable { - boolean next() throws IOException; - long value(); - long ord(); - int docID(); -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceWriter.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceWriter.java deleted file mode 100644 index 9850f0997f8..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceWriter.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.Closeable; -import java.io.IOException; - -/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */ -interface SliceWriter extends Closeable { - void append(long value, long ord, int docID) throws IOException; - SliceReader getReader(long start) throws IOException; - void destroy() throws IOException; -} - diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/SortedSetRangeTreeQuery.java b/lucene/sandbox/src/java/org/apache/lucene/rangetree/SortedSetRangeTreeQuery.java deleted file mode 100644 index 07c36e9f4db..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/SortedSetRangeTreeQuery.java +++ /dev/null @@ -1,217 +0,0 @@ -package org.apache.lucene.rangetree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.document.SortedSetDocValuesField; // javadocs -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; - -import java.io.IOException; - -/** Finds all previously indexed values that fall within the specified {@link BytesRef} range. - * - *

The field must be indexed with {@link RangeTreeDocValuesFormat}, and {@link SortedSetDocValuesField} added per document. - * - * @lucene.experimental */ - -public class SortedSetRangeTreeQuery extends Query { - final String field; - final BytesRef minValue; - final BytesRef maxValue; - final boolean minInclusive; - final boolean maxInclusive; - - /** Matches all values in the specified {@link BytesRef} range. */ - public SortedSetRangeTreeQuery(String field, BytesRef minValue, boolean minInclusive, BytesRef maxValue, boolean maxInclusive) { - this.field = field; - this.minInclusive = minInclusive; - this.minValue = minValue; - this.maxInclusive = maxInclusive; - this.maxValue = maxValue; - } - - @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - - // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be - // used in the first pass: - - return new ConstantScoreWeight(this) { - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); - final SortedSetDocValues ssdv = reader.getSortedSetDocValues(field); - if (ssdv == null) { - // No docs in this segment had this field - return null; - } - - if (ssdv instanceof RangeTreeSortedSetDocValues == false) { - throw new IllegalStateException("field \"" + field + "\" was not indexed with RangeTreeDocValuesFormat: got: " + ssdv); - } - RangeTreeSortedSetDocValues treeDV = (RangeTreeSortedSetDocValues) ssdv; - RangeTreeReader tree = treeDV.getRangeTreeReader(); - - /* - for(int i=0;i " + treeDV.lookupOrd(i)); - } - */ - - // lower - final long minOrdIncl; - if (minValue == null) { - minOrdIncl = 0; - } else { - long ord = ssdv.lookupTerm(minValue); - if (ord >= 0) { - // Exact match - if (minInclusive) { - minOrdIncl = ord; - } else { - minOrdIncl = ord+1; - } - } else { - minOrdIncl = -ord-1; - } - } - - // upper - final long maxOrdIncl; - if (maxValue == null) { - maxOrdIncl = Long.MAX_VALUE; - } else { - long ord = ssdv.lookupTerm(maxValue); - if (ord >= 0) { - // Exact match - if (maxInclusive) { - maxOrdIncl = ord; - } else { - maxOrdIncl = ord-1; - } - } else { - maxOrdIncl = -ord-2; - } - } - - if (maxOrdIncl < minOrdIncl) { - // This can happen when the requested range lies entirely between 2 adjacent ords: - return null; - } - - //System.out.println(reader + ": ORD: " + minOrdIncl + "-" + maxOrdIncl + "; " + minValue + " - " + maxValue); - - // Just a "view" of only the ords from the SSDV, as an SNDV. Maybe we - // have this view implemented somewhere else already? It's not so bad that - // we are inefficient here (making 2 passes over the ords): this is only - // used in at most 2 leaf cells (the boundary cells). 
- SortedNumericDocValues ords = new SortedNumericDocValues() { - - private long[] ords = new long[2]; - private int count; - - @Override - public void setDocument(int doc) { - ssdv.setDocument(doc); - long ord; - count = 0; - while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - if (count == ords.length) { - ords = ArrayUtil.grow(ords, count+1); - } - ords[count++] = ord; - } - } - - @Override - public int count() { - return count; - } - - @Override - public long valueAt(int index) { - return ords[index]; - } - }; - - DocIdSet result = tree.intersect(minOrdIncl, maxOrdIncl, ords, context.reader().maxDoc()); - - final DocIdSetIterator disi = result.iterator(); - - return new ConstantScoreScorer(this, score(), disi); - } - }; - } - - @Override - public int hashCode() { - int hash = super.hashCode(); - if (minValue != null) hash += minValue.hashCode()^0x14fa55fb; - if (maxValue != null) hash += maxValue.hashCode()^0x733fa5fe; - return hash + - (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+ - (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe); - } - - @Override - public boolean equals(Object other) { - if (super.equals(other)) { - final SortedSetRangeTreeQuery q = (SortedSetRangeTreeQuery) other; - return ( - (q.minValue == null ? minValue == null : q.minValue.equals(minValue)) && - (q.maxValue == null ? maxValue == null : q.maxValue.equals(maxValue)) && - minInclusive == q.minInclusive && - maxInclusive == q.maxInclusive - ); - } - - return false; - } - - @Override - public String toString(String field) { - final StringBuilder sb = new StringBuilder(); - sb.append(getClass().getSimpleName()); - sb.append(':'); - if (this.field.equals(field) == false) { - sb.append("field="); - sb.append(this.field); - sb.append(':'); - } - - return sb.append(minInclusive ? '[' : '{') - .append((minValue == null) ? "*" : minValue.toString()) - .append(" TO ") - .append((maxValue == null) ? "*" : maxValue.toString()) - .append(maxInclusive ? ']' : '}') - .toString(); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/rangetree/package.html b/lucene/sandbox/src/java/org/apache/lucene/rangetree/package.html deleted file mode 100644 index e6574183057..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/rangetree/package.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - - -This package contains a numeric tree implementation for indexing long values enabling fast range searching. 
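For readers skimming the deletion above: the essential trick in SortedSetRangeTreeQuery was converting BytesRef bounds into inclusive ord bounds before intersecting. Condensed from the removed createWeight (ssdv, minValue, minInclusive as declared there):

  // lookupTerm returns the ord on an exact match, else (-insertionPoint - 1):
  final long minOrdIncl;
  long ord = ssdv.lookupTerm(minValue);
  if (ord >= 0) {
    minOrdIncl = minInclusive ? ord : ord + 1;  // exact match, maybe exclusive
  } else {
    minOrdIncl = -ord - 1;                      // first ord above minValue
  }
  // The upper bound is symmetric (-ord - 2 on a miss); when
  // maxOrdIncl < minOrdIncl the requested range fell entirely between two
  // adjacent ords and the segment matches nothing.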
- - diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/DimensionalPointInPolygonQuery.java similarity index 58% rename from lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java rename to lucene/sandbox/src/java/org/apache/lucene/search/DimensionalPointInPolygonQuery.java index 3252af1e560..1258fb67fa8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInPolygonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/DimensionalPointInPolygonQuery.java @@ -1,4 +1,4 @@ -package org.apache.lucene.bkdtree; +package org.apache.lucene.search; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -19,35 +19,24 @@ package org.apache.lucene.bkdtree; import java.io.IOException; import java.util.Arrays; -import java.util.Set; +import org.apache.lucene.document.DimensionalLatLonField; +import org.apache.lucene.index.DimensionalValues; +import org.apache.lucene.index.DimensionalValues.IntersectVisitor; +import org.apache.lucene.index.DimensionalValues.Relation; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; +import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.GeoUtils; +import org.apache.lucene.util.bkd.BKDUtil; /** Finds all previously indexed points that fall within the specified polygon. * - *

The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document. + *

The field must be indexed using {@link DimensionalLatLonField}, added per document. * - *

Because this implementation cannot intersect each cell with the polygon, it will be costly especially for large polygons, as every - * possible point must be checked. - * - *
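The dimensional replacement drops that limitation by classifying whole cells before visiting any points. In outline, condensed from the compare() added below (which additionally short-circuits to CELL_CROSSES_QUERY when the cell fully encloses the query's bounding box):

  if (GeoUtils.rectWithinPoly(cellMinLon, cellMinLat, cellMaxLon, cellMaxLat,
                              polyLons, polyLats,
                              minLon, minLat, maxLon, maxLat)) {
    return Relation.CELL_INSIDE_QUERY;   // collect every doc, no per-point tests
  } else if (GeoUtils.rectCrossesPoly(cellMinLon, cellMinLat, cellMaxLon, cellMaxLat,
                                      polyLons, polyLats,
                                      minLon, minLat, maxLon, maxLat)) {
    return Relation.CELL_CROSSES_QUERY;  // boundary cell: test each point
  } else {
    return Relation.CELL_OUTSIDE_QUERY;  // prune this whole subtree
  }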

NOTE: for fastest performance, this allocates FixedBitSet(maxDoc) for each segment. The score of each hit is the query boost. - * - * @lucene.experimental */ + * @lucene.experimental */ -public class BKDPointInPolygonQuery extends Query { +public class DimensionalPointInPolygonQuery extends Query { final String field; final double minLat; final double maxLat; @@ -57,7 +46,7 @@ public class BKDPointInPolygonQuery extends Query { final double[] polyLons; /** The lats/lons must be clockwise or counter-clockwise. */ - public BKDPointInPolygonQuery(String field, double[] polyLats, double[] polyLons) { + public DimensionalPointInPolygonQuery(String field, double[] polyLats, double[] polyLons) { this.field = field; if (polyLats.length != polyLons.length) { throw new IllegalArgumentException("polyLats and polyLons must be equal length"); @@ -83,13 +72,13 @@ public class BKDPointInPolygonQuery extends Query { double maxLat = Double.NEGATIVE_INFINITY; for(int i=0;i= maxLat && cellMinLon <= minLon && cellMaxLon >= maxLon) { + // Cell fully encloses the query + return Relation.CELL_CROSSES_QUERY; + } else if (GeoUtils.rectWithinPoly(cellMinLon, cellMinLat, cellMaxLon, cellMaxLat, + polyLons, polyLats, + minLon, minLat, maxLon, maxLat)) { + return Relation.CELL_INSIDE_QUERY; + } else if (GeoUtils.rectCrossesPoly(cellMinLon, cellMinLat, cellMaxLon, cellMaxLat, + polyLons, polyLats, + minLon, minLat, maxLon, maxLat)) { + return Relation.CELL_CROSSES_QUERY; + } else { + return Relation.CELL_OUTSIDE_QUERY; + } + } + }); + + // NOTE: hitCount[0] will be over-estimate in multi-valued case + return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator()); } }; } @@ -162,7 +168,7 @@ public class BKDPointInPolygonQuery extends Query { if (o == null || getClass() != o.getClass()) return false; if (!super.equals(o)) return false; - BKDPointInPolygonQuery that = (BKDPointInPolygonQuery) o; + DimensionalPointInPolygonQuery that = (DimensionalPointInPolygonQuery) o; if (Arrays.equals(polyLons, that.polyLons) == false) { return false; diff --git a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInBBoxQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/DimensionalPointInRectQuery.java similarity index 50% rename from lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInBBoxQuery.java rename to lucene/sandbox/src/java/org/apache/lucene/search/DimensionalPointInRectQuery.java index 3696441e2c4..73cffc0e5e4 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDPointInBBoxQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/DimensionalPointInRectQuery.java @@ -1,4 +1,4 @@ -package org.apache.lucene.bkdtree; +package org.apache.lucene.search; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -18,33 +18,25 @@ package org.apache.lucene.bkdtree; */ import java.io.IOException; -import java.util.Set; +import org.apache.lucene.document.DimensionalLatLonField; +import org.apache.lucene.index.DimensionalValues; +import org.apache.lucene.index.DimensionalValues.IntersectVisitor; +import org.apache.lucene.index.DimensionalValues.Relation; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.ConstantScoreScorer; -import 
org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; +import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.GeoUtils; +import org.apache.lucene.util.bkd.BKDUtil; /** Finds all previously indexed points that fall within the specified bounding box. * - *

The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document. + *

The field must be indexed using {@link DimensionalLatLonField}, added per document. * - *

NOTE: for fastest performance, this allocates FixedBitSet(maxDoc) for each segment. The score of each hit is the query boost. - * - * @lucene.experimental */ + * @lucene.experimental */ -public class BKDPointInBBoxQuery extends Query { +public class DimensionalPointInRectQuery extends Query { final String field; final double minLat; final double maxLat; @@ -52,18 +44,18 @@ public class BKDPointInBBoxQuery extends Query { final double maxLon; /** Matches all points >= minLon, minLat (inclusive) and < maxLon, maxLat (exclusive). */ - public BKDPointInBBoxQuery(String field, double minLat, double maxLat, double minLon, double maxLon) { + public DimensionalPointInRectQuery(String field, double minLat, double maxLat, double minLon, double maxLon) { this.field = field; - if (BKDTreeWriter.validLat(minLat) == false) { + if (GeoUtils.isValidLat(minLat) == false) { throw new IllegalArgumentException("minLat=" + minLat + " is not a valid latitude"); } - if (BKDTreeWriter.validLat(maxLat) == false) { + if (GeoUtils.isValidLat(maxLat) == false) { throw new IllegalArgumentException("maxLat=" + maxLat + " is not a valid latitude"); } - if (BKDTreeWriter.validLon(minLon) == false) { + if (GeoUtils.isValidLon(minLon) == false) { throw new IllegalArgumentException("minLon=" + minLon + " is not a valid longitude"); } - if (BKDTreeWriter.validLon(maxLon) == false) { + if (GeoUtils.isValidLon(maxLon) == false) { throw new IllegalArgumentException("maxLon=" + maxLon + " is not a valid longitude"); } this.minLon = minLon; @@ -82,21 +74,59 @@ public class BKDPointInBBoxQuery extends Query { @Override public Scorer scorer(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); - SortedNumericDocValues sdv = reader.getSortedNumericDocValues(field); - if (sdv == null) { - // No docs in this segment had this field + DimensionalValues values = reader.getDimensionalValues(); + if (values == null) { + // No docs in this segment had any dimensional fields return null; } - if (sdv instanceof BKDTreeSortedNumericDocValues == false) { - throw new IllegalStateException("field \"" + field + "\" was not indexed with BKDTreeDocValuesFormat: got: " + sdv); - } - BKDTreeSortedNumericDocValues treeDV = (BKDTreeSortedNumericDocValues) sdv; - BKDTreeReader tree = treeDV.getBKDTreeReader(); + DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); + int[] hitCount = new int[1]; + values.intersect(field, + new IntersectVisitor() { + @Override + public void grow(int count) { + result.grow(count); + } - DocIdSet result = tree.intersect(minLat, maxLat, minLon, maxLon, null, treeDV.delegate); + @Override + public void visit(int docID) { + hitCount[0]++; + result.add(docID); + } - return new ConstantScoreScorer(this, score(), result.iterator()); + @Override + public void visit(int docID, byte[] packedValue) { + assert packedValue.length == 8; + double lat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(packedValue, 0)); + double lon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(packedValue, 1)); + if (lat >= minLat && lat <= maxLat && lon >= minLon && lon <= maxLon) { + hitCount[0]++; + result.add(docID); + } + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + double cellMinLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(minPackedValue, 0)); + double cellMinLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(minPackedValue, 1)); + double cellMaxLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(maxPackedValue, 0)); + double 
cellMaxLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(maxPackedValue, 1)); + + if (minLat <= cellMinLat && maxLat >= cellMaxLat && minLon <= cellMinLon && maxLon >= cellMaxLon) { + return Relation.CELL_INSIDE_QUERY; + } + + if (cellMaxLat < minLat || cellMinLat > maxLat || cellMaxLon < minLon || cellMinLon > maxLon) { + return Relation.CELL_OUTSIDE_QUERY; + } + + return Relation.CELL_CROSSES_QUERY; + } + }); + + // NOTE: hitCount[0] will be over-estimate in multi-valued case + return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator()); } }; } @@ -111,9 +141,9 @@ public class BKDPointInBBoxQuery extends Query { q.setDisableCoord(true); // E.g.: maxLon = -179, minLon = 179 - BKDPointInBBoxQuery left = new BKDPointInBBoxQuery(field, minLat, maxLat, BKDTreeWriter.MIN_LON_INCL, maxLon); + DimensionalPointInRectQuery left = new DimensionalPointInRectQuery(field, minLat, maxLat, GeoUtils.MIN_LON_INCL, maxLon); q.add(new BooleanClause(left, BooleanClause.Occur.SHOULD)); - BKDPointInBBoxQuery right = new BKDPointInBBoxQuery(field, minLat, maxLat, minLon, BKDTreeWriter.MAX_LON_INCL); + DimensionalPointInRectQuery right = new DimensionalPointInRectQuery(field, minLat, maxLat, minLon, GeoUtils.MAX_LON_INCL); q.add(new BooleanClause(right, BooleanClause.Occur.SHOULD)); return new ConstantScoreQuery(q.build()); } else { @@ -133,8 +163,8 @@ public class BKDPointInBBoxQuery extends Query { @Override public boolean equals(Object other) { - if (super.equals(other) && other instanceof BKDPointInBBoxQuery) { - final BKDPointInBBoxQuery q = (BKDPointInBBoxQuery) other; + if (super.equals(other) && other instanceof DimensionalPointInRectQuery) { + final DimensionalPointInRectQuery q = (DimensionalPointInRectQuery) other; return field.equals(q.field) && minLat == q.minLat && maxLat == q.maxLat && diff --git a/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat deleted file mode 100644 index e1bb6245760..00000000000 --- a/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
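The rewritten scorer above is the heart of this cutover: instead of casting SortedNumericDocValues to a BKD-specific subclass, it drives the generic DimensionalValues.intersect() protocol, where compare() lets the reader skip a cell outright (CELL_OUTSIDE_QUERY), bulk-collect it via visit(int docID) (CELL_INSIDE_QUERY), or fall back to per-value filtering via visit(int docID, byte[] packedValue) (CELL_CROSSES_QUERY). Below is a minimal sketch of the same pattern for a one-dimensional int range; IntersectVisitor, Relation, DocIdSetBuilder and BKDUtil.bytesToInt are the APIs this patch uses, while the field name and bounds are illustrative:

    // Sketch only: the visit/compare protocol for a hypothetical 1D int range.
    // Assumes `values` and `result` are set up as in the scorer above.
    final int minValue = 10;
    final int maxValue = 20;
    values.intersect("intField",
      new IntersectVisitor() {
        @Override
        public void grow(int count) {
          result.grow(count);  // pre-size before a bulk add
        }

        @Override
        public void visit(int docID) {
          // compare() returned CELL_INSIDE_QUERY: every value in the cell matches.
          result.add(docID);
        }

        @Override
        public void visit(int docID, byte[] packedValue) {
          // compare() returned CELL_CROSSES_QUERY: decode and filter each value.
          int v = BKDUtil.bytesToInt(packedValue, 0);
          if (v >= minValue && v <= maxValue) {
            result.add(docID);
          }
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
          int cellMin = BKDUtil.bytesToInt(minPackedValue, 0);
          int cellMax = BKDUtil.bytesToInt(maxPackedValue, 0);
          if (minValue <= cellMin && maxValue >= cellMax) {
            return Relation.CELL_INSIDE_QUERY;
          }
          if (cellMax < minValue || cellMin > maxValue) {
            return Relation.CELL_OUTSIDE_QUERY;
          }
          return Relation.CELL_CROSSES_QUERY;
        }
      });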
- -org.apache.lucene.bkdtree.BKDTreeDocValuesFormat -org.apache.lucene.rangetree.RangeTreeDocValuesFormat - diff --git a/lucene/sandbox/src/test/org/apache/lucene/bkdtree/TestBKDTree.java b/lucene/sandbox/src/test/org/apache/lucene/bkdtree/TestBKDTree.java deleted file mode 100644 index a2d48a196c5..00000000000 --- a/lucene/sandbox/src/test/org/apache/lucene/bkdtree/TestBKDTree.java +++ /dev/null @@ -1,194 +0,0 @@ -package org.apache.lucene.bkdtree; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.BaseGeoPointTestCase; -import org.apache.lucene.util.GeoRect; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.SloppyMath; -import org.apache.lucene.util.TestUtil; - -// TODO: can test framework assert we don't leak temp files? 
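Note how the dateline case is handled at two levels: the query rewrite above splits a wrapping box (minLon > maxLon, e.g. minLon=179, maxLon=-179) into two SHOULD clauses bounded by GeoUtils.MIN_LON_INCL/MAX_LON_INCL, while the deleted test below (and its replacement, TestDimensionalQueries) ORs the two longitude bounds in rectContainsPoint. A self-contained sketch of that containment rule, with illustrative method and parameter names:

    // Sketch: dateline-aware box containment, mirroring rectContainsPoint below.
    static boolean boxContainsPoint(double minLat, double maxLat,
                                    double minLon, double maxLon,
                                    double lat, double lon) {
      if (lat < minLat || lat > maxLat) {
        return false;
      }
      if (minLon <= maxLon) {
        // Normal box: the point must satisfy both longitude bounds.
        return lon >= minLon && lon <= maxLon;
      }
      // Box wraps across the dateline: a point matches on either side of the
      // split, so the longitude bounds are OR'ed instead of AND'ed.
      return lon >= minLon || lon <= maxLon;
    }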
- -public class TestBKDTree extends BaseGeoPointTestCase { - - @Override - protected void addPointToDoc(String field, Document doc, double lat, double lon) { - doc.add(new BKDPointField(field, lat, lon)); - } - - @Override - protected Query newBBoxQuery(String field, GeoRect rect) { - return new BKDPointInBBoxQuery(field, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon); - } - - @Override - protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) { - // return new BKDDistanceQuery(field, centerLat, centerLon, radiusMeters); - return null; - } - - @Override - protected Query newDistanceRangeQuery(String field, double centerLat, double centerLon, double minRadiusMeters, double radiusMeters) { - return null; - } - - @Override - protected Query newPolygonQuery(String field, double[] lats, double[] lons) { - return new BKDPointInPolygonQuery(FIELD_NAME, lats, lons); - } - - @Override - protected void initIndexWriterConfig(final String fieldName, IndexWriterConfig iwc) { - final DocValuesFormat dvFormat = getDocValuesFormat(); - Codec codec = new Lucene60Codec() { - @Override - public DocValuesFormat getDocValuesFormatForField(String field) { - if (field.equals(fieldName)) { - return dvFormat; - } else { - return super.getDocValuesFormatForField(field); - } - } - }; - iwc.setCodec(codec); - } - - @Override - protected Boolean rectContainsPoint(GeoRect rect, double pointLat, double pointLon) { - - assert Double.isNaN(pointLat) == false; - - int rectLatMinEnc = BKDTreeWriter.encodeLat(rect.minLat); - int rectLatMaxEnc = BKDTreeWriter.encodeLat(rect.maxLat); - int rectLonMinEnc = BKDTreeWriter.encodeLon(rect.minLon); - int rectLonMaxEnc = BKDTreeWriter.encodeLon(rect.maxLon); - - int pointLatEnc = BKDTreeWriter.encodeLat(pointLat); - int pointLonEnc = BKDTreeWriter.encodeLon(pointLon); - - if (rect.minLon < rect.maxLon) { - return pointLatEnc >= rectLatMinEnc && - pointLatEnc < rectLatMaxEnc && - pointLonEnc >= rectLonMinEnc && - pointLonEnc < rectLonMaxEnc; - } else { - // Rect crosses dateline: - return pointLatEnc >= rectLatMinEnc && - pointLatEnc < rectLatMaxEnc && - (pointLonEnc >= rectLonMinEnc || - pointLonEnc < rectLonMaxEnc); - } - } - - private static final double POLY_TOLERANCE = 1e-7; - - @Override - protected Boolean polyRectContainsPoint(GeoRect rect, double pointLat, double pointLon) { - if (Math.abs(rect.minLat-pointLat) < POLY_TOLERANCE || - Math.abs(rect.maxLat-pointLat) < POLY_TOLERANCE || - Math.abs(rect.minLon-pointLon) < POLY_TOLERANCE || - Math.abs(rect.maxLon-pointLon) < POLY_TOLERANCE) { - // The poly check quantizes slightly differently, so we allow for boundary cases to disagree - return null; - } else { - return rectContainsPoint(rect, pointLat, pointLon); - } - } - - @Override - protected Boolean circleContainsPoint(double centerLat, double centerLon, double radiusMeters, double pointLat, double pointLon) { - double distanceKM = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon); - boolean result = distanceKM*1000.0 <= radiusMeters; - //System.out.println(" shouldMatch? 
centerLon=" + centerLon + " centerLat=" + centerLat + " pointLon=" + pointLon + " pointLat=" + pointLat + " result=" + result + " distanceMeters=" + (distanceKM * 1000)); - return result; - } - - @Override - protected Boolean distanceRangeContainsPoint(double centerLat, double centerLon, double minRadiusMeters, double radiusMeters, double pointLat, double pointLon) { - final double d = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon)*1000.0; - return d >= minRadiusMeters && d <= radiusMeters; - } - - public void testEncodeDecode() throws Exception { - int iters = atLeast(10000); - boolean small = random().nextBoolean(); - for(int iter=0;iter 0 && x == 0 && haveRealDoc) { - int oldDocID; - while (true) { - oldDocID = random().nextInt(docID); - if (missing.get(oldDocID) == false) { - break; - } - } - - // Identical to old value - values[docID] = values[oldDocID]; - if (VERBOSE) { - System.out.println(" doc=" + docID + " value=" + values[docID] + " bytes=" + longToBytes(values[docID]) + " (same as doc=" + oldDocID + ")"); - } - } else { - values[docID] = randomValue(); - haveRealDoc = true; - if (VERBOSE) { - System.out.println(" doc=" + docID + " value=" + values[docID] + " bytes=" + longToBytes(values[docID])); - } - } - } - - verify(missing, values); - } - - private static void verify(Bits missing, long[] values) throws Exception { - IndexWriterConfig iwc = newIndexWriterConfig(); - - // Else we can get O(N^2) merging: - int mbd = iwc.getMaxBufferedDocs(); - if (mbd != -1 && mbd < values.length/100) { - iwc.setMaxBufferedDocs(values.length/100); - } - final DocValuesFormat dvFormat = getDocValuesFormat(); - Codec codec = new Lucene60Codec() { - @Override - public DocValuesFormat getDocValuesFormatForField(String field) { - if (field.equals("sn_value") || field.equals("ss_value")) { - return dvFormat; - } else { - return super.getDocValuesFormatForField(field); - } - } - }; - iwc.setCodec(codec); - Directory dir; - if (values.length > 100000) { - dir = noVirusChecker(newFSDirectory(createTempDir("TestRangeTree"))); - } else { - dir = getDirectory(); - } - Set deleted = new HashSet<>(); - // RandomIndexWriter is too slow here: - IndexWriter w = new IndexWriter(dir, iwc); - for(int id=0;id 0 && random().nextInt(100) == 42) { - int idToDelete = random().nextInt(id); - w.deleteDocuments(new Term("id", ""+idToDelete)); - deleted.add(idToDelete); - if (VERBOSE) { - System.out.println(" delete id=" + idToDelete); - } - } - } - if (random().nextBoolean()) { - if (VERBOSE) { - System.out.println(" forceMerge(1)"); - } - w.forceMerge(1); - } - final IndexReader r = DirectoryReader.open(w, true); - w.close(); - - // We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the RangeTreeDVFormat: - IndexSearcher s = newSearcher(r, false); - - int numThreads = TestUtil.nextInt(random(), 2, 5); - - if (VERBOSE) { - System.out.println("TEST: use " + numThreads + " query threads"); - } - - List threads = new ArrayList<>(); - final int iters = atLeast(100); - - final CountDownLatch startingGun = new CountDownLatch(1); - final AtomicBoolean failed = new AtomicBoolean(); - - for(int i=0;i= lower && value <= upper; - } - - private static long randomValue() { - if (valueRange == 0) { - return random().nextLong(); - } else { - return valueMid + TestUtil.nextInt(random(), -valueRange, valueRange); - } - } - - public void testAccountableHasDelegate() throws Exception { - Directory dir = getDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(); - Codec codec = 
TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat()); - iwc.setCodec(codec); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - Document doc = new Document(); - doc.add(new SortedNumericDocValuesField("value", 187)); - w.addDocument(doc); - IndexReader r = w.getReader(); - - // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat: - IndexSearcher s = newSearcher(r, false); - // Need to run a query so the DV field is really loaded: - TopDocs hits = s.search(new NumericRangeTreeQuery("value", -30L, true, 187L, true), 1); - assertEquals(1, hits.totalHits); - assertTrue(Accountables.toString((Accountable) r.leaves().get(0).reader()).contains("delegate")); - IOUtils.close(r, w, dir); - } - - public void testMinMaxLong() throws Exception { - Directory dir = getDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(); - Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat()); - iwc.setCodec(codec); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - Document doc = new Document(); - doc.add(new SortedNumericDocValuesField("value", Long.MIN_VALUE)); - w.addDocument(doc); - - doc = new Document(); - doc.add(new SortedNumericDocValuesField("value", Long.MAX_VALUE)); - w.addDocument(doc); - - IndexReader r = w.getReader(); - - // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat: - IndexSearcher s = newSearcher(r, false); - - assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, 0L, true))); - assertEquals(1, s.count(new NumericRangeTreeQuery("value", 0L, true, Long.MAX_VALUE, true))); - assertEquals(2, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true))); - - IOUtils.close(r, w, dir); - } - - public void testBasicSortedSet() throws Exception { - Directory dir = getDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(); - Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat()); - iwc.setCodec(codec); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - Document doc = new Document(); - doc.add(new SortedSetDocValuesField("value", new BytesRef("abc"))); - w.addDocument(doc); - doc = new Document(); - doc.add(new SortedSetDocValuesField("value", new BytesRef("def"))); - w.addDocument(doc); - - IndexReader r = w.getReader(); - - // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat: - IndexSearcher s = newSearcher(r, false); - - assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("aaa"), true, new BytesRef("bbb"), true))); - assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("c"), true, new BytesRef("e"), true))); - assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("z"), true))); - - assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", null, true, new BytesRef("abc"), true))); - assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("abc"), true))); - assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("abc"), false))); - - assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), true, null, false))); - assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), true, new BytesRef("z"), true))); - assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), false, new 
BytesRef("z"), true))); - - IOUtils.close(r, w, dir); - } - - public void testLongMinMaxNumeric() throws Exception { - Directory dir = getDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(); - Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat()); - iwc.setCodec(codec); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - Document doc = new Document(); - doc.add(new SortedNumericDocValuesField("value", Long.MIN_VALUE)); - w.addDocument(doc); - doc = new Document(); - doc.add(new SortedNumericDocValuesField("value", Long.MAX_VALUE)); - w.addDocument(doc); - - IndexReader r = w.getReader(); - - // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat: - IndexSearcher s = newSearcher(r, false); - - assertEquals(2, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true))); - assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, false))); - assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, true))); - assertEquals(0, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, false))); - - assertEquals(2, s.count(new NumericRangeTreeQuery("value", null, true, null, true))); - - IOUtils.close(r, w, dir); - } - - public void testLongMinMaxSortedSet() throws Exception { - Directory dir = getDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(); - Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat()); - iwc.setCodec(codec); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - Document doc = new Document(); - doc.add(new SortedSetDocValuesField("value", longToBytes(Long.MIN_VALUE))); - w.addDocument(doc); - doc = new Document(); - doc.add(new SortedSetDocValuesField("value", longToBytes(Long.MAX_VALUE))); - w.addDocument(doc); - - IndexReader r = w.getReader(); - - // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat: - IndexSearcher s = newSearcher(r, false); - - assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), true, longToBytes(Long.MAX_VALUE), true))); - assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), true, longToBytes(Long.MAX_VALUE), false))); - assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), false, longToBytes(Long.MAX_VALUE), true))); - assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), false, longToBytes(Long.MAX_VALUE), false))); - - assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", null, true, null, true))); - - IOUtils.close(r, w, dir); - } - - public void testSortedSetNoOrdsMatch() throws Exception { - Directory dir = getDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(); - Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat()); - iwc.setCodec(codec); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - Document doc = new Document(); - doc.add(new SortedSetDocValuesField("value", new BytesRef("a"))); - w.addDocument(doc); - doc = new Document(); - doc.add(new SortedSetDocValuesField("value", new BytesRef("z"))); - w.addDocument(doc); - - IndexReader r = w.getReader(); - - // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat: - IndexSearcher s = newSearcher(r, false); - assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new 
BytesRef("m"), true, new BytesRef("n"), false))); - - assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", null, true, null, true))); - - IOUtils.close(r, w, dir); - } - - public void testNumericNoValuesMatch() throws Exception { - Directory dir = getDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(); - Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat()); - iwc.setCodec(codec); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - Document doc = new Document(); - doc.add(new SortedNumericDocValuesField("value", 17)); - w.addDocument(doc); - doc = new Document(); - doc.add(new SortedNumericDocValuesField("value", 22)); - w.addDocument(doc); - - IndexReader r = w.getReader(); - - // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat: - IndexSearcher s = newSearcher(r, false); - assertEquals(0, s.count(new NumericRangeTreeQuery("value", 17L, true, 13L, false))); - - IOUtils.close(r, w, dir); - } - - public void testNoDocs() throws Exception { - Directory dir = getDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(); - Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat()); - iwc.setCodec(codec); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - w.addDocument(new Document()); - - IndexReader r = w.getReader(); - - // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat: - IndexSearcher s = newSearcher(r, false); - assertEquals(0, s.count(new NumericRangeTreeQuery("value", 17L, true, 13L, false))); - - IOUtils.close(r, w, dir); - } - - private static BytesRef longToBytes(long v) { - // Flip the sign bit so negative longs sort before positive longs: - v ^= 0x8000000000000000L; - byte[] bytes = new byte[8]; - bytes[0] = (byte) (v >> 56); - bytes[1] = (byte) (v >> 48); - bytes[2] = (byte) (v >> 40); - bytes[3] = (byte) (v >> 32); - bytes[4] = (byte) (v >> 24); - bytes[5] = (byte) (v >> 16); - bytes[6] = (byte) (v >> 8); - bytes[7] = (byte) v; - return new BytesRef(bytes); - } - - /* - private static long bytesToLong(BytesRef bytes) { - long v = ((bytes.bytes[bytes.offset]&0xFFL) << 56) | - ((bytes.bytes[bytes.offset+1]&0xFFL) << 48) | - ((bytes.bytes[bytes.offset+2]&0xFFL) << 40) | - ((bytes.bytes[bytes.offset+3]&0xFFL) << 32) | - ((bytes.bytes[bytes.offset+4]&0xFFL) << 24) | - ((bytes.bytes[bytes.offset+5]&0xFFL) << 16) | - ((bytes.bytes[bytes.offset+6]&0xFFL) << 8) | - (bytes.bytes[bytes.offset+7]&0xFFL); - // Flip the sign bit back: - return v ^ 0x8000000000000000L; - } - */ - - private static DocValuesFormat getDocValuesFormat() { - int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048); - int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024); - return new RangeTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap); - } - - private static Directory noVirusChecker(Directory dir) { - if (dir instanceof MockDirectoryWrapper) { - ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); - } - return dir; - } - - private static Directory getDirectory() { - return noVirusChecker(newDirectory()); - } -} diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestDimensionalQueries.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestDimensionalQueries.java new file mode 100644 index 00000000000..6e9ef994d71 --- /dev/null +++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestDimensionalQueries.java @@ -0,0 +1,124 @@ +package org.apache.lucene.search; + +/* + * Licensed to the 
Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.DimensionalLatLonField; +import org.apache.lucene.document.Document; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BaseGeoPointTestCase; +import org.apache.lucene.util.GeoRect; +import org.apache.lucene.util.SloppyMath; + +public class TestDimensionalQueries extends BaseGeoPointTestCase { + + @Override + protected void addPointToDoc(String field, Document doc, double lat, double lon) { + doc.add(new DimensionalLatLonField(field, lat, lon)); + } + + @Override + protected Query newRectQuery(String field, GeoRect rect) { + return new DimensionalPointInRectQuery(field, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon); + } + + @Override + protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) { + // return new BKDDistanceQuery(field, centerLat, centerLon, radiusMeters); + return null; + } + + @Override + protected Query newDistanceRangeQuery(String field, double centerLat, double centerLon, double minRadiusMeters, double radiusMeters) { + return null; + } + + @Override + protected Query newPolygonQuery(String field, double[] lats, double[] lons) { + return new DimensionalPointInPolygonQuery(FIELD_NAME, lats, lons); + } + + @Override + protected Boolean rectContainsPoint(GeoRect rect, double pointLat, double pointLon) { + + assert Double.isNaN(pointLat) == false; + + int rectLatMinEnc = DimensionalLatLonField.encodeLat(rect.minLat); + int rectLatMaxEnc = DimensionalLatLonField.encodeLat(rect.maxLat); + int rectLonMinEnc = DimensionalLatLonField.encodeLon(rect.minLon); + int rectLonMaxEnc = DimensionalLatLonField.encodeLon(rect.maxLon); + + int pointLatEnc = DimensionalLatLonField.encodeLat(pointLat); + int pointLonEnc = DimensionalLatLonField.encodeLon(pointLon); + + if (rect.minLon < rect.maxLon) { + return pointLatEnc >= rectLatMinEnc && + pointLatEnc <= rectLatMaxEnc && + pointLonEnc >= rectLonMinEnc && + pointLonEnc <= rectLonMaxEnc; + } else { + // Rect crosses dateline: + return pointLatEnc >= rectLatMinEnc && + pointLatEnc <= rectLatMaxEnc && + (pointLonEnc >= rectLonMinEnc || + pointLonEnc <= rectLonMaxEnc); + } + } + + private static final double POLY_TOLERANCE = 1e-7; + + @Override + protected Boolean polyRectContainsPoint(GeoRect rect, double pointLat, double pointLon) { + if (Math.abs(rect.minLat-pointLat) < POLY_TOLERANCE || + Math.abs(rect.maxLat-pointLat) < POLY_TOLERANCE || + Math.abs(rect.minLon-pointLon) < POLY_TOLERANCE || + Math.abs(rect.maxLon-pointLon) < POLY_TOLERANCE) { + // The poly check quantizes slightly differently, so we allow for boundary cases to disagree + return null; + } else { + return rectContainsPoint(rect, pointLat, pointLon); + } + } + + @Override + protected Boolean 
circleContainsPoint(double centerLat, double centerLon, double radiusMeters, double pointLat, double pointLon) { + double distanceKM = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon); + boolean result = distanceKM*1000.0 <= radiusMeters; + //System.out.println(" shouldMatch? centerLon=" + centerLon + " centerLat=" + centerLat + " pointLon=" + pointLon + " pointLat=" + pointLat + " result=" + result + " distanceMeters=" + (distanceKM * 1000)); + return result; + } + + @Override + protected Boolean distanceRangeContainsPoint(double centerLat, double centerLon, double minRadiusMeters, double radiusMeters, double pointLat, double pointLon) { + final double d = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon)*1000.0; + return d >= minRadiusMeters && d <= radiusMeters; + } + + public void testEncodeDecode() throws Exception { + int iters = atLeast(10000); + boolean small = random().nextBoolean(); + for(int iter=0;iter= state.xMin || - cellXMax <= state.xMax || - cellYMin >= state.yMin || - cellYMax <= state.yMax || - cellZMin >= state.zMin || - cellZMax <= state.zMax) { - - // Only call the filter when the current cell does not fully contain the bbox: - Relation r = state.valueFilter.compare(cellXMin, cellXMax, - cellYMin, cellYMax, - cellZMin, cellZMax); - //System.out.println(" relation: " + r); - - if (r == Relation.SHAPE_OUTSIDE_CELL) { - // This cell is fully outside of the query shape: stop recursing - return 0; - } else if (r == Relation.CELL_INSIDE_SHAPE) { - // This cell is fully inside of the query shape: recursively add all points in this cell without filtering - - /* - System.out.println(Thread.currentThread() + ": switch to addAll at cell" + - " x=" + Geo3DDocValuesFormat.decodeValue(cellXMin) + " to " + Geo3DDocValuesFormat.decodeValue(cellXMax) + - " y=" + Geo3DDocValuesFormat.decodeValue(cellYMin) + " to " + Geo3DDocValuesFormat.decodeValue(cellYMax) + - " z=" + Geo3DDocValuesFormat.decodeValue(cellZMin) + " to " + Geo3DDocValuesFormat.decodeValue(cellZMax)); - */ - return addAll(state, nodeID); - } else { - // The cell crosses the shape boundary, so we fall through and do full filtering - } - } else { - // The whole point of the incoming bbox (state.xMin/xMax/etc.) is that it is - // supposed to fully enclose the shape, so this cell we are visiting, which - // fully contains the query's bbox, better in turn fully contain the shape! 
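The deleted BKD3DTreeReader above performs the same three-way pruning that the new DimensionalValues API formalizes: a cell wholly outside the shape is skipped, a cell wholly inside switches to addAll() with no per-point tests, and only crossing cells fall through to leaf filtering; as an extra twist, the filter is not even consulted when the cell fully contains the query's bounding box, since such a cell must then contain the shape. A condensed sketch of that recursion, where Cell, isLeaf, filterLeaf, addAll and splitValue are stand-ins for the deleted code's explicit per-dimension bounds:

    // Sketch of the prune/addAll/filter recursion in the deleted reader.
    int intersect(int nodeID, Cell cell) throws IOException {
      Relation r = filter.compare(cell.min, cell.max);
      if (r == Relation.SHAPE_OUTSIDE_CELL) {
        return 0;                   // prune: nothing in this cell can match
      } else if (r == Relation.CELL_INSIDE_SHAPE) {
        return addAll(nodeID);      // collect everything, no per-point tests
      } else if (isLeaf(nodeID)) {
        return filterLeaf(nodeID);  // crossing leaf: test each point
      } else {
        // Crossing inner node: recurse into both children around the split.
        return intersect(2 * nodeID, cell.leftHalf(splitValue(nodeID)))
             + intersect(2 * nodeID + 1, cell.rightHalf(splitValue(nodeID)));
      }
    }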
- assert state.valueFilter.compare(cellXMin, cellXMax, cellYMin, cellYMax, cellZMin, cellZMax) == Relation.SHAPE_INSIDE_CELL: "got " + state.valueFilter.compare(cellXMin, cellXMax, cellYMin, cellYMax, cellZMin, cellZMax); - } - - //System.out.println("\nintersect node=" + nodeID + " vs " + leafNodeOffset); - - if (nodeID >= leafNodeOffset) { - //System.out.println(" leaf"); - // Leaf node; scan and filter all points in this block: - //System.out.println(" intersect leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + leafBlockFPs[nodeID-leafNodeOffset]); - int hitCount = 0; - - long fp = leafBlockFPs[nodeID-leafNodeOffset]; - - /* - System.out.println("I: " + BKDTreeWriter.decodeLat(cellLatMinEnc) - + " " + BKDTreeWriter.decodeLat(cellLatMaxEnc) - + " " + BKDTreeWriter.decodeLon(cellLonMinEnc) - + " " + BKDTreeWriter.decodeLon(cellLonMaxEnc)); - */ - - state.in.seek(fp); - - // How many points are stored in this leaf cell: - int count = state.in.readVInt(); - - state.docs.grow(count); - //System.out.println(" count=" + count); - for(int i=0;i= splitValue) { - //System.out.println(" recurse right"); - count += intersect(state, - 2*nodeID+1, - splitValue, cellXMax, - cellYMin, cellYMax, - cellZMin, cellZMax); - } - - } else if (splitDim == 1) { - // Inner node split on y: - - // System.out.println(" split on lon=" + splitValue); - - // Left node: - if (state.yMin <= splitValue) { - // System.out.println(" recurse left"); - count += intersect(state, - 2*nodeID, - cellXMin, cellXMax, - cellYMin, splitValue, - cellZMin, cellZMax); - } - - // Right node: - if (state.yMax >= splitValue) { - // System.out.println(" recurse right"); - count += intersect(state, - 2*nodeID+1, - cellXMin, cellXMax, - splitValue, cellYMax, - cellZMin, cellZMax); - } - } else { - // Inner node split on z: - - // System.out.println(" split on lon=" + splitValue); - - // Left node: - if (state.zMin <= splitValue) { - // System.out.println(" recurse left"); - count += intersect(state, - 2*nodeID, - cellXMin, cellXMax, - cellYMin, cellYMax, - cellZMin, splitValue); - } - - // Right node: - if (state.zMax >= splitValue) { - // System.out.println(" recurse right"); - count += intersect(state, - 2*nodeID+1, - cellXMin, cellXMax, - cellYMin, cellYMax, - splitValue, cellZMax); - } - } - - return count; - } - } - - @Override - public long ramBytesUsed() { - return splitValues.length * RamUsageEstimator.NUM_BYTES_INT + - leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG; - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java deleted file mode 100644 index 57c1012e41c..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java +++ /dev/null @@ -1,924 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Arrays; -import java.util.Comparator; - -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.InPlaceMergeSorter; -import org.apache.lucene.util.LongBitSet; -import org.apache.lucene.util.OfflineSorter; -import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter; -import org.apache.lucene.util.RamUsageEstimator; - -// TODO -// - we could also index "auto-prefix terms" here, and use better compression, and maybe only use for the "fully contained" case so we'd -// only index docIDs -// - the index could be efficiently encoded as an FST, so we don't have wasteful -// (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then -// the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points -// per leaf, and you can reduce that by putting more points per leaf -// - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf -// - we could use threads while building; the higher nodes are very parallelizable -// - generalize to N dimenions? i think there are reasonable use cases here, e.g. -// 2 dimensional points to store houses, plus e.g. 3rd dimension for "household income" - -/** Recursively builds a BKD tree to assign all incoming points to smaller - * and smaller rectangles until the number of points in a given - * rectangle is <= the maxPointsInLeafNode. The tree is - * fully balanced, which means the leaf nodes will have between 50% and 100% of - * the requested maxPointsInLeafNode, except for the adversarial case - * of indexing exactly the same point many times. - * - *

- * <p>See this paper for details.
- *
- * <p>This consumes heap during writing: it allocates a LongBitSet(numPoints),
- * and for any nodes with fewer than maxPointsSortInHeap, it holds
- * the points in memory as simple java arrays.
- *

- * NOTE: This can write at most Integer.MAX_VALUE * maxPointsInLeafNode total points. - * - * @lucene.experimental */ - -class BKD3DTreeWriter { - - // x (int), y (int), z (int) + ord (long) + docID (int) - static final int BYTES_PER_DOC = RamUsageEstimator.NUM_BYTES_LONG + 4 * RamUsageEstimator.NUM_BYTES_INT; - - //static final boolean DEBUG = false; - - public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024; - - /** This works out to max of ~10 MB peak heap tied up during writing: */ - public static final int DEFAULT_MAX_POINTS_SORT_IN_HEAP = 128*1024;; - - private final byte[] scratchBytes = new byte[BYTES_PER_DOC]; - private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes); - - private final Directory tempDir; - private final String tempFileNamePrefix; - - private OfflineSorter.ByteSequencesWriter offlineWriter; - private GrowingHeapWriter heapWriter; - - private IndexOutput tempInput; - private final int maxPointsInLeafNode; - private final int maxPointsSortInHeap; - - private long pointCount; - - private final int[] scratchDocIDs; - - public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException { - this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP); - } - - // TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious: - public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException { - verifyParams(maxPointsInLeafNode, maxPointsSortInHeap); - this.tempDir = tempDir; - this.tempFileNamePrefix = tempFileNamePrefix; - this.maxPointsInLeafNode = maxPointsInLeafNode; - this.maxPointsSortInHeap = maxPointsSortInHeap; - scratchDocIDs = new int[maxPointsInLeafNode]; - - // We write first maxPointsSortInHeap in heap, then cutover to offline for additional points: - heapWriter = new GrowingHeapWriter(maxPointsSortInHeap); - } - - public static void verifyParams(int maxPointsInLeafNode, int maxPointsSortInHeap) { - if (maxPointsInLeafNode <= 0) { - throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode); - } - if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) { - throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode); - } - if (maxPointsSortInHeap < maxPointsInLeafNode) { - throw new IllegalArgumentException("maxPointsSortInHeap must be >= maxPointsInLeafNode; got " + maxPointsSortInHeap + " vs maxPointsInLeafNode="+ maxPointsInLeafNode); - } - if (maxPointsSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) { - throw new IllegalArgumentException("maxPointsSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsSortInHeap); - } - } - - /** If the current segment has too many points then we switchover to temp files / offline sort. 
*/ - private void switchToOffline() throws IOException { - - // For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree: - tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT); - offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput); - for(int i=0;i= maxPointsSortInHeap) { - if (offlineWriter == null) { - switchToOffline(); - } - scratchBytesOutput.reset(scratchBytes); - scratchBytesOutput.writeInt(x); - scratchBytesOutput.writeInt(y); - scratchBytesOutput.writeInt(z); - scratchBytesOutput.writeVInt(docID); - scratchBytesOutput.writeVLong(pointCount); - offlineWriter.write(scratchBytes, 0, scratchBytes.length); - } else { - // Not too many points added yet, continue using heap: - heapWriter.append(x, y, z, pointCount, docID); - } - - pointCount++; - } - - /** Changes incoming {@link ByteSequencesWriter} file to to fixed-width-per-entry file, because we need to be able to slice - * as we recurse in {@link #build}. */ - private Writer convertToFixedWidth(String in) throws IOException { - BytesRefBuilder scratch = new BytesRefBuilder(); - scratch.grow(BYTES_PER_DOC); - BytesRef bytes = scratch.get(); - ByteArrayDataInput dataReader = new ByteArrayDataInput(); - - OfflineSorter.ByteSequencesReader reader = null; - Writer sortedWriter = null; - boolean success = false; - try { - reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE)); - sortedWriter = getWriter(pointCount); - for (long i=0;i= 0: "docID=" + docID; - sortedWriter.append(x, y, z, ord, docID); - } - success = true; - } finally { - if (success) { - IOUtils.close(sortedWriter, reader); - } else { - IOUtils.closeWhileHandlingException(sortedWriter, reader); - try { - sortedWriter.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - } - } - - return sortedWriter; - } - - /** dim: 0=x, 1=y, 2=z */ - private Writer sort(int dim) throws IOException { - if (heapWriter != null) { - - assert pointCount < Integer.MAX_VALUE; - - // All buffered points are still in heap - new InPlaceMergeSorter() { - @Override - protected void swap(int i, int j) { - int docID = heapWriter.docIDs[i]; - heapWriter.docIDs[i] = heapWriter.docIDs[j]; - heapWriter.docIDs[j] = docID; - - long ord = heapWriter.ords[i]; - heapWriter.ords[i] = heapWriter.ords[j]; - heapWriter.ords[j] = ord; - - int x = heapWriter.xs[i]; - heapWriter.xs[i] = heapWriter.xs[j]; - heapWriter.xs[j] = x; - - int y = heapWriter.ys[i]; - heapWriter.ys[i] = heapWriter.ys[j]; - heapWriter.ys[j] = y; - - int z = heapWriter.zs[i]; - heapWriter.zs[i] = heapWriter.zs[j]; - heapWriter.zs[j] = z; - } - - @Override - protected int compare(int i, int j) { - int cmp; - if (dim == 0) { - cmp = Integer.compare(heapWriter.xs[i], heapWriter.xs[j]); - } else if (dim == 1) { - cmp = Integer.compare(heapWriter.ys[i], heapWriter.ys[j]); - } else { - cmp = Integer.compare(heapWriter.zs[i], heapWriter.zs[j]); - } - if (cmp != 0) { - return cmp; - } - - // Tie-break - cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]); - if (cmp != 0) { - return cmp; - } - - return Long.compare(heapWriter.ords[i], heapWriter.ords[j]); - } - }.sort(0, (int) pointCount); - - HeapWriter sorted = new HeapWriter((int) pointCount); - //System.out.println("sorted dim=" + dim); - for(int i=0;i= minX && x <= maxX: "x=" + x + " minX=" + minX + " maxX=" + maxX; - - int y = reader.y(); - assert y >= minY && y <= maxY: "y=" + y + " minY=" + minY + " maxY=" + maxY; 
- - int z = reader.z(); - assert z >= minZ && z <= maxZ: "z=" + z + " minZ=" + minZ + " maxZ=" + maxZ; - - if (splitDim == 0) { - splitValue = x; - } else if (splitDim == 1) { - splitValue = y; - } else { - splitValue = z; - } - success = true; - } finally { - if (success) { - IOUtils.close(reader); - } else { - IOUtils.closeWhileHandlingException(reader); - } - } - - // Mark ords that fall into the left half, and also handle the == boundary case: - assert bitSet.cardinality() == 0: "cardinality=" + bitSet.cardinality(); - - success = false; - reader = source.writer.getReader(source.start); - try { - int lastValue = Integer.MIN_VALUE; - for (int i=0;i= lastValue; - lastValue = value; - - assert value <= splitValue: "i=" + i + " value=" + value + " vs splitValue=" + splitValue; - long ord = reader.ord(); - int docID = reader.docID(); - assert docID >= 0: "docID=" + docID + " reader=" + reader; - - // We should never see dup ords: - assert bitSet.get(ord) == false; - bitSet.set(ord); - } - success = true; - } finally { - if (success) { - IOUtils.close(reader); - } else { - IOUtils.closeWhileHandlingException(reader); - } - } - - assert leftCount == bitSet.cardinality(): "leftCount=" + leftCount + " cardinality=" + bitSet.cardinality(); - - return splitValue; - } - - // Split on the dim with the largest range: - static int getSplitDim(int minX, int maxX, int minY, int maxY, int minZ, int maxZ) { - long xRange = (long) maxX - (long) minX; - long yRange = (long) maxY - (long) minY; - long zRange = (long) maxZ - (long) minZ; - - if (xRange > yRange) { - if (xRange > zRange) { - return 0; - } else { - return 2; - } - } else if (yRange > zRange) { - return 1; - } else { - return 2; - } - } - - /** The incoming PathSlice for the dim we will split is already partitioned/sorted. */ - private void build(int nodeID, int leafNodeOffset, - PathSlice lastXSorted, - PathSlice lastYSorted, - PathSlice lastZSorted, - LongBitSet bitSet, - IndexOutput out, - int minX, int maxX, - int minY, int maxY, - int minZ, int maxZ, - int[] splitValues, - long[] leafBlockFPs) throws IOException { - - long count = lastXSorted.count; - assert count > 0; - assert count <= ArrayUtil.MAX_ARRAY_LENGTH; - - assert count == lastYSorted.count; - assert count == lastZSorted.count; - - //if (DEBUG) System.out.println("\nBUILD: nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset + "\n lastXSorted=" + lastXSorted + "\n lastYSorted=" + lastYSorted + "\n lastZSorted=" + lastZSorted + "\n count=" + lastXSorted.count + " x=" + minX + " TO " + maxX + " y=" + minY + " TO " + maxY + " z=" + minZ + " TO " + maxZ); - - if (nodeID >= leafNodeOffset) { - // Leaf node: write block - //if (DEBUG) System.out.println(" leaf"); - assert maxX >= minX; - assert maxY >= minY; - assert maxZ >= minZ; - - //System.out.println("\nleaf:\n lat range: " + ((long) maxLatEnc-minLatEnc)); - //System.out.println(" lon range: " + ((long) maxLonEnc-minLonEnc)); - - // Sort by docID in the leaf so we get sequentiality at search time (may not matter?): - Reader reader = lastXSorted.writer.getReader(lastXSorted.start); - - assert count <= scratchDocIDs.length: "count=" + count + " scratchDocIDs.length=" + scratchDocIDs.length; - - boolean success = false; - try { - for (int i=0;i 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec. 
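The leaf writer here deliberately stores each docID as a plain 4-byte int; the (truncated) comment above records an experiment that traded index size (145 MB vs 539 MB) against query time (1.65 sec vs 2.64 sec over 225 queries). For reference, a sketch of the vInt-delta encoding that comments like this one weigh; DataOutput.writeVInt is the real Lucene API, while the helper itself is illustrative:

    import java.io.IOException;
    import java.util.Arrays;
    import org.apache.lucene.store.DataOutput;

    // Sketch: sort the leaf's docIDs and write ascending vInt deltas.
    // Denser on disk than fixed 4-byte ints, but decoding costs CPU at search time.
    static void writeDocIDDeltas(DataOutput out, int[] docIDs, int count) throws IOException {
      Arrays.sort(docIDs, 0, count);
      int last = 0;
      for (int i = 0; i < count; i++) {
        out.writeVInt(docIDs[i] - last);  // always >= 0 after sorting
        last = docIDs[i];
      }
    }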
- // I think if we also indexed prefix terms here we could do less costly compression - // on those lists: - int docID = scratchDocIDs[i]; - if (docID != lastDocID) { - out.writeInt(docID); - //System.out.println(" write docID=" + docID); - lastDocID = docID; - } - } - //long endFP = out.getFilePointer(); - //System.out.println(" bytes/doc: " + ((endFP - startFP) / count)); - } else { - - int splitDim = getSplitDim(minX, maxX, minY, maxY, minZ, maxZ); - //System.out.println(" splitDim=" + splitDim); - - PathSlice source; - - if (splitDim == 0) { - source = lastXSorted; - } else if (splitDim == 1) { - source = lastYSorted; - } else { - source = lastZSorted; - } - - // We let ties go to either side, so we should never get down to count == 0, even - // in adversarial case (all values are the same): - assert count > 0; - - // Inner node: partition/recurse - //if (DEBUG) System.out.println(" non-leaf"); - - assert nodeID < splitValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitValues.length; - - int splitValue = markLeftTree(splitDim, source, bitSet, - minX, maxX, - minY, maxY, - minZ, maxZ); - long leftCount = count/2; - - // TODO: we could save split value in here so we don't have to re-open file later: - - // Partition the other (not split) dims into sorted left and right sets, so we can recurse. - // This is somewhat hairy: we partition the next X, Y set according to how we had just - // partitioned the Z set, etc. - - Writer[] leftWriters = new Writer[3]; - Writer[] rightWriters = new Writer[3]; - - for(int dim=0;dim<3;dim++) { - if (dim == splitDim) { - continue; - } - - Writer leftWriter = null; - Writer rightWriter = null; - Reader reader = null; - - boolean success = false; - - int nextLeftCount = 0; - - PathSlice nextSource; - if (dim == 0) { - nextSource = lastXSorted; - } else if (dim == 1) { - nextSource = lastYSorted; - } else { - nextSource = lastZSorted; - } - - try { - leftWriter = getWriter(leftCount); - rightWriter = getWriter(nextSource.count - leftCount); - - assert nextSource.count == count; - reader = nextSource.writer.getReader(nextSource.start); - - // TODO: we could compute the split value here for each sub-tree and save an O(N) pass on recursion, but makes code hairier and only - // changes the constant factor of building, not the big-oh: - for (int i=0;i= 0: "docID=" + docID + " reader=" + reader; - //System.out.println(" i=" + i + " x=" + x + " ord=" + ord + " docID=" + docID); - if (bitSet.get(ord)) { - if (splitDim == 0) { - assert x <= splitValue: "x=" + x + " splitValue=" + splitValue; - } else if (splitDim == 1) { - assert y <= splitValue: "y=" + y + " splitValue=" + splitValue; - } else { - assert z <= splitValue: "z=" + z + " splitValue=" + splitValue; - } - leftWriter.append(x, y, z, ord, docID); - nextLeftCount++; - } else { - if (splitDim == 0) { - assert x >= splitValue: "x=" + x + " splitValue=" + splitValue; - } else if (splitDim == 1) { - assert y >= splitValue: "y=" + y + " splitValue=" + splitValue; - } else { - assert z >= splitValue: "z=" + z + " splitValue=" + splitValue; - } - rightWriter.append(x, y, z, ord, docID); - } - } - success = true; - } finally { - if (success) { - IOUtils.close(reader, leftWriter, rightWriter); - } else { - IOUtils.closeWhileHandlingException(reader, leftWriter, rightWriter); - } - } - - assert leftCount == nextLeftCount: "leftCount=" + leftCount + " nextLeftCount=" + nextLeftCount; - leftWriters[dim] = leftWriter; - rightWriters[dim] = rightWriter; - } - bitSet.clear(0, pointCount); - - long rightCount 
= count - leftCount; - - boolean success = false; - try { - if (splitDim == 0) { - build(2*nodeID, leafNodeOffset, - new PathSlice(source.writer, source.start, leftCount), - new PathSlice(leftWriters[1], 0, leftCount), - new PathSlice(leftWriters[2], 0, leftCount), - bitSet, - out, - minX, splitValue, - minY, maxY, - minZ, maxZ, - splitValues, leafBlockFPs); - leftWriters[1].destroy(); - leftWriters[2].destroy(); - - build(2*nodeID+1, leafNodeOffset, - new PathSlice(source.writer, source.start+leftCount, rightCount), - new PathSlice(rightWriters[1], 0, rightCount), - new PathSlice(rightWriters[2], 0, rightCount), - bitSet, - out, - splitValue, maxX, - minY, maxY, - minZ, maxZ, - splitValues, leafBlockFPs); - rightWriters[1].destroy(); - rightWriters[2].destroy(); - } else if (splitDim == 1) { - build(2*nodeID, leafNodeOffset, - new PathSlice(leftWriters[0], 0, leftCount), - new PathSlice(source.writer, source.start, leftCount), - new PathSlice(leftWriters[2], 0, leftCount), - bitSet, - out, - minX, maxX, - minY, splitValue, - minZ, maxZ, - splitValues, leafBlockFPs); - leftWriters[0].destroy(); - leftWriters[2].destroy(); - - build(2*nodeID+1, leafNodeOffset, - new PathSlice(rightWriters[0], 0, rightCount), - new PathSlice(source.writer, source.start+leftCount, rightCount), - new PathSlice(rightWriters[2], 0, rightCount), - bitSet, - out, - minX, maxX, - splitValue, maxY, - minZ, maxZ, - splitValues, leafBlockFPs); - rightWriters[0].destroy(); - rightWriters[2].destroy(); - } else { - build(2*nodeID, leafNodeOffset, - new PathSlice(leftWriters[0], 0, leftCount), - new PathSlice(leftWriters[1], 0, leftCount), - new PathSlice(source.writer, source.start, leftCount), - bitSet, - out, - minX, maxX, - minY, maxY, - minZ, splitValue, - splitValues, leafBlockFPs); - leftWriters[0].destroy(); - leftWriters[1].destroy(); - - build(2*nodeID+1, leafNodeOffset, - new PathSlice(rightWriters[0], 0, rightCount), - new PathSlice(rightWriters[1], 0, rightCount), - new PathSlice(source.writer, source.start+leftCount, rightCount), - bitSet, - out, - minX, maxX, - minY, maxY, - splitValue, maxZ, - splitValues, leafBlockFPs); - rightWriters[0].destroy(); - rightWriters[1].destroy(); - } - success = true; - } finally { - if (success == false) { - for(Writer writer : leftWriters) { - if (writer != null) { - try { - writer.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - } - } - for(Writer writer : rightWriters) { - if (writer != null) { - try { - writer.destroy(); - } catch (Throwable t) { - // Suppress to keep throwing original exc - } - } - } - } - } - - splitValues[nodeID] = splitValue; - } - } - - Writer getWriter(long count) throws IOException { - if (count < maxPointsSortInHeap) { - return new HeapWriter((int) count); - } else { - return new OfflineWriter(tempDir, tempFileNamePrefix, count); - } - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DBinaryDocValues.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DBinaryDocValues.java deleted file mode 100644 index 875e4f1afa7..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DBinaryDocValues.java +++ /dev/null @@ -1,42 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.util.BytesRef; - -class Geo3DBinaryDocValues extends BinaryDocValues { - final BKD3DTreeReader bkdTreeReader; - final BinaryDocValues delegate; - final double planetMax; - - public Geo3DBinaryDocValues(BKD3DTreeReader bkdTreeReader, BinaryDocValues delegate, double planetMax) { - this.bkdTreeReader = bkdTreeReader; - this.delegate = delegate; - this.planetMax = planetMax; - } - - public BKD3DTreeReader getBKD3DTreeReader() { - return bkdTreeReader; - } - - @Override - public BytesRef get(int docID) { - return delegate.get(docID); - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DDocValuesConsumer.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DDocValuesConsumer.java deleted file mode 100644 index 01db9fc23dc..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DDocValuesConsumer.java +++ /dev/null @@ -1,145 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
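Geo3DDocValuesConsumer, deleted below, wrote Double.doubleToLongBits(planetModel.getMaximumMagnitude()) at the head of its data file so the search side could make a best-effort check that it decodes with the same PlanetModel. A sketch of the matching read-side check; the variable names and exception message are illustrative:

    // Sketch: verify the stored planetMax against the search-time model.
    double storedPlanetMax = Double.longBitsToDouble(in.readLong());
    double expected = planetModel.getMaximumMagnitude();
    if (storedPlanetMax != expected) {
      throw new IllegalStateException("index was written with planetMax=" + storedPlanetMax
          + " but the search-time PlanetModel has planetMax=" + expected);
    }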
- */ - -import java.io.Closeable; -import java.io.IOException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.geo3d.PlanetModel; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; - -class Geo3DDocValuesConsumer extends DocValuesConsumer implements Closeable { - final DocValuesConsumer delegate; - final int maxPointsInLeafNode; - final int maxPointsSortInHeap; - final IndexOutput out; - final Map fieldIndexFPs = new HashMap<>(); - final SegmentWriteState state; - final Directory tempDir; - final String tempFileNamePrefix; - - public Geo3DDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, PlanetModel planetModel, DocValuesConsumer delegate, - SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException { - BKD3DTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap); - this.tempDir = tempDir; - this.tempFileNamePrefix = tempFileNamePrefix; - this.delegate = delegate; - this.maxPointsInLeafNode = maxPointsInLeafNode; - this.maxPointsSortInHeap = maxPointsSortInHeap; - this.state = state; - String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.DATA_EXTENSION); - out = state.directory.createOutput(datFileName, state.context); - CodecUtil.writeIndexHeader(out, Geo3DDocValuesFormat.DATA_CODEC_NAME, Geo3DDocValuesFormat.DATA_VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - - // We write the max for this PlanetModel into the index so we know we are decoding correctly at search time, and so we can also do - // best-effort check that the search time PlanetModel "matches": - out.writeLong(Double.doubleToLongBits(planetModel.getMaximumMagnitude())); - } - - @Override - public void close() throws IOException { - boolean success = false; - try { - CodecUtil.writeFooter(out); - success = true; - } finally { - if (success) { - IOUtils.close(delegate, out); - } else { - IOUtils.closeWhileHandlingException(delegate, out); - } - } - - String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.META_EXTENSION); - IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context); - success = false; - try { - CodecUtil.writeIndexHeader(metaOut, Geo3DDocValuesFormat.META_CODEC_NAME, Geo3DDocValuesFormat.META_VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - metaOut.writeVInt(fieldIndexFPs.size()); - for(Map.Entry ent : fieldIndexFPs.entrySet()) { - metaOut.writeVInt(ent.getKey()); - metaOut.writeVLong(ent.getValue()); - } - CodecUtil.writeFooter(metaOut); - success = true; - } finally { - if (success) { - IOUtils.close(metaOut); - } else { - IOUtils.closeWhileHandlingException(metaOut); - } - } - } - - @Override - public void addSortedNumericField(FieldInfo field, Iterable docToValueCount, Iterable values) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public void addNumericField(FieldInfo field, Iterable values) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public void addBinaryField(FieldInfo 
field, Iterable values) throws IOException { - delegate.addBinaryField(field, values); - BKD3DTreeWriter writer = new BKD3DTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap); - Iterator valuesIt = values.iterator(); - for (int docID=0;docID values, Iterable docToOrd) { - throw new UnsupportedOperationException(); - } - - @Override - public void addSortedSetField(FieldInfo field, Iterable values, Iterable docToOrdCount, Iterable ords) { - throw new UnsupportedOperationException(); - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DDocValuesFormat.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DDocValuesFormat.java deleted file mode 100644 index 0bb8348770d..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DDocValuesFormat.java +++ /dev/null @@ -1,167 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat; -import org.apache.lucene.geo3d.PlanetModel; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; - -import java.io.IOException; - -/** - * A {@link DocValuesFormat} to efficiently index geo-spatial 3D x,y,z points - * from {@link Geo3DPointField} for fast shape intersection queries using - * ({@link PointInGeo3DShapeQuery}) - * - *

<p>This wraps {@link Lucene54DocValuesFormat}, but saves its own BKD tree
- * structures to disk for fast query-time intersection. See this paper
- * for details.
- *
- * <p>The BKD tree slices up 3D x,y,z space into smaller and
- * smaller 3D rectangles, until the smallest rectangles have approximately
- * between X/2 and X points in them (X defaults to 1024), at which point
- * such leaf cells are written as a block to disk, while the index tree
- * structure, which records how space was sub-divided, is loaded into heap
- * at search time. The tree is then recursed based on whether
- * the left or right child overlaps with the query shape, and once
- * a leaf block is reached, all documents in that leaf block are collected
- * if the cell is fully enclosed by the query shape, or filtered and then
- * collected, if not.
- *
- * <p>The index is also quite compact, because docs only appear once in
- * the tree (no "prefix terms").
- *
- * <p>In addition to the files written by {@link Lucene54DocValuesFormat}, this format writes:
- * <ol>
- *   <li><code>.kd3d</code>: BKD leaf data and index</li>
- *   <li><code>.kd3m</code>: BKD metadata</li>
- * </ol>

The disk format is experimental and free to change suddenly, and this code - * likely has new and exciting bugs! - * - * @lucene.experimental */ - -public class Geo3DDocValuesFormat extends DocValuesFormat { - - static final String DATA_CODEC_NAME = "Geo3DData"; - static final int DATA_VERSION_START = 0; - static final int DATA_VERSION_CURRENT = DATA_VERSION_START; - static final String DATA_EXTENSION = "g3dd"; - - static final String META_CODEC_NAME = "Geo3DMeta"; - static final int META_VERSION_START = 0; - static final int META_VERSION_CURRENT = META_VERSION_START; - static final String META_EXTENSION = "g3dm"; - - private final int maxPointsInLeafNode; - private final int maxPointsSortInHeap; - - private final DocValuesFormat delegate = new Lucene54DocValuesFormat(); - - private final PlanetModel planetModel; - - /** Default constructor */ - public Geo3DDocValuesFormat() { - this(PlanetModel.WGS84, BKD3DTreeWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKD3DTreeWriter.DEFAULT_MAX_POINTS_SORT_IN_HEAP); - } - - /** Creates this with custom configuration. - * - * @param planetModel the {@link PlanetModel} to use; this is only used when writing - * @param maxPointsInLeafNode Maximum number of points in each leaf cell. Smaller values create a deeper tree with larger in-heap index and possibly - * faster searching. The default is 1024. - * @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used. When the number of points exceeds this, a (slower) - * offline sort is used. The default is 128 * 1024. - * - * @lucene.experimental */ - public Geo3DDocValuesFormat(PlanetModel planetModel, int maxPointsInLeafNode, int maxPointsSortInHeap) { - super("BKD3DTree"); - BKD3DTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap); - this.maxPointsInLeafNode = maxPointsInLeafNode; - this.maxPointsSortInHeap = maxPointsSortInHeap; - this.planetModel = planetModel; - } - - @Override - public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException { - return new Geo3DDocValuesConsumer(state.directory, state.segmentInfo.name, planetModel, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap); - } - - @Override - public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { - return new Geo3DDocValuesProducer(delegate.fieldsProducer(state), state); - } - - /** Clips the incoming value to the allowed min/max range before encoding, instead of throwing an exception. 
*/ - static int encodeValueLenient(double planetMax, double x) { - if (x > planetMax) { - x = planetMax; - } else if (x < -planetMax) { - x = -planetMax; - } - return encodeValue(planetMax, x); - } - - static int encodeValue(double planetMax, double x) { - if (x > planetMax) { - throw new IllegalArgumentException("value=" + x + " is out-of-bounds (greater than planetMax=" + planetMax + ")"); - } - if (x < -planetMax) { - throw new IllegalArgumentException("value=" + x + " is out-of-bounds (less than than -planetMax=" + -planetMax + ")"); - } - long y = Math.round (x * (Integer.MAX_VALUE / planetMax)); - assert y >= Integer.MIN_VALUE; - assert y <= Integer.MAX_VALUE; - - return (int) y; - } - - /** Center decode */ - static double decodeValueCenter(double planetMax, int x) { - return x * (planetMax / Integer.MAX_VALUE); - } - - /** More negative decode, at bottom of cell */ - static double decodeValueMin(double planetMax, int x) { - return (((double)x) - 0.5) * (planetMax / Integer.MAX_VALUE); - } - - /** More positive decode, at top of cell */ - static double decodeValueMax(double planetMax, int x) { - return (((double)x) + 0.5) * (planetMax / Integer.MAX_VALUE); - } - - - static int readInt(byte[] bytes, int offset) { - return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset+1] & 0xFF) << 16) - | ((bytes[offset+2] & 0xFF) << 8) | (bytes[offset+3] & 0xFF); - } - - static void writeInt(int value, byte[] bytes, int offset) { - bytes[offset] = (byte) ((value >> 24) & 0xff); - bytes[offset+1] = (byte) ((value >> 16) & 0xff); - bytes[offset+2] = (byte) ((value >> 8) & 0xff); - bytes[offset+3] = (byte) (value & 0xff); - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DDocValuesProducer.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DDocValuesProducer.java deleted file mode 100644 index 983d3d37555..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DDocValuesProducer.java +++ /dev/null @@ -1,177 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
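The encode/decode helpers above (and their copies in the new Geo3DUtil later in this patch) quantize a double coordinate in [-planetMax, planetMax] onto the full signed 32-bit int range. A standalone sketch with the logic copied from the patch, plus a check of the half-quantum min/max de-quantization bounds; the planetMax value is illustrative:

public class QuantizeDemo {
  // Logic copied from encodeValue/decodeValue* above; the out-of-bounds
  // exceptions from the patch are omitted for brevity.
  static int encodeValue(double planetMax, double x) {
    return (int) Math.round(x * (Integer.MAX_VALUE / planetMax));
  }

  static double decodeValueCenter(double planetMax, int x) {
    return x * (planetMax / Integer.MAX_VALUE);
  }

  // encodeValue rounds, so every double that maps to int x lies within half a
  // quantum (planetMax / Integer.MAX_VALUE) of the decoded center; these two
  // methods recover that inclusive cell:
  static double decodeValueMin(double planetMax, int x) {
    return (x - 0.5) * (planetMax / Integer.MAX_VALUE);
  }

  static double decodeValueMax(double planetMax, int x) {
    return (x + 0.5) * (planetMax / Integer.MAX_VALUE);
  }

  public static void main(String[] args) {
    double planetMax = 1.0011188539924791; // illustrative; roughly WGS84's maximum magnitude
    double x = 0.7345267;
    int enc = encodeValue(planetMax, x);
    if (x < decodeValueMin(planetMax, enc) || x > decodeValueMax(planetMax, enc)) {
      throw new AssertionError("original value escaped its quantization cell");
    }
    System.out.println(x + " -> " + enc + " -> center " + decodeValueCenter(planetMax, enc));
  }
}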
- */ - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.store.ChecksumIndexInput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.RamUsageEstimator; - -class Geo3DDocValuesProducer extends DocValuesProducer { - - private final Map treeReaders = new HashMap<>(); - private final Map fieldToIndexFPs = new HashMap<>(); - - private final IndexInput datIn; - private final AtomicLong ramBytesUsed; - private final int maxDoc; - private final DocValuesProducer delegate; - private final boolean merging; - private final double planetMax; - - public Geo3DDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException { - String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.META_EXTENSION); - ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context); - CodecUtil.checkIndexHeader(metaIn, Geo3DDocValuesFormat.META_CODEC_NAME, Geo3DDocValuesFormat.META_VERSION_START, Geo3DDocValuesFormat.META_VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - int fieldCount = metaIn.readVInt(); - for(int i=0;i getChildResources() { - List resources = new ArrayList<>(); - for(Map.Entry ent : treeReaders.entrySet()) { - resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue())); - } - resources.add(Accountables.namedAccountable("delegate", delegate)); - - return resources; - } - - @Override - public synchronized DocValuesProducer getMergeInstance() throws IOException { - return new Geo3DDocValuesProducer(this); - } - - @Override - public long ramBytesUsed() { - return ramBytesUsed.get() + delegate.ramBytesUsed(); - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java deleted file mode 100644 index 2a2949c9a19..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java +++ /dev/null @@ -1,92 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
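The deleted producer above rebuilds its field-to-index-file-pointer map from the meta file: after the codec header, a vInt count of fields, then a (vInt fieldNumber, vLong indexFP) pair per field, then a checksum footer. A standalone sketch of just the variable-length table; Lucene's vInt/vLong store 7 payload bits per byte, low bits first, with the high bit as a continuation flag, and the header and footer are omitted here:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

// Standalone sketch of the meta-file table written/read above:
// vInt count, then (vInt fieldNumber, vLong indexFP) per field.
public class MetaTableDemo {
  static void writeVLong(ByteArrayOutputStream out, long v) {
    while ((v & ~0x7FL) != 0) {               // more than 7 bits remain
      out.write((int) ((v & 0x7F) | 0x80));   // low 7 bits, continuation set
      v >>>= 7;
    }
    out.write((int) v);
  }

  static long readVLong(ByteArrayInputStream in) {
    long v = 0;
    int shift = 0;
    while (true) {
      int b = in.read();
      v |= (long) (b & 0x7F) << shift;
      if ((b & 0x80) == 0) return v;
      shift += 7;
    }
  }

  public static void main(String[] args) {
    ByteArrayOutputStream meta = new ByteArrayOutputStream();
    writeVLong(meta, 2);                      // field count
    writeVLong(meta, 3); writeVLong(meta, 1024);   // fieldNumber=3 -> indexFP=1024
    writeVLong(meta, 7); writeVLong(meta, 99000);  // fieldNumber=7 -> indexFP=99000

    ByteArrayInputStream in = new ByteArrayInputStream(meta.toByteArray());
    long count = readVLong(in);
    for (long i = 0; i < count; i++) {
      System.out.println("field " + readVLong(in) + " -> indexFP " + readVLong(in));
    }
  }
}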
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -final class GrowingHeapWriter implements Writer { - int[] xs; - int[] ys; - int[] zs; - int[] docIDs; - long[] ords; - private int nextWrite; - final int maxSize; - - public GrowingHeapWriter(int maxSize) { - xs = new int[16]; - ys = new int[16]; - zs = new int[16]; - docIDs = new int[16]; - ords = new long[16]; - this.maxSize = maxSize; - } - - private int[] growExact(int[] arr, int size) { - assert size > arr.length; - int[] newArr = new int[size]; - System.arraycopy(arr, 0, newArr, 0, arr.length); - return newArr; - } - - private long[] growExact(long[] arr, int size) { - assert size > arr.length; - long[] newArr = new long[size]; - System.arraycopy(arr, 0, newArr, 0, arr.length); - return newArr; - } - - @Override - public void append(int x, int y, int z, long ord, int docID) { - assert ord == nextWrite; - if (xs.length == nextWrite) { - int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT)); - assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite; - xs = growExact(xs, nextSize); - ys = growExact(ys, nextSize); - zs = growExact(zs, nextSize); - ords = growExact(ords, nextSize); - docIDs = growExact(docIDs, nextSize); - } - xs[nextWrite] = x; - ys[nextWrite] = y; - zs[nextWrite] = z; - ords[nextWrite] = ord; - docIDs[nextWrite] = docID; - nextWrite++; - } - - @Override - public Reader getReader(long start) { - return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, nextWrite); - } - - @Override - public void close() { - } - - @Override - public void destroy() { - } - - @Override - public String toString() { - return "GrowingHeapWriter(count=" + nextWrite + " alloc=" + xs.length + ")"; - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java deleted file mode 100644 index 76d87db6313..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java +++ /dev/null @@ -1,73 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
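GrowingHeapWriter above grows five parallel arrays in lock-step via growExact, so every column always has the same allocated length. A standalone sketch of that pattern; the 1.5x growth here is a stand-in for ArrayUtil.oversize, and only two of the five columns are shown:

import java.util.Arrays;

// Standalone sketch of the parallel-array growth in GrowingHeapWriter above:
// the arrays must stay the same length, so they are grown together to one
// exact shared size computed once per overflow.
public class GrowDemo {
  static int[] xs = new int[16];
  static long[] ords = new long[16];
  static int next;

  static int oversize(int minSize) {           // stand-in for ArrayUtil.oversize
    return Math.max(minSize, xs.length + (xs.length >> 1));
  }

  static void append(int x, long ord) {
    if (next == xs.length) {
      int newSize = oversize(next + 1);
      xs = Arrays.copyOf(xs, newSize);         // growExact: copy, keep in sync
      ords = Arrays.copyOf(ords, newSize);
    }
    xs[next] = x;
    ords[next] = ord;
    next++;
  }

  public static void main(String[] args) {
    for (int i = 0; i < 100; i++) append(i, i);
    System.out.println("count=" + next + " alloc=" + xs.length);
  }
}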
- */ - -final class HeapReader implements Reader { - private int curRead; - final int[] xs; - final int[] ys; - final int[] zs; - final long[] ords; - final int[] docIDs; - final int end; - - HeapReader(int[] xs, int[] ys, int[] zs, long[] ords, int[] docIDs, int start, int end) { - this.xs = xs; - this.ys = ys; - this.zs = zs; - this.ords = ords; - this.docIDs = docIDs; - curRead = start-1; - this.end = end; - } - - @Override - public boolean next() { - curRead++; - return curRead < end; - } - - @Override - public int x() { - return xs[curRead]; - } - - @Override - public int y() { - return ys[curRead]; - } - - @Override - public int z() { - return zs[curRead]; - } - - @Override - public int docID() { - return docIDs[curRead]; - } - - @Override - public long ord() { - return ords[curRead]; - } - - @Override - public void close() { - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java deleted file mode 100644 index 9ced7133fb3..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java +++ /dev/null @@ -1,69 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
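HeapReader above is a cursor over the shared arrays: it initializes curRead to start - 1 so that the first next() call advances onto the first element, matching Lucene's advance-then-read iterator style. A standalone sketch:

// Standalone sketch of the cursor idiom in HeapReader above.
public class CursorDemo {
  public static void main(String[] args) {
    int[] docIDs = {5, 9, 12, 40};
    int start = 1, end = docIDs.length;
    int cur = start - 1;                 // positioned one slot before `start`
    while (++cur < end) {                // next(): advance, then bounds-test
      System.out.println("docID=" + docIDs[cur]);
    }
  }
}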
- */ - -final class HeapWriter implements Writer { - final int[] xs; - final int[] ys; - final int[] zs; - final int[] docIDs; - final long[] ords; - private int nextWrite; - private boolean closed; - - public HeapWriter(int count) { - xs = new int[count]; - ys = new int[count]; - zs = new int[count]; - docIDs = new int[count]; - ords = new long[count]; - } - - @Override - public void append(int x, int y, int z, long ord, int docID) { - xs[nextWrite] = x; - ys[nextWrite] = y; - zs[nextWrite] = z; - ords[nextWrite] = ord; - docIDs[nextWrite] = docID; - nextWrite++; - } - - @Override - public Reader getReader(long start) { - assert closed; - return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, xs.length); - } - - @Override - public void close() { - closed = true; - if (nextWrite != xs.length) { - throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + xs.length); - } - } - - @Override - public void destroy() { - } - - @Override - public String toString() { - return "HeapWriter(count=" + xs.length + ")"; - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java deleted file mode 100644 index cafa49aa6c7..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java +++ /dev/null @@ -1,84 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; - -final class OfflineReader implements Reader { - final IndexInput in; - long countLeft; - private int x; - private int y; - private int z; - private long ord; - private int docID; - - OfflineReader(Directory tempDir, String tempFileName, long start, long count) throws IOException { - in = tempDir.openInput(tempFileName, IOContext.READONCE); - in.seek(start * BKD3DTreeWriter.BYTES_PER_DOC); - this.countLeft = count; - } - - @Override - public boolean next() throws IOException { - if (countLeft == 0) { - return false; - } - countLeft--; - x = in.readInt(); - y = in.readInt(); - z = in.readInt(); - ord = in.readLong(); - docID = in.readInt(); - return true; - } - - @Override - public int x() { - return x; - } - - @Override - public int y() { - return y; - } - - @Override - public int z() { - return z; - } - - @Override - public long ord() { - return ord; - } - - @Override - public int docID() { - return docID; - } - - @Override - public void close() throws IOException { - in.close(); - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java deleted file mode 100644 index 5afa16a7371..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java +++ /dev/null @@ -1,77 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
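OfflineReader above can seek straight to the start'th point because every appended record has the same on-disk width: in.seek(start * BKD3DTreeWriter.BYTES_PER_DOC). A standalone sketch of that fixed-width layout; the 24-byte size is inferred from the int x/y/z, long ord, int docID fields, since the constant's value is not shown in this patch:

import java.nio.ByteBuffer;

// Standalone sketch of the fixed-width record addressing used above: record i
// starts at byte offset i * BYTES_PER_DOC, so no per-record index is needed.
public class RecordLayoutDemo {
  static final int BYTES_PER_DOC = 4 + 4 + 4 + 8 + 4; // x, y, z, ord, docID; presumably 24

  public static void main(String[] args) {
    ByteBuffer file = ByteBuffer.allocate(3 * BYTES_PER_DOC);
    for (int i = 0; i < 3; i++) {
      file.putInt(i * 10).putInt(i * 20).putInt(i * 30).putLong(i).putInt(100 + i);
    }
    long start = 2;                                // skip the first two records
    file.position((int) (start * BYTES_PER_DOC));  // in.seek(start * BYTES_PER_DOC)
    System.out.println("x=" + file.getInt() + " y=" + file.getInt() + " z=" + file.getInt()
        + " ord=" + file.getLong() + " docID=" + file.getInt());
  }
}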
- */ - -import java.io.IOException; - -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; - -final class OfflineWriter implements Writer { - - final Directory tempDir; - final IndexOutput out; - final byte[] scratchBytes = new byte[BKD3DTreeWriter.BYTES_PER_DOC]; - final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes); - final long count; - private long countWritten; - private boolean closed; - - public OfflineWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException { - this.tempDir = tempDir; - out = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT); - this.count = count; - } - - @Override - public void append(int x, int y, int z, long ord, int docID) throws IOException { - out.writeInt(x); - out.writeInt(y); - out.writeInt(z); - out.writeLong(ord); - out.writeInt(docID); - countWritten++; - } - - @Override - public Reader getReader(long start) throws IOException { - assert closed; - return new OfflineReader(tempDir, out.getName(), start, count-start); - } - - @Override - public void close() throws IOException { - closed = true; - out.close(); - if (count != countWritten) { - throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count); - } - } - - @Override - public void destroy() throws IOException { - tempDir.deleteFile(out.getName()); - } - - @Override - public String toString() { - return "OfflineWriter(count=" + count + " tempFileName=" + out.getName() + ")"; - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/PointInGeo3DShapeQuery.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/PointInGeo3DShapeQuery.java deleted file mode 100644 index 7db4972adf7..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/PointInGeo3DShapeQuery.java +++ /dev/null @@ -1,222 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
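The recursion described in the Geo3DDocValuesFormat javadoc above, and driven through BKD3DTreeReader.intersect by the deleted PointInGeo3DShapeQuery that follows, prunes or bulk-collects cells by their relation to the query. A standalone 1D sketch of that control flow; the real tree is 3D and block-based, while this collapses it to midpoint splits over a sorted array:

import java.util.ArrayList;
import java.util.List;

// Standalone sketch: recurse while cells cross the query, collect whole
// subtrees that are fully inside, filter point-by-point only at leaves.
public class IntersectDemo {
  enum Relation { CELL_INSIDE_QUERY, CELL_CROSSES_QUERY, CELL_OUTSIDE_QUERY }

  static int[] points = {1, 3, 4, 7, 8, 12, 15, 20};  // sorted 1D stand-in
  static int queryMin = 4, queryMax = 14;

  static Relation compare(int cellMin, int cellMax) {
    if (cellMax < queryMin || cellMin > queryMax) return Relation.CELL_OUTSIDE_QUERY;
    if (cellMin >= queryMin && cellMax <= queryMax) return Relation.CELL_INSIDE_QUERY;
    return Relation.CELL_CROSSES_QUERY;
  }

  static void intersect(int lo, int hi, List<Integer> hits) {
    switch (compare(points[lo], points[hi])) {
      case CELL_OUTSIDE_QUERY:
        return;                                       // prune the whole subtree
      case CELL_INSIDE_QUERY:
        for (int i = lo; i <= hi; i++) hits.add(points[i]);  // collect, no filtering
        return;
      default:                                        // crosses
        if (hi - lo < 2) {                            // "leaf": filter each point
          for (int i = lo; i <= hi; i++) {
            if (points[i] >= queryMin && points[i] <= queryMax) hits.add(points[i]);
          }
        } else {
          int mid = (lo + hi) >>> 1;
          intersect(lo, mid, hits);
          intersect(mid + 1, hi, hits);
        }
    }
  }

  public static void main(String[] args) {
    List<Integer> hits = new ArrayList<>();
    intersect(0, points.length - 1, hits);
    System.out.println(hits);   // [4, 7, 8, 12]
  }
}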
- */ - -import org.apache.lucene.geo3d.GeoArea; -import org.apache.lucene.geo3d.GeoAreaFactory; -import org.apache.lucene.geo3d.GeoShape; -import org.apache.lucene.geo3d.PlanetModel; -import org.apache.lucene.geo3d.XYZBounds; -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; -import org.apache.lucene.util.BytesRef; - -import java.io.IOException; - -/** Finds all previously indexed points that fall within the specified polygon. - * - *

<p>The field must be indexed with {@link Geo3DDocValuesFormat}, and {@link Geo3DPointField} added per document.
- *
- * <p>Because this implementation cannot intersect each cell with the polygon, it will be costly,
- * especially for large polygons, as every possible point must be checked.
- *

NOTE: for fastest performance, this allocates FixedBitSet(maxDoc) for each segment. The score of each hit is the query boost. - * - * @lucene.experimental */ - -public class PointInGeo3DShapeQuery extends Query { - final String field; - final PlanetModel planetModel; - final GeoShape shape; - - /** The lats/lons must be clockwise or counter-clockwise. */ - public PointInGeo3DShapeQuery(PlanetModel planetModel, String field, GeoShape shape) { - this.field = field; - this.planetModel = planetModel; - this.shape = shape; - } - - @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - - // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be - // used in the first pass: - - return new ConstantScoreWeight(this) { - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); - BinaryDocValues bdv = reader.getBinaryDocValues(field); - if (bdv == null) { - // No docs in this segment had this field - return null; - } - - if (bdv instanceof Geo3DBinaryDocValues == false) { - throw new IllegalStateException("field \"" + field + "\" was not indexed with Geo3DBinaryDocValuesFormat: got: " + bdv); - } - final Geo3DBinaryDocValues treeDV = (Geo3DBinaryDocValues) bdv; - BKD3DTreeReader tree = treeDV.getBKD3DTreeReader(); - - XYZBounds bounds = new XYZBounds(); - shape.getBounds(bounds); - - final double planetMax = planetModel.getMaximumMagnitude(); - if (planetMax != treeDV.planetMax) { - throw new IllegalStateException(planetModel + " is not the same one used during indexing: planetMax=" + planetMax + " vs indexing planetMax=" + treeDV.planetMax); - } - - /* - GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel, - bounds.getMinimumX(), - bounds.getMaximumX(), - bounds.getMinimumY(), - bounds.getMaximumY(), - bounds.getMinimumZ(), - bounds.getMaximumZ()); - - assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid; - */ - - DocIdSet result = tree.intersect(Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMinimumX()), - Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMaximumX()), - Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMinimumY()), - Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMaximumY()), - Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMinimumZ()), - Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMaximumZ()), - new BKD3DTreeReader.ValueFilter() { - @Override - public boolean accept(int docID) { - //System.out.println(" accept? 
docID=" + docID); - BytesRef bytes = treeDV.get(docID); - if (bytes == null) { - //System.out.println(" false (null)"); - return false; - } - - assert bytes.length == 12; - double x = Geo3DDocValuesFormat.decodeValueCenter(treeDV.planetMax, Geo3DDocValuesFormat.readInt(bytes.bytes, bytes.offset)); - double y = Geo3DDocValuesFormat.decodeValueCenter(treeDV.planetMax, Geo3DDocValuesFormat.readInt(bytes.bytes, bytes.offset+4)); - double z = Geo3DDocValuesFormat.decodeValueCenter(treeDV.planetMax, Geo3DDocValuesFormat.readInt(bytes.bytes, bytes.offset+8)); - // System.out.println(" accept docID=" + docID + " point: x=" + x + " y=" + y + " z=" + z); - - // True if x,y,z is within shape - //System.out.println(" x=" + x + " y=" + y + " z=" + z); - //System.out.println(" ret: " + shape.isWithin(x, y, z)); - - return shape.isWithin(x, y, z); - } - - @Override - public BKD3DTreeReader.Relation compare(int cellXMinEnc, int cellXMaxEnc, int cellYMinEnc, int cellYMaxEnc, int cellZMinEnc, int cellZMaxEnc) { - assert cellXMinEnc <= cellXMaxEnc; - assert cellYMinEnc <= cellYMaxEnc; - assert cellZMinEnc <= cellZMaxEnc; - - // Because the BKD tree operates in quantized (64 bit -> 32 bit) space, and the cell bounds - // here are inclusive, we need to extend the bounds to the largest un-quantized values that - // could quantize into these bounds. The encoding (Geo3DDocValuesFormat.encodeValue) does - // a Math.round from double to long, so e.g. 1.4 -> 1, and -1.4 -> -1: - double cellXMin = Geo3DDocValuesFormat.decodeValueMin(treeDV.planetMax, cellXMinEnc); - double cellXMax = Geo3DDocValuesFormat.decodeValueMax(treeDV.planetMax, cellXMaxEnc); - double cellYMin = Geo3DDocValuesFormat.decodeValueMin(treeDV.planetMax, cellYMinEnc); - double cellYMax = Geo3DDocValuesFormat.decodeValueMax(treeDV.planetMax, cellYMaxEnc); - double cellZMin = Geo3DDocValuesFormat.decodeValueMin(treeDV.planetMax, cellZMinEnc); - double cellZMax = Geo3DDocValuesFormat.decodeValueMax(treeDV.planetMax, cellZMaxEnc); - //System.out.println(" compare: x=" + cellXMin + "-" + cellXMax + " y=" + cellYMin + "-" + cellYMax + " z=" + cellZMin + "-" + cellZMax); - - GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel, cellXMin, cellXMax, cellYMin, cellYMax, cellZMin, cellZMax); - - switch(xyzSolid.getRelationship(shape)) { - case GeoArea.CONTAINS: - // Shape fully contains the cell - //System.out.println(" inside"); - return BKD3DTreeReader.Relation.CELL_INSIDE_SHAPE; - case GeoArea.OVERLAPS: - // They do overlap but neither contains the other: - //System.out.println(" crosses1"); - return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL; - case GeoArea.WITHIN: - // Cell fully contains the shape: - //System.out.println(" crosses2"); - return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL; - case GeoArea.DISJOINT: - // They do not overlap at all - //System.out.println(" outside"); - return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL; - default: - assert false; - return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL; - } - } - }); - - final DocIdSetIterator disi = result.iterator(); - - return new ConstantScoreScorer(this, score(), disi); - } - }; - } - - @Override - @SuppressWarnings({"unchecked","rawtypes"}) - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - if (!super.equals(o)) return false; - - PointInGeo3DShapeQuery that = (PointInGeo3DShapeQuery) o; - - return planetModel.equals(that.planetModel) && shape.equals(that.shape); - } - - @Override - public final int 
hashCode() { - int result = super.hashCode(); - result = 31 * result + planetModel.hashCode(); - result = 31 * result + shape.hashCode(); - return result; - } - - @Override - public String toString(String field) { - final StringBuilder sb = new StringBuilder(); - sb.append(getClass().getSimpleName()); - sb.append(':'); - if (this.field.equals(field) == false) { - sb.append(" field="); - sb.append(this.field); - sb.append(':'); - } - sb.append("PlanetModel: "); - sb.append(planetModel); - sb.append(" Shape: "); - sb.append(shape); - return sb.toString(); - } -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java deleted file mode 100644 index a43eabec10e..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java +++ /dev/null @@ -1,31 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.Closeable; -import java.io.IOException; - -/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */ -interface Reader extends Closeable { - boolean next() throws IOException; - int x(); - int y(); - int z(); - long ord(); - int docID(); -} diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java deleted file mode 100644 index 5e366be20ab..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.apache.lucene.bkdtree3d; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.Closeable; -import java.io.IOException; - -/** Abstracts away whether OfflineSorter or simple arrays in heap are used. 
*/ -interface Writer extends Closeable { - void append(int x, int y, int z, long ord, int docID) throws IOException; - Reader getReader(long start) throws IOException; - void destroy() throws IOException; -} - diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/package-info.java b/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/package-info.java deleted file mode 100644 index dcafb757d22..00000000000 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/package-info.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Fast "indexed point inside geo3d shape" query implementation. - */ -package org.apache.lucene.bkdtree3d; diff --git a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DPointField.java b/lucene/spatial3d/src/java/org/apache/lucene/geo3d/Geo3DPointField.java similarity index 76% rename from lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DPointField.java rename to lucene/spatial3d/src/java/org/apache/lucene/geo3d/Geo3DPointField.java index 8be2a7930ad..836546f23ee 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Geo3DPointField.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/geo3d/Geo3DPointField.java @@ -1,4 +1,4 @@ -package org.apache.lucene.bkdtree3d; +package org.apache.lucene.geo3d; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,24 +17,23 @@ package org.apache.lucene.bkdtree3d; * limitations under the License. */ -import org.apache.lucene.geo3d.PlanetModel; -import org.apache.lucene.geo3d.GeoPoint; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.DocValuesType; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.bkd.BKDUtil; -// TODO: allow multi-valued, packing all points into a single BytesRef - -/** Add this to a document to index lat/lon point, but be sure to use {@link Geo3DDocValuesFormat} for the field. - +/** Add this to a document to index lat/lon or x/y/z point, indexed as a dimensional value. + * Multiple values are allowed: just add multiple Geo3DPointField to the document with the + * same field name. + * * @lucene.experimental */ public final class Geo3DPointField extends Field { /** Indexing {@link FieldType}. 
*/ public static final FieldType TYPE = new FieldType(); static { - TYPE.setDocValuesType(DocValuesType.BINARY); + TYPE.setDimensions(3, RamUsageEstimator.NUM_BYTES_INT); TYPE.freeze(); } @@ -62,9 +61,9 @@ public final class Geo3DPointField extends Field { private void fillFieldsData(double planetMax, double x, double y, double z) { byte[] bytes = new byte[12]; - Geo3DDocValuesFormat.writeInt(Geo3DDocValuesFormat.encodeValue(planetMax, x), bytes, 0); - Geo3DDocValuesFormat.writeInt(Geo3DDocValuesFormat.encodeValue(planetMax, y), bytes, 4); - Geo3DDocValuesFormat.writeInt(Geo3DDocValuesFormat.encodeValue(planetMax, z), bytes, 8); + BKDUtil.intToBytes(Geo3DUtil.encodeValue(planetMax, x), bytes, 0); + BKDUtil.intToBytes(Geo3DUtil.encodeValue(planetMax, y), bytes, 1); + BKDUtil.intToBytes(Geo3DUtil.encodeValue(planetMax, z), bytes, 2); fieldsData = new BytesRef(bytes); } } diff --git a/lucene/spatial3d/src/java/org/apache/lucene/geo3d/Geo3DUtil.java b/lucene/spatial3d/src/java/org/apache/lucene/geo3d/Geo3DUtil.java new file mode 100644 index 00000000000..10076cdc919 --- /dev/null +++ b/lucene/spatial3d/src/java/org/apache/lucene/geo3d/Geo3DUtil.java @@ -0,0 +1,60 @@ +package org.apache.lucene.geo3d; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class Geo3DUtil { + + /** Clips the incoming value to the allowed min/max range before encoding, instead of throwing an exception. 
 */
+  public static int encodeValueLenient(double planetMax, double x) {
+    if (x > planetMax) {
+      x = planetMax;
+    } else if (x < -planetMax) {
+      x = -planetMax;
+    }
+    return encodeValue(planetMax, x);
+  }
+
+  public static int encodeValue(double planetMax, double x) {
+    if (x > planetMax) {
+      throw new IllegalArgumentException("value=" + x + " is out-of-bounds (greater than planetMax=" + planetMax + ")");
+    }
+    if (x < -planetMax) {
+      throw new IllegalArgumentException("value=" + x + " is out-of-bounds (less than -planetMax=" + -planetMax + ")");
+    }
+    long y = Math.round(x * (Integer.MAX_VALUE / planetMax));
+    assert y >= Integer.MIN_VALUE;
+    assert y <= Integer.MAX_VALUE;
+
+    return (int) y;
+  }
+
+  /** Center decode */
+  public static double decodeValueCenter(double planetMax, int x) {
+    return x * (planetMax / Integer.MAX_VALUE);
+  }
+
+  /** More negative decode, at bottom of cell */
+  public static double decodeValueMin(double planetMax, int x) {
+    return (((double)x) - 0.5) * (planetMax / Integer.MAX_VALUE);
+  }
+
+  /** More positive decode, at top of cell */
+  public static double decodeValueMax(double planetMax, int x) {
+    return (((double)x) + 0.5) * (planetMax / Integer.MAX_VALUE);
+  }
+}
diff --git a/lucene/spatial3d/src/java/org/apache/lucene/geo3d/PointInGeo3DShapeQuery.java b/lucene/spatial3d/src/java/org/apache/lucene/geo3d/PointInGeo3DShapeQuery.java
new file mode 100644
index 00000000000..e9e8f34d1d8
--- /dev/null
+++ b/lucene/spatial3d/src/java/org/apache/lucene/geo3d/PointInGeo3DShapeQuery.java
@@ -0,0 +1,205 @@
+package org.apache.lucene.geo3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DimensionalValues;
+import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
+import org.apache.lucene.index.DimensionalValues.Relation;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.bkd.BKDUtil;
+
+/** Finds all previously indexed points that fall within the specified polygon.
+ *
+ *

The field must be indexed using {@link Geo3DPointField}. + * + * @lucene.experimental */ + +public class PointInGeo3DShapeQuery extends Query { + final String field; + final PlanetModel planetModel; + final GeoShape shape; + + /** The lats/lons must be clockwise or counter-clockwise. */ + public PointInGeo3DShapeQuery(PlanetModel planetModel, String field, GeoShape shape) { + this.field = field; + this.planetModel = planetModel; + this.shape = shape; + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + + // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be + // used in the first pass: + + return new ConstantScoreWeight(this) { + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + LeafReader reader = context.reader(); + DimensionalValues values = reader.getDimensionalValues(); + if (values == null) { + return null; + } + + /* + XYZBounds bounds = new XYZBounds(); + shape.getBounds(bounds); + + final double planetMax = planetModel.getMaximumMagnitude(); + if (planetMax != treeDV.planetMax) { + throw new IllegalStateException(planetModel + " is not the same one used during indexing: planetMax=" + planetMax + " vs indexing planetMax=" + treeDV.planetMax); + } + */ + + /* + GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel, + bounds.getMinimumX(), + bounds.getMaximumX(), + bounds.getMinimumY(), + bounds.getMaximumY(), + bounds.getMinimumZ(), + bounds.getMaximumZ()); + + assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid; + */ + + double planetMax = planetModel.getMaximumMagnitude(); + + DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); + + int[] hitCount = new int[1]; + values.intersect(field, + new IntersectVisitor() { + + @Override + public void visit(int docID) { + result.add(docID); + hitCount[0]++; + } + + @Override + public void visit(int docID, byte[] packedValue) { + assert packedValue.length == 12; + double x = Geo3DUtil.decodeValueCenter(planetMax, BKDUtil.bytesToInt(packedValue, 0)); + double y = Geo3DUtil.decodeValueCenter(planetMax, BKDUtil.bytesToInt(packedValue, 1)); + double z = Geo3DUtil.decodeValueCenter(planetMax, BKDUtil.bytesToInt(packedValue, 2)); + if (shape.isWithin(x, y, z)) { + result.add(docID); + hitCount[0]++; + } + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + // Because the dimensional format operates in quantized (64 bit -> 32 bit) space, and the cell bounds + // here are inclusive, we need to extend the bounds to the largest un-quantized values that + // could quantize into these bounds. The encoding (Geo3DUtil.encodeValue) does + // a Math.round from double to long, so e.g. 
1.4 -> 1, and -1.4 -> -1: + double xMin = Geo3DUtil.decodeValueMin(planetMax, BKDUtil.bytesToInt(minPackedValue, 0)); + double xMax = Geo3DUtil.decodeValueMax(planetMax, BKDUtil.bytesToInt(maxPackedValue, 0)); + double yMin = Geo3DUtil.decodeValueMin(planetMax, BKDUtil.bytesToInt(minPackedValue, 1)); + double yMax = Geo3DUtil.decodeValueMax(planetMax, BKDUtil.bytesToInt(maxPackedValue, 1)); + double zMin = Geo3DUtil.decodeValueMin(planetMax, BKDUtil.bytesToInt(minPackedValue, 2)); + double zMax = Geo3DUtil.decodeValueMax(planetMax, BKDUtil.bytesToInt(maxPackedValue, 2)); + + //System.out.println(" compare: x=" + cellXMin + "-" + cellXMax + " y=" + cellYMin + "-" + cellYMax + " z=" + cellZMin + "-" + cellZMax); + assert xMin <= xMax; + assert yMin <= yMax; + assert zMin <= zMax; + + GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel, xMin, xMax, yMin, yMax, zMin, zMax); + + switch(xyzSolid.getRelationship(shape)) { + case GeoArea.CONTAINS: + // Shape fully contains the cell + //System.out.println(" inside"); + return Relation.CELL_INSIDE_QUERY; + case GeoArea.OVERLAPS: + // They do overlap but neither contains the other: + //System.out.println(" crosses1"); + return Relation.CELL_CROSSES_QUERY; + case GeoArea.WITHIN: + // Cell fully contains the shape: + //System.out.println(" crosses2"); + // return Relation.SHAPE_INSIDE_CELL; + return Relation.CELL_CROSSES_QUERY; + case GeoArea.DISJOINT: + // They do not overlap at all + //System.out.println(" outside"); + return Relation.CELL_OUTSIDE_QUERY; + default: + assert false; + return Relation.CELL_CROSSES_QUERY; + } + } + }); + + // NOTE: hitCount[0] will be over-estimate in multi-valued case + return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator()); + } + }; + } + + @Override + @SuppressWarnings({"unchecked","rawtypes"}) + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + + PointInGeo3DShapeQuery that = (PointInGeo3DShapeQuery) o; + + return planetModel.equals(that.planetModel) && shape.equals(that.shape); + } + + @Override + public final int hashCode() { + int result = super.hashCode(); + result = 31 * result + planetModel.hashCode(); + result = 31 * result + shape.hashCode(); + return result; + } + + @Override + public String toString(String field) { + final StringBuilder sb = new StringBuilder(); + sb.append(getClass().getSimpleName()); + sb.append(':'); + if (this.field.equals(field) == false) { + sb.append(" field="); + sb.append(this.field); + sb.append(':'); + } + sb.append("PlanetModel: "); + sb.append(planetModel); + sb.append(" Shape: "); + sb.append(shape); + return sb.toString(); + } +} diff --git a/lucene/spatial3d/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/spatial3d/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat deleted file mode 100644 index c098dbd9968..00000000000 --- a/lucene/spatial3d/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
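With the cutover complete, indexing and searching go through the dimensional path shown in the new query above. A usage sketch assembled from this patch's testBasic and the new PointInGeo3DShapeQuery; the RAMDirectory, the analyzer-less IndexWriterConfig, and the one-degree circle radius are illustrative assumptions about the trunk API at the time:

import org.apache.lucene.document.Document;
import org.apache.lucene.geo3d.Geo3DPointField;
import org.apache.lucene.geo3d.GeoCircleFactory;
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.geo3d.PointInGeo3DShapeQuery;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class Geo3DDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig());

    // Index one point; the field packs encoded x/y/z as a 3-dimension value:
    Document doc = new Document();
    doc.add(new Geo3DPointField("field", PlanetModel.WGS84,
                                Math.toRadians(50.7345267), Math.toRadians(-97.5303555)));
    w.addDocument(doc);

    IndexReader r = DirectoryReader.open(w, true);
    IndexSearcher s = new IndexSearcher(r);

    // Circle around a nearby point; intersect() prunes cells via the
    // GeoAreaFactory relationships shown in the query's compare() above:
    System.out.println("hits=" + s.search(
        new PointInGeo3DShapeQuery(PlanetModel.WGS84, "field",
            GeoCircleFactory.makeGeoCircle(PlanetModel.WGS84,
                Math.toRadians(50), Math.toRadians(-97), Math.PI / 180.)), 1).totalHits);

    r.close();
    w.close();
    dir.close();
  }
}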
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat - diff --git a/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestGeo3DPointField.java b/lucene/spatial3d/src/test/org/apache/lucene/geo3d/TestGeo3DPointField.java similarity index 63% rename from lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestGeo3DPointField.java rename to lucene/spatial3d/src/test/org/apache/lucene/geo3d/TestGeo3DPointField.java index 8ad23ccc6ef..7ef605e114b 100644 --- a/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestGeo3DPointField.java +++ b/lucene/spatial3d/src/test/org/apache/lucene/geo3d/TestGeo3DPointField.java @@ -1,4 +1,4 @@ -package org.apache.lucene.bkdtree3d; +package org.apache.lucene.geo3d; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -28,21 +28,15 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; +import org.apache.lucene.codecs.DimensionalFormat; +import org.apache.lucene.codecs.DimensionalReader; +import org.apache.lucene.codecs.DimensionalWriter; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.lucene60.Lucene60DimensionalReader; +import org.apache.lucene.codecs.lucene60.Lucene60DimensionalWriter; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.geo3d.GeoArea; -import org.apache.lucene.geo3d.GeoAreaFactory; -import org.apache.lucene.geo3d.GeoBBoxFactory; -import org.apache.lucene.geo3d.GeoCircleFactory; -import org.apache.lucene.geo3d.GeoPath; -import org.apache.lucene.geo3d.GeoPoint; -import org.apache.lucene.geo3d.GeoPolygonFactory; -import org.apache.lucene.geo3d.GeoShape; -import org.apache.lucene.geo3d.PlanetModel; -import org.apache.lucene.geo3d.XYZBounds; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -50,16 +44,13 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Term; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; @@ -69,12 +60,6 @@ import org.junit.BeforeClass; import com.carrotsearch.randomizedtesting.generators.RandomInts; -import static 
org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueCenter; -import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMax; -import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMin; -import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.encodeValue; -import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.encodeValueLenient; - public class TestGeo3DPointField extends LuceneTestCase { private static boolean smallBBox; @@ -87,12 +72,39 @@ public class TestGeo3DPointField extends LuceneTestCase { } } + private static Codec getCodec() { + if (Codec.getDefault().getName().equals("Lucene60")) { + int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048); + double maxMBSortInHeap = 0.1 + (3*random().nextDouble()); + if (VERBOSE) { + System.out.println("TEST: using Lucene60DimensionalFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap); + } + + return new FilterCodec("Lucene60", Codec.getDefault()) { + @Override + public DimensionalFormat dimensionalFormat() { + return new DimensionalFormat() { + @Override + public DimensionalWriter fieldsWriter(SegmentWriteState writeState) throws IOException { + return new Lucene60DimensionalWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap); + } + + @Override + public DimensionalReader fieldsReader(SegmentReadState readState) throws IOException { + return new Lucene60DimensionalReader(readState); + } + }; + } + }; + } else { + return Codec.getDefault(); + } + } + public void testBasic() throws Exception { Directory dir = getDirectory(); - int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048); - int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024); IndexWriterConfig iwc = newIndexWriterConfig(); - iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Geo3DDocValuesFormat(PlanetModel.WGS84, maxPointsInLeaf, maxPointsSortInHeap))); + iwc.setCodec(getCodec()); IndexWriter w = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new Geo3DPointField("field", PlanetModel.WGS84, toRadians(50.7345267), toRadians(-97.5303555))); @@ -108,126 +120,10 @@ public class TestGeo3DPointField extends LuceneTestCase { dir.close(); } - public void testPlanetModelChanged() throws Exception { - Directory dir = getDirectory(); - int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048); - int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024); - IndexWriterConfig iwc = newIndexWriterConfig(); - iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Geo3DDocValuesFormat(PlanetModel.WGS84, maxPointsInLeaf, maxPointsSortInHeap))); - IndexWriter w = new IndexWriter(dir, iwc); - Document doc = new Document(); - doc.add(new Geo3DPointField("field", PlanetModel.WGS84, toRadians(50.7345267), toRadians(-97.5303555))); - w.addDocument(doc); - IndexReader r = DirectoryReader.open(w, true); - IndexSearcher s = new IndexSearcher(r); - try { - s.search(new PointInGeo3DShapeQuery(PlanetModel.SPHERE, - "field", - GeoCircleFactory.makeGeoCircle(PlanetModel.WGS84, toRadians(50), toRadians(-97), Math.PI/180.)), 1); - fail("did not hit exc"); - } catch (IllegalStateException ise) { - // expected - } - w.close(); - r.close(); - dir.close(); - } - private static double toRadians(double degrees) { return Math.PI*(degrees/360.0); } - public void testBKDBasic() throws Exception { - Directory dir = getDirectory(); - IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT); - - BKD3DTreeWriter w = new BKD3DTreeWriter(dir, 
"bkd3d"); - - w.add(0, 0, 0, 0); - w.add(1, 1, 1, 1); - w.add(-1, -1, -1, 2); - - long indexFP = w.finish(out); - out.close(); - - IndexInput in = dir.openInput("bkd", IOContext.DEFAULT); - in.seek(indexFP); - BKD3DTreeReader r = new BKD3DTreeReader(in, 3); - - DocIdSet hits = r.intersect(Integer.MIN_VALUE, Integer.MAX_VALUE, - Integer.MIN_VALUE, Integer.MAX_VALUE, - Integer.MIN_VALUE, Integer.MAX_VALUE, - - new BKD3DTreeReader.ValueFilter() { - - @Override - public boolean accept(int docID) { - return true; - } - - @Override - public BKD3DTreeReader.Relation compare(int xMin, int xMax, - int yMin, int yMax, - int zMin, int zMax) { - return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL; - } - - }); - DocIdSetIterator disi = hits.iterator(); - assertEquals(0, disi.nextDoc()); - assertEquals(1, disi.nextDoc()); - assertEquals(2, disi.nextDoc()); - assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc()); - in.close(); - dir.close(); - } - - static class Point { - final double x; - final double y; - final double z; - - public Point(double x, double y, double z) { - this.x = x; - this.y = y; - this.z = z; - } - - @Override - public String toString() { - return "x=" + x + " y=" + y + " z=" + z; - } - } - - private static class Range { - final double min; - final double max; - - public Range(double min, double max) { - this.min = min; - this.max = max; - } - - @Override - public String toString() { - return min + " TO " + max; - } - } - - private double randomCoord(PlanetModel planetModel) { - return planetModel.getMaximumMagnitude() * 2*(random().nextDouble()-0.5); - } - - private Range randomRange(PlanetModel planetModel) { - double x = randomCoord(planetModel); - double y = randomCoord(planetModel); - if (x < y) { - return new Range(x, y); - } else { - return new Range(y, x); - } - } - private static PlanetModel getPlanetModel() { if (random().nextBoolean()) { // Use one of the earth models: @@ -243,161 +139,6 @@ public class TestGeo3DPointField extends LuceneTestCase { } } - public void testBKDRandom() throws Exception { - List points = new ArrayList<>(); - int numPoints = atLeast(10000); - Directory dir = getDirectory(); - IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT); - int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048); - - int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024); - - PlanetModel planetModel = getPlanetModel(); - final double planetMax = planetModel.getMaximumMagnitude(); - - BKD3DTreeWriter w = new BKD3DTreeWriter(dir, "bkd3d", maxPointsInLeaf, maxPointsSortInHeap); - for(int docID=0;docID 0 && random().nextInt(30) == 17) { - // Dup point - point = points.get(random().nextInt(points.size())); - } else { - point = new Point(randomCoord(planetModel), - randomCoord(planetModel), - randomCoord(planetModel)); - } - - if (VERBOSE) { - System.err.println(" docID=" + docID + " point=" + point); - System.err.println(" x=" + encodeValue(planetMax, point.x) + - " y=" + encodeValue(planetMax, point.y) + - " z=" + encodeValue(planetMax, point.z)); - } - - points.add(point); - w.add(encodeValue(planetMax, point.x), - encodeValue(planetMax, point.y), - encodeValue(planetMax, point.z), - docID); - } - - long indexFP = w.finish(out); - out.close(); - - IndexInput in = dir.openInput("bkd", IOContext.DEFAULT); - in.seek(indexFP); - BKD3DTreeReader r = new BKD3DTreeReader(in, numPoints); - - int numIters = atLeast(100); - for(int iter=0;iter xMaxEnc || cellXMax < xMinEnc) { - return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL; - } - if 
-          if (cellYMin > yMaxEnc || cellYMax < yMinEnc) {
-            return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
-          }
-          if (cellZMin > zMaxEnc || cellZMax < zMinEnc) {
-            return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
-          }
-
-          if (cellXMin >= xMinEnc && cellXMax <= xMaxEnc &&
-              cellYMin >= yMinEnc && cellYMax <= yMaxEnc &&
-              cellZMin >= zMinEnc && cellZMax <= zMaxEnc) {
-            return BKD3DTreeReader.Relation.CELL_INSIDE_SHAPE;
-          }
-
-          if (xMinEnc >= cellXMin && xMaxEnc <= cellXMax &&
-              yMinEnc >= cellYMin && yMaxEnc <= cellYMax &&
-              zMinEnc >= cellZMin && zMaxEnc <= cellZMax) {
-            return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL;
-          }
-
-          return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL;
-        }
-      });
-
-      DocIdSetIterator disi = hits.iterator();
-      FixedBitSet matches = new FixedBitSet(numPoints);
-      while (true) {
-        int nextHit = disi.nextDoc();
-        if (nextHit == DocIdSetIterator.NO_MORE_DOCS) {
-          break;
-        }
-        matches.set(nextHit);
-      }
-      if (VERBOSE) {
-        System.err.println(" total hits: " + matches.cardinality());
-      }
-
-      for(int docID=0;docID<numPoints;docID++) {
-        Point point = points.get(docID);
-        int xEnc = encodeValue(planetMax, point.x);
-        int yEnc = encodeValue(planetMax, point.y);
-        int zEnc = encodeValue(planetMax, point.z);
-        boolean actual = matches.get(docID);
-        boolean expected = xEnc >= xMinEnc && xEnc <= xMaxEnc &&
-          yEnc >= yMinEnc && yEnc <= yMaxEnc &&
-          zEnc >= zMinEnc && zEnc <= zMaxEnc;
-
-        if (expected != actual) {
-          System.out.println("docID=" + docID + " is wrong: expected=" + expected + " actual=" + actual);
-          System.out.println(" x=" + point.x + " (" + xEnc + ")" + " y=" + point.y + " (" + yEnc + ")" + " z=" + point.z + " (" + zEnc + ")");
-          fail("wrong match");
-        }
-      }
-    }
-
-    in.close();
-    dir.close();
-  }
-
   private static class Cell {
     static int nextCellID;
 
@@ -426,9 +167,9 @@ public class TestGeo3DPointField extends LuceneTestCase {
 
     /** Returns true if the quantized point lies within this cell, inclusive on all bounds. */
     public boolean contains(double planetMax, GeoPoint point) {
-      int docX = encodeValue(planetMax, point.x);
-      int docY = encodeValue(planetMax, point.y);
-      int docZ = encodeValue(planetMax, point.z);
+      int docX = Geo3DUtil.encodeValue(planetMax, point.x);
+      int docY = Geo3DUtil.encodeValue(planetMax, point.y);
+      int docZ = Geo3DUtil.encodeValue(planetMax, point.z);
 
       return docX >= xMinEnc && docX <= xMaxEnc &&
         docY >= yMinEnc && docY <= yMaxEnc &&
@@ -442,9 +183,9 @@ public class TestGeo3DPointField extends LuceneTestCase {
   }
 
   private static GeoPoint quantize(double planetMax, GeoPoint point) {
-    return new GeoPoint(decodeValueCenter(planetMax, encodeValue(planetMax, point.x)),
-                        decodeValueCenter(planetMax, encodeValue(planetMax, point.y)),
-                        decodeValueCenter(planetMax, encodeValue(planetMax, point.z)));
+    return new GeoPoint(Geo3DUtil.decodeValueCenter(planetMax, Geo3DUtil.encodeValue(planetMax, point.x)),
+                        Geo3DUtil.decodeValueCenter(planetMax, Geo3DUtil.encodeValue(planetMax, point.y)),
+                        Geo3DUtil.decodeValueCenter(planetMax, Geo3DUtil.encodeValue(planetMax, point.z)));
   }
 
   /** Tests consistency of GeoArea.getRelationship vs GeoShape.isWithin */
@@ -488,12 +229,12 @@ public class TestGeo3DPointField extends LuceneTestCase {
 
     // Start with the root cell that fully contains the shape:
     Cell root = new Cell(null,
-                         encodeValueLenient(planetMax, bounds.getMinimumX()),
-                         encodeValueLenient(planetMax, bounds.getMaximumX()),
-                         encodeValueLenient(planetMax, bounds.getMinimumY()),
-                         encodeValueLenient(planetMax, bounds.getMaximumY()),
-                         encodeValueLenient(planetMax, bounds.getMinimumZ()),
-                         encodeValueLenient(planetMax, bounds.getMaximumZ()),
+                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMinimumX()),
+                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMaximumX()),
+                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMinimumY()),
+                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMaximumY()),
+                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMinimumZ()),
+                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMaximumZ()),
                          0);
 
     if (VERBOSE) {
@@ -534,14 +275,14 @@ public class TestGeo3DPointField extends LuceneTestCase {
       } else {
 
         GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel,
-                                                      decodeValueMin(planetMax, cell.xMinEnc), decodeValueMax(planetMax, cell.xMaxEnc),
-                                                      decodeValueMin(planetMax, cell.yMinEnc), decodeValueMax(planetMax, cell.yMaxEnc),
-                                                      decodeValueMin(planetMax, cell.zMinEnc), decodeValueMax(planetMax, cell.zMaxEnc));
+                                                      Geo3DUtil.decodeValueMin(planetMax, cell.xMinEnc), Geo3DUtil.decodeValueMax(planetMax, cell.xMaxEnc),
+                                                      Geo3DUtil.decodeValueMin(planetMax, cell.yMinEnc), Geo3DUtil.decodeValueMax(planetMax, cell.yMaxEnc),
+                                                      Geo3DUtil.decodeValueMin(planetMax, cell.zMinEnc), Geo3DUtil.decodeValueMax(planetMax, cell.zMaxEnc));
 
         if (VERBOSE) {
-          log.println(" minx="+decodeValueMin(planetMax, cell.xMinEnc)+" maxx="+decodeValueMax(planetMax, cell.xMaxEnc)+
-                      " miny="+decodeValueMin(planetMax, cell.yMinEnc)+" maxy="+decodeValueMax(planetMax, cell.yMaxEnc)+
-                      " minz="+decodeValueMin(planetMax, cell.zMinEnc)+" maxz="+decodeValueMax(planetMax, cell.zMaxEnc));
+          log.println(" minx="+Geo3DUtil.decodeValueMin(planetMax, cell.xMinEnc)+" maxx="+Geo3DUtil.decodeValueMax(planetMax, cell.xMaxEnc)+
+                      " miny="+Geo3DUtil.decodeValueMin(planetMax, cell.yMinEnc)+" maxy="+Geo3DUtil.decodeValueMax(planetMax, cell.yMaxEnc)+
+                      " minz="+Geo3DUtil.decodeValueMin(planetMax, cell.zMinEnc)+" maxz="+Geo3DUtil.decodeValueMax(planetMax, cell.zMaxEnc));
         }
 
         switch (xyzSolid.getRelationship(shape)) {
@@ -898,8 +639,6 @@ public class TestGeo3DPointField extends LuceneTestCase {
   }
 
   private static void verify(double[] lats, double[] lons) throws Exception {
-    int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
-    int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
     IndexWriterConfig iwc = newIndexWriterConfig();
 
     PlanetModel planetModel = getPlanetModel();
@@ -909,18 +648,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
     if (mbd != -1 && mbd < lats.length/100) {
       iwc.setMaxBufferedDocs(lats.length/100);
     }
-    final DocValuesFormat dvFormat = new Geo3DDocValuesFormat(planetModel, maxPointsInLeaf, maxPointsSortInHeap);
-    Codec codec = new Lucene60Codec() {
-      @Override
-      public DocValuesFormat getDocValuesFormatForField(String field) {
-        if (field.equals("point")) {
-          return dvFormat;
-        } else {
-          return super.getDocValuesFormatForField(field);
-        }
-      }
-    };
-    iwc.setCodec(codec);
+    iwc.setCodec(getCodec());
    Directory dir;
    if (lats.length > 100000) {
      dir = noVirusChecker(newFSDirectory(createTempDir("TestBKDTree")));
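
As an aside (not part of the patch): the test's codec wiring changes from overriding getDocValuesFormatForField on Lucene60Codec to wrapping the default codec in a FilterCodec that overrides only dimensionalFormat(). A minimal sketch of using such a helper, assuming the getCodec(), Geo3DPointField, and toRadians definitions from the hunks above; the field name "point" and the coordinates are illustrative:

    // Sketch: index one point under the randomized dimensional codec.
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setCodec(getCodec());   // FilterCodec overriding dimensionalFormat()
    try (Directory dir = newDirectory();
         IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new Geo3DPointField("point", PlanetModel.WGS84,
                                  toRadians(40.0), toRadians(-74.0)));
      w.addDocument(doc);
    }

Because only dimensionalFormat() is overridden, every other format (postings, doc values, stored fields) still comes from the delegate codec, which is what lets the test exercise random maxPointsInLeafNode/maxMBSortInHeap values without re-implementing the rest of Lucene60.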
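Similarly, Cell.contains and quantize above rely on the Geo3DUtil encode/decode helpers agreeing with each other: a coordinate encodes to an int cell whose decoded range [decodeValueMin, decodeValueMax] brackets both the decoded center and the original value. A small sketch of that assumed invariant, using only the signatures visible in the hunks (encodeValue(double, double) returning int, decodeValueMin/Center/Max(double, int) returning double); the assertions state an assumption, not documented Geo3DUtil behavior:

    // Sketch: quantization round-trip for one axis.
    double planetMax = PlanetModel.WGS84.getMaximumMagnitude();
    double x = 0.1234;                                        // raw coordinate
    int enc = Geo3DUtil.encodeValue(planetMax, x);            // quantize to int grid
    double lo  = Geo3DUtil.decodeValueMin(planetMax, enc);    // cell lower edge
    double mid = Geo3DUtil.decodeValueCenter(planetMax, enc); // what quantize() keeps
    double hi  = Geo3DUtil.decodeValueMax(planetMax, enc);    // cell upper edge
    assert lo <= mid && mid <= hi;                            // center lies in the cell
    assert lo <= x && x <= hi;                                // so does the original value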