LUCENE-9705: Create Lucene90PointsFormat (#52)

Ignacio Vera authored 2021-04-01 07:04:04 +02:00, committed by GitHub
parent 1d579b9448
commit 8c9b9546cc
24 changed files with 1025 additions and 209 deletions

View File

@ -16,7 +16,7 @@
*/
/**
* Components from the Lucene 7.0 index format. See {@link org.apache.lucene.codecs.lucene86} for an
* overview of the current index format.
* Components from the Lucene 7.0 index format. See {@link
* org.apache.lucene.backward_codecs.lucene86} for an overview of the current index format.
*/
package org.apache.lucene.backward_codecs.lucene70;

View File

@ -38,7 +38,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene86.Lucene86PointsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@ -49,7 +48,6 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
* <p>If you want to reuse functionality of this codec in another codec, extend {@link FilterCodec}.
*
* @lucene.experimental
* @see org.apache.lucene.codecs.lucene86 package documentation for file format details.
*/
public class Lucene86Codec extends Codec {
private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
@ -130,7 +128,7 @@ public class Lucene86Codec extends Codec {
}
@Override
public final PointsFormat pointsFormat() {
public PointsFormat pointsFormat() {
return pointsFormat;
}

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene86;
package org.apache.lucene.backward_codecs.lucene86;
import java.io.IOException;
import org.apache.lucene.codecs.PointsFormat;
@ -43,7 +43,7 @@ import org.apache.lucene.index.SegmentWriteState;
*
* @lucene.experimental
*/
public final class Lucene86PointsFormat extends PointsFormat {
public class Lucene86PointsFormat extends PointsFormat {
static final String DATA_CODEC_NAME = "Lucene86PointsFormatData";
static final String INDEX_CODEC_NAME = "Lucene86PointsFormatIndex";
@ -66,7 +66,7 @@ public final class Lucene86PointsFormat extends PointsFormat {
@Override
public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene86PointsWriter(state);
throw new UnsupportedOperationException("Old codecs may only be used for reading");
}
@Override

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene86;
package org.apache.lucene.backward_codecs.lucene86;
import java.io.IOException;
import java.util.HashMap;
@ -31,7 +31,7 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
/** Reads point values previously written with {@link Lucene86PointsWriter} */
/** Reads point values previously written with Lucene86PointsWriter */
public class Lucene86PointsReader extends PointsReader {
final IndexInput indexIn, dataIn;
final SegmentReadState readState;

View File

@ -25,6 +25,7 @@ import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
import org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat;
import org.apache.lucene.backward_codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
import org.apache.lucene.backward_codecs.lucene86.Lucene86PointsFormat;
import org.apache.lucene.backward_codecs.lucene86.Lucene86SegmentInfoFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
@ -39,7 +40,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene86.Lucene86PointsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@ -49,7 +49,6 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
*
* <p>If you want to reuse functionality of this codec in another codec, extend {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene86 package documentation for file format details.
* @lucene.experimental
*/
public class Lucene87Codec extends Codec {
@ -153,7 +152,7 @@ public class Lucene87Codec extends Codec {
}
@Override
public final PointsFormat pointsFormat() {
public PointsFormat pointsFormat() {
return pointsFormat;
}

View File

@ -294,85 +294,4 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
r.close();
dir.close();
}
public void testDocCountEdgeCases() {
PointValues values = getPointValues(Long.MAX_VALUE, 1, Long.MAX_VALUE);
long docs = values.estimateDocCount(null);
assertEquals(1, docs);
values = getPointValues(Long.MAX_VALUE, 1, 1);
docs = values.estimateDocCount(null);
assertEquals(1, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE);
docs = values.estimateDocCount(null);
assertEquals(Integer.MAX_VALUE, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE / 2);
docs = values.estimateDocCount(null);
assertEquals(Integer.MAX_VALUE, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, 1);
docs = values.estimateDocCount(null);
assertEquals(1, docs);
}
public void testRandomDocCount() {
for (int i = 0; i < 100; i++) {
long size = TestUtil.nextLong(random(), 1, Long.MAX_VALUE);
int maxDoc = (size > Integer.MAX_VALUE) ? Integer.MAX_VALUE : Math.toIntExact(size);
int docCount = TestUtil.nextInt(random(), 1, maxDoc);
long estimatedPointCount = TestUtil.nextLong(random(), 0, size);
PointValues values = getPointValues(size, docCount, estimatedPointCount);
long docs = values.estimateDocCount(null);
assertTrue(docs <= estimatedPointCount);
assertTrue(docs <= maxDoc);
assertTrue(docs >= estimatedPointCount / (size / docCount));
}
}
private PointValues getPointValues(long size, int docCount, long estimatedPointCount) {
return new PointValues() {
@Override
public void intersect(IntersectVisitor visitor) {
throw new UnsupportedOperationException();
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatedPointCount;
}
@Override
public byte[] getMinPackedValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMaxPackedValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumDimensions() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumIndexDimensions() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getBytesPerDimension() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long size() {
return size;
}
@Override
public int getDocCount() {
return docCount;
}
};
}
}

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene86;
package org.apache.lucene.backward_codecs.lucene86;
import java.io.IOException;
import java.util.ArrayList;

View File

@ -24,6 +24,7 @@ import org.apache.lucene.backward_codecs.lucene80.Lucene80RWNormsFormat;
import org.apache.lucene.backward_codecs.lucene84.Lucene84RWPostingsFormat;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
@ -82,4 +83,9 @@ public class Lucene86RWCodec extends Lucene86Codec {
public TermVectorsFormat termVectorsFormat() {
return new Lucene50RWTermVectorsFormat();
}
@Override
public final PointsFormat pointsFormat() {
return new Lucene86RWPointsFormat();
}
}

View File

@ -14,9 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Components from the Lucene 8.6 index format. See {@link org.apache.lucene.codecs.lucene90} for an
* overview of the current index format.
*/
package org.apache.lucene.codecs.lucene86;
package org.apache.lucene.backward_codecs.lucene86;
import java.io.IOException;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.index.SegmentWriteState;
/** Writable version of Lucene86PointsFormat for testing */
public final class Lucene86RWPointsFormat extends Lucene86PointsFormat {
/** Sole constructor */
public Lucene86RWPointsFormat() {}
@Override
public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene86PointsWriter(state);
}
}

View File

@ -14,10 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene86;
package org.apache.lucene.backward_codecs.lucene86;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.backward_codecs.lucene87.Lucene87RWCodec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
@ -48,7 +49,7 @@ public class TestLucene86PointsFormat extends BasePointsFormatTestCase {
public TestLucene86PointsFormat() {
// standard issue
Codec defaultCodec = TestUtil.getDefaultCodec();
Codec defaultCodec = new Lucene87RWCodec();
if (random().nextBoolean()) {
// randomize parameters
maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
@ -341,85 +342,4 @@ public class TestLucene86PointsFormat extends BasePointsFormatTestCase {
r.close();
dir.close();
}
public void testDocCountEdgeCases() {
PointValues values = getPointValues(Long.MAX_VALUE, 1, Long.MAX_VALUE);
long docs = values.estimateDocCount(null);
assertEquals(1, docs);
values = getPointValues(Long.MAX_VALUE, 1, 1);
docs = values.estimateDocCount(null);
assertEquals(1, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE);
docs = values.estimateDocCount(null);
assertEquals(Integer.MAX_VALUE, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE / 2);
docs = values.estimateDocCount(null);
assertEquals(Integer.MAX_VALUE, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, 1);
docs = values.estimateDocCount(null);
assertEquals(1, docs);
}
public void testRandomDocCount() {
for (int i = 0; i < 100; i++) {
long size = TestUtil.nextLong(random(), 1, Long.MAX_VALUE);
int maxDoc = (size > Integer.MAX_VALUE) ? Integer.MAX_VALUE : Math.toIntExact(size);
int docCount = TestUtil.nextInt(random(), 1, maxDoc);
long estimatedPointCount = TestUtil.nextLong(random(), 0, size);
PointValues values = getPointValues(size, docCount, estimatedPointCount);
long docs = values.estimateDocCount(null);
assertTrue(docs <= estimatedPointCount);
assertTrue(docs <= maxDoc);
assertTrue(docs >= estimatedPointCount / (size / docCount));
}
}
private PointValues getPointValues(long size, int docCount, long estimatedPointCount) {
return new PointValues() {
@Override
public void intersect(IntersectVisitor visitor) {
throw new UnsupportedOperationException();
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatedPointCount;
}
@Override
public byte[] getMinPackedValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMaxPackedValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumDimensions() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumIndexDimensions() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getBytesPerDimension() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long size() {
return size;
}
@Override
public int getDocCount() {
return docCount;
}
};
}
}

View File

@ -20,9 +20,11 @@ import org.apache.lucene.backward_codecs.lucene50.Lucene50RWCompoundFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50RWTermVectorsFormat;
import org.apache.lucene.backward_codecs.lucene80.Lucene80RWNormsFormat;
import org.apache.lucene.backward_codecs.lucene84.Lucene84RWPostingsFormat;
import org.apache.lucene.backward_codecs.lucene86.Lucene86RWPointsFormat;
import org.apache.lucene.backward_codecs.lucene86.Lucene86RWSegmentInfoFormat;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
@ -80,4 +82,9 @@ public class Lucene87RWCodec extends Lucene87Codec {
public StoredFieldsFormat storedFieldsFormat() {
return new Lucene87RWStoredFieldsFormat(mode.storedMode);
}
@Override
public final PointsFormat pointsFormat() {
return new Lucene86RWPointsFormat();
}
}

View File

@ -30,7 +30,6 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene86.Lucene86PointsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@ -144,7 +143,7 @@ public class Lucene90Codec extends Codec {
@Override
public final PointsFormat pointsFormat() {
return new Lucene86PointsFormat();
return new Lucene90PointsFormat();
}
@Override

View File

@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
import java.io.IOException;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
/**
* Lucene 9.0 point format, which encodes dimensional values in a block KD-tree structure for fast
* 1D range and N dimensional shape intersection filtering. See <a
* href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for
* details.
*
* <p>Data is stored across three files
*
* <ul>
* <li>A .kdm file that records metadata about the fields, such as numbers of dimensions or
* numbers of bytes per dimension.
* <li>A .kdi file that stores inner nodes of the tree.
* <li>A .kdd file that stores leaf nodes, where most of the data lives.
* </ul>
*
* See <a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=173081898">this
* wiki</a> for detailed data structures of the three files.
*
* @lucene.experimental
*/
public final class Lucene90PointsFormat extends PointsFormat {
static final String DATA_CODEC_NAME = "Lucene90PointsFormatData";
static final String INDEX_CODEC_NAME = "Lucene90PointsFormatIndex";
static final String META_CODEC_NAME = "Lucene90PointsFormatMeta";
/** Filename extension for the leaf blocks */
public static final String DATA_EXTENSION = "kdd";
/** Filename extension for the index per field */
public static final String INDEX_EXTENSION = "kdi";
/** Filename extension for the meta per field */
public static final String META_EXTENSION = "kdm";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
/** Sole constructor */
public Lucene90PointsFormat() {}
@Override
public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene90PointsWriter(state);
}
@Override
public PointsReader fieldsReader(SegmentReadState state) throws IOException {
return new Lucene90PointsReader(state);
}
}
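
Beyond the diff itself, a minimal sketch of how this format gets exercised: point fields added through IndexWriter are encoded by the codec's PointsFormat, so indexing a LongPoint under the default 9.0 codec produces the .kdm/.kdi/.kdd files described above. The "price" field name and the in-memory directory are illustrative, not part of this change.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class PointsFormatDemo {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
        Document doc = new Document();
        doc.add(new LongPoint("price", 42L)); // encoded by the codec's PointsFormat
        w.addDocument(doc);
      } // close commits and writes the per-segment .kdm/.kdi/.kdd files
      try (DirectoryReader r = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(r);
        // 1D range query answered from the BKD tree:
        System.out.println(searcher.count(LongPoint.newRangeQuery("price", 0L, 100L)));
      }
    }
  }
}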

View File

@ -0,0 +1,158 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
/** Reads point values previously written with {@link Lucene90PointsWriter} */
public class Lucene90PointsReader extends PointsReader {
final IndexInput indexIn, dataIn;
final SegmentReadState readState;
final Map<Integer, BKDReader> readers = new HashMap<>();
/** Sole constructor */
public Lucene90PointsReader(SegmentReadState readState) throws IOException {
this.readState = readState;
String metaFileName =
IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
Lucene90PointsFormat.META_EXTENSION);
String indexFileName =
IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
Lucene90PointsFormat.INDEX_EXTENSION);
String dataFileName =
IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
Lucene90PointsFormat.DATA_EXTENSION);
boolean success = false;
try {
indexIn = readState.directory.openInput(indexFileName, readState.context);
CodecUtil.checkIndexHeader(
indexIn,
Lucene90PointsFormat.INDEX_CODEC_NAME,
Lucene90PointsFormat.VERSION_START,
Lucene90PointsFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix);
dataIn = readState.directory.openInput(dataFileName, readState.context);
CodecUtil.checkIndexHeader(
dataIn,
Lucene90PointsFormat.DATA_CODEC_NAME,
Lucene90PointsFormat.VERSION_START,
Lucene90PointsFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix);
long indexLength = -1, dataLength = -1;
try (ChecksumIndexInput metaIn =
readState.directory.openChecksumInput(metaFileName, readState.context)) {
Throwable priorE = null;
try {
CodecUtil.checkIndexHeader(
metaIn,
Lucene90PointsFormat.META_CODEC_NAME,
Lucene90PointsFormat.VERSION_START,
Lucene90PointsFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix);
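// Per-field entries follow the header: each is a field number plus the BKD
// index metadata that BKDReader consumes; a field number of -1 (written by
// Lucene90PointsWriter#finish) terminates the list.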
while (true) {
int fieldNumber = metaIn.readInt();
if (fieldNumber == -1) {
break;
} else if (fieldNumber < 0) {
throw new CorruptIndexException("Illegal field number: " + fieldNumber, metaIn);
}
BKDReader reader = new BKDReader(metaIn, indexIn, dataIn);
readers.put(fieldNumber, reader);
}
indexLength = metaIn.readLong();
dataLength = metaIn.readLong();
} catch (Throwable t) {
priorE = t;
} finally {
CodecUtil.checkFooter(metaIn, priorE);
}
}
// At this point, checksums of the meta file have been validated so we
// know that indexLength and dataLength are very likely correct.
CodecUtil.retrieveChecksum(indexIn, indexLength);
CodecUtil.retrieveChecksum(dataIn, dataLength);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(this);
}
}
}
/**
* Returns the underlying {@link BKDReader}.
*
* @lucene.internal
*/
@Override
public PointValues getValues(String fieldName) {
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName);
if (fieldInfo == null) {
throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized");
}
if (fieldInfo.getPointDimensionCount() == 0) {
throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index point values");
}
return readers.get(fieldInfo.number);
}
@Override
public long ramBytesUsed() {
return 0L;
}
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(indexIn);
CodecUtil.checksumEntireFile(dataIn);
}
@Override
public void close() throws IOException {
IOUtils.close(indexIn, dataIn);
// Free up heap:
readers.clear();
}
}

View File

@ -0,0 +1,298 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutablePointValues;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;
/** Writes dimensional values */
public class Lucene90PointsWriter extends PointsWriter {
/** Outputs used to write the BKD tree data files. */
protected final IndexOutput metaOut, indexOut, dataOut;
final SegmentWriteState writeState;
final int maxPointsInLeafNode;
final double maxMBSortInHeap;
private boolean finished;
/** Full constructor */
public Lucene90PointsWriter(
SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap)
throws IOException {
assert writeState.fieldInfos.hasPointValues();
this.writeState = writeState;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxMBSortInHeap = maxMBSortInHeap;
String dataFileName =
IndexFileNames.segmentFileName(
writeState.segmentInfo.name,
writeState.segmentSuffix,
Lucene90PointsFormat.DATA_EXTENSION);
dataOut = writeState.directory.createOutput(dataFileName, writeState.context);
boolean success = false;
try {
CodecUtil.writeIndexHeader(
dataOut,
Lucene90PointsFormat.DATA_CODEC_NAME,
Lucene90PointsFormat.VERSION_CURRENT,
writeState.segmentInfo.getId(),
writeState.segmentSuffix);
String metaFileName =
IndexFileNames.segmentFileName(
writeState.segmentInfo.name,
writeState.segmentSuffix,
Lucene90PointsFormat.META_EXTENSION);
metaOut = writeState.directory.createOutput(metaFileName, writeState.context);
CodecUtil.writeIndexHeader(
metaOut,
Lucene90PointsFormat.META_CODEC_NAME,
Lucene90PointsFormat.VERSION_CURRENT,
writeState.segmentInfo.getId(),
writeState.segmentSuffix);
String indexFileName =
IndexFileNames.segmentFileName(
writeState.segmentInfo.name,
writeState.segmentSuffix,
Lucene90PointsFormat.INDEX_EXTENSION);
indexOut = writeState.directory.createOutput(indexFileName, writeState.context);
CodecUtil.writeIndexHeader(
indexOut,
Lucene90PointsFormat.INDEX_CODEC_NAME,
Lucene90PointsFormat.VERSION_CURRENT,
writeState.segmentInfo.getId(),
writeState.segmentSuffix);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(this);
}
}
}
/**
* Uses the default values for {@code maxPointsInLeafNode} (1024) and {@code maxMBSortInHeap}
* (16.0)
*/
public Lucene90PointsWriter(SegmentWriteState writeState) throws IOException {
this(
writeState,
BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOException {
PointValues values = reader.getValues(fieldInfo.name);
BKDConfig config =
new BKDConfig(
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode);
try (BKDWriter writer =
new BKDWriter(
writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
config,
maxMBSortInHeap,
values.size())) {
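// Points being flushed from the indexing buffer arrive as MutablePointValues
// and can be sorted in place by BKDWriter; anything else falls back to
// visiting every point via intersect() below: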
if (values instanceof MutablePointValues) {
Runnable finalizer =
writer.writeField(
metaOut, indexOut, dataOut, fieldInfo.name, (MutablePointValues) values);
if (finalizer != null) {
metaOut.writeInt(fieldInfo.number);
finalizer.run();
}
return;
}
values.intersect(
new IntersectVisitor() {
@Override
public void visit(int docID) {
throw new IllegalStateException();
}
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
writer.add(packedValue, docID);
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});
// We could have 0 points on merge since all docs with dimensional fields may be deleted:
Runnable finalizer = writer.finish(metaOut, indexOut, dataOut);
if (finalizer != null) {
metaOut.writeInt(fieldInfo.number);
finalizer.run();
}
}
}
@Override
public void merge(MergeState mergeState) throws IOException {
/**
* If indexSort is activated and some of the leaves are not sorted, the next test will catch that
* and the non-optimized merge will run. If the readers are all sorted then it's safe to perform
* a bulk merge of the points.
*/
for (PointsReader reader : mergeState.pointsReaders) {
if (reader instanceof Lucene90PointsReader == false) {
// We can only bulk merge when all to-be-merged segments use our format:
super.merge(mergeState);
return;
}
}
for (PointsReader reader : mergeState.pointsReaders) {
if (reader != null) {
reader.checkIntegrity();
}
}
for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
if (fieldInfo.getPointDimensionCount() != 0) {
if (fieldInfo.getPointDimensionCount() == 1) {
// Worst case total maximum size (if none of the points are deleted):
long totMaxSize = 0;
for (int i = 0; i < mergeState.pointsReaders.length; i++) {
PointsReader reader = mergeState.pointsReaders[i];
if (reader != null) {
FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
PointValues values = reader.getValues(fieldInfo.name);
if (values != null) {
totMaxSize += values.size();
}
}
}
}
BKDConfig config =
new BKDConfig(
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode);
// System.out.println("MERGE: field=" + fieldInfo.name);
// Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
// already sorted incoming segments, instead of trying to sort all points again as if
// we were simply reindexing them:
try (BKDWriter writer =
new BKDWriter(
writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
config,
maxMBSortInHeap,
totMaxSize)) {
List<BKDReader> bkdReaders = new ArrayList<>();
List<MergeState.DocMap> docMaps = new ArrayList<>();
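// For each source segment, collect this field's BKD tree together with the
// doc-ID map that remaps its docIDs into the merged segment's doc space: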
for (int i = 0; i < mergeState.pointsReaders.length; i++) {
PointsReader reader = mergeState.pointsReaders[i];
if (reader != null) {
// we confirmed this up above
assert reader instanceof Lucene90PointsReader;
Lucene90PointsReader reader90 = (Lucene90PointsReader) reader;
// NOTE: we cannot just use the merged fieldInfo.number (instead of resolving
// to this reader's FieldInfo as we do below) because field numbers can easily
// be different when addIndexes(Directory...) copies over segments from
// another index:
FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
BKDReader bkdReader = reader90.readers.get(readerFieldInfo.number);
if (bkdReader != null) {
bkdReaders.add(bkdReader);
docMaps.add(mergeState.docMaps[i]);
}
}
}
}
Runnable finalizer = writer.merge(metaOut, indexOut, dataOut, docMaps, bkdReaders);
if (finalizer != null) {
metaOut.writeInt(fieldInfo.number);
finalizer.run();
}
}
} else {
mergeOneField(mergeState, fieldInfo);
}
}
}
finish();
}
@Override
public void finish() throws IOException {
if (finished) {
throw new IllegalStateException("already finished");
}
finished = true;
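// Meta file tail, mirrored by Lucene90PointsReader: a -1 sentinel terminates
// the per-field entries, then the full lengths of the index and data files
// (footers included) let the reader locate their checksums: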
metaOut.writeInt(-1);
CodecUtil.writeFooter(indexOut);
CodecUtil.writeFooter(dataOut);
metaOut.writeLong(indexOut.getFilePointer());
metaOut.writeLong(dataOut.getFilePointer());
CodecUtil.writeFooter(metaOut);
}
@Override
public void close() throws IOException {
IOUtils.close(metaOut, indexOut, dataOut);
}
}

View File

@ -176,7 +176,7 @@
* factors.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live documents}. An
* optional file indicating which documents are live.
* <li>{@link org.apache.lucene.codecs.lucene86.Lucene86PointsFormat Point values}. Optional pair
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PointsFormat Point values}. Optional pair
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
* intersection (2D, 3D).
@ -305,7 +305,7 @@
* <td>Info about what documents are live</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene86.Lucene86PointsFormat Point values}</td>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PointsFormat Point values}</td>
<td>.kdd, .kdi, .kdm</td>
* <td>Holds indexed points</td>
* </tr>
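
A quick way to spot these files on disk (the index path is hypothetical; nothing here is from the commit) is to glob a segment directory for the point-value extensions:

import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class ListPointFiles {
  public static void main(String[] args) throws Exception {
    // Matches e.g. _0.kdm (metadata), _0.kdi (inner nodes), _0.kdd (leaves):
    try (DirectoryStream<Path> stream =
        Files.newDirectoryStream(Paths.get("/tmp/index"), "*.{kdm,kdi,kdd}")) {
      for (Path file : stream) {
        System.out.println(file.getFileName());
      }
    }
  }
}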

View File

@ -0,0 +1,344 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.BasePointsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MockRandomMergePolicy;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDConfig;
public class TestLucene90PointsFormat extends BasePointsFormatTestCase {
private final Codec codec;
private final int maxPointsInLeafNode;
public TestLucene90PointsFormat() {
// standard issue
Codec defaultCodec = TestUtil.getDefaultCodec();
if (random().nextBoolean()) {
// randomize parameters
maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
double maxMBSortInHeap = 3.0 + (3 * random().nextDouble());
if (VERBOSE) {
System.out.println(
"TEST: using Lucene60PointsFormat with maxPointsInLeafNode="
+ maxPointsInLeafNode
+ " and maxMBSortInHeap="
+ maxMBSortInHeap);
}
// sneaky impersonation!
codec =
new FilterCodec(defaultCodec.getName(), defaultCodec) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene90PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override
public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
return new Lucene90PointsReader(readState);
}
};
}
};
} else {
// standard issue
codec = defaultCodec;
maxPointsInLeafNode = BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
}
}
@Override
protected Codec getCodec() {
return codec;
}
@Override
public void testMergeStability() throws Exception {
assumeFalse(
"TODO: mess with the parameters and test gets angry!", codec instanceof FilterCodec);
super.testMergeStability();
}
public void testEstimatePointCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// Avoid mockRandomMP since it may cause non-optimal merges that make the
// number of points per leaf hard to predict
while (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
iwc.setMergePolicy(newMergePolicy());
}
IndexWriter w = new IndexWriter(dir, iwc);
byte[] pointValue = new byte[3];
byte[] uniquePointValue = new byte[3];
random().nextBytes(uniquePointValue);
final int numDocs =
TEST_NIGHTLY ? atLeast(10000) : atLeast(500); // in nightly runs, make sure we have several leaves
final boolean multiValues = random().nextBoolean();
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (i == numDocs / 2) {
doc.add(new BinaryPoint("f", uniquePointValue));
} else {
final int numValues = (multiValues) ? TestUtil.nextInt(random(), 2, 100) : 1;
for (int j = 0; j < numValues; j++) {
do {
random().nextBytes(pointValue);
} while (Arrays.equals(pointValue, uniquePointValue));
doc.add(new BinaryPoint("f", pointValue));
}
}
w.addDocument(doc);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
final LeafReader lr = getOnlyLeafReader(r);
PointValues points = lr.getPointValues("f");
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
final int numLeaves = (int) Math.ceil((double) points.size() / maxPointsInLeafNode);
IntersectVisitor allPointsVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
};
assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(allPointsVisitor));
assertEquals(numDocs, points.estimateDocCount(allPointsVisitor));
IntersectVisitor noPointsVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
};
// Return 0 if no points match
assertEquals(0, points.estimatePointCount(noPointsVisitor));
assertEquals(0, points.estimateDocCount(noPointsVisitor));
IntersectVisitor onePointMatchVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (Arrays.compareUnsigned(uniquePointValue, 0, 3, maxPackedValue, 0, 3) > 0
|| Arrays.compareUnsigned(uniquePointValue, 0, 3, minPackedValue, 0, 3) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
};
// If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
// in general, or maybe 2x that if the point is a split value
final long pointCount = points.estimatePointCount(onePointMatchVisitor);
assertTrue(
"" + pointCount,
pointCount == (maxPointsInLeafNode + 1) / 2
|| // common case
pointCount == 2 * ((maxPointsInLeafNode + 1) / 2)); // if the point is a split value
final long docCount = points.estimateDocCount(onePointMatchVisitor);
if (multiValues) {
assertEquals(
docCount,
(long)
(docCount
* (1d
- Math.pow(
(numDocs - pointCount) / points.size(), points.size() / docCount))));
} else {
assertEquals(Math.min(pointCount, numDocs), docCount);
}
r.close();
dir.close();
}
// The tree is always balanced in the N dims case, and leaves are
// not all full so things are a bit different
public void testEstimatePointCount2Dims() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
byte[][] pointValue = new byte[2][];
pointValue[0] = new byte[3];
pointValue[1] = new byte[3];
byte[][] uniquePointValue = new byte[2][];
uniquePointValue[0] = new byte[3];
uniquePointValue[1] = new byte[3];
random().nextBytes(uniquePointValue[0]);
random().nextBytes(uniquePointValue[1]);
final int numDocs =
TEST_NIGHTLY
? atLeast(10000)
: atLeast(1000); // in nightly, make sure we have several leaves
final boolean multiValues = random().nextBoolean();
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (i == numDocs / 2) {
doc.add(new BinaryPoint("f", uniquePointValue));
} else {
final int numValues = (multiValues) ? TestUtil.nextInt(random(), 2, 100) : 1;
for (int j = 0; j < numValues; j++) {
do {
random().nextBytes(pointValue[0]);
random().nextBytes(pointValue[1]);
} while (Arrays.equals(pointValue[0], uniquePointValue[0])
|| Arrays.equals(pointValue[1], uniquePointValue[1]));
doc.add(new BinaryPoint("f", pointValue));
}
}
w.addDocument(doc);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
final LeafReader lr = getOnlyLeafReader(r);
PointValues points = lr.getPointValues("f");
IntersectVisitor allPointsVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
};
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
final int numLeaves = (int) Math.ceil((double) points.size() / maxPointsInLeafNode);
assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(allPointsVisitor));
assertEquals(numDocs, points.estimateDocCount(allPointsVisitor));
IntersectVisitor noPointsVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
};
// Return 0 if no points match
assertEquals(0, points.estimatePointCount(noPointsVisitor));
assertEquals(0, points.estimateDocCount(noPointsVisitor));
IntersectVisitor onePointMatchVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
for (int dim = 0; dim < 2; ++dim) {
if (Arrays.compareUnsigned(
uniquePointValue[dim], 0, 3, maxPackedValue, dim * 3, dim * 3 + 3)
> 0
|| Arrays.compareUnsigned(
uniquePointValue[dim], 0, 3, minPackedValue, dim * 3, dim * 3 + 3)
< 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
}
return Relation.CELL_CROSSES_QUERY;
}
};
final long pointCount = points.estimatePointCount(onePointMatchVisitor);
// The number of matches needs to be multiple of count per leaf
final long countPerLeaf = (maxPointsInLeafNode + 1) / 2;
assertTrue("" + pointCount, pointCount % countPerLeaf == 0);
// in extreme cases, a point can be shared by 4 leaves
assertTrue("" + pointCount, pointCount / countPerLeaf <= 4 && pointCount / countPerLeaf >= 1);
final long docCount = points.estimateDocCount(onePointMatchVisitor);
if (multiValues) {
assertEquals(
docCount,
(long)
(docCount
* (1d
- Math.pow(
(numDocs - pointCount) / points.size(), points.size() / docCount))));
} else {
assertEquals(Math.min(pointCount, numDocs), docCount);
}
r.close();
dir.close();
}
}

View File

@ -36,8 +36,8 @@ import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.lucene86.Lucene86PointsReader;
import org.apache.lucene.codecs.lucene86.Lucene86PointsWriter;
import org.apache.lucene.codecs.lucene90.Lucene90PointsReader;
import org.apache.lucene.codecs.lucene90.Lucene90PointsWriter;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
@ -1280,12 +1280,12 @@ public class TestPointQueries extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene86PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
return new Lucene90PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override
public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
return new Lucene86PointsReader(readState);
return new Lucene90PointsReader(readState);
}
};
}

View File

@ -19,7 +19,7 @@ package org.apache.lucene.sandbox.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.lucene86.Lucene86PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.geo.GeoUtils;
@ -54,7 +54,7 @@ public class LatLonPointPrototypeQueries {
*
* <p>This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link
* LatLonDocValuesField#newDistanceSort}, but is far more efficient since it takes advantage of
* properties the indexed BKD tree. Currently this only works with {@link Lucene86PointsFormat}
* properties of the indexed BKD tree. Currently this only works with {@link Lucene90PointsFormat}
* (used by the default codec). Multi-valued fields are currently not de-duplicated, so if a
* document had multiple instances of the specified field that make it into the top n, that
* document will appear more than once.
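
As a usage illustration (not part of this change), here is a sketch of the nearest-neighbor API this javadoc describes; the index path and the "location" field name are assumptions:

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.sandbox.search.LatLonPointPrototypeQueries;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.FSDirectory;

public class NearestDemo {
  public static void main(String[] args) throws Exception {
    try (DirectoryReader reader =
        DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/geo-index")))) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // Ten documents whose "location" LatLonPoint is closest to lower Manhattan:
      TopFieldDocs nearest =
          LatLonPointPrototypeQueries.nearest(searcher, "location", 40.7128, -74.0060, 10);
      for (ScoreDoc hit : nearest.scoreDocs) {
        System.out.println("doc=" + hit.doc);
      }
    }
  }
}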

View File

@ -32,8 +32,8 @@ import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.lucene86.Lucene86PointsReader;
import org.apache.lucene.codecs.lucene86.Lucene86PointsWriter;
import org.apache.lucene.codecs.lucene90.Lucene90PointsReader;
import org.apache.lucene.codecs.lucene90.Lucene90PointsWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
@ -108,12 +108,12 @@ public class TestGeo3DPoint extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene86PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
return new Lucene90PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override
public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
return new Lucene86PointsReader(readState);
return new Lucene90PointsReader(readState);
}
};
}

View File

@ -31,8 +31,8 @@ import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.lucene86.Lucene86PointsReader;
import org.apache.lucene.codecs.lucene86.Lucene86PointsWriter;
import org.apache.lucene.codecs.lucene90.Lucene90PointsReader;
import org.apache.lucene.codecs.lucene90.Lucene90PointsWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
@ -1467,13 +1467,13 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene86PointsWriter(
return new Lucene90PointsWriter(
writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override
public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
return new Lucene86PointsReader(readState);
return new Lucene90PointsReader(readState);
}
};
}

View File

@ -31,8 +31,8 @@ import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.lucene86.Lucene86PointsReader;
import org.apache.lucene.codecs.lucene86.Lucene86PointsWriter;
import org.apache.lucene.codecs.lucene90.Lucene90PointsReader;
import org.apache.lucene.codecs.lucene90.Lucene90PointsWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
@ -1312,13 +1312,13 @@ public abstract class BaseXYPointTestCase extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene86PointsWriter(
return new Lucene90PointsWriter(
writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override
public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
return new Lucene86PointsReader(readState);
return new Lucene90PointsReader(readState);
}
};
}

View File

@ -1186,4 +1186,85 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
w.forceMerge(1);
IOUtils.close(w, dir);
}
public void testDocCountEdgeCases() {
PointValues values = getPointValues(Long.MAX_VALUE, 1, Long.MAX_VALUE);
long docs = values.estimateDocCount(null);
assertEquals(1, docs);
values = getPointValues(Long.MAX_VALUE, 1, 1);
docs = values.estimateDocCount(null);
assertEquals(1, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE);
docs = values.estimateDocCount(null);
assertEquals(Integer.MAX_VALUE, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE / 2);
docs = values.estimateDocCount(null);
assertEquals(Integer.MAX_VALUE, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, 1);
docs = values.estimateDocCount(null);
assertEquals(1, docs);
}
public void testRandomDocCount() {
for (int i = 0; i < 100; i++) {
long size = TestUtil.nextLong(random(), 1, Long.MAX_VALUE);
int maxDoc = (size > Integer.MAX_VALUE) ? Integer.MAX_VALUE : Math.toIntExact(size);
int docCount = TestUtil.nextInt(random(), 1, maxDoc);
long estimatedPointCount = TestUtil.nextLong(random(), 0, size);
PointValues values = getPointValues(size, docCount, estimatedPointCount);
long docs = values.estimateDocCount(null);
assertTrue(docs <= estimatedPointCount);
assertTrue(docs <= maxDoc);
assertTrue(docs >= estimatedPointCount / (size / docCount));
}
}
private PointValues getPointValues(long size, int docCount, long estimatedPointCount) {
return new PointValues() {
@Override
public void intersect(IntersectVisitor visitor) {
throw new UnsupportedOperationException();
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatedPointCount;
}
@Override
public byte[] getMinPackedValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMaxPackedValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumDimensions() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumIndexDimensions() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getBytesPerDimension() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long size() {
return size;
}
@Override
public int getDocCount() {
return docCount;
}
};
}
}

View File

@ -39,9 +39,9 @@ import org.apache.lucene.codecs.blockterms.LuceneVarGapDocFreqInterval;
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucenePostings;
import org.apache.lucene.codecs.lucene86.Lucene86PointsReader;
import org.apache.lucene.codecs.lucene86.Lucene86PointsWriter;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsReader;
import org.apache.lucene.codecs.lucene90.Lucene90PointsWriter;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.codecs.memory.FSTPostingsFormat;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
@ -102,7 +102,7 @@ public class RandomCodec extends AssertingCodec {
// Randomize how BKDWriter chooses its splits:
return new Lucene86PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
return new Lucene90PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
@Override
public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOException {
@ -157,7 +157,7 @@ public class RandomCodec extends AssertingCodec {
@Override
public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
return new Lucene86PointsReader(readState);
return new Lucene90PointsReader(readState);
}
});
}