LUCENE-10165: Implement Lucene90DocValuesProducer#getMergeInstance. (#374)

This speeds up merging by returning doc values that perform faster when all doc
IDs and values are consumed.
This commit is contained in:
Adrien Grand 2021-10-21 08:41:47 +02:00 committed by GitHub
parent 4c2692e897
commit 9e84b2fd41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 344 additions and 78 deletions

View File

@ -53,14 +53,15 @@ import org.apache.lucene.util.packed.DirectReader;
/** reader for {@link Lucene90DocValuesFormat} */
final class Lucene90DocValuesProducer extends DocValuesProducer {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();
private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
private final Map<String, SortedNumericEntry> sortedNumerics = new HashMap<>();
private final Map<String, NumericEntry> numerics;
private final Map<String, BinaryEntry> binaries;
private final Map<String, SortedEntry> sorted;
private final Map<String, SortedSetEntry> sortedSets;
private final Map<String, SortedNumericEntry> sortedNumerics;
private final IndexInput data;
private final int maxDoc;
private int version = -1;
private final boolean merging;
/** expert: instantiates a new reader */
Lucene90DocValuesProducer(
@ -73,6 +74,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
String metaName =
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
this.maxDoc = state.segmentInfo.maxDoc();
numerics = new HashMap<>();
binaries = new HashMap<>();
sorted = new HashMap<>();
sortedSets = new HashMap<>();
sortedNumerics = new HashMap<>();
merging = false;
// read in the entries from the metadata file.
try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) {
@ -129,6 +136,34 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
}
/**
 * Cloning constructor: builds a producer that reuses the given per-field entry maps by reference
 * and reads through its own clone of {@code data}.
 *
 * @param numerics entries of numeric fields, keyed by field name
 * @param binaries entries of binary fields, keyed by field name
 * @param sorted entries of sorted fields, keyed by field name
 * @param sortedSets entries of sorted-set fields, keyed by field name
 * @param sortedNumerics entries of sorted-numeric fields, keyed by field name
 * @param data data input; cloned, never stored directly
 * @param maxDoc value copied into {@code maxDoc}
 * @param version value copied into {@code version}
 * @param merging whether readers created by this instance should be merge-specialized
 */
private Lucene90DocValuesProducer(
    Map<String, NumericEntry> numerics,
    Map<String, BinaryEntry> binaries,
    Map<String, SortedEntry> sorted,
    Map<String, SortedSetEntry> sortedSets,
    Map<String, SortedNumericEntry> sortedNumerics,
    IndexInput data,
    int maxDoc,
    int version,
    boolean merging) {
  // Scalar state.
  this.maxDoc = maxDoc;
  this.version = version;
  this.merging = merging;

  // Entry maps are shared with the caller, not copied.
  this.sortedNumerics = sortedNumerics;
  this.sortedSets = sortedSets;
  this.sorted = sorted;
  this.binaries = binaries;
  this.numerics = numerics;

  // Each instance gets its own clone of the data input.
  this.data = data.clone();
}
@Override
public DocValuesProducer getMergeInstance() {
  // Same entry maps, maxDoc and version as this instance; only the merging flag differs. The
  // cloning constructor takes care of cloning the data input.
  final boolean forMerge = true;
  return new Lucene90DocValuesProducer(
      numerics, binaries, sorted, sortedSets, sortedNumerics, data, maxDoc, version, forMerge);
}
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
FieldInfo info = infos.fieldInfo(fieldNumber);
@ -433,6 +468,15 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
}
/**
 * Picks the {@link DirectReader} flavor that matches this producer: the merge-specialized reader
 * when {@code merging} is set, the regular reader otherwise.
 *
 * @param slice input holding the packed values
 * @param bitsPerValue number of bits per packed value
 * @param offset position of the first packed value in {@code slice}
 * @param numValues number of packed values; only forwarded to the merge-specialized reader
 */
private LongValues getDirectReaderInstance(
    RandomAccessInput slice, int bitsPerValue, long offset, long numValues) {
  return merging
      ? DirectReader.getMergeInstance(slice, bitsPerValue, offset, numValues)
      : DirectReader.getInstance(slice, bitsPerValue, offset);
}
private NumericDocValues getNumeric(NumericEntry entry) throws IOException {
if (entry.docsWithFieldOffset == -2) {
// empty
@ -460,7 +504,8 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
};
} else {
final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
final LongValues values =
getDirectReaderInstance(slice, entry.bitsPerValue, 0L, entry.numValues);
if (entry.table != null) {
final long[] table = entry.table;
return new DenseNumericDocValues(maxDoc) {
@ -521,7 +566,8 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
};
} else {
final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
final LongValues values =
getDirectReaderInstance(slice, entry.bitsPerValue, 0L, entry.numValues);
if (entry.table != null) {
final long[] table = entry.table;
return new SparseNumericDocValues(disi) {
@ -577,7 +623,8 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
};
} else {
final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
final LongValues values =
getDirectReaderInstance(slice, entry.bitsPerValue, 0L, entry.numValues);
if (entry.table != null) {
final long[] table = entry.table;
return new LongValues() {
@ -713,7 +760,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
final RandomAccessInput addressesData =
this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
final LongValues addresses =
DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData, merging);
return new DenseBinaryDocValues(maxDoc) {
final BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength);
@ -791,10 +838,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
final RandomAccessInput slice =
data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength);
final LongValues values = DirectReader.getInstance(slice, ordsEntry.bitsPerValue);
final LongValues values =
getDirectReaderInstance(slice, ordsEntry.bitsPerValue, 0L, ordsEntry.numValues);
if (ordsEntry.docsWithFieldOffset == -1) { // dense
return new BaseSortedDocValues(entry, data) {
return new BaseSortedDocValues(entry) {
private final int maxDoc = Lucene90DocValuesProducer.this.maxDoc;
private int doc = -1;
@ -843,7 +891,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
ordsEntry.denseRankPower,
ordsEntry.numValues);
return new BaseSortedDocValues(entry, data) {
return new BaseSortedDocValues(entry) {
@Override
public int ordValue() throws IOException {
@ -879,7 +927,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
final NumericDocValues ords = getNumeric(entry.ordsEntry);
return new BaseSortedDocValues(entry, data) {
return new BaseSortedDocValues(entry) {
@Override
public int ordValue() throws IOException {
@ -913,15 +961,13 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
};
}
private abstract static class BaseSortedDocValues extends SortedDocValues {
private abstract class BaseSortedDocValues extends SortedDocValues {
final SortedEntry entry;
final IndexInput data;
final TermsEnum termsEnum;
BaseSortedDocValues(SortedEntry entry, IndexInput data) throws IOException {
BaseSortedDocValues(SortedEntry entry) throws IOException {
this.entry = entry;
this.data = data;
this.termsEnum = termsEnum();
}
@ -955,7 +1001,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
}
private abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
private abstract class BaseSortedSetDocValues extends SortedSetDocValues {
final SortedSetEntry entry;
final IndexInput data;
@ -997,7 +1043,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
}
private static class TermsDict extends BaseTermsEnum {
private class TermsDict extends BaseTermsEnum {
static final int LZ4_DECOMPRESSOR_PADDING = 7;
final TermsDictEntry entry;
@ -1018,13 +1064,15 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
this.entry = entry;
RandomAccessInput addressesSlice =
data.randomAccessSlice(entry.termsAddressesOffset, entry.termsAddressesLength);
blockAddresses = DirectMonotonicReader.getInstance(entry.termsAddressesMeta, addressesSlice);
blockAddresses =
DirectMonotonicReader.getInstance(entry.termsAddressesMeta, addressesSlice, merging);
bytes = data.slice("terms", entry.termsDataOffset, entry.termsDataLength);
blockMask = (1L << TERMS_DICT_BLOCK_LZ4_SHIFT) - 1;
RandomAccessInput indexAddressesSlice =
data.randomAccessSlice(entry.termsIndexAddressesOffset, entry.termsIndexAddressesLength);
indexAddresses =
DirectMonotonicReader.getInstance(entry.termsIndexAddressesMeta, indexAddressesSlice);
DirectMonotonicReader.getInstance(
entry.termsIndexAddressesMeta, indexAddressesSlice, merging);
indexBytes = data.slice("terms-index", entry.termsIndexOffset, entry.termsIndexLength);
term = new BytesRef(entry.maxTermLength);
@ -1236,7 +1284,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
final RandomAccessInput addressesInput =
data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
final LongValues addresses =
DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput);
DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput, merging);
final LongValues values = getNumericValues(entry);
@ -1482,10 +1530,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
this.block++;
} while (this.block != block);
final int numValues =
Math.toIntExact(Math.min(1 << shift, entry.numValues - (block << shift)));
values =
bitsPerValue == 0
? LongValues.ZEROES
: DirectReader.getInstance(slice, bitsPerValue, offset);
: getDirectReaderInstance(slice, bitsPerValue, offset, numValues);
}
return mul * values.get(index & mask) + delta;
}

View File

@ -99,13 +99,25 @@ public final class DirectMonotonicReader extends LongValues implements Accountab
return meta;
}
/** Retrieves an instance from the specified slice. */
/** Retrieves a non-merging instance from the specified slice. */
public static DirectMonotonicReader getInstance(Meta meta, RandomAccessInput data)
throws IOException {
return getInstance(meta, data, false);
}
/** Retrieves an instance from the specified slice. */
public static DirectMonotonicReader getInstance(
Meta meta, RandomAccessInput data, boolean merging) throws IOException {
final LongValues[] readers = new LongValues[meta.numBlocks];
for (int i = 0; i < meta.mins.length; ++i) {
for (int i = 0; i < meta.numBlocks; ++i) {
if (meta.bpvs[i] == 0) {
readers[i] = EMPTY;
} else if (merging
&& i < meta.numBlocks - 1 // the last block may be partial, so only earlier blocks have a known size
&& meta.blockShift >= DirectReader.MERGE_BUFFER_SHIFT) {
readers[i] =
DirectReader.getMergeInstance(
data, meta.bpvs[i], meta.offsets[i], 1L << meta.blockShift);
} else {
readers[i] = DirectReader.getInstance(data, meta.bpvs[i], meta.offsets[i]);
}

View File

@ -17,6 +17,7 @@
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.io.UncheckedIOException;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.LongValues;
@ -38,6 +39,10 @@ import org.apache.lucene.util.LongValues;
*/
public class DirectReader {
static final int MERGE_BUFFER_SHIFT = 7;
private static final int MERGE_BUFFER_SIZE = 1 << MERGE_BUFFER_SHIFT;
private static final int MERGE_BUFFER_MASK = MERGE_BUFFER_SIZE - 1;
/**
* Retrieves an instance from the specified slice written decoding {@code bitsPerValue} for each
* value
@ -85,6 +90,102 @@ public class DirectReader {
}
}
/**
* Retrieves an instance that is specialized for merges and is typically faster at sequential
* access but slower at random access.
*
* <p>Delegates to {@link #getMergeInstance(RandomAccessInput, int, long, long)} with a base
* offset of {@code 0}.
*
* @param slice input to read the packed values from
* @param bitsPerValue number of bits used to encode each value
* @param numValues number of values stored in {@code slice}
*/
public static LongValues getMergeInstance(
RandomAccessInput slice, int bitsPerValue, long numValues) {
return getMergeInstance(slice, bitsPerValue, 0L, numValues);
}
/**
* Retrieves an instance that is specialized for merges and is typically faster at sequential
* access but slower at random access.
*
* <p>Values are decoded {@code MERGE_BUFFER_SIZE} at a time into an internal buffer, so a call to
* {@code get} that lands in a different block than the previous call decodes a whole block.
*
* @param slice input to read the packed values from
* @param bitsPerValue number of bits used to encode each value
* @param baseOffset position in {@code slice} of the first packed value
* @param numValues number of values; {@code get} asserts that the requested index is below it
*/
public static LongValues getMergeInstance(
RandomAccessInput slice, int bitsPerValue, long baseOffset, long numValues) {
return new LongValues() {
// Decoded values of the block identified by blockIndex.
private final long[] buffer = new long[MERGE_BUFFER_SIZE];
// Index of the block currently held in buffer, -1 until the first call to get.
private long blockIndex = -1;
@Override
public long get(long index) {
assert index < numValues;
// Block of MERGE_BUFFER_SIZE values that contains the requested index.
final long blockIndex = index >>> MERGE_BUFFER_SHIFT;
if (this.blockIndex != blockIndex) {
try {
// Different block than the buffered one: decode it, starting from its first value.
fillBuffer(blockIndex << MERGE_BUFFER_SHIFT);
} catch (IOException e) {
// Rethrown unchecked since this method does not declare IOException.
throw new UncheckedIOException(e);
}
this.blockIndex = blockIndex;
}
return buffer[(int) (index & MERGE_BUFFER_MASK)];
}
/** Decodes into {@code buffer} the block whose first value is at {@code index}. */
private void fillBuffer(long index) throws IOException {
// NOTE: we're not allowed to read more than 3 bytes past the last value
if (index > numValues - MERGE_BUFFER_SIZE) {
// Fewer than MERGE_BUFFER_SIZE (128) values left: decode them one by one with the regular
// reader. Buffer slots past numValuesLastBlock are left untouched.
final LongValues slowInstance = getInstance(slice, bitsPerValue, baseOffset);
final int numValuesLastBlock = Math.toIntExact(numValues - index);
for (int i = 0; i < numValuesLastBlock; ++i) {
buffer[i] = slowInstance.get(index + i);
}
} else if ((bitsPerValue & 0x07) == 0) {
// bitsPerValue is a multiple of 8: 8, 16, 24, 32, 40, 48, 56, 64
final int bytesPerValue = bitsPerValue / Byte.SIZE;
final long mask = bitsPerValue == 64 ? ~0L : (1L << bitsPerValue) - 1;
long offset = baseOffset + (index * bitsPerValue) / 8;
for (int i = 0; i < MERGE_BUFFER_SIZE; ++i) {
// Read with the narrowest primitive that is wide enough for one value; when the read is
// wider than the value (24, 40, 48 and 56 bits), the surplus bits are masked off.
if (bitsPerValue > Integer.SIZE) {
buffer[i] = slice.readLong(offset) & mask;
} else if (bitsPerValue > Short.SIZE) {
buffer[i] = slice.readInt(offset) & mask;
} else if (bitsPerValue > Byte.SIZE) {
buffer[i] = Short.toUnsignedLong(slice.readShort(offset));
} else {
buffer[i] = Byte.toUnsignedLong(slice.readByte(offset));
}
offset += bytesPerValue;
}
} else if (bitsPerValue < 8) {
// bitsPerValue is 1, 2 or 4
final int valuesPerLong = Long.SIZE / bitsPerValue;
final long mask = (1L << bitsPerValue) - 1;
long offset = baseOffset + (index * bitsPerValue) / 8;
int i = 0;
// MERGE_BUFFER_SIZE (128) values of bitsPerValue bits each span 2 * bitsPerValue longs.
for (int l = 0; l < 2 * bitsPerValue; ++l) {
final long bits = slice.readLong(offset);
// Unpack the values of this long, starting from its lowest bits.
for (int j = 0; j < valuesPerLong; ++j) {
buffer[i++] = (bits >>> (j * bitsPerValue)) & mask;
}
offset += Long.BYTES;
}
} else {
// bitsPerValue is 12, 20 or 28
// Read values 2 by 2
// Two values take 3, 5 or 7 bytes respectively.
final int numBytesFor2Values = bitsPerValue * 2 / Byte.SIZE;
final long mask = (1L << bitsPerValue) - 1;
long offset = baseOffset + (index * bitsPerValue) / 8;
for (int i = 0; i < MERGE_BUFFER_SIZE; i += 2) {
final long l;
// An int is enough when the pair fits in 4 bytes (12 bits per value), otherwise read a long.
if (numBytesFor2Values > Integer.BYTES) {
l = slice.readLong(offset);
} else {
l = slice.readInt(offset);
}
// First value sits in the low bitsPerValue bits, the second one right above it.
buffer[i] = l & mask;
buffer[i + 1] = (l >>> bitsPerValue) & mask;
offset += numBytesFor2Values;
}
}
}
};
}
static final class DirectPackedReader1 extends LongValues {
final RandomAccessInput in;
final long offset;
@ -229,7 +330,6 @@ public class DirectReader {
static final class DirectPackedReader24 extends LongValues {
final RandomAccessInput in;
final long offset;
;
DirectPackedReader24(RandomAccessInput in, long offset) {
this.in = in;

View File

@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
/**
* Tests Lucene90DocValuesFormat's merge instance by re-running the inherited {@link
* TestLucene90DocValuesFormat} tests with {@link #shouldTestMergeInstance()} returning {@code
* true}.
*/
public class TestLucene90DocValuesFormatMergeInstance extends TestLucene90DocValuesFormat {
// NOTE(review): presumably this switches the base test case to read through merge instances
// (e.g. via maybeWrapWithMergingReader) -- confirm against the base class.
@Override
protected boolean shouldTestMergeInstance() {
return true;
}
}

View File

@ -155,6 +155,14 @@ public class TestDirectMonotonic extends LuceneTestCase {
}
/** Runs the randomized round-trip check with {@code merging} set to {@code false}. */
public void testRandom() throws IOException {
doTestRandom(false);
}
/** Runs the randomized round-trip check with {@code merging} set to {@code true}. */
public void testRandomMerging() throws IOException {
doTestRandom(true);
}
private void doTestRandom(boolean merging) throws IOException {
Random random = random();
final int iters = atLeast(random, 3);
for (int iter = 0; iter < iters; ++iter) {
@ -199,7 +207,8 @@ public class TestDirectMonotonic extends LuceneTestCase {
DirectMonotonicReader.Meta meta =
DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift);
LongValues values =
DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
DirectMonotonicReader.getInstance(
meta, dataIn.randomAccessSlice(0, dataLength), merging);
for (int i = 0; i < numValues; ++i) {
assertEquals(actualValues.get(i).longValue(), values.get(i));
}

View File

@ -78,7 +78,7 @@ public class TestDirectPacked extends LuceneTestCase {
public void testRandom() throws Exception {
Directory dir = newDirectory();
for (int bpv = 1; bpv <= 64; bpv++) {
doTestBpv(dir, bpv, 0);
doTestBpv(dir, bpv, 0, false);
}
dir.close();
}
@ -87,12 +87,30 @@ public class TestDirectPacked extends LuceneTestCase {
Directory dir = newDirectory();
final int offset = TestUtil.nextInt(random(), 1, 100);
for (int bpv = 1; bpv <= 64; bpv++) {
doTestBpv(dir, bpv, offset);
doTestBpv(dir, bpv, offset, false);
}
dir.close();
}
private void doTestBpv(Directory directory, int bpv, long offset) throws Exception {
/** Checks the merge reader for every bits-per-value from 1 to 64, with no leading offset. */
public void testRandomMerge() throws Exception {
  // try-with-resources: the directory is closed even when doTestBpv fails, instead of leaking
  // and piling an unrelated "unclosed directory" failure on top of the real one.
  try (Directory dir = newDirectory()) {
    for (int bpv = 1; bpv <= 64; bpv++) {
      doTestBpv(dir, bpv, 0, true);
    }
  }
}
/**
 * Checks the merge reader for every bits-per-value from 1 to 64, with a random offset between 1
 * and 100 shared by all bits-per-value.
 */
public void testRandomMergeWithOffset() throws Exception {
  // try-with-resources: the directory is closed even when doTestBpv fails, instead of leaking
  // and piling an unrelated "unclosed directory" failure on top of the real one.
  try (Directory dir = newDirectory()) {
    final int offset = TestUtil.nextInt(random(), 1, 100);
    for (int bpv = 1; bpv <= 64; bpv++) {
      doTestBpv(dir, bpv, offset, true);
    }
  }
}
private void doTestBpv(Directory directory, int bpv, long offset, boolean merge)
throws Exception {
MyRandom random = new MyRandom(random().nextLong());
int numIters = TEST_NIGHTLY ? 100 : 10;
for (int i = 0; i < numIters; i++) {
@ -110,9 +128,16 @@ public class TestDirectPacked extends LuceneTestCase {
writer.finish();
output.close();
IndexInput input = directory.openInput(name, IOContext.DEFAULT);
LongValues reader =
DirectReader.getInstance(
input.randomAccessSlice(0, input.length()), bitsRequired, offset);
LongValues reader;
if (merge) {
reader =
DirectReader.getMergeInstance(
input.randomAccessSlice(0, input.length()), bitsRequired, offset, original.length);
} else {
reader =
DirectReader.getInstance(
input.randomAccessSlice(0, input.length()), bitsRequired, offset);
}
for (int j = 0; j < original.length; j++) {
assertEquals("bpv=" + bpv, original[j], reader.get(j));
}

View File

@ -117,7 +117,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
@ -152,7 +153,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
@ -189,7 +191,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
@ -228,7 +231,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
@ -283,7 +287,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
for (int i = 0; i < numDocs; i++) {
@ -320,7 +325,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
@ -360,7 +366,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
@ -405,7 +412,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
@ -453,7 +461,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -485,7 +494,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
for (int i = 0; i < 2; i++) {
@ -521,7 +531,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -550,7 +561,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -578,7 +590,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
@ -620,7 +633,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
for (int i = 0; i < 2; i++) {
@ -683,7 +697,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
@ -724,7 +739,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
BytesRef scratch = newBytesRef();
@ -759,7 +775,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(2, dv.getValueCount());
@ -798,7 +815,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(2, dv.getValueCount()); // 2 ords
@ -867,7 +885,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -892,7 +911,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -1008,7 +1028,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -1039,7 +1060,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -1067,7 +1089,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -1093,7 +1116,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = DocValues.getSorted(ireader.leaves().get(0).reader(), "dv");
assertEquals(0, dv.nextDoc());
@ -1115,7 +1139,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -1138,7 +1163,8 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexReader ireader =
maybeWrapWithMergingReader(DirectoryReader.open(directory)); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = DocValues.getSorted(ireader.leaves().get(0).reader(), "dv");
byte[] mybytes = new byte[20];
@ -1170,7 +1196,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir));
assertEquals(1, reader.leaves().size());
IndexSearcher searcher = new IndexSearcher(reader);
@ -1324,7 +1350,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
// Asserts equality of stored value vs. DocValue by iterating DocValues one at a time
protected void assertDVIterate(Directory dir) throws IOException {
DirectoryReader ir = DirectoryReader.open(dir);
DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir));
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
@ -1391,7 +1417,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir));
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
@ -2464,7 +2490,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
IndexReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
NumericDocValues dv = ar.getNumericDocValues("dv1");
@ -2491,7 +2517,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
IndexReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
NumericDocValues dv = ar.getNumericDocValues("dv1");
@ -2522,7 +2548,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
IndexReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
NumericDocValues dv = ar.getNumericDocValues("dv1");
@ -2549,7 +2575,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
IndexReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
BinaryDocValues dv = ar.getBinaryDocValues("dv1");
@ -2576,7 +2602,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
IndexReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
BinaryDocValues dv = ar.getBinaryDocValues("dv1");
@ -2607,7 +2633,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
IndexReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
BinaryDocValues dv = ar.getBinaryDocValues("dv1");
@ -2667,7 +2693,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
// compare
final DirectoryReader ir = DirectoryReader.open(dir);
final DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir));
int numThreads = TestUtil.nextInt(random(), 2, 7);
Thread[] threads = new Thread[numThreads];
final CountDownLatch startingGun = new CountDownLatch(1);
@ -2783,7 +2809,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
// compare
final DirectoryReader ir = DirectoryReader.open(dir);
final DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir));
int numThreads = TestUtil.nextInt(random(), 2, 7);
Thread[] threads = new Thread[numThreads];
final CountDownLatch startingGun = new CountDownLatch(1);
@ -2907,7 +2933,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
// now check with threads
for (int i = 0; i < 10; i++) {
final DirectoryReader r = DirectoryReader.open(dir);
final DirectoryReader r = maybeWrapWithMergingReader(DirectoryReader.open(dir));
final CountDownLatch startingGun = new CountDownLatch(1);
Thread[] threads = new Thread[TestUtil.nextInt(random(), 4, 10)];
for (int tid = 0; tid < threads.length; tid++) {
@ -2921,7 +2947,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
startingGun.await();
for (LeafReaderContext leaf : r.leaves()) {
DocValuesStatus status =
CheckIndex.testDocValues((SegmentReader) leaf.reader(), infoStream, true);
CheckIndex.testDocValues((CodecReader) leaf.reader(), infoStream, true);
if (status.error != null) {
throw status.error;
}
@ -2988,7 +3014,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
IndexReader reader = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -3009,7 +3035,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
IndexReader reader = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -3059,7 +3085,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
IndexReader reader = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -3081,7 +3107,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
IndexReader reader = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -3104,7 +3130,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
IndexReader reader = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
@ -3133,7 +3159,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
IndexReader reader = maybeWrapWithMergingReader(DirectoryReader.open(directory));
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.index;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.util.CloseableThreadLocal;
@ -45,6 +46,18 @@ public class MergingCodecReader extends FilterCodecReader {
}
}
};
// Thread-local holder for the doc values producer handed out by this reader; each thread's
// value is produced by initialValue() below.
private final CloseableThreadLocal<DocValuesProducer> docValuesReader =
    new CloseableThreadLocal<DocValuesProducer>() {
      @Override
      protected DocValuesProducer initialValue() {
        final DocValuesProducer delegate = in.getDocValuesReader();
        // The wrapped reader may expose no doc values producer; pass that through as null,
        // otherwise hand out its merge instance.
        return delegate == null ? null : delegate.getMergeInstance();
      }
    };
// TODO: other formats too
/** Wrap the given instance. */
@ -62,6 +75,11 @@ public class MergingCodecReader extends FilterCodecReader {
return normsReader.get();
}
/**
 * Returns the doc values producer held for the calling thread by {@code docValuesReader}.
 *
 * @return the thread-local producer, or {@code null} when the thread-local yields none
 */
@Override
public DocValuesProducer getDocValuesReader() {
  final DocValuesProducer perThreadProducer = docValuesReader.get();
  return perThreadProducer;
}
@Override
public CacheHelper getCoreCacheHelper() {
// same content, we can delegate