mirror of https://github.com/apache/lucene.git
LUCENE-6906: Fix Lucene54DocValuesFormat on large empty segments.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1715918 13f79535-47bb-0310-9956-ffa450edef68
commit 8eea72d476
parent 60bb927256
@@ -319,7 +319,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
         // sparse bits need a bit more metadata
         entry.numDocsWithValue = meta.readVLong();
         final int blockShift = meta.readVInt();
-        entry.monotonicMeta = DirectMonotonicReader.loadMeta(meta, entry.numDocsWithValue + 1, blockShift);
+        entry.monotonicMeta = DirectMonotonicReader.loadMeta(meta, entry.numDocsWithValue, blockShift);
         ramBytesUsed.addAndGet(entry.monotonicMeta.ramBytesUsed());
         directAddressesMeta.put(info.name, entry.monotonicMeta);
       }
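Note: the one-token change above removes an off-by-one; the count passed to loadMeta must match the count the writer was sized for. A minimal round-trip sketch of the API (assumed Lucene 5.4-era classes and signatures, modeled on the TestDirectMonotonic code later in this diff; file names and values are illustrative, not from the commit):

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.DirectMonotonicReader;
import org.apache.lucene.util.packed.DirectMonotonicWriter;

public class DirectMonotonicRoundTrip {
  public static void main(String[] args) throws IOException {
    final long numValues = 3;   // must be the same on the write and read side
    final int blockShift = 16;  // illustrative; Lucene54 uses a block shift of 16
    try (Directory dir = new RAMDirectory()) {
      final long dataLength;
      try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
           IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
        DirectMonotonicWriter w = DirectMonotonicWriter.getInstance(metaOut, dataOut, numValues, blockShift);
        for (long v : new long[] {2, 3, 5}) {
          w.add(v);                          // values must be non-decreasing
        }
        w.finish();
        dataLength = dataOut.getFilePointer();
      }
      try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE);
           IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) {
        // Pass the same count here that was passed to getInstance above.
        DirectMonotonicReader.Meta meta = DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift);
        LongValues values = DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
        System.out.println(values.get(2));   // prints 5
      }
    }
  }
}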
@@ -123,7 +123,9 @@ public final class DirectMonotonicWriter {
     if (finished) {
       throw new IllegalStateException("#finish has been called already");
     }
+    if (bufferSize > 0) {
       flush();
+    }
     finished = true;
   }
 
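Note: for readability, the patched finish() reassembled from the hunk above (`finished` and `bufferSize` are the writer's existing fields):

public void finish() throws IOException {
  if (finished) {
    throw new IllegalStateException("#finish has been called already");
  }
  // Guard added by this commit: when every buffered block has already been
  // written (zero values, or a value count that is an exact multiple of the
  // block size), there is nothing left to flush.
  if (bufferSize > 0) {
    flush();
  }
  finished = true;
}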
@@ -17,6 +17,7 @@ package org.apache.lucene.codecs.lucene54;
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -24,6 +25,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
@@ -138,7 +140,7 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
 
   @Slow
   public void testSparseDocValuesVsStoredFields() throws Exception {
-    int numIterations = atLeast(2);
+    int numIterations = atLeast(1);
     for (int i = 0; i < numIterations; i++) {
       doTestSparseDocValuesVsStoredFields();
     }
@@ -158,7 +160,7 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
     // sparse compression is only enabled if less than 1% of docs have a value
     final int avgGap = 100;
 
-    final int numDocs = atLeast(100);
+    final int numDocs = atLeast(200);
     for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
       writer.addDocument(new Document());
     }
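Note: a quick back-of-the-envelope check of the comment above (informal, not part of the commit). Gaps are drawn uniformly and average avgGap empty docs per valued doc, so roughly one doc in avgGap + 1 carries a value:

// Expected fraction of docs with a value, given avgGap = 100.
double density = 1.0 / (100 + 1);  // ~0.0099
assert density < 0.01;             // just under the 1% sparse-compression cutoff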
@@ -185,7 +187,7 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
       writer.addDocument(doc);
 
       // add a gap
-      for (int j = random().nextInt(avgGap * 2); j >= 0; --j) {
+      for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
         writer.addDocument(new Document());
       }
     }
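Note the subtle range change in this hunk: Random#nextInt(n) draws from [0, n), while TestUtil.nextInt(random, 0, n) is inclusive of both endpoints, so the new code can also generate the maximal gap. A two-line fragment in the test's scope (avgGap = 100) to make the bounds explicit:

int gapOld = random().nextInt(avgGap * 2);               // uniform over 0 .. 199
int gapNew = TestUtil.nextInt(random(), 0, avgGap * 2);  // uniform over 0 .. 200, inclusive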
@@ -502,4 +504,5 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
       }
     }
   }
+
 }
@@ -32,6 +32,28 @@ import org.apache.lucene.util.TestUtil;
 
 public class TestDirectMonotonic extends LuceneTestCase {
 
+  public void testEmpty() throws IOException {
+    Directory dir = newDirectory();
+    final int blockShift = TestUtil.nextInt(random(), DirectMonotonicWriter.MIN_BLOCK_SHIFT, DirectMonotonicWriter.MAX_BLOCK_SHIFT);
+
+    final long dataLength;
+    try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
+        IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
+      DirectMonotonicWriter w = DirectMonotonicWriter.getInstance(metaOut, dataOut, 0, blockShift);
+      w.finish();
+      dataLength = dataOut.getFilePointer();
+    }
+
+    try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE);
+        IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) {
+      DirectMonotonicReader.Meta meta = DirectMonotonicReader.loadMeta(metaIn, 0, blockShift);
+      DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
+      // no exception
+    }
+
+    dir.close();
+  }
+
   public void testSimple() throws IOException {
     Directory dir = newDirectory();
     final int blockShift = 2;
@@ -100,12 +122,25 @@ public class TestDirectMonotonic extends LuceneTestCase {
   }
 
   public void testRandom() throws IOException {
+    final int iters = atLeast(3);
+    for (int iter = 0; iter < iters; ++iter) {
     Directory dir = newDirectory();
     final int blockShift = TestUtil.nextInt(random(), DirectMonotonicWriter.MIN_BLOCK_SHIFT, DirectMonotonicWriter.MAX_BLOCK_SHIFT);
-    final int numValues = TestUtil.nextInt(random(), 1, 1 << 20);
+    final int maxNumValues = 1 << 20;
+    final int numValues;
+    if (random().nextBoolean()) {
+      // random number
+      numValues = TestUtil.nextInt(random(), 1, maxNumValues);
+    } else {
+      // multiple of the block size
+      final int numBlocks = TestUtil.nextInt(random(), 0, maxNumValues >>> blockShift);
+      numValues = TestUtil.nextInt(random(), 0, numBlocks) << blockShift;
+    }
     List<Long> actualValues = new ArrayList<>();
     long previous = random().nextLong();
+    if (numValues > 0) {
     actualValues.add(previous);
+    }
     for (int i = 1; i < numValues; ++i) {
       previous += random().nextInt(1 << random().nextInt(20));
       actualValues.add(previous);
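Note: the new else branch is the interesting one for this bug. When the value count is an exact multiple of the block size, the writer's buffer is presumably empty by the time finish() runs, which is exactly the state the new bufferSize > 0 guard in DirectMonotonicWriter handles. A minimal sketch (assumed Lucene 5.4 APIs, mirroring testEmpty/testRandom above; imports as in the round-trip sketch after the first hunk):

// Write exactly one full block so nothing is buffered at finish() time.
int blockShift = 2;                 // testSimple's value
int numValues = 1 << blockShift;    // exact multiple of the block size
try (Directory dir = new RAMDirectory()) {
  try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
       IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
    DirectMonotonicWriter w = DirectMonotonicWriter.getInstance(metaOut, dataOut, numValues, blockShift);
    for (int i = 0; i < numValues; ++i) {
      w.add(i);                     // the buffer flushes as the block fills
    }
    w.finish();                     // previously this flushed an empty buffer
  }
}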
@@ -133,5 +168,6 @@ public class TestDirectMonotonic extends LuceneTestCase {
 
     dir.close();
   }
+  }
 
 }
@@ -3148,6 +3148,172 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
     assertEquals(term2.get(), enum2.term());
   }
 
+  // same as testSortedMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
+  public void testSortedMergeAwayAllValuesLargeSegment() throws IOException {
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+    Document doc = new Document();
+    doc.add(new StringField("id", "1", Field.Store.NO));
+    doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    final int numEmptyDocs = atLeast(1024);
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      iwriter.addDocument(new Document());
+    }
+    iwriter.commit();
+    iwriter.deleteDocuments(new Term("id", "1"));
+    iwriter.forceMerge(1);
+
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      assertEquals(-1, dv.getOrd(i));
+    }
+
+    ireader.close();
+    directory.close();
+  }
+
+  // same as testSortedSetMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
+  public void testSortedSetMergeAwayAllValuesLargeSegment() throws IOException {
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+    Document doc = new Document();
+    doc.add(new StringField("id", "1", Field.Store.NO));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    final int numEmptyDocs = atLeast(1024);
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      iwriter.addDocument(new Document());
+    }
+    iwriter.commit();
+    iwriter.deleteDocuments(new Term("id", "1"));
+    iwriter.forceMerge(1);
+
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      dv.setDocument(i);
+      assertEquals(-1L, dv.nextOrd());
+    }
+
+    ireader.close();
+    directory.close();
+  }
+
+  // same as testNumericMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
+  public void testNumericMergeAwayAllValuesLargeSegment() throws IOException {
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+    Document doc = new Document();
+    doc.add(new StringField("id", "1", Field.Store.NO));
+    doc.add(new NumericDocValuesField("field", 42L));
+    iwriter.addDocument(doc);
+    final int numEmptyDocs = atLeast(1024);
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      iwriter.addDocument(new Document());
+    }
+    iwriter.commit();
+    iwriter.deleteDocuments(new Term("id", "1"));
+    iwriter.forceMerge(1);
+
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    NumericDocValues dv = getOnlySegmentReader(ireader).getNumericDocValues("field");
+    Bits docsWithField = getOnlySegmentReader(ireader).getDocsWithField("field");
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      assertEquals(0, dv.get(i));
+      assertFalse(docsWithField.get(i));
+    }
+
+    ireader.close();
+    directory.close();
+  }
+
+  // same as testSortedNumericMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
+  public void testSortedNumericMergeAwayAllValuesLargeSegment() throws IOException {
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+    Document doc = new Document();
+    doc.add(new StringField("id", "1", Field.Store.NO));
+    doc.add(new SortedNumericDocValuesField("field", 42L));
+    iwriter.addDocument(doc);
+    final int numEmptyDocs = atLeast(1024);
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      iwriter.addDocument(new Document());
+    }
+    iwriter.commit();
+    iwriter.deleteDocuments(new Term("id", "1"));
+    iwriter.forceMerge(1);
+
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    SortedNumericDocValues dv = getOnlySegmentReader(ireader).getSortedNumericDocValues("field");
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      dv.setDocument(i);
+      assertEquals(0, dv.count());
+    }
+
+    ireader.close();
+    directory.close();
+  }
+
+  // same as testBinaryMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
+  public void testBinaryMergeAwayAllValuesLargeSegment() throws IOException {
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+    Document doc = new Document();
+    doc.add(new StringField("id", "1", Field.Store.NO));
+    doc.add(new BinaryDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    final int numEmptyDocs = atLeast(1024);
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      iwriter.addDocument(new Document());
+    }
+    iwriter.commit();
+    iwriter.deleteDocuments(new Term("id", "1"));
+    iwriter.forceMerge(1);
+
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    BinaryDocValues dv = getOnlySegmentReader(ireader).getBinaryDocValues("field");
+    Bits docsWithField = getOnlySegmentReader(ireader).getDocsWithField("field");
+    for (int i = 0; i < numEmptyDocs; ++i) {
+      assertEquals(new BytesRef(), dv.get(i));
+      assertFalse(docsWithField.get(i));
+    }
+
+    ireader.close();
+    directory.close();
+  }
+
   protected boolean codecAcceptsHugeBinaryValues(String field) {
     return true;
   }
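Note: the five new tests differ only in the doc-values field they index and the assertion they run against the merged, all-missing segment. A hypothetical condensation of the shared shape (not part of the commit; `SegmentVerifier` and `doTestMergeAwayAllValuesLargeSegment` are invented names, and the lambda usage assumes the Java 8 trunk toolchain):

// Invented helper: one valued doc among > 1024 empty docs (so Lucene54 picks
// the sparse encoding), delete the valued doc, force-merge, then verify the
// merged segment reads back as all-missing.
interface SegmentVerifier {
  void verify(LeafReader reader, int numEmptyDocs) throws IOException;
}

private void doTestMergeAwayAllValuesLargeSegment(Field dvField, SegmentVerifier verifier) throws IOException {
  Directory directory = newDirectory();
  IndexWriterConfig iwconfig = newIndexWriterConfig(new MockAnalyzer(random()));
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.NO));
  doc.add(dvField);
  iwriter.addDocument(doc);
  final int numEmptyDocs = atLeast(1024);
  for (int i = 0; i < numEmptyDocs; ++i) {
    iwriter.addDocument(new Document());
  }
  iwriter.commit();
  iwriter.deleteDocuments(new Term("id", "1"));
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();
  verifier.verify(getOnlySegmentReader(ireader), numEmptyDocs);
  ireader.close();
  directory.close();
}

// e.g. the SORTED variant would then reduce to:
// doTestMergeAwayAllValuesLargeSegment(
//     new SortedDocValuesField("field", new BytesRef("hello")),
//     (reader, numEmptyDocs) -> {
//       SortedDocValues dv = reader.getSortedDocValues("field");
//       for (int i = 0; i < numEmptyDocs; ++i) {
//         assertEquals(-1, dv.getOrd(i));
//       }
//     });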