mirror of https://github.com/apache/lucene.git

commit 2227cca025 (parent b8c02f6267)

LUCENE-5591: pass proper IOContext when writing DocValues updates

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1591469 13f79535-47bb-0310-9956-ffa450edef68
lucene/CHANGES.txt
@@ -125,6 +125,9 @@ Optimizations
 * LUCENE-5599: HttpReplicator did not properly delegate bulk read() to wrapped
   InputStream. (Christoph Kaser via Shai Erera)
 
+* LUCENE-5591: pass an IOContext with estimated flush size when applying DV
+  updates. (Shai Erera)
+
 Bug fixes
 
 * LUCENE-5600: HttpClientBase did not properly consume a connection if a server
lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java
@@ -3,6 +3,7 @@ package org.apache.lucene.index;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.InPlaceMergeSorter;
@@ -110,13 +111,15 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
   private PagedGrowableWriter offsets, lengths;
   private BytesRef values;
   private int size;
+  private final int bitsPerValue;
 
   public BinaryDocValuesFieldUpdates(String field, int maxDoc) {
     super(field, Type.BINARY);
     docsWithField = new FixedBitSet(64);
-    docs = new PagedMutable(1, 1024, PackedInts.bitsRequired(maxDoc - 1), PackedInts.COMPACT);
-    offsets = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
-    lengths = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
+    bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
+    docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
+    offsets = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
+    lengths = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
     values = new BytesRef(16); // start small
     size = 0;
   }
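Side note on this constructor change: the magic number 1024 becomes the shared PAGE_SIZE constant (hoisted into DocValuesFieldUpdates below), and the packed width for doc ids is computed once and kept in bitsPerValue so the new ramBytesPerDoc() can reuse it. A minimal, standalone sketch of what PackedInts.bitsRequired computes; the million-doc figure is illustrative, not from the patch:

    // BitsRequiredDemo.java -- hypothetical demo class, not part of the patch.
    public class BitsRequiredDemo {
      // Essentially what PackedInts.bitsRequired does: position of the highest
      // set bit, with a floor of one bit for the value 0.
      static int bitsRequired(long v) {
        return Math.max(1, 64 - Long.numberOfLeadingZeros(v));
      }
      public static void main(String[] args) {
        int maxDoc = 1_000_000;
        // 999,999 fits in 20 bits, so each buffered doc id costs 20 bits
        // in the PagedMutable instead of a full 32-bit int.
        System.out.println(bitsRequired(maxDoc - 1)); // 20
      }
    }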
@@ -222,7 +225,12 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
       lengths.set(size, otherUpdates.lengths.get(i));
       ++size;
     }
-    values.append(otherUpdates.values);
+    int newLen = values.length + otherUpdates.values.length;
+    if (values.bytes.length < newLen) {
+      values.bytes = ArrayUtil.grow(values.bytes, newLen);
+    }
+    System.arraycopy(otherUpdates.values.bytes, otherUpdates.values.offset, values.bytes, values.length, otherUpdates.values.length);
+    values.length = newLen;
   }
 
   @Override
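The merge path stops calling BytesRef.append and inlines a grow-then-copy, which is why ArrayUtil is now imported; the commit does not state the motivation, but the effect is the same: otherUpdates' bytes land after the existing ones. A self-contained sketch of the idiom (it assumes the destination's offset is 0, as it is for the values buffer here):

    import org.apache.lucene.util.ArrayUtil;
    import org.apache.lucene.util.BytesRef;

    // AppendDemo.java -- hypothetical demo class, not part of the patch.
    public class AppendDemo {
      public static void main(String[] args) {
        BytesRef dst = new BytesRef("abc");
        BytesRef src = new BytesRef("defg");
        int newLen = dst.length + src.length;
        if (dst.bytes.length < newLen) {
          // ArrayUtil.grow oversizes, so repeated appends stay amortized O(1)
          dst.bytes = ArrayUtil.grow(dst.bytes, newLen);
        }
        System.arraycopy(src.bytes, src.offset, dst.bytes, dst.length, src.length);
        dst.length = newLen;
        System.out.println(dst.utf8ToString()); // abcdefg
      }
    }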
@@ -230,4 +238,14 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
     return size > 0;
   }
 
+  @Override
+  public long ramBytesPerDoc() {
+    long bytesPerDoc = (long) Math.ceil((double) (bitsPerValue + 1 /* docsWithField */) / 8); // docs
+    final int capacity = estimateCapacity(size);
+    bytesPerDoc += (long) Math.ceil((double) offsets.ramBytesUsed() / capacity); // offsets
+    bytesPerDoc += (long) Math.ceil((double) lengths.ramBytesUsed() / capacity); // lengths
+    bytesPerDoc += (long) Math.ceil((double) values.length / size); // values
+    return bytesPerDoc;
+  }
+
 }
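The estimate charges every buffered update for three things: its packed doc id plus one docsWithField bit, its share of the offsets/lengths writers (divided by the page-rounded capacity, see estimateCapacity in DocValuesFieldUpdates below), and the average value length. A worked example with assumed figures:

    // RamPerDocDemo.java -- hypothetical numbers, for illustration only.
    public class RamPerDocDemo {
      public static void main(String[] args) {
        int bitsPerValue = 20;                      // packed doc-id width (1M docs)
        int size = 1500;                            // buffered updates
        long offsetsRam = 4096, lengthsRam = 2048;  // assumed writer footprints
        long valuesLen = 30_000;                    // total value bytes
        long perDoc = (long) Math.ceil((bitsPerValue + 1) / 8.0);   // 3: doc id + docsWithField bit
        int capacity = (int) Math.ceil(size / 1024.0) * 1024;       // 2048 (page-rounded)
        perDoc += (long) Math.ceil((double) offsetsRam / capacity); // +2
        perDoc += (long) Math.ceil((double) lengthsRam / capacity); // +1
        perDoc += (long) Math.ceil((double) valuesLen / size);      // +20
        System.out.println(perDoc); // 26 bytes per buffered update
      }
    }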
lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
@@ -5,6 +5,7 @@ import java.util.Map;
 
 import org.apache.lucene.index.NumericDocValuesFieldUpdates;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.packed.PagedGrowableWriter;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -30,6 +31,8 @@ import org.apache.lucene.search.DocIdSetIterator;
  */
 abstract class DocValuesFieldUpdates {
 
+  protected static final int PAGE_SIZE = 1024;
+
   static enum Type { NUMERIC, BINARY }
 
   /**
@@ -86,6 +89,17 @@ abstract class DocValuesFieldUpdates {
       return numericDVUpdates.size() + binaryDVUpdates.size();
     }
 
+    long ramBytesPerDoc() {
+      long ramBytesPerDoc = 0;
+      for (NumericDocValuesFieldUpdates updates : numericDVUpdates.values()) {
+        ramBytesPerDoc += updates.ramBytesPerDoc();
+      }
+      for (BinaryDocValuesFieldUpdates updates : binaryDVUpdates.values()) {
+        ramBytesPerDoc += updates.ramBytesPerDoc();
+      }
+      return ramBytesPerDoc;
+    }
+
     DocValuesFieldUpdates getUpdates(String field, Type type) {
       switch (type) {
         case NUMERIC:
@@ -128,6 +142,14 @@ abstract class DocValuesFieldUpdates {
     this.type = type;
   }
 
+  /**
+   * Returns the estimated capacity of a {@link PagedGrowableWriter} given the
+   * actual number of stored elements.
+   */
+  protected static int estimateCapacity(int size) {
+    return (int) Math.ceil((double) size / PAGE_SIZE) * PAGE_SIZE;
+  }
+
   /**
    * Add an update to a document. For unsetting a value you should pass
    * {@code null}.
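estimateCapacity exists because PagedGrowableWriter allocates whole pages: a writer holding size elements has really reserved the next multiple of PAGE_SIZE slots, so dividing its RAM by the raw size would overstate the per-element cost. The rounding behaves like this:

    // CapacityDemo.java -- illustrative only; mirrors the method above.
    public class CapacityDemo {
      static final int PAGE_SIZE = 1024;
      static int estimateCapacity(int size) {
        return (int) Math.ceil((double) size / PAGE_SIZE) * PAGE_SIZE;
      }
      public static void main(String[] args) {
        System.out.println(estimateCapacity(1));    // 1024
        System.out.println(estimateCapacity(1024)); // 1024
        System.out.println(estimateCapacity(1025)); // 2048
      }
    }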
@@ -147,8 +169,10 @@ abstract class DocValuesFieldUpdates {
    */
   public abstract void merge(DocValuesFieldUpdates other);
 
-  /** Returns true if this instance contains any updates.
-   * @return TODO*/
+  /** Returns true if this instance contains any updates. */
   public abstract boolean any();
 
+  /** Returns approximate RAM bytes used per document. */
+  public abstract long ramBytesPerDoc();
+
 }
lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java
@@ -88,6 +88,7 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
     }
   }
 
+  private final int bitsPerValue;
   private FixedBitSet docsWithField;
   private PagedMutable docs;
   private PagedGrowableWriter values;
@@ -96,8 +97,9 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
   public NumericDocValuesFieldUpdates(String field, int maxDoc) {
     super(field, Type.NUMERIC);
     docsWithField = new FixedBitSet(64);
-    docs = new PagedMutable(1, 1024, PackedInts.bitsRequired(maxDoc - 1), PackedInts.COMPACT);
-    values = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
+    bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
+    docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
+    values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
     size = 0;
   }
 
@@ -198,4 +200,12 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
     return size > 0;
   }
 
+  @Override
+  public long ramBytesPerDoc() {
+    long bytesPerDoc = (long) Math.ceil((double) (bitsPerValue + 1 /* docsWithField */) / 8);
+    final int capacity = estimateCapacity(size);
+    bytesPerDoc += (long) Math.ceil((double) values.ramBytesUsed() / capacity); // values
+    return bytesPerDoc;
+  }
+
 }
lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
@@ -33,6 +33,7 @@ import org.apache.lucene.codecs.LiveDocsFormat;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FlushInfo;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.TrackingDirectoryWrapper;
 import org.apache.lucene.util.Bits;
@@ -342,7 +343,9 @@ class ReadersAndUpdates {
       fieldInfos = builder.finish();
       final long nextFieldInfosGen = info.getNextFieldInfosGen();
       final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
-      final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, IOContext.DEFAULT, segmentSuffix);
+      final long estUpdatesSize = dvUpdates.ramBytesPerDoc() * info.info.getDocCount();
+      final IOContext updatesContext = new IOContext(new FlushInfo(info.info.getDocCount(), estUpdatesSize));
+      final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
       final DocValuesFormat docValuesFormat = codec.docValuesFormat();
       final DocValuesConsumer fieldsConsumer = docValuesFormat.fieldsConsumer(state);
       boolean fieldsConsumerSuccess = false;
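This is the heart of the fix: IOContext.DEFAULT carries no size information, so directories that make decisions based on expected file size had nothing to go on when DV updates were flushed. The replacement context wraps a FlushInfo built from the new per-doc estimate times the segment's doc count. A small sketch of what the context now exposes (the numbers are assumed):

    import org.apache.lucene.store.FlushInfo;
    import org.apache.lucene.store.IOContext;

    // UpdatesContextDemo.java -- hypothetical demo, not part of the patch.
    public class UpdatesContextDemo {
      public static void main(String[] args) {
        int docCount = 1_000_000;  // assumed segment doc count
        long ramBytesPerDoc = 26;  // assumed estimate (see worked example above)
        IOContext ctx = new IOContext(new FlushInfo(docCount, ramBytesPerDoc * docCount));
        // Size-aware directories can now read an estimate instead of seeing DEFAULT:
        System.out.println(ctx.flushInfo.numDocs);              // 1000000
        System.out.println(ctx.flushInfo.estimatedSegmentSize); // 26000000
      }
    }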
@@ -465,7 +468,12 @@ class ReadersAndUpdates {
         });
       }
 
-      codec.fieldInfosFormat().getFieldInfosWriter().write(trackingDir, info.info.name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
+      // we write approximately that many bytes (based on Lucene46DVF):
+      // HEADER + FOOTER: 40
+      // 90 bytes per-field (over estimating long name and attributes map)
+      final long estInfosSize = 40 + 90 * fieldInfos.size();
+      final IOContext infosContext = new IOContext(new FlushInfo(info.info.getDocCount(), estInfosSize));
+      codec.fieldInfosFormat().getFieldInfosWriter().write(trackingDir, info.info.name, segmentSuffix, fieldInfos, infosContext);
       fieldsConsumerSuccess = true;
     } finally {
       if (fieldsConsumerSuccess) {
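The fieldInfos file gets the same treatment with a cruder estimate: a fixed 40 bytes of header and footer plus roughly 90 bytes per field, deliberately overestimating. Worked out for a hypothetical five-field segment:

    // InfosSizeDemo.java -- illustrative arithmetic only.
    public class InfosSizeDemo {
      public static void main(String[] args) {
        int numFields = 5;                         // assumed
        long estInfosSize = 40 + 90L * numFields;  // header/footer + per-field slack
        System.out.println(estInfosSize);          // 490 bytes
      }
    }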
lucene/core/src/test/org/apache/lucene/index/TestBinaryDocValuesUpdates.java
@@ -27,12 +27,14 @@ import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.NRTCachingDirectory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.TestUtil;
+import org.junit.Test;
 
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 
@@ -54,6 +56,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
  */
 
 @SuppressCodecs({"Lucene40","Lucene41","Lucene42","Lucene45"})
+@SuppressWarnings("resource")
 public class TestBinaryDocValuesUpdates extends LuceneTestCase {
 
   static long getValue(BinaryDocValues bdv, int idx, BytesRef scratch) {
@@ -1450,4 +1453,30 @@ public class TestBinaryDocValuesUpdates extends LuceneTestCase {
     dir.close();
   }
 
+  @Test
+  public void testIOContext() throws Exception {
+    // LUCENE-5591: make sure we pass an IOContext with an approximate
+    // segmentSize in FlushInfo
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    // we want a single large enough segment so that a doc-values update writes a large file
+    conf.setMergePolicy(NoMergePolicy.INSTANCE);
+    conf.setMaxBufferedDocs(Integer.MAX_VALUE); // manually flush
+    conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+    IndexWriter writer = new IndexWriter(dir, conf.clone());
+    for (int i = 0; i < 100; i++) {
+      writer.addDocument(doc(i));
+    }
+    writer.commit();
+    writer.close();
+
+    NRTCachingDirectory cachingDir = new NRTCachingDirectory(dir, 100, 1/(1024.*1024.));
+    writer = new IndexWriter(cachingDir, conf.clone());
+    writer.updateBinaryDocValue(new Term("id", "doc-0"), "val", toBytes(100L));
+    DirectoryReader reader = DirectoryReader.open(writer, true); // flush
+    assertEquals(0, cachingDir.listCachedFiles().length);
+
+    IOUtils.close(reader, writer, cachingDir);
+  }
+
 }
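How the test verifies the fix: NRTCachingDirectory's third argument is maxCachedMB, and 1/(1024.*1024.) MB is a single byte, so a file is only cached in RAM if its expected size is essentially zero. With IOContext.DEFAULT there was no FlushInfo, the expected size defaulted to zero, and update files were wrongly cached; with the new context the estimate blows the one-byte budget and the files go straight to disk, hence listCachedFiles() must come back empty. A paraphrase of the decision being exercised (not NRTCachingDirectory's exact method, which also special-cases segments files and merges):

    import org.apache.lucene.store.IOContext;

    // CacheDecisionDemo.java -- hypothetical paraphrase, for illustration.
    public class CacheDecisionDemo {
      static boolean wouldCache(IOContext context, long maxCachedBytes) {
        long expected = context.flushInfo != null ? context.flushInfo.estimatedSegmentSize : 0;
        return expected <= maxCachedBytes;
      }
      public static void main(String[] args) {
        // DEFAULT has no flushInfo: expected size 0, file would be cached in RAM.
        System.out.println(wouldCache(IOContext.DEFAULT, 1)); // true
      }
    }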
lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
@@ -27,6 +27,7 @@ import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.NRTCachingDirectory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
@@ -55,6 +56,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
  */
 
 @SuppressCodecs({"Lucene40","Lucene41","Lucene42","Lucene45"})
+@SuppressWarnings("resource")
 public class TestNumericDocValuesUpdates extends LuceneTestCase {
 
   private Document doc(int id) {
@@ -1431,4 +1433,31 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
     dir.close();
   }
 
+  @Test
+  public void testIOContext() throws Exception {
+    // LUCENE-5591: make sure we pass an IOContext with an approximate
+    // segmentSize in FlushInfo
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    // we want a single large enough segment so that a doc-values update writes a large file
+    conf.setMergePolicy(NoMergePolicy.INSTANCE);
+    conf.setMaxBufferedDocs(Integer.MAX_VALUE); // manually flush
+    conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+    IndexWriter writer = new IndexWriter(dir, conf.clone());
+    for (int i = 0; i < 100; i++) {
+      writer.addDocument(doc(i));
+    }
+    writer.commit();
+    writer.close();
+
+    NRTCachingDirectory cachingDir = new NRTCachingDirectory(dir, 100, 1/(1024.*1024.));
+    writer = new IndexWriter(cachingDir, conf.clone());
+    writer.updateNumericDocValue(new Term("id", "doc-0"), "val", 100L);
+    DirectoryReader reader = DirectoryReader.open(writer, true); // flush
+    assertEquals(0, cachingDir.listCachedFiles().length);
+    for (String f : cachingDir.listAll()) System.out.println(f + " " + cachingDir.fileLength(f));
+
+    IOUtils.close(reader, writer, cachingDir);
+  }
+
 }