mirror of https://github.com/apache/lucene.git
LUCENE-5591: pass proper IOContext when writing DocValues updates
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1591469 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b8c02f6267
commit
2227cca025
|
@ -125,6 +125,9 @@ Optimizations
|
|||
* LUCENE-5599: HttpReplicator did not properly delegate bulk read() to wrapped
|
||||
InputStream. (Christoph Kaser via Shai Erera)
|
||||
|
||||
* LUCENE-5591: pass an IOContext with estimated flush size when applying DV
|
||||
updates. (Shai Erera)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-5600: HttpClientBase did not properly consume a connection if a server
|
||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.lucene.index;
|
|||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
|
@ -110,13 +111,15 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
|||
private PagedGrowableWriter offsets, lengths;
|
||||
private BytesRef values;
|
||||
private int size;
|
||||
private final int bitsPerValue;
|
||||
|
||||
public BinaryDocValuesFieldUpdates(String field, int maxDoc) {
|
||||
super(field, Type.BINARY);
|
||||
docsWithField = new FixedBitSet(64);
|
||||
docs = new PagedMutable(1, 1024, PackedInts.bitsRequired(maxDoc - 1), PackedInts.COMPACT);
|
||||
offsets = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
|
||||
lengths = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
|
||||
bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
|
||||
docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
|
||||
offsets = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
|
||||
lengths = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
|
||||
values = new BytesRef(16); // start small
|
||||
size = 0;
|
||||
}
|
||||
|
@ -222,7 +225,12 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
|||
lengths.set(size, otherUpdates.lengths.get(i));
|
||||
++size;
|
||||
}
|
||||
values.append(otherUpdates.values);
|
||||
int newLen = values.length + otherUpdates.values.length;
|
||||
if (values.bytes.length < newLen) {
|
||||
values.bytes = ArrayUtil.grow(values.bytes, newLen);
|
||||
}
|
||||
System.arraycopy(otherUpdates.values.bytes, otherUpdates.values.offset, values.bytes, values.length, otherUpdates.values.length);
|
||||
values.length = newLen;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -230,4 +238,14 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
|||
return size > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesPerDoc() {
|
||||
long bytesPerDoc = (long) Math.ceil((double) (bitsPerValue + 1 /* docsWithField */) / 8); // docs
|
||||
final int capacity = estimateCapacity(size);
|
||||
bytesPerDoc += (long) Math.ceil((double) offsets.ramBytesUsed() / capacity); // offsets
|
||||
bytesPerDoc += (long) Math.ceil((double) lengths.ramBytesUsed() / capacity); // lengths
|
||||
bytesPerDoc += (long) Math.ceil((double) values.length / size); // values
|
||||
return bytesPerDoc;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.index.NumericDocValuesFieldUpdates;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.packed.PagedGrowableWriter;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -30,6 +31,8 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
*/
|
||||
abstract class DocValuesFieldUpdates {
|
||||
|
||||
protected static final int PAGE_SIZE = 1024;
|
||||
|
||||
static enum Type { NUMERIC, BINARY }
|
||||
|
||||
/**
|
||||
|
@ -86,6 +89,17 @@ abstract class DocValuesFieldUpdates {
|
|||
return numericDVUpdates.size() + binaryDVUpdates.size();
|
||||
}
|
||||
|
||||
long ramBytesPerDoc() {
|
||||
long ramBytesPerDoc = 0;
|
||||
for (NumericDocValuesFieldUpdates updates : numericDVUpdates.values()) {
|
||||
ramBytesPerDoc += updates.ramBytesPerDoc();
|
||||
}
|
||||
for (BinaryDocValuesFieldUpdates updates : binaryDVUpdates.values()) {
|
||||
ramBytesPerDoc += updates.ramBytesPerDoc();
|
||||
}
|
||||
return ramBytesPerDoc;
|
||||
}
|
||||
|
||||
DocValuesFieldUpdates getUpdates(String field, Type type) {
|
||||
switch (type) {
|
||||
case NUMERIC:
|
||||
|
@ -128,6 +142,14 @@ abstract class DocValuesFieldUpdates {
|
|||
this.type = type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the estimated capacity of a {@link PagedGrowableWriter} given the
|
||||
* actual number of stored elements.
|
||||
*/
|
||||
protected static int estimateCapacity(int size) {
|
||||
return (int) Math.ceil((double) size / PAGE_SIZE) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an update to a document. For unsetting a value you should pass
|
||||
* {@code null}.
|
||||
|
@ -147,8 +169,10 @@ abstract class DocValuesFieldUpdates {
|
|||
*/
|
||||
public abstract void merge(DocValuesFieldUpdates other);
|
||||
|
||||
/** Returns true if this instance contains any updates.
|
||||
* @return TODO*/
|
||||
/** Returns true if this instance contains any updates. */
|
||||
public abstract boolean any();
|
||||
|
||||
/** Returns approximate RAM bytes used per document. */
|
||||
public abstract long ramBytesPerDoc();
|
||||
|
||||
}
|
||||
|
|
|
@ -88,6 +88,7 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
|||
}
|
||||
}
|
||||
|
||||
private final int bitsPerValue;
|
||||
private FixedBitSet docsWithField;
|
||||
private PagedMutable docs;
|
||||
private PagedGrowableWriter values;
|
||||
|
@ -96,8 +97,9 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
|||
public NumericDocValuesFieldUpdates(String field, int maxDoc) {
|
||||
super(field, Type.NUMERIC);
|
||||
docsWithField = new FixedBitSet(64);
|
||||
docs = new PagedMutable(1, 1024, PackedInts.bitsRequired(maxDoc - 1), PackedInts.COMPACT);
|
||||
values = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
|
||||
bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
|
||||
docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
|
||||
values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
|
||||
size = 0;
|
||||
}
|
||||
|
||||
|
@ -198,4 +200,12 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
|
|||
return size > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesPerDoc() {
|
||||
long bytesPerDoc = (long) Math.ceil((double) (bitsPerValue + 1 /* docsWithField */) / 8);
|
||||
final int capacity = estimateCapacity(size);
|
||||
bytesPerDoc += (long) Math.ceil((double) values.ramBytesUsed() / capacity); // values
|
||||
return bytesPerDoc;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.codecs.LiveDocsFormat;
|
|||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FlushInfo;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.TrackingDirectoryWrapper;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -342,7 +343,9 @@ class ReadersAndUpdates {
|
|||
fieldInfos = builder.finish();
|
||||
final long nextFieldInfosGen = info.getNextFieldInfosGen();
|
||||
final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
|
||||
final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, IOContext.DEFAULT, segmentSuffix);
|
||||
final long estUpdatesSize = dvUpdates.ramBytesPerDoc() * info.info.getDocCount();
|
||||
final IOContext updatesContext = new IOContext(new FlushInfo(info.info.getDocCount(), estUpdatesSize));
|
||||
final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
|
||||
final DocValuesFormat docValuesFormat = codec.docValuesFormat();
|
||||
final DocValuesConsumer fieldsConsumer = docValuesFormat.fieldsConsumer(state);
|
||||
boolean fieldsConsumerSuccess = false;
|
||||
|
@ -465,8 +468,13 @@ class ReadersAndUpdates {
|
|||
});
|
||||
}
|
||||
|
||||
codec.fieldInfosFormat().getFieldInfosWriter().write(trackingDir, info.info.name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
|
||||
fieldsConsumerSuccess = true;
|
||||
// we write approximately that many bytes (based on Lucene46DVF):
|
||||
// HEADER + FOOTER: 40
|
||||
// 90 bytes per-field (over estimating long name and attributes map)
|
||||
final long estInfosSize = 40 + 90 * fieldInfos.size();
|
||||
final IOContext infosContext = new IOContext(new FlushInfo(info.info.getDocCount(), estInfosSize));
|
||||
codec.fieldInfosFormat().getFieldInfosWriter().write(trackingDir, info.info.name, segmentSuffix, fieldInfos, infosContext);
|
||||
fieldsConsumerSuccess = true;
|
||||
} finally {
|
||||
if (fieldsConsumerSuccess) {
|
||||
fieldsConsumer.close();
|
||||
|
|
|
@ -27,12 +27,14 @@ import org.apache.lucene.document.SortedDocValuesField;
|
|||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.NRTCachingDirectory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
|
||||
|
@ -54,6 +56,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
|||
*/
|
||||
|
||||
@SuppressCodecs({"Lucene40","Lucene41","Lucene42","Lucene45"})
|
||||
@SuppressWarnings("resource")
|
||||
public class TestBinaryDocValuesUpdates extends LuceneTestCase {
|
||||
|
||||
static long getValue(BinaryDocValues bdv, int idx, BytesRef scratch) {
|
||||
|
@ -1450,4 +1453,30 @@ public class TestBinaryDocValuesUpdates extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIOContext() throws Exception {
|
||||
// LUCENE-5591: make sure we pass an IOContext with an approximate
|
||||
// segmentSize in FlushInfo
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
// we want a single large enough segment so that a doc-values update writes a large file
|
||||
conf.setMergePolicy(NoMergePolicy.INSTANCE);
|
||||
conf.setMaxBufferedDocs(Integer.MAX_VALUE); // manually flush
|
||||
conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
|
||||
IndexWriter writer = new IndexWriter(dir, conf.clone());
|
||||
for (int i = 0; i < 100; i++) {
|
||||
writer.addDocument(doc(i));
|
||||
}
|
||||
writer.commit();
|
||||
writer.close();
|
||||
|
||||
NRTCachingDirectory cachingDir = new NRTCachingDirectory(dir, 100, 1/(1024.*1024.));
|
||||
writer = new IndexWriter(cachingDir, conf.clone());
|
||||
writer.updateBinaryDocValue(new Term("id", "doc-0"), "val", toBytes(100L));
|
||||
DirectoryReader reader = DirectoryReader.open(writer, true); // flush
|
||||
assertEquals(0, cachingDir.listCachedFiles().length);
|
||||
|
||||
IOUtils.close(reader, writer, cachingDir);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.document.SortedDocValuesField;
|
|||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.NRTCachingDirectory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -55,6 +56,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
|||
*/
|
||||
|
||||
@SuppressCodecs({"Lucene40","Lucene41","Lucene42","Lucene45"})
|
||||
@SuppressWarnings("resource")
|
||||
public class TestNumericDocValuesUpdates extends LuceneTestCase {
|
||||
|
||||
private Document doc(int id) {
|
||||
|
@ -1431,4 +1433,31 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIOContext() throws Exception {
|
||||
// LUCENE-5591: make sure we pass an IOContext with an approximate
|
||||
// segmentSize in FlushInfo
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
// we want a single large enough segment so that a doc-values update writes a large file
|
||||
conf.setMergePolicy(NoMergePolicy.INSTANCE);
|
||||
conf.setMaxBufferedDocs(Integer.MAX_VALUE); // manually flush
|
||||
conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
|
||||
IndexWriter writer = new IndexWriter(dir, conf.clone());
|
||||
for (int i = 0; i < 100; i++) {
|
||||
writer.addDocument(doc(i));
|
||||
}
|
||||
writer.commit();
|
||||
writer.close();
|
||||
|
||||
NRTCachingDirectory cachingDir = new NRTCachingDirectory(dir, 100, 1/(1024.*1024.));
|
||||
writer = new IndexWriter(cachingDir, conf.clone());
|
||||
writer.updateNumericDocValue(new Term("id", "doc-0"), "val", 100L);
|
||||
DirectoryReader reader = DirectoryReader.open(writer, true); // flush
|
||||
assertEquals(0, cachingDir.listCachedFiles().length);
|
||||
for (String f : cachingDir.listAll()) System.out.println(f + " " + cachingDir.fileLength(f));
|
||||
|
||||
IOUtils.close(reader, writer, cachingDir);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue