LUCENE-5591: pass proper IOContext when writing DocValues updates

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1591469 13f79535-47bb-0310-9956-ffa450edef68
Shai Erera 2014-04-30 20:14:20 +00:00
parent b8c02f6267
commit 2227cca025
7 changed files with 134 additions and 13 deletions
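LUCENE-5591 in one sentence: ReadersAndUpdates used to write doc-values updates and the per-generation field infos with IOContext.DEFAULT, which carries no size information, so size-aware directories such as NRTCachingDirectory could not tell a multi-megabyte updates file from a tiny one. The fix estimates the flush size up front and passes it through a FlushInfo. A minimal sketch of the idiom, assuming a hypothetical helper (createOutputWithEstimate is not part of the commit):

import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

class IOContextSketch {
  // Hypothetical helper, not from the commit: open an output with a
  // size-carrying IOContext instead of the size-less IOContext.DEFAULT.
  static IndexOutput createOutputWithEstimate(Directory dir, String name,
      int numDocs, long estimatedBytes) throws IOException {
    // FlushInfo reports the doc count and the estimated on-disk size;
    // caching directories read it to decide whether the file fits in RAM.
    IOContext ctx = new IOContext(new FlushInfo(numDocs, estimatedBytes));
    return dir.createOutput(name, ctx);
  }
}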

lucene/CHANGES.txt

@@ -124,6 +124,9 @@ Optimizations
* LUCENE-5599: HttpReplicator did not properly delegate bulk read() to wrapped
  InputStream. (Christoph Kaser via Shai Erera)

* LUCENE-5591: pass an IOContext with estimated flush size when applying DV
  updates. (Shai Erera)

Bug fixes

lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java

@@ -3,6 +3,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.InPlaceMergeSorter;
@@ -110,13 +111,15 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
private PagedGrowableWriter offsets, lengths;
private BytesRef values;
private int size;
private final int bitsPerValue;
public BinaryDocValuesFieldUpdates(String field, int maxDoc) {
super(field, Type.BINARY);
docsWithField = new FixedBitSet(64);
- docs = new PagedMutable(1, 1024, PackedInts.bitsRequired(maxDoc - 1), PackedInts.COMPACT);
- offsets = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
- lengths = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
+ bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
+ docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
+ offsets = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
+ lengths = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
values = new BytesRef(16); // start small
size = 0;
}
@@ -222,12 +225,27 @@ class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {
lengths.set(size, otherUpdates.lengths.get(i));
++size;
}
- values.append(otherUpdates.values);
+ int newLen = values.length + otherUpdates.values.length;
+ if (values.bytes.length < newLen) {
+   values.bytes = ArrayUtil.grow(values.bytes, newLen);
+ }
+ System.arraycopy(otherUpdates.values.bytes, otherUpdates.values.offset, values.bytes, values.length, otherUpdates.values.length);
+ values.length = newLen;
}
@Override
public boolean any() {
return size > 0;
}
@Override
public long ramBytesPerDoc() {
long bytesPerDoc = (long) Math.ceil((double) (bitsPerValue + 1 /* docsWithField */) / 8); // docs
final int capacity = estimateCapacity(size);
bytesPerDoc += (long) Math.ceil((double) offsets.ramBytesUsed() / capacity); // offsets
bytesPerDoc += (long) Math.ceil((double) lengths.ramBytesUsed() / capacity); // lengths
bytesPerDoc += (long) Math.ceil((double) values.length / size); // values
return bytesPerDoc;
}
}
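A sense of scale for ramBytesPerDoc() above: the first term charges ceil((bitsPerValue + 1) / 8) bytes per document for the packed doc ids plus the docsWithField bit, the paged writers are amortized over the page-rounded capacity, and the values buffer over the number of buffered updates (the last term divides by size, so the estimate presumes at least one update). A worked example with made-up numbers, not taken from the commit:

import org.apache.lucene.util.packed.PackedInts;

class BytesPerDocExample {
  public static void main(String[] args) {
    int maxDoc = 1_000_000;                                 // made-up segment size
    int bitsPerValue = PackedInts.bitsRequired(maxDoc - 1); // 20 bits per doc id
    long bytesPerDoc = (long) Math.ceil((bitsPerValue + 1) / 8.0);
    System.out.println(bytesPerDoc);                        // 3: doc ids + docsWithField
    // offsets and lengths then add ramBytesUsed() / capacity each, and
    // values adds the average stored value length, values.length / size
  }
}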

lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java

@@ -5,6 +5,7 @@ import java.util.Map;
import org.apache.lucene.index.NumericDocValuesFieldUpdates;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.packed.PagedGrowableWriter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -30,6 +31,8 @@ import org.apache.lucene.search.DocIdSetIterator;
*/
abstract class DocValuesFieldUpdates {
protected static final int PAGE_SIZE = 1024;
static enum Type { NUMERIC, BINARY }
/**
@@ -86,6 +89,17 @@ abstract class DocValuesFieldUpdates {
return numericDVUpdates.size() + binaryDVUpdates.size();
}
long ramBytesPerDoc() {
long ramBytesPerDoc = 0;
for (NumericDocValuesFieldUpdates updates : numericDVUpdates.values()) {
ramBytesPerDoc += updates.ramBytesPerDoc();
}
for (BinaryDocValuesFieldUpdates updates : binaryDVUpdates.values()) {
ramBytesPerDoc += updates.ramBytesPerDoc();
}
return ramBytesPerDoc;
}
DocValuesFieldUpdates getUpdates(String field, Type type) {
switch (type) {
case NUMERIC:
@@ -128,6 +142,14 @@
this.type = type;
}
/**
* Returns the estimated capacity of a {@link PagedGrowableWriter} given the
* actual number of stored elements.
*/
protected static int estimateCapacity(int size) {
return (int) Math.ceil((double) size / PAGE_SIZE) * PAGE_SIZE;
}
/**
* Add an update to a document. For unsetting a value you should pass
* {@code null}.
@@ -147,8 +169,10 @@
*/
public abstract void merge(DocValuesFieldUpdates other);
- /** Returns true if this instance contains any updates.
-  * @return TODO*/
+ /** Returns true if this instance contains any updates. */
public abstract boolean any();
/** Returns approximate RAM bytes used per document. */
public abstract long ramBytesPerDoc();
}
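estimateCapacity exists because PagedMutable and PagedGrowableWriter allocate whole pages, so the per-document estimates divide by the number of allocated slots rather than the raw element count. It rounds the count up to a multiple of PAGE_SIZE; a few sample values, assuming the PAGE_SIZE = 1024 constant declared above:

class EstimateCapacityExample {
  static final int PAGE_SIZE = 1024; // mirrors DocValuesFieldUpdates.PAGE_SIZE

  static int estimateCapacity(int size) {
    return (int) Math.ceil((double) size / PAGE_SIZE) * PAGE_SIZE;
  }

  public static void main(String[] args) {
    System.out.println(estimateCapacity(1));    // 1024: one full page
    System.out.println(estimateCapacity(1024)); // 1024: exactly one page
    System.out.println(estimateCapacity(1025)); // 2048: spills into a second page
  }
}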

lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java

@@ -88,6 +88,7 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
}
}
private final int bitsPerValue;
private FixedBitSet docsWithField;
private PagedMutable docs;
private PagedGrowableWriter values;
@@ -96,8 +97,9 @@ class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
public NumericDocValuesFieldUpdates(String field, int maxDoc) {
super(field, Type.NUMERIC);
docsWithField = new FixedBitSet(64);
- docs = new PagedMutable(1, 1024, PackedInts.bitsRequired(maxDoc - 1), PackedInts.COMPACT);
- values = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
+ bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
+ docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
+ values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
size = 0;
}
@@ -198,4 +200,12 @@
return size > 0;
}
@Override
public long ramBytesPerDoc() {
long bytesPerDoc = (long) Math.ceil((double) (bitsPerValue + 1 /* docsWithField */) / 8);
final int capacity = estimateCapacity(size);
bytesPerDoc += (long) Math.ceil((double) values.ramBytesUsed() / capacity); // values
return bytesPerDoc;
}
}

lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java

@@ -33,6 +33,7 @@ import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.Bits;
@@ -342,7 +343,9 @@ class ReadersAndUpdates {
fieldInfos = builder.finish();
final long nextFieldInfosGen = info.getNextFieldInfosGen();
final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
- final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, IOContext.DEFAULT, segmentSuffix);
+ final long estUpdatesSize = dvUpdates.ramBytesPerDoc() * info.info.getDocCount();
+ final IOContext updatesContext = new IOContext(new FlushInfo(info.info.getDocCount(), estUpdatesSize));
+ final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
final DocValuesFormat docValuesFormat = codec.docValuesFormat();
final DocValuesConsumer fieldsConsumer = docValuesFormat.fieldsConsumer(state);
boolean fieldsConsumerSuccess = false;
@@ -464,9 +467,14 @@
}
});
}
- codec.fieldInfosFormat().getFieldInfosWriter().write(trackingDir, info.info.name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
- fieldsConsumerSuccess = true;
+ // we write approximately that many bytes (based on Lucene46DVF):
+ // HEADER + FOOTER: 40
+ // 90 bytes per-field (over estimating long name and attributes map)
+ final long estInfosSize = 40 + 90 * fieldInfos.size();
+ final IOContext infosContext = new IOContext(new FlushInfo(info.info.getDocCount(), estInfosSize));
+ codec.fieldInfosFormat().getFieldInfosWriter().write(trackingDir, info.info.name, segmentSuffix, fieldInfos, infosContext);
+ fieldsConsumerSuccess = true;
} finally {
if (fieldsConsumerSuccess) {
fieldsConsumer.close();
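A back-of-envelope check of the two estimates above, with made-up inputs for scale (none of these numbers come from the commit): with the size-less IOContext.DEFAULT both writes looked identical to a caching directory, whereas FlushInfo makes the large updates file and the tiny infos file clearly distinguishable.

class EstimateScaleExample {
  public static void main(String[] args) {
    int docCount = 1_000_000;                        // info.info.getDocCount()
    long ramBytesPerDoc = 5;                         // dvUpdates.ramBytesPerDoc(), summed over fields
    long estUpdatesSize = ramBytesPerDoc * docCount; // 5,000,000: a ~5 MB flush
    int numFields = 7;
    long estInfosSize = 40 + 90 * numFields;         // 670: tiny, safe to cache
    System.out.println(estUpdatesSize + " " + estInfosSize);
  }
}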

lucene/core/src/test/org/apache/lucene/index/TestBinaryDocValuesUpdates.java

@@ -27,12 +27,14 @@ import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
@@ -54,6 +56,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
*/
@SuppressCodecs({"Lucene40","Lucene41","Lucene42","Lucene45"})
@SuppressWarnings("resource")
public class TestBinaryDocValuesUpdates extends LuceneTestCase {
static long getValue(BinaryDocValues bdv, int idx, BytesRef scratch) {
@@ -1449,5 +1452,31 @@ public class TestBinaryDocValuesUpdates extends LuceneTestCase {
dir.close();
}
@Test
public void testIOContext() throws Exception {
// LUCENE-5591: make sure we pass an IOContext with an approximate
// segmentSize in FlushInfo
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
// we want a single large enough segment so that a doc-values update writes a large file
conf.setMergePolicy(NoMergePolicy.INSTANCE);
conf.setMaxBufferedDocs(Integer.MAX_VALUE); // manually flush
conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
IndexWriter writer = new IndexWriter(dir, conf.clone());
for (int i = 0; i < 100; i++) {
writer.addDocument(doc(i));
}
writer.commit();
writer.close();
NRTCachingDirectory cachingDir = new NRTCachingDirectory(dir, 100, 1/(1024.*1024.));
writer = new IndexWriter(cachingDir, conf.clone());
writer.updateBinaryDocValue(new Term("id", "doc-0"), "val", toBytes(100L));
DirectoryReader reader = DirectoryReader.open(writer, true); // flush
assertEquals(0, cachingDir.listCachedFiles().length);
IOUtils.close(reader, writer, cachingDir);
}
}
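The trick in testIOContext is the third NRTCachingDirectory argument: maxCachedMB = 1/(1024.*1024.) is about one byte, so the directory may only cache files whose IOContext reports an estimated size of roughly zero. IOContext.DEFAULT carries null flushInfo and mergeInfo, so before this fix the estimate defaulted to zero, every updates file landed in the cache, and the assertEquals(0, cachingDir.listCachedFiles().length) above would fail. A simplified paraphrase of the caching decision, written from memory of the 4.x sources rather than copied from NRTCachingDirectory:

import org.apache.lucene.store.IOContext;

class CacheDecisionSketch {
  // Simplified: the real NRTCachingDirectory also checks current cache
  // occupancy and a separate merge-size bound before caching.
  static boolean wouldCache(IOContext context, long maxCachedBytes) {
    long estimate = 0;
    if (context.mergeInfo != null) {
      estimate = context.mergeInfo.estimatedMergeBytes;
    } else if (context.flushInfo != null) {
      estimate = context.flushInfo.estimatedSegmentSize;
    }
    // IOContext.DEFAULT leaves both infos null: estimate stays 0, so every
    // file looks tiny and would be cached regardless of its real size.
    return estimate <= maxCachedBytes;
  }
}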

lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java

@@ -27,6 +27,7 @@ import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -55,6 +56,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
*/
@SuppressCodecs({"Lucene40","Lucene41","Lucene42","Lucene45"})
@SuppressWarnings("resource")
public class TestNumericDocValuesUpdates extends LuceneTestCase {
private Document doc(int id) {
@@ -1430,5 +1432,32 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
dir.close();
}
@Test
public void testIOContext() throws Exception {
// LUCENE-5591: make sure we pass an IOContext with an approximate
// segmentSize in FlushInfo
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
// we want a single large enough segment so that a doc-values update writes a large file
conf.setMergePolicy(NoMergePolicy.INSTANCE);
conf.setMaxBufferedDocs(Integer.MAX_VALUE); // manually flush
conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
IndexWriter writer = new IndexWriter(dir, conf.clone());
for (int i = 0; i < 100; i++) {
writer.addDocument(doc(i));
}
writer.commit();
writer.close();
NRTCachingDirectory cachingDir = new NRTCachingDirectory(dir, 100, 1/(1024.*1024.));
writer = new IndexWriter(cachingDir, conf.clone());
writer.updateNumericDocValue(new Term("id", "doc-0"), "val", 100L);
DirectoryReader reader = DirectoryReader.open(writer, true); // flush
assertEquals(0, cachingDir.listCachedFiles().length);
IOUtils.close(reader, writer, cachingDir);
}
}