LUCENE-3509: Added fasterButMoreRam option for docvalues.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1225779 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Martijn van Groningen 2011-12-30 09:40:45 +00:00
parent 76d1662cb7
commit 5906546f00
7 changed files with 67 additions and 24 deletions

View File

@ -649,6 +649,10 @@ Optimizations
* LUCENE-3643: FilteredQuery and IndexSearcher.search(Query, Filter,...)
now optimize the special case query instanceof MatchAllDocsQuery to
execute as ConstantScoreQuery. (Uwe Schindler)
* LUCENE-3509: Added fasterButMoreRam option for docvalues. This option controls whether the space used for packed
ints is rounded up for better lookup performance at the cost of more RAM. It only applies to the docvalues types
BYTES_FIXED_SORTED and BYTES_VAR_SORTED. (Simon Willnauer, Martijn van Groningen)
Bug fixes

View File

@ -24,6 +24,7 @@ import org.apache.lucene.codecs.lucene40.values.Writer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type; // javadoc
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
@ -31,6 +32,7 @@ import org.apache.lucene.util.Counter;
/**
* Abstract base class for PerDocConsumer implementations
*
* @lucene.experimental
*/
//TODO: this needs to go under lucene40 codec (its specific to its impl)
@ -39,12 +41,27 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
protected final String segmentSuffix;
private final Counter bytesUsed;
protected final IOContext context;
private final boolean fasterButMoreRam;
/**
* Creates a writer base with the {@code fasterButMoreRam} option enabled: this
* constructor simply delegates to
* {@link #DocValuesWriterBase(PerDocWriteState, boolean)} passing {@code true}.
*
* @param state The state to initiate a {@link PerDocConsumer} instance
*/
protected DocValuesWriterBase(PerDocWriteState state) {
this(state, true);
}
/**
 * Creates a writer base from the given per-document write state.
 *
 * @param state The state to initiate a {@link PerDocConsumer} instance; its segment name,
 *        segment suffix, byte-usage counter and I/O context are copied into this writer
 * @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by
 *        rounding up the number of bits used for a value to either 8, 16, 32 or 64 bits,
 *        trading RAM for speed. This option is only applicable for docvalues of type
 *        {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
 */
protected DocValuesWriterBase(PerDocWriteState state, boolean fasterButMoreRam) {
  // Store the tuning flag, then copy the per-segment state; the assignments are independent.
  this.fasterButMoreRam = fasterButMoreRam;
  this.context = state.context;
  this.bytesUsed = state.bytesUsed;
  this.segmentSuffix = state.segmentSuffix;
  this.segmentName = state.segmentName;
}
protected abstract Directory getDirectory() throws IOException;
@ -54,10 +71,10 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
}
@Override
public DocValuesConsumer addValuesField(DocValues.Type valueType, FieldInfo field) throws IOException {
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
return Writer.create(valueType,
docValuesId(segmentName, field.number),
getDirectory(), getComparator(), bytesUsed, context);
getDirectory(), getComparator(), bytesUsed, context, fasterButMoreRam);
}
public static String docValuesId(String segmentsName, int fieldId) {

View File

@ -114,13 +114,17 @@ public final class Bytes {
* {@link Writer}. A call to {@link Writer#finish(int)} will release
* all internally used resources and frees the memory tracking
* reference.
* @param context
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bits
* used for a value to either 8, 16, 32 or 64 bits. This option is only applicable for
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
* @param context I/O Context
* @return a new {@link Writer} instance
* @throws IOException
* if the files for the writer can not be created.
*/
public static Writer getWriter(Directory dir, String id, Mode mode,
boolean fixedSize, Comparator<BytesRef> sortComparator, Counter bytesUsed, IOContext context)
boolean fixedSize, Comparator<BytesRef> sortComparator,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
throws IOException {
// TODO -- I shouldn't have to specify fixed? can
// track itself & do the right thing at write time?
@ -134,7 +138,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) {
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
}
} else {
if (mode == Mode.STRAIGHT) {
@ -142,7 +146,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) {
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
}
}
@ -386,23 +390,32 @@ public final class Bytes {
protected int lastDocId = -1;
protected int[] docToEntry;
protected final BytesRefHash hash;
protected final boolean fasterButMoreRam;
protected long maxBytes = 0;
protected DerefBytesWriterBase(Directory dir, String id, String codecName,
int codecVersion, Counter bytesUsed, IOContext context)
throws IOException {
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context);
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false);
}
/**
* Convenience constructor that builds the pool allocator itself: a new
* {@code DirectTrackingAllocator} created from {@code ByteBlockPool.BYTE_BLOCK_SIZE}
* and the given {@code bytesUsed} counter. All other arguments are forwarded
* unchanged to the allocator-taking constructor.
*
* @param bytesUsed counter handed both to the new allocator and to the delegate constructor
* @param fasterButMoreRam forwarded as-is to the delegate constructor
* @throws IOException propagated from the delegate constructor
*/
protected DerefBytesWriterBase(Directory dir, String id, String codecName,
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
throws IOException {
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam);
}
protected DerefBytesWriterBase(Directory dir, String id, String codecName, int codecVersion, Allocator allocator,
Counter bytesUsed, IOContext context) throws IOException {
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
super(dir, id, codecName, codecVersion, bytesUsed, context);
hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
docToEntry = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
this.fasterButMoreRam = fasterButMoreRam;
}
protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
@ -499,7 +512,7 @@ public final class Bytes {
protected void writeIndex(IndexOutput idxOut, int docCount,
long maxValue, int[] addresses, int[] toEntry) throws IOException {
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
PackedInts.bitsRequired(maxValue));
bitsRequired(maxValue));
final int limit = docCount > docToEntry.length ? docToEntry.length
: docCount;
assert toEntry.length >= limit -1;
@ -523,7 +536,7 @@ public final class Bytes {
protected void writeIndex(IndexOutput idxOut, int docCount,
long maxValue, long[] addresses, int[] toEntry) throws IOException {
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
PackedInts.bitsRequired(maxValue));
bitsRequired(maxValue));
final int limit = docCount > docToEntry.length ? docToEntry.length
: docCount;
assert toEntry.length >= limit -1;
@ -543,6 +556,11 @@ public final class Bytes {
}
w.finish();
}
/**
 * Chooses the bits-per-value setting for a packed ints writer that must be able to
 * hold {@code maxValue}.
 *
 * @param maxValue the largest value that will be written
 * @return {@code PackedInts.bitsRequired(maxValue)} as-is when {@code fasterButMoreRam}
 *         is off; otherwise that count passed through {@code PackedInts.getNextFixedSize}
 */
protected int bitsRequired(long maxValue) {
  final int exactBits = PackedInts.bitsRequired(maxValue);
  if (!fasterButMoreRam) {
    return exactBits;
  }
  // Speed was requested over compactness: widen to the next fixed size.
  return PackedInts.getNextFixedSize(exactBits);
}
}

View File

@ -56,8 +56,8 @@ class FixedSortedBytesImpl {
private final Comparator<BytesRef> comp;
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
this.comp = comp;
}

View File

@ -57,8 +57,8 @@ final class VarSortedBytesImpl {
private final Comparator<BytesRef> comp;
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
this.comp = comp;
size = 0;
}
@ -123,7 +123,7 @@ final class VarSortedBytesImpl {
// total bytes of data
idxOut.writeLong(maxBytes);
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
PackedInts.bitsRequired(maxBytes));
bitsRequired(maxBytes));
// first dump bytes data, recording index & write offset as
// we go
final BytesRef spare = new BytesRef();

View File

@ -175,11 +175,14 @@ public abstract class Writer extends DocValuesConsumer {
* the {@link Directory} to create the files from.
* @param bytesUsed
* a byte-usage tracking reference
* @param fasterButMoreRam Whether the space used for packed ints should be rounded up for higher lookup performance.
* Currently this parameter only applies for types {@link Type#BYTES_VAR_SORTED}
* and {@link Type#BYTES_FIXED_SORTED}.
* @return a new {@link Writer} instance for the given {@link Type}
* @throws IOException
*/
public static Writer create(Type type, String id, Directory directory,
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context) throws IOException {
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
if (comp == null) {
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
}
@ -196,22 +199,22 @@ public abstract class Writer extends DocValuesConsumer {
return Floats.getWriter(directory, id, bytesUsed, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
bytesUsed, context);
bytesUsed, context, fasterButMoreRam);
case BYTES_FIXED_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
bytesUsed, context);
bytesUsed, context, fasterButMoreRam);
case BYTES_FIXED_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
bytesUsed, context);
bytesUsed, context, fasterButMoreRam);
case BYTES_VAR_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
bytesUsed, context);
bytesUsed, context, fasterButMoreRam);
case BYTES_VAR_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
bytesUsed, context);
bytesUsed, context, fasterButMoreRam);
case BYTES_VAR_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
bytesUsed, context);
bytesUsed, context, fasterButMoreRam);
default:
throw new IllegalArgumentException("Unknown Values: " + type);
}

View File

@ -63,7 +63,8 @@ public class TestDocValues extends LuceneTestCase {
Directory dir = newDirectory();
final Counter trackBytes = Counter.newCounter();
Writer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random));
Writer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random),
random.nextBoolean());
int maxDoc = 220;
final String[] values = new String[maxDoc];
final int fixedLength = 1 + atLeast(50);