mirror of https://github.com/apache/lucene.git
LUCENE-3509: Added fasterButMoreRam option for docvalues.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1225779 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
76d1662cb7
commit
5906546f00
|
@ -649,6 +649,10 @@ Optimizations
|
|||
* LUCENE-3643: FilteredQuery and IndexSearcher.search(Query, Filter,...)
|
||||
now optimize the special case query instanceof MatchAllDocsQuery to
|
||||
execute as ConstantScoreQuery. (Uwe Schindler)
|
||||
|
||||
* LUCENE-3509: Added fasterButMoreRam option for docvalues. This option controls whether the space for packed ints
|
||||
should be rounded up for better performance. This option only applies for docvalues types bytes fixed sorted
|
||||
and bytes var sorted. (Simon Willnauer, Martijn van Groningen)
|
||||
|
||||
Bug fixes
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.codecs.lucene40.values.Writer;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.PerDocWriteState;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValues.Type; // javadoc
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -31,6 +32,7 @@ import org.apache.lucene.util.Counter;
|
|||
|
||||
/**
|
||||
* Abstract base class for PerDocConsumer implementations
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
//TODO: this needs to go under lucene40 codec (its specific to its impl)
|
||||
|
@ -39,12 +41,27 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
|
|||
protected final String segmentSuffix;
|
||||
private final Counter bytesUsed;
|
||||
protected final IOContext context;
|
||||
|
||||
private final boolean fasterButMoreRam;
|
||||
|
||||
/**
|
||||
* @param state The state to initiate a {@link PerDocConsumer} instance
|
||||
*/
|
||||
protected DocValuesWriterBase(PerDocWriteState state) {
|
||||
this(state, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param state The state to initiate a {@link PerDocConsumer} instance
|
||||
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes
|
||||
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for
|
||||
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
|
||||
*/
|
||||
protected DocValuesWriterBase(PerDocWriteState state, boolean fasterButMoreRam) {
|
||||
this.segmentName = state.segmentName;
|
||||
this.segmentSuffix = state.segmentSuffix;
|
||||
this.bytesUsed = state.bytesUsed;
|
||||
this.context = state.context;
|
||||
this.fasterButMoreRam = fasterButMoreRam;
|
||||
}
|
||||
|
||||
protected abstract Directory getDirectory() throws IOException;
|
||||
|
@ -54,10 +71,10 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer addValuesField(DocValues.Type valueType, FieldInfo field) throws IOException {
|
||||
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
|
||||
return Writer.create(valueType,
|
||||
docValuesId(segmentName, field.number),
|
||||
getDirectory(), getComparator(), bytesUsed, context);
|
||||
getDirectory(), getComparator(), bytesUsed, context, fasterButMoreRam);
|
||||
}
|
||||
|
||||
public static String docValuesId(String segmentsName, int fieldId) {
|
||||
|
|
|
@ -114,13 +114,17 @@ public final class Bytes {
|
|||
* {@link Writer}. A call to {@link Writer#finish(int)} will release
|
||||
* all internally used resources and frees the memory tracking
|
||||
* reference.
|
||||
* @param context
|
||||
* @param fasterButMoreRam whether packed ints for docvalues should be optimized for speed by rounding up the bytes
|
||||
* used for a value to either 8, 16, 32 or 64 bytes. This option is only applicable for
|
||||
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and {@link Type#BYTES_VAR_SORTED}.
|
||||
* @param context I/O Context
|
||||
* @return a new {@link Writer} instance
|
||||
* @throws IOException
|
||||
* if the files for the writer can not be created.
|
||||
*/
|
||||
public static Writer getWriter(Directory dir, String id, Mode mode,
|
||||
boolean fixedSize, Comparator<BytesRef> sortComparator, Counter bytesUsed, IOContext context)
|
||||
boolean fixedSize, Comparator<BytesRef> sortComparator,
|
||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
|
||||
throws IOException {
|
||||
// TODO -- i shouldn't have to specify fixed? can
|
||||
// track itself & do the write thing at write time?
|
||||
|
@ -134,7 +138,7 @@ public final class Bytes {
|
|||
} else if (mode == Mode.DEREF) {
|
||||
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
|
||||
} else if (mode == Mode.SORTED) {
|
||||
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
|
||||
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
|
||||
}
|
||||
} else {
|
||||
if (mode == Mode.STRAIGHT) {
|
||||
|
@ -142,7 +146,7 @@ public final class Bytes {
|
|||
} else if (mode == Mode.DEREF) {
|
||||
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
|
||||
} else if (mode == Mode.SORTED) {
|
||||
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
|
||||
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, fasterButMoreRam);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -386,23 +390,32 @@ public final class Bytes {
|
|||
protected int lastDocId = -1;
|
||||
protected int[] docToEntry;
|
||||
protected final BytesRefHash hash;
|
||||
protected final boolean fasterButMoreRam;
|
||||
protected long maxBytes = 0;
|
||||
|
||||
protected DerefBytesWriterBase(Directory dir, String id, String codecName,
|
||||
int codecVersion, Counter bytesUsed, IOContext context)
|
||||
throws IOException {
|
||||
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
|
||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context);
|
||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false);
|
||||
}
|
||||
|
||||
protected DerefBytesWriterBase(Directory dir, String id, String codecName,
|
||||
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
|
||||
throws IOException {
|
||||
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
|
||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam);
|
||||
}
|
||||
|
||||
protected DerefBytesWriterBase(Directory dir, String id, String codecName, int codecVersion, Allocator allocator,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
||||
super(dir, id, codecName, codecVersion, bytesUsed, context);
|
||||
hash = new BytesRefHash(new ByteBlockPool(allocator),
|
||||
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
|
||||
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
|
||||
docToEntry = new int[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||
this.fasterButMoreRam = fasterButMoreRam;
|
||||
}
|
||||
|
||||
protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
|
||||
|
@ -499,7 +512,7 @@ public final class Bytes {
|
|||
protected void writeIndex(IndexOutput idxOut, int docCount,
|
||||
long maxValue, int[] addresses, int[] toEntry) throws IOException {
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
PackedInts.bitsRequired(maxValue));
|
||||
bitsRequired(maxValue));
|
||||
final int limit = docCount > docToEntry.length ? docToEntry.length
|
||||
: docCount;
|
||||
assert toEntry.length >= limit -1;
|
||||
|
@ -523,7 +536,7 @@ public final class Bytes {
|
|||
protected void writeIndex(IndexOutput idxOut, int docCount,
|
||||
long maxValue, long[] addresses, int[] toEntry) throws IOException {
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
PackedInts.bitsRequired(maxValue));
|
||||
bitsRequired(maxValue));
|
||||
final int limit = docCount > docToEntry.length ? docToEntry.length
|
||||
: docCount;
|
||||
assert toEntry.length >= limit -1;
|
||||
|
@ -543,6 +556,11 @@ public final class Bytes {
|
|||
}
|
||||
w.finish();
|
||||
}
|
||||
|
||||
protected int bitsRequired(long maxValue){
|
||||
return fasterButMoreRam ?
|
||||
PackedInts.getNextFixedSize(PackedInts.bitsRequired(maxValue)) : PackedInts.bitsRequired(maxValue);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -56,8 +56,8 @@ class FixedSortedBytesImpl {
|
|||
private final Comparator<BytesRef> comp;
|
||||
|
||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
|
||||
this.comp = comp;
|
||||
}
|
||||
|
||||
|
|
|
@ -57,8 +57,8 @@ final class VarSortedBytesImpl {
|
|||
private final Comparator<BytesRef> comp;
|
||||
|
||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
|
||||
this.comp = comp;
|
||||
size = 0;
|
||||
}
|
||||
|
@ -123,7 +123,7 @@ final class VarSortedBytesImpl {
|
|||
// total bytes of data
|
||||
idxOut.writeLong(maxBytes);
|
||||
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
|
||||
PackedInts.bitsRequired(maxBytes));
|
||||
bitsRequired(maxBytes));
|
||||
// first dump bytes data, recording index & write offset as
|
||||
// we go
|
||||
final BytesRef spare = new BytesRef();
|
||||
|
|
|
@ -175,11 +175,14 @@ public abstract class Writer extends DocValuesConsumer {
|
|||
* the {@link Directory} to create the files from.
|
||||
* @param bytesUsed
|
||||
* a byte-usage tracking reference
|
||||
* @param fasterButMoreRam Whether the space used for packed ints should be rounded up for higher lookup performance.
|
||||
* Currently this parameter only applies for types {@link Type#BYTES_VAR_SORTED}
|
||||
* and {@link Type#BYTES_FIXED_SORTED}.
|
||||
* @return a new {@link Writer} instance for the given {@link Type}
|
||||
* @throws IOException
|
||||
*/
|
||||
public static Writer create(Type type, String id, Directory directory,
|
||||
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context) throws IOException {
|
||||
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
|
||||
if (comp == null) {
|
||||
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
}
|
||||
|
@ -196,22 +199,22 @@ public abstract class Writer extends DocValuesConsumer {
|
|||
return Floats.getWriter(directory, id, bytesUsed, context, type);
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
|
||||
bytesUsed, context);
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
case BYTES_FIXED_DEREF:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
|
||||
bytesUsed, context);
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
case BYTES_FIXED_SORTED:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
|
||||
bytesUsed, context);
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
|
||||
bytesUsed, context);
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
case BYTES_VAR_DEREF:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
|
||||
bytesUsed, context);
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
case BYTES_VAR_SORTED:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
|
||||
bytesUsed, context);
|
||||
bytesUsed, context, fasterButMoreRam);
|
||||
default:
|
||||
throw new IllegalArgumentException("Unknown Values: " + type);
|
||||
}
|
||||
|
|
|
@ -63,7 +63,8 @@ public class TestDocValues extends LuceneTestCase {
|
|||
|
||||
Directory dir = newDirectory();
|
||||
final Counter trackBytes = Counter.newCounter();
|
||||
Writer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random));
|
||||
Writer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random),
|
||||
random.nextBoolean());
|
||||
int maxDoc = 220;
|
||||
final String[] values = new String[maxDoc];
|
||||
final int fixedLength = 1 + atLeast(50);
|
||||
|
|
Loading…
Reference in New Issue