mirror of https://github.com/apache/lucene.git
LUCENE-3186: Promote docvalues types during merge if ValueType or size differs across segments
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1181020 13f79535-47bb-0310-9956-ffa450edef68
parent cbde4e03e8
commit 3088c013ae
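In short: when segments disagree on a field's docvalues ValueType or fixed value size, merging now promotes them to a common type; incompatible combinations degrade to BYTES_VAR_STRAIGHT, and a null promoter drops the field's values. A sketch of the resolution loop, using only the TypePromoter calls that appear in this diff (the docValuesPerSegment array and fieldInfo variable are hypothetical scaffolding):

// Sketch only; the real loop lives in PerDocConsumer.merge below.
TypePromoter current = TypePromoter.getIdentityPromoter();
for (IndexDocValues dv : docValuesPerSegment) { // hypothetical: one entry per segment
  if (dv == null) {
    continue; // this segment has no values for the field
  }
  final TypePromoter incoming = TypePromoter.create(dv.type(), dv.getValueSize());
  current = current.promote(incoming);
  if (current == null) {
    // incompatible pair: the default handler keeps all values as var-length bytes
    current = TypePromoter.create(ValueType.BYTES_VAR_STRAIGHT, TypePromoter.VAR_TYPE_VALUE_SIZE);
  }
}
fieldInfo.resetDocValues(current.type()); // record the promoted type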
@@ -132,6 +132,12 @@ public final class FieldInfo {
     }
   }
 
+  public void resetDocValues(ValueType v) {
+    if (docValues != null) {
+      docValues = v;
+    }
+  }
+
   public boolean hasDocValues() {
     return docValues != null;
   }
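The new resetDocValues hook deliberately only overwrites an already-set type, so it can both record a promoted type and, when passed null, drop a field's docvalues. A hypothetical call site (the field name is invented):

FieldInfo fi = fieldInfos.fieldInfo("price"); // "price" is an invented example field
if (fi.hasDocValues()) {
  fi.resetDocValues(promotedType);            // record the type chosen during merge,
  // or fi.resetDocValues(null) to drop this field's docvalues entirely
}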
@@ -128,7 +128,7 @@ public class MultiPerDocValues extends PerDocValues {
         if (docsUpto != start) {
           type = values.type();
           docValuesIndex.add(new MultiIndexDocValues.DocValuesIndex(
-              new MultiIndexDocValues.DummyDocValues(start, type), docsUpto, start
+              new MultiIndexDocValues.EmptyDocValues(start, type), docsUpto, start
                   - docsUpto));
         }
         docValuesIndex.add(new MultiIndexDocValues.DocValuesIndex(values, start,
@@ -137,7 +137,7 @@ public class MultiPerDocValues extends PerDocValues {
 
       } else if (i + 1 == subs.length && !docValuesIndex.isEmpty()) {
         docValuesIndex.add(new MultiIndexDocValues.DocValuesIndex(
-            new MultiIndexDocValues.DummyDocValues(start, type), docsUpto, start
+            new MultiIndexDocValues.EmptyDocValues(start, type), docsUpto, start
                 - docsUpto));
       }
     }
@@ -33,7 +33,6 @@ import org.apache.lucene.index.codecs.FieldsReader;
 import org.apache.lucene.index.codecs.FieldsWriter;
 import org.apache.lucene.index.codecs.MergeState;
 import org.apache.lucene.index.codecs.PerDocConsumer;
-import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.store.CompoundFileDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
@@ -141,6 +140,8 @@ final class SegmentMerger {
     if (fieldInfos.hasVectors()) {
       mergeVectors();
     }
+    // write FIS once merge is done. IDV might change types or drop fields
+    fieldInfos.write(directory, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
     return mergedDocs;
   }
 
@@ -254,7 +255,6 @@ final class SegmentMerger {
       }
     }
     final SegmentCodecs codecInfo = fieldInfos.buildSegmentCodecs(false);
-    fieldInfos.write(directory, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
 
     int docCount = 0;
 
@@ -584,28 +584,11 @@ final class SegmentMerger {
   }
 
   private void mergePerDoc() throws IOException {
-    final List<PerDocValues> perDocProducers = new ArrayList<PerDocValues>();
-    final List<ReaderUtil.Slice> perDocSlices = new ArrayList<ReaderUtil.Slice>();
-    int docBase = 0;
-    for (MergeState.IndexReaderAndLiveDocs r : readers) {
-      final int maxDoc = r.reader.maxDoc();
-      final PerDocValues producer = r.reader.perDocValues();
-      if (producer != null) {
-        perDocSlices.add(new ReaderUtil.Slice(docBase, maxDoc, perDocProducers
-            .size()));
-        perDocProducers.add(producer);
-      }
-      docBase += maxDoc;
-    }
-    if (!perDocSlices.isEmpty()) {
       final PerDocConsumer docsConsumer = codec
           .docsConsumer(new PerDocWriteState(segmentWriteState));
       boolean success = false;
       try {
-        final MultiPerDocValues multiPerDocValues = new MultiPerDocValues(
-            perDocProducers.toArray(PerDocValues.EMPTY_ARRAY),
-            perDocSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
-        docsConsumer.merge(mergeState, multiPerDocValues);
+        docsConsumer.merge(mergeState);
         success = true;
       } finally {
         if (success) {
@@ -614,9 +597,6 @@ final class SegmentMerger {
         IOUtils.closeWhileHandlingException(docsConsumer);
       }
     }
-    }
-    /* don't close the perDocProducers here since they are private segment producers
-     * and will be closed once the SegmentReader goes out of scope */
   }
 
   private MergeState mergeState;
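The SegmentMerger change is mostly ordering: FieldInfos must now be written after the per-document values merge, because merging can promote a field's type or drop it. A simplified sketch of the resulting control flow (method names other than those in this diff are abbreviations, not the literal code):

// Simplified; the actual merge() also handles terms, norms, deletions, etc.
int merge() throws IOException {
  int mergedDocs = mergeFields();  // abbreviation for the earlier phases
  if (fieldInfos.hasVectors()) {
    mergeVectors();
  }
  mergePerDoc();                   // may change docvalues types via promotion
  // write FIS once merge is done. IDV might change types or drop fields
  fieldInfos.write(directory, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
  return mergedDocs;
}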
@@ -92,48 +92,38 @@ public abstract class DocValuesConsumer {
    *
    * @param mergeState
    *          the state to merge
-   * @param values
-   *          the docValues to merge in
+   * @param docValues docValues array containing one instance per reader (
+   *          {@link MergeState#readers}) or <code>null</code> if the reader has
+   *          no {@link IndexDocValues} instance.
    * @throws IOException
    *           if an {@link IOException} occurs
    */
-  public void merge(org.apache.lucene.index.codecs.MergeState mergeState,
-      IndexDocValues values) throws IOException {
+  public void merge(MergeState mergeState, IndexDocValues[] docValues) throws IOException {
     assert mergeState != null;
-    // TODO we need some kind of compatibility notation for values such
-    // that two slightly different segments can be merged eg. fixed vs.
-    // variable byte len or float32 vs. float64
-    boolean merged = false;
-    /*
-     * We ignore the given DocValues here and merge from the subReaders directly
-     * to support bulk copies on the DocValues Writer level. if this gets merged
-     * with MultiDocValues the writer can not optimize for bulk-copyable data
-     */
+    boolean hasMerged = false;
     for(int readerIDX=0;readerIDX<mergeState.readers.size();readerIDX++) {
       final org.apache.lucene.index.codecs.MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(readerIDX);
-      final IndexDocValues r = reader.reader.docValues(mergeState.fieldInfo.name);
-      if (r != null) {
-        merged = true;
-        merge(new Writer.MergeState(r, mergeState.docBase[readerIDX], reader.reader.maxDoc(),
+      if (docValues[readerIDX] != null) {
+        hasMerged = true;
+        merge(new Writer.SingleSubMergeState(docValues[readerIDX], mergeState.docBase[readerIDX], reader.reader.maxDoc(),
             reader.liveDocs));
       }
     }
-    if (merged) {
+    // only finish if no exception is thrown!
+    if (hasMerged) {
       finish(mergeState.mergedDocCount);
     }
   }
 
   /**
-   * Merges the given {@link MergeState} into this {@link DocValuesConsumer}.
-   * {@link MergeState#docBase} must always be increasing. Merging segments out
-   * of order is not supported.
+   * Merges the given {@link SingleSubMergeState} into this {@link DocValuesConsumer}.
    *
    * @param mergeState
-   *          the {@link MergeState} to merge
+   *          the {@link SingleSubMergeState} to merge
    * @throws IOException
    *           if an {@link IOException} occurs
    */
-  protected abstract void merge(MergeState mergeState) throws IOException;
+  protected abstract void merge(SingleSubMergeState mergeState) throws IOException;
 
   /**
    * Specialized auxiliary MergeState is necessary since we don't want to
@@ -141,7 +131,7 @@ public abstract class DocValuesConsumer {
    * created for each merged low level {@link IndexReader} we are merging to
    * support low level bulk copies.
    */
-  public static class MergeState {
+  public static class SingleSubMergeState {
     /**
      * the source reader for this MergeState - merged values should be read from
      * this instance
@@ -154,7 +144,7 @@ public abstract class DocValuesConsumer {
     /** the not deleted bits for this MergeState */
     public final Bits liveDocs;
 
-    public MergeState(IndexDocValues reader, int docBase, int docCount, Bits liveDocs) {
+    public SingleSubMergeState(IndexDocValues reader, int docBase, int docCount, Bits liveDocs) {
       assert reader != null;
       this.reader = reader;
       this.docBase = docBase;
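The consumer-side contract changed: merge now takes one IndexDocValues per reader, aligned with MergeState#readers, with null entries for segments that have no values for the field. A caller-side sketch (it mirrors what PerDocConsumer.merge below actually does; 'consumer' and 'fieldInfo' are assumed to be in scope):

// docValues[i] pairs with mergeState.readers.get(i); null slots are skipped.
final IndexDocValues[] docValues = new IndexDocValues[mergeState.readers.size()];
for (int i = 0; i < docValues.length; i++) {
  final PerDocValues perDoc = mergeState.readers.get(i).reader.perDocValues();
  docValues[i] = perDoc == null ? null : perDoc.docValues(fieldInfo.name);
}
consumer.merge(mergeState, docValues); // 'consumer' is a hypothetical DocValuesConsumer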
@@ -19,7 +19,10 @@ import java.io.Closeable;
 import java.io.IOException;
 
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.TypePromoter;
+import org.apache.lucene.index.values.ValueType;
 
 /**
  * Abstract API that consumes per document values. Concrete implementations of
@@ -40,28 +43,75 @@ public abstract class PerDocConsumer implements Closeable{
    * Consumes and merges the given {@link PerDocValues} producer
    * into this consumers format.
    */
-  public void merge(MergeState mergeState, PerDocValues producer)
+  public void merge(MergeState mergeState)
       throws IOException {
-    Iterable<String> fields = producer.fields();
-    for (String field : fields) {
-      mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
-      assert mergeState.fieldInfo != null : "FieldInfo for field is null: "
-          + field;
-      if (mergeState.fieldInfo.hasDocValues()) {
-        final IndexDocValues docValues = producer.docValues(field);
-        if (docValues == null) {
-          /*
-           * It is actually possible that a fieldInfo has a values type but no
-           * values are actually available. this can happen if there are already
-           * segments without values around.
-           */
+    final FieldInfos fieldInfos = mergeState.fieldInfos;
+    final IndexDocValues[] docValues = new IndexDocValues[mergeState.readers.size()];
+    final PerDocValues[] perDocValues = new PerDocValues[mergeState.readers.size()];
+    // pull all PerDocValues
+    for (int i = 0; i < perDocValues.length; i++) {
+      perDocValues[i] = mergeState.readers.get(i).reader.perDocValues();
+    }
+    for (FieldInfo fieldInfo : fieldInfos) {
+      mergeState.fieldInfo = fieldInfo;
+      TypePromoter currentPromoter = TypePromoter.getIdentityPromoter();
+      if (fieldInfo.hasDocValues()) {
+        for (int i = 0; i < perDocValues.length; i++) {
+          if (perDocValues[i] != null) { // get all IDV to merge
+            docValues[i] = perDocValues[i].docValues(fieldInfo.name);
+            if (docValues[i] != null) {
+              currentPromoter = promoteValueType(fieldInfo, docValues[i], currentPromoter);
+              if (currentPromoter == null) {
+                break;
+              }
+            }
+          }
+        }
+
+        if (currentPromoter == null) {
+          fieldInfo.resetDocValues(null);
           continue;
         }
+        assert currentPromoter != TypePromoter.getIdentityPromoter();
+        if (fieldInfo.getDocValues() != currentPromoter.type()) {
+          // reset the type if we got promoted
+          fieldInfo.resetDocValues(currentPromoter.type());
+        }
+
         final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo);
         assert docValuesConsumer != null;
         docValuesConsumer.merge(mergeState, docValues);
       }
     }
+    /* NOTE: don't close the perDocProducers here since they are private segment producers
+     * and will be closed once the SegmentReader goes out of scope */
   }
+
+  protected TypePromoter promoteValueType(final FieldInfo fieldInfo, final IndexDocValues docValues,
+      TypePromoter currentPromoter) {
+    assert currentPromoter != null;
+    final TypePromoter incomingPromoter = TypePromoter.create(docValues.type(), docValues.getValueSize());
+    assert incomingPromoter != null;
+    final TypePromoter newPromoter = currentPromoter.promote(incomingPromoter);
+    return newPromoter == null ? handleIncompatibleValueType(fieldInfo, incomingPromoter, currentPromoter) : newPromoter;
+  }
+
+  /**
+   * Resolves conflicts of incompatible {@link TypePromoter}s. The default
+   * implementation promotes incompatible types to
+   * {@link ValueType#BYTES_VAR_STRAIGHT} and preserves all values. If this
+   * method returns <code>null</code> all docvalues for the given
+   * {@link FieldInfo} are dropped and all values are lost.
+   *
+   * @param incomingPromoter
+   *          the incompatible incoming promoter
+   * @param currentPromoter
+   *          the current promoter
+   * @return a promoted {@link TypePromoter} or <code>null</code> iff this index
+   *         docvalues should be dropped for this field.
+   */
+  protected TypePromoter handleIncompatibleValueType(FieldInfo fieldInfo, TypePromoter incomingPromoter, TypePromoter currentPromoter) {
+    return TypePromoter.create(ValueType.BYTES_VAR_STRAIGHT, TypePromoter.VAR_TYPE_VALUE_SIZE);
+  }
+
 }
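A concrete, hypothetical walk-through of the conflict path. Which type pairs are compatible is decided by TypePromoter, whose table is not part of this diff, so treat the fixed-width pair below as an illustrative assumption (ValueType.BYTES_FIXED_STRAIGHT is inferred from the FixedStraightBytesImpl naming):

// Two segments store the same field as fixed-width straight bytes of different sizes.
final TypePromoter a = TypePromoter.create(ValueType.BYTES_FIXED_STRAIGHT, 4);
final TypePromoter b = TypePromoter.create(ValueType.BYTES_FIXED_STRAIGHT, 8);
TypePromoter merged = a.promote(b); // null when TypePromoter deems the pair incompatible
if (merged == null) {
  // mirrors handleIncompatibleValueType's default: preserve values as var-length bytes
  merged = TypePromoter.create(ValueType.BYTES_VAR_STRAIGHT, TypePromoter.VAR_TYPE_VALUE_SIZE);
}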
@@ -86,6 +86,12 @@ class FixedDerefBytesImpl {
         throws IOException {
       return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, type());
     }
+
+    @Override
+    public int getValueSize() {
+      return size;
+    }
+
   }
 
   static final class FixedDerefSource extends BytesSourceBase {
@@ -104,6 +104,11 @@ class FixedSortedBytesImpl {
       return new DirectFixedSortedSource(cloneData(), cloneIndex(), size,
           valueCount, comparator, type);
     }
+
+    @Override
+    public int getValueSize() {
+      return size;
+    }
   }
 
   static final class FixedSortedSource extends BytesSortedSourceBase {
@@ -59,6 +59,7 @@ class FixedStraightBytesImpl {
         int version, Counter bytesUsed, IOContext context) throws IOException {
       super(dir, id, codecName, version, bytesUsed, context);
       pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
+      pool.nextBuffer();
     }
 
     @Override
@@ -70,7 +71,6 @@ class FixedStraightBytesImpl {
           throw new IllegalArgumentException("bytes arrays > " + Short.MAX_VALUE + " are not supported");
         }
         size = bytes.length;
-        pool.nextBuffer();
       } else if (bytes.length != size) {
         throw new IllegalArgumentException("expected bytes size=" + size
             + " but got " + bytes.length);
@@ -120,7 +120,7 @@ class FixedStraightBytesImpl {
   }
 
   static class Writer extends FixedBytesWriterBase {
-    private boolean merge;
+    private boolean hasMerged;
     private IndexOutput datOut;
 
     public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) throws IOException {
@@ -133,12 +133,15 @@ class FixedStraightBytesImpl {
 
 
     @Override
-    protected void merge(MergeState state) throws IOException {
-      merge = true;
+    protected void merge(SingleSubMergeState state) throws IOException {
       datOut = getOrCreateDataOut();
       boolean success = false;
       try {
-        if (state.liveDocs == null && state.reader instanceof FixedStraightReader ) {
+        if (!hasMerged && size != -1) {
+          datOut.writeInt(size);
+        }
+
+        if (state.liveDocs == null && tryBulkMerge(state.reader)) {
           FixedStraightReader reader = (FixedStraightReader) state.reader;
           final int maxDocs = reader.maxDoc;
           if (maxDocs == 0) {
@@ -172,18 +175,23 @@ class FixedStraightBytesImpl {
         if (!success) {
           IOUtils.closeWhileHandlingException(datOut);
         }
+        hasMerged = true;
       }
     }
+
+    protected boolean tryBulkMerge(IndexDocValues docValues) {
+      return docValues instanceof FixedStraightReader;
+    }
 
     @Override
     protected void mergeDoc(int docID, int sourceDoc) throws IOException {
       assert lastDocID < docID;
-      currentMergeSource.getBytes(sourceDoc, bytesRef);
+      setMergeBytes(sourceDoc);
       if (size == -1) {
         size = bytesRef.length;
         datOut.writeInt(size);
       }
-      assert size == bytesRef.length;
+      assert size == bytesRef.length : "size: " + size + " ref: " + bytesRef.length;
       if (lastDocID+1 < docID) {
         fill(datOut, docID);
       }
@@ -191,6 +199,10 @@ class FixedStraightBytesImpl {
       lastDocID = docID;
     }
+
+    protected void setMergeBytes(int sourceDoc) {
+      currentMergeSource.getBytes(sourceDoc, bytesRef);
+    }
 
 
     // Fills up to but not including this docID
@@ -203,7 +215,7 @@ class FixedStraightBytesImpl {
     public void finish(int docCount) throws IOException {
       boolean success = false;
       try {
-        if (!merge) {
+        if (!hasMerged) {
           // indexing path - no disk IO until here
           assert datOut == null;
           datOut = getOrCreateDataOut();
@@ -267,6 +279,11 @@ class FixedStraightBytesImpl {
     public Source getDirectSource() throws IOException {
       return new DirectFixedStraightSource(cloneData(), size, type());
     }
+
+    @Override
+    public int getValueSize() {
+      return size;
+    }
   }
 
   // specialized version for single bytes
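FixedStraightBytesImpl.Writer now funnels merging through two overridable hooks: tryBulkMerge gates the raw byte-copy fast path, and setMergeBytes re-encodes a single value into bytesRef for the slow path. A minimal hypothetical subclass shows the pattern (Floats and Ints below are the real overrides; MyWriter and its extra size check are illustrative only):

// Hypothetical subclass, assumed to live in the same package as FixedStraightBytesImpl.
static class MyWriter extends FixedStraightBytesImpl.Writer {
  MyWriter(Directory dir, String id, Counter bytesUsed, IOContext context) throws IOException {
    super(dir, id, bytesUsed, context);
  }

  @Override
  protected boolean tryBulkMerge(IndexDocValues docValues) {
    // only bulk-copy when the on-disk layout is byte-identical
    return super.tryBulkMerge(docValues) && docValues.getValueSize() == size;
  }

  @Override
  protected void setMergeBytes(int sourceDoc) {
    currentMergeSource.getBytes(sourceDoc, bytesRef); // re-encode one value
  }
}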
@@ -85,6 +85,18 @@ public class Floats {
     public void add(int docID, PerDocFieldValues docValues) throws IOException {
       add(docID, docValues.getFloat());
     }
+
+    @Override
+    protected boolean tryBulkMerge(IndexDocValues docValues) {
+      // only bulk merge if value type is the same otherwise size differs
+      return super.tryBulkMerge(docValues) && docValues.type() == template.type();
+    }
+
+    @Override
+    protected void setMergeBytes(int sourceDoc) {
+      final double value = currentMergeSource.getFloat(sourceDoc);
+      template.toBytes(value, bytesRef);
+    }
   }
 
   final static class FloatsReader extends FixedStraightBytesImpl.FixedStraightReader {
@@ -106,6 +106,17 @@ public abstract class IndexDocValues implements Closeable {
     cache.close(this);
   }
 
+  /**
+   * Returns the size per value in bytes or <code>-1</code> iff size per value
+   * is variable.
+   *
+   * @return the size per value in bytes or <code>-1</code> iff size per value
+   *         is variable.
+   */
+  public int getValueSize() {
+    return -1;
+  }
+
   /**
    * Sets the {@link SourceCache} used by this {@link IndexDocValues} instance. This
    * method should be called before {@link #load()} is called. All {@link Source} instances in the currently used cache will be closed
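getValueSize is the second dimension the promoter compares: two sources can share a type() yet differ in fixed width, which is exactly the mismatch LUCENE-3186 addresses. A short usage sketch (the field name and reader variable are invented):

final IndexDocValues dv = reader.docValues("field");          // hypothetical lookup
final int width = dv.getValueSize();                          // -1 means variable width
final TypePromoter p = TypePromoter.create(dv.type(), width); // as in PerDocConsumer above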
@@ -93,9 +93,9 @@ public final class Ints {
     protected IntsWriter(Directory dir, String id, String codecName,
         int version, Counter bytesUsed, IOContext context, ValueType valueType) throws IOException {
       super(dir, id, codecName, version, bytesUsed, context);
-      final int expectedSize = typeToSize(valueType);
-      this.bytesRef = new BytesRef(expectedSize);
-      bytesRef.length = expectedSize;
+      size = typeToSize(valueType);
+      this.bytesRef = new BytesRef(size);
+      bytesRef.length = size;
       template = IndexDocValuesArray.TEMPLATES.get(valueType);
     }
 
@@ -109,6 +109,18 @@ public final class Ints {
     public void add(int docID, PerDocFieldValues docValues) throws IOException {
       add(docID, docValues.getInt());
     }
+
+    @Override
+    protected void setMergeBytes(int sourceDoc) {
+      final long value = currentMergeSource.getInt(sourceDoc);
+      template.toBytes(value, bytesRef);
+    }
+
+    @Override
+    protected boolean tryBulkMerge(IndexDocValues docValues) {
+      // only bulk merge if value type is the same otherwise size differs
+      return super.tryBulkMerge(docValues) && docValues.type() == template.type();
+    }
   }
 
   final static class IntsReader extends FixedStraightBytesImpl.FixedStraightReader {
@@ -46,6 +46,8 @@ public class MultiIndexDocValues extends IndexDocValues {
 
   private DocValuesIndex[] docValuesIdx;
   private int[] starts;
+  private ValueType type;
+  private int valueSize;
 
   public MultiIndexDocValues() {
     starts = new int[0];
@@ -62,38 +64,51 @@ public class MultiIndexDocValues extends IndexDocValues {
   }
 
   public IndexDocValues reset(DocValuesIndex[] docValuesIdx) {
-    int[] start = new int[docValuesIdx.length];
+    final int[] start = new int[docValuesIdx.length];
+    TypePromoter promoter = TypePromoter.getIdentityPromoter();
     for (int i = 0; i < docValuesIdx.length; i++) {
       start[i] = docValuesIdx[i].start;
+      if (!(docValuesIdx[i].docValues instanceof EmptyDocValues)) {
+        // only promote if not a dummy
+        final TypePromoter incomingPromoter = TypePromoter.create(
+            docValuesIdx[i].docValues.type(),
+            docValuesIdx[i].docValues.getValueSize());
+        promoter = promoter.promote(incomingPromoter);
+        if (promoter == null) {
+          throw new IllegalStateException("Can not promote " + incomingPromoter);
+        }
+      }
     }
+    this.type = promoter.type();
+    this.valueSize = promoter.getValueSize();
     this.starts = start;
     this.docValuesIdx = docValuesIdx;
     return this;
   }
 
-  public static class DummyDocValues extends IndexDocValues {
+  public static class EmptyDocValues extends IndexDocValues {
     final int maxDoc;
-    final Source emptySoruce;
+    final Source emptySource;
 
-    public DummyDocValues(int maxDoc, ValueType type) {
+    public EmptyDocValues(int maxDoc, ValueType type) {
       this.maxDoc = maxDoc;
-      this.emptySoruce = new EmptySource(type);
+      this.emptySource = new EmptySource(type);
     }
 
     @Override
     public Source load() throws IOException {
-      return emptySoruce;
+      return emptySource;
     }
 
     @Override
     public ValueType type() {
-      return emptySoruce.type();
+      return emptySource.type();
     }
 
 
     @Override
     public Source getDirectSource() throws IOException {
-      return emptySoruce;
+      return emptySource;
     }
   }
 
@@ -180,7 +195,12 @@ public class MultiIndexDocValues extends IndexDocValues {
 
   @Override
   public ValueType type() {
-    return this.docValuesIdx[0].docValues.type();
+    return type;
+  }
+
+  @Override
+  public int getValueSize() {
+    return valueSize;
   }
 
   @Override
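Note the asymmetry between the two promotion sites: PerDocConsumer resolves conflicts up front (falling back via handleIncompatibleValueType), while MultiIndexDocValues.reset assumes the types were already reconciled and throws IllegalStateException otherwise. A short usage sketch (docValuesIdx is assumed to be an already-built DocValuesIndex[]):

final MultiIndexDocValues multi = new MultiIndexDocValues();
multi.reset(docValuesIdx);             // throws IllegalStateException if a non-empty
final ValueType t = multi.type();      // slice cannot be promoted; EmptyDocValues
final int width = multi.getValueSize(); // slices are skipped; -1 means variable width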
@@ -93,7 +93,7 @@ class VarStraightBytesImpl {
     }
 
     @Override
-    protected void merge(MergeState state) throws IOException {
+    protected void merge(SingleSubMergeState state) throws IOException {
       merge = true;
       datOut = getOrCreateDataOut();
       boolean success = false;
@@ -138,7 +138,7 @@ public abstract class Writer extends DocValuesConsumer {
   public abstract void finish(int docCount) throws IOException;
 
   @Override
-  protected void merge(MergeState state) throws IOException {
+  protected void merge(SingleSubMergeState state) throws IOException {
     // This enables bulk copies in subclasses per MergeState, subclasses can
     // simply override this and decide if they want to merge
     // segments using this generic implementation or if a bulk merge is possible