mirror of https://github.com/apache/lucene.git
LUCENE-3676: Support SortedSource in MultiDocValues
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1228293 13f79535-47bb-0310-9956-ffa450edef68
parent c957cbea88
commit 40b3b75a6e
FixedSortedBytesImpl.java

@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedBytesMergeUtils;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
 import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
 import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
 import org.apache.lucene.index.MergeState;
@@ -66,11 +67,11 @@ class FixedSortedBytesImpl {
        throws IOException {
      boolean success = false;
      try {
-      final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState);
-      List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+      final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState.mergedDocCount);
+      List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
       final IndexOutput datOut = getOrCreateDataOut();
       datOut.writeInt(ctx.sizePerValues);
-      final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+      final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
 
       final IndexOutput idxOut = getOrCreateIndexOut();
       idxOut.writeInt(maxOrd);
VarSortedBytesImpl.java

@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedBytesMergeUtils;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
 import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
 import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
 import org.apache.lucene.index.MergeState;
@@ -67,12 +68,12 @@ final class VarSortedBytesImpl {
        throws IOException {
      boolean success = false;
      try {
-      MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState);
-      final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+      MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState.mergedDocCount);
+      final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
       IndexOutput datOut = getOrCreateDataOut();
 
       ctx.offsets = new long[1];
-      final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+      final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
       final long[] offsets = ctx.offsets;
       maxBytes = offsets[maxOrd-1];
       final IndexOutput idxOut = getOrCreateIndexOut();
MultiDocValues.java

@@ -20,11 +20,17 @@ import java.io.IOException;
 import java.lang.reflect.Array;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Comparator;
 import java.util.List;
 
+import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
+import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.ReaderUtil.Gather;
+import org.apache.lucene.util.packed.PackedInts.Reader;
 
 /**
  * A wrapper for compound IndexReader providing access to per segment
@@ -143,6 +149,8 @@ public class MultiDocValues extends DocValues {
     switch(promoted) {
       case BYTES_FIXED_DEREF:
       case BYTES_FIXED_STRAIGHT:
+      case BYTES_FIXED_SORTED:
+        assert promotedType[0].getValueSize() >= 0;
         slice.docValues = new EmptyFixedDocValues(slice.length, promoted, promotedType[0].getValueSize());
         break;
       default:
@@ -179,7 +187,6 @@ public class MultiDocValues extends DocValues {
       return emptySource.type();
     }
-
 
     @Override
     public Source getDirectSource() throws IOException {
       return emptySource;
@@ -276,6 +283,59 @@ public class MultiDocValues extends DocValues {
     }
 
     @Override
+    public SortedSource asSortedSource() {
+      try {
+        if (type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED) {
+          DocValues[] values = new DocValues[slices.length];
+          Comparator<BytesRef> comp = null;
+          for (int i = 0; i < values.length; i++) {
+            values[i] = slices[i].docValues;
+            if (!(values[i] instanceof EmptyDocValues)) {
+              Comparator<BytesRef> comparator = values[i].getDirectSource()
+                  .asSortedSource().getComparator();
+              assert comp == null || comp == comparator;
+              comp = comparator;
+            }
+          }
+          assert comp != null;
+          final int globalNumDocs = globalNumDocs();
+          final MergeContext ctx = SortedBytesMergeUtils.init(type, values,
+              comp, globalNumDocs);
+          List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
+              docBases(), new int[values.length][], values, ctx);
+          RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(
+              type);
+          final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer,
+              slices);
+          final int[] docToOrd = new int[globalNumDocs];
+          for (SortedSourceSlice slice : slices) {
+            slice.toAbsolutOrds(docToOrd);
+          }
+          return new MultiSortedSource(type, comp, consumer.pagedBytes,
+              ctx.sizePerValues, maxOrd, docToOrd, consumer.ordToOffset);
+        }
+      } catch (IOException e) {
+        throw new RuntimeException("load failed", e);
+      }
+      return super.asSortedSource();
+    }
+
+    private int globalNumDocs() {
+      int docs = 0;
+      for (int i = 0; i < slices.length; i++) {
+        docs += slices[i].length;
+      }
+      return docs;
+    }
+
+    private int[] docBases() {
+      int[] docBases = new int[slices.length];
+      for (int i = 0; i < slices.length; i++) {
+        docBases[i] = slices[i].start;
+      }
+      return docBases;
+    }
+
     public boolean hasArray() {
       boolean oneRealSource = false;
       for (DocValuesSlice slice : slices) {
@@ -346,12 +406,79 @@ public class MultiDocValues extends DocValues {
       }
     }
   }
+
+  private static final class RecordingBytesRefConsumer implements SortedBytesMergeUtils.BytesRefConsumer {
+    private final static int PAGED_BYTES_BITS = 15;
+    final PagedBytes pagedBytes = new PagedBytes(PAGED_BYTES_BITS);
+    long[] ordToOffset;
+
+    public RecordingBytesRefConsumer(Type type) {
+      ordToOffset = type == Type.BYTES_VAR_SORTED ? new long[2] : null;
+    }
+    @Override
+    public void consume(BytesRef ref, int ord, long offset) throws IOException {
+      pagedBytes.copy(ref);
+      if (ordToOffset != null) {
+        if (ord+1 >= ordToOffset.length) {
+          ordToOffset = ArrayUtil.grow(ordToOffset, ord + 2);
+        }
+        ordToOffset[ord+1] = offset;
+      }
+    }
+
+  }
+
+  private static final class MultiSortedSource extends SortedSource {
+    private final PagedBytes.Reader data;
+    private final int[] docToOrd;
+    private final long[] ordToOffset;
+    private int size;
+    private int valueCount;
+    public MultiSortedSource(Type type, Comparator<BytesRef> comparator, PagedBytes pagedBytes, int size, int numValues, int[] docToOrd, long[] ordToOffset) {
+      super(type, comparator);
+      data = pagedBytes.freeze(true);
+      this.size = size;
+      this.valueCount = numValues;
+      this.docToOrd = docToOrd;
+      this.ordToOffset = ordToOffset;
+    }
+
+    @Override
+    public int ord(int docID) {
+      return docToOrd[docID];
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      int size = this.size;
+      long offset = (ord*size);
+      if (ordToOffset != null) {
+        offset = ordToOffset[ord];
+        size = (int) (ordToOffset[1 + ord] - offset);
+      }
+      if (size < 0) {
+        System.out.println();
+      }
+      assert size >= 0;
+      return data.fillSlice(bytesRef, offset, size);
+    }
+
+    @Override
+    public Reader getDocToOrd() {
+      return null;
+    }
+
+    @Override
+    public int getValueCount() {
+      return valueCount;
+    }
+  }
+
   // TODO: this is dup of DocValues.getDefaultSource()?
-  private static class EmptySource extends Source {
+  private static class EmptySource extends SortedSource {
 
     public EmptySource(Type type) {
-      super(type);
+      super(type, BytesRef.getUTF8SortedAsUnicodeComparator());
     }
 
     @Override
@@ -369,14 +496,46 @@ public class MultiDocValues extends DocValues {
     public long getInt(int docID) {
       return 0;
     }
 
+    @Override
+    public SortedSource asSortedSource() {
+      if (type() == Type.BYTES_FIXED_SORTED || type() == Type.BYTES_VAR_SORTED) {
+
+      }
+      return super.asSortedSource();
+    }
+
+    @Override
+    public int ord(int docID) {
+      return 0;
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      bytesRef.length = 0;
+      bytesRef.offset = 0;
+      return bytesRef;
+    }
+
+    @Override
+    public Reader getDocToOrd() {
+      return null;
+    }
+
+    @Override
+    public int getValueCount() {
+      return 1;
+    }
+
   }
 
   private static class EmptyFixedSource extends EmptySource {
     private final int valueSize;
+    private final byte[] valueArray;
     public EmptyFixedSource(Type type, int valueSize) {
       super(type);
       this.valueSize = valueSize;
+      valueArray = new byte[valueSize];
     }
 
     @Override
@@ -396,6 +555,14 @@ public class MultiDocValues extends DocValues {
     public long getInt(int docID) {
       return 0;
     }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      bytesRef.bytes = valueArray;
+      bytesRef.length = valueSize;
+      bytesRef.offset = 0;
+      return bytesRef;
+    }
   }
 
   @Override
@@ -412,4 +579,6 @@ public class MultiDocValues extends DocValues {
   public Source getDirectSource() throws IOException {
     return new MultiSource(slices, starts, true, type);
   }
+
+
 }
SortedBytesMergeUtils.java

@@ -25,7 +25,6 @@ import java.util.List;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Source;
 import org.apache.lucene.index.DocValues.Type;
-import org.apache.lucene.index.MergeState.IndexReaderAndLiveDocs;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -35,11 +34,6 @@ import org.apache.lucene.util.packed.PackedInts;
 /**
  * @lucene.internal
  */
-// TODO: generalize this a bit more:
-// * remove writing (like indexoutput) from here
-// * just take IndexReaders (not IR&LiveDocs), doesnt care about liveDocs
-// * hook into MultiDocValues to make a MultiSortedSource
-// * maybe DV merging should then just use MultiDocValues for simplicity?
 public final class SortedBytesMergeUtils {
 
   private SortedBytesMergeUtils() {
@@ -47,7 +41,7 @@ public final class SortedBytesMergeUtils {
   }
 
   public static MergeContext init(Type type, DocValues[] docValues,
-      Comparator<BytesRef> comp, MergeState mergeState) {
+      Comparator<BytesRef> comp, int mergeDocCount) {
     int size = -1;
     if (type == Type.BYTES_FIXED_SORTED) {
       for (DocValues indexDocValues : docValues) {
@@ -58,7 +52,7 @@ public final class SortedBytesMergeUtils {
       }
       assert size >= 0;
     }
-    return new MergeContext(comp, mergeState, size, type);
+    return new MergeContext(comp, mergeDocCount, size, type);
   }
 
   public static final class MergeContext {
@@ -69,7 +63,7 @@ public final class SortedBytesMergeUtils {
     public final int[] docToEntry;
     public long[] offsets; // if non-null #mergeRecords collects byte offsets here
 
-    public MergeContext(Comparator<BytesRef> comp, MergeState mergeState,
+    public MergeContext(Comparator<BytesRef> comp, int mergeDocCount,
         int size, Type type) {
       assert type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED;
       this.comp = comp;
@@ -79,11 +73,15 @@ public final class SortedBytesMergeUtils {
         missingValue.grow(size);
         missingValue.length = size;
       }
-      docToEntry = new int[mergeState.mergedDocCount];
+      docToEntry = new int[mergeDocCount];
+    }
+
+    public int getMergeDocCount() {
+      return docToEntry.length;
     }
   }
 
-  public static List<SortedSourceSlice> buildSlices(MergeState mergeState,
+  public static List<SortedSourceSlice> buildSlices(int[] docBases, int[][] docMaps,
       DocValues[] docValues, MergeContext ctx) throws IOException {
     final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
     for (int i = 0; i < docValues.length; i++) {
@@ -92,13 +90,13 @@ public final class SortedBytesMergeUtils {
       if (docValues[i] != null
           && (directSource = docValues[i].getDirectSource()) != null) {
         final SortedSourceSlice slice = new SortedSourceSlice(i, directSource
-            .asSortedSource(), mergeState, ctx.docToEntry);
+            .asSortedSource(), docBases, ctx.getMergeDocCount(), ctx.docToEntry);
         nextSlice = slice;
       } else {
         nextSlice = new SortedSourceSlice(i, new MissingValueSource(ctx),
-            mergeState, ctx.docToEntry);
+            docBases, ctx.getMergeDocCount(), ctx.docToEntry);
       }
-      createOrdMapping(mergeState, nextSlice);
+      createOrdMapping(docBases, docMaps, nextSlice);
       slices.add(nextSlice);
     }
     return Collections.unmodifiableList(slices);
@@ -113,12 +111,12 @@ public final class SortedBytesMergeUtils {
    * mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping
    * contains the new global ordinals for the relative index.
    */
-  private static void createOrdMapping(MergeState mergeState,
+  private static void createOrdMapping(int[] docBases, int[][] docMaps,
       SortedSourceSlice currentSlice) {
     final int readerIdx = currentSlice.readerIdx;
-    final int[] currentDocMap = mergeState.docMaps[readerIdx];
+    final int[] currentDocMap = docMaps[readerIdx];
     final int docBase = currentSlice.docToOrdStart;
-    assert docBase == mergeState.docBase[readerIdx];
+    assert docBase == docBases[readerIdx];
     if (currentDocMap != null) { // we have deletes
       for (int i = 0; i < currentDocMap.length; i++) {
         final int doc = currentDocMap[i];
@@ -131,11 +129,7 @@ public final class SortedBytesMergeUtils {
         }
       }
     } else { // no deletes
-      final IndexReaderAndLiveDocs indexReaderAndLiveDocs = mergeState.readers
-          .get(readerIdx);
-      final int numDocs = indexReaderAndLiveDocs.reader.numDocs();
-      assert indexReaderAndLiveDocs.liveDocs == null;
-      assert currentSlice.docToOrdEnd - currentSlice.docToOrdStart == numDocs;
+      final int numDocs = currentSlice.docToOrdEnd - currentSlice.docToOrdStart;
       for (int doc = 0; doc < numDocs; doc++) {
         final int ord = currentSlice.source.ord(doc);
         currentSlice.docIDToRelativeOrd[docBase + doc] = ord;
@@ -145,7 +139,7 @@ public final class SortedBytesMergeUtils {
     }
   }
 
-  public static int mergeRecords(MergeContext ctx, IndexOutput datOut,
+  public static int mergeRecords(MergeContext ctx, BytesRefConsumer consumer,
       List<SortedSourceSlice> slices) throws IOException {
     final RecordMerger merger = new RecordMerger(new MergeQueue(slices.size(),
         ctx.comp), slices.toArray(new SortedSourceSlice[0]));
@@ -159,22 +153,38 @@ public final class SortedBytesMergeUtils {
       currentMergedBytes = merger.current;
       assert ctx.sizePerValues == -1 || ctx.sizePerValues == currentMergedBytes.length : "size: "
           + ctx.sizePerValues + " spare: " + currentMergedBytes.length;
+      offset += currentMergedBytes.length;
       if (recordOffsets) {
-        offset += currentMergedBytes.length;
         if (merger.currentOrd >= offsets.length) {
           offsets = ArrayUtil.grow(offsets, merger.currentOrd + 1);
         }
         offsets[merger.currentOrd] = offset;
       }
-      datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
-          currentMergedBytes.length);
+      consumer.consume(currentMergedBytes, merger.currentOrd, offset);
       merger.pushTop();
     }
     ctx.offsets = offsets;
     assert offsets == null || offsets[merger.currentOrd - 1] == offset;
     return merger.currentOrd;
   }
 
+  public static interface BytesRefConsumer {
+    public void consume(BytesRef ref, int ord, long offset) throws IOException;
+  }
+
+  public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
+    private final IndexOutput datOut;
+
+    public IndexOutputBytesRefConsumer(IndexOutput datOut) {
+      this.datOut = datOut;
+    }
+
+    @Override
+    public void consume(BytesRef currentMergedBytes, int ord, long offset) throws IOException {
+      datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
+          currentMergedBytes.length);
+    }
+  }
+
   private static final class RecordMerger {
     private final MergeQueue queue;
@@ -241,22 +251,22 @@ public final class SortedBytesMergeUtils {
     /* the currently merged relative ordinal */
     int relativeOrd = -1;
 
-    SortedSourceSlice(int readerIdx, SortedSource source, MergeState state,
+    SortedSourceSlice(int readerIdx, SortedSource source, int[] docBase, int mergeDocCount,
         int[] docToOrd) {
       super();
       this.readerIdx = readerIdx;
       this.source = source;
       this.docIDToRelativeOrd = docToOrd;
       this.ordMapping = new int[source.getValueCount()];
-      this.docToOrdStart = state.docBase[readerIdx];
-      this.docToOrdEnd = this.docToOrdStart + numDocs(state, readerIdx);
+      this.docToOrdStart = docBase[readerIdx];
+      this.docToOrdEnd = this.docToOrdStart + numDocs(docBase, mergeDocCount, readerIdx);
     }
 
-    private static int numDocs(MergeState state, int readerIndex) {
-      if (readerIndex == state.docBase.length - 1) {
-        return state.mergedDocCount - state.docBase[readerIndex];
+    private static int numDocs(int[] docBase, int mergedDocCount, int readerIndex) {
+      if (readerIndex == docBase.length - 1) {
+        return mergedDocCount - docBase[readerIndex];
       }
-      return state.docBase[readerIndex + 1] - state.docBase[readerIndex];
+      return docBase[readerIndex + 1] - docBase[readerIndex];
     }
 
     BytesRef next() {
@@ -269,6 +279,16 @@ public final class SortedBytesMergeUtils {
       }
       return null;
     }
+
+    public int[] toAbsolutOrds(int[] docToOrd) {
+      for (int i = docToOrdStart; i < docToOrdEnd; i++) {
+        final int mappedOrd = docIDToRelativeOrd[i];
+        assert mappedOrd < ordMapping.length;
+        assert ordMapping[mappedOrd] > 0 : "illegal mapping ord maps to an unreferenced value";
+        docToOrd[i] = ordMapping[mappedOrd] - 1;
+      }
+      return docToOrd;
+    }
 
     public void writeOrds(PackedInts.Writer writer) throws IOException {
       for (int i = docToOrdStart; i < docToOrdEnd; i++) {
TestDocValuesIndexing.java

@@ -21,8 +21,14 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
@@ -33,6 +39,7 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -47,6 +54,7 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
@@ -539,6 +547,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     return MultiDocValues.getDocValues(reader, field);
   }
 
+  @SuppressWarnings("fallthrough")
   private Source getSource(DocValues values) throws IOException {
     // getSource uses cache internally
     switch(random.nextInt(5)) {
@@ -547,7 +556,9 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     case 2:
       return values.getDirectSource();
     case 1:
-      return values.getSource();
+      if (values.type() == Type.BYTES_VAR_SORTED || values.type() == Type.BYTES_FIXED_SORTED) {
+        return values.getSource().asSortedSource();
+      }
     default:
       return values.getSource();
     }
@@ -705,4 +716,100 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     r.close();
     d.close();
   }
+
+  public void testSortedBytes() throws IOException {
+    Type[] types = new Type[] { Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED };
+    for (Type type : types) {
+      boolean fixed = type == Type.BYTES_FIXED_SORTED;
+      final Directory d = newDirectory();
+      IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
+          new MockAnalyzer(random));
+      IndexWriter w = new IndexWriter(d, cfg);
+      Comparator<BytesRef> comp = BytesRef.getUTF8SortedAsUnicodeComparator();
+      int numDocs = atLeast(100);
+      BytesRefHash hash = new BytesRefHash();
+      Map<String, String> docToString = new HashMap<String, String>();
+      int len = 1 + random.nextInt(50);
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        doc.add(newField("id", "" + i, TextField.TYPE_STORED));
+        DocValuesField f = new DocValuesField("field");
+        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+            len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+        hash.add(new BytesRef(string));
+        docToString.put("" + i, string);
+
+        f.setBytes(new BytesRef(string), type, comp);
+        doc.add(f);
+        w.addDocument(doc);
+      }
+      if (rarely()) {
+        w.commit();
+      }
+      int numDocsNoValue = atLeast(10);
+      for (int i = 0; i < numDocsNoValue; i++) {
+        Document doc = new Document();
+        doc.add(newField("id", "noValue", TextField.TYPE_STORED));
+        w.addDocument(doc);
+      }
+      BytesRef bytesRef = new BytesRef(fixed ? len : 0);
+      bytesRef.offset = 0;
+      bytesRef.length = fixed ? len : 0;
+      hash.add(bytesRef); // add empty value for the gaps
+      if (rarely()) {
+        w.commit();
+      }
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        String id = "" + i + numDocs;
+        doc.add(newField("id", id, TextField.TYPE_STORED));
+        DocValuesField f = new DocValuesField("field");
+        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+            len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+        hash.add(new BytesRef(string));
+        docToString.put(id, string);
+        f.setBytes(new BytesRef(string), type, comp);
+        doc.add(f);
+        w.addDocument(doc);
+      }
+      w.commit();
+      IndexReader reader = w.getReader();
+      DocValues docValues = MultiDocValues.getDocValues(reader, "field");
+      Source source = getSource(docValues);
+      SortedSource asSortedSource = source.asSortedSource();
+      int[] sort = hash.sort(comp);
+      BytesRef expected = new BytesRef();
+      BytesRef actual = new BytesRef();
+      assertEquals(hash.size(), asSortedSource.getValueCount());
+      for (int i = 0; i < hash.size(); i++) {
+        hash.get(sort[i], expected);
+        asSortedSource.getByOrd(i, actual);
+        assertEquals(expected.utf8ToString(), actual.utf8ToString());
+        int ord = asSortedSource.getByValue(expected, actual);
+        assertEquals(i, ord);
+      }
+      reader = new SlowMultiReaderWrapper(reader);
+      Set<Entry<String, String>> entrySet = docToString.entrySet();
+
+      for (Entry<String, String> entry : entrySet) {
+        int docId = docId(reader, new Term("id", entry.getKey()));
+        expected.copyChars(entry.getValue());
+        assertEquals(expected, asSortedSource.getBytes(docId, actual));
+      }
+
+      reader.close();
+      w.close();
+      d.close();
+    }
+  }
+
+  public int docId(IndexReader reader, Term term) throws IOException {
+    int docFreq = reader.docFreq(term);
+    assertEquals(1, docFreq);
+    DocsEnum termDocsEnum = reader.termDocsEnum(null, term.field, term.bytes, false);
+    int nextDoc = termDocsEnum.nextDoc();
+    assertEquals(DocsEnum.NO_MORE_DOCS, termDocsEnum.nextDoc());
+    return nextDoc;
+
+  }
 }
TestTypePromotion.java

@@ -100,14 +100,12 @@ public class TestTypePromotion extends LuceneTestCase {
         randomValueType(types, random), values, num_1 + num_2, num_3);
     writer_2.commit();
     writer_2.close();
-    if (random.nextBoolean()) {
+    if (rarely()) {
       writer.addIndexes(dir_2);
     } else {
       // do a real merge here
       IndexReader open = IndexReader.open(dir_2);
-      // we cannot use SlowMR for sorted bytes, because it returns a null sortedsource
-      boolean useSlowMRWrapper = types != SORTED_BYTES && random.nextBoolean();
-      writer.addIndexes(useSlowMRWrapper ? new SlowMultiReaderWrapper(open) : open);
+      writer.addIndexes(new SlowMultiReaderWrapper(open));
       open.close();
     }
     dir_2.close();
TestSort.java

@@ -258,7 +258,7 @@ public class TestSort extends LuceneTestCase {
     //System.out.println(writer.getSegmentCount());
     writer.close();
     IndexReader reader = IndexReader.open(indexStore);
-    return new IndexSearcher (reader);
+    return newSearcher(reader);
   }
 
   public String getRandomNumberString(int num, int low, int high) {
@@ -1210,35 +1210,11 @@ public class TestSort extends LuceneTestCase {
     assertMatches( null, searcher, query, sort, expectedResult );
   }
 
-  private static boolean hasSlowMultiReaderWrapper(IndexReader r) {
-    if (r instanceof SlowMultiReaderWrapper) {
-      return true;
-    } else {
-      IndexReader[] subReaders = r.getSequentialSubReaders();
-      if (subReaders != null) {
-        for (IndexReader subReader : subReaders) {
-          if (hasSlowMultiReaderWrapper(subReader)) {
-            return true;
-          }
-        }
-      }
-    }
-    return false;
-  }
 
   // make sure the documents returned by the search match the expected list
   private void assertMatches(String msg, IndexSearcher searcher, Query query, Sort sort,
       String expectedResult) throws IOException {
 
-    for(SortField sortField : sort.getSort()) {
-      if (sortField.getUseIndexValues() && sortField.getType() == SortField.Type.STRING) {
-        if (hasSlowMultiReaderWrapper(searcher.getIndexReader())) {
-          // Cannot use STRING DocValues sort with SlowMultiReaderWrapper
-          return;
-        }
-      }
-    }
-
     //ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs;
     TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort);
     ScoreDoc[] result = hits.scoreDocs;