LUCENE-3676: Support SortedSource in MultiDocValues

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1228293 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2012-01-06 17:27:07 +00:00
parent c957cbea88
commit 40b3b75a6e
7 changed files with 346 additions and 74 deletions
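
What this enables, in consumer terms: a SortedSource can now be obtained from
MultiDocValues over a composite (multi-segment) reader, not just per segment. A minimal
sketch using only API names that appear in this commit; the reader/index setup and the
field name are assumed:

    import java.io.IOException;
    import org.apache.lucene.index.DocValues;
    import org.apache.lucene.index.DocValues.SortedSource;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiDocValues;
    import org.apache.lucene.util.BytesRef;

    // Sketch: enumerate the merged, globally sorted values of a doc-values field.
    // Assumes "reader" spans several segments and "field" was indexed as
    // BYTES_FIXED_SORTED or BYTES_VAR_SORTED (the two types handled by this commit).
    static void dumpSortedValues(IndexReader reader, String field) throws IOException {
      DocValues docValues = MultiDocValues.getDocValues(reader, field);
      SortedSource sorted = docValues.getSource().asSortedSource();
      BytesRef spare = new BytesRef();
      for (int ord = 0; ord < sorted.getValueCount(); ord++) {
        sorted.getByOrd(ord, spare); // fills spare with the value at this global ordinal
        System.out.println(ord + " -> " + spare.utf8ToString());
      }
    }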


@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedBytesMergeUtils;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
 import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
 import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
 import org.apache.lucene.index.MergeState;
@@ -66,11 +67,11 @@ class FixedSortedBytesImpl {
       throws IOException {
     boolean success = false;
     try {
-      final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState);
-      List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+      final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState.mergedDocCount);
+      List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
       final IndexOutput datOut = getOrCreateDataOut();
       datOut.writeInt(ctx.sizePerValues);
-      final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+      final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
       final IndexOutput idxOut = getOrCreateIndexOut();
       idxOut.writeInt(maxOrd);


@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedBytesMergeUtils;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
 import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
 import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
 import org.apache.lucene.index.MergeState;
@@ -67,12 +68,12 @@ final class VarSortedBytesImpl {
       throws IOException {
     boolean success = false;
     try {
-      MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState);
-      final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+      MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState.mergedDocCount);
+      final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
       IndexOutput datOut = getOrCreateDataOut();
       ctx.offsets = new long[1];
-      final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+      final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
       final long[] offsets = ctx.offsets;
       maxBytes = offsets[maxOrd-1];
       final IndexOutput idxOut = getOrCreateIndexOut();
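
Both writer classes above change the same way: the sorted-bytes merge helpers no longer
take a MergeState, but the primitive pieces (merged doc count, doc bases, doc maps) plus
a sink for the merged values. A condensed sketch of the new call shape, assuming the
surrounding writer code supplies docValues, comp, mergeState and datOut:

    // hypothetical helper showing the post-commit call pattern of the merge utils
    static int mergeSortedBytes(DocValues[] docValues, Comparator<BytesRef> comp,
        MergeState mergeState, IndexOutput datOut) throws IOException {
      MergeContext ctx = SortedBytesMergeUtils.init(
          Type.BYTES_VAR_SORTED, docValues, comp, mergeState.mergedDocCount);
      List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
          mergeState.docBase, mergeState.docMaps, docValues, ctx);
      // values are pushed through a consumer instead of being written directly
      return SortedBytesMergeUtils.mergeRecords(
          ctx, new IndexOutputBytesRefConsumer(datOut), slices);
    }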


@@ -20,11 +20,17 @@ import java.io.IOException;
 import java.lang.reflect.Array;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Comparator;
 import java.util.List;
+import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
+import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.ReaderUtil.Gather;
+import org.apache.lucene.util.packed.PackedInts.Reader;

 /**
  * A wrapper for compound IndexReader providing access to per segment
@@ -143,6 +149,8 @@ public class MultiDocValues extends DocValues {
     switch(promoted) {
     case BYTES_FIXED_DEREF:
     case BYTES_FIXED_STRAIGHT:
+    case BYTES_FIXED_SORTED:
+      assert promotedType[0].getValueSize() >= 0;
       slice.docValues = new EmptyFixedDocValues(slice.length, promoted, promotedType[0].getValueSize());
       break;
     default:
@@ -179,7 +187,6 @@ public class MultiDocValues extends DocValues {
       return emptySource.type();
     }

     @Override
     public Source getDirectSource() throws IOException {
       return emptySource;
@@ -276,6 +283,59 @@ public class MultiDocValues extends DocValues {
     }

     @Override
+    public SortedSource asSortedSource() {
+      try {
+        if (type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED) {
+          DocValues[] values = new DocValues[slices.length];
+          Comparator<BytesRef> comp = null;
+          for (int i = 0; i < values.length; i++) {
+            values[i] = slices[i].docValues;
+            if (!(values[i] instanceof EmptyDocValues)) {
+              Comparator<BytesRef> comparator = values[i].getDirectSource()
+                  .asSortedSource().getComparator();
+              assert comp == null || comp == comparator;
+              comp = comparator;
+            }
+          }
+          assert comp != null;
+          final int globalNumDocs = globalNumDocs();
+          final MergeContext ctx = SortedBytesMergeUtils.init(type, values,
+              comp, globalNumDocs);
+          List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
+              docBases(), new int[values.length][], values, ctx);
+          RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(
+              type);
+          final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer,
+              slices);
+          final int[] docToOrd = new int[globalNumDocs];
+          for (SortedSourceSlice slice : slices) {
+            slice.toAbsolutOrds(docToOrd);
+          }
+          return new MultiSortedSource(type, comp, consumer.pagedBytes,
+              ctx.sizePerValues, maxOrd, docToOrd, consumer.ordToOffset);
+        }
+      } catch (IOException e) {
+        throw new RuntimeException("load failed", e);
+      }
+      return super.asSortedSource();
+    }
+
+    private int globalNumDocs() {
+      int docs = 0;
+      for (int i = 0; i < slices.length; i++) {
+        docs += slices[i].length;
+      }
+      return docs;
+    }
+
+    private int[] docBases() {
+      int[] docBases = new int[slices.length];
+      for (int i = 0; i < slices.length; i++) {
+        docBases[i] = slices[i].start;
+      }
+      return docBases;
+    }
+
     public boolean hasArray() {
       boolean oneRealSource = false;
       for (DocValuesSlice slice : slices) {
@@ -346,12 +406,79 @@ public class MultiDocValues extends DocValues {
         }
       }
     }

+  private static final class RecordingBytesRefConsumer implements SortedBytesMergeUtils.BytesRefConsumer {
+    private final static int PAGED_BYTES_BITS = 15;
+    final PagedBytes pagedBytes = new PagedBytes(PAGED_BYTES_BITS);
+    long[] ordToOffset;
+
+    public RecordingBytesRefConsumer(Type type) {
+      ordToOffset = type == Type.BYTES_VAR_SORTED ? new long[2] : null;
+    }
+
+    @Override
+    public void consume(BytesRef ref, int ord, long offset) throws IOException {
+      pagedBytes.copy(ref);
+      if (ordToOffset != null) {
+        if (ord + 1 >= ordToOffset.length) {
+          ordToOffset = ArrayUtil.grow(ordToOffset, ord + 2);
+        }
+        ordToOffset[ord + 1] = offset;
+      }
+    }
+  }
+
+  private static final class MultiSortedSource extends SortedSource {
+    private final PagedBytes.Reader data;
+    private final int[] docToOrd;
+    private final long[] ordToOffset;
+    private int size;
+    private int valueCount;
+
+    public MultiSortedSource(Type type, Comparator<BytesRef> comparator, PagedBytes pagedBytes, int size, int numValues, int[] docToOrd, long[] ordToOffset) {
+      super(type, comparator);
+      data = pagedBytes.freeze(true);
+      this.size = size;
+      this.valueCount = numValues;
+      this.docToOrd = docToOrd;
+      this.ordToOffset = ordToOffset;
+    }
+
+    @Override
+    public int ord(int docID) {
+      return docToOrd[docID];
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      int size = this.size;
+      long offset = (ord * size);
+      if (ordToOffset != null) {
+        offset = ordToOffset[ord];
+        size = (int) (ordToOffset[1 + ord] - offset);
+      }
+      assert size >= 0;
+      return data.fillSlice(bytesRef, offset, size);
+    }
+
+    @Override
+    public Reader getDocToOrd() {
+      return null;
+    }
+
+    @Override
+    public int getValueCount() {
+      return valueCount;
+    }
+  }
+
   // TODO: this is dup of DocValues.getDefaultSource()?
-  private static class EmptySource extends Source {
+  private static class EmptySource extends SortedSource {

     public EmptySource(Type type) {
-      super(type);
+      super(type, BytesRef.getUTF8SortedAsUnicodeComparator());
     }

     @Override
@@ -369,14 +496,46 @@ public class MultiDocValues extends DocValues {
     public long getInt(int docID) {
       return 0;
     }
+
+    @Override
+    public SortedSource asSortedSource() {
+      if (type() == Type.BYTES_FIXED_SORTED || type() == Type.BYTES_VAR_SORTED) {
+        return this;
+      }
+      return super.asSortedSource();
+    }
+
+    @Override
+    public int ord(int docID) {
+      return 0;
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      bytesRef.length = 0;
+      bytesRef.offset = 0;
+      return bytesRef;
+    }
+
+    @Override
+    public Reader getDocToOrd() {
+      return null;
+    }
+
+    @Override
+    public int getValueCount() {
+      return 1;
+    }
   }

   private static class EmptyFixedSource extends EmptySource {
     private final int valueSize;
+    private final byte[] valueArray;

     public EmptyFixedSource(Type type, int valueSize) {
       super(type);
       this.valueSize = valueSize;
+      valueArray = new byte[valueSize];
     }

     @Override
@@ -396,6 +555,14 @@ public class MultiDocValues extends DocValues {
     public long getInt(int docID) {
       return 0;
     }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      bytesRef.bytes = valueArray;
+      bytesRef.length = valueSize;
+      bytesRef.offset = 0;
+      return bytesRef;
+    }
   }
   @Override
@@ -412,4 +579,6 @@ public class MultiDocValues extends DocValues {
   public Source getDirectSource() throws IOException {
     return new MultiSource(slices, starts, true, type);
   }
 }
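
The core idea behind asSortedSource() above: each segment keeps its own sorted value
space with segment-relative ordinals; the merge produces one global sorted space, and
every relative ordinal is rewritten to its global ordinal (what toAbsolutOrds does via
ordMapping). A self-contained sketch of that remapping, independent of Lucene:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.TreeSet;

    public class GlobalOrdSketch {
      public static void main(String[] args) {
        // two hypothetical segments; values are sorted per segment, ords are relative
        String[][] segments = { { "apple", "pear" }, { "banana", "pear" } };
        // merged global sorted value space: apple=0, banana=1, pear=2
        TreeSet<String> set = new TreeSet<String>();
        for (String[] seg : segments) set.addAll(Arrays.asList(seg));
        List<String> merged = new ArrayList<String>(set);
        // per-segment ordMapping: segment-relative ord -> global ord
        for (int s = 0; s < segments.length; s++) {
          int[] ordMapping = new int[segments[s].length];
          for (int ord = 0; ord < ordMapping.length; ord++) {
            ordMapping[ord] = merged.indexOf(segments[s][ord]);
          }
          // prints [0, 2] for segment 0 and [1, 2] for segment 1
          System.out.println("segment " + s + ": " + Arrays.toString(ordMapping));
        }
      }
    }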


@@ -25,7 +25,6 @@ import java.util.List;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Source;
 import org.apache.lucene.index.DocValues.Type;
-import org.apache.lucene.index.MergeState.IndexReaderAndLiveDocs;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -35,11 +34,6 @@ import org.apache.lucene.util.packed.PackedInts;
 /**
  * @lucene.internal
  */
-// TODO: generalize this a bit more:
-// * remove writing (like indexoutput) from here
-// * just take IndexReaders (not IR&LiveDocs), doesnt care about liveDocs
-// * hook into MultiDocValues to make a MultiSortedSource
-// * maybe DV merging should then just use MultiDocValues for simplicity?
 public final class SortedBytesMergeUtils {

   private SortedBytesMergeUtils() {
@@ -47,7 +41,7 @@ public final class SortedBytesMergeUtils {
   }

   public static MergeContext init(Type type, DocValues[] docValues,
-      Comparator<BytesRef> comp, MergeState mergeState) {
+      Comparator<BytesRef> comp, int mergeDocCount) {
     int size = -1;
     if (type == Type.BYTES_FIXED_SORTED) {
       for (DocValues indexDocValues : docValues) {
@@ -58,7 +52,7 @@ public final class SortedBytesMergeUtils {
       }
       assert size >= 0;
     }
-    return new MergeContext(comp, mergeState, size, type);
+    return new MergeContext(comp, mergeDocCount, size, type);
   }

   public static final class MergeContext {
@@ -69,7 +63,7 @@ public final class SortedBytesMergeUtils {
     public final int[] docToEntry;
     public long[] offsets; // if non-null #mergeRecords collects byte offsets here

-    public MergeContext(Comparator<BytesRef> comp, MergeState mergeState,
+    public MergeContext(Comparator<BytesRef> comp, int mergeDocCount,
         int size, Type type) {
       assert type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED;
       this.comp = comp;
@@ -79,11 +73,15 @@ public final class SortedBytesMergeUtils {
         missingValue.grow(size);
         missingValue.length = size;
       }
-      docToEntry = new int[mergeState.mergedDocCount];
+      docToEntry = new int[mergeDocCount];
     }
+
+    public int getMergeDocCount() {
+      return docToEntry.length;
+    }
   }

-  public static List<SortedSourceSlice> buildSlices(MergeState mergeState,
+  public static List<SortedSourceSlice> buildSlices(int[] docBases, int[][] docMaps,
       DocValues[] docValues, MergeContext ctx) throws IOException {
     final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
     for (int i = 0; i < docValues.length; i++) {
@@ -92,13 +90,13 @@ public final class SortedBytesMergeUtils {
       if (docValues[i] != null
           && (directSource = docValues[i].getDirectSource()) != null) {
         final SortedSourceSlice slice = new SortedSourceSlice(i, directSource
-            .asSortedSource(), mergeState, ctx.docToEntry);
+            .asSortedSource(), docBases, ctx.getMergeDocCount(), ctx.docToEntry);
         nextSlice = slice;
       } else {
         nextSlice = new SortedSourceSlice(i, new MissingValueSource(ctx),
-            mergeState, ctx.docToEntry);
+            docBases, ctx.getMergeDocCount(), ctx.docToEntry);
       }
-      createOrdMapping(mergeState, nextSlice);
+      createOrdMapping(docBases, docMaps, nextSlice);
       slices.add(nextSlice);
     }
     return Collections.unmodifiableList(slices);
@@ -113,12 +111,12 @@ public final class SortedBytesMergeUtils {
    * mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping
    * contains the new global ordinals for the relative index.
    */
-  private static void createOrdMapping(MergeState mergeState,
+  private static void createOrdMapping(int[] docBases, int[][] docMaps,
       SortedSourceSlice currentSlice) {
     final int readerIdx = currentSlice.readerIdx;
-    final int[] currentDocMap = mergeState.docMaps[readerIdx];
+    final int[] currentDocMap = docMaps[readerIdx];
     final int docBase = currentSlice.docToOrdStart;
-    assert docBase == mergeState.docBase[readerIdx];
+    assert docBase == docBases[readerIdx];
     if (currentDocMap != null) { // we have deletes
       for (int i = 0; i < currentDocMap.length; i++) {
         final int doc = currentDocMap[i];
@@ -131,11 +129,7 @@ public final class SortedBytesMergeUtils {
         }
       }
     } else { // no deletes
-      final IndexReaderAndLiveDocs indexReaderAndLiveDocs = mergeState.readers
-          .get(readerIdx);
-      final int numDocs = indexReaderAndLiveDocs.reader.numDocs();
-      assert indexReaderAndLiveDocs.liveDocs == null;
-      assert currentSlice.docToOrdEnd - currentSlice.docToOrdStart == numDocs;
+      final int numDocs = currentSlice.docToOrdEnd - currentSlice.docToOrdStart;
       for (int doc = 0; doc < numDocs; doc++) {
         final int ord = currentSlice.source.ord(doc);
         currentSlice.docIDToRelativeOrd[docBase + doc] = ord;
@@ -145,7 +139,7 @@ public final class SortedBytesMergeUtils {
     }
   }

-  public static int mergeRecords(MergeContext ctx, IndexOutput datOut,
+  public static int mergeRecords(MergeContext ctx, BytesRefConsumer consumer,
       List<SortedSourceSlice> slices) throws IOException {
     final RecordMerger merger = new RecordMerger(new MergeQueue(slices.size(),
         ctx.comp), slices.toArray(new SortedSourceSlice[0]));
@@ -159,22 +153,38 @@ public final class SortedBytesMergeUtils {
       currentMergedBytes = merger.current;
       assert ctx.sizePerValues == -1 || ctx.sizePerValues == currentMergedBytes.length : "size: "
           + ctx.sizePerValues + " spare: " + currentMergedBytes.length;
+      offset += currentMergedBytes.length;
       if (recordOffsets) {
-        offset += currentMergedBytes.length;
         if (merger.currentOrd >= offsets.length) {
           offsets = ArrayUtil.grow(offsets, merger.currentOrd + 1);
         }
         offsets[merger.currentOrd] = offset;
       }
-      datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
-          currentMergedBytes.length);
+      consumer.consume(currentMergedBytes, merger.currentOrd, offset);
       merger.pushTop();
     }
     ctx.offsets = offsets;
     assert offsets == null || offsets[merger.currentOrd - 1] == offset;
     return merger.currentOrd;
   }

+  public static interface BytesRefConsumer {
+    public void consume(BytesRef ref, int ord, long offset) throws IOException;
+  }
+
+  public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
+    private final IndexOutput datOut;
+
+    public IndexOutputBytesRefConsumer(IndexOutput datOut) {
+      this.datOut = datOut;
+    }
+
+    @Override
+    public void consume(BytesRef currentMergedBytes, int ord, long offset) throws IOException {
+      datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
+          currentMergedBytes.length);
+    }
+  }

   private static final class RecordMerger {
     private final MergeQueue queue;
@@ -241,22 +251,22 @@ public final class SortedBytesMergeUtils {
     /* the currently merged relative ordinal */
     int relativeOrd = -1;

-    SortedSourceSlice(int readerIdx, SortedSource source, MergeState state,
+    SortedSourceSlice(int readerIdx, SortedSource source, int[] docBase, int mergeDocCount,
         int[] docToOrd) {
       super();
       this.readerIdx = readerIdx;
       this.source = source;
       this.docIDToRelativeOrd = docToOrd;
       this.ordMapping = new int[source.getValueCount()];
-      this.docToOrdStart = state.docBase[readerIdx];
-      this.docToOrdEnd = this.docToOrdStart + numDocs(state, readerIdx);
+      this.docToOrdStart = docBase[readerIdx];
+      this.docToOrdEnd = this.docToOrdStart + numDocs(docBase, mergeDocCount, readerIdx);
     }

-    private static int numDocs(MergeState state, int readerIndex) {
-      if (readerIndex == state.docBase.length - 1) {
-        return state.mergedDocCount - state.docBase[readerIndex];
+    private static int numDocs(int[] docBase, int mergedDocCount, int readerIndex) {
+      if (readerIndex == docBase.length - 1) {
+        return mergedDocCount - docBase[readerIndex];
       }
-      return state.docBase[readerIndex + 1] - state.docBase[readerIndex];
+      return docBase[readerIndex + 1] - docBase[readerIndex];
     }

     BytesRef next() {
@@ -269,6 +279,16 @@ public final class SortedBytesMergeUtils {
       }
       return null;
     }

+    public int[] toAbsolutOrds(int[] docToOrd) {
+      for (int i = docToOrdStart; i < docToOrdEnd; i++) {
+        final int mappedOrd = docIDToRelativeOrd[i];
+        assert mappedOrd < ordMapping.length;
+        assert ordMapping[mappedOrd] > 0 : "illegal mapping ord maps to an unreferenced value";
+        docToOrd[i] = ordMapping[mappedOrd] - 1;
+      }
+      return docToOrd;
+    }
+
     public void writeOrds(PackedInts.Writer writer) throws IOException {
       for (int i = docToOrdStart; i < docToOrdEnd; i++) {
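
The BytesRefConsumer interface introduced above is the generalization that makes the
MultiDocValues support possible: mergeRecords() no longer assumes an IndexOutput, so the
same merge loop can feed a file (IndexOutputBytesRefConsumer) or memory (the
RecordingBytesRefConsumer in MultiDocValues). Any other sink is a few lines; a sketch of
a hypothetical consumer that only tracks the total merged byte size:

    import java.io.IOException;
    import org.apache.lucene.index.SortedBytesMergeUtils.BytesRefConsumer;
    import org.apache.lucene.util.BytesRef;

    final class SizeTrackingConsumer implements BytesRefConsumer {
      long totalBytes; // sum of all merged value lengths
      @Override
      public void consume(BytesRef ref, int ord, long offset) throws IOException {
        totalBytes += ref.length; // offset is the end offset after this value
      }
    }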


@@ -21,8 +21,14 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;

 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
@@ -33,6 +39,7 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -47,6 +54,7 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
@@ -539,6 +547,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     return MultiDocValues.getDocValues(reader, field);
   }

+  @SuppressWarnings("fallthrough")
   private Source getSource(DocValues values) throws IOException {
     // getSource uses cache internally
     switch(random.nextInt(5)) {
@@ -547,7 +556,9 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     case 2:
       return values.getDirectSource();
     case 1:
-      return values.getSource();
+      if (values.type() == Type.BYTES_VAR_SORTED || values.type() == Type.BYTES_FIXED_SORTED) {
+        return values.getSource().asSortedSource();
+      }
     default:
       return values.getSource();
     }
@@ -705,4 +716,100 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     r.close();
     d.close();
   }

+  public void testSortedBytes() throws IOException {
+    Type[] types = new Type[] { Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED };
+    for (Type type : types) {
+      boolean fixed = type == Type.BYTES_FIXED_SORTED;
+      final Directory d = newDirectory();
+      IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
+          new MockAnalyzer(random));
+      IndexWriter w = new IndexWriter(d, cfg);
+      Comparator<BytesRef> comp = BytesRef.getUTF8SortedAsUnicodeComparator();
+      int numDocs = atLeast(100);
+      BytesRefHash hash = new BytesRefHash();
+      Map<String, String> docToString = new HashMap<String, String>();
+      int len = 1 + random.nextInt(50);
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        doc.add(newField("id", "" + i, TextField.TYPE_STORED));
+        DocValuesField f = new DocValuesField("field");
+        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+            len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+        hash.add(new BytesRef(string));
+        docToString.put("" + i, string);
+        f.setBytes(new BytesRef(string), type, comp);
+        doc.add(f);
+        w.addDocument(doc);
+      }
+      if (rarely()) {
+        w.commit();
+      }
+      int numDocsNoValue = atLeast(10);
+      for (int i = 0; i < numDocsNoValue; i++) {
+        Document doc = new Document();
+        doc.add(newField("id", "noValue", TextField.TYPE_STORED));
+        w.addDocument(doc);
+      }
+      BytesRef bytesRef = new BytesRef(fixed ? len : 0);
+      bytesRef.offset = 0;
+      bytesRef.length = fixed ? len : 0;
+      hash.add(bytesRef); // add empty value for the gaps
+      if (rarely()) {
+        w.commit();
+      }
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        String id = "" + i + numDocs;
+        doc.add(newField("id", id, TextField.TYPE_STORED));
+        DocValuesField f = new DocValuesField("field");
+        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+            len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+        hash.add(new BytesRef(string));
+        docToString.put(id, string);
+        f.setBytes(new BytesRef(string), type, comp);
+        doc.add(f);
+        w.addDocument(doc);
+      }
+      w.commit();
+      IndexReader reader = w.getReader();
+      DocValues docValues = MultiDocValues.getDocValues(reader, "field");
+      Source source = getSource(docValues);
+      SortedSource asSortedSource = source.asSortedSource();
+      int[] sort = hash.sort(comp);
+      BytesRef expected = new BytesRef();
+      BytesRef actual = new BytesRef();
+      assertEquals(hash.size(), asSortedSource.getValueCount());
+      for (int i = 0; i < hash.size(); i++) {
+        hash.get(sort[i], expected);
+        asSortedSource.getByOrd(i, actual);
+        assertEquals(expected.utf8ToString(), actual.utf8ToString());
+        int ord = asSortedSource.getByValue(expected, actual);
+        assertEquals(i, ord);
+      }
+      reader = new SlowMultiReaderWrapper(reader);
+      Set<Entry<String, String>> entrySet = docToString.entrySet();
+      for (Entry<String, String> entry : entrySet) {
+        int docId = docId(reader, new Term("id", entry.getKey()));
+        expected.copyChars(entry.getValue());
+        assertEquals(expected, asSortedSource.getBytes(docId, actual));
+      }
+      reader.close();
+      w.close();
+      d.close();
+    }
+  }
+
+  public int docId(IndexReader reader, Term term) throws IOException {
+    int docFreq = reader.docFreq(term);
+    assertEquals(1, docFreq);
+    DocsEnum termDocsEnum = reader.termDocsEnum(null, term.field, term.bytes, false);
+    int nextDoc = termDocsEnum.nextDoc();
+    assertEquals(DocsEnum.NO_MORE_DOCS, termDocsEnum.nextDoc());
+    return nextDoc;
+  }
 }
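
The test above also exercises reverse lookup: SortedSource.getByValue maps a value back
to its ordinal (only the exact-match case is relied on there). A hedged sketch, assuming
a sortedSource obtained as in the test and a value known to be present:

    // Sketch: map a value to its global ordinal, then back to bytes.
    BytesRef key = new BytesRef("pear");
    BytesRef spare = new BytesRef();
    int ord = sortedSource.getByValue(key, spare);
    if (ord >= 0) {
      sortedSource.getByOrd(ord, spare); // spare now equals key
    }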


@@ -100,14 +100,12 @@ public class TestTypePromotion extends LuceneTestCase {
         randomValueType(types, random), values, num_1 + num_2, num_3);
     writer_2.commit();
     writer_2.close();
-    if (random.nextBoolean()) {
+    if (rarely()) {
       writer.addIndexes(dir_2);
     } else {
       // do a real merge here
       IndexReader open = IndexReader.open(dir_2);
-      // we cannot use SlowMR for sorted bytes, because it returns a null sortedsource
-      boolean useSlowMRWrapper = types != SORTED_BYTES && random.nextBoolean();
-      writer.addIndexes(useSlowMRWrapper ? new SlowMultiReaderWrapper(open) : open);
+      writer.addIndexes(new SlowMultiReaderWrapper(open));
       open.close();
     }
     dir_2.close();


@@ -258,7 +258,7 @@ public class TestSort extends LuceneTestCase {
     //System.out.println(writer.getSegmentCount());
     writer.close();
     IndexReader reader = IndexReader.open(indexStore);
-    return new IndexSearcher (reader);
+    return newSearcher(reader);
   }

   public String getRandomNumberString(int num, int low, int high) {
@@ -1210,35 +1210,11 @@ public class TestSort extends LuceneTestCase {
     assertMatches( null, searcher, query, sort, expectedResult );
   }

-  private static boolean hasSlowMultiReaderWrapper(IndexReader r) {
-    if (r instanceof SlowMultiReaderWrapper) {
-      return true;
-    } else {
-      IndexReader[] subReaders = r.getSequentialSubReaders();
-      if (subReaders != null) {
-        for (IndexReader subReader : subReaders) {
-          if (hasSlowMultiReaderWrapper(subReader)) {
-            return true;
-          }
-        }
-      }
-    }
-    return false;
-  }
-
   // make sure the documents returned by the search match the expected list
   private void assertMatches(String msg, IndexSearcher searcher, Query query, Sort sort,
       String expectedResult) throws IOException {
-    for(SortField sortField : sort.getSort()) {
-      if (sortField.getUseIndexValues() && sortField.getType() == SortField.Type.STRING) {
-        if (hasSlowMultiReaderWrapper(searcher.getIndexReader())) {
-          // Cannot use STRING DocValues sort with SlowMultiReaderWrapper
-          return;
-        }
-      }
-    }
     //ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs;
     TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort);
     ScoreDoc[] result = hits.scoreDocs;