From 40b3b75a6e60b564ca8d027d43a2808b3f74d43f Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Fri, 6 Jan 2012 17:27:07 +0000
Subject: [PATCH] LUCENE-3676: Support SortedSource in MultiDocValues

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1228293 13f79535-47bb-0310-9956-ffa450edef68
---
 .../lucene40/values/FixedSortedBytesImpl.java |   7 +-
 .../lucene40/values/VarSortedBytesImpl.java   |   7 +-
 .../apache/lucene/index/MultiDocValues.java   | 177 +++++++++++++++++-
 .../lucene/index/SortedBytesMergeUtils.java   |  88 +++++----
 .../lucene/index/TestDocValuesIndexing.java   | 109 ++++++++++-
 .../lucene/index/TestTypePromotion.java       |   6 +-
 .../org/apache/lucene/search/TestSort.java    |  26 +--
 7 files changed, 346 insertions(+), 74 deletions(-)

diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
index 28006d63f20..7e12c9c97cf 100644
--- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedBytesMergeUtils;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
 import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
 import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
 import org.apache.lucene.index.MergeState;
@@ -66,11 +67,11 @@ class FixedSortedBytesImpl {
         throws IOException {
       boolean success = false;
       try {
-        final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState);
-        List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+        final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState.mergedDocCount);
+        List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
         final IndexOutput datOut = getOrCreateDataOut();
         datOut.writeInt(ctx.sizePerValues);
-        final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+        final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
         final IndexOutput idxOut = getOrCreateIndexOut();
         idxOut.writeInt(maxOrd);
diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
index d1e8c2a05a4..a4185e9349d 100644
--- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedBytesMergeUtils;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
 import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
 import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
 import org.apache.lucene.index.MergeState;
@@ -67,12 +68,12 @@ final class VarSortedBytesImpl {
         throws IOException {
       boolean success = false;
       try {
-        MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED,
-            docValues, comp, mergeState);
-        final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+        MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState.mergedDocCount);
+        final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
         IndexOutput datOut = getOrCreateDataOut();
         ctx.offsets = new long[1];
-        final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+        final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
         final long[] offsets = ctx.offsets;
         maxBytes = offsets[maxOrd-1];
         final IndexOutput idxOut = getOrCreateIndexOut();
diff --git a/lucene/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiDocValues.java
index 2df3046e1a5..e0fa37d6393 100644
--- a/lucene/src/java/org/apache/lucene/index/MultiDocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/MultiDocValues.java
@@ -20,11 +20,17 @@ import java.io.IOException;
 import java.lang.reflect.Array;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Comparator;
 import java.util.List;
 
+import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
+import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.ReaderUtil.Gather;
+import org.apache.lucene.util.packed.PackedInts.Reader;
 
 /**
  * A wrapper for compound IndexReader providing access to per segment
@@ -143,6 +149,8 @@ public class MultiDocValues extends DocValues {
       switch(promoted) {
       case BYTES_FIXED_DEREF:
       case BYTES_FIXED_STRAIGHT:
+      case BYTES_FIXED_SORTED:
+        assert promotedType[0].getValueSize() >= 0;
         slice.docValues = new EmptyFixedDocValues(slice.length, promoted, promotedType[0].getValueSize());
         break;
       default:
@@ -179,7 +187,6 @@ public class MultiDocValues extends DocValues {
     return emptySource.type();
   }
 
-
   @Override
   public Source getDirectSource() throws IOException {
     return emptySource;
@@ -276,6 +283,59 @@ public class MultiDocValues extends DocValues {
     }
 
     @Override
+    public SortedSource asSortedSource() {
+      try {
+        if (type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED) {
+          DocValues[] values = new DocValues[slices.length];
+          Comparator<BytesRef> comp = null;
+          for (int i = 0; i < values.length; i++) {
+            values[i] = slices[i].docValues;
+            if (!(values[i] instanceof EmptyDocValues)) {
+              Comparator<BytesRef> comparator = values[i].getDirectSource()
+                  .asSortedSource().getComparator();
+              assert comp == null || comp == comparator;
+              comp = comparator;
+            }
+          }
+          assert comp != null;
+          final int globalNumDocs = globalNumDocs();
+          final MergeContext ctx = SortedBytesMergeUtils.init(type, values,
+              comp, globalNumDocs);
+          List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
+              docBases(), new int[values.length][], values, ctx);
+          RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(
+              type);
+          final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer,
+              slices);
+          final int[] docToOrd = new int[globalNumDocs];
+          for (SortedSourceSlice slice : slices) {
+            slice.toAbsolutOrds(docToOrd);
+          }
+          return new MultiSortedSource(type, comp, consumer.pagedBytes,
+              ctx.sizePerValues, maxOrd, docToOrd, consumer.ordToOffset);
+        }
+      } catch (IOException e) {
+        throw new RuntimeException("load failed", e);
+      }
+      return super.asSortedSource();
+    }
+
+    private int globalNumDocs() {
+      int docs = 0;
+      for (int i = 0; i < slices.length; i++) {
+        docs += slices[i].length;
+      }
+      return docs;
+    }
+
+    private int[] docBases() {
+      int[] docBases = new int[slices.length];
+      for (int i = 0; i < slices.length; i++) {
+        docBases[i] = slices[i].start;
+      }
+      return docBases;
+    }
+
     public boolean hasArray() {
       boolean oneRealSource = false;
       for (DocValuesSlice slice : slices) {
@@ -346,12 +406,79 @@ public class MultiDocValues extends DocValues {
         }
       }
     }
+
+  private static final class RecordingBytesRefConsumer implements SortedBytesMergeUtils.BytesRefConsumer {
+    private final static int PAGED_BYTES_BITS = 15;
+    final PagedBytes pagedBytes = new PagedBytes(PAGED_BYTES_BITS);
+    long[] ordToOffset;
+
+    public RecordingBytesRefConsumer(Type type) {
+      ordToOffset = type == Type.BYTES_VAR_SORTED ? new long[2] : null;
+    }
+
+    @Override
+    public void consume(BytesRef ref, int ord, long offset) throws IOException {
+      pagedBytes.copy(ref);
+      if (ordToOffset != null) {
+        if (ord+1 >= ordToOffset.length) {
+          ordToOffset = ArrayUtil.grow(ordToOffset, ord + 2);
+        }
+        ordToOffset[ord+1] = offset;
+      }
+    }
+  }
+
+  private static final class MultiSortedSource extends SortedSource {
+    private final PagedBytes.Reader data;
+    private final int[] docToOrd;
+    private final long[] ordToOffset;
+    private int size;
+    private int valueCount;
+
+    public MultiSortedSource(Type type, Comparator<BytesRef> comparator, PagedBytes pagedBytes, int size, int numValues, int[] docToOrd, long[] ordToOffset) {
+      super(type, comparator);
+      data = pagedBytes.freeze(true);
+      this.size = size;
+      this.valueCount = numValues;
+      this.docToOrd = docToOrd;
+      this.ordToOffset = ordToOffset;
+    }
+
+    @Override
+    public int ord(int docID) {
+      return docToOrd[docID];
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      int size = this.size;
+      long offset = (ord*size);
+      if (ordToOffset != null) {
+        offset = ordToOffset[ord];
+        size = (int) (ordToOffset[1 + ord] - offset);
+      }
+      assert size >= 0;
+      return data.fillSlice(bytesRef, offset, size);
+    }
+
+    @Override
+    public Reader getDocToOrd() {
+      return null;
+    }
+
+    @Override
+    public int getValueCount() {
+      return valueCount;
+    }
+  }
 
   // TODO: this is dup of DocValues.getDefaultSource()?
-  private static class EmptySource extends Source {
+  private static class EmptySource extends SortedSource {
 
     public EmptySource(Type type) {
-      super(type);
+      super(type, BytesRef.getUTF8SortedAsUnicodeComparator());
     }
 
     @Override
@@ -369,14 +496,46 @@ public class MultiDocValues extends DocValues {
     public long getInt(int docID) {
       return 0;
     }
+
+    @Override
+    public SortedSource asSortedSource() {
+      if (type() == Type.BYTES_FIXED_SORTED || type() == Type.BYTES_VAR_SORTED) {
+        return this;
+      }
+      return super.asSortedSource();
+    }
+
+    @Override
+    public int ord(int docID) {
+      return 0;
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      bytesRef.length = 0;
+      bytesRef.offset = 0;
+      return bytesRef;
+    }
+
+    @Override
+    public Reader getDocToOrd() {
+      return null;
+    }
+
+    @Override
+    public int getValueCount() {
+      return 1;
+    }
   }
 
   private static class EmptyFixedSource extends EmptySource {
     private final int valueSize;
-
+    private final byte[] valueArray;
     public EmptyFixedSource(Type type, int valueSize) {
       super(type);
       this.valueSize = valueSize;
+      valueArray = new byte[valueSize];
     }
 
     @Override
@@ -396,6 +555,14 @@ public class MultiDocValues extends DocValues {
     public long getInt(int docID) {
       return 0;
     }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      bytesRef.bytes = valueArray;
+      bytesRef.length = valueSize;
+      bytesRef.offset = 0;
+      return bytesRef;
+    }
   }
 
   @Override
@@ -412,4 +579,6 @@ public class MultiDocValues extends DocValues {
   public Source getDirectSource() throws IOException {
     return new MultiSource(slices, starts, true, type);
   }
+
+
 }
diff --git a/lucene/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java b/lucene/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
index 19959c3d830..dce3011565d 100644
--- a/lucene/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
+++ b/lucene/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
@@ -25,7 +25,6 @@ import java.util.List;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Source;
 import org.apache.lucene.index.DocValues.Type;
-import org.apache.lucene.index.MergeState.IndexReaderAndLiveDocs;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -35,11 +34,6 @@ import org.apache.lucene.util.packed.PackedInts;
 /**
  * @lucene.internal
  */
-// TODO: generalize this a bit more:
-// * remove writing (like indexoutput) from here
-// * just take IndexReaders (not IR&LiveDocs), doesnt care about liveDocs
-// * hook into MultiDocValues to make a MultiSortedSource
-// * maybe DV merging should then just use MultiDocValues for simplicity?
 public final class SortedBytesMergeUtils {
 
   private SortedBytesMergeUtils() {
@@ -47,7 +41,7 @@ public final class SortedBytesMergeUtils {
   }
 
   public static MergeContext init(Type type, DocValues[] docValues,
-      Comparator<BytesRef> comp, MergeState mergeState) {
+      Comparator<BytesRef> comp, int mergeDocCount) {
     int size = -1;
     if (type == Type.BYTES_FIXED_SORTED) {
       for (DocValues indexDocValues : docValues) {
@@ -58,7 +52,7 @@ public final class SortedBytesMergeUtils {
       }
       assert size >= 0;
     }
-    return new MergeContext(comp, mergeState, size, type);
+    return new MergeContext(comp, mergeDocCount, size, type);
   }
 
   public static final class MergeContext {
@@ -69,7 +63,7 @@ public final class SortedBytesMergeUtils {
     public final int[] docToEntry;
    public long[] offsets; // if non-null #mergeRecords collects byte offsets here
 
-    public MergeContext(Comparator<BytesRef> comp, MergeState mergeState,
+    public MergeContext(Comparator<BytesRef> comp, int mergeDocCount,
        int size, Type type) {
      assert type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED;
      this.comp = comp;
@@ -79,11 +73,15 @@ public final class SortedBytesMergeUtils {
        missingValue.grow(size);
        missingValue.length = size;
      }
-      docToEntry = new int[mergeState.mergedDocCount];
+      docToEntry = new int[mergeDocCount];
+    }
+
+    public int getMergeDocCount() {
+      return docToEntry.length;
     }
   }
 
-  public static List<SortedSourceSlice> buildSlices(MergeState mergeState,
+  public static List<SortedSourceSlice> buildSlices(int[] docBases, int[][] docMaps,
       DocValues[] docValues, MergeContext ctx) throws IOException {
     final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
     for (int i = 0; i < docValues.length; i++) {
@@ -92,13 +90,13 @@ public final class SortedBytesMergeUtils {
       if (docValues[i] != null && (directSource = docValues[i].getDirectSource()) != null) {
         final SortedSourceSlice slice = new SortedSourceSlice(i, directSource
-            .asSortedSource(), mergeState, ctx.docToEntry);
+            .asSortedSource(), docBases, ctx.getMergeDocCount(), ctx.docToEntry);
         nextSlice = slice;
       } else {
         nextSlice = new SortedSourceSlice(i, new MissingValueSource(ctx),
-            mergeState, ctx.docToEntry);
+            docBases, ctx.getMergeDocCount(), ctx.docToEntry);
       }
-      createOrdMapping(mergeState, nextSlice);
+      createOrdMapping(docBases, docMaps, nextSlice);
       slices.add(nextSlice);
     }
     return Collections.unmodifiableList(slices);
@@ -113,12 +111,12 @@ public final class SortedBytesMergeUtils {
    * mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping
    * contains the new global ordinals for the relative index.
    */
-  private static void createOrdMapping(MergeState mergeState,
+  private static void createOrdMapping(int[] docBases, int[][] docMaps,
       SortedSourceSlice currentSlice) {
     final int readerIdx = currentSlice.readerIdx;
-    final int[] currentDocMap = mergeState.docMaps[readerIdx];
+    final int[] currentDocMap = docMaps[readerIdx];
     final int docBase = currentSlice.docToOrdStart;
-    assert docBase == mergeState.docBase[readerIdx];
+    assert docBase == docBases[readerIdx];
     if (currentDocMap != null) { // we have deletes
       for (int i = 0; i < currentDocMap.length; i++) {
         final int doc = currentDocMap[i];
@@ -131,11 +129,7 @@ public final class SortedBytesMergeUtils {
         }
       }
     } else { // no deletes
-      final IndexReaderAndLiveDocs indexReaderAndLiveDocs = mergeState.readers
-          .get(readerIdx);
-      final int numDocs = indexReaderAndLiveDocs.reader.numDocs();
-      assert indexReaderAndLiveDocs.liveDocs == null;
-      assert currentSlice.docToOrdEnd - currentSlice.docToOrdStart == numDocs;
+      final int numDocs = currentSlice.docToOrdEnd - currentSlice.docToOrdStart;
       for (int doc = 0; doc < numDocs; doc++) {
         final int ord = currentSlice.source.ord(doc);
         currentSlice.docIDToRelativeOrd[docBase + doc] = ord;
@@ -145,7 +139,7 @@ public final class SortedBytesMergeUtils {
     }
   }
 
-  public static int mergeRecords(MergeContext ctx, IndexOutput datOut,
+  public static int mergeRecords(MergeContext ctx, BytesRefConsumer consumer,
       List<SortedSourceSlice> slices) throws IOException {
     final RecordMerger merger = new RecordMerger(new MergeQueue(slices.size(),
         ctx.comp), slices.toArray(new SortedSourceSlice[0]));
@@ -159,22 +153,38 @@ public final class SortedBytesMergeUtils {
       currentMergedBytes = merger.current;
       assert ctx.sizePerValues == -1 || ctx.sizePerValues == currentMergedBytes.length : "size: "
           + ctx.sizePerValues + " spare: " + currentMergedBytes.length;
-
+      offset += currentMergedBytes.length;
       if (recordOffsets) {
-        offset += currentMergedBytes.length;
         if (merger.currentOrd >= offsets.length) {
           offsets = ArrayUtil.grow(offsets, merger.currentOrd + 1);
         }
         offsets[merger.currentOrd] = offset;
       }
-      datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
-          currentMergedBytes.length);
+      consumer.consume(currentMergedBytes, merger.currentOrd, offset);
       merger.pushTop();
     }
     ctx.offsets = offsets;
     assert offsets == null || offsets[merger.currentOrd - 1] == offset;
     return merger.currentOrd;
   }
+
+  public static interface BytesRefConsumer {
+    public void consume(BytesRef ref, int ord, long offset) throws IOException;
+  }
+
+  public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
+    private final IndexOutput datOut;
+
+    public IndexOutputBytesRefConsumer(IndexOutput datOut) {
+      this.datOut = datOut;
+    }
+
+    @Override
+    public void consume(BytesRef currentMergedBytes, int ord, long offset) throws IOException {
+      datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
+          currentMergedBytes.length);
+    }
+  }
 
   private static final class RecordMerger {
     private final MergeQueue queue;
@@ -241,22 +251,22 @@ public final class SortedBytesMergeUtils {
     /* the currently merged relative ordinal */
     int relativeOrd = -1;
 
-    SortedSourceSlice(int readerIdx, SortedSource source, MergeState state,
+    SortedSourceSlice(int readerIdx, SortedSource source, int[] docBase, int mergeDocCount,
         int[] docToOrd) {
       super();
       this.readerIdx = readerIdx;
       this.source = source;
       this.docIDToRelativeOrd = docToOrd;
       this.ordMapping = new int[source.getValueCount()];
-      this.docToOrdStart = state.docBase[readerIdx];
-      this.docToOrdEnd = this.docToOrdStart + numDocs(state, readerIdx);
+      this.docToOrdStart = docBase[readerIdx];
+      this.docToOrdEnd = this.docToOrdStart + numDocs(docBase, mergeDocCount, readerIdx);
     }
 
-    private static int numDocs(MergeState state, int readerIndex) {
-      if (readerIndex == state.docBase.length - 1) {
-        return state.mergedDocCount - state.docBase[readerIndex];
+    private static int numDocs(int[] docBase, int mergedDocCount, int readerIndex) {
+      if (readerIndex == docBase.length - 1) {
+        return mergedDocCount - docBase[readerIndex];
       }
-      return state.docBase[readerIndex + 1] - state.docBase[readerIndex];
+      return docBase[readerIndex + 1] - docBase[readerIndex];
     }
 
     BytesRef next() {
@@ -269,6 +279,16 @@ public final class SortedBytesMergeUtils {
       }
       return null;
     }
+
+    public int[] toAbsolutOrds(int[] docToOrd) {
+      for (int i = docToOrdStart; i < docToOrdEnd; i++) {
+        final int mappedOrd = docIDToRelativeOrd[i];
+        assert mappedOrd < ordMapping.length;
+        assert ordMapping[mappedOrd] > 0 : "illegal mapping ord maps to an unreferenced value";
+        docToOrd[i] = ordMapping[mappedOrd] - 1;
+      }
+      return docToOrd;
+    }
 
     public void writeOrds(PackedInts.Writer writer) throws IOException {
       for (int i = docToOrdStart; i < docToOrdEnd; i++) {
diff --git a/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
index fbd1c3b4dcd..95d199f56a4 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
@@ -21,8 +21,14 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
@@ -33,6 +39,7 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -47,6 +54,7 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
@@ -539,6 +547,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     return MultiDocValues.getDocValues(reader, field);
   }
 
+  @SuppressWarnings("fallthrough")
   private Source getSource(DocValues values) throws IOException {
     // getSource uses cache internally
     switch(random.nextInt(5)) {
@@ -547,7 +556,9 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     case 2:
       return values.getDirectSource();
     case 1:
-      return values.getSource();
+      if (values.type() == Type.BYTES_VAR_SORTED || values.type() == Type.BYTES_FIXED_SORTED) {
+        return values.getSource().asSortedSource();
+      }
     default:
       return values.getSource();
     }
@@ -705,4 +716,100 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     r.close();
     d.close();
   }
+
+  public void testSortedBytes() throws IOException {
+    Type[] types = new Type[] { Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED };
+    for (Type type : types) {
+      boolean fixed = type == Type.BYTES_FIXED_SORTED;
+      final Directory d = newDirectory();
+      IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
+          new MockAnalyzer(random));
+      IndexWriter w = new IndexWriter(d, cfg);
+      Comparator<BytesRef> comp = BytesRef.getUTF8SortedAsUnicodeComparator();
+      int numDocs = atLeast(100);
+      BytesRefHash hash = new BytesRefHash();
+      Map<String, String> docToString = new HashMap<String, String>();
+      int len = 1 + random.nextInt(50);
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        doc.add(newField("id", "" + i, TextField.TYPE_STORED));
+        DocValuesField f = new DocValuesField("field");
+        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+            len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+        hash.add(new BytesRef(string));
+        docToString.put("" + i, string);
+        f.setBytes(new BytesRef(string), type, comp);
+        doc.add(f);
+        w.addDocument(doc);
+      }
+      if (rarely()) {
+        w.commit();
+      }
+      int numDocsNoValue = atLeast(10);
+      for (int i = 0; i < numDocsNoValue; i++) {
+        Document doc = new Document();
+        doc.add(newField("id", "noValue", TextField.TYPE_STORED));
+        w.addDocument(doc);
+      }
+      BytesRef bytesRef = new BytesRef(fixed ? len : 0);
+      bytesRef.offset = 0;
+      bytesRef.length = fixed ? len : 0;
+      hash.add(bytesRef); // add empty value for the gaps
+      if (rarely()) {
+        w.commit();
+      }
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        String id = "" + i + numDocs;
+        doc.add(newField("id", id, TextField.TYPE_STORED));
+        DocValuesField f = new DocValuesField("field");
+        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+            len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+        hash.add(new BytesRef(string));
+        docToString.put(id, string);
+        f.setBytes(new BytesRef(string), type, comp);
+        doc.add(f);
+        w.addDocument(doc);
+      }
+      w.commit();
+      IndexReader reader = w.getReader();
+      DocValues docValues = MultiDocValues.getDocValues(reader, "field");
+      Source source = getSource(docValues);
+      SortedSource asSortedSource = source.asSortedSource();
+      int[] sort = hash.sort(comp);
+      BytesRef expected = new BytesRef();
+      BytesRef actual = new BytesRef();
+      assertEquals(hash.size(), asSortedSource.getValueCount());
+      for (int i = 0; i < hash.size(); i++) {
+        hash.get(sort[i], expected);
+        asSortedSource.getByOrd(i, actual);
+        assertEquals(expected.utf8ToString(), actual.utf8ToString());
+        int ord = asSortedSource.getByValue(expected, actual);
+        assertEquals(i, ord);
+      }
+      reader = new SlowMultiReaderWrapper(reader);
+      Set<Entry<String, String>> entrySet = docToString.entrySet();
+
+      for (Entry<String, String> entry : entrySet) {
+        int docId = docId(reader, new Term("id", entry.getKey()));
+        expected.copyChars(entry.getValue());
+        assertEquals(expected, asSortedSource.getBytes(docId, actual));
+      }
+
+      reader.close();
+      w.close();
+      d.close();
+    }
+  }
+
+  public int docId(IndexReader reader, Term term) throws IOException {
+    int docFreq = reader.docFreq(term);
+    assertEquals(1, docFreq);
+    DocsEnum termDocsEnum = reader.termDocsEnum(null, term.field, term.bytes, false);
+    int nextDoc = termDocsEnum.nextDoc();
+    assertEquals(DocsEnum.NO_MORE_DOCS, termDocsEnum.nextDoc());
+    return nextDoc;
+  }
 }
diff --git a/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java b/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java
index 8b71be31941..b7028932513 100644
--- a/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java
+++ b/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java
@@ -100,14 +100,12 @@ public class TestTypePromotion extends LuceneTestCase {
         randomValueType(types, random), values, num_1 + num_2, num_3);
     writer_2.commit();
     writer_2.close();
-    if (random.nextBoolean()) {
+    if (rarely()) {
       writer.addIndexes(dir_2);
     } else {
       // do a real merge here
       IndexReader open = IndexReader.open(dir_2);
-      // we cannot use SlowMR for sorted bytes, because it returns a null sortedsource
-      boolean useSlowMRWrapper = types != SORTED_BYTES && random.nextBoolean();
-      writer.addIndexes(useSlowMRWrapper ? new SlowMultiReaderWrapper(open) : open);
+      writer.addIndexes(new SlowMultiReaderWrapper(open));
       open.close();
     }
     dir_2.close();
diff --git a/lucene/src/test/org/apache/lucene/search/TestSort.java b/lucene/src/test/org/apache/lucene/search/TestSort.java
index 37851037bd8..a7c1bf9079f 100644
--- a/lucene/src/test/org/apache/lucene/search/TestSort.java
+++ b/lucene/src/test/org/apache/lucene/search/TestSort.java
@@ -258,7 +258,7 @@ public class TestSort extends LuceneTestCase {
     //System.out.println(writer.getSegmentCount());
     writer.close();
     IndexReader reader = IndexReader.open(indexStore);
-    return new IndexSearcher (reader);
+    return newSearcher(reader);
   }
 
   public String getRandomNumberString(int num, int low, int high) {
@@ -1210,35 +1210,11 @@ public class TestSort extends LuceneTestCase {
     assertMatches( null, searcher, query, sort, expectedResult );
   }
 
-  private static boolean hasSlowMultiReaderWrapper(IndexReader r) {
-    if (r instanceof SlowMultiReaderWrapper) {
-      return true;
-    } else {
-      IndexReader[] subReaders = r.getSequentialSubReaders();
-      if (subReaders != null) {
-        for (IndexReader subReader : subReaders) {
-          if (hasSlowMultiReaderWrapper(subReader)) {
-            return true;
-          }
-        }
-      }
-    }
-    return false;
-  }
 
   // make sure the documents returned by the search match the expected list
   private void assertMatches(String msg, IndexSearcher searcher, Query query,
       Sort sort, String expectedResult) throws IOException {
-    for(SortField sortField : sort.getSort()) {
-      if (sortField.getUseIndexValues() && sortField.getType() == SortField.Type.STRING) {
-        if (hasSlowMultiReaderWrapper(searcher.getIndexReader())) {
-          // Cannot use STRING DocValues sort with SlowMultiReaderWrapper
-          return;
-        }
-      }
-    }
     //ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs;
     TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort);
     ScoreDoc[] result = hits.scoreDocs;
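
Usage note (editor's addition, not part of the committed patch): with this change a composite (multi-segment) reader can expose a single SortedSource with global ordinals. MultiDocValues#asSortedSource merges the per-segment sorted sources through SortedBytesMergeUtils and records the merged values into PagedBytes via the new BytesRefConsumer hook, instead of writing them to an IndexOutput as the codec merge path does. The sketch below shows how a caller might consume the result. It is a minimal example against the 4.0-era trunk API exactly as it appears above; the class name, the "field" argument, and the surrounding index setup are hypothetical.

import java.io.IOException;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.util.BytesRef;

public class MultiSortedSourceExample {
  // Dumps the merged, de-duplicated value space of a sorted-bytes DocValues
  // field across all segments of a composite reader, using the global
  // ordinals provided by the new MultiSortedSource.
  public static void dumpSortedValues(IndexReader reader, String field) throws IOException {
    DocValues docValues = MultiDocValues.getDocValues(reader, field); // merged view over all segments
    if (docValues == null) {
      return; // no doc values indexed for this field
    }
    // With this patch, asSortedSource() no longer requires a single-segment
    // reader: the per-segment SortedSources are merged on the fly.
    SortedSource sorted = docValues.getSource().asSortedSource();
    BytesRef spare = new BytesRef();
    for (int ord = 0; ord < sorted.getValueCount(); ord++) {
      sorted.getByOrd(ord, spare); // value for this global ordinal
      System.out.println(ord + " -> " + spare.utf8ToString());
    }
    System.out.println("doc 0 maps to global ord " + sorted.ord(0));
  }
}

The BytesRefConsumer indirection is the design choice that makes this possible: mergeRecords no longer knows whether the merged, deduplicated values are being persisted (IndexOutputBytesRefConsumer in the codec writers) or buffered in memory (RecordingBytesRefConsumer in MultiDocValues), which is exactly the generalization the removed TODO in SortedBytesMergeUtils asked for.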