mirror of https://github.com/apache/lucene.git

LUCENE-3676: Support SortedSource in MultiDocValues

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1228293 13f79535-47bb-0310-9956-ffa450edef68

commit 40b3b75a6e, parent c957cbea88
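This change decouples SortedBytesMergeUtils from MergeState and from writing directly to an IndexOutput: the merge loop now emits values through a small BytesRefConsumer callback, and the slice/ord-mapping plumbing takes plain docBase/docMap arrays. On top of that, MultiDocValues gains a working asSortedSource(): the per-segment SortedSources are merged on the fly into an in-memory MultiSortedSource backed by PagedBytes. A minimal consumer-side sketch (hypothetical usage, assuming this 4.0-era snapshot where these classes live in org.apache.lucene.index):

    import java.io.IOException;

    import org.apache.lucene.index.DocValues;
    import org.apache.lucene.index.DocValues.SortedSource;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiDocValues;
    import org.apache.lucene.util.BytesRef;

    class SortedSourceOverMultiReader {
      // Walk the globally merged, deduplicated values of a sorted-bytes
      // field across all segments of a composite reader.
      static void dumpSorted(IndexReader reader, String field) throws IOException {
        DocValues values = MultiDocValues.getDocValues(reader, field);
        SortedSource sorted = values.getSource().asSortedSource(); // merged across segments
        BytesRef spare = new BytesRef();
        for (int ord = 0; ord < sorted.getValueCount(); ord++) {
          sorted.getByOrd(ord, spare); // ordinals follow the comparator order
          System.out.println(ord + " -> " + spare.utf8ToString());
        }
      }
    }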
FixedSortedBytesImpl.java

@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedBytesMergeUtils;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
 import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
 import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
 import org.apache.lucene.index.MergeState;
@@ -66,11 +67,11 @@ class FixedSortedBytesImpl {
         throws IOException {
       boolean success = false;
       try {
-        final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState);
-        List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+        final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState.mergedDocCount);
+        List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
         final IndexOutput datOut = getOrCreateDataOut();
         datOut.writeInt(ctx.sizePerValues);
-        final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+        final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
 
         final IndexOutput idxOut = getOrCreateIndexOut();
         idxOut.writeInt(maxOrd);
VarSortedBytesImpl.java

@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedBytesMergeUtils;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
 import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
 import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
 import org.apache.lucene.index.MergeState;
@@ -67,12 +68,12 @@ final class VarSortedBytesImpl {
         throws IOException {
       boolean success = false;
       try {
-        MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState);
-        final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState, docValues, ctx);
+        MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState.mergedDocCount);
+        final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
         IndexOutput datOut = getOrCreateDataOut();
 
         ctx.offsets = new long[1];
-        final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, datOut, slices);
+        final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
         final long[] offsets = ctx.offsets;
         maxBytes = offsets[maxOrd-1];
         final IndexOutput idxOut = getOrCreateIndexOut();
MultiDocValues.java

@@ -20,11 +20,17 @@ import java.io.IOException;
 import java.lang.reflect.Array;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Comparator;
 import java.util.List;
 
+import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
+import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.ReaderUtil.Gather;
+import org.apache.lucene.util.packed.PackedInts.Reader;
 
 /**
  * A wrapper for compound IndexReader providing access to per segment
@@ -143,6 +149,8 @@ public class MultiDocValues extends DocValues {
       switch(promoted) {
       case BYTES_FIXED_DEREF:
       case BYTES_FIXED_STRAIGHT:
+      case BYTES_FIXED_SORTED:
+        assert promotedType[0].getValueSize() >= 0;
         slice.docValues = new EmptyFixedDocValues(slice.length, promoted, promotedType[0].getValueSize());
         break;
       default:
@@ -179,7 +187,6 @@ public class MultiDocValues extends DocValues {
       return emptySource.type();
     }
 
-
     @Override
     public Source getDirectSource() throws IOException {
       return emptySource;
@@ -276,6 +283,59 @@ public class MultiDocValues extends DocValues {
       }
     }
 
+    @Override
+    public SortedSource asSortedSource() {
+      try {
+        if (type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED) {
+          DocValues[] values = new DocValues[slices.length];
+          Comparator<BytesRef> comp = null;
+          for (int i = 0; i < values.length; i++) {
+            values[i] = slices[i].docValues;
+            if (!(values[i] instanceof EmptyDocValues)) {
+              Comparator<BytesRef> comparator = values[i].getDirectSource()
+                  .asSortedSource().getComparator();
+              assert comp == null || comp == comparator;
+              comp = comparator;
+            }
+          }
+          assert comp != null;
+          final int globalNumDocs = globalNumDocs();
+          final MergeContext ctx = SortedBytesMergeUtils.init(type, values,
+              comp, globalNumDocs);
+          List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
+              docBases(), new int[values.length][], values, ctx);
+          RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(
+              type);
+          final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer,
+              slices);
+          final int[] docToOrd = new int[globalNumDocs];
+          for (SortedSourceSlice slice : slices) {
+            slice.toAbsolutOrds(docToOrd);
+          }
+          return new MultiSortedSource(type, comp, consumer.pagedBytes,
+              ctx.sizePerValues, maxOrd, docToOrd, consumer.ordToOffset);
+        }
+      } catch (IOException e) {
+        throw new RuntimeException("load failed", e);
+      }
+      return super.asSortedSource();
+    }
+
+    private int globalNumDocs() {
+      int docs = 0;
+      for (int i = 0; i < slices.length; i++) {
+        docs += slices[i].length;
+      }
+      return docs;
+    }
+
+    private int[] docBases() {
+      int[] docBases = new int[slices.length];
+      for (int i = 0; i < slices.length; i++) {
+        docBases[i] = slices[i].start;
+      }
+      return docBases;
+    }
+
     public boolean hasArray() {
       boolean oneRealSource = false;
       for (DocValuesSlice slice : slices) {
@@ -346,12 +406,79 @@ public class MultiDocValues extends DocValues {
         }
       }
     }
 
+  private static final class RecordingBytesRefConsumer implements SortedBytesMergeUtils.BytesRefConsumer {
+    private final static int PAGED_BYTES_BITS = 15;
+    final PagedBytes pagedBytes = new PagedBytes(PAGED_BYTES_BITS);
+    long[] ordToOffset;
+
+    public RecordingBytesRefConsumer(Type type) {
+      // offsets are only needed for variable-length bytes
+      ordToOffset = type == Type.BYTES_VAR_SORTED ? new long[2] : null;
+    }
+
+    @Override
+    public void consume(BytesRef ref, int ord, long offset) throws IOException {
+      pagedBytes.copy(ref);
+      if (ordToOffset != null) {
+        if (ord + 1 >= ordToOffset.length) {
+          ordToOffset = ArrayUtil.grow(ordToOffset, ord + 2);
+        }
+        ordToOffset[ord + 1] = offset;
+      }
+    }
+  }
+
+  private static final class MultiSortedSource extends SortedSource {
+    private final PagedBytes.Reader data;
+    private final int[] docToOrd;
+    private final long[] ordToOffset;
+    private int size;
+    private int valueCount;
+
+    public MultiSortedSource(Type type, Comparator<BytesRef> comparator, PagedBytes pagedBytes,
+        int size, int numValues, int[] docToOrd, long[] ordToOffset) {
+      super(type, comparator);
+      data = pagedBytes.freeze(true);
+      this.size = size;
+      this.valueCount = numValues;
+      this.docToOrd = docToOrd;
+      this.ordToOffset = ordToOffset;
+    }
+
+    @Override
+    public int ord(int docID) {
+      return docToOrd[docID];
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      int size = this.size;
+      long offset = (ord * size);
+      if (ordToOffset != null) {
+        offset = ordToOffset[ord];
+        size = (int) (ordToOffset[1 + ord] - offset);
+      }
+      assert size >= 0;
+      return data.fillSlice(bytesRef, offset, size);
+    }
+
+    @Override
+    public Reader getDocToOrd() {
+      return null;
+    }
+
+    @Override
+    public int getValueCount() {
+      return valueCount;
+    }
+  }
+
   // TODO: this is dup of DocValues.getDefaultSource()?
-  private static class EmptySource extends Source {
+  private static class EmptySource extends SortedSource {
 
     public EmptySource(Type type) {
-      super(type);
+      super(type, BytesRef.getUTF8SortedAsUnicodeComparator());
     }
 
     @Override
@@ -369,14 +496,46 @@ public class MultiDocValues extends DocValues {
     public long getInt(int docID) {
       return 0;
     }
 
-    @Override
-    public SortedSource asSortedSource() {
-      if (type() == Type.BYTES_FIXED_SORTED || type() == Type.BYTES_VAR_SORTED) {
-
-      }
-      return super.asSortedSource();
-    }
-
     @Override
+    public int ord(int docID) {
+      return 0;
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      bytesRef.length = 0;
+      bytesRef.offset = 0;
+      return bytesRef;
+    }
+
+    @Override
+    public Reader getDocToOrd() {
+      return null;
+    }
+
+    @Override
+    public int getValueCount() {
+      return 1;
+    }
+
   }
 
   private static class EmptyFixedSource extends EmptySource {
     private final int valueSize;
 
+    private final byte[] valueArray;
     public EmptyFixedSource(Type type, int valueSize) {
       super(type);
       this.valueSize = valueSize;
+      valueArray = new byte[valueSize];
     }
 
     @Override
@@ -396,6 +555,14 @@ public class MultiDocValues extends DocValues {
     public long getInt(int docID) {
       return 0;
     }
 
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      bytesRef.bytes = valueArray;
+      bytesRef.length = valueSize;
+      bytesRef.offset = 0;
+      return bytesRef;
+    }
+
   }
 
   @Override
@@ -412,4 +579,6 @@ public class MultiDocValues extends DocValues {
   public Source getDirectSource() throws IOException {
     return new MultiSource(slices, starts, true, type);
   }
+
+
 }
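The MultiDocValues hunks above are the main consumer of the refactor: instead of streaming merged values to disk, asSortedSource() feeds the same merge machinery into a RecordingBytesRefConsumer, which buffers values in PagedBytes and, for variable-length bytes, records per-ordinal offsets. A condensed sketch of the pipeline it drives, using only signatures visible in this patch:

    // Conceptually what MultiSource.asSortedSource() does:
    MergeContext ctx = SortedBytesMergeUtils.init(type, values, comp, globalNumDocs);
    List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
        docBases(), new int[values.length][], values, ctx); // empty docMaps: no deletes
    RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(type);
    int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer, slices);
    int[] docToOrd = new int[globalNumDocs];
    for (SortedSourceSlice slice : slices) {
      slice.toAbsolutOrds(docToOrd); // per-segment relative ords -> global ords
    }
    // consumer.pagedBytes, docToOrd and (for var-length) consumer.ordToOffset
    // then back the returned MultiSortedSource.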
SortedBytesMergeUtils.java

@@ -25,7 +25,6 @@ import java.util.List;
 import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.DocValues.Source;
 import org.apache.lucene.index.DocValues.Type;
-import org.apache.lucene.index.MergeState.IndexReaderAndLiveDocs;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -35,11 +34,6 @@ import org.apache.lucene.util.packed.PackedInts;
 /**
  * @lucene.internal
  */
-// TODO: generalize this a bit more:
-//   * remove writing (like indexoutput) from here
-//   * just take IndexReaders (not IR&LiveDocs), doesnt care about liveDocs
-//   * hook into MultiDocValues to make a MultiSortedSource
-//   * maybe DV merging should then just use MultiDocValues for simplicity?
 public final class SortedBytesMergeUtils {
 
   private SortedBytesMergeUtils() {
@@ -47,7 +41,7 @@ public final class SortedBytesMergeUtils {
   }
 
   public static MergeContext init(Type type, DocValues[] docValues,
-      Comparator<BytesRef> comp, MergeState mergeState) {
+      Comparator<BytesRef> comp, int mergeDocCount) {
     int size = -1;
     if (type == Type.BYTES_FIXED_SORTED) {
       for (DocValues indexDocValues : docValues) {
@@ -58,7 +52,7 @@ public final class SortedBytesMergeUtils {
       }
       assert size >= 0;
     }
-    return new MergeContext(comp, mergeState, size, type);
+    return new MergeContext(comp, mergeDocCount, size, type);
   }
 
   public static final class MergeContext {
@@ -69,7 +63,7 @@ public final class SortedBytesMergeUtils {
     public final int[] docToEntry;
     public long[] offsets; // if non-null #mergeRecords collects byte offsets here
 
-    public MergeContext(Comparator<BytesRef> comp, MergeState mergeState,
+    public MergeContext(Comparator<BytesRef> comp, int mergeDocCount,
         int size, Type type) {
       assert type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED;
       this.comp = comp;
@@ -79,11 +73,15 @@ public final class SortedBytesMergeUtils {
         missingValue.grow(size);
         missingValue.length = size;
       }
-      docToEntry = new int[mergeState.mergedDocCount];
+      docToEntry = new int[mergeDocCount];
     }
 
+    public int getMergeDocCount() {
+      return docToEntry.length;
+    }
   }
 
-  public static List<SortedSourceSlice> buildSlices(MergeState mergeState,
+  public static List<SortedSourceSlice> buildSlices(int[] docBases, int[][] docMaps,
       DocValues[] docValues, MergeContext ctx) throws IOException {
     final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
     for (int i = 0; i < docValues.length; i++) {
@@ -92,13 +90,13 @@ public final class SortedBytesMergeUtils {
       if (docValues[i] != null
           && (directSource = docValues[i].getDirectSource()) != null) {
         final SortedSourceSlice slice = new SortedSourceSlice(i, directSource
-            .asSortedSource(), mergeState, ctx.docToEntry);
+            .asSortedSource(), docBases, ctx.getMergeDocCount(), ctx.docToEntry);
         nextSlice = slice;
       } else {
         nextSlice = new SortedSourceSlice(i, new MissingValueSource(ctx),
-            mergeState, ctx.docToEntry);
+            docBases, ctx.getMergeDocCount(), ctx.docToEntry);
       }
-      createOrdMapping(mergeState, nextSlice);
+      createOrdMapping(docBases, docMaps, nextSlice);
       slices.add(nextSlice);
     }
     return Collections.unmodifiableList(slices);
@@ -113,12 +111,12 @@ public final class SortedBytesMergeUtils {
    * mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping
    * contains the new global ordinals for the relative index.
    */
-  private static void createOrdMapping(MergeState mergeState,
+  private static void createOrdMapping(int[] docBases, int[][] docMaps,
       SortedSourceSlice currentSlice) {
     final int readerIdx = currentSlice.readerIdx;
-    final int[] currentDocMap = mergeState.docMaps[readerIdx];
+    final int[] currentDocMap = docMaps[readerIdx];
     final int docBase = currentSlice.docToOrdStart;
-    assert docBase == mergeState.docBase[readerIdx];
+    assert docBase == docBases[readerIdx];
     if (currentDocMap != null) { // we have deletes
       for (int i = 0; i < currentDocMap.length; i++) {
         final int doc = currentDocMap[i];
@@ -131,11 +129,7 @@ public final class SortedBytesMergeUtils {
         }
       }
     } else { // no deletes
-      final IndexReaderAndLiveDocs indexReaderAndLiveDocs = mergeState.readers
-          .get(readerIdx);
-      final int numDocs = indexReaderAndLiveDocs.reader.numDocs();
-      assert indexReaderAndLiveDocs.liveDocs == null;
-      assert currentSlice.docToOrdEnd - currentSlice.docToOrdStart == numDocs;
+      final int numDocs = currentSlice.docToOrdEnd - currentSlice.docToOrdStart;
       for (int doc = 0; doc < numDocs; doc++) {
         final int ord = currentSlice.source.ord(doc);
         currentSlice.docIDToRelativeOrd[docBase + doc] = ord;
@@ -145,7 +139,7 @@ public final class SortedBytesMergeUtils {
     }
   }
 
-  public static int mergeRecords(MergeContext ctx, IndexOutput datOut,
+  public static int mergeRecords(MergeContext ctx, BytesRefConsumer consumer,
       List<SortedSourceSlice> slices) throws IOException {
     final RecordMerger merger = new RecordMerger(new MergeQueue(slices.size(),
         ctx.comp), slices.toArray(new SortedSourceSlice[0]));
@@ -159,22 +153,38 @@ public final class SortedBytesMergeUtils {
       currentMergedBytes = merger.current;
       assert ctx.sizePerValues == -1 || ctx.sizePerValues == currentMergedBytes.length : "size: "
          + ctx.sizePerValues + " spare: " + currentMergedBytes.length;
 
-      offset += currentMergedBytes.length;
       if (recordOffsets) {
+        offset += currentMergedBytes.length;
         if (merger.currentOrd >= offsets.length) {
           offsets = ArrayUtil.grow(offsets, merger.currentOrd + 1);
         }
         offsets[merger.currentOrd] = offset;
       }
-      datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
-          currentMergedBytes.length);
+      consumer.consume(currentMergedBytes, merger.currentOrd, offset);
       merger.pushTop();
     }
     ctx.offsets = offsets;
     assert offsets == null || offsets[merger.currentOrd - 1] == offset;
     return merger.currentOrd;
   }
 
+  public static interface BytesRefConsumer {
+    public void consume(BytesRef ref, int ord, long offset) throws IOException;
+  }
+
+  public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
+    private final IndexOutput datOut;
+
+    public IndexOutputBytesRefConsumer(IndexOutput datOut) {
+      this.datOut = datOut;
+    }
+
+    @Override
+    public void consume(BytesRef currentMergedBytes, int ord, long offset) throws IOException {
+      datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
+          currentMergedBytes.length);
+    }
+  }
+
   private static final class RecordMerger {
     private final MergeQueue queue;
@@ -241,22 +251,22 @@ public final class SortedBytesMergeUtils {
     /* the currently merged relative ordinal */
     int relativeOrd = -1;
 
-    SortedSourceSlice(int readerIdx, SortedSource source, MergeState state,
+    SortedSourceSlice(int readerIdx, SortedSource source, int[] docBase, int mergeDocCount,
        int[] docToOrd) {
      super();
      this.readerIdx = readerIdx;
      this.source = source;
      this.docIDToRelativeOrd = docToOrd;
      this.ordMapping = new int[source.getValueCount()];
-      this.docToOrdStart = state.docBase[readerIdx];
-      this.docToOrdEnd = this.docToOrdStart + numDocs(state, readerIdx);
+      this.docToOrdStart = docBase[readerIdx];
+      this.docToOrdEnd = this.docToOrdStart + numDocs(docBase, mergeDocCount, readerIdx);
     }
 
-    private static int numDocs(MergeState state, int readerIndex) {
-      if (readerIndex == state.docBase.length - 1) {
-        return state.mergedDocCount - state.docBase[readerIndex];
+    private static int numDocs(int[] docBase, int mergedDocCount, int readerIndex) {
+      if (readerIndex == docBase.length - 1) {
+        return mergedDocCount - docBase[readerIndex];
       }
-      return state.docBase[readerIndex + 1] - state.docBase[readerIndex];
+      return docBase[readerIndex + 1] - docBase[readerIndex];
     }
 
     BytesRef next() {
@@ -269,6 +279,16 @@ public final class SortedBytesMergeUtils {
       }
       return null;
     }
 
+    public int[] toAbsolutOrds(int[] docToOrd) {
+      for (int i = docToOrdStart; i < docToOrdEnd; i++) {
+        final int mappedOrd = docIDToRelativeOrd[i];
+        assert mappedOrd < ordMapping.length;
+        assert ordMapping[mappedOrd] > 0 : "illegal mapping ord maps to an unreferenced value";
+        docToOrd[i] = ordMapping[mappedOrd] - 1;
+      }
+      return docToOrd;
+    }
+
     public void writeOrds(PackedInts.Writer writer) throws IOException {
       for (int i = docToOrdStart; i < docToOrdEnd; i++) {
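Because mergeRecords() now reports each merged value through the BytesRefConsumer interface instead of writing straight to an IndexOutput, callers other than the codec writers can tap the merged stream. A hypothetical consumer (not part of this patch) that only tallies the merged bytes:

    import java.io.IOException;

    import org.apache.lucene.index.SortedBytesMergeUtils.BytesRefConsumer;
    import org.apache.lucene.util.BytesRef;

    final class CountingBytesRefConsumer implements BytesRefConsumer {
      long totalBytes;
      int lastOrd = -1;

      @Override
      public void consume(BytesRef ref, int ord, long offset) throws IOException {
        // Invoked once per distinct merged value, in comparator order.
        assert ord > lastOrd : "ords must arrive in increasing order";
        lastOrd = ord;
        totalBytes += ref.length;
      }
    }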
TestDocValuesIndexing.java

@@ -21,8 +21,14 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
@@ -33,6 +39,7 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.SortedSource;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -47,6 +54,7 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
@@ -539,6 +547,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     return MultiDocValues.getDocValues(reader, field);
   }
 
+  @SuppressWarnings("fallthrough")
   private Source getSource(DocValues values) throws IOException {
     // getSource uses cache internally
     switch(random.nextInt(5)) {
@@ -547,7 +556,9 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     case 2:
       return values.getDirectSource();
     case 1:
-      return values.getSource();
+      if (values.type() == Type.BYTES_VAR_SORTED || values.type() == Type.BYTES_FIXED_SORTED) {
+        return values.getSource().asSortedSource();
+      }
     default:
       return values.getSource();
     }
@@ -705,4 +716,100 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     r.close();
     d.close();
   }
 
+  public void testSortedBytes() throws IOException {
+    Type[] types = new Type[] { Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED };
+    for (Type type : types) {
+      boolean fixed = type == Type.BYTES_FIXED_SORTED;
+      final Directory d = newDirectory();
+      IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
+          new MockAnalyzer(random));
+      IndexWriter w = new IndexWriter(d, cfg);
+      Comparator<BytesRef> comp = BytesRef.getUTF8SortedAsUnicodeComparator();
+      int numDocs = atLeast(100);
+      BytesRefHash hash = new BytesRefHash();
+      Map<String, String> docToString = new HashMap<String, String>();
+      int len = 1 + random.nextInt(50);
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        doc.add(newField("id", "" + i, TextField.TYPE_STORED));
+        DocValuesField f = new DocValuesField("field");
+        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+            len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+        hash.add(new BytesRef(string));
+        docToString.put("" + i, string);
+
+        f.setBytes(new BytesRef(string), type, comp);
+        doc.add(f);
+        w.addDocument(doc);
+      }
+      if (rarely()) {
+        w.commit();
+      }
+      int numDocsNoValue = atLeast(10);
+      for (int i = 0; i < numDocsNoValue; i++) {
+        Document doc = new Document();
+        doc.add(newField("id", "noValue", TextField.TYPE_STORED));
+        w.addDocument(doc);
+      }
+      BytesRef bytesRef = new BytesRef(fixed ? len : 0);
+      bytesRef.offset = 0;
+      bytesRef.length = fixed ? len : 0;
+      hash.add(bytesRef); // add empty value for the gaps
+      if (rarely()) {
+        w.commit();
+      }
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        String id = "" + i + numDocs;
+        doc.add(newField("id", id, TextField.TYPE_STORED));
+        DocValuesField f = new DocValuesField("field");
+        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random,
+            len) : _TestUtil.randomRealisticUnicodeString(random, 1, len);
+        hash.add(new BytesRef(string));
+        docToString.put(id, string);
+        f.setBytes(new BytesRef(string), type, comp);
+        doc.add(f);
+        w.addDocument(doc);
+      }
+      w.commit();
+      IndexReader reader = w.getReader();
+      DocValues docValues = MultiDocValues.getDocValues(reader, "field");
+      Source source = getSource(docValues);
+      SortedSource asSortedSource = source.asSortedSource();
+      int[] sort = hash.sort(comp);
+      BytesRef expected = new BytesRef();
+      BytesRef actual = new BytesRef();
+      assertEquals(hash.size(), asSortedSource.getValueCount());
+      for (int i = 0; i < hash.size(); i++) {
+        hash.get(sort[i], expected);
+        asSortedSource.getByOrd(i, actual);
+        assertEquals(expected.utf8ToString(), actual.utf8ToString());
+        int ord = asSortedSource.getByValue(expected, actual);
+        assertEquals(i, ord);
+      }
+      reader = new SlowMultiReaderWrapper(reader);
+      Set<Entry<String, String>> entrySet = docToString.entrySet();
+
+      for (Entry<String, String> entry : entrySet) {
+        int docId = docId(reader, new Term("id", entry.getKey()));
+        expected.copyChars(entry.getValue());
+        assertEquals(expected, asSortedSource.getBytes(docId, actual));
+      }
+
+      reader.close();
+      w.close();
+      d.close();
+    }
+  }
+
+  public int docId(IndexReader reader, Term term) throws IOException {
+    int docFreq = reader.docFreq(term);
+    assertEquals(1, docFreq);
+    DocsEnum termDocsEnum = reader.termDocsEnum(null, term.field, term.bytes, false);
+    int nextDoc = termDocsEnum.nextDoc();
+    assertEquals(DocsEnum.NO_MORE_DOCS, termDocsEnum.nextDoc());
+    return nextDoc;
+  }
 }
TestTypePromotion.java

@@ -100,14 +100,12 @@ public class TestTypePromotion extends LuceneTestCase {
         randomValueType(types, random), values, num_1 + num_2, num_3);
     writer_2.commit();
     writer_2.close();
-    if (random.nextBoolean()) {
+    if (rarely()) {
       writer.addIndexes(dir_2);
     } else {
       // do a real merge here
       IndexReader open = IndexReader.open(dir_2);
-      // we cannot use SlowMR for sorted bytes, because it returns a null sortedsource
-      boolean useSlowMRWrapper = types != SORTED_BYTES && random.nextBoolean();
-      writer.addIndexes(useSlowMRWrapper ? new SlowMultiReaderWrapper(open) : open);
+      writer.addIndexes(new SlowMultiReaderWrapper(open));
       open.close();
     }
     dir_2.close();
TestSort.java

@@ -258,7 +258,7 @@ public class TestSort extends LuceneTestCase {
     //System.out.println(writer.getSegmentCount());
     writer.close();
     IndexReader reader = IndexReader.open(indexStore);
-    return new IndexSearcher (reader);
+    return newSearcher(reader);
   }
 
   public String getRandomNumberString(int num, int low, int high) {
@@ -1210,35 +1210,11 @@ public class TestSort extends LuceneTestCase {
     assertMatches( null, searcher, query, sort, expectedResult );
   }
 
-  private static boolean hasSlowMultiReaderWrapper(IndexReader r) {
-    if (r instanceof SlowMultiReaderWrapper) {
-      return true;
-    } else {
-      IndexReader[] subReaders = r.getSequentialSubReaders();
-      if (subReaders != null) {
-        for (IndexReader subReader : subReaders) {
-          if (hasSlowMultiReaderWrapper(subReader)) {
-            return true;
-          }
-        }
-      }
-    }
-    return false;
-  }
-
   // make sure the documents returned by the search match the expected list
   private void assertMatches(String msg, IndexSearcher searcher, Query query, Sort sort,
       String expectedResult) throws IOException {
-
-    for(SortField sortField : sort.getSort()) {
-      if (sortField.getUseIndexValues() && sortField.getType() == SortField.Type.STRING) {
-        if (hasSlowMultiReaderWrapper(searcher.getIndexReader())) {
-          // Cannot use STRING DocValues sort with SlowMultiReaderWrapper
-          return;
-        }
-      }
-    }
-
     //ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs;
     TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort);
     ScoreDoc[] result = hits.scoreDocs;