LUCENE-3518: enable sorting by sorted source doc values

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1201440 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2011-11-13 15:26:36 +00:00
parent 2cdf3fa14d
commit dbd48a72e4
9 changed files with 850 additions and 88 deletions

View File

@ -145,33 +145,33 @@ public class FieldType implements IndexableFieldType {
if (result.length() > 0)
result.append(",");
result.append("indexed");
}
if (tokenized()) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
}
if (storeTermVectors()) {
if (result.length() > 0)
result.append(",");
result.append("termVector");
}
if (storeTermVectorOffsets()) {
if (result.length() > 0)
result.append(",");
result.append("termVectorOffsets");
}
if (storeTermVectorPositions()) {
if (result.length() > 0)
result.append(",");
result.append("termVectorPosition");
}
if (omitNorms()) {
result.append(",omitNorms");
}
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
result.append(",indexOptions=");
result.append(indexOptions);
if (tokenized()) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
}
if (storeTermVectors()) {
if (result.length() > 0)
result.append(",");
result.append("termVector");
}
if (storeTermVectorOffsets()) {
if (result.length() > 0)
result.append(",");
result.append("termVectorOffsets");
}
if (storeTermVectorPositions()) {
if (result.length() > 0)
result.append(",");
result.append("termVectorPosition");
}
if (omitNorms()) {
result.append(",omitNorms");
}
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
result.append(",indexOptions=");
result.append(indexOptions);
}
}
return result.toString();

View File

@ -32,17 +32,17 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
/**
@ -586,7 +586,11 @@ public final class Bytes {
this.idxIn = idxIn;
ordToOffsetIndex = hasOffsets ? PackedInts.getReader(idxIn) : null;
docToOrdIndex = PackedInts.getReader(idxIn);
}
@Override
public PackedInts.Reader getDocToOrd() {
// Exposes the packed docID -> ord map directly so ord-based sort
// comparators can read ordinals without a per-doc virtual call.
return docToOrdIndex;
}
@Override

View File

@ -194,6 +194,11 @@ class FixedSortedBytesImpl {
return (int) docToOrdIndex.get(docID);
}
@Override
public PackedInts.Reader getDocToOrd() {
// Same map that ord(docID) reads from; returned raw so callers can
// specialize on the packed array's native width (byte/short/int).
return docToOrdIndex;
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
try {

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.codecs.DocValuesFormat;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.PackedInts;
/**
* {@link IndexDocValues} provides a dense per-document typed storage for fast
@ -223,7 +224,7 @@ public abstract class IndexDocValues implements Closeable {
return null;
}
}
/**
* A sorted variant of {@link Source} for <tt>byte[]</tt> values per document.
* <p>
@ -257,6 +258,18 @@ public abstract class IndexDocValues implements Closeable {
/** Returns value for specified ord. */
public abstract BytesRef getByOrd(int ord, BytesRef bytesRef);
/**
* Returns the PackedInts.Reader impl that maps document to ord.
*/
public abstract PackedInts.Reader getDocToOrd();
/**
* Returns the comparator used to order the BytesRefs.
*/
public Comparator<BytesRef> getComparator() {
return comparator;
}
/**
* Performs a lookup by value.
*
@ -304,4 +317,98 @@ public abstract class IndexDocValues implements Closeable {
*/
public abstract int getValueCount();
}
/**
 * Creates a {@link Source} whose every document yields the missing-value
 * default: an empty BytesRef for byte values, 0 for ints and 0.0 for floats.
 * Used when a reader has no docvalues for a sorted-on field.
 */
public static Source getDefaultSource(final ValueType type) {
  return new Source(type) {
    @Override
    public BytesRef getBytes(int docID, BytesRef ref) {
      // Empty value for every document; reuse the caller's ref.
      ref.length = 0;
      return ref;
    }

    @Override
    public long getInt(int docID) {
      return 0;
    }

    @Override
    public double getFloat(int docID) {
      return 0.0;
    }
  };
}
/** Returns a SortedSource that always returns default (missing)
 * values for all documents: a single empty value at ord 0, to which
 * every document maps.  Used when a segment has no docvalues for a
 * field being sorted on. */
public static SortedSource getDefaultSortedSource(final ValueType type, final int size) {
  // doc -> ord map that sends every doc to ord 0 without backing storage.
  final PackedInts.Reader docToOrd = new PackedInts.Reader() {
    @Override
    public long get(int index) {
      return 0;
    }
    @Override
    public int getBitsPerValue() {
      return 0;
    }
    @Override
    public int size() {
      // Must report the segment's maxDoc so iteration bounds hold.
      return size;
    }
    @Override
    public boolean hasArray() {
      // No native array: forces callers onto the generic get() path.
      return false;
    }
    @Override
    public Object getArray() {
      return null;
    }
  };
  return new SortedSource(type, BytesRef.getUTF8SortedAsUnicodeComparator()) {
    @Override
    public BytesRef getBytes(int docID, BytesRef ref) {
      ref.length = 0;
      return ref;
    }
    @Override
    public int ord(int docID) {
      // Every document shares the single (empty) value.
      return 0;
    }
    @Override
    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
      assert ord == 0;
      bytesRef.length = 0;
      return bytesRef;
    }
    @Override
    public PackedInts.Reader getDocToOrd() {
      return docToOrd;
    }
    @Override
    public int getByValue(BytesRef value, BytesRef spare) {
      // Exact match only for the empty value (ord 0); any other value is
      // absent.  NOTE(review): -1 encodes the not-found/insertion-point
      // convention consumed as (-index - 2) by setBottom in
      // TermOrdValDocValuesComparator — confirm the sign convention
      // against the SortedSource.getByValue contract.
      if (value.length == 0) {
        return 0;
      } else {
        return -1;
      }
    }
    @Override
    public int getValueCount() {
      // Only the single empty value exists.
      return 1;
    }
  };
}
}

View File

@ -299,6 +299,11 @@ final class SortedBytesMergeUtils {
return bytesRef;
}
@Override
public PackedInts.Reader getDocToOrd() {
// No doc->ord map is available for this merge-time view.
// NOTE(review): callers such as TermOrdValDocValuesComparator.setNextReader
// dereference getDocToOrd() without a null check — presumably this source
// is only used during merging and never handed to a comparator; confirm.
return null;
}
@Override
public int getValueCount() {
return 1;

View File

@ -214,6 +214,11 @@ final class VarSortedBytesImpl {
return (int) docToOrdIndex.get(docID);
}
@Override
public PackedInts.Reader getDocToOrd() {
// Raw packed docID -> ord map, exposed for ord-based sorting.
return docToOrdIndex;
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
try {

View File

@ -18,10 +18,14 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.search.FieldCache.ByteParser;
import org.apache.lucene.search.FieldCache.DocTerms;
import org.apache.lucene.search.FieldCache.DocTermsIndex;
@ -399,6 +403,8 @@ public abstract class FieldComparator<T> {
final IndexDocValues docValues = context.reader.docValues(field);
if (docValues != null) {
currentReaderValues = docValues.getSource();
} else {
currentReaderValues = IndexDocValues.getDefaultSource(ValueType.FLOAT_64);
}
return this;
}
@ -690,6 +696,8 @@ public abstract class FieldComparator<T> {
IndexDocValues docValues = context.reader.docValues(field);
if (docValues != null) {
currentReaderValues = docValues.getSource();
} else {
currentReaderValues = IndexDocValues.getDefaultSource(ValueType.FIXED_INTS_64);
}
return this;
}
@ -911,30 +919,53 @@ public abstract class FieldComparator<T> {
* than {@link TermValComparator}. For very small
* result sets it may be slower. */
public static final class TermOrdValComparator extends FieldComparator<BytesRef> {
/** @lucene.internal */
/* Ords for each slot.
@lucene.internal */
final int[] ords;
/** @lucene.internal */
/* Values for each slot.
@lucene.internal */
final BytesRef[] values;
/** @lucene.internal */
/* Which reader last copied a value into the slot. When
we compare two slots, we just compare-by-ord if the
readerGen is the same; else we must compare the
values (slower).
@lucene.internal */
final int[] readerGen;
/** @lucene.internal */
/* Gen of current reader we are on.
@lucene.internal */
int currentReaderGen = -1;
private DocTermsIndex termsIndex;
/* Current reader's doc ord/values.
@lucene.internal */
DocTermsIndex termsIndex;
private final String field;
/** @lucene.internal */
/* Bottom slot, or -1 if queue isn't full yet
@lucene.internal */
int bottomSlot = -1;
/** @lucene.internal */
/* Bottom ord (same as ords[bottomSlot] once bottomSlot
is set). Cached for faster compares.
@lucene.internal */
int bottomOrd;
/** @lucene.internal */
/* True if current bottom slot matches the current
reader.
@lucene.internal */
boolean bottomSameReader;
/** @lucene.internal */
/* Bottom value (same as values[bottomSlot] once
bottomSlot is set). Cached for faster compares.
@lucene.internal */
BytesRef bottomValue;
/** @lucene.internal */
final BytesRef tempBR = new BytesRef();
public TermOrdValComparator(int numHits, String field, int sortPos, boolean reversed) {
public TermOrdValComparator(int numHits, String field) {
ords = new int[numHits];
values = new BytesRef[numHits];
readerGen = new int[numHits];
@ -1325,6 +1356,396 @@ public abstract class FieldComparator<T> {
}
}
/** Sorts by field's natural Term sort order, using
 * ordinals; this is just like {@link
 * TermOrdValComparator} except it uses DocValues to
 * retrieve the sort ords saved during indexing.
 * <p>
 * Ords are only comparable within one segment, so each slot also
 * remembers which reader generation filled it; cross-generation
 * comparisons fall back to comparing the actual byte values. */
public static final class TermOrdValDocValuesComparator extends FieldComparator<BytesRef> {
  /* Ords for each slot.
     @lucene.internal */
  final int[] ords;

  /* Values for each slot.
     @lucene.internal */
  final BytesRef[] values;

  /* Which reader last copied a value into the slot.  When
     we compare two slots, we just compare-by-ord if the
     readerGen is the same; else we must compare the
     values (slower).
     @lucene.internal */
  final int[] readerGen;

  /* Gen of current reader we are on.
     @lucene.internal */
  int currentReaderGen = -1;

  /* Current reader's doc ord/values.
     @lucene.internal */
  SortedSource termsIndex;

  /* Comparator for comparing by value.
     @lucene.internal */
  Comparator<BytesRef> comp;

  private final String field;

  /* Bottom slot, or -1 if queue isn't full yet
     @lucene.internal */
  int bottomSlot = -1;

  /* Bottom ord (same as ords[bottomSlot] once bottomSlot
     is set).  Cached for faster compares.
     @lucene.internal */
  int bottomOrd;

  /* True if current bottom slot matches the current
     reader.
     @lucene.internal */
  boolean bottomSameReader;

  /* Bottom value (same as values[bottomSlot] once
     bottomSlot is set).  Cached for faster compares.
     @lucene.internal */
  BytesRef bottomValue;

  /** Scratch ref reused for by-value fallback compares.
   * @lucene.internal */
  final BytesRef tempBR = new BytesRef();

  public TermOrdValDocValuesComparator(int numHits, String field) {
    ords = new int[numHits];
    values = new BytesRef[numHits];
    readerGen = new int[numHits];
    this.field = field;
  }

  @Override
  public int compare(int slot1, int slot2) {
    if (readerGen[slot1] == readerGen[slot2]) {
      // Same segment: ords are directly comparable.
      return ords[slot1] - ords[slot2];
    }
    // Different segments: must compare by value; null sorts first.
    final BytesRef val1 = values[slot1];
    final BytesRef val2 = values[slot2];
    if (val1 == null) {
      if (val2 == null) {
        return 0;
      }
      return -1;
    } else if (val2 == null) {
      return 1;
    }
    return comp.compare(val1, val2);
  }

  @Override
  public int compareBottom(int doc) {
    // Per-doc work is delegated to the per-segment comparator
    // returned from setNextReader; this outer instance is never
    // asked to compareBottom directly.
    throw new UnsupportedOperationException();
  }

  @Override
  public void copy(int slot, int doc) {
    // See compareBottom: only per-segment comparators copy.
    throw new UnsupportedOperationException();
  }

  // TODO: would be nice to share these specialized impls
  // w/ TermOrdValComparator

  /** Base class for specialized (per bit width of the
   * ords) per-segment comparator.  NOTE: this is messy;
   * we do this only because hotspot can't reliably inline
   * the underlying array access when looking up doc->ord
   * @lucene.internal
   */
  abstract class PerSegmentComparator extends FieldComparator<BytesRef> {
    @Override
    public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
      // All shared state lives on the enclosing instance; delegate.
      return TermOrdValDocValuesComparator.this.setNextReader(context);
    }

    @Override
    public int compare(int slot1, int slot2) {
      return TermOrdValDocValuesComparator.this.compare(slot1, slot2);
    }

    @Override
    public void setBottom(final int bottom) {
      TermOrdValDocValuesComparator.this.setBottom(bottom);
    }

    @Override
    public BytesRef value(int slot) {
      return TermOrdValDocValuesComparator.this.value(slot);
    }

    @Override
    public int compareValues(BytesRef val1, BytesRef val2) {
      assert val1 != null;
      assert val2 != null;
      return comp.compare(val1, val2);
    }
  }

  // Used per-segment when bit width of doc->ord is 8:
  private final class ByteOrdComparator extends PerSegmentComparator {
    private final byte[] readerOrds;
    private final SortedSource termsIndex;
    // Stored but not read by this impl (docs are addressed
    // segment-relative); kept to mirror TermOrdValComparator.
    private final int docBase;

    public ByteOrdComparator(byte[] readerOrds, SortedSource termsIndex, int docBase) {
      this.readerOrds = readerOrds;
      this.termsIndex = termsIndex;
      this.docBase = docBase;
    }

    @Override
    public int compareBottom(int doc) {
      assert bottomSlot != -1;
      if (bottomSameReader) {
        // ord is precisely comparable, even in the equal case
        return bottomOrd - (readerOrds[doc]&0xFF);
      } else {
        // ord is only approx comparable: if they are not
        // equal, we can use that; if they are equal, we
        // must fallback to compare by value
        final int order = readerOrds[doc]&0xFF;
        final int cmp = bottomOrd - order;
        if (cmp != 0) {
          return cmp;
        }
        termsIndex.getByOrd(order, tempBR);
        return comp.compare(bottomValue, tempBR);
      }
    }

    @Override
    public void copy(int slot, int doc) {
      final int ord = readerOrds[doc]&0xFF;
      ords[slot] = ord;
      if (values[slot] == null) {
        values[slot] = new BytesRef();
      }
      termsIndex.getByOrd(ord, values[slot]);
      readerGen[slot] = currentReaderGen;
    }
  }

  // Used per-segment when bit width of doc->ord is 16:
  private final class ShortOrdComparator extends PerSegmentComparator {
    private final short[] readerOrds;
    private final SortedSource termsIndex;
    // Stored but not read by this impl; see ByteOrdComparator.
    private final int docBase;

    public ShortOrdComparator(short[] readerOrds, SortedSource termsIndex, int docBase) {
      this.readerOrds = readerOrds;
      this.termsIndex = termsIndex;
      this.docBase = docBase;
    }

    @Override
    public int compareBottom(int doc) {
      assert bottomSlot != -1;
      if (bottomSameReader) {
        // ord is precisely comparable, even in the equal case
        return bottomOrd - (readerOrds[doc]&0xFFFF);
      } else {
        // ord is only approx comparable: if they are not
        // equal, we can use that; if they are equal, we
        // must fallback to compare by value
        final int order = readerOrds[doc]&0xFFFF;
        final int cmp = bottomOrd - order;
        if (cmp != 0) {
          return cmp;
        }
        termsIndex.getByOrd(order, tempBR);
        return comp.compare(bottomValue, tempBR);
      }
    }

    @Override
    public void copy(int slot, int doc) {
      final int ord = readerOrds[doc]&0xFFFF;
      ords[slot] = ord;
      if (values[slot] == null) {
        values[slot] = new BytesRef();
      }
      termsIndex.getByOrd(ord, values[slot]);
      readerGen[slot] = currentReaderGen;
    }
  }

  // Used per-segment when bit width of doc->ord is 32:
  private final class IntOrdComparator extends PerSegmentComparator {
    private final int[] readerOrds;
    private final SortedSource termsIndex;
    // Stored but not read by this impl; see ByteOrdComparator.
    private final int docBase;

    public IntOrdComparator(int[] readerOrds, SortedSource termsIndex, int docBase) {
      this.readerOrds = readerOrds;
      this.termsIndex = termsIndex;
      this.docBase = docBase;
    }

    @Override
    public int compareBottom(int doc) {
      assert bottomSlot != -1;
      if (bottomSameReader) {
        // ord is precisely comparable, even in the equal case
        return bottomOrd - readerOrds[doc];
      } else {
        // ord is only approx comparable: if they are not
        // equal, we can use that; if they are equal, we
        // must fallback to compare by value
        final int order = readerOrds[doc];
        final int cmp = bottomOrd - order;
        if (cmp != 0) {
          return cmp;
        }
        termsIndex.getByOrd(order, tempBR);
        return comp.compare(bottomValue, tempBR);
      }
    }

    @Override
    public void copy(int slot, int doc) {
      final int ord = readerOrds[doc];
      ords[slot] = ord;
      if (values[slot] == null) {
        values[slot] = new BytesRef();
      }
      termsIndex.getByOrd(ord, values[slot]);
      readerGen[slot] = currentReaderGen;
    }
  }

  // Used per-segment when bit width is not a native array
  // size (8, 16, 32):
  private final class AnyOrdComparator extends PerSegmentComparator {
    private final PackedInts.Reader readerOrds;
    // Stored but not read by this impl; see ByteOrdComparator.
    private final int docBase;

    public AnyOrdComparator(PackedInts.Reader readerOrds, int docBase) {
      this.readerOrds = readerOrds;
      this.docBase = docBase;
    }

    @Override
    public int compareBottom(int doc) {
      assert bottomSlot != -1;
      if (bottomSameReader) {
        // ord is precisely comparable, even in the equal case
        return bottomOrd - (int) readerOrds.get(doc);
      } else {
        // ord is only approx comparable: if they are not
        // equal, we can use that; if they are equal, we
        // must fallback to compare by value
        final int order = (int) readerOrds.get(doc);
        final int cmp = bottomOrd - order;
        if (cmp != 0) {
          return cmp;
        }
        termsIndex.getByOrd(order, tempBR);
        return comp.compare(bottomValue, tempBR);
      }
    }

    @Override
    public void copy(int slot, int doc) {
      final int ord = (int) readerOrds.get(doc);
      ords[slot] = ord;
      if (values[slot] == null) {
        values[slot] = new BytesRef();
      }
      termsIndex.getByOrd(ord, values[slot]);
      readerGen[slot] = currentReaderGen;
    }
  }

  @Override
  public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
    final int docBase = context.docBase;
    final IndexDocValues dv = context.reader.docValues(field);
    if (dv == null) {
      // Segment has no docvalues for this field: sort it as all-missing.
      termsIndex = IndexDocValues.getDefaultSortedSource(ValueType.BYTES_VAR_SORTED, context.reader.maxDoc());
    } else {
      termsIndex = dv.getSource().asSortedSource();
      if (termsIndex == null) {
        // Field has docvalues but not a sorted source; treat as missing.
        termsIndex = IndexDocValues.getDefaultSortedSource(ValueType.BYTES_VAR_SORTED, context.reader.maxDoc());
      }
    }
    comp = termsIndex.getComparator();
    // Pick the per-segment comparator specialized to the native
    // width of the doc->ord array, if one is exposed.
    FieldComparator perSegComp = null;
    final PackedInts.Reader docToOrd = termsIndex.getDocToOrd();
    if (docToOrd.hasArray()) {
      final Object arr = docToOrd.getArray();
      assert arr != null;
      if (arr instanceof byte[]) {
        // 8 bit packed
        perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase);
      } else if (arr instanceof short[]) {
        // 16 bit packed
        perSegComp = new ShortOrdComparator((short[]) arr, termsIndex, docBase);
      } else if (arr instanceof int[]) {
        // 32 bit packed
        perSegComp = new IntOrdComparator((int[]) arr, termsIndex, docBase);
      }
    }
    if (perSegComp == null) {
      perSegComp = new AnyOrdComparator(docToOrd, docBase);
    }
    currentReaderGen++;
    if (bottomSlot != -1) {
      // Re-resolve the bottom against the new segment's ord space.
      perSegComp.setBottom(bottomSlot);
    }
    return perSegComp;
  }

  @Override
  public void setBottom(final int bottom) {
    bottomSlot = bottom;
    bottomValue = values[bottomSlot];
    if (currentReaderGen == readerGen[bottomSlot]) {
      // Bottom was filled from this segment: its ord is valid as-is.
      bottomOrd = ords[bottomSlot];
      bottomSameReader = true;
    } else {
      if (bottomValue == null) {
        // 0 ord is null for all segments
        assert ords[bottomSlot] == 0;
        bottomOrd = 0;
        bottomSameReader = true;
        readerGen[bottomSlot] = currentReaderGen;
      } else {
        // Look the bottom value up in this segment's ord space.
        final int index = termsIndex.getByValue(bottomValue, tempBR);
        if (index < 0) {
          // Not present: -index-2 is the nearest lower ord; compares
          // against it are approximate (see compareBottom fallback).
          bottomOrd = -index - 2;
          bottomSameReader = false;
        } else {
          bottomOrd = index;
          // exact value match
          bottomSameReader = true;
          readerGen[bottomSlot] = currentReaderGen;
          ords[bottomSlot] = bottomOrd;
        }
      }
    }
  }

  @Override
  public BytesRef value(int slot) {
    return values[slot];
  }
}
/** Sorts by field's natural Term sort order. All
* comparisons are done using BytesRef.compareTo, which is
* slow for medium to large result sets but possibly
@ -1410,6 +1831,74 @@ public abstract class FieldComparator<T> {
}
}
/** Sorts by field's natural Term sort order, comparing the raw
 * byte values with BytesRef.compareTo.  Slow for medium to large
 * result sets but possibly very fast for very small ones.  The
 * values are obtained using {@link IndexReader#docValues}. */
public static final class TermValDocValuesComparator extends FieldComparator<BytesRef> {
  // Value captured for each competitive slot.
  private BytesRef[] slotValues;
  // Per-segment doc -> bytes source; swapped in setNextReader.
  private Source source;
  private final String field;
  // Cached copy of the queue-bottom slot's value; null until setBottom.
  private BytesRef bottomValue;
  // Scratch ref reused by compareBottom to avoid per-doc allocation.
  private final BytesRef spare = new BytesRef();

  TermValDocValuesComparator(int numHits, String field) {
    slotValues = new BytesRef[numHits];
    this.field = field;
  }

  @Override
  public int compare(int slot1, int slot2) {
    assert slotValues[slot1] != null;
    assert slotValues[slot2] != null;
    return slotValues[slot1].compareTo(slotValues[slot2]);
  }

  @Override
  public int compareBottom(int doc) {
    assert bottomValue != null;
    return bottomValue.compareTo(source.getBytes(doc, spare));
  }

  @Override
  public void copy(int slot, int doc) {
    // Lazily allocate the slot's ref, then fill it from the source.
    if (slotValues[slot] == null) {
      slotValues[slot] = new BytesRef();
    }
    source.getBytes(doc, slotValues[slot]);
  }

  @Override
  public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
    final IndexDocValues dv = context.reader.docValues(field);
    // Segments lacking docvalues for this field sort as all-missing.
    source = dv != null
        ? dv.getSource()
        : IndexDocValues.getDefaultSource(ValueType.BYTES_VAR_DEREF);
    return this;
  }

  @Override
  public void setBottom(final int bottom) {
    this.bottomValue = slotValues[bottom];
  }

  @Override
  public BytesRef value(int slot) {
    return slotValues[slot];
  }

  @Override
  public int compareValues(BytesRef val1, BytesRef val2) {
    assert val1 != null;
    assert val2 != null;
    return val1.compareTo(val2);
  }
}
// Convenience overload: searches the full ord range of the index.
// Starts at ord 1 — presumably ord 0 is reserved for the missing
// value, as in the ord-based comparators above; confirm against
// DocTermsIndex's ord numbering.
final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) {
return binarySearch(br, a, key, 1, a.numOrd()-1);
}

View File

@ -254,6 +254,7 @@ public class SortField {
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
String dv = useIndexValues ? " [dv]" : "";
switch (type) {
case SCORE:
buffer.append("<score>");
@ -264,11 +265,11 @@ public class SortField {
break;
case STRING:
buffer.append("<string: \"").append(field).append("\">");
buffer.append("<string" + dv + ": \"").append(field).append("\">");
break;
case STRING_VAL:
buffer.append("<string_val: \"").append(field).append("\">");
buffer.append("<string_val" + dv + ": \"").append(field).append("\">");
break;
case BYTE:
@ -280,7 +281,7 @@ public class SortField {
break;
case INT:
buffer.append("<int: \"").append(field).append("\">");
buffer.append("<int" + dv + ": \"").append(field).append("\">");
break;
case LONG:
@ -288,11 +289,11 @@ public class SortField {
break;
case FLOAT:
buffer.append("<float: \"").append(field).append("\">");
buffer.append("<float" + dv + ": \"").append(field).append("\">");
break;
case DOUBLE:
buffer.append("<double: \"").append(field).append("\">");
buffer.append("<double" + dv + ": \"").append(field).append("\">");
break;
case CUSTOM:
@ -415,10 +416,18 @@ public class SortField {
return comparatorSource.newComparator(field, numHits, sortPos, reverse);
case STRING:
return new FieldComparator.TermOrdValComparator(numHits, field, sortPos, reverse);
if (useIndexValues) {
return new FieldComparator.TermOrdValDocValuesComparator(numHits, field);
} else {
return new FieldComparator.TermOrdValComparator(numHits, field);
}
case STRING_VAL:
return new FieldComparator.TermValComparator(numHits, field);
if (useIndexValues) {
return new FieldComparator.TermValDocValuesComparator(numHits, field);
} else {
return new FieldComparator.TermValComparator(numHits, field);
}
case REWRITEABLE:
throw new IllegalStateException("SortField needs to be rewritten through Sort.rewrite(..) and SortField.rewrite(..)");

View File

@ -81,6 +81,7 @@ public class TestSort extends LuceneTestCase {
public static void beforeClass() throws Exception {
NUM_STRINGS = atLeast(6000);
}
// document data:
// the tracer field is used to determine which document was hit
// the contents field is used to search and sort by relevance
@ -111,7 +112,7 @@ public class TestSort extends LuceneTestCase {
{ "c", "m", "5", "5.0", "5", null, null, "5", "5", "5", "5", null},
{ "d", "m", null, null, null, null, null, null, null, null, null, null}
};
// create an index of all the documents, or just the x, or just the y documents
private IndexSearcher getIndex (boolean even, boolean odd)
throws IOException {
@ -119,6 +120,21 @@ public class TestSort extends LuceneTestCase {
dirs.add(indexStore);
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
final ValueType stringDVType;
if (dvStringSorted) {
// Index sorted
stringDVType = random.nextBoolean() ? ValueType.BYTES_VAR_SORTED : ValueType.BYTES_FIXED_SORTED;
} else {
// Index non-sorted
if (random.nextBoolean()) {
// Fixed
stringDVType = random.nextBoolean() ? ValueType.BYTES_FIXED_STRAIGHT : ValueType.BYTES_FIXED_DEREF;
} else {
// Var
stringDVType = random.nextBoolean() ? ValueType.BYTES_VAR_STRAIGHT : ValueType.BYTES_VAR_DEREF;
}
}
FieldType ft1 = new FieldType();
ft1.setStored(true);
FieldType ft2 = new FieldType();
@ -142,7 +158,13 @@ public class TestSort extends LuceneTestCase {
}
doc.add(f);
}
if (data[i][4] != null) doc.add (new StringField ("string", data[i][4]));
if (data[i][4] != null) {
Field f = new StringField ("string", data[i][4]);
if (supportsDocValues) {
f = IndexDocValuesField.build(f, stringDVType);
}
doc.add(f);
}
if (data[i][5] != null) doc.add (new StringField ("custom", data[i][5]));
if (data[i][6] != null) doc.add (new StringField ("i18n", data[i][6]));
if (data[i][7] != null) doc.add (new StringField ("long", data[i][7]));
@ -185,21 +207,52 @@ public class TestSort extends LuceneTestCase {
setMaxBufferedDocs(4).
setMergePolicy(newLogMergePolicy(97))
);
FieldType customType = new FieldType();
customType.setStored(true);
FieldType onlyStored = new FieldType();
onlyStored.setStored(true);
final int fixedLen = getRandomNumber(2, 8);
final int fixedLen2 = getRandomNumber(1, 4);
for (int i=0; i<NUM_STRINGS; i++) {
Document doc = new Document();
String num = getRandomCharString(getRandomNumber(2, 8), 48, 52);
doc.add (new Field ("tracer", num, customType));
//doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
doc.add (new StringField ("string", num));
String num2 = getRandomCharString(getRandomNumber(1, 4), 48, 50);
doc.add (new StringField ("string2", num2));
doc.add (new Field ("tracer2", num2, customType));
for(IndexableField f : doc.getFields()) {
((Field) f).setBoost(2.0f);
}
writer.addDocument (doc);
Document doc = new Document();
String num = getRandomCharString(getRandomNumber(2, 8), 48, 52);
doc.add (new Field ("tracer", num, onlyStored));
//doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
Field f = new StringField("string", num);
if (supportsDocValues) {
f = IndexDocValuesField.build(f, ValueType.BYTES_VAR_SORTED);
}
doc.add (f);
String num2 = getRandomCharString(getRandomNumber(1, 4), 48, 50);
f = new StringField ("string2", num2);
if (supportsDocValues) {
f = IndexDocValuesField.build(f, ValueType.BYTES_VAR_SORTED);
}
doc.add (f);
doc.add (new Field ("tracer2", num2, onlyStored));
for(IndexableField f2 : doc.getFields()) {
((Field) f2).setBoost(2.0f);
}
String numFixed = getRandomCharString(fixedLen, 48, 52);
doc.add (new Field ("fixed_tracer", numFixed, onlyStored));
//doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
f = new StringField("string_fixed", numFixed);
if (supportsDocValues) {
f = IndexDocValuesField.build(f, ValueType.BYTES_FIXED_SORTED);
}
doc.add (f);
String num2Fixed = getRandomCharString(fixedLen2, 48, 52);
f = new StringField ("string2_fixed", num2Fixed);
if (supportsDocValues) {
f = IndexDocValuesField.build(f, ValueType.BYTES_FIXED_SORTED);
}
doc.add (f);
doc.add (new Field ("tracer2_fixed", num2Fixed, onlyStored));
for(IndexableField f2 : doc.getFields()) {
((Field) f2).setBoost(2.0f);
}
writer.addDocument (doc);
}
//writer.forceMerge(1);
//System.out.println(writer.getSegmentCount());
@ -249,10 +302,15 @@ public class TestSort extends LuceneTestCase {
return getIndex (false, false);
}
// Set to true if the DV "string" field is indexed as a
// sorted source:
private boolean dvStringSorted;
@Override
public void setUp() throws Exception {
super.setUp();
dvStringSorted = random.nextBoolean();
full = getFullIndex();
searchX = getXIndex();
searchY = getYIndex();
@ -339,6 +397,20 @@ public class TestSort extends LuceneTestCase {
sort.setSort (useDocValues(new SortField ("double", SortField.Type.DOUBLE)), SortField.FIELD_DOC );
assertMatches (full, queryX, sort, "AGICE");
assertMatches (full, queryY, sort, "DJHBF");
sort.setSort (useDocValues(new SortField ("string", getDVStringSortType())), SortField.FIELD_DOC );
assertMatches (full, queryX, sort, "AIGEC");
assertMatches (full, queryY, sort, "DJHFB");
}
}
/** Picks the SortField type to use for the DV "string" field. */
private SortField.Type getDVStringSortType() {
  if (!dvStringSorted) {
    // Non-sorted docvalues can only be sorted by value.
    return SortField.Type.STRING_VAL;
  }
  // A sorted source supports ord sorting, but sorting by value
  // still works too — randomly exercise both:
  return random.nextBoolean() ? SortField.Type.STRING : SortField.Type.STRING_VAL;
}
@ -405,42 +477,72 @@ public class TestSort extends LuceneTestCase {
/**
* Test String sorting: small queue to many matches, multi field sort, reverse sort
*/
public void testStringSort() throws IOException {
ScoreDoc[] result = null;
IndexSearcher searcher = getFullStrings();
public void testStringSort() throws Exception {
// Normal string field, var length
sort.setSort(
new SortField("string", SortField.Type.STRING),
new SortField("string2", SortField.Type.STRING, true),
SortField.FIELD_DOC);
verifyStringSort(sort);
result = searcher.search(new MatchAllDocsQuery(), null, 500, sort).scoreDocs;
// Normal string field, fixed length
sort.setSort(
new SortField("string_fixed", SortField.Type.STRING),
new SortField("string2_fixed", SortField.Type.STRING, true),
SortField.FIELD_DOC);
verifyStringSort(sort);
// Doc values field, var length
assumeFalse("cannot work with preflex codec",
"Lucene3x".equals(Codec.getDefault().getName()));
sort.setSort(
useDocValues(new SortField("string", getDVStringSortType())),
useDocValues(new SortField("string2", getDVStringSortType(), true)),
SortField.FIELD_DOC);
verifyStringSort(sort);
// Doc values field, fixed length
sort.setSort(
useDocValues(new SortField("string_fixed", getDVStringSortType())),
useDocValues(new SortField("string2_fixed", getDVStringSortType(), true)),
SortField.FIELD_DOC);
verifyStringSort(sort);
}
private void verifyStringSort(Sort sort) throws Exception {
final IndexSearcher searcher = getFullStrings();
final ScoreDoc[] result = searcher.search(new MatchAllDocsQuery(), null, _TestUtil.nextInt(random, 500, searcher.getIndexReader().maxDoc()), sort).scoreDocs;
StringBuilder buff = new StringBuilder();
int n = result.length;
String last = null;
String lastSub = null;
int lastDocId = 0;
boolean fail = false;
final String fieldSuffix = sort.getSort()[0].getField().endsWith("_fixed") ? "_fixed" : "";
for (int x = 0; x < n; ++x) {
Document doc2 = searcher.doc(result[x].doc);
IndexableField[] v = doc2.getFields("tracer");
IndexableField[] v2 = doc2.getFields("tracer2");
IndexableField[] v = doc2.getFields("tracer" + fieldSuffix);
IndexableField[] v2 = doc2.getFields("tracer2" + fieldSuffix);
for (int j = 0; j < v.length; ++j) {
buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+")\n");
if (last != null) {
int cmp = v[j].stringValue().compareTo(last);
if (!(cmp >= 0)) { // ensure first field is in order
fail = true;
System.out.println("fail:" + v[j] + " < " + last);
buff.append(" WRONG tracer\n");
}
if (cmp == 0) { // ensure second field is in reverse order
cmp = v2[j].stringValue().compareTo(lastSub);
if (cmp > 0) {
fail = true;
System.out.println("rev field fail:" + v2[j] + " > " + lastSub);
buff.append(" WRONG tracer2\n");
} else if(cmp == 0) { // ensure docid is in order
if (result[x].doc < lastDocId) {
fail = true;
System.out.println("doc fail:" + result[x].doc + " > " + lastDocId);
buff.append(" WRONG docID\n");
}
}
}
@ -448,11 +550,10 @@ public class TestSort extends LuceneTestCase {
last = v[j].stringValue();
lastSub = v2[j].stringValue();
lastDocId = result[x].doc;
buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+") ");
}
}
if(fail) {
System.out.println("topn field1(field2)(docID):" + buff);
if (fail) {
System.out.println("topn field1(field2)(docID):\n" + buff);
}
assertFalse("Found sort results out of order", fail);
searcher.close();
@ -549,6 +650,16 @@ public class TestSort extends LuceneTestCase {
sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), new SortField ("string", SortField.Type.STRING) );
assertMatches (empty, queryX, sort, "");
sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)), SortField.FIELD_DOC );
assertMatches (empty, queryX, sort, "");
sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)),
useDocValues(new SortField ("string", getDVStringSortType())) );
assertMatches (empty, queryX, sort, "");
sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), useDocValues(new SortField ("string", getDVStringSortType())) );
assertMatches (empty, queryX, sort, "");
}
static class MyFieldComparator extends FieldComparator<Integer> {
@ -642,11 +753,18 @@ public class TestSort extends LuceneTestCase {
sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT, true)) );
assertMatches (full, queryX, sort, "AECIG");
assertMatches (full, queryY, sort, "BFJHD");
sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)) );
assertMatches (full, queryX, sort, "CEGIA");
assertMatches (full, queryY, sort, "BFHJD");
}
}
// test sorting when the sort field is empty (undefined) for some of the documents
public void testEmptyFieldSort() throws Exception {
// NOTE: do not test DocValues fields here, since you
// can't sort when some documents don't have the field
sort.setSort (new SortField ("string", SortField.Type.STRING) );
assertMatches (full, queryF, sort, "ZJI");
@ -662,14 +780,6 @@ public class TestSort extends LuceneTestCase {
sort.setSort (new SortField ("float", SortField.Type.FLOAT) );
assertMatches (full, queryF, sort, "ZJI");
if (supportsDocValues) {
sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)) );
assertMatches (full, queryF, sort, "IZJ");
sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)) );
assertMatches (full, queryF, sort, "ZJI");
}
// using a nonexisting field as first sort key shouldn't make a difference:
sort.setSort (new SortField ("nosuchfield", SortField.Type.STRING),
new SortField ("float", SortField.Type.FLOAT) );
@ -679,7 +789,6 @@ public class TestSort extends LuceneTestCase {
assertMatches (full, queryF, sort, "IJZ");
// When a field is null for both documents, the next SortField should be used.
// Works for
sort.setSort (new SortField ("int", SortField.Type.INT),
new SortField ("string", SortField.Type.STRING),
new SortField ("float", SortField.Type.FLOAT) );
@ -688,7 +797,7 @@ public class TestSort extends LuceneTestCase {
// Reverse the last criterium to make sure the test didn't pass by chance
sort.setSort (new SortField ("int", SortField.Type.INT),
new SortField ("string", SortField.Type.STRING),
new SortField ("float", SortField.Type.FLOAT, true) );
new SortField ("float", SortField.Type.FLOAT, true) );
assertMatches (full, queryG, sort, "ZYXW");
// Do the same for a ParallelMultiSearcher
@ -696,13 +805,13 @@ public class TestSort extends LuceneTestCase {
IndexSearcher parallelSearcher=new IndexSearcher (full.getIndexReader(), exec);
sort.setSort (new SortField ("int", SortField.Type.INT),
new SortField ("string", SortField.Type.STRING),
new SortField ("float", SortField.Type.FLOAT) );
new SortField ("string", SortField.Type.STRING),
new SortField ("float", SortField.Type.FLOAT) );
assertMatches (parallelSearcher, queryG, sort, "ZWXY");
sort.setSort (new SortField ("int", SortField.Type.INT),
new SortField ("string", SortField.Type.STRING),
new SortField ("float", SortField.Type.FLOAT, true) );
new SortField ("string", SortField.Type.STRING),
new SortField ("float", SortField.Type.FLOAT, true) );
assertMatches (parallelSearcher, queryG, sort, "ZYXW");
parallelSearcher.close();
exec.shutdown();
@ -719,6 +828,20 @@ public class TestSort extends LuceneTestCase {
sort.setSort (new SortField ("float", SortField.Type.FLOAT), new SortField ("string", SortField.Type.STRING) );
assertMatches (full, queryX, sort, "GICEA");
if (supportsDocValues) {
sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)),
useDocValues(new SortField ("float", SortField.Type.FLOAT)));
assertMatches (full, queryX, sort, "IGEAC");
sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT, true)),
useDocValues(new SortField (null, SortField.Type.DOC, true)));
assertMatches (full, queryX, sort, "CEAGI");
sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)),
useDocValues(new SortField ("string", getDVStringSortType())));
assertMatches (full, queryX, sort, "GICEA");
}
}
// test a variety of sorts using a parallel multisearcher
@ -1064,6 +1187,21 @@ public class TestSort extends LuceneTestCase {
sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT, true)));
assertMatches(multi, queryF, sort, "JZI");
sort.setSort(useDocValues(new SortField("string", getDVStringSortType())));
assertMatches(multi, queryA, sort, "DJAIHGFEBC");
sort.setSort(useDocValues(new SortField("string", getDVStringSortType(), true)));
assertMatches(multi, queryA, sort, "CBEFGHIAJD");
sort.setSort(useDocValues(new SortField("float", SortField.Type.FLOAT)),useDocValues(new SortField("string", getDVStringSortType())));
assertMatches(multi, queryA, sort, "GDHJICEFAB");
sort.setSort(useDocValues(new SortField ("string", getDVStringSortType())));
assertMatches(multi, queryF, sort, "ZJI");
sort.setSort(useDocValues(new SortField ("string", getDVStringSortType(), true)));
assertMatches(multi, queryF, sort, "IJZ");
}
// up to this point, all of the searches should have "sane"