mirror of https://github.com/apache/lucene.git

commit 7a4f7c669b

    Merged /lucene/dev/trunk:r1514641-1514711

    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1514719 13f79535-47bb-0310-9956-ffa450edef68
@@ -122,6 +122,11 @@ Bug Fixes
   boundary, made it into the top-N and went to the formatter.
   (Manuel Amoabeng, Michael McCandless, Robert Muir)
 
+* LUCENE-4583: Indexing core no longer enforces a limit on maximum
+  length binary doc values fields, but individual codecs (including
+  the default one) have their own limits (David Smiley, Robert Muir,
+  Mike McCandless)
+
 API Changes
 
 * LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
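For illustration only, not part of this commit: with the LUCENE-4583 change above, the per-value size check moves out of the indexing chain and into each codec, so whether a large binary doc value is accepted now depends on the DocValuesFormat in use (the default format keeps a 32766-byte cap). A minimal sketch assuming stock 4.x APIs; the class name, directory path, field name and version constant below are made up:

    import java.io.File;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.document.BinaryDocValuesField;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.Version;

    public class LargeBinaryDocValuesSketch {                       // hypothetical class name
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/tmp/index"));   // hypothetical path
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44,
            new WhitespaceAnalyzer(Version.LUCENE_44));
        IndexWriter writer = new IndexWriter(dir, iwc);

        byte[] big = new byte[100000];                              // > 32766 bytes
        Document doc = new Document();
        doc.add(new BinaryDocValuesField("blob", new BytesRef(big)));
        try {
          writer.addDocument(doc);  // the indexing chain itself no longer rejects large values
        } catch (IllegalArgumentException e) {
          // still thrown by codecs that keep a per-value limit (the default Lucene42 format does)
        }
        writer.close();
        dir.close();
      }
    }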
@@ -118,6 +118,11 @@ import org.apache.lucene.util.packed.PackedInts;
  * {@code BYTES_VAR_DEREF BYTES_VAR_DEREF} it doesn't apply deduplication of the document values.
  * </li>
  * </ul>
+ * <p>
+ * Limitations:
+ * <ul>
+ *   <li> Binary doc values can be at most {@link #MAX_BINARY_FIELD_LENGTH} in length.
+ * </ul>
  * @deprecated Only for reading old 4.0 and 4.1 segments
  */
 @Deprecated
@@ -125,6 +130,9 @@ import org.apache.lucene.util.packed.PackedInts;
 // for back compat only!
 public class Lucene40DocValuesFormat extends DocValuesFormat {
+
+  /** Maximum length for each binary doc values field. */
+  public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
 
   /** Sole constructor. */
   public Lucene40DocValuesFormat() {
     super("Lucene40");
@@ -37,14 +37,14 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.fst.Builder;
-import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE;
+import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
-import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * Writer for {@link Lucene42DocValuesFormat}
@@ -220,6 +220,9 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer {
     final long startFP = data.getFilePointer();
     for(BytesRef v : values) {
       final int length = v == null ? 0 : v.length;
+      if (length > Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH) {
+        throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+      }
       minLength = Math.min(minLength, length);
       maxLength = Math.max(maxLength, length);
       if (v != null) {
@@ -118,8 +118,17 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
  * <p>SortedSet entries store the list of ordinals in their BinaryData as a
  * sequences of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
  * </ol>
+ * <p>
+ * Limitations:
+ * <ul>
+ *   <li> Binary doc values can be at most {@link #MAX_BINARY_FIELD_LENGTH} in length.
+ * </ul>
  */
 public final class Lucene42DocValuesFormat extends DocValuesFormat {
+
+  /** Maximum length for each binary doc values field. */
+  public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
+
   final float acceptableOverheadRatio;
 
   /**
@@ -22,33 +22,44 @@ import java.util.Iterator;
 import java.util.NoSuchElementException;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
-import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
 import org.apache.lucene.util.packed.PackedInts;
 
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
-
 
 /** Buffers up pending byte[] per doc, then flushes when
  *  segment flushes. */
 class BinaryDocValuesWriter extends DocValuesWriter {
 
-  private final ByteBlockPool pool;
+  /** Maximum length for a binary field; we set this to "a
+   *  bit" below Integer.MAX_VALUE because the exact max
+   *  allowed byte[] is JVM dependent, so we want to avoid
+   *  a case where a large value worked in one JVM but
+   *  failed later at search time with a different JVM. */
+  private static final int MAX_LENGTH = Integer.MAX_VALUE-256;
+
+  // 32 KB block sizes for PagedBytes storage:
+  private final static int BLOCK_BITS = 15;
+
+  private final PagedBytes bytes;
+  private final DataOutput bytesOut;
+
+  private final Counter iwBytesUsed;
   private final AppendingDeltaPackedLongBuffer lengths;
   private final OpenBitSet docsWithField;
-  private final Counter iwBytesUsed;
-  private long bytesUsed;
   private final FieldInfo fieldInfo;
-  private int addedValues = 0;
+  private int addedValues;
+  private long bytesUsed;
 
   public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     this.fieldInfo = fieldInfo;
-    this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed));
+    this.bytes = new PagedBytes(BLOCK_BITS);
+    this.bytesOut = bytes.getDataOutput();
     this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     this.iwBytesUsed = iwBytesUsed;
     this.docsWithField = new OpenBitSet();
@@ -63,8 +74,8 @@ class BinaryDocValuesWriter extends DocValuesWriter {
     if (value == null) {
       throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\": null value not allowed");
     }
-    if (value.length > (BYTE_BLOCK_SIZE - 2)) {
-      throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
+    if (value.length > MAX_LENGTH) {
+      throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + MAX_LENGTH);
     }
 
     // Fill in any holes:
@@ -74,7 +85,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
     }
     addedValues++;
     lengths.add(value.length);
-    pool.append(value);
+    try {
+      bytesOut.writeBytes(value.bytes, value.offset, value.length);
+    } catch (IOException ioe) {
+      // Should never happen!
+      throw new RuntimeException(ioe);
+    }
     docsWithField.set(docID);
     updateBytesUsed();
   }
@@ -85,7 +101,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   }
 
   private void updateBytesUsed() {
-    final long newBytesUsed = docsWithFieldBytesUsed();
+    final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed() + docsWithFieldBytesUsed();
     iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
     bytesUsed = newBytesUsed;
   }
@@ -97,6 +113,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   @Override
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
     final int maxDoc = state.segmentInfo.getDocCount();
+    bytes.freeze(false);
     dvConsumer.addBinaryField(fieldInfo,
                               new Iterable<BytesRef>() {
                                 @Override
@@ -114,10 +131,10 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   private class BytesIterator implements Iterator<BytesRef> {
     final BytesRef value = new BytesRef();
     final AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = lengths.iterator();
+    final DataInput bytesIterator = bytes.getDataInput();
     final int size = (int) lengths.size();
     final int maxDoc;
     int upto;
-    long byteOffset;
 
     BytesIterator(int maxDoc) {
       this.maxDoc = maxDoc;
@@ -138,8 +155,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
       int length = (int) lengthsIterator.next();
       value.grow(length);
       value.length = length;
-      pool.readBytes(byteOffset, value.bytes, value.offset, value.length);
-      byteOffset += length;
+      try {
+        bytesIterator.readBytes(value.bytes, value.offset, value.length);
+      } catch (IOException ioe) {
+        // Should never happen!
+        throw new RuntimeException(ioe);
+      }
       if (docsWithField.get(upto)) {
         v = value;
       } else {
@@ -92,21 +92,22 @@ public final class FieldInfo {
      */
     NUMERIC,
     /**
-     * A per-document byte[].
+     * A per-document byte[].  Values may be larger than
+     * 32766 bytes, but different codecs may enforce their own limits.
      */
     BINARY,
     /**
      * A pre-sorted byte[]. Fields with this type only store distinct byte values
      * and store an additional offset pointer per document to dereference the shared
      * byte[]. The stored byte[] is presorted and allows access via document id,
-     * ordinal and by-value.
+     * ordinal and by-value.  Values must be <= 32766 bytes.
      */
     SORTED,
     /**
     * A pre-sorted Set<byte[]>. Fields with this type only store distinct byte values
     * and store additional offset pointers per document to dereference the shared
     * byte[]s. The stored byte[] is presorted and allows access via document id,
-    * ordinal and by-value.
+    * ordinal and by-value.  Values must be <= 32766 bytes.
     */
    SORTED_SET
  };
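As an aside, not from the patch: the updated javadoc above distinguishes BINARY (may exceed 32766 bytes, codec permitting) from SORTED/SORTED_SET (hard 32766-byte cap per value). A tiny sketch of that difference, assuming an IndexWriter set up as usual; the field names and array size are arbitrary:

    byte[] big = new byte[40000];                                    // > 32766 bytes
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("binary", new BytesRef(big)));  // accepted or rejected depending on the codec
    doc.add(new SortedDocValuesField("sorted", new BytesRef(big)));  // rejected at addDocument() regardless of codec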
@@ -21,6 +21,8 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
 
 /** Represents a logical byte[] as a series of pages.  You
@@ -34,6 +36,7 @@ import org.apache.lucene.store.IndexInput;
 // other "shift/mask big arrays". there are too many of these classes!
 public final class PagedBytes {
   private final List<byte[]> blocks = new ArrayList<byte[]>();
+  // TODO: these are unused?
   private final List<Integer> blockEnd = new ArrayList<Integer>();
   private final int blockSize;
   private final int blockBits;
@@ -42,6 +45,7 @@ public final class PagedBytes {
   private boolean frozen;
   private int upto;
   private byte[] currentBlock;
+  private final long bytesUsedPerBlock;
 
   private static final byte[] EMPTY_BYTES = new byte[0];
 
@@ -75,13 +79,13 @@ public final class PagedBytes {
    *  given length. Iff the slice spans across a block border this method will
    *  allocate sufficient resources and copy the paged data.
    *  <p>
-   *  Slices spanning more than one block are not supported.
+   *  Slices spanning more than two blocks are not supported.
    *  </p>
    *  @lucene.internal
    **/
   public void fillSlice(BytesRef b, long start, int length) {
     assert length >= 0: "length=" + length;
-    assert length <= blockSize+1;
+    assert length <= blockSize+1: "length=" + length;
     final int index = (int) (start >> blockBits);
     final int offset = (int) (start & blockMask);
     b.length = length;
@@ -132,6 +136,7 @@ public final class PagedBytes {
     this.blockBits = blockBits;
     blockMask = blockSize-1;
     upto = blockSize;
+    bytesUsedPerBlock = blockSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + RamUsageEstimator.NUM_BYTES_OBJECT_REF;
   }
 
   /** Read this many bytes from in */
@@ -216,6 +221,11 @@ public final class PagedBytes {
     }
   }
 
+  /** Return approx RAM usage in bytes. */
+  public long ramBytesUsed() {
+    return (blocks.size() + (currentBlock != null ? 1 : 0)) * bytesUsedPerBlock;
+  }
+
   /** Copy bytes in, writing the length as a 1 or 2 byte
    *  vInt prefix. */
   // TODO: this really needs to be refactored into fieldcacheimpl!
@@ -249,4 +259,148 @@ public final class PagedBytes {
 
     return pointer;
   }
+
+  public final class PagedBytesDataInput extends DataInput {
+    private int currentBlockIndex;
+    private int currentBlockUpto;
+    private byte[] currentBlock;
+
+    PagedBytesDataInput() {
+      currentBlock = blocks.get(0);
+    }
+
+    @Override
+    public PagedBytesDataInput clone() {
+      PagedBytesDataInput clone = getDataInput();
+      clone.setPosition(getPosition());
+      return clone;
+    }
+
+    /** Returns the current byte position. */
+    public long getPosition() {
+      return (long) currentBlockIndex * blockSize + currentBlockUpto;
+    }
+
+    /** Seek to a position previously obtained from
+     *  {@link #getPosition}. */
+    public void setPosition(long pos) {
+      currentBlockIndex = (int) (pos >> blockBits);
+      currentBlock = blocks.get(currentBlockIndex);
+      currentBlockUpto = (int) (pos & blockMask);
+    }
+
+    @Override
+    public byte readByte() {
+      if (currentBlockUpto == blockSize) {
+        nextBlock();
+      }
+      return currentBlock[currentBlockUpto++];
+    }
+
+    @Override
+    public void readBytes(byte[] b, int offset, int len) {
+      assert b.length >= offset + len;
+      final int offsetEnd = offset + len;
+      while (true) {
+        final int blockLeft = blockSize - currentBlockUpto;
+        final int left = offsetEnd - offset;
+        if (blockLeft < left) {
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           blockLeft);
+          nextBlock();
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           left);
+          currentBlockUpto += left;
+          break;
+        }
+      }
+    }
+
+    private void nextBlock() {
+      currentBlockIndex++;
+      currentBlockUpto = 0;
+      currentBlock = blocks.get(currentBlockIndex);
+    }
+  }
+
+  public final class PagedBytesDataOutput extends DataOutput {
+    @Override
+    public void writeByte(byte b) {
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+      currentBlock[upto++] = b;
+    }
+
+    @Override
+    public void writeBytes(byte[] b, int offset, int length) {
+      assert b.length >= offset + length;
+      if (length == 0) {
+        return;
+      }
+
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+
+      final int offsetEnd = offset + length;
+      while(true) {
+        final int left = offsetEnd - offset;
+        final int blockLeft = blockSize - upto;
+        if (blockLeft < left) {
+          System.arraycopy(b, offset, currentBlock, upto, blockLeft);
+          blocks.add(currentBlock);
+          blockEnd.add(blockSize);
+          currentBlock = new byte[blockSize];
+          upto = 0;
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(b, offset, currentBlock, upto, left);
+          upto += left;
+          break;
+        }
+      }
+    }
+
+    /** Return the current byte position. */
+    public long getPosition() {
+      return getPointer();
+    }
+  }
+
+  /** Returns a DataInput to read values from this
+   *  PagedBytes instance. */
+  public PagedBytesDataInput getDataInput() {
+    if (!frozen) {
+      throw new IllegalStateException("must call freeze() before getDataInput");
+    }
+    return new PagedBytesDataInput();
+  }
+
+  /** Returns a DataOutput that you may use to write into
+   *  this PagedBytes instance.  If you do this, you should
+   *  not call the other writing methods (eg, copy);
+   *  results are undefined. */
+  public PagedBytesDataOutput getDataOutput() {
+    if (frozen) {
+      throw new IllegalStateException("cannot get DataOutput after freeze()");
+    }
+    return new PagedBytesDataOutput();
+  }
 }
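As an aside, not part of the patch: the PagedBytesDataOutput/PagedBytesDataInput pair added above allows a simple in-memory round trip, which is the pattern BinaryDocValuesWriter and the new TestPagedBytes test rely on. A minimal sketch using only the API shown in this hunk; the class name is hypothetical:

    import org.apache.lucene.store.DataInput;
    import org.apache.lucene.store.DataOutput;
    import org.apache.lucene.util.PagedBytes;

    public class PagedBytesRoundTrip {                      // hypothetical class name
      public static void main(String[] args) throws Exception {
        PagedBytes bytes = new PagedBytes(15);              // 32 KB blocks (1 << 15)
        DataOutput out = bytes.getDataOutput();
        byte[] payload = new byte[100000];                  // spans several blocks
        out.writeBytes(payload, 0, payload.length);

        bytes.freeze(false);                                // required before getDataInput()
        DataInput in = bytes.getDataInput();
        byte[] copy = new byte[payload.length];
        in.readBytes(copy, 0, copy.length);                 // reads back across block boundaries

        System.out.println("approx RAM used: " + bytes.ramBytesUsed() + " bytes");
      }
    }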
@@ -31,4 +31,10 @@ public class TestLucene40DocValuesFormat extends BaseDocValuesFormatTestCase {
     return codec;
   }
+
+  // LUCENE-4583: This codec should throw IAE on huge binary values:
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return false;
+  }
 
 }
@@ -30,4 +30,9 @@ public class TestLucene42DocValuesFormat extends BaseCompressingDocValuesFormatT
   protected Codec getCodec() {
     return codec;
   }
+
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return false;
+  }
 }
@@ -46,6 +46,7 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util._TestUtil;
 
 /**
  * Basic tests of PerFieldDocValuesFormat
@@ -64,6 +65,11 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
     return codec;
   }
+
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return _TestUtil.fieldSupportsHugeBinaryDocValues(field);
+  }
 
   // just a simple trivial test
   // TODO: we should come up with a test that somehow checks that segment suffix
   // is respected by all codec apis (not just docvalues and postings)
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
  */
 
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.util._TestUtil;
 
 /** Tests the codec configuration defined by LuceneTestCase randomly
  *  (typically a mix across different fields).
@@ -28,4 +29,9 @@ public class TestDocValuesFormat extends BaseDocValuesFormatTestCase {
   protected Codec getCodec() {
     return Codec.getDefault();
   }
+
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return _TestUtil.fieldSupportsHugeBinaryDocValues(field);
+  }
 }
@@ -327,30 +327,6 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     directory.close();
   }
 
-  public void testTooLargeBytes() throws IOException {
-    Analyzer analyzer = new MockAnalyzer(random());
-
-    Directory directory = newDirectory();
-    // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
-    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
-    iwc.setMergePolicy(newLogMergePolicy());
-    IndexWriter iwriter = new IndexWriter(directory, iwc);
-    Document doc = new Document();
-    byte bytes[] = new byte[100000];
-    BytesRef b = new BytesRef(bytes);
-    random().nextBytes(bytes);
-    doc.add(new BinaryDocValuesField("dv", b));
-    try {
-      iwriter.addDocument(doc);
-      fail("did not get expected exception");
-    } catch (IllegalArgumentException expected) {
-      // expected
-    }
-    iwriter.close();
-
-    directory.close();
-  }
-
   public void testTooLargeSortedBytes() throws IOException {
     Analyzer analyzer = new MockAnalyzer(random());
@@ -22,6 +22,7 @@ import java.util.*;
 
 import org.apache.lucene.store.BaseDirectoryWrapper;
 import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -30,6 +31,9 @@ import org.junit.Ignore;
 
 public class TestPagedBytes extends LuceneTestCase {
 
+  // Writes random byte/s to "normal" file in dir, then
+  // copies into PagedBytes and verifies with
+  // PagedBytes.Reader:
   public void testDataInputOutput() throws Exception {
     Random random = random();
     for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
@@ -90,6 +94,60 @@ public class TestPagedBytes extends LuceneTestCase {
     }
   }
 
+  // Writes random byte/s into PagedBytes via
+  // .getDataOutput(), then verifies with
+  // PagedBytes.getDataInput():
+  public void testDataInputOutput2() throws Exception {
+    Random random = random();
+    for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
+      final int blockBits = _TestUtil.nextInt(random, 1, 20);
+      final int blockSize = 1 << blockBits;
+      final PagedBytes p = new PagedBytes(blockBits);
+      final DataOutput out = p.getDataOutput();
+      final int numBytes = random().nextInt(10000000);
+
+      final byte[] answer = new byte[numBytes];
+      random().nextBytes(answer);
+      int written = 0;
+      while(written < numBytes) {
+        if (random().nextInt(10) == 7) {
+          out.writeByte(answer[written++]);
+        } else {
+          int chunk = Math.min(random().nextInt(1000), numBytes - written);
+          out.writeBytes(answer, written, chunk);
+          written += chunk;
+        }
+      }
+
+      final PagedBytes.Reader reader = p.freeze(random.nextBoolean());
+
+      final DataInput in = p.getDataInput();
+
+      final byte[] verify = new byte[numBytes];
+      int read = 0;
+      while(read < numBytes) {
+        if (random().nextInt(10) == 7) {
+          verify[read++] = in.readByte();
+        } else {
+          int chunk = Math.min(random().nextInt(1000), numBytes - read);
+          in.readBytes(verify, read, chunk);
+          read += chunk;
+        }
+      }
+      assertTrue(Arrays.equals(answer, verify));
+
+      final BytesRef slice = new BytesRef();
+      for(int iter2=0;iter2<100;iter2++) {
+        final int pos = random.nextInt(numBytes-1);
+        final int len = random.nextInt(Math.min(blockSize+1, numBytes - pos));
+        reader.fillSlice(slice, pos, len);
+        for(int byteUpto=0;byteUpto<len;byteUpto++) {
+          assertEquals(answer[pos + byteUpto], slice.bytes[slice.offset + byteUpto]);
+        }
+      }
+    }
+  }
+
   @Ignore // memory hole
   public void testOverflow() throws IOException {
     BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("testOverflow"));
@@ -126,4 +184,5 @@ public class TestPagedBytes extends LuceneTestCase {
     in.close();
     dir.close();
   }
+
 }
@@ -22,13 +22,16 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.facet.FacetTestCase;
 import org.apache.lucene.facet.FacetTestUtils;
+import org.apache.lucene.facet.codecs.facet42.Facet42Codec;
 import org.apache.lucene.facet.index.FacetFields;
 import org.apache.lucene.facet.params.CategoryListParams;
 import org.apache.lucene.facet.params.FacetIndexingParams;
@@ -48,6 +51,8 @@ import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util._TestUtil;
 
 public class TestDemoFacets extends FacetTestCase {
 
@@ -248,4 +253,60 @@ public class TestDemoFacets extends FacetTestCase {
     dir.close();
     taxoDir.close();
   }
+
+  // LUCENE-4583: make sure if we require > 32 KB for one
+  // document, we don't hit exc when using Facet42DocValuesFormat
+  public void testManyFacetsInOneDocument() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(new Facet42Codec());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+    FacetFields facetFields = new FacetFields(taxoWriter);
+
+    int numLabels = _TestUtil.nextInt(random(), 40000, 100000);
+
+    Document doc = new Document();
+    doc.add(newTextField("field", "text", Field.Store.NO));
+    List<CategoryPath> paths = new ArrayList<CategoryPath>();
+    for(int i=0;i<numLabels;i++) {
+      paths.add(new CategoryPath("dim", "" + i));
+    }
+    facetFields.addFields(doc, paths);
+    writer.addDocument(doc);
+
+    // NRT open
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    // NRT open
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    taxoWriter.close();
+
+    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("dim"), Integer.MAX_VALUE));
+
+    // Aggregate the facet counts:
+    FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
+
+    // MatchAllDocsQuery is for "browsing" (counts facets
+    // for all non-deleted docs in the index); normally
+    // you'd use a "normal" query, and use MultiCollector to
+    // wrap collecting the "normal" hits and also facets:
+    searcher.search(new MatchAllDocsQuery(), c);
+    List<FacetResult> results = c.getFacetResults();
+    assertEquals(1, results.size());
+    FacetResultNode root = results.get(0).getFacetResultNode();
+    assertEquals(numLabels, root.subResults.size());
+    Set<String> allLabels = new HashSet<String>();
+    for(FacetResultNode childNode : root.subResults) {
+      assertEquals(2, childNode.label.length);
+      allLabels.add(childNode.label.components[1]);
+      assertEquals(1, (int) childNode.value);
+    }
+    assertEquals(numLabels, allLabels.size());
+
+    IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
+  }
 }
@@ -162,6 +162,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer {
       if (b == null) {
         b = new BytesRef(); // 4.0 doesnt distinguish
       }
+      if (b.length > Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH) {
+        throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+      }
       minLength = Math.min(minLength, b.length);
       maxLength = Math.max(maxLength, b.length);
       if (uniqueValues != null) {
@@ -17,21 +17,20 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -58,6 +57,8 @@ import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+
 /**
  * Abstract class to do basic tests for a docvalues format.
  * NOTE: This test focuses on the docvalues impl, nothing else.
@@ -2401,4 +2402,172 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
     directory.close();
   }
+
+  // LUCENE-4853
+  public void testHugeBinaryValues() throws Exception {
+    Analyzer analyzer = new MockAnalyzer(random());
+    // FSDirectory because SimpleText will consume gobbs of
+    // space when storing big binary values:
+    Directory d = newFSDirectory(_TestUtil.getTempDir("hugeBinaryValues"));
+    boolean doFixed = random().nextBoolean();
+    int numDocs;
+    int fixedLength = 0;
+    if (doFixed) {
+      // Sometimes make all values fixed length since some
+      // codecs have different code paths for this:
+      numDocs = _TestUtil.nextInt(random(), 10, 20);
+      fixedLength = _TestUtil.nextInt(random(), 65537, 256*1024);
+    } else {
+      numDocs = _TestUtil.nextInt(random(), 100, 200);
+    }
+    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    List<byte[]> docBytes = new ArrayList<byte[]>();
+    long totalBytes = 0;
+    for(int docID=0;docID<numDocs;docID++) {
+      // we don't use RandomIndexWriter because it might add
+      // more docvalues than we expect !!!!
+
+      // Must be > 64KB in size to ensure more than 2 pages in
+      // PagedBytes would be needed:
+      int numBytes;
+      if (doFixed) {
+        numBytes = fixedLength;
+      } else if (docID == 0 || random().nextInt(5) == 3) {
+        numBytes = _TestUtil.nextInt(random(), 65537, 3*1024*1024);
+      } else {
+        numBytes = _TestUtil.nextInt(random(), 1, 1024*1024);
+      }
+      totalBytes += numBytes;
+      if (totalBytes > 5 * 1024*1024) {
+        break;
+      }
+      byte[] bytes = new byte[numBytes];
+      random().nextBytes(bytes);
+      docBytes.add(bytes);
+      Document doc = new Document();
+      BytesRef b = new BytesRef(bytes);
+      b.length = bytes.length;
+      doc.add(new BinaryDocValuesField("field", b));
+      doc.add(new StringField("id", ""+docID, Field.Store.YES));
+      try {
+        w.addDocument(doc);
+      } catch (IllegalArgumentException iae) {
+        if (iae.getMessage().indexOf("is too large") == -1) {
+          throw iae;
+        } else {
+          // OK: some codecs can't handle binary DV > 32K
+          assertFalse(codecAcceptsHugeBinaryValues("field"));
+          w.rollback();
+          d.close();
+          return;
+        }
+      }
+    }
+
+    DirectoryReader r;
+    try {
+      r = w.getReader();
+    } catch (IllegalArgumentException iae) {
+      if (iae.getMessage().indexOf("is too large") == -1) {
+        throw iae;
+      } else {
+        assertFalse(codecAcceptsHugeBinaryValues("field"));
+
+        // OK: some codecs can't handle binary DV > 32K
+        w.rollback();
+        d.close();
+        return;
+      }
+    }
+    w.close();
+
+    AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
+
+    BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
+    for(int docID=0;docID<docBytes.size();docID++) {
+      StoredDocument doc = ar.document(docID);
+      BytesRef bytes = new BytesRef();
+      s.get(docID, bytes);
+      byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
+      assertEquals(expected.length, bytes.length);
+      assertEquals(new BytesRef(expected), bytes);
+    }
+
+    assertTrue(codecAcceptsHugeBinaryValues("field"));
+
+    ar.close();
+    d.close();
+  }
+
+  public void testHugeBinaryValueLimit() throws Exception {
+    // We only test DVFormats that have a limit
+    assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
+    Analyzer analyzer = new MockAnalyzer(random());
+    // FSDirectory because SimpleText will consume gobbs of
+    // space when storing big binary values:
+    Directory d = newFSDirectory(_TestUtil.getTempDir("hugeBinaryValues"));
+    boolean doFixed = random().nextBoolean();
+    int numDocs;
+    int fixedLength = 0;
+    if (doFixed) {
+      // Sometimes make all values fixed length since some
+      // codecs have different code paths for this:
+      numDocs = _TestUtil.nextInt(random(), 10, 20);
+      fixedLength = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
+    } else {
+      numDocs = _TestUtil.nextInt(random(), 100, 200);
+    }
+    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    List<byte[]> docBytes = new ArrayList<byte[]>();
+    long totalBytes = 0;
+    for(int docID=0;docID<numDocs;docID++) {
+      // we don't use RandomIndexWriter because it might add
+      // more docvalues than we expect !!!!
+
+      // Must be > 64KB in size to ensure more than 2 pages in
+      // PagedBytes would be needed:
+      int numBytes;
+      if (doFixed) {
+        numBytes = fixedLength;
+      } else if (docID == 0 || random().nextInt(5) == 3) {
+        numBytes = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
+      } else {
+        numBytes = _TestUtil.nextInt(random(), 1, Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+      }
+      totalBytes += numBytes;
+      if (totalBytes > 5 * 1024*1024) {
+        break;
+      }
+      byte[] bytes = new byte[numBytes];
+      random().nextBytes(bytes);
+      docBytes.add(bytes);
+      Document doc = new Document();
+      BytesRef b = new BytesRef(bytes);
+      b.length = bytes.length;
+      doc.add(new BinaryDocValuesField("field", b));
+      doc.add(new StringField("id", ""+docID, Field.Store.YES));
+      w.addDocument(doc);
+    }
+
+    DirectoryReader r = w.getReader();
+    w.close();
+
+    AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
+
+    BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
+    for(int docID=0;docID<docBytes.size();docID++) {
+      StoredDocument doc = ar.document(docID);
+      BytesRef bytes = new BytesRef();
+      s.get(docID, bytes);
+      byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
+      assertEquals(expected.length, bytes.length);
+      assertEquals(new BytesRef(expected), bytes);
+    }
+
+    ar.close();
+    d.close();
+  }
+
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return true;
+  }
 }
@@ -59,12 +59,12 @@ import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
 import org.apache.lucene.index.CheckIndex.Status.FieldNormStatus;
 import org.apache.lucene.index.CheckIndex.Status.StoredFieldStatus;
 import org.apache.lucene.index.CheckIndex.Status.TermIndexStatus;
 import org.apache.lucene.index.CheckIndex.Status.TermVectorStatus;
+import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
@@ -744,13 +744,24 @@ public class _TestUtil {
     }
   }
 
-  public static String getDocValuesFormat(Codec codec, String field) {
-    DocValuesFormat d = codec.docValuesFormat();
-    if (d instanceof PerFieldDocValuesFormat) {
-      return ((PerFieldDocValuesFormat)d).getDocValuesFormatForField(field).getName();
-    } else {
-      return d.getName();
-    }
+  public static String getDocValuesFormat(String field) {
+    return getDocValuesFormat(Codec.getDefault(), field);
+  }
+
+  public static String getDocValuesFormat(Codec codec, String field) {
+    DocValuesFormat f = codec.docValuesFormat();
+    if (f instanceof PerFieldDocValuesFormat) {
+      return ((PerFieldDocValuesFormat) f).getDocValuesFormatForField(field).getName();
+    } else {
+      return f.getName();
+    }
+  }
+
+  public static boolean fieldSupportsHugeBinaryDocValues(String field) {
+    String dvFormat = getDocValuesFormat(field);
+    return dvFormat.equals("CheapBastard") ||
+           dvFormat.equals("Disk") ||
+           dvFormat.equals("SimpleText");
   }
 
   public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
@@ -130,6 +130,8 @@ Bug Fixes
   of divide by zero, and makes estimated hit counts meaningful in non-optimized
   indexes. (hossman)
 
+* SOLR-5164: Can not create a collection via collections API (cloud mode) (Erick Erickson)
+
 Optimizations
 ----------------------
 
@@ -31,7 +31,6 @@ import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
-import java.util.Date;
 import java.util.List;
 import java.util.Properties;
 
@@ -78,6 +77,7 @@ public class CorePropertiesLocator implements CoresLocator {
     Properties p = buildCoreProperties(cd);
     Writer os = null;
     try {
+      propfile.getParentFile().mkdirs();
       os = new OutputStreamWriter(new FileOutputStream(propfile), Charsets.UTF_8);
       p.store(os, "Written by CorePropertiesLocator");
     }
@@ -409,8 +409,9 @@ public class CoreAdminHandler extends RequestHandlerBase {
     String name = checkNotEmpty(params.get(CoreAdminParams.NAME),
         "Missing parameter [" + CoreAdminParams.NAME + "]");
     String instancedir = params.get(CoreAdminParams.INSTANCE_DIR);
-    if (StringUtils.isEmpty(instancedir))
-      instancedir = container.getSolrHome() + File.separator + name;
+    if (StringUtils.isEmpty(instancedir)) {
+      instancedir = name; // Already relative to solrHome, we haven't been given an absolute path.
+    }
 
     Properties coreProps = new Properties();
     for (String param : paramToProp.keySet()) {
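For illustration, not part of the patch: the CoreAdminHandler change above means that when no instanceDir parameter is given, the value persisted for the new core is the bare core name, resolved against solrHome when the core is loaded, rather than an absolute path fixed at creation time. A small fragment with made-up values:

    String solrHome = "/var/solr";                        // hypothetical solr home
    String name = "core1";                                // core name from the request
    String before = solrHome + File.separator + name;     // old behaviour: "/var/solr/core1" stored in core.properties
    String after  = name;                                 // new behaviour: "core1", resolved against solrHome on load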