mirror of https://github.com/apache/lucene.git
LUCENE-2205: port to trunk in preflex codec; port TestPagedBytes and PagedBytes.DataInput/Output to trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1190017 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 60079a441f
commit 00da65393c
CHANGES.txt
@@ -723,6 +723,9 @@ Optimizations
   FilteredQuery/IndexSearcher added by LUCENE-1536 to Lucene 4.0.
   (Uwe Schindler)
 
+* LUCENE-2205: Very substantial (3-5X) RAM reduction required to hold
+  the terms index on opening an IndexReader (Aaron McCurry via Mike McCandless)
+
 Test Cases
 
 * LUCENE-3420: Disable the finalness checks in TokenStream and Analyzer
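For intuition about where the 3-5X comes from: the old TermInfosReader held three parallel object arrays (Term[], TermInfo[], long[], removed below), while the new TermInfosReaderIndex (added below) keeps one shared byte blob plus packed offsets. A back-of-envelope sketch only; the per-object sizes are assumed 64-bit JVM figures, not measurements from the issue:

// Illustrative-only comparison of per-entry RAM for the two layouts.
// All object sizes are assumptions, not measurements.
public class TermsIndexRamSketch {
  public static void main(String[] args) {
    final long numIndexTerms = 1000000;           // hypothetical segment
    final int avgTermChars = 10;

    // Old: Term + String + char[] + TermInfo objects + a long[] slot per entry.
    long oldPerTerm = 24                          // Term (header + 2 refs)
                    + 32 + 16 + 2 * avgTermChars  // String + char[] (UTF-16)
                    + 40                          // TermInfo (header + fields)
                    + 8;                          // indexPointers[i]
    // New: UTF-8 text plus a few vInt/vLong bytes in one shared blob,
    // plus a packed offset (often well under 8 bytes per entry).
    long newPerTerm = avgTermChars + 8 + 4;

    System.out.println("old ~" + ((numIndexTerms * oldPerTerm) >> 20)
        + " MB, new ~" + ((numIndexTerms * newPerTerm) >> 20) + " MB");
  }
}

The printed ratio lands in the same ballpark as the 3-5X reported in the entry; the exact factor depends on term length and JVM overheads.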
src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
@@ -47,9 +47,8 @@ public final class TermInfosReader {
   private final SegmentTermEnum origEnum;
   private final long size;
 
-  private final Term[] indexTerms;
-  private final TermInfo[] indexInfos;
-  private final long[] indexPointers;
+  private final TermInfosReaderIndex index;
+  private final int indexLength;
 
   private final int totalIndexInterval;
 
@@ -118,37 +117,23 @@ public final class TermInfosReader {
       if (indexDivisor != -1) {
         // Load terms index
         totalIndexInterval = origEnum.indexInterval * indexDivisor;
-        final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, "", PreFlexCodec.TERMS_INDEX_EXTENSION),
+        final String indexFileName = IndexFileNames.segmentFileName(segment, "", PreFlexCodec.TERMS_INDEX_EXTENSION);
+        final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(indexFileName,
                                                                                   context), fieldInfos, true);
 
         try {
-          int indexSize = 1+((int)indexEnum.size-1)/indexDivisor;  // otherwise read index
-          indexTerms = new Term[indexSize];
-          indexInfos = new TermInfo[indexSize];
-          indexPointers = new long[indexSize];
-
-          for (int i=0;indexEnum.next(); i++) {
-            indexTerms[i] = indexEnum.term();
-            assert indexTerms[i] != null;
-            assert indexTerms[i].text() != null;
-            assert indexTerms[i].field() != null;
-            indexInfos[i] = indexEnum.termInfo();
-            indexPointers[i] = indexEnum.indexPointer;
-
-            for (int j = 1; j < indexDivisor; j++)
-              if (!indexEnum.next())
-                break;
-          }
+          // nocommit don't cast to int..
+          index = new TermInfosReaderIndex(indexEnum, indexDivisor, (int) dir.fileLength(indexFileName), totalIndexInterval);
+          indexLength = index.length();
         } finally {
           indexEnum.close();
         }
       } else {
         // Do not load terms index:
         totalIndexInterval = -1;
-        indexTerms = null;
-        indexInfos = null;
-        indexPointers = null;
+        index = null;
+        indexLength = -1;
       }
       success = true;
     } finally {
@@ -203,31 +188,6 @@ public final class TermInfosReader {
     }
   }
 
-  /** Returns the offset of the greatest index entry which is less than or equal to term.*/
-  private int getIndexOffset(Term term) {
-    int lo = 0;                                      // binary search indexTerms[]
-    int hi = indexTerms.length - 1;
-
-    while (hi >= lo) {
-      int mid = (lo + hi) >>> 1;
-      assert indexTerms[mid] != null : "indexTerms = " + indexTerms.length + " mid=" + mid;
-      int delta = compareAsUTF16(term, indexTerms[mid]);
-      if (delta < 0)
-        hi = mid - 1;
-      else if (delta > 0)
-        lo = mid + 1;
-      else
-        return mid;
-    }
-    return hi;
-  }
-
-  private void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
-    enumerator.seek(indexPointers[indexOffset],
-                    ((long) indexOffset * totalIndexInterval) - 1,
-                    indexTerms[indexOffset], indexInfos[indexOffset]);
-  }
-
   /** Returns the TermInfo for a Term in the set, or null. */
   TermInfo get(Term term) throws IOException {
     return get(term, false);
@@ -272,8 +232,8 @@ public final class TermInfosReader {
         && ((enumerator.prev() != null && compareAsUTF16(term, enumerator.prev())> 0)
             || compareAsUTF16(term, enumerator.term()) >= 0)) {
       int enumOffset = (int)(enumerator.position/totalIndexInterval)+1;
-      if (indexTerms.length == enumOffset    // but before end of block
-          || compareAsUTF16(term, indexTerms[enumOffset]) < 0) {
+      if (indexLength == enumOffset    // but before end of block
+          || index.compareTo(term, enumOffset) < 0) {
        // no need to seek
 
         final TermInfo ti;
@@ -309,10 +269,10 @@ public final class TermInfosReader {
       indexPos = (int) (tiOrd.termOrd / totalIndexInterval);
     } else {
       // Must do binary search:
-      indexPos = getIndexOffset(term);
+      indexPos = index.getIndexOffset(term);
     }
 
-    seekEnum(enumerator, indexPos);
+    index.seekEnum(enumerator, indexPos);
     enumerator.scanTo(term);
     final TermInfo ti;
 
@@ -352,7 +312,7 @@ public final class TermInfosReader {
   }
 
   private void ensureIndexIsRead() {
-    if (indexTerms == null) {
+    if (index == null) {
       throw new IllegalStateException("terms index was not loaded when this reader was created");
     }
   }
@@ -362,10 +322,10 @@ public final class TermInfosReader {
     if (size == 0) return -1;
 
     ensureIndexIsRead();
-    int indexOffset = getIndexOffset(term);
+    int indexOffset = index.getIndexOffset(term);
 
     SegmentTermEnum enumerator = getThreadResources().termEnum;
-    seekEnum(enumerator, indexOffset);
+    index.seekEnum(enumerator, indexOffset);
 
     while(compareAsUTF16(term, enumerator.term()) > 0 && enumerator.next()) {}
 
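The two removed private methods reappear on TermInfosReaderIndex (below), but they now walk serialized entries instead of object arrays. A minimal standalone sketch of that pattern, not the commit's code (the real version compares BytesRef values with a UTF8-sorted-as-UTF16 comparator and decodes through PagedBytesDataInput):

import java.nio.charset.StandardCharsets;

class SerializedBinarySearch {
  // Returns the slot of the greatest entry <= target, or -1 if target
  // precedes everything: the same contract as the removed getIndexOffset.
  static int search(byte[] blob, int[] offsets, int[] lengths, String target) {
    int lo = 0;
    int hi = offsets.length - 1;
    while (hi >= lo) {
      int mid = (lo + hi) >>> 1;
      // Decode only the probed entry; nothing is materialized up front.
      String midTerm = new String(blob, offsets[mid], lengths[mid], StandardCharsets.UTF_8);
      int delta = target.compareTo(midTerm);
      if (delta < 0) hi = mid - 1;
      else if (delta > 0) lo = mid + 1;
      else return mid;
    }
    return hi;
  }
}

The search cost stays O(log n) per lookup; only the probed entries are ever decoded, which is what lets the whole index live as flat bytes.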
src/java/org/apache/lucene/index/codecs/preflex/TermInfosReaderIndex.java (new file)
@@ -0,0 +1,252 @@
package org.apache.lucene.index.codecs.preflex;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.index.Term;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;

/**
 * This stores a monotonically increasing set of <Term, TermInfo> pairs in an
 * index segment. Pairs are accessed either by Term or by ordinal position in
 * the set. The Terms and TermInfos are actually serialized and stored in a
 * byte array, and pointers to the position of each are stored in an int array.
 */
class TermInfosReaderIndex {

  private static final int MAX_PAGE_BITS = 18; // 256 KB block
  private Term[] fields;
  private int totalIndexInterval;
  private Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUTF16Comparator();
  private final PagedBytesDataInput dataInput;
  private final PackedInts.Reader indexToDataOffset;
  private final int indexSize;
  private final int skipInterval;

  /**
   * Loads the segment information at segment load time.
   *
   * @param indexEnum
   *          the term enum.
   * @param indexDivisor
   *          the index divisor.
   * @param tiiFileLength
   *          the size of the tii file, used to approximate the size of the
   *          buffer.
   * @param totalIndexInterval
   *          the total index interval.
   */
  TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException {
    this.totalIndexInterval = totalIndexInterval;
    indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor;
    skipInterval = indexEnum.skipInterval;
    // this is only an initial size, it will be GCed once the build is complete
    long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor;
    PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
    PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();

    GrowableWriter indexToTerms = new GrowableWriter(4, indexSize, false);
    String currentField = null;
    List<String> fieldStrs = new ArrayList<String>();
    int fieldCounter = -1;
    for (int i = 0; indexEnum.next(); i++) {
      Term term = indexEnum.term();
      if (currentField == null || !currentField.equals(term.field())) {
        currentField = term.field();
        fieldStrs.add(currentField);
        fieldCounter++;
      }
      TermInfo termInfo = indexEnum.termInfo();
      indexToTerms.set(i, dataOutput.getPosition());
      dataOutput.writeVInt(fieldCounter);
      dataOutput.writeString(term.text());
      dataOutput.writeVInt(termInfo.docFreq);
      if (termInfo.docFreq >= skipInterval) {
        dataOutput.writeVInt(termInfo.skipOffset);
      }
      dataOutput.writeVLong(termInfo.freqPointer);
      dataOutput.writeVLong(termInfo.proxPointer);
      dataOutput.writeVLong(indexEnum.indexPointer);
      for (int j = 1; j < indexDivisor; j++) {
        if (!indexEnum.next()) {
          break;
        }
      }
    }

    fields = new Term[fieldStrs.size()];
    for (int i = 0; i < fields.length; i++) {
      fields[i] = new Term(fieldStrs.get(i));
    }

    dataPagedBytes.freeze(true);
    dataInput = dataPagedBytes.getDataInput();
    indexToDataOffset = indexToTerms.getMutable();
  }

  private static int estimatePageBits(long estSize) {
    return Math.max(Math.min(64 - BitUtil.nlz(estSize), MAX_PAGE_BITS), 4);
  }

  void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
    PagedBytesDataInput input = (PagedBytesDataInput) dataInput.clone();

    input.setPosition(indexToDataOffset.get(indexOffset));

    // read the term
    int fieldId = input.readVInt();
    Term field = fields[fieldId];
    Term term = new Term(field.field(), input.readString());

    // read the terminfo
    TermInfo termInfo = new TermInfo();
    termInfo.docFreq = input.readVInt();
    if (termInfo.docFreq >= skipInterval) {
      termInfo.skipOffset = input.readVInt();
    } else {
      termInfo.skipOffset = 0;
    }
    termInfo.freqPointer = input.readVLong();
    termInfo.proxPointer = input.readVLong();

    long pointer = input.readVLong();

    // perform the seek
    enumerator.seek(pointer, ((long) indexOffset * totalIndexInterval) - 1, term, termInfo);
  }

  /**
   * Binary search for the given term.
   *
   * @param term
   *          the term to locate.
   * @throws IOException
   */
  int getIndexOffset(Term term) throws IOException {
    int lo = 0;
    int hi = indexSize - 1;
    PagedBytesDataInput input = (PagedBytesDataInput) dataInput.clone();
    BytesRef scratch = new BytesRef();
    while (hi >= lo) {
      int mid = (lo + hi) >>> 1;
      int delta = compareTo(term, mid, input, scratch);
      if (delta < 0)
        hi = mid - 1;
      else if (delta > 0)
        lo = mid + 1;
      else
        return mid;
    }
    return hi;
  }

  /**
   * Gets the term at the given position. For testing.
   *
   * @param termIndex
   *          the position to read the term from the index.
   * @return the term.
   * @throws IOException
   */
  Term getTerm(int termIndex) throws IOException {
    PagedBytesDataInput input = (PagedBytesDataInput) dataInput.clone();
    input.setPosition(indexToDataOffset.get(termIndex));

    // read the term
    int fieldId = input.readVInt();
    Term field = fields[fieldId];
    return new Term(field.field(), input.readString());
  }

  /**
   * Returns the number of terms.
   *
   * @return int.
   */
  int length() {
    return indexSize;
  }

  /**
   * Compares the given term against the term in the index specified by the
   * term index, i.e. it returns negative N when term is less than the index term.
   *
   * @param term
   *          the given term.
   * @param termIndex
   *          the index of the term to compare.
   * @return int.
   * @throws IOException
   */
  int compareTo(Term term, int termIndex) throws IOException {
    return compareTo(term, termIndex, (PagedBytesDataInput) dataInput.clone(), new BytesRef());
  }

  /**
   * Compare the fields of the terms first, and if not equal return from
   * compare. If equal, compare the terms.
   *
   * @param term
   *          the term to compare.
   * @param termIndex
   *          the position of the term in the input to compare
   * @param input
   *          the input buffer.
   * @return int.
   * @throws IOException
   */
  private int compareTo(Term term, int termIndex, PagedBytesDataInput input, BytesRef reuse) throws IOException {
    // if term field does not equal mid's field index, then compare fields
    // else if they are equal, compare term's string values...
    int c = compareField(term, termIndex, input);
    if (c == 0) {
      reuse.length = input.readVInt();
      reuse.grow(reuse.length);
      input.readBytes(reuse.bytes, 0, reuse.length);
      return comparator.compare(term.bytes(), reuse);
    }
    return c;
  }

  /**
   * Compares the fields before checking the text of the terms.
   *
   * @param term
   *          the given term.
   * @param termIndex
   *          the term that exists in the data block.
   * @param input
   *          the data block.
   * @return int.
   * @throws IOException
   */
  private int compareField(Term term, int termIndex, PagedBytesDataInput input) throws IOException {
    input.setPosition(indexToDataOffset.get(termIndex));
    return term.field().compareTo(fields[input.readVInt()].field());
  }
}
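Each index entry in the blob above is written as: vInt field ordinal, the term's text, vInt docFreq, vInt skipOffset (only when docFreq >= skipInterval), then vLong freqPointer, proxPointer, and indexPointer. The variable-length encoding is what keeps the entries small, and estimatePageBits simply clamps the bit length of the estimated size (64 - BitUtil.nlz(estSize)) between 4 and MAX_PAGE_BITS, i.e. pages between 16 B and 256 KB. A self-contained sketch of Lucene's 7-bits-per-byte vInt format (my code, not the commit's; Lucene routes this through DataOutput.writeVInt):

import java.io.ByteArrayOutputStream;

class VIntSketch {
  // Low 7 bits per byte, high bit set while more bytes follow.
  static void writeVInt(ByteArrayOutputStream out, int i) {
    while ((i & ~0x7F) != 0) {      // more than 7 significant bits left
      out.write((i & 0x7F) | 0x80); // emit low 7 bits with continuation flag
      i >>>= 7;
    }
    out.write(i);                   // final byte, high bit clear
  }

  public static void main(String[] args) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    writeVInt(out, 5);        // 1 byte
    writeVInt(out, 300);      // 2 bytes
    writeVInt(out, 1 << 20);  // 3 bytes
    System.out.println(out.size() + " bytes for three ints"); // 6, not 12
  }
}

Small docFreqs and delta-friendly pointers therefore cost one or two bytes each instead of fixed 4- or 8-byte slots.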
src/java/org/apache/lucene/util/PagedBytes.java
@@ -17,12 +17,14 @@ package org.apache.lucene.util;
  * limitations under the License.
  */
 
-import org.apache.lucene.store.IndexInput;
-
-import java.util.List;
-import java.util.ArrayList;
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexInput;
 
 /** Represents a logical byte[] as a series of pages.  You
  *  can write-once into the logical byte[] (append only),
@@ -37,6 +39,8 @@ public final class PagedBytes {
   private final int blockSize;
   private final int blockBits;
   private final int blockMask;
+  private boolean didSkipBytes;
+  private boolean frozen;
   private int upto;
   private byte[] currentBlock;
 
@@ -320,6 +324,7 @@ public final class PagedBytes {
     if (currentBlock != null) {
       blocks.add(currentBlock);
       blockEnd.add(upto);
+      didSkipBytes = true;
     }
     currentBlock = new byte[blockSize];
     upto = 0;
@@ -338,6 +343,12 @@ public final class PagedBytes {
 
   /** Commits final byte[], trimming it if necessary and if trim=true */
   public Reader freeze(boolean trim) {
+    if (frozen) {
+      throw new IllegalStateException("already frozen");
+    }
+    if (didSkipBytes) {
+      throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used");
+    }
     if (trim && upto < blockSize) {
       final byte[] newBlock = new byte[upto];
       System.arraycopy(currentBlock, 0, newBlock, 0, upto);
@@ -348,6 +359,7 @@ public final class PagedBytes {
     }
     blocks.add(currentBlock);
     blockEnd.add(upto);
+    frozen = true;
     currentBlock = null;
     return new Reader(this);
   }
@@ -389,4 +401,150 @@ public final class PagedBytes {
 
     return pointer;
   }
+
+  public final class PagedBytesDataInput extends DataInput {
+    private int currentBlockIndex;
+    private int currentBlockUpto;
+    private byte[] currentBlock;
+
+    PagedBytesDataInput() {
+      currentBlock = blocks.get(0);
+    }
+
+    @Override
+    public Object clone() {
+      PagedBytesDataInput clone = getDataInput();
+      clone.setPosition(getPosition());
+      return clone;
+    }
+
+    /** Returns the current byte position. */
+    public long getPosition() {
+      return currentBlockIndex * blockSize + currentBlockUpto;
+    }
+
+    /** Seek to a position previously obtained from
+     *  {@link #getPosition}. */
+    public void setPosition(long pos) {
+      currentBlockIndex = (int) (pos >> blockBits);
+      currentBlock = blocks.get(currentBlockIndex);
+      currentBlockUpto = (int) (pos & blockMask);
+    }
+
+    @Override
+    public byte readByte() {
+      if (currentBlockUpto == blockSize) {
+        nextBlock();
+      }
+      return currentBlock[currentBlockUpto++];
+    }
+
+    @Override
+    public void readBytes(byte[] b, int offset, int len) {
+      final int offsetEnd = offset + len;
+      while (true) {
+        final int blockLeft = blockSize - currentBlockUpto;
+        final int left = offsetEnd - offset;
+        if (blockLeft < left) {
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           blockLeft);
+          nextBlock();
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           left);
+          currentBlockUpto += left;
+          break;
+        }
+      }
+    }
+
+    private void nextBlock() {
+      currentBlockIndex++;
+      currentBlockUpto = 0;
+      currentBlock = blocks.get(currentBlockIndex);
+    }
+  }
+
+  public final class PagedBytesDataOutput extends DataOutput {
+    @Override
+    public void writeByte(byte b) {
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+      currentBlock[upto++] = b;
+    }
+
+    @Override
+    public void writeBytes(byte[] b, int offset, int length) throws IOException {
+      if (length == 0) {
+        return;
+      }
+
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+
+      final int offsetEnd = offset + length;
+      while(true) {
+        final int left = offsetEnd - offset;
+        final int blockLeft = blockSize - upto;
+        if (blockLeft < left) {
+          System.arraycopy(b, offset, currentBlock, upto, blockLeft);
+          blocks.add(currentBlock);
+          blockEnd.add(blockSize);
+          currentBlock = new byte[blockSize];
+          upto = 0;
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(b, offset, currentBlock, upto, left);
+          upto += left;
+          break;
+        }
+      }
+    }
+
+    /** Return the current byte position. */
+    public long getPosition() {
+      if (currentBlock == null) {
+        return 0;
+      } else {
+        return blocks.size() * blockSize + upto;
+      }
+    }
+  }
+
+  /** Returns a DataInput to read values from this
+   *  PagedBytes instance. */
+  public PagedBytesDataInput getDataInput() {
+    if (!frozen) {
+      throw new IllegalStateException("must call freeze() before getDataInput");
+    }
+    return new PagedBytesDataInput();
+  }
+
+  /** Returns a DataOutput that you may use to write into
+   *  this PagedBytes instance.  If you do this, you should
+   *  not call the other writing methods (eg, copy);
+   *  results are undefined. */
+  public PagedBytesDataOutput getDataOutput() {
+    if (frozen) {
+      throw new IllegalStateException("cannot get DataOutput after freeze()");
+    }
+    return new PagedBytesDataOutput();
+  }
 }
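The new inner classes give PagedBytes a strict write-once lifecycle: write through the DataOutput, freeze, then read through the DataInput. A hypothetical round trip pieced together from the calls this diff adds (the block size of 2^10 is an arbitrary choice for illustration):

import java.io.IOException;

import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;

class PagedBytesRoundTrip {
  static void demo() throws IOException {
    PagedBytes bytes = new PagedBytes(10);            // 2^10 = 1 KB pages
    PagedBytesDataOutput out = bytes.getDataOutput(); // must come before freeze()
    long pos = out.getPosition();                     // offset of the next write
    out.writeVInt(42);                                // any DataOutput method works
    bytes.freeze(true);                               // trim last page, forbid writes
    PagedBytesDataInput in = bytes.getDataInput();    // must come after freeze()
    in.setPosition(pos);                              // random access by saved offset
    int value = in.readVInt();                        // reads back 42
    assert value == 42;
  }
}

Calling getDataOutput() after freeze(), or getDataInput() before it, throws IllegalStateException per the checks added above; that ordering is exactly what TermInfosReaderIndex relies on.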
src/test/org/apache/lucene/index/codecs/preflex/TestTermInfosReaderIndex.java (new file)
@@ -0,0 +1,193 @@
package org.apache.lucene.index.codecs.preflex;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.CoreCodecProvider;
import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

public class TestTermInfosReaderIndex extends LuceneTestCase {

  private static final int NUMBER_OF_DOCUMENTS = 1000;
  private static final int NUMBER_OF_FIELDS = 100;
  private TermInfosReaderIndex index;
  private Directory directory;
  private SegmentTermEnum termEnum;
  private int indexDivisor;
  private int termIndexInterval;
  private IndexReader reader;
  private List<Term> sampleTerms;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    indexDivisor = _TestUtil.nextInt(random, 1, 10);
    directory = newDirectory();
    termIndexInterval = populate(directory);

    IndexReader r0 = IndexReader.open(directory);
    SegmentReader r = (SegmentReader) r0.getSequentialSubReaders()[0];
    String segment = r.getSegmentName();
    r.close();

    FieldInfos fieldInfos = new FieldInfos(directory, IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELD_INFOS_EXTENSION));
    String segmentFileName = IndexFileNames.segmentFileName(segment, "", PreFlexCodec.TERMS_INDEX_EXTENSION);
    long tiiFileLength = directory.fileLength(segmentFileName);
    IndexInput input = directory.openInput(segmentFileName, newIOContext(random));
    termEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, "", PreFlexCodec.TERMS_EXTENSION), newIOContext(random)), fieldInfos, false);
    int totalIndexInterval = termEnum.indexInterval * indexDivisor;

    SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);
    index = new TermInfosReaderIndex(indexEnum, indexDivisor, tiiFileLength, totalIndexInterval);
    indexEnum.close();
    input.close();

    reader = IndexReader.open(directory);
    sampleTerms = sample(reader, 1000);
  }

  @Override
  public void tearDown() throws Exception {
    termEnum.close();
    reader.close();
    directory.close();
    super.tearDown();
  }

  public void testSeekEnum() throws CorruptIndexException, IOException {
    int indexPosition = 3;
    SegmentTermEnum clone = (SegmentTermEnum) termEnum.clone();
    Term term = findTermThatWouldBeAtIndex(clone, indexPosition);
    SegmentTermEnum enumerator = clone;
    index.seekEnum(enumerator, indexPosition);
    assertEquals(term, enumerator.term());
    clone.close();
  }

  public void testCompareTo() throws IOException {
    Term term = new Term("field" + random.nextInt(NUMBER_OF_FIELDS), getText());
    for (int i = 0; i < index.length(); i++) {
      Term t = index.getTerm(i);
      int compareTo = term.compareTo(t);
      assertEquals(compareTo, index.compareTo(term, i));
    }
  }

  public void testRandomSearchPerformance() throws CorruptIndexException, IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    for (Term t : sampleTerms) {
      TermQuery query = new TermQuery(t);
      TopDocs topDocs = searcher.search(query, 10);
      assertTrue(topDocs.totalHits > 0);
    }
    searcher.close();
  }

  private List<Term> sample(IndexReader reader, int size) throws IOException {
    List<Term> sample = new ArrayList<Term>();
    Random random = new Random();
    FieldsEnum fieldsEnum = MultiFields.getFields(reader).iterator();
    String field;
    while((field = fieldsEnum.next()) != null) {
      TermsEnum terms = fieldsEnum.terms();
      while (terms.next() != null) {
        if (sample.size() >= size) {
          int pos = random.nextInt(size);
          sample.set(pos, new Term(field, terms.term()));
        } else {
          sample.add(new Term(field, terms.term()));
        }
      }
    }
    Collections.shuffle(sample);
    return sample;
  }

  private Term findTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index) throws IOException {
    int termPosition = index * termIndexInterval * indexDivisor;
    for (int i = 0; i < termPosition; i++) {
      if (!termEnum.next()) {
        fail("Should not have run out of terms.");
      }
    }
    return termEnum.term();
  }

  private int populate(Directory directory) throws CorruptIndexException, LockObtainFailedException, IOException {
    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random, MockTokenizer.KEYWORD, false));
    CoreCodecProvider cp = new CoreCodecProvider();
    cp.unregister(cp.lookup("PreFlex"));
    cp.register(new PreFlexRWCodec());
    cp.setDefaultFieldCodec("PreFlex");
    config.setCodecProvider(cp);
    // turn off compound file, this test will open some index files directly.
    LogMergePolicy mp = newLogMergePolicy();
    mp.setUseCompoundFile(false);
    config.setMergePolicy(mp);

    RandomIndexWriter writer = new RandomIndexWriter(random, directory, config);
    for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++) {
      Document document = new Document();
      for (int f = 0; f < NUMBER_OF_FIELDS; f++) {
        document.add(newField("field" + f, getText(), StringField.TYPE_UNSTORED));
      }
      writer.addDocument(document);
    }
    writer.optimize();
    writer.close();
    return config.getTermIndexInterval();
  }

  private String getText() {
    return Long.toString(random.nextLong(), Character.MAX_RADIX);
  }
}
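findTermThatWouldBeAtIndex encodes the invariant the index is built on: entry i points at term ordinal i * termIndexInterval * indexDivisor, since the writer records every termIndexInterval-th term and the reader keeps only every indexDivisor-th of those. A worked example with illustrative values (128 is the IndexWriterConfig default interval; the divisor is hypothetical, the test randomizes it):

public class IndexSpacingSketch {
  public static void main(String[] args) {
    int termIndexInterval = 128;   // IndexWriterConfig default
    int indexDivisor = 2;          // hypothetical; the test picks 1..10
    for (int i = 0; i < 4; i++) {
      System.out.println("index entry " + i + " -> term ordinal "
          + (i * termIndexInterval * indexDivisor));
    }
    // prints ordinals 0, 256, 512, 768
  }
}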
src/test/org/apache/lucene/util/TestPagedBytes.java (new file)
@@ -0,0 +1,64 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.util;

import java.util.Arrays;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

public class TestPagedBytes extends LuceneTestCase {

  public void testDataInputOutput() throws Exception {
    for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
      final PagedBytes p = new PagedBytes(_TestUtil.nextInt(random, 1, 20));
      final DataOutput out = p.getDataOutput();
      final int numBytes = random.nextInt(10000000);

      final byte[] answer = new byte[numBytes];
      random.nextBytes(answer);
      int written = 0;
      while(written < numBytes) {
        if (random.nextInt(10) == 7) {
          out.writeByte(answer[written++]);
        } else {
          // cap the chunk at the bytes remaining so we never run past the array
          int chunk = Math.min(random.nextInt(1000), numBytes - written);
          out.writeBytes(answer, written, chunk);
          written += chunk;
        }
      }

      p.freeze(random.nextBoolean());

      final DataInput in = p.getDataInput();

      final byte[] verify = new byte[numBytes];
      int read = 0;
      while(read < numBytes) {
        if (random.nextInt(10) == 7) {
          verify[read++] = in.readByte();
        } else {
          // same bound on the read side
          int chunk = Math.min(random.nextInt(1000), numBytes - read);
          in.readBytes(verify, read, chunk);
          read += chunk;
        }
      }
      assertTrue(Arrays.equals(answer, verify));
    }
  }
}