mirror of https://github.com/apache/lucene.git
LUCENE-1212: factor DocumentsWriter into separate source files
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@636458 13f79535-47bb-0310-9956-ffa450edef68
parent a6258c76fd
commit e5f9b4e1cb
AbortException.java (new file)
@@ -0,0 +1,29 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

// Used only internally to DW to call abort "up the stack"
class AbortException extends IOException {
  public AbortException(Throwable cause, DocumentsWriter docWriter) {
    super();
    initCause(cause);
    docWriter.setAborting();
  }
}
BufferedDeletes.java (new file)
@@ -0,0 +1,146 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.HashMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
import java.util.Map.Entry;

/** Holds buffered deletes, by docID, term or query. We
 *  hold two instances of this class: one for the deletes
 *  prior to the last flush, the other for deletes after
 *  the last flush. This is so if we need to abort
 *  (discard all buffered docs) we can also discard the
 *  buffered deletes yet keep the deletes done during
 *  previously flushed segments. */
class BufferedDeletes {
  int numTerms;
  HashMap terms = new HashMap();
  HashMap queries = new HashMap();
  List docIDs = new ArrayList();

  // Number of documents a delete term applies to.
  final static class Num {
    private int num;

    Num(int num) {
      this.num = num;
    }

    int getNum() {
      return num;
    }

    void setNum(int num) {
      // Only record the new number if it's greater than the
      // current one. This is important because if multiple
      // threads are replacing the same doc at nearly the
      // same time, it's possible that one thread that got a
      // higher docID is scheduled before the other
      // threads.
      if (num > this.num)
        this.num = num;
    }
  }

  void update(BufferedDeletes in) {
    numTerms += in.numTerms;
    terms.putAll(in.terms);
    queries.putAll(in.queries);
    docIDs.addAll(in.docIDs);
    in.terms.clear();
    in.numTerms = 0;
    in.queries.clear();
    in.docIDs.clear();
  }

  void clear() {
    terms.clear();
    queries.clear();
    docIDs.clear();
    numTerms = 0;
  }

  boolean any() {
    return terms.size() > 0 || docIDs.size() > 0 || queries.size() > 0;
  }

  // Remaps all buffered deletes based on a completed
  // merge
  synchronized void remap(MergeDocIDRemapper mapper,
                          SegmentInfos infos,
                          int[][] docMaps,
                          int[] delCounts,
                          MergePolicy.OneMerge merge,
                          int mergeDocCount) {

    final HashMap newDeleteTerms;

    // Remap delete-by-term
    if (terms.size() > 0) {
      newDeleteTerms = new HashMap();
      Iterator iter = terms.entrySet().iterator();
      while(iter.hasNext()) {
        Entry entry = (Entry) iter.next();
        Num num = (Num) entry.getValue();
        newDeleteTerms.put(entry.getKey(),
                           new Num(mapper.remap(num.getNum())));
      }
    } else
      newDeleteTerms = null;

    // Remap delete-by-docID
    final List newDeleteDocIDs;

    if (docIDs.size() > 0) {
      newDeleteDocIDs = new ArrayList(docIDs.size());
      Iterator iter = docIDs.iterator();
      while(iter.hasNext()) {
        Integer num = (Integer) iter.next();
        newDeleteDocIDs.add(new Integer(mapper.remap(num.intValue())));
      }
    } else
      newDeleteDocIDs = null;

    // Remap delete-by-query
    final HashMap newDeleteQueries;

    if (queries.size() > 0) {
      newDeleteQueries = new HashMap(queries.size());
      Iterator iter = queries.entrySet().iterator();
      while(iter.hasNext()) {
        Entry entry = (Entry) iter.next();
        Integer num = (Integer) entry.getValue();
        newDeleteQueries.put(entry.getKey(),
                             new Integer(mapper.remap(num.intValue())));
      }
    } else
      newDeleteQueries = null;

    if (newDeleteTerms != null)
      terms = newDeleteTerms;
    if (newDeleteDocIDs != null)
      docIDs = newDeleteDocIDs;
    if (newDeleteQueries != null)
      queries = newDeleteQueries;
  }
}
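The class comment above describes a double-buffered arrangement: one BufferedDeletes instance collects deletes that apply to documents still buffered in RAM, while a second holds deletes that already apply to flushed segments. A minimal sketch of that flow, assuming code living in the same org.apache.lucene.index package and using hypothetical deletesInRAM/deletesFlushed names (the real bookkeeping lives in DocumentsWriter and is more involved):

package org.apache.lucene.index;

// Illustrative only: shows how update()/clear()/any() support the
// two-instance scheme described in the BufferedDeletes class comment.
class BufferedDeletesSketch {

  BufferedDeletes deletesInRAM = new BufferedDeletes();    // deletes since the last flush
  BufferedDeletes deletesFlushed = new BufferedDeletes();  // deletes against already-flushed segments

  void onFlush() {
    // The RAM buffer just became a real segment, so its pending deletes
    // now apply to flushed segments: move them over and empty the source.
    deletesFlushed.update(deletesInRAM);
  }

  void onAbort() {
    // Buffered docs are discarded, so their buffered deletes go too;
    // deletes already promoted to deletesFlushed are kept.
    deletesInRAM.clear();
  }

  boolean hasDeletes() {
    return deletesInRAM.any() || deletesFlushed.any();
  }
}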
BufferedNorms.java (new file)
@@ -0,0 +1,60 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.search.Similarity;

/* Stores norms, buffered in RAM, until they are flushed
 * to a partial segment. */
final class BufferedNorms {

  RAMOutputStream out;
  int upto;

  private static final byte defaultNorm = Similarity.encodeNorm(1.0f);

  BufferedNorms() {
    out = new RAMOutputStream();
  }

  void add(float norm) throws IOException {
    byte b = Similarity.encodeNorm(norm);
    out.writeByte(b);
    upto++;
  }

  void reset() {
    out.reset();
    upto = 0;
  }

  void fill(int docID) throws IOException {
    // Must now fill in docs that didn't have this
    // field. Note that this is how norms can consume
    // tremendous storage when the docs have widely
    // varying different fields, because we are not
    // storing the norms sparsely (see LUCENE-830)
    if (upto < docID) {
      DocumentsWriter.fillBytes(out, defaultNorm, docID-upto);
      upto = docID;
    }
  }
}
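fill() is what keeps the norm stream dense: there is exactly one norm byte per docID, even for documents that never contain the field, which is the storage cost the comment above points at (LUCENE-830). A small illustrative sequence (hypothetical values, same-package code):

package org.apache.lucene.index;

import java.io.IOException;

// Illustrative only: one norm byte per docID, defaults filled in for
// docs that skip the field.
class BufferedNormsSketch {
  static void example() throws IOException {
    BufferedNorms norms = new BufferedNorms();

    norms.add(0.5f);    // doc 0 has the field: byte 0 holds its norm

    norms.fill(10);     // docs 1..9 lack the field: bytes 1..9 get the default norm
    norms.add(0.25f);   // doc 10 has the field again: byte 10 holds its norm

    // norms.upto is now 11 -- one byte per document seen so far.
  }
}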
ByteBlockPool.java (new file)
@@ -0,0 +1,142 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Class that Posting and PostingVector use to write byte
 * streams into shared fixed-size byte[] arrays. The idea
 * is to allocate slices of increasing lengths. For
 * example, the first slice is 5 bytes, the next slice is
 * 14, etc. We start by writing our bytes into the first
 * 5 bytes. When we hit the end of the slice, we allocate
 * the next slice and then write the address of the new
 * slice into the last 4 bytes of the previous slice (the
 * "forwarding address").
 *
 * Each slice is filled with 0's initially, and we mark
 * the end with a non-zero byte. This way the methods
 * that are writing into the slice don't need to record
 * its length and instead allocate a new slice once they
 * hit a non-zero byte. */

import java.util.Arrays;

final class ByteBlockPool {

  public byte[][] buffers = new byte[10][];

  int bufferUpto = -1;                                        // Which buffer we are upto
  public int byteUpto = DocumentsWriter.BYTE_BLOCK_SIZE;      // Where we are in head buffer

  public byte[] buffer;                                       // Current head buffer
  public int byteOffset = -DocumentsWriter.BYTE_BLOCK_SIZE;   // Current head offset

  private boolean trackAllocations;
  DocumentsWriter docWriter;

  public ByteBlockPool(DocumentsWriter docWriter, boolean trackAllocations) {
    this.docWriter = docWriter;
    this.trackAllocations = trackAllocations;
  }

  public void reset() {
    if (bufferUpto != -1) {
      // We allocated at least one buffer

      for(int i=0;i<bufferUpto;i++)
        // Fully zero fill buffers that we fully used
        Arrays.fill(buffers[i], (byte) 0);

      // Partial zero fill the final buffer
      Arrays.fill(buffers[bufferUpto], 0, byteUpto, (byte) 0);

      if (bufferUpto > 0)
        // Recycle all but the first buffer
        docWriter.recycleByteBlocks(buffers, 1, 1+bufferUpto);

      // Re-use the first buffer
      bufferUpto = 0;
      byteUpto = 0;
      byteOffset = 0;
      buffer = buffers[0];
    }
  }

  public void nextBuffer() {
    if (1+bufferUpto == buffers.length) {
      byte[][] newBuffers = new byte[(int) (buffers.length*1.5)][];
      System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
      buffers = newBuffers;
    }
    buffer = buffers[1+bufferUpto] = docWriter.getByteBlock(trackAllocations);
    bufferUpto++;

    byteUpto = 0;
    byteOffset += DocumentsWriter.BYTE_BLOCK_SIZE;
  }

  public int newSlice(final int size) {
    if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE-size)
      nextBuffer();
    final int upto = byteUpto;
    byteUpto += size;
    buffer[byteUpto-1] = 16;
    return upto;
  }

  // Size of each slice. These arrays should be at most 16
  // elements. First array is just a compact way to encode
  // X+1 with a max. Second array is the length of each
  // slice, ie first slice is 5 bytes, next slice is 14
  // bytes, etc.
  final static int[] nextLevelArray = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
  final static int[] levelSizeArray = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200};
  final static int FIRST_LEVEL_SIZE = levelSizeArray[0];

  public int allocSlice(final byte[] slice, final int upto) {

    final int level = slice[upto] & 15;
    final int newLevel = nextLevelArray[level];
    final int newSize = levelSizeArray[newLevel];

    // Maybe allocate another block
    if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE-newSize)
      nextBuffer();

    final int newUpto = byteUpto;
    final int offset = newUpto + byteOffset;
    byteUpto += newSize;

    // Copy forward the past 3 bytes (which we are about
    // to overwrite with the forwarding address):
    buffer[newUpto] = slice[upto-3];
    buffer[newUpto+1] = slice[upto-2];
    buffer[newUpto+2] = slice[upto-1];

    // Write forwarding address at end of last slice:
    slice[upto-3] = (byte) (offset >>> 24);
    slice[upto-2] = (byte) (offset >>> 16);
    slice[upto-1] = (byte) (offset >>> 8);
    slice[upto] = (byte) offset;

    // Write new level:
    buffer[byteUpto-1] = (byte) (16|newLevel);

    return newUpto+3;
  }
}
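The slice scheme above is easiest to see in isolation. The following standalone sketch reproduces newSlice/allocSlice on a single flat byte[] (the real pool spreads slices across recycled blocks obtained from DocumentsWriter, which is omitted here); writeByte() shows how callers such as DocumentsWriterFieldData grow a stream when they hit the non-zero end marker:

import java.util.Arrays;

// Standalone illustration of the ByteBlockPool slice scheme; not Lucene API.
public class SliceSketch {
  static final int[] NEXT_LEVEL = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
  static final int[] LEVEL_SIZE = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200};

  byte[] buffer = new byte[32768];   // all zeros, like a freshly allocated block
  int byteUpto;                      // next free position in the buffer

  // Allocate the first (5 byte) slice of a stream; mark its end with 16 (level 0).
  int newSlice() {
    int start = byteUpto;
    byteUpto += LEVEL_SIZE[0];
    buffer[byteUpto - 1] = 16;
    return start;
  }

  // Grow a stream: allocate the next-level slice, copy forward the 3 bytes we
  // are about to overwrite, then write the 4-byte forwarding address over them.
  int allocSlice(int upto) {
    int level = buffer[upto] & 15;
    int newLevel = NEXT_LEVEL[level];
    int newSize = LEVEL_SIZE[newLevel];

    int newUpto = byteUpto;
    byteUpto += newSize;

    buffer[newUpto] = buffer[upto - 3];
    buffer[newUpto + 1] = buffer[upto - 2];
    buffer[newUpto + 2] = buffer[upto - 1];

    buffer[upto - 3] = (byte) (newUpto >>> 24);
    buffer[upto - 2] = (byte) (newUpto >>> 16);
    buffer[upto - 1] = (byte) (newUpto >>> 8);
    buffer[upto] = (byte) newUpto;

    buffer[byteUpto - 1] = (byte) (16 | newLevel);  // end marker of the new slice
    return newUpto + 3;                             // continue writing here
  }

  // Append one byte, growing the stream whenever the end marker is reached.
  int writeByte(int upto, byte b) {
    if (buffer[upto] != 0)
      upto = allocSlice(upto);
    buffer[upto] = b;
    return upto + 1;
  }

  public static void main(String[] args) {
    SliceSketch pool = new SliceSketch();
    int upto = pool.newSlice();
    for (int i = 1; i <= 30; i++)      // 30 bytes forces two slice upgrades
      upto = pool.writeByte(upto, (byte) i);
    System.out.println("wrote through position " + upto + ", allocated " + pool.byteUpto + " bytes");
    System.out.println(Arrays.toString(Arrays.copyOf(pool.buffer, pool.byteUpto)));
  }
}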
ByteSliceReader.java (new file)
@@ -0,0 +1,136 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;

/* IndexInput that knows how to read the byte slices written
 * by Posting and PostingVector. We read the bytes in
 * each slice until we hit the end of that slice at which
 * point we read the forwarding address of the next slice
 * and then jump to it. */
final class ByteSliceReader extends IndexInput {
  ByteBlockPool pool;
  int bufferUpto;
  byte[] buffer;
  public int upto;
  int limit;
  int level;
  public int bufferOffset;

  public int endIndex;

  public void init(ByteBlockPool pool, int startIndex, int endIndex) {

    assert endIndex-startIndex > 0;

    this.pool = pool;
    this.endIndex = endIndex;

    level = 0;
    bufferUpto = startIndex / DocumentsWriter.BYTE_BLOCK_SIZE;
    bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE;
    buffer = pool.buffers[bufferUpto];
    upto = startIndex & DocumentsWriter.BYTE_BLOCK_MASK;

    final int firstSize = ByteBlockPool.levelSizeArray[0];

    if (startIndex+firstSize >= endIndex) {
      // There is only this one slice to read
      limit = endIndex & DocumentsWriter.BYTE_BLOCK_MASK;
    } else
      limit = upto+firstSize-4;
  }

  public byte readByte() {
    // Assert that we are not @ EOF
    assert upto + bufferOffset < endIndex;
    if (upto == limit)
      nextSlice();
    return buffer[upto++];
  }

  public long writeTo(IndexOutput out) throws IOException {
    long size = 0;
    while(true) {
      if (limit + bufferOffset == endIndex) {
        assert endIndex - bufferOffset >= upto;
        out.writeBytes(buffer, upto, limit-upto);
        size += limit-upto;
        break;
      } else {
        out.writeBytes(buffer, upto, limit-upto);
        size += limit-upto;
        nextSlice();
      }
    }

    return size;
  }

  public void nextSlice() {

    // Skip to our next slice
    final int nextIndex = ((buffer[limit]&0xff)<<24) + ((buffer[1+limit]&0xff)<<16) + ((buffer[2+limit]&0xff)<<8) + (buffer[3+limit]&0xff);

    level = ByteBlockPool.nextLevelArray[level];
    final int newSize = ByteBlockPool.levelSizeArray[level];

    bufferUpto = nextIndex / DocumentsWriter.BYTE_BLOCK_SIZE;
    bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE;

    buffer = pool.buffers[bufferUpto];
    upto = nextIndex & DocumentsWriter.BYTE_BLOCK_MASK;

    if (nextIndex + newSize >= endIndex) {
      // We are advancing to the final slice
      assert endIndex - nextIndex > 0;
      limit = endIndex - bufferOffset;
    } else {
      // This is not the final slice (subtract 4 for the
      // forwarding address at the end of this new slice)
      limit = upto+newSize-4;
    }
  }

  public void readBytes(byte[] b, int offset, int len) {
    while(len > 0) {
      final int numLeft = limit-upto;
      if (numLeft < len) {
        // Read entire slice
        System.arraycopy(buffer, upto, b, offset, numLeft);
        offset += numLeft;
        len -= numLeft;
        nextSlice();
      } else {
        // This slice is the last one
        System.arraycopy(buffer, upto, b, offset, len);
        upto += len;
        break;
      }
    }
  }

  public long getFilePointer() {throw new RuntimeException("not implemented");}
  public long length() {throw new RuntimeException("not implemented");}
  public void seek(long pos) {throw new RuntimeException("not implemented");}
  public void close() {throw new RuntimeException("not implemented");}
}
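Reading such a stream back is the mirror image: consume bytes until the slice's data region ends, then decode the 4-byte forwarding address and jump. A companion sketch to the SliceSketch above (same single-buffer simplification, not Lucene API), following the same arithmetic as init() and nextSlice():

// Standalone illustration of the ByteSliceReader traversal; not Lucene API.
public class SliceReaderSketch {
  static final int[] NEXT_LEVEL = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
  static final int[] LEVEL_SIZE = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200};

  final byte[] buffer;
  int upto;        // next byte to read
  int limit;       // end of this slice's data region
  int level;
  final int endIndex;

  SliceReaderSketch(byte[] buffer, int startIndex, int endIndex) {
    this.buffer = buffer;
    this.endIndex = endIndex;
    upto = startIndex;
    level = 0;
    // Either the whole stream fits in the first slice, or the first slice's
    // last 4 bytes are a forwarding address and only the rest is data.
    limit = (startIndex + LEVEL_SIZE[0] >= endIndex) ? endIndex : startIndex + LEVEL_SIZE[0] - 4;
  }

  byte readByte() {
    if (upto == limit)
      nextSlice();
    return buffer[upto++];
  }

  void nextSlice() {
    // The 4 bytes starting at 'limit' hold the address of the next slice.
    int nextIndex = ((buffer[limit] & 0xff) << 24) + ((buffer[limit + 1] & 0xff) << 16)
                  + ((buffer[limit + 2] & 0xff) << 8) + (buffer[limit + 3] & 0xff);
    level = NEXT_LEVEL[level];
    int newSize = LEVEL_SIZE[level];
    upto = nextIndex;
    limit = (nextIndex + newSize >= endIndex) ? endIndex : nextIndex + newSize - 4;
  }
}

Feeding it the buffer produced by SliceSketch.main, with startIndex 0 and endIndex equal to the final write position, returns the bytes 1..30 in order, which is exactly how ByteSliceReader replays the freq/prox/vector streams.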
CharBlockPool.java (new file)
@@ -0,0 +1,56 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

final class CharBlockPool {

  public char[][] buffers = new char[10][];
  int numBuffer;

  int bufferUpto = -1;                                        // Which buffer we are upto
  public int byteUpto = DocumentsWriter.CHAR_BLOCK_SIZE;      // Where we are in head buffer

  public char[] buffer;                                       // Current head buffer
  public int byteOffset = -DocumentsWriter.CHAR_BLOCK_SIZE;   // Current head offset
  private DocumentsWriter docWriter;

  public CharBlockPool(DocumentsWriter docWriter) {
    this.docWriter = docWriter;
  }

  public void reset() {
    docWriter.recycleCharBlocks(buffers, 1+bufferUpto);
    bufferUpto = -1;
    byteUpto = DocumentsWriter.CHAR_BLOCK_SIZE;
    byteOffset = -DocumentsWriter.CHAR_BLOCK_SIZE;
  }

  public void nextBuffer() {
    if (1+bufferUpto == buffers.length) {
      char[][] newBuffers = new char[(int) (buffers.length*1.5)][];
      System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
      buffers = newBuffers;
    }
    buffer = buffers[1+bufferUpto] = docWriter.getCharBlock();
    bufferUpto++;

    byteUpto = 0;
    byteOffset += DocumentsWriter.CHAR_BLOCK_SIZE;
  }
}
File diff suppressed because it is too large
DocumentsWriterFieldData.java (new file)
@@ -0,0 +1,773 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;

/** Used by DocumentsWriter to hold data associated with a
 *  single field in a single ThreadState, including the
 *  Postings hash. A document may have many occurrences for
 *  a given field name; we gather all such occurrences here
 *  (in docFields) so that we can process the entire field
 *  at once. */

final class DocumentsWriterFieldData implements Comparable {

  final DocumentsWriterThreadState threadState;
  FieldInfo fieldInfo;

  int fieldCount;
  Fieldable[] docFields = new Fieldable[1];

  int lastGen = -1;
  DocumentsWriterFieldData next;

  boolean doNorms;
  boolean doVectors;
  boolean doVectorPositions;
  boolean doVectorOffsets;
  boolean postingsCompacted;

  int numPostings;

  Posting[] postingsHash;
  int postingsHashSize;
  int postingsHashHalfSize;
  int postingsHashMask;

  int position;
  int length;
  int offset;
  float boost;
  int postingsVectorsUpto;

  public DocumentsWriterFieldData(DocumentsWriterThreadState threadState, FieldInfo fieldInfo) {
    this.fieldInfo = fieldInfo;
    this.threadState = threadState;
  }

  void resetPostingArrays() {
    if (!postingsCompacted)
      compactPostings();
    threadState.docWriter.recyclePostings(this.postingsHash, numPostings);
    Arrays.fill(postingsHash, 0, postingsHash.length, null);
    postingsCompacted = false;
    numPostings = 0;
  }

  void initPostingArrays() {
    // Target hash fill factor of <= 50%
    // NOTE: must be a power of two for hash collision
    // strategy to work correctly
    postingsHashSize = 4;
    postingsHashHalfSize = 2;
    postingsHashMask = postingsHashSize-1;
    postingsHash = new Posting[postingsHashSize];
  }

  public int compareTo(Object o) {
    return fieldInfo.name.compareTo(((DocumentsWriterFieldData) o).fieldInfo.name);
  }

  private void compactPostings() {
    int upto = 0;
    for(int i=0;i<postingsHashSize;i++)
      if (postingsHash[i] != null)
        postingsHash[upto++] = postingsHash[i];

    assert upto == numPostings;
    postingsCompacted = true;
  }

  /** Collapse the hash table & sort in-place. */
  public Posting[] sortPostings() {
    compactPostings();
    threadState.doPostingSort(postingsHash, numPostings);
    return postingsHash;
  }

  /** Process all occurrences of one field in the document. */
  public void processField(Analyzer analyzer) throws IOException, AbortException {
    length = 0;
    position = 0;
    offset = 0;
    boost = threadState.docBoost;

    final int maxFieldLength = threadState.docWriter.writer.getMaxFieldLength();

    final int limit = fieldCount;
    final Fieldable[] docFieldsFinal = docFields;

    boolean doWriteVectors = true;

    // Walk through all occurrences in this doc for this
    // field:
    try {
      for(int j=0;j<limit;j++) {
        Fieldable field = docFieldsFinal[j];

        if (field.isIndexed())
          invertField(field, analyzer, maxFieldLength);

        if (field.isStored()) {
          threadState.numStoredFields++;
          boolean success = false;
          try {
            threadState.localFieldsWriter.writeField(fieldInfo, field);
            success = true;
          } finally {
            // If we hit an exception inside
            // localFieldsWriter.writeField, the
            // contents of fdtLocal can be corrupt, so
            // we must discard all stored fields for
            // this document:
            if (!success)
              threadState.fdtLocal.reset();
          }
        }

        docFieldsFinal[j] = null;
      }
    } catch (AbortException ae) {
      doWriteVectors = false;
      throw ae;
    } finally {
      if (postingsVectorsUpto > 0) {
        try {
          if (doWriteVectors) {
            // Add term vectors for this field
            boolean success = false;
            try {
              writeVectors(fieldInfo);
              success = true;
            } finally {
              if (!success) {
                // If we hit an exception inside
                // writeVectors, the contents of tvfLocal
                // can be corrupt, so we must discard all
                // term vectors for this document:
                threadState.numVectorFields = 0;
                threadState.tvfLocal.reset();
              }
            }
          }
        } finally {
          if (postingsVectorsUpto > threadState.maxPostingsVectors)
            threadState.maxPostingsVectors = postingsVectorsUpto;
          postingsVectorsUpto = 0;
          threadState.vectorsPool.reset();
        }
      }
    }
  }

  int offsetEnd;
  Token localToken = new Token();

  /* Invert one occurrence of one field in the document */
  public void invertField(Fieldable field, Analyzer analyzer, final int maxFieldLength) throws IOException, AbortException {

    if (length>0)
      position += analyzer.getPositionIncrementGap(fieldInfo.name);

    if (!field.isTokenized()) {               // un-tokenized field
      String stringValue = field.stringValue();
      final int valueLength = stringValue.length();
      Token token = localToken;
      token.clear();
      char[] termBuffer = token.termBuffer();
      if (termBuffer.length < valueLength)
        termBuffer = token.resizeTermBuffer(valueLength);
      stringValue.getChars(0, valueLength, termBuffer, 0);
      token.setTermLength(valueLength);
      token.setStartOffset(offset);
      token.setEndOffset(offset + stringValue.length());
      addPosition(token);
      offset += stringValue.length();
      length++;
    } else {                                  // tokenized field
      final TokenStream stream;
      final TokenStream streamValue = field.tokenStreamValue();

      if (streamValue != null)
        stream = streamValue;
      else {
        // the field does not have a TokenStream,
        // so we have to obtain one from the analyzer
        final Reader reader;                  // find or make Reader
        final Reader readerValue = field.readerValue();

        if (readerValue != null)
          reader = readerValue;
        else {
          String stringValue = field.stringValue();
          if (stringValue == null)
            throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
          threadState.stringReader.init(stringValue);
          reader = threadState.stringReader;
        }

        // Tokenize field and add to postingTable
        stream = analyzer.reusableTokenStream(fieldInfo.name, reader);
      }

      // reset the TokenStream to the first token
      stream.reset();

      try {
        offsetEnd = offset-1;
        Token token;
        for(;;) {
          token = stream.next(localToken);
          if (token == null) break;
          position += (token.getPositionIncrement() - 1);
          addPosition(token);
          if (++length >= maxFieldLength) {
            if (threadState.docWriter.infoStream != null)
              threadState.docWriter.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");
            break;
          }
        }
        offset = offsetEnd+1;
      } finally {
        stream.close();
      }
    }

    boost *= field.getBoost();
  }

  /** Only called when term vectors are enabled. This
   *  is called the first time we see a given term for
   *  each document, to allocate a PostingVector
   *  instance that is used to record data needed to
   *  write the posting vectors. */
  private PostingVector addNewVector() {

    if (postingsVectorsUpto == threadState.postingsVectors.length) {
      final int newSize;
      if (threadState.postingsVectors.length < 2)
        newSize = 2;
      else
        newSize = (int) (1.5*threadState.postingsVectors.length);
      PostingVector[] newArray = new PostingVector[newSize];
      System.arraycopy(threadState.postingsVectors, 0, newArray, 0, threadState.postingsVectors.length);
      threadState.postingsVectors = newArray;
    }

    p.vector = threadState.postingsVectors[postingsVectorsUpto];
    if (p.vector == null)
      p.vector = threadState.postingsVectors[postingsVectorsUpto] = new PostingVector();

    postingsVectorsUpto++;

    final PostingVector v = p.vector;
    v.p = p;

    if (doVectorPositions) {
      final int upto = threadState.vectorsPool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
      v.posStart = v.posUpto = threadState.vectorsPool.byteOffset + upto;
    }

    if (doVectorOffsets) {
      final int upto = threadState.vectorsPool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
      v.offsetStart = v.offsetUpto = threadState.vectorsPool.byteOffset + upto;
    }

    return v;
  }

  int offsetStartCode;
  int offsetStart;

  // Current posting we are working on
  Posting p;
  PostingVector vector;

  /** Test whether the text for current Posting p equals
   *  current tokenText. */
  boolean postingEquals(final char[] tokenText, final int tokenTextLen) {

    final char[] text = threadState.charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    assert text != null;
    int pos = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

    int tokenPos = 0;
    for(;tokenPos<tokenTextLen;pos++,tokenPos++)
      if (tokenText[tokenPos] != text[pos])
        return false;
    return 0xffff == text[pos];
  }

  /** This is the hotspot of indexing: it's called once
   *  for every term of every document. Its job is to
   *  update the postings byte stream (Postings hash)
   *  based on the occurrence of a single term. */
  private void addPosition(Token token) throws AbortException {

    final Payload payload = token.getPayload();

    // Get the text of this term. Term can either
    // provide a String token or offset into a char[]
    // array
    final char[] tokenText = token.termBuffer();
    final int tokenTextLen = token.termLength();

    int code = 0;

    // Compute hashcode
    int downto = tokenTextLen;
    while (downto > 0)
      code = (code*31) + tokenText[--downto];

    // System.out.println("  addPosition: buffer=" + new String(tokenText, 0, tokenTextLen) + " pos=" + position + " offsetStart=" + (offset+token.startOffset()) + " offsetEnd=" + (offset + token.endOffset()) + " docID=" + docID + " doPos=" + doVectorPositions + " doOffset=" + doVectorOffsets);

    int hashPos = code & postingsHashMask;

    assert !postingsCompacted;

    // Locate Posting in hash
    p = postingsHash[hashPos];

    if (p != null && !postingEquals(tokenText, tokenTextLen)) {
      // Conflict: keep searching different locations in
      // the hash table.
      final int inc = ((code>>8)+code)|1;
      do {
        code += inc;
        hashPos = code & postingsHashMask;
        p = postingsHash[hashPos];
      } while (p != null && !postingEquals(tokenText, tokenTextLen));
    }

    final int proxCode;

    // If we hit an exception below, it's possible the
    // posting list or term vectors data will be
    // partially written and thus inconsistent if
    // flushed, so we have to abort all documents
    // since the last flush:

    try {

      if (p != null) {                        // term seen since last flush

        if (threadState.docID != p.lastDocID) { // term not yet seen in this doc

          // System.out.println("    seen before (new docID=" + docID + ") freqUpto=" + p.freqUpto +" proxUpto=" + p.proxUpto);

          assert p.docFreq > 0;

          // Now that we know doc freq for previous doc,
          // write it & lastDocCode
          freqUpto = p.freqUpto & DocumentsWriter.BYTE_BLOCK_MASK;
          freq = threadState.postingsPool.buffers[p.freqUpto >> DocumentsWriter.BYTE_BLOCK_SHIFT];
          if (1 == p.docFreq)
            writeFreqVInt(p.lastDocCode|1);
          else {
            writeFreqVInt(p.lastDocCode);
            writeFreqVInt(p.docFreq);
          }
          p.freqUpto = freqUpto + (p.freqUpto & DocumentsWriter.BYTE_BLOCK_NOT_MASK);

          if (doVectors) {
            vector = addNewVector();
            if (doVectorOffsets) {
              offsetStartCode = offsetStart = offset + token.startOffset();
              offsetEnd = offset + token.endOffset();
            }
          }

          proxCode = position;

          p.docFreq = 1;

          // Store code so we can write this after we're
          // done with this new doc
          p.lastDocCode = (threadState.docID-p.lastDocID) << 1;
          p.lastDocID = threadState.docID;

        } else {                              // term already seen in this doc
          // System.out.println("    seen before (same docID=" + docID + ") proxUpto=" + p.proxUpto);
          p.docFreq++;

          proxCode = position-p.lastPosition;

          if (doVectors) {
            vector = p.vector;
            if (vector == null)
              vector = addNewVector();
            if (doVectorOffsets) {
              offsetStart = offset + token.startOffset();
              offsetEnd = offset + token.endOffset();
              offsetStartCode = offsetStart-vector.lastOffset;
            }
          }
        }
      } else {                                // term not seen before
        // System.out.println("    never seen docID=" + docID);

        // Refill?
        if (0 == threadState.postingsFreeCount) {
          threadState.docWriter.getPostings(threadState.postingsFreeList);
          threadState.postingsFreeCount = threadState.postingsFreeList.length;
        }

        final int textLen1 = 1+tokenTextLen;
        if (textLen1 + threadState.charPool.byteUpto > DocumentsWriter.CHAR_BLOCK_SIZE) {
          if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE) {
            // Just skip this term, to remain as robust as
            // possible during indexing. A TokenFilter
            // can be inserted into the analyzer chain if
            // other behavior is wanted (pruning the term
            // to a prefix, throwing an exception, etc).
            if (threadState.maxTermPrefix == null)
              threadState.maxTermPrefix = new String(tokenText, 0, 30);

            // Still increment position:
            position++;
            return;
          }
          threadState.charPool.nextBuffer();
        }

        final char[] text = threadState.charPool.buffer;
        final int textUpto = threadState.charPool.byteUpto;

        // Pull next free Posting from free list
        p = threadState.postingsFreeList[--threadState.postingsFreeCount];

        p.textStart = textUpto + threadState.charPool.byteOffset;
        threadState.charPool.byteUpto += textLen1;

        System.arraycopy(tokenText, 0, text, textUpto, tokenTextLen);

        text[textUpto+tokenTextLen] = 0xffff;

        assert postingsHash[hashPos] == null;

        postingsHash[hashPos] = p;
        numPostings++;

        if (numPostings == postingsHashHalfSize)
          rehashPostings(2*postingsHashSize);

        // Init first slice for freq & prox streams
        final int upto1 = threadState.postingsPool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
        p.freqStart = p.freqUpto = threadState.postingsPool.byteOffset + upto1;

        final int upto2 = threadState.postingsPool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
        p.proxStart = p.proxUpto = threadState.postingsPool.byteOffset + upto2;

        p.lastDocCode = threadState.docID << 1;
        p.lastDocID = threadState.docID;
        p.docFreq = 1;

        if (doVectors) {
          vector = addNewVector();
          if (doVectorOffsets) {
            offsetStart = offsetStartCode = offset + token.startOffset();
            offsetEnd = offset + token.endOffset();
          }
        }

        proxCode = position;
      }

      proxUpto = p.proxUpto & DocumentsWriter.BYTE_BLOCK_MASK;
      prox = threadState.postingsPool.buffers[p.proxUpto >> DocumentsWriter.BYTE_BLOCK_SHIFT];
      assert prox != null;

      if (payload != null && payload.length > 0) {
        writeProxVInt((proxCode<<1)|1);
        writeProxVInt(payload.length);
        writeProxBytes(payload.data, payload.offset, payload.length);
        fieldInfo.storePayloads = true;
      } else
        writeProxVInt(proxCode<<1);

      p.proxUpto = proxUpto + (p.proxUpto & DocumentsWriter.BYTE_BLOCK_NOT_MASK);

      p.lastPosition = position++;

      if (doVectorPositions) {
        posUpto = vector.posUpto & DocumentsWriter.BYTE_BLOCK_MASK;
        pos = threadState.vectorsPool.buffers[vector.posUpto >> DocumentsWriter.BYTE_BLOCK_SHIFT];
        writePosVInt(proxCode);
        vector.posUpto = posUpto + (vector.posUpto & DocumentsWriter.BYTE_BLOCK_NOT_MASK);
      }

      if (doVectorOffsets) {
        offsetUpto = vector.offsetUpto & DocumentsWriter.BYTE_BLOCK_MASK;
        offsets = threadState.vectorsPool.buffers[vector.offsetUpto >> DocumentsWriter.BYTE_BLOCK_SHIFT];
        writeOffsetVInt(offsetStartCode);
        writeOffsetVInt(offsetEnd-offsetStart);
        vector.lastOffset = offsetEnd;
        vector.offsetUpto = offsetUpto + (vector.offsetUpto & DocumentsWriter.BYTE_BLOCK_NOT_MASK);
      }
    } catch (Throwable t) {
      throw new AbortException(t, threadState.docWriter);
    }
  }

  /** Write vInt into freq stream of current Posting */
  public void writeFreqVInt(int i) {
    while ((i & ~0x7F) != 0) {
      writeFreqByte((byte)((i & 0x7f) | 0x80));
      i >>>= 7;
    }
    writeFreqByte((byte) i);
  }

  /** Write vInt into prox stream of current Posting */
  public void writeProxVInt(int i) {
    while ((i & ~0x7F) != 0) {
      writeProxByte((byte)((i & 0x7f) | 0x80));
      i >>>= 7;
    }
    writeProxByte((byte) i);
  }

  /** Write byte into freq stream of current Posting */
  byte[] freq;
  int freqUpto;
  public void writeFreqByte(byte b) {
    assert freq != null;
    if (freq[freqUpto] != 0) {
      freqUpto = threadState.postingsPool.allocSlice(freq, freqUpto);
      freq = threadState.postingsPool.buffer;
      p.freqUpto = threadState.postingsPool.byteOffset;
    }
    freq[freqUpto++] = b;
  }

  /** Write byte into prox stream of current Posting */
  byte[] prox;
  int proxUpto;
  public void writeProxByte(byte b) {
    assert prox != null;
    if (prox[proxUpto] != 0) {
      proxUpto = threadState.postingsPool.allocSlice(prox, proxUpto);
      prox = threadState.postingsPool.buffer;
      p.proxUpto = threadState.postingsPool.byteOffset;
      assert prox != null;
    }
    prox[proxUpto++] = b;
    assert proxUpto != prox.length;
  }

  /** Currently only used to copy a payload into the prox
   *  stream. */
  public void writeProxBytes(byte[] b, int offset, int len) {
    final int offsetEnd = offset + len;
    while(offset < offsetEnd) {
      if (prox[proxUpto] != 0) {
        // End marker
        proxUpto = threadState.postingsPool.allocSlice(prox, proxUpto);
        prox = threadState.postingsPool.buffer;
        p.proxUpto = threadState.postingsPool.byteOffset;
      }

      prox[proxUpto++] = b[offset++];
      assert proxUpto != prox.length;
    }
  }

  /** Write vInt into offsets stream of current
   *  PostingVector */
  public void writeOffsetVInt(int i) {
    while ((i & ~0x7F) != 0) {
      writeOffsetByte((byte)((i & 0x7f) | 0x80));
      i >>>= 7;
    }
    writeOffsetByte((byte) i);
  }

  byte[] offsets;
  int offsetUpto;

  /** Write byte into offsets stream of current
   *  PostingVector */
  public void writeOffsetByte(byte b) {
    assert offsets != null;
    if (offsets[offsetUpto] != 0) {
      offsetUpto = threadState.vectorsPool.allocSlice(offsets, offsetUpto);
      offsets = threadState.vectorsPool.buffer;
      vector.offsetUpto = threadState.vectorsPool.byteOffset;
    }
    offsets[offsetUpto++] = b;
  }

  /** Write vInt into pos stream of current
   *  PostingVector */
  public void writePosVInt(int i) {
    while ((i & ~0x7F) != 0) {
      writePosByte((byte)((i & 0x7f) | 0x80));
      i >>>= 7;
    }
    writePosByte((byte) i);
  }

  byte[] pos;
  int posUpto;

  /** Write byte into pos stream of current
   *  PostingVector */
  public void writePosByte(byte b) {
    assert pos != null;
    if (pos[posUpto] != 0) {
      posUpto = threadState.vectorsPool.allocSlice(pos, posUpto);
      pos = threadState.vectorsPool.buffer;
      vector.posUpto = threadState.vectorsPool.byteOffset;
    }
    pos[posUpto++] = b;
  }

  /** Called when postings hash is too small (> 50%
   *  occupied) or too large (< 20% occupied). */
  void rehashPostings(final int newSize) {

    final int newMask = newSize-1;

    Posting[] newHash = new Posting[newSize];
    for(int i=0;i<postingsHashSize;i++) {
      Posting p0 = postingsHash[i];
      if (p0 != null) {
        final int start = p0.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
        final char[] text = threadState.charPool.buffers[p0.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
        int pos = start;
        while(text[pos] != 0xffff)
          pos++;
        int code = 0;
        while (pos > start)
          code = (code*31) + text[--pos];

        int hashPos = code & newMask;
        assert hashPos >= 0;
        if (newHash[hashPos] != null) {
          final int inc = ((code>>8)+code)|1;
          do {
            code += inc;
            hashPos = code & newMask;
          } while (newHash[hashPos] != null);
        }
        newHash[hashPos] = p0;
      }
    }

    postingsHashMask = newMask;
    postingsHash = newHash;
    postingsHashSize = newSize;
    postingsHashHalfSize = newSize >> 1;
  }

  final ByteSliceReader vectorSliceReader = new ByteSliceReader();

  /** Called once per field per document if term vectors
   *  are enabled, to write the vectors to
   *  RAMOutputStream, which is then quickly flushed to
   *  the real term vectors files in the Directory. */
  void writeVectors(FieldInfo fieldInfo) throws IOException {

    assert fieldInfo.storeTermVector;
    assert threadState.vectorFieldsInOrder(fieldInfo);

    threadState.vectorFieldNumbers[threadState.numVectorFields] = fieldInfo.number;
    threadState.vectorFieldPointers[threadState.numVectorFields] = threadState.tvfLocal.getFilePointer();
    threadState.numVectorFields++;

    final int numPostingsVectors = postingsVectorsUpto;
    final PostingVector[] postingsVectors = threadState.postingsVectors;

    final IndexOutput tvfLocal = threadState.tvfLocal;

    threadState.tvfLocal.writeVInt(numPostingsVectors);
    byte bits = 0x0;
    if (doVectorPositions)
      bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
    if (doVectorOffsets)
      bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
    threadState.tvfLocal.writeByte(bits);

    threadState.doVectorSort(postingsVectors, numPostingsVectors);

    Posting lastPosting = null;

    final ByteSliceReader reader = vectorSliceReader;
    final char[][] charBuffers = threadState.charPool.buffers;

    for(int j=0;j<numPostingsVectors;j++) {
      final PostingVector vector = postingsVectors[j];
      Posting posting = vector.p;
      final int freq = posting.docFreq;

      final int prefix;
      final char[] text2 = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
      final int start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
      int pos2 = start2;

      // Compute common prefix between last term and
      // this term
      if (lastPosting == null)
        prefix = 0;
      else {
        final char[] text1 = charBuffers[lastPosting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
        final int start1 = lastPosting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
        int pos1 = start1;
        while(true) {
          final char c1 = text1[pos1];
          final char c2 = text2[pos2];
          if (c1 != c2 || c1 == 0xffff) {
            prefix = pos1-start1;
            break;
          }
          pos1++;
          pos2++;
        }
      }
      lastPosting = posting;

      // Compute length
      while(text2[pos2] != 0xffff)
        pos2++;

      final int suffix = pos2 - start2 - prefix;
      tvfLocal.writeVInt(prefix);
      tvfLocal.writeVInt(suffix);
      tvfLocal.writeChars(text2, start2 + prefix, suffix);
      tvfLocal.writeVInt(freq);

      if (doVectorPositions) {
        reader.init(threadState.vectorsPool, vector.posStart, vector.posUpto);
        reader.writeTo(tvfLocal);
      }

      if (doVectorOffsets) {
        reader.init(threadState.vectorsPool, vector.offsetStart, vector.offsetUpto);
        reader.writeTo(tvfLocal);
      }
    }
  }
}
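addPosition() and rehashPostings() above locate a term with the same two ingredients: a rolling 31-based hash over the term's characters (walked from the last character down) and, on collision, an odd probe increment derived from the hash, which eventually visits every slot of the power-of-two table. A standalone sketch of just that probing scheme (String keys stand in for Posting entries; not Lucene API):

// Standalone illustration of the open-addressing scheme used by the postings hash.
public class PostingsHashSketch {
  String[] table = new String[8];   // size must be a power of two
  int mask = table.length - 1;

  void add(String term) {
    int code = 0;
    for (int downto = term.length(); downto > 0; )   // last char first, as in addPosition
      code = code * 31 + term.charAt(--downto);

    int hashPos = code & mask;
    if (table[hashPos] != null && !table[hashPos].equals(term)) {
      // Collision: step by an odd, hash-derived increment; because the table
      // size is a power of two, an odd step cycles through every slot.
      final int inc = ((code >> 8) + code) | 1;
      do {
        code += inc;
        hashPos = code & mask;
      } while (table[hashPos] != null && !table[hashPos].equals(term));
    }
    table[hashPos] = term;
  }

  public static void main(String[] args) {
    PostingsHashSketch h = new PostingsHashSketch();
    for (String t : new String[] {"lucene", "index", "term", "lucene"})
      h.add(t);
    for (int i = 0; i < h.table.length; i++)
      System.out.println(i + " -> " + h.table[i]);
  }
}

The real table is kept at most half full (rehashPostings doubles it at 50% occupancy), so probe sequences stay short.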
DocumentsWriterFieldMergeState.java (new file)
@@ -0,0 +1,89 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

/** Used by DocumentsWriter to merge the postings from
 *  multiple ThreadStates when creating a segment */
final class DocumentsWriterFieldMergeState {

  DocumentsWriterFieldData field;

  Posting[] postings;

  private Posting p;
  char[] text;
  int textOffset;

  private int postingUpto = -1;

  ByteSliceReader freq = new ByteSliceReader();
  ByteSliceReader prox = new ByteSliceReader();

  int docID;
  int termFreq;

  boolean nextTerm() throws IOException {
    postingUpto++;
    if (postingUpto == field.numPostings)
      return false;

    p = postings[postingUpto];
    docID = 0;

    text = field.threadState.charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    textOffset = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

    if (p.freqUpto > p.freqStart)
      freq.init(field.threadState.postingsPool, p.freqStart, p.freqUpto);
    else
      freq.bufferOffset = freq.upto = freq.endIndex = 0;

    prox.init(field.threadState.postingsPool, p.proxStart, p.proxUpto);

    // Should always be true
    boolean result = nextDoc();
    assert result;

    return true;
  }

  public boolean nextDoc() throws IOException {
    if (freq.bufferOffset + freq.upto == freq.endIndex) {
      if (p.lastDocCode != -1) {
        // Return last doc
        docID = p.lastDocID;
        termFreq = p.docFreq;
        p.lastDocCode = -1;
        return true;
      } else
        // EOF
        return false;
    }

    final int code = freq.readVInt();
    docID += code >>> 1;
    if ((code & 1) != 0)
      termFreq = 1;
    else
      termFreq = freq.readVInt();

    return true;
  }
}
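nextDoc() above decodes the freq-stream convention that addPosition() writes: each entry starts with the doc delta shifted left by one, with the low bit set when the term occurred exactly once in that doc (in which case no separate freq value follows); the current doc's code is additionally buffered on the Posting itself until the next doc arrives, which is the p.lastDocCode != -1 branch. A standalone round-trip sketch of the shifted-delta convention, with plain ints standing in for the vInt-encoded bytes (not Lucene API):

import java.util.ArrayList;
import java.util.List;

// Standalone illustration of the docDelta/freq encoding decoded by nextDoc().
public class FreqStreamSketch {

  static void encode(List<Integer> out, int docDelta, int freq) {
    if (freq == 1) {
      out.add((docDelta << 1) | 1);   // low bit set: freq is implicitly 1
    } else {
      out.add(docDelta << 1);
      out.add(freq);                  // explicit freq follows
    }
  }

  public static void main(String[] args) {
    List<Integer> stream = new ArrayList<Integer>();
    encode(stream, 3, 1);   // doc 3, freq 1
    encode(stream, 4, 5);   // doc 7, freq 5
    encode(stream, 1, 2);   // doc 8, freq 2

    // Decode the same way nextDoc() does:
    int docID = 0;
    for (int i = 0; i < stream.size(); ) {
      int code = stream.get(i++);
      docID += code >>> 1;
      int termFreq = ((code & 1) != 0) ? 1 : stream.get(i++);
      System.out.println("doc=" + docID + " freq=" + termFreq);
    }
  }
}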
@ -0,0 +1,719 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.List;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.Analyzer;

/** Used by DocumentsWriter to maintain per-thread state.
 *  We keep a separate Posting hash and other state for each
 *  thread and then merge postings hashes from all threads
 *  when writing the segment. */
final class DocumentsWriterThreadState {

  Posting[] postingsFreeList;                        // Free Posting instances
  int postingsFreeCount;

  RAMOutputStream tvfLocal = new RAMOutputStream();  // Term vectors for one doc
  RAMOutputStream fdtLocal = new RAMOutputStream();  // Stored fields for one doc
  FieldsWriter localFieldsWriter;                    // Fields for one doc

  long[] vectorFieldPointers;
  int[] vectorFieldNumbers;

  boolean isIdle = true;                             // Whether we are in use
  int numThreads = 1;                                // Number of threads that use this instance

  int docID;                                         // docID we are now working on
  int numStoredFields;                               // How many stored fields in current doc
  float docBoost;                                    // Boost for current doc

  DocumentsWriterFieldData[] fieldDataArray;         // Fields touched by current doc
  int numFieldData;                                  // How many fields in current doc
  int numVectorFields;                               // How many vector fields in current doc

  DocumentsWriterFieldData[] allFieldDataArray = new DocumentsWriterFieldData[10]; // All FieldData instances
  int numAllFieldData;
  DocumentsWriterFieldData[] fieldDataHash;          // Hash FieldData instances by field name
  int fieldDataHashMask;
  String maxTermPrefix;                              // Non-null prefix of a too-large term if this
                                                     // doc has one

  boolean doFlushAfter;

  final DocumentsWriter docWriter;

  final ByteBlockPool postingsPool;
  final ByteBlockPool vectorsPool;
  final CharBlockPool charPool;

  public DocumentsWriterThreadState(DocumentsWriter docWriter) {
    this.docWriter = docWriter;
    fieldDataArray = new DocumentsWriterFieldData[8];

    fieldDataHash = new DocumentsWriterFieldData[16];
    fieldDataHashMask = 15;

    vectorFieldPointers = new long[10];
    vectorFieldNumbers = new int[10];
    postingsFreeList = new Posting[256];
    postingsFreeCount = 0;

    postingsPool = new ByteBlockPool(docWriter, true);
    vectorsPool = new ByteBlockPool(docWriter, false);
    charPool = new CharBlockPool(docWriter);
  }

  /** Clear the postings hash and return objects back to
   *  shared pool */
  public void resetPostings() throws IOException {
    fieldGen = 0;
    maxPostingsVectors = 0;
    doFlushAfter = false;
    if (localFieldsWriter != null) {
      localFieldsWriter.close();
      localFieldsWriter = null;
    }
    postingsPool.reset();
    charPool.reset();
    docWriter.recyclePostings(postingsFreeList, postingsFreeCount);
    postingsFreeCount = 0;
    for(int i=0;i<numAllFieldData;i++) {
      DocumentsWriterFieldData fp = allFieldDataArray[i];
      fp.lastGen = -1;
      if (fp.numPostings > 0)
        fp.resetPostingArrays();
    }
  }

  /** Move all per-document state that was accumulated in
   *  the ThreadState into the "real" stores. */
  public void writeDocument() throws IOException, AbortException {

    // If we hit an exception while appending to the
    // stored fields or term vectors files, we have to
    // abort all documents since we last flushed because
    // it means those files are possibly inconsistent.
    try {

      docWriter.numDocsInStore++;

      // Append stored fields to the real FieldsWriter:
      docWriter.fieldsWriter.flushDocument(numStoredFields, fdtLocal);
      fdtLocal.reset();

      // Append term vectors to the real outputs:
      final IndexOutput tvx = docWriter.tvx;
      final IndexOutput tvd = docWriter.tvd;
      final IndexOutput tvf = docWriter.tvf;
      if (tvx != null) {
        tvx.writeLong(tvd.getFilePointer());
        tvx.writeLong(tvf.getFilePointer());
        tvd.writeVInt(numVectorFields);
        if (numVectorFields > 0) {
          for(int i=0;i<numVectorFields;i++)
            tvd.writeVInt(vectorFieldNumbers[i]);
          assert 0 == vectorFieldPointers[0];
          long lastPos = vectorFieldPointers[0];
          for(int i=1;i<numVectorFields;i++) {
            long pos = vectorFieldPointers[i];
            tvd.writeVLong(pos-lastPos);
            lastPos = pos;
          }
          tvfLocal.writeTo(tvf);
          tvfLocal.reset();
        }
      }

      // Append norms for the fields we saw:
      for(int i=0;i<numFieldData;i++) {
        DocumentsWriterFieldData fp = fieldDataArray[i];
        if (fp.doNorms) {
          BufferedNorms bn = docWriter.norms[fp.fieldInfo.number];
          assert bn != null;
          assert bn.upto <= docID;
          bn.fill(docID);
          float norm = fp.boost * docWriter.writer.getSimilarity().lengthNorm(fp.fieldInfo.name, fp.length);
          bn.add(norm);
        }
      }
    } catch (Throwable t) {
      // Forcefully idle this threadstate -- its state will
      // be reset by abort()
      isIdle = true;
      throw new AbortException(t, docWriter);
    }

    if (docWriter.bufferIsFull && !docWriter.flushPending) {
      docWriter.flushPending = true;
      doFlushAfter = true;
    }
  }
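writeDocument() above relies on the term-vector index file (tvx) holding, per document, two file pointers: where that document's entry starts in tvd and where it starts in tvf. A toy sketch of that two-level pointer bookkeeping using plain java.io streams; all names and payload writes here are stand-ins for the example, not Lucene APIs:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Toy illustration of the tvx -> (tvd, tvf) pointer layout kept by writeDocument().
public class VectorPointerSketch {
  public static void main(String[] args) throws IOException {
    DataOutputStream tvx = new DataOutputStream(new ByteArrayOutputStream());
    DataOutputStream tvd = new DataOutputStream(new ByteArrayOutputStream());
    DataOutputStream tvf = new DataOutputStream(new ByteArrayOutputStream());
    for (int doc = 0; doc < 3; doc++) {
      tvx.writeLong(tvd.size());     // pointer to this doc's field list in tvd
      tvx.writeLong(tvf.size());     // pointer to this doc's vector data in tvf
      tvd.writeInt(1);               // stand-in for the per-doc field numbers/pointers
      tvf.writeUTF("terms-" + doc);  // stand-in for the per-field term vector data
    }
  }
}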

  int fieldGen;

  /** Initializes shared state for this new document */
  void init(Document doc, int docID) throws IOException, AbortException {

    assert !isIdle;
    assert docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");

    this.docID = docID;
    docBoost = doc.getBoost();
    numStoredFields = 0;
    numFieldData = 0;
    numVectorFields = 0;
    maxTermPrefix = null;

    assert 0 == fdtLocal.length();
    assert 0 == fdtLocal.getFilePointer();
    assert 0 == tvfLocal.length();
    assert 0 == tvfLocal.getFilePointer();
    final int thisFieldGen = fieldGen++;

    List docFields = doc.getFields();
    final int numDocFields = docFields.size();
    boolean docHasVectors = false;

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):

    for(int i=0;i<numDocFields;i++) {
      Fieldable field = (Fieldable) docFields.get(i);

      FieldInfo fi = docWriter.fieldInfos.add(field.name(), field.isIndexed(), field.isTermVectorStored(),
                                              field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                              field.getOmitNorms(), false);
      if (fi.isIndexed && !fi.omitNorms) {
        // Maybe grow our buffered norms
        if (docWriter.norms.length <= fi.number) {
          int newSize = (int) ((1+fi.number)*1.25);
          BufferedNorms[] newNorms = new BufferedNorms[newSize];
          System.arraycopy(docWriter.norms, 0, newNorms, 0, docWriter.norms.length);
          docWriter.norms = newNorms;
        }

        if (docWriter.norms[fi.number] == null)
          docWriter.norms[fi.number] = new BufferedNorms();

        docWriter.hasNorms = true;
      }

      // Make sure we have a FieldData allocated
      int hashPos = fi.name.hashCode() & fieldDataHashMask;
      DocumentsWriterFieldData fp = fieldDataHash[hashPos];
      while(fp != null && !fp.fieldInfo.name.equals(fi.name))
        fp = fp.next;

      if (fp == null) {

        fp = new DocumentsWriterFieldData(this, fi);
        fp.next = fieldDataHash[hashPos];
        fieldDataHash[hashPos] = fp;

        if (numAllFieldData == allFieldDataArray.length) {
          int newSize = (int) (allFieldDataArray.length*1.5);
          int newHashSize = fieldDataHash.length*2;

          DocumentsWriterFieldData newArray[] = new DocumentsWriterFieldData[newSize];
          DocumentsWriterFieldData newHashArray[] = new DocumentsWriterFieldData[newHashSize];
          System.arraycopy(allFieldDataArray, 0, newArray, 0, numAllFieldData);

          // Rehash
          fieldDataHashMask = newSize-1;
          for(int j=0;j<fieldDataHash.length;j++) {
            DocumentsWriterFieldData fp0 = fieldDataHash[j];
            while(fp0 != null) {
              hashPos = fp0.fieldInfo.name.hashCode() & fieldDataHashMask;
              DocumentsWriterFieldData nextFP0 = fp0.next;
              fp0.next = newHashArray[hashPos];
              newHashArray[hashPos] = fp0;
              fp0 = nextFP0;
            }
          }

          allFieldDataArray = newArray;
          fieldDataHash = newHashArray;
        }
        allFieldDataArray[numAllFieldData++] = fp;
      } else {
        assert fp.fieldInfo == fi;
      }

      if (thisFieldGen != fp.lastGen) {

        // First time we're seeing this field for this doc
        fp.lastGen = thisFieldGen;
        fp.fieldCount = 0;
        fp.doVectors = fp.doVectorPositions = fp.doVectorOffsets = false;
        fp.doNorms = fi.isIndexed && !fi.omitNorms;

        if (numFieldData == fieldDataArray.length) {
          int newSize = fieldDataArray.length*2;
          DocumentsWriterFieldData newArray[] = new DocumentsWriterFieldData[newSize];
          System.arraycopy(fieldDataArray, 0, newArray, 0, numFieldData);
          fieldDataArray = newArray;
        }
        fieldDataArray[numFieldData++] = fp;
      }

      if (field.isTermVectorStored()) {
        if (!fp.doVectors && numVectorFields++ == vectorFieldPointers.length) {
          final int newSize = (int) (numVectorFields*1.5);
          vectorFieldPointers = new long[newSize];
          vectorFieldNumbers = new int[newSize];
        }
        fp.doVectors = true;
        docHasVectors = true;

        fp.doVectorPositions |= field.isStorePositionWithTermVector();
        fp.doVectorOffsets |= field.isStoreOffsetWithTermVector();
      }

      if (fp.fieldCount == fp.docFields.length) {
        Fieldable[] newArray = new Fieldable[fp.docFields.length*2];
        System.arraycopy(fp.docFields, 0, newArray, 0, fp.docFields.length);
        fp.docFields = newArray;
      }

      // Lazily allocate arrays for postings:
      if (field.isIndexed() && fp.postingsHash == null)
        fp.initPostingArrays();

      fp.docFields[fp.fieldCount++] = field;
    }

    // Maybe init the local & global fieldsWriter
    if (localFieldsWriter == null) {
      if (docWriter.fieldsWriter == null) {
        assert docWriter.docStoreSegment == null;
        assert docWriter.segment != null;
        docWriter.docStoreSegment = docWriter.segment;
        // If we hit an exception while init'ing the
        // fieldsWriter, we must abort this segment
        // because those files will be in an unknown
        // state:
        try {
          docWriter.fieldsWriter = new FieldsWriter(docWriter.directory, docWriter.docStoreSegment, docWriter.fieldInfos);
        } catch (Throwable t) {
          throw new AbortException(t, docWriter);
        }
        docWriter.files = null;
      }
      localFieldsWriter = new FieldsWriter(null, fdtLocal, docWriter.fieldInfos);
    }

    // First time we see a doc that has field(s) with
    // stored vectors, we init our tvx writer
    if (docHasVectors) {
      if (docWriter.tvx == null) {
        assert docWriter.docStoreSegment != null;
        // If we hit an exception while init'ing the term
        // vector output files, we must abort this segment
        // because those files will be in an unknown
        // state:
        try {
          docWriter.tvx = docWriter.directory.createOutput(docWriter.docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
          docWriter.tvx.writeInt(TermVectorsReader.FORMAT_VERSION2);
          docWriter.tvd = docWriter.directory.createOutput(docWriter.docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
          docWriter.tvd.writeInt(TermVectorsReader.FORMAT_VERSION2);
          docWriter.tvf = docWriter.directory.createOutput(docWriter.docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
          docWriter.tvf.writeInt(TermVectorsReader.FORMAT_VERSION2);

          // We must "catch up" for all docs before us
          // that had no vectors:
          for(int i=0;i<docWriter.numDocsInStore;i++) {
            docWriter.tvx.writeLong(docWriter.tvd.getFilePointer());
            docWriter.tvd.writeVInt(0);
            docWriter.tvx.writeLong(0);
          }
        } catch (Throwable t) {
          throw new AbortException(t, docWriter);
        }
        docWriter.files = null;
      }
      numVectorFields = 0;
    }
  }
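init() above resolves each field name to its DocumentsWriterFieldData through a small power-of-two hash table, chaining collisions through the next pointer and doubling and rehashing when the flat array of all FieldData instances fills up. A simplified, generic sketch of that lookup-or-insert pattern; the Node type and names are invented for the example, not part of the patch:

// Simplified version of the fieldDataHash lookup in init(): power-of-two table,
// bit mask instead of modulo, collisions chained through 'next'.
final class ChainedHashSketch {
  static final class Node {
    final String name;
    Node next;
    Node(String name) { this.name = name; }
  }

  private Node[] table = new Node[16];
  private int mask = 15;

  Node getOrAdd(String name) {
    final int slot = name.hashCode() & mask;
    Node n = table[slot];
    while (n != null && !n.name.equals(name))
      n = n.next;               // walk the collision chain
    if (n == null) {
      n = new Node(name);
      n.next = table[slot];     // push onto the head of the chain
      table[slot] = n;
    }
    return n;
  }
}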

  /** Do in-place sort of Posting array */
  void doPostingSort(Posting[] postings, int numPosting) {
    quickSort(postings, 0, numPosting-1);
  }

  void quickSort(Posting[] postings, int lo, int hi) {
    if (lo >= hi)
      return;
    else if (hi == 1+lo) {
      if (comparePostings(postings[lo], postings[hi]) > 0) {
        final Posting tmp = postings[lo];
        postings[lo] = postings[hi];
        postings[hi] = tmp;
      }
      return;
    }

    int mid = (lo + hi) >>> 1;

    if (comparePostings(postings[lo], postings[mid]) > 0) {
      Posting tmp = postings[lo];
      postings[lo] = postings[mid];
      postings[mid] = tmp;
    }

    if (comparePostings(postings[mid], postings[hi]) > 0) {
      Posting tmp = postings[mid];
      postings[mid] = postings[hi];
      postings[hi] = tmp;

      if (comparePostings(postings[lo], postings[mid]) > 0) {
        Posting tmp2 = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp2;
      }
    }

    int left = lo + 1;
    int right = hi - 1;

    if (left >= right)
      return;

    Posting partition = postings[mid];

    for (; ;) {
      while (comparePostings(postings[right], partition) > 0)
        --right;

      while (left < right && comparePostings(postings[left], partition) <= 0)
        ++left;

      if (left < right) {
        Posting tmp = postings[left];
        postings[left] = postings[right];
        postings[right] = tmp;
        --right;
      } else {
        break;
      }
    }

    quickSort(postings, lo, left);
    quickSort(postings, left + 1, hi);
  }

  /** Do in-place sort of PostingVector array */
  void doVectorSort(PostingVector[] postings, int numPosting) {
    quickSort(postings, 0, numPosting-1);
  }

  void quickSort(PostingVector[] postings, int lo, int hi) {
    if (lo >= hi)
      return;
    else if (hi == 1+lo) {
      if (comparePostings(postings[lo].p, postings[hi].p) > 0) {
        final PostingVector tmp = postings[lo];
        postings[lo] = postings[hi];
        postings[hi] = tmp;
      }
      return;
    }

    int mid = (lo + hi) >>> 1;

    if (comparePostings(postings[lo].p, postings[mid].p) > 0) {
      PostingVector tmp = postings[lo];
      postings[lo] = postings[mid];
      postings[mid] = tmp;
    }

    if (comparePostings(postings[mid].p, postings[hi].p) > 0) {
      PostingVector tmp = postings[mid];
      postings[mid] = postings[hi];
      postings[hi] = tmp;

      if (comparePostings(postings[lo].p, postings[mid].p) > 0) {
        PostingVector tmp2 = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp2;
      }
    }

    int left = lo + 1;
    int right = hi - 1;

    if (left >= right)
      return;

    PostingVector partition = postings[mid];

    for (; ;) {
      while (comparePostings(postings[right].p, partition.p) > 0)
        --right;

      while (left < right && comparePostings(postings[left].p, partition.p) <= 0)
        ++left;

      if (left < right) {
        PostingVector tmp = postings[left];
        postings[left] = postings[right];
        postings[right] = tmp;
        --right;
      } else {
        break;
      }
    }

    quickSort(postings, lo, left);
    quickSort(postings, left + 1, hi);
  }

  void quickSort(DocumentsWriterFieldData[] array, int lo, int hi) {
    if (lo >= hi)
      return;
    else if (hi == 1+lo) {
      if (array[lo].compareTo(array[hi]) > 0) {
        final DocumentsWriterFieldData tmp = array[lo];
        array[lo] = array[hi];
        array[hi] = tmp;
      }
      return;
    }

    int mid = (lo + hi) >>> 1;

    if (array[lo].compareTo(array[mid]) > 0) {
      DocumentsWriterFieldData tmp = array[lo];
      array[lo] = array[mid];
      array[mid] = tmp;
    }

    if (array[mid].compareTo(array[hi]) > 0) {
      DocumentsWriterFieldData tmp = array[mid];
      array[mid] = array[hi];
      array[hi] = tmp;

      if (array[lo].compareTo(array[mid]) > 0) {
        DocumentsWriterFieldData tmp2 = array[lo];
        array[lo] = array[mid];
        array[mid] = tmp2;
      }
    }

    int left = lo + 1;
    int right = hi - 1;

    if (left >= right)
      return;

    DocumentsWriterFieldData partition = array[mid];

    for (; ;) {
      while (array[right].compareTo(partition) > 0)
        --right;

      while (left < right && array[left].compareTo(partition) <= 0)
        ++left;

      if (left < right) {
        DocumentsWriterFieldData tmp = array[left];
        array[left] = array[right];
        array[right] = tmp;
        --right;
      } else {
        break;
      }
    }

    quickSort(array, lo, left);
    quickSort(array, left + 1, hi);
  }

  /** If there are fields we've seen but did not see again
   *  in the last run, then free them up.  Also reduce
   *  postings hash size. */
  void trimFields() {

    int upto = 0;
    for(int i=0;i<numAllFieldData;i++) {
      DocumentsWriterFieldData fp = allFieldDataArray[i];
      if (fp.lastGen == -1) {
        // This field was not seen since the previous
        // flush, so, free up its resources now

        // Unhash
        final int hashPos = fp.fieldInfo.name.hashCode() & fieldDataHashMask;
        DocumentsWriterFieldData last = null;
        DocumentsWriterFieldData fp0 = fieldDataHash[hashPos];
        while(fp0 != fp) {
          last = fp0;
          fp0 = fp0.next;
        }
        assert fp0 != null;

        if (last == null)
          fieldDataHash[hashPos] = fp.next;
        else
          last.next = fp.next;

        if (docWriter.infoStream != null)
          docWriter.infoStream.println("  remove field=" + fp.fieldInfo.name);

      } else {
        // Reset
        fp.lastGen = -1;
        allFieldDataArray[upto++] = fp;

        if (fp.numPostings > 0 && ((float) fp.numPostings) / fp.postingsHashSize < 0.2) {
          int hashSize = fp.postingsHashSize;

          // Reduce hash so it's between 25-50% full
          while (fp.numPostings < (hashSize>>1) && hashSize >= 2)
            hashSize >>= 1;
          hashSize <<= 1;

          if (hashSize != fp.postingsHash.length)
            fp.rehashPostings(hashSize);
        }
      }
    }

    // If we didn't see any norms for this field since
    // last flush, free it
    for(int i=0;i<docWriter.norms.length;i++) {
      BufferedNorms n = docWriter.norms[i];
      if (n != null && n.upto == 0)
        docWriter.norms[i] = null;
    }

    numAllFieldData = upto;

    // Also pare back PostingsVectors if it's excessively
    // large
    if (maxPostingsVectors * 1.5 < postingsVectors.length) {
      final int newSize;
      if (0 == maxPostingsVectors)
        newSize = 1;
      else
        newSize = (int) (1.5*maxPostingsVectors);
      PostingVector[] newArray = new PostingVector[newSize];
      System.arraycopy(postingsVectors, 0, newArray, 0, newSize);
      postingsVectors = newArray;
    }
  }

  /** Tokenizes the fields of a document into Postings */
  void processDocument(Analyzer analyzer)
    throws IOException, AbortException {

    final int numFields = numFieldData;
    assert clearLastVectorFieldName();

    assert 0 == fdtLocal.length();

    if (docWriter.tvx != null)
      // If we are writing vectors then we must visit
      // fields in sorted order so they are written in
      // sorted order.  TODO: we actually only need to
      // sort the subset of fields that have vectors
      // enabled; we could save [small amount of] CPU
      // here.
      quickSort(fieldDataArray, 0, numFields-1);

    // We process the document one field at a time
    for(int i=0;i<numFields;i++)
      fieldDataArray[i].processField(analyzer);

    if (docWriter.infoStream != null && maxTermPrefix != null)
      docWriter.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + maxTermPrefix + "...'");
  }

  // USE ONLY FOR DEBUGGING!
  /*
  public String getPostingText() {
    char[] text = charPool.buffers[p.textStart >> CHAR_BLOCK_SHIFT];
    int upto = p.textStart & CHAR_BLOCK_MASK;
    while(text[upto] != 0xffff)
      upto++;
    return new String(text, p.textStart, upto-(p.textStart & BYTE_BLOCK_MASK));
  }
  */

  /** Compares term text for two Posting instances and
   *  returns -1 if p1 < p2; 1 if p1 > p2; else 0.
   */
  int comparePostings(Posting p1, Posting p2) {
    if (p1 == p2)
      return 0;
    final char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
    final char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
    while(true) {
      final char c1 = text1[pos1++];
      final char c2 = text2[pos2++];
      if (c1 < c2)
        if (0xffff == c2)
          return 1;
        else
          return -1;
      else if (c2 < c1)
        if (0xffff == c1)
          return -1;
        else
          return 1;
      else if (0xffff == c1)
        return 0;
    }
  }

  String lastVectorFieldName;

  // Called only by assert
  final boolean clearLastVectorFieldName() {
    lastVectorFieldName = null;
    return true;
  }

  // Called only by assert
  final boolean vectorFieldsInOrder(FieldInfo fi) {
    try {
      if (lastVectorFieldName != null)
        return lastVectorFieldName.compareTo(fi.name) < 0;
      else
        return true;
    } finally {
      lastVectorFieldName = fi.name;
    }
  }

  PostingVector[] postingsVectors = new PostingVector[1];
  int maxPostingsVectors;

  // Used to read a string value for a field
  ReusableStringReader stringReader = new ReusableStringReader();
}
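comparePostings() near the end of the hunk above never takes a term length: term text in the char block pool is terminated by the sentinel char 0xFFFF, so the comparison simply runs until one term ends. A standalone sketch of the same sentinel-terminated comparison over a single shared char[]; the buffer layout is made up for the example:

// Illustrative only: compare two 0xFFFF-terminated terms stored in one buffer.
final class SentinelCompareSketch {
  static int compare(char[] buf, int pos1, int pos2) {
    while (true) {
      final char c1 = buf[pos1++];
      final char c2 = buf[pos2++];
      if (c1 != c2) {
        if (0xffff == c2) return 1;   // term 2 is a prefix of term 1
        if (0xffff == c1) return -1;  // term 1 is a prefix of term 2
        return c1 < c2 ? -1 : 1;
      } else if (0xffff == c1)
        return 0;                     // both terms ended together: equal
    }
  }

  public static void main(String[] args) {
    char[] buf = "apple\uFFFFapply\uFFFF".toCharArray();
    System.out.println(compare(buf, 0, 6)); // prints -1: "apple" < "apply"
  }
}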
@ -0,0 +1,34 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Used by DocumentsWriter to track postings for a single
 * term.  One of these exists per unique term seen since the
 * last flush. */
final class Posting {
  int textStart;          // Address into char[] blocks where our text is stored
  int docFreq;            // # times this term occurs in the current doc
  int freqStart;          // Address of first byte[] slice for freq
  int freqUpto;           // Next write address for freq
  int proxStart;          // Address of first byte[] slice
  int proxUpto;           // Next write address for prox
  int lastDocID;          // Last docID where this term occurred
  int lastDocCode;        // Code for prior doc
  int lastPosition;       // Last position where this term occurred
  PostingVector vector;   // Corresponding PostingVector instance
}
@ -0,0 +1,30 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Used by DocumentsWriter to track data for term vectors.
 * One of these exists per unique term seen in each field in
 * the document. */
class PostingVector {
  Posting p;          // Corresponding Posting instance for this term
  int lastOffset;     // Last offset we saw
  int offsetStart;    // Address of first slice for offsets
  int offsetUpto;     // Next write address for offsets
  int posStart;       // Address of first slice for positions
  int posUpto;        // Next write address for positions
}
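Posting and PostingVector above are two halves of the per-term write state: Posting tracks the freq/prox slices for a term, and, when term vectors are stored for the field, a PostingVector tracks the offset/position slices for the same term, with the two objects pointing at each other. A minimal sketch of that wiring, assuming code in the same package; in real use the start/upto values are slice addresses into DocumentsWriter's block pools, not the defaults shown here:

// Hypothetical helper, same package assumed; shows only how the two structs link up.
final class PostingWiringSketch {
  static PostingVector attachVector(Posting p) {
    PostingVector v = new PostingVector();
    v.p = p;        // the vector side points back at its term's Posting
    p.vector = v;   // and the Posting can reach its per-term vector state
    return v;
  }
}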
@ -0,0 +1,55 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;

/** Used by DocumentsWriter to implement a StringReader
 *  that can be reset to a new string; we use this when
 *  tokenizing the string value from a Field. */
final class ReusableStringReader extends Reader {
  int upto;
  int left;
  String s;
  void init(String s) {
    this.s = s;
    left = s.length();
    this.upto = 0;
  }
  public int read(char[] c) {
    return read(c, 0, c.length);
  }
  public int read(char[] c, int off, int len) {
    if (left > len) {
      s.getChars(upto, upto+len, c, off);
      upto += len;
      left -= len;
      return len;
    } else if (0 == left) {
      return -1;
    } else {
      s.getChars(upto, upto+left, c, off);
      int r = left;
      left = 0;
      upto = s.length();
      return r;
    }
  }
  public void close() {}
}
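A short usage example for ReusableStringReader, assuming caller code in the same package since the class is package-private: one instance is reused across many field values by calling init() instead of allocating a new Reader each time.

// Example only: reuse a single ReusableStringReader across several strings.
final class ReusableStringReaderExample {
  public static void main(String[] args) {
    ReusableStringReader reader = new ReusableStringReader();
    char[] buf = new char[8];

    reader.init("first field value");
    int n;
    while ((n = reader.read(buf, 0, buf.length)) != -1)
      System.out.print(new String(buf, 0, n));
    System.out.println();

    reader.init("second value");    // reset to a new string, no new allocation
    while ((n = reader.read(buf, 0, buf.length)) != -1)
      System.out.print(new String(buf, 0, n));
    System.out.println();
  }
}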