LUCENE-2373 Create a Codec to work with streaming and append-only filesystems.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@962694 13f79535-47bb-0310-9956-ffa450edef68
Andrzej Bialecki 2010-07-09 21:06:24 +00:00
parent c5bc95a357
commit b2eb10239e
21 changed files with 953 additions and 70 deletions

View File

@ -189,6 +189,10 @@ New features
* LUCENE-1810: Added FieldSelectorResult.LATENT to not cache lazy loaded fields
(Tim Smith, Grant Ingersoll)
* LUCENE-2373: Extended CodecProvider to use SegmentInfosWriter and
SegmentInfosReader to allow customization of SegmentInfos data.
(Andrzej Bialecki)
Optimizations
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.

View File

@ -15,6 +15,11 @@ New Features
pages from the buffer cache, since fadvise/madvise do not seem.
(Michael McCandless)
* LUCENE-2373: Added a Codec implementation that works with append-only
filesystems (such as Hadoop DFS). The SegmentInfos writing/reading
code was refactored to support append-only filesystems, and to allow for
future customization of per-segment information. (Andrzej Bialecki)
======================= Lucene 3.x (not yet released) =======================
Changes in backwards compatibility policy

View File

@ -0,0 +1,140 @@
package org.apache.lucene.index.codecs.appending;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
import org.apache.lucene.index.codecs.standard.StandardPostingsReaderImpl;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriterImpl;
import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
/**
* This codec extends {@link StandardCodec} to work on append-only outputs, such
* as plain output streams and append-only filesystems.
*
* <p>Note: the compound file format is not compatible with
* this codec. You must call both
* LogMergePolicy.setUseCompoundFile(false) and
* LogMergePolicy.setUseCompoundDocStore(false) to disable
* compound files.</p>
* @lucene.experimental
*/
public class AppendingCodec extends Codec {
public static final String CODEC_NAME = "Appending";
public AppendingCodec() {
name = CODEC_NAME;
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
throws IOException {
StandardPostingsWriter docsWriter = new StandardPostingsWriterImpl(state);
boolean success = false;
AppendingTermsIndexWriter indexWriter = null;
try {
indexWriter = new AppendingTermsIndexWriter(state);
success = true;
} finally {
if (!success) {
docsWriter.close();
}
}
success = false;
try {
FieldsConsumer ret = new AppendingTermsDictWriter(indexWriter, state, docsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
if (!success) {
try {
docsWriter.close();
} finally {
indexWriter.close();
}
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state)
throws IOException {
StandardPostingsReader docsReader = new StandardPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize);
StandardTermsIndexReader indexReader;
boolean success = false;
try {
indexReader = new AppendingTermsIndexReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.termsIndexDivisor,
BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
} finally {
if (!success) {
docsReader.close();
}
}
success = false;
try {
FieldsProducer ret = new AppendingTermsDictReader(indexReader,
state.dir, state.fieldInfos, state.segmentInfo.name,
docsReader,
state.readBufferSize,
BytesRef.getUTF8SortedAsUnicodeComparator(),
StandardCodec.TERMS_CACHE_SIZE);
success = true;
return ret;
} finally {
if (!success) {
try {
docsReader.close();
} finally {
indexReader.close();
}
}
}
}
@Override
public void files(Directory dir, SegmentInfo segmentInfo, Set<String> files)
throws IOException {
StandardPostingsReaderImpl.files(dir, segmentInfo, files);
StandardTermsDictReader.files(dir, segmentInfo, files);
SimpleStandardTermsIndexReader.files(dir, segmentInfo, files);
}
@Override
public void getExtensions(Set<String> extensions) {
StandardCodec.getStandardExtensions(extensions);
}
}
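
For orientation, a condensed sketch of how this codec gets wired into IndexWriter, mirroring the testCodec() setup in TestAppendingCodec later in this commit (imports as in that test). The anonymous CodecProvider here is an inline equivalent of the provider class the test defines:

// Route all codec and SegmentInfos I/O to the appending implementations:
CodecProvider provider = new CodecProvider() {
  private final Codec appending = new AppendingCodec();
  private final SegmentInfosWriter infosWriter = new AppendingSegmentInfosWriter();
  private final SegmentInfosReader infosReader = new AppendingSegmentInfosReader();
  @Override public Codec lookup(String name) { return appending; }
  @Override public Codec getWriter(SegmentWriteState state) { return appending; }
  @Override public SegmentInfosWriter getSegmentInfosWriter() { return infosWriter; }
  @Override public SegmentInfosReader getSegmentInfosReader() { return infosReader; }
};

Directory dir = new RAMDirectory(); // stand-in for an append-only Directory
IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());
cfg.setCodecProvider(provider);
// The compound file writer seeks back into already-written output, so both
// compound options must be off (see the class javadoc above):
((LogMergePolicy) cfg.getMergePolicy()).setUseCompoundFile(false);
((LogMergePolicy) cfg.getMergePolicy()).setUseCompoundDocStore(false);
IndexWriter writer = new IndexWriter(dir, cfg);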

View File

@ -0,0 +1,41 @@
package org.apache.lucene.index.codecs.appending;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.codecs.DefaultSegmentInfosReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
public class AppendingSegmentInfosReader extends DefaultSegmentInfosReader {
@Override
public void finalizeInput(IndexInput input) throws IOException,
CorruptIndexException {
// nothing to verify: the appending format writes no trailing checksum
input.close();
}
@Override
public IndexInput openInput(Directory dir, String segmentsFileName)
throws IOException {
// plain input; the default implementation wraps a ChecksumIndexInput
return dir.openInput(segmentsFileName);
}
}

View File

@ -0,0 +1,44 @@
package org.apache.lucene.index.codecs.appending;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
public class AppendingSegmentInfosWriter extends DefaultSegmentInfosWriter {
@Override
protected IndexOutput createOutput(Directory dir, String segmentsFileName)
throws IOException {
// plain output; the default implementation wraps a ChecksumIndexOutput
return dir.createOutput(segmentsFileName);
}
@Override
public void finishCommit(IndexOutput out) throws IOException {
out.close();
}
@Override
public void prepareCommit(IndexOutput segmentOutput) throws IOException {
// no-op: without a checksum there is nothing to prepare; finishCommit
// simply closes the output
}
}

View File

@ -0,0 +1,55 @@
package org.apache.lucene.index.codecs.appending;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
public class AppendingTermsDictReader extends StandardTermsDictReader {
public AppendingTermsDictReader(StandardTermsIndexReader indexReader,
Directory dir, FieldInfos fieldInfos, String segment,
StandardPostingsReader postingsReader, int readBufferSize,
Comparator<BytesRef> termComp, int termsCacheSize) throws IOException {
super(indexReader, dir, fieldInfos, segment, postingsReader, readBufferSize,
termComp, termsCacheSize);
}
@Override
protected void readHeader(IndexInput in) throws IOException {
CodecUtil.checkHeader(in, AppendingTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
}
@Override
protected void seekDir(IndexInput in, long dirOffset) throws IOException {
in.seek(in.length() - Long.SIZE / 8);
long offset = in.readLong();
in.seek(offset);
}
}

View File

@ -0,0 +1,49 @@
package org.apache.lucene.index.codecs.appending;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
import org.apache.lucene.index.codecs.standard.StandardTermsIndexWriter;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
public class AppendingTermsDictWriter extends StandardTermsDictWriter {
final static String CODEC_NAME = "APPENDING_TERMS_DICT";
public AppendingTermsDictWriter(StandardTermsIndexWriter indexWriter,
SegmentWriteState state, StandardPostingsWriter postingsWriter,
Comparator<BytesRef> termComp) throws IOException {
super(indexWriter, state, postingsWriter, termComp);
}
@Override
protected void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
}
@Override
protected void writeTrailer(long dirStart) throws IOException {
out.writeLong(dirStart);
}
}
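
The writer/reader pair above replaces the standard codec's "seek back and patch a header slot" step with a trailer: writeTrailer() appends the directory offset as the last long in the file, and seekDir() recovers it at length - 8, so nothing already written is ever overwritten. A self-contained toy illustration of the same pattern in plain java.io (not Lucene API; the file name and values are made up):

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

public class TrailerDemo {
  public static void main(String[] args) throws IOException {
    // Write side: header, data, directory, then the directory offset as a
    // trailer -- strictly append-only, nothing is revisited.
    DataOutputStream out = new DataOutputStream(new FileOutputStream("demo.bin"));
    out.writeInt(0x41504e44);  // 4-byte header, never patched afterwards
    out.writeLong(42L);        // 8 bytes of "postings" data
    long dirStart = 4 + 8;     // the "directory" begins at this offset
    out.writeLong(7L);         // 8 bytes of "directory" (say, a field count)
    out.writeLong(dirStart);   // trailer: the last 8 bytes locate the directory
    out.close();

    // Read side: recover the directory via the trailer, as seekDir() does.
    RandomAccessFile in = new RandomAccessFile("demo.bin", "r");
    in.seek(in.length() - Long.SIZE / 8);
    long dir = in.readLong();        // == 12
    in.seek(dir);
    long fieldCount = in.readLong(); // == 7
    in.close();
    System.out.println(dir + " " + fieldCount);
  }
}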

View File

@ -0,0 +1,49 @@
package org.apache.lucene.index.codecs.appending;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
public class AppendingTermsIndexReader extends SimpleStandardTermsIndexReader {
public AppendingTermsIndexReader(Directory dir, FieldInfos fieldInfos,
String segment, int indexDivisor, Comparator<BytesRef> termComp)
throws IOException {
super(dir, fieldInfos, segment, indexDivisor, termComp);
}
@Override
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, AppendingTermsIndexWriter.CODEC_NAME, AppendingTermsIndexWriter.VERSION_START);
}
@Override
protected void seekDir(IndexInput input, long dirOffset) throws IOException {
input.seek(input.length() - Long.SIZE / 8);
long offset = input.readLong();
input.seek(offset);
}
}

View File

@ -0,0 +1,45 @@
package org.apache.lucene.index.codecs.appending;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexWriter;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.CodecUtil;
public class AppendingTermsIndexWriter extends SimpleStandardTermsIndexWriter {
final static String CODEC_NAME = "APPENDING_TERMS_INDEX";
final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START;
public AppendingTermsIndexWriter(SegmentWriteState state) throws IOException {
super(state);
}
@Override
protected void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
}
@Override
protected void writeTrailer(long dirStart) throws IOException {
out.writeLong(dirStart);
}
}

View File

@ -0,0 +1,170 @@
package org.apache.lucene.index.codecs.appending;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.SegmentInfosReader;
import org.apache.lucene.index.codecs.SegmentInfosWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
public class TestAppendingCodec extends LuceneTestCase {
static class AppendingCodecProvider extends CodecProvider {
Codec appending = new AppendingCodec();
SegmentInfosWriter infosWriter = new AppendingSegmentInfosWriter();
SegmentInfosReader infosReader = new AppendingSegmentInfosReader();
@Override
public Codec lookup(String name) {
return appending;
}
@Override
public Codec getWriter(SegmentWriteState state) {
return appending;
}
@Override
public SegmentInfosReader getSegmentInfosReader() {
return infosReader;
}
@Override
public SegmentInfosWriter getSegmentInfosWriter() {
return infosWriter;
}
}
private static class AppendingIndexOutputWrapper extends IndexOutput {
IndexOutput wrapped;
public AppendingIndexOutputWrapper(IndexOutput wrapped) {
this.wrapped = wrapped;
}
@Override
public void close() throws IOException {
wrapped.close();
}
@Override
public void flush() throws IOException {
wrapped.flush();
}
@Override
public long getFilePointer() {
return wrapped.getFilePointer();
}
@Override
public long length() throws IOException {
return wrapped.length();
}
@Override
public void seek(long pos) throws IOException {
throw new UnsupportedOperationException("seek() is unsupported");
}
@Override
public void writeByte(byte b) throws IOException {
wrapped.writeByte(b);
}
@Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
wrapped.writeBytes(b, offset, length);
}
}
@SuppressWarnings("serial")
private static class AppendingRAMDirectory extends RAMDirectory {
@Override
public IndexOutput createOutput(String name) throws IOException {
return new AppendingIndexOutputWrapper(super.createOutput(name));
}
}
private static final String text = "the quick brown fox jumped over the lazy dog";
public void testCodec() throws Exception {
Directory dir = new AppendingRAMDirectory();
IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());
cfg.setCodecProvider(new AppendingCodecProvider());
((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false);
((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundDocStore(false);
IndexWriter writer = new IndexWriter(dir, cfg);
Document doc = new Document();
doc.add(new Field("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
writer.commit();
writer.addDocument(doc);
writer.optimize();
writer.close();
IndexReader reader = IndexReader.open(dir, null, true, 1, new AppendingCodecProvider());
assertEquals(2, reader.numDocs());
doc = reader.document(0);
assertEquals(text, doc.get("f"));
Fields fields = MultiFields.getFields(reader);
Terms terms = fields.terms("f");
assertNotNull(terms);
TermsEnum te = terms.iterator();
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("quick")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("brown")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("fox")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("jumped")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("over")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("lazy")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("dog")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("the")));
DocsEnum de = te.docs(null, null);
assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS);
assertEquals(2, de.freq());
assertTrue(de.advance(1) != DocsEnum.NO_MORE_DOCS);
assertTrue(de.advance(2) == DocsEnum.NO_MORE_DOCS);
reader.close();
}
}

View File

@ -133,12 +133,14 @@ public final class SegmentInfo {
/**
* Construct a new SegmentInfo instance by reading a
* previously saved SegmentInfo from input.
* <p>Note: this is public only to allow access from
* the codecs package.</p>
*
* @param dir directory to load from
* @param format format of the segments info file
* @param input input handle to read segment info from
*/
SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
this.dir = dir;
name = input.readString();
docCount = input.readInt();
@ -373,7 +375,7 @@ public final class SegmentInfo {
}
/** Save this segment's info. */
void write(IndexOutput output)
public void write(IndexOutput output)
throws IOException {
assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
output.writeString(name);

View File

@ -20,10 +20,10 @@ package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.ChecksumIndexOutput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.SegmentInfosReader;
import org.apache.lucene.index.codecs.SegmentInfosWriter;
import org.apache.lucene.util.ThreadInterruptedException;
import java.io.FileNotFoundException;
@ -65,7 +65,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
public static final int FORMAT_4_0 = -10;
/* This must always point to the most recent file format. */
static final int CURRENT_FORMAT = FORMAT_4_0;
public static final int CURRENT_FORMAT = FORMAT_4_0;
public int counter = 0; // used to name new segments
@ -73,14 +73,16 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
* counts how often the index has been changed by adding or deleting docs.
* starting with the current time in milliseconds forces to create unique version numbers.
*/
private long version = System.currentTimeMillis();
public long version = System.currentTimeMillis();
private long generation = 0; // generation of the "segments_N" for the next commit
private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
// or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit
private Map<String,String> userData = Collections.<String,String>emptyMap(); // Opaque Map<String, String> that user can specify during IndexWriter.commit
public Map<String,String> userData = Collections.<String,String>emptyMap(); // Opaque Map<String, String> that user can specify during IndexWriter.commit
private CodecProvider codecs;
/**
* If non-null, information about loading segments_N files
@ -88,6 +90,14 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
*/
private static PrintStream infoStream;
public SegmentInfos() {
this(CodecProvider.getDefault());
}
public SegmentInfos(CodecProvider codecs) {
this.codecs = codecs;
}
public final SegmentInfo info(int i) {
return get(i);
}
@ -205,42 +215,22 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
*/
public final void read(Directory directory, String segmentFileName,
CodecProvider codecs) throws CorruptIndexException, IOException {
this.codecs = codecs;
boolean success = false;
// Clear any previous segments:
clear();
ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName));
generation = generationFromSegmentsFileName(segmentFileName);
lastGeneration = generation;
try {
int format = input.readInt();
// check that it is a format we can understand
if (format < CURRENT_FORMAT)
throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
version = input.readLong(); // read version
counter = input.readInt(); // read counter
for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
add(new SegmentInfo(directory, format, input, codecs));
}
userData = input.readStringStringMap();
final long checksumNow = input.getChecksum();
final long checksumThen = input.readLong();
if (checksumNow != checksumThen)
throw new CorruptIndexException("checksum mismatch in segments file");
SegmentInfosReader infosReader = codecs.getSegmentInfosReader();
infosReader.read(directory, segmentFileName, codecs, this);
success = true;
}
finally {
input.close();
if (!success) {
// Clear any segment infos we had loaded so we
// have a clean slate on retry:
@ -261,6 +251,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
public final void read(Directory directory, final CodecProvider codecs) throws CorruptIndexException, IOException {
generation = lastGeneration = -1;
this.codecs = codecs;
new FindSegmentsFile(directory) {
@ -274,7 +265,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
// Only non-null after prepareCommit has been called and
// before finishCommit is called
ChecksumIndexOutput pendingSegnOutput;
IndexOutput pendingSegnOutput;
private void write(Directory directory) throws IOException {
@ -287,21 +278,14 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
generation++;
}
ChecksumIndexOutput segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName));
IndexOutput segnOutput = null;
boolean success = false;
try {
segnOutput.writeInt(CURRENT_FORMAT); // write FORMAT
segnOutput.writeLong(++version); // every write changes
// the index
segnOutput.writeInt(counter); // write counter
segnOutput.writeInt(size()); // write infos
for (SegmentInfo si : this) {
si.write(segnOutput);
}
segnOutput.writeStringStringMap(userData);
segnOutput.prepareCommit();
SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
infosWriter.prepareCommit(segnOutput);
success = true;
pendingSegnOutput = segnOutput;
} finally {
@ -785,8 +769,8 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
throw new IllegalStateException("prepareCommit was not called");
boolean success = false;
try {
pendingSegnOutput.finishCommit();
pendingSegnOutput.close();
SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
infosWriter.finishCommit(pendingSegnOutput);
pendingSegnOutput = null;
success = true;
} finally {

View File

@ -38,6 +38,8 @@ import org.apache.lucene.index.codecs.standard.StandardCodec;
* @lucene.experimental */
public abstract class CodecProvider {
private SegmentInfosWriter infosWriter = new DefaultSegmentInfosWriter();
private SegmentInfosReader infosReader = new DefaultSegmentInfosReader();
private final HashMap<String, Codec> codecs = new HashMap<String, Codec>();
@ -73,6 +75,14 @@ public abstract class CodecProvider {
public abstract Codec getWriter(SegmentWriteState state);
public SegmentInfosWriter getSegmentInfosWriter() {
return infosWriter;
}
public SegmentInfosReader getSegmentInfosReader() {
return infosReader;
}
static private final CodecProvider defaultCodecs = new DefaultCodecProvider();
public static CodecProvider getDefault() {

View File

@ -0,0 +1,80 @@
package org.apache.lucene.index.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
/**
* Default implementation of {@link SegmentInfosReader}.
* @lucene.experimental
*/
public class DefaultSegmentInfosReader extends SegmentInfosReader {
@Override
public void read(Directory directory, String segmentsFileName, CodecProvider codecs,
SegmentInfos infos) throws IOException {
IndexInput input = null;
try {
input = openInput(directory, segmentsFileName);
int format = input.readInt();
// check that it is a format we can understand
if (format < SegmentInfos.CURRENT_FORMAT)
throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
infos.version = input.readLong(); // read version
infos.counter = input.readInt(); // read counter
for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
infos.add(new SegmentInfo(directory, format, input, codecs));
}
infos.userData = input.readStringStringMap();
finalizeInput(input);
} finally {
if (input != null) {
input.close();
}
}
}
public IndexInput openInput(Directory dir, String segmentsFileName) throws IOException {
IndexInput in = dir.openInput(segmentsFileName);
return new ChecksumIndexInput(in);
}
public void finalizeInput(IndexInput input) throws IOException, CorruptIndexException {
ChecksumIndexInput cksumInput = (ChecksumIndexInput)input;
final long checksumNow = cksumInput.getChecksum();
final long checksumThen = cksumInput.readLong();
if (checksumNow != checksumThen)
throw new CorruptIndexException("checksum mismatch in segments file");
}
}

View File

@ -0,0 +1,67 @@
package org.apache.lucene.index.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.ChecksumIndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
/**
* Default implementation of {@link SegmentInfosWriter}.
* @lucene.experimental
*/
public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
@Override
public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
throws IOException {
IndexOutput out = createOutput(dir, segmentFileName);
out.writeInt(SegmentInfos.CURRENT_FORMAT); // write FORMAT
out.writeLong(++infos.version); // every write changes
// the index
out.writeInt(infos.counter); // write counter
out.writeInt(infos.size()); // write infos
for (SegmentInfo si : infos) {
si.write(out);
}
out.writeStringStringMap(infos.getUserData());
return out;
}
protected IndexOutput createOutput(Directory dir, String segmentFileName)
throws IOException {
IndexOutput plainOut = dir.createOutput(segmentFileName);
ChecksumIndexOutput out = new ChecksumIndexOutput(plainOut);
return out;
}
@Override
public void prepareCommit(IndexOutput segmentOutput) throws IOException {
((ChecksumIndexOutput)segmentOutput).prepareCommit();
}
@Override
public void finishCommit(IndexOutput out) throws IOException {
((ChecksumIndexOutput)out).finishCommit();
out.close();
}
}

View File

@ -0,0 +1,40 @@
package org.apache.lucene.index.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
/**
* Specifies an API for classes that can read {@link SegmentInfos} information.
* @lucene.experimental
*/
public abstract class SegmentInfosReader {
/**
* Read {@link SegmentInfos} data from a directory.
* @param directory directory to read from
* @param segmentsFileName name of the "segments_N" file
* @param codecs current codecs
* @param infos empty instance to be populated with data
* @throws IOException
*/
public abstract void read(Directory directory, String segmentsFileName, CodecProvider codecs, SegmentInfos infos) throws IOException;
}
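
A short sketch of the read side as SegmentInfos.read() now drives it (visible in the SegmentInfos.java hunks above); the segments file name here is hypothetical:

SegmentInfosReader infosReader = codecs.getSegmentInfosReader();
SegmentInfos infos = new SegmentInfos(codecs);
// populates the empty instance from the named "segments_N" file:
infosReader.read(directory, "segments_1", codecs, infos);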

View File

@ -0,0 +1,63 @@
package org.apache.lucene.index.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
/**
* Specifies an API for classes that can write out {@link SegmentInfos} data.
* @lucene.experimental
*/
public abstract class SegmentInfosWriter {
/**
* Write {@link SegmentInfos} data without closing the output. The returned
* output is finalized only after a successful two-phase commit:
* first {@link #prepareCommit(IndexOutput)}, then
* {@link #finishCommit(IndexOutput)}.
* @param dir directory to write data to
* @param segmentsFileName name of the "segments_N" file to create
* @param infos data to write
* @return an instance of {@link IndexOutput} to be used in subsequent "two
* phase commit" operations as described above.
* @throws IOException
*/
public abstract IndexOutput writeInfos(Directory dir, String segmentsFileName, SegmentInfos infos) throws IOException;
/**
* First phase of the two-phase commit - ensure that all output can be
* successfully written out.
* @param out an instance of {@link IndexOutput} returned from a previous
* call to {@link #writeInfos(Directory, String, SegmentInfos)}.
* @throws IOException
*/
public abstract void prepareCommit(IndexOutput out) throws IOException;
/**
* Second phase of the two-phase commit. In this step the output should be
* finalized and closed.
* @param out an instance of {@link IndexOutput} returned from a previous
* call to {@link #writeInfos(Directory, String, SegmentInfos)}.
* @throws IOException
*/
public abstract void finishCommit(IndexOutput out) throws IOException;
}
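
Putting the contract together, the call sequence that SegmentInfos uses (see the SegmentInfos.java hunks above) amounts to this sketch, with error handling elided:

SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
// phase 0: write the data; the returned output stays open
IndexOutput segnOutput = infosWriter.writeInfos(directory, segmentFileName, infos);
// phase 1: ensure everything can be written (the default implementation
// begins writing its checksum here)
infosWriter.prepareCommit(segnOutput);
// ... remaining commit work, such as syncing referenced files ...
// phase 2: finalize and close the output
infosWriter.finishCommit(segnOutput);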

View File

@ -87,6 +87,9 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
final HashMap<FieldInfo,FieldIndexReader> fields = new HashMap<FieldInfo,FieldIndexReader>();
// start of the field info data
protected long dirOffset;
public SimpleStandardTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp)
throws IOException {
@ -97,10 +100,8 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
boolean success = false;
try {
CodecUtil.checkHeader(in, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START);
final long dirOffset = in.readLong();
readHeader(in);
indexInterval = in.readInt();
this.indexDivisor = indexDivisor;
@ -111,9 +112,9 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
totalIndexInterval = indexInterval * indexDivisor;
}
// Read directory
in.seek(dirOffset);
seekDir(in, dirOffset);
// Read directory
final int numFields = in.readInt();
for(int i=0;i<numFields;i++) {
@ -144,6 +145,11 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
}
}
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START);
dirOffset = input.readLong();
}
private final class FieldIndexReader extends FieldReader {
final private FieldInfo fieldInfo;
@ -445,4 +451,8 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
termBytesReader.close();
}
}
protected void seekDir(IndexInput input, long dirOffset) throws IOException {
input.seek(dirOffset);
}
}

View File

@ -33,7 +33,7 @@ import java.io.IOException;
/** @lucene.experimental */
public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
final private IndexOutput out;
protected final IndexOutput out;
final static String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX";
final static int VERSION_START = 0;
@ -50,12 +50,15 @@ public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
state.flushedFiles.add(indexFileName);
termIndexInterval = state.termIndexInterval;
out = state.directory.createOutput(indexFileName);
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
fieldInfos = state.fieldInfos;
writeHeader(out);
out.writeInt(termIndexInterval);
}
protected void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
// Placeholder for dir offset
out.writeLong(0);
}
@Override
@ -179,8 +182,12 @@ public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
out.writeLong(field.packedIndexStart);
out.writeLong(field.packedOffsetsStart);
}
out.seek(CodecUtil.headerLength(CODEC_NAME));
out.writeLong(dirStart);
writeTrailer(dirStart);
out.close();
}
protected void writeTrailer(long dirStart) throws IOException {
out.seek(CodecUtil.headerLength(CODEC_NAME));
out.writeLong(dirStart);
}
}

View File

@ -72,6 +72,9 @@ public class StandardTermsDictReader extends FieldsProducer {
// Reads the terms index
private StandardTermsIndexReader indexReader;
// keeps the dirStart offset
protected long dirOffset;
// Used as key for the terms cache
private static class FieldAndTerm extends DoubleBarrelLRUCache.CloneableKey {
String field;
@ -116,15 +119,13 @@ public class StandardTermsDictReader extends FieldsProducer {
boolean success = false;
try {
CodecUtil.checkHeader(in, StandardTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
final long dirOffset = in.readLong();
readHeader(in);
// Have PostingsReader init itself
postingsReader.init(in);
// Read per-field details
in.seek(dirOffset);
seekDir(in, dirOffset);
final int numFields = in.readInt();
@ -151,6 +152,16 @@ public class StandardTermsDictReader extends FieldsProducer {
this.indexReader = indexReader;
}
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, StandardTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
dirOffset = input.readLong();
}
protected void seekDir(IndexInput input, long dirOffset)
throws IOException {
input.seek(dirOffset);
}
@Override
public void loadTermsIndex(int indexDivisor) throws IOException {
indexReader.loadTermsIndex(indexDivisor);

View File

@ -55,7 +55,7 @@ public class StandardTermsDictWriter extends FieldsConsumer {
private final DeltaBytesWriter termWriter;
final IndexOutput out;
protected final IndexOutput out;
final StandardPostingsWriter postingsWriter;
final FieldInfos fieldInfos;
FieldInfo currentField;
@ -77,12 +77,7 @@ public class StandardTermsDictWriter extends FieldsConsumer {
state.flushedFiles.add(termsFileName);
fieldInfos = state.fieldInfos;
// Count indexed fields up front
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeLong(0); // leave space for end index pointer
writeHeader(out);
termWriter = new DeltaBytesWriter(out);
currentField = null;
this.postingsWriter = postingsWriter;
@ -90,6 +85,13 @@ public class StandardTermsDictWriter extends FieldsConsumer {
postingsWriter.start(out); // have consumer write its format/header
}
protected void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeLong(0); // leave space for end index pointer
}
@Override
public TermsConsumer addField(FieldInfo field) {
assert currentField == null || currentField.name.compareTo(field.name) < 0;
@ -115,8 +117,7 @@ public class StandardTermsDictWriter extends FieldsConsumer {
out.writeLong(field.numTerms);
out.writeLong(field.termsStartPointer);
}
out.seek(CodecUtil.headerLength(CODEC_NAME));
out.writeLong(dirStart);
writeTrailer(dirStart);
} finally {
try {
out.close();
@ -130,6 +131,12 @@ public class StandardTermsDictWriter extends FieldsConsumer {
}
}
protected void writeTrailer(long dirStart) throws IOException {
out.seek(CodecUtil.headerLength(CODEC_NAME));
out.writeLong(dirStart);
}
class TermsWriter extends TermsConsumer {
private final FieldInfo fieldInfo;
private final StandardPostingsWriter postingsWriter;