From b2eb10239e73ed3c5c9fdd30090069c2046c8b5e Mon Sep 17 00:00:00 2001
From: Andrzej Bialecki <ab@apache.org>
Date: Fri, 9 Jul 2010 21:06:24 +0000
Subject: [PATCH] LUCENE-2373 Create a Codec to work with streaming and
 append-only filesystems.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@962694 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt                            |   4 +
 lucene/contrib/CHANGES.txt                    |   5 +
 .../codecs/appending/AppendingCodec.java      | 140 +++++++++++++++
 .../AppendingSegmentInfosReader.java          |  41 +++++
 .../AppendingSegmentInfosWriter.java          |  44 +++++
 .../appending/AppendingTermsDictReader.java   |  55 ++++++
 .../appending/AppendingTermsDictWriter.java   |  49 +++++
 .../appending/AppendingTermsIndexReader.java  |  49 +++++
 .../appending/AppendingTermsIndexWriter.java  |  45 +++++
 .../codecs/appending/TestAppendingCodec.java  | 170 ++++++++++++++++++
 .../org/apache/lucene/index/SegmentInfo.java  |   6 +-
 .../org/apache/lucene/index/SegmentInfos.java |  68 +++----
 .../lucene/index/codecs/CodecProvider.java    |  10 ++
 .../codecs/DefaultSegmentInfosReader.java     |  80 +++++++++
 .../codecs/DefaultSegmentInfosWriter.java     |  67 +++++++
 .../index/codecs/SegmentInfosReader.java      |  40 +++++
 .../index/codecs/SegmentInfosWriter.java      |  63 +++++++
 .../SimpleStandardTermsIndexReader.java       |  22 ++-
 .../SimpleStandardTermsIndexWriter.java       |  21 ++-
 .../standard/StandardTermsDictReader.java     |  19 +-
 .../standard/StandardTermsDictWriter.java     |  25 ++-
 21 files changed, 953 insertions(+), 70 deletions(-)
 create mode 100644 lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
 create mode 100644 lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosReader.java
 create mode 100644 lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosWriter.java
 create mode 100644 lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
 create mode 100644 lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
 create mode 100644 lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexReader.java
 create mode 100644 lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexWriter.java
 create mode 100644 lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
 create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
 create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
 create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosReader.java
 create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosWriter.java

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a894549303b..d41a7965301 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -189,6 +189,10 @@ New features
 * LUCENE-1810: Added FieldSelectorResult.LATENT to not cache lazy loaded fields
   (Tim Smith, Grant Ingersoll)
 
+* LUCENE-2373: Extend CodecProvider to use SegmentInfosWriter and
+  SegmentInfosReader to allow customization of SegmentInfos data.
+  (Andrzej Bialecki)
+
 Optimizations
 
 * LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt
index 55e5d80b321..c87674ad77a 100644
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@@ -15,6 +15,11 @@ New Features
     pages from the buffer cache, since fadvise/madvise do not seem.
     (Michael McCandless)
 
+  * LUCENE-2373: Added a Codec implementation that works with append-only
+    filesystems (such as Hadoop DFS). SegmentInfos writing/reading
+    code is refactored to support append-only FS, and to allow for future
+    customization of per-segment information. (Andrzej Bialecki)
+
 ======================= Lucene 3.x (not yet released) =======================
 
 Changes in backwards compatibility policy
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
new file mode 100644
index 00000000000..72c772d8742
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
@@ -0,0 +1,140 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReaderImpl;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriterImpl;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A codec assembled from the {@link StandardCodec} building blocks that works on
+ * append-only outputs, such as plain output streams and append-only filesystems.
+ *
+ * <p>Note: compound file format feature is not compatible with
+ * this codec.  You must call both
+ * LogMergePolicy.setUseCompoundFile(false) and
+ * LogMergePolicy.setUseCompoundDocStore(false) to disable
+ * compound file format.</p>
+ * @lucene.experimental
+ */
+public class AppendingCodec extends Codec {
+  /** Name the constructor assigns to this codec; final so that it cannot be reassigned. */
+  public static final String CODEC_NAME = "Appending";
+  
+  public AppendingCodec() {
+    name = CODEC_NAME;
+  }
+
+  /** Chains a postings writer, a terms index writer and a terms dict writer for the segment. */
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    StandardPostingsWriter docsWriter = new StandardPostingsWriterImpl(state);
+    boolean success = false;
+    AppendingTermsIndexWriter indexWriter = null;
+    try {
+      indexWriter = new AppendingTermsIndexWriter(state);
+      success = true;
+    } finally {
+      if (!success) {
+        docsWriter.close();
+      }
+    }
+    success = false;
+    try {
+      FieldsConsumer ret = new AppendingTermsDictWriter(indexWriter, state, docsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docsWriter.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  /** Chains a postings reader, a terms index reader and a terms dict reader for the segment. */
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    StandardPostingsReader docsReader = new StandardPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize);
+    StandardTermsIndexReader indexReader;
+
+    boolean success = false;
+    try {
+      indexReader = new AppendingTermsIndexReader(state.dir,
+              state.fieldInfos, state.segmentInfo.name, state.termsIndexDivisor,
+              BytesRef.getUTF8SortedAsUnicodeComparator());
+      success = true;
+    } finally {
+      if (!success) {
+        // the index reader could not be created: release the postings reader
+        docsReader.close();
+      }
+    }
+    success = false;
+    try {
+      FieldsProducer ret = new AppendingTermsDictReader(indexReader,
+              state.dir, state.fieldInfos, state.segmentInfo.name,
+              docsReader, state.readBufferSize,
+              BytesRef.getUTF8SortedAsUnicodeComparator(), StandardCodec.TERMS_CACHE_SIZE);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        // the dict reader could not be created: release both readers
+        try {
+          docsReader.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  /** Delegates to the static files() helpers of the standard postings, terms dict and terms index readers. */
+  @Override
+  public void files(Directory dir, SegmentInfo segmentInfo, Set<String> files) throws IOException {
+    StandardPostingsReaderImpl.files(dir, segmentInfo, files);
+    StandardTermsDictReader.files(dir, segmentInfo, files);
+    SimpleStandardTermsIndexReader.files(dir, segmentInfo, files);
+  }
+
+  /** Delegates to {@link StandardCodec#getStandardExtensions}. */
+  @Override
+  public void getExtensions(Set<String> extensions) {
+    StandardCodec.getStandardExtensions(extensions);
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosReader.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosReader.java
new file mode 100644
index 00000000000..bd4b26c5c9a
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosReader.java
@@ -0,0 +1,41 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.codecs.DefaultSegmentInfosReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
+/** Reads the segments file through an input that is opened directly and simply closed when done. */
+public class AppendingSegmentInfosReader extends DefaultSegmentInfosReader {
+
+  /** Opens the named segments file straight from the directory. */
+  @Override
+  public IndexInput openInput(Directory dir, String segmentsFileName) throws IOException {
+    return dir.openInput(segmentsFileName);
+  }
+
+  /** Finishes reading by closing the input; nothing else is done. */
+  @Override
+  public void finalizeInput(IndexInput input) throws IOException, CorruptIndexException {
+    input.close();
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosWriter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosWriter.java
new file mode 100644
index 00000000000..45d53e01955
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosWriter.java
@@ -0,0 +1,44 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+
+/** Writes the segments file through an output that is created directly and closed on commit. */
+public class AppendingSegmentInfosWriter extends DefaultSegmentInfosWriter {
+  /** Creates the named segments file straight from the directory. */
+  @Override
+  protected IndexOutput createOutput(Directory dir, String segmentsFileName) throws IOException {
+    return dir.createOutput(segmentsFileName);
+  }
+
+  @Override
+  public void prepareCommit(IndexOutput segmentOutput) throws IOException {
+    // noop: this writer does nothing to the output at this stage
+  }
+
+  /** Completes the commit by closing the output. */
+  @Override
+  public void finishCommit(IndexOutput out) throws IOException {
+    out.close();
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
new file mode 100644
index 00000000000..370ddc1d2c7
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
@@ -0,0 +1,55 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+
+/** Terms dict reader that checks the appending header and locates the directory via a trailing long. */
+public class AppendingTermsDictReader extends StandardTermsDictReader {
+
+  public AppendingTermsDictReader(StandardTermsIndexReader indexReader, Directory dir,
+          FieldInfos fieldInfos, String segment, StandardPostingsReader postingsReader,
+          int readBufferSize, Comparator<BytesRef> termComp, int termsCacheSize) throws IOException {
+    super(indexReader, dir, fieldInfos, segment, postingsReader, readBufferSize, termComp, termsCacheSize);
+  }
+
+  /** Validates the header against the codec name declared by {@link AppendingTermsDictWriter}. */
+  @Override
+  protected void readHeader(IndexInput in) throws IOException {
+    CodecUtil.checkHeader(in, AppendingTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
+  }
+
+  /** Ignores dirOffset: reads a long from the last 8 bytes of the file and seeks to that position. */
+  @Override
+  protected void seekDir(IndexInput in, long dirOffset) throws IOException {
+    final long trailerStart = in.length() - Long.SIZE / 8;
+    in.seek(trailerStart);
+    in.seek(in.readLong());
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
new file mode 100644
index 00000000000..011687024d4
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
@@ -0,0 +1,49 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexWriter;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+
+public class AppendingTermsDictWriter extends StandardTermsDictWriter {
+  static final String CODEC_NAME = "APPENDING_TERMS_DICT";
+  public AppendingTermsDictWriter(StandardTermsIndexWriter indexWriter, SegmentWriteState state,
+          StandardPostingsWriter postingsWriter, Comparator<BytesRef> termComp) throws IOException {
+    super(indexWriter, state, postingsWriter, termComp);
+  }
+
+  /** Writes a header carrying this class's CODEC_NAME and the inherited VERSION_CURRENT. */
+  @Override
+  protected void writeHeader(IndexOutput out) throws IOException {
+    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
+  }
+
+  /** Writes dirStart as a single long to the inherited {@code out}. */
+  @Override
+  protected void writeTrailer(long dirStart) throws IOException {
+    out.writeLong(dirStart);
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexReader.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexReader.java
new file mode 100644
index 00000000000..e61fe8c667e
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexReader.java
@@ -0,0 +1,49 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+
+public class AppendingTermsIndexReader extends SimpleStandardTermsIndexReader {
+  public AppendingTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment,
+          int indexDivisor, Comparator<BytesRef> termComp) throws IOException {
+    super(dir, fieldInfos, segment, indexDivisor, termComp);
+  }
+
+  /** Validates the header against the codec name and start version of {@link AppendingTermsIndexWriter}. */
+  @Override
+  protected void readHeader(IndexInput input) throws IOException {
+    CodecUtil.checkHeader(input, AppendingTermsIndexWriter.CODEC_NAME, AppendingTermsIndexWriter.VERSION_START);
+  }
+
+  /** Ignores dirOffset: reads a long from the last 8 bytes of the file and seeks to that position. */
+  @Override
+  protected void seekDir(IndexInput input, long dirOffset) throws IOException {
+    final long trailerStart = input.length() - Long.SIZE / 8;
+    input.seek(trailerStart);
+    input.seek(input.readLong());
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexWriter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexWriter.java
new file mode 100644
index 00000000000..6a3f728fc58
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexWriter.java
@@ -0,0 +1,45 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexWriter;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.CodecUtil;
+
+public class AppendingTermsIndexWriter extends SimpleStandardTermsIndexWriter {
+  static final String CODEC_NAME = "APPENDING_TERMS_INDEX";  // name put into the header
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  public AppendingTermsIndexWriter(SegmentWriteState state) throws IOException {
+    super(state);
+  }
+
+  @Override
+  protected void writeTrailer(long dirStart) throws IOException {
+    out.writeLong(dirStart);  // one long, written to the inherited output
+  }
+
+  @Override
+  protected void writeHeader(IndexOutput out) throws IOException {
+    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
+  }
+}
diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
new file mode 100644
index 00000000000..cef9ece0b54
--- /dev/null
+++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
@@ -0,0 +1,170 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.SegmentInfosReader;
+import org.apache.lucene.index.codecs.SegmentInfosWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
+
+/** Round-trips a small index through {@link AppendingCodec} on a directory whose outputs reject seek(). */
+public class TestAppendingCodec extends LuceneTestCase {
+
+  /** Provider that always answers with the appending codec and the appending segment-infos reader/writer. */
+  static class AppendingCodecProvider extends CodecProvider {
+    Codec appending = new AppendingCodec();
+    SegmentInfosWriter infosWriter = new AppendingSegmentInfosWriter();
+    SegmentInfosReader infosReader = new AppendingSegmentInfosReader();
+
+    @Override
+    public Codec getWriter(SegmentWriteState state) {
+      return appending;
+    }
+    @Override
+    public Codec lookup(String name) {
+      return appending;
+    }
+    @Override
+    public SegmentInfosWriter getSegmentInfosWriter() {
+      return infosWriter;
+    }
+    @Override
+    public SegmentInfosReader getSegmentInfosReader() {
+      return infosReader;
+    }
+  }
+
+  /** Forwards everything to another output, except seek(), which always throws. */
+  private static class AppendingIndexOutputWrapper extends IndexOutput {
+    IndexOutput delegate;
+
+    public AppendingIndexOutputWrapper(IndexOutput wrapped) {
+      this.delegate = wrapped;
+    }
+
+    @Override
+    public void writeByte(byte b) throws IOException {
+      delegate.writeByte(b);
+    }
+
+    @Override
+    public void writeBytes(byte[] b, int offset, int length) throws IOException {
+      delegate.writeBytes(b, offset, length);
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+      throw new UnsupportedOperationException("seek() is unsupported");
+    }
+
+    @Override
+    public void flush() throws IOException {
+      delegate.flush();
+    }
+
+    @Override
+    public void close() throws IOException {
+      delegate.close();
+    }
+
+    @Override
+    public long getFilePointer() {
+      return delegate.getFilePointer();
+    }
+
+    @Override
+    public long length() throws IOException {
+      return delegate.length();
+    }
+  }
+
+  /** RAMDirectory whose created outputs are wrapped so that they cannot seek. */
+  @SuppressWarnings("serial")
+  private static class AppendingRAMDirectory extends RAMDirectory {
+    @Override
+    public IndexOutput createOutput(String name) throws IOException {
+      return new AppendingIndexOutputWrapper(super.createOutput(name));
+    }
+  }
+
+  private static final String text = "the quick brown fox jumped over the lazy dog";
+
+  public void testCodec() throws Exception {
+    // write two copies of the same document, committing in between, then optimize
+    Directory dir = new AppendingRAMDirectory();
+    IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());
+    cfg.setCodecProvider(new AppendingCodecProvider());
+    LogMergePolicy mergePolicy = (LogMergePolicy) cfg.getMergePolicy();
+    mergePolicy.setUseCompoundFile(false);
+    mergePolicy.setUseCompoundDocStore(false);
+    IndexWriter writer = new IndexWriter(dir, cfg);
+    Document doc = new Document();
+    doc.add(new Field("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+    writer.addDocument(doc);
+    writer.commit();
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+
+    // read the index back through the same kind of provider
+    IndexReader reader = IndexReader.open(dir, null, true, 1, new AppendingCodecProvider());
+    assertEquals(2, reader.numDocs());
+    doc = reader.document(0);
+    assertEquals(text, doc.get("f"));
+    Fields fields = MultiFields.getFields(reader);
+    Terms terms = fields.terms("f");
+    assertNotNull(terms);
+    TermsEnum te = terms.iterator();
+    // "the" must stay last: the postings checked below belong to the last term sought
+    String[] words = {"quick", "brown", "fox", "jumped", "over", "lazy", "dog", "the"};
+    for (String word : words) {
+      assertEquals(SeekStatus.FOUND, te.seek(new BytesRef(word)));
+    }
+    DocsEnum de = te.docs(null, null);
+    assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS);
+    assertEquals(2, de.freq());
+    assertTrue(de.advance(1) != DocsEnum.NO_MORE_DOCS);
+    assertTrue(de.advance(2) == DocsEnum.NO_MORE_DOCS);
+    reader.close();
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
index cf6c4716b56..f5f46133022 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
@@ -133,12 +133,14 @@ public final class SegmentInfo {
   /**
    * Construct a new SegmentInfo instance by reading a
    * previously saved SegmentInfo from input.
+   * <p>Note: this is public only to allow access from
+   * the codecs package.</p>
    *
    * @param dir directory to load from
    * @param format format of the segments info file
    * @param input input handle to read segment info from
    */
-  SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
+  public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
     this.dir = dir;
     name = input.readString();
     docCount = input.readInt();
@@ -373,7 +375,7 @@ public final class SegmentInfo {
   }
   
   /** Save this segment's info. */
-  void write(IndexOutput output)
+  public void write(IndexOutput output)
     throws IOException {
     assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
     output.writeString(name);
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
index 3e75fa248d0..efcba816a4e 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -20,10 +20,10 @@ package org.apache.lucene.index;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.ChecksumIndexOutput;
-import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.NoSuchDirectoryException;
 import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.SegmentInfosReader;
+import org.apache.lucene.index.codecs.SegmentInfosWriter;
 import org.apache.lucene.util.ThreadInterruptedException;
 
 import java.io.FileNotFoundException;
@@ -65,7 +65,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
   public static final int FORMAT_4_0 = -10;
 
   /* This must always point to the most recent file format. */
-  static final int CURRENT_FORMAT = FORMAT_4_0;
+  public static final int CURRENT_FORMAT = FORMAT_4_0;
   
   public int counter = 0;    // used to name new segments
   
@@ -73,20 +73,30 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
    * counts how often the index has been changed by adding or deleting docs.
    * starting with the current time in milliseconds forces to create unique version numbers.
    */
-  private long version = System.currentTimeMillis();
+  public long version = System.currentTimeMillis();
 
   private long generation = 0;     // generation of the "segments_N" for the next commit
   private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
                                    // or wrote; this is normally the same as generation except if
                                    // there was an IOException that had interrupted a commit
 
-  private Map<String,String> userData = Collections.<String,String>emptyMap();       // Opaque Map<String, String> that user can specify during IndexWriter.commit
+  public Map<String,String> userData = Collections.<String,String>emptyMap();       // Opaque Map<String, String> that user can specify during IndexWriter.commit
+  
+  private CodecProvider codecs;
 
   /**
    * If non-null, information about loading segments_N files
    * will be printed here.  @see #setInfoStream.
    */
   private static PrintStream infoStream;
+  
+  public SegmentInfos() {
+    this(CodecProvider.getDefault()); // no provider supplied: use the default one
+  }
+  
+  public SegmentInfos(CodecProvider codecs) {
+    this.codecs = codecs; // consulted by write/finishCommit; both read(...) overloads replace it with their own argument
+  }
 
   public final SegmentInfo info(int i) {
     return get(i);
@@ -205,42 +215,22 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
    */
   public final void read(Directory directory, String segmentFileName, 
                          CodecProvider codecs) throws CorruptIndexException, IOException {
+    this.codecs = codecs;
     boolean success = false;
 
     // Clear any previous segments:
     clear();
 
-    ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName));
-
     generation = generationFromSegmentsFileName(segmentFileName);
 
     lastGeneration = generation;
 
     try {
-      int format = input.readInt();
-
-      // check that it is a format we can understand
-      if (format < CURRENT_FORMAT)
-        throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
-
-      version = input.readLong(); // read version
-      counter = input.readInt(); // read counter
-      
-      for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
-        add(new SegmentInfo(directory, format, input, codecs));
-      }
-      
-      userData = input.readStringStringMap();
-
-      final long checksumNow = input.getChecksum();
-      final long checksumThen = input.readLong();
-      if (checksumNow != checksumThen)
-        throw new CorruptIndexException("checksum mismatch in segments file");
-
+      SegmentInfosReader infosReader = codecs.getSegmentInfosReader();
+      infosReader.read(directory, segmentFileName, codecs, this);
       success = true;
     }
     finally {
-      input.close();
       if (!success) {
         // Clear any segment infos we had loaded so we
         // have a clean slate on retry:
@@ -261,6 +251,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
   
   public final void read(Directory directory, final CodecProvider codecs) throws CorruptIndexException, IOException {
     generation = lastGeneration = -1;
+    this.codecs = codecs;
 
     new FindSegmentsFile(directory) {
 
@@ -274,7 +265,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
 
   // Only non-null after prepareCommit has been called and
   // before finishCommit is called
-  ChecksumIndexOutput pendingSegnOutput;
+  IndexOutput pendingSegnOutput;
 
   private void write(Directory directory) throws IOException {
 
@@ -287,21 +278,14 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
       generation++;
     }
 
-    ChecksumIndexOutput segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName));
+    IndexOutput segnOutput = null;
 
     boolean success = false;
 
     try {
-      segnOutput.writeInt(CURRENT_FORMAT); // write FORMAT
-      segnOutput.writeLong(++version); // every write changes
-                                   // the index
-      segnOutput.writeInt(counter); // write counter
-      segnOutput.writeInt(size()); // write infos
-      for (SegmentInfo si : this) {
-        si.write(segnOutput);
-      }
-      segnOutput.writeStringStringMap(userData);
-      segnOutput.prepareCommit();
+      SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
+      segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
+      infosWriter.prepareCommit(segnOutput);
       success = true;
       pendingSegnOutput = segnOutput;
     } finally {
@@ -785,8 +769,8 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
       throw new IllegalStateException("prepareCommit was not called");
     boolean success = false;
     try {
-      pendingSegnOutput.finishCommit();
-      pendingSegnOutput.close();
+      SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
+      infosWriter.finishCommit(pendingSegnOutput);
       pendingSegnOutput = null;
       success = true;
     } finally {
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
index a3ae4c4f8cb..71e6c8519ea 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
@@ -38,6 +38,8 @@ import org.apache.lucene.index.codecs.standard.StandardCodec;
  *  @lucene.experimental */
 
 public abstract class CodecProvider {
+  private SegmentInfosWriter infosWriter = new DefaultSegmentInfosWriter();
+  private SegmentInfosReader infosReader = new DefaultSegmentInfosReader();
 
   private final HashMap<String, Codec> codecs = new HashMap<String, Codec>();
 
@@ -72,6 +74,14 @@ public abstract class CodecProvider {
   }
 
   public abstract Codec getWriter(SegmentWriteState state);
+  
+  public SegmentInfosWriter getSegmentInfosWriter() {
+    return infosWriter; // initialized to a DefaultSegmentInfosWriter at its declaration
+  }
+  
+  public SegmentInfosReader getSegmentInfosReader() {
+    return infosReader; // initialized to a DefaultSegmentInfosReader at its declaration
+  }
 
   static private final CodecProvider defaultCodecs = new DefaultCodecProvider();
 
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
new file mode 100644
index 00000000000..bb13615d3da
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
@@ -0,0 +1,80 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * Default implementation of {@link SegmentInfosReader}.
+ * @lucene.experimental
+ */
+public class DefaultSegmentInfosReader extends SegmentInfosReader {
+  /** Fills {@code infos} from the segments file: format, version, counter, segment entries, user data, then the trailer check. */
+  @Override
+  public void read(Directory directory, String segmentsFileName, CodecProvider codecs,
+          SegmentInfos infos) throws IOException {
+    IndexInput input = null;
+    try {
+      input = openInput(directory, segmentsFileName);
+      int format = input.readInt();
+  
+      // CURRENT_FORMAT is negative (FORMAT_4_0 == -10); a smaller value is treated as a newer, unknown format
+      if (format < SegmentInfos.CURRENT_FORMAT)
+        throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
+  
+      infos.version = input.readLong(); // read version
+      infos.counter = input.readInt(); // read counter
+  
+      for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
+        infos.add(new SegmentInfo(directory, format, input, codecs));
+      }
+      
+      infos.userData = input.readStringStringMap();
+      finalizeInput(input);
+      
+    } finally {
+      if (input != null) {
+        input.close();
+      }
+    }
+
+  }
+  /** Opens the segments file wrapped in a {@link ChecksumIndexInput}; {@link #finalizeInput} casts its argument to that type. */
+  public IndexInput openInput(Directory dir, String segmentsFileName) throws IOException {
+    IndexInput in = dir.openInput(segmentsFileName);
+    return new ChecksumIndexInput(in);
+    
+  }
+  /** Compares {@code getChecksum()} with the long read next from {@code input}; the input is cast to {@link ChecksumIndexInput}. */
+  public void finalizeInput(IndexInput input) throws IOException, CorruptIndexException {
+    ChecksumIndexInput cksumInput = (ChecksumIndexInput)input;
+    final long checksumNow = cksumInput.getChecksum();
+    final long checksumThen = cksumInput.readLong();
+    if (checksumNow != checksumThen)
+      throw new CorruptIndexException("checksum mismatch in segments file");
+    
+  }
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
new file mode 100644
index 00000000000..ee71c93aa75
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
@@ -0,0 +1,96 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.ChecksumIndexOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Default implementation of {@link SegmentInfosWriter}.
+ * @lucene.experimental
+ */
+public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
+
+  /**
+   * Writes the format marker, the incremented version, the counter, every
+   * {@link SegmentInfo} and the user data to a fresh output obtained from
+   * {@link #createOutput(Directory, String)}, and returns that output still
+   * open for the two-phase commit.
+   * <p>
+   * If anything fails after the output has been created, the output is closed
+   * here before the exception propagates: the caller never receives the
+   * handle in that case and so cannot close it itself.
+   */
+  @Override
+  public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
+          throws IOException {
+    IndexOutput out = createOutput(dir, segmentFileName);
+    boolean success = false;
+    try {
+      out.writeInt(SegmentInfos.CURRENT_FORMAT); // write FORMAT
+      out.writeLong(++infos.version); // every write changes
+                                   // the index
+      out.writeInt(infos.counter); // write counter
+      out.writeInt(infos.size()); // write infos
+      for (SegmentInfo si : infos) {
+        si.write(out);
+      }
+      out.writeStringStringMap(infos.getUserData());
+      success = true;
+      return out;
+    } finally {
+      if (!success) {
+        try {
+          out.close();
+        } catch (Throwable t) {
+          // Suppress so we keep throwing the original exception
+        }
+      }
+    }
+  }
+
+  /**
+   * Creates the output that {@link #writeInfos} writes to. This default wraps
+   * the directory's output in a {@link ChecksumIndexOutput}, which is what
+   * {@link #prepareCommit} and {@link #finishCommit} cast their argument to.
+   */
+  protected IndexOutput createOutput(Directory dir, String segmentFileName)
+      throws IOException {
+    IndexOutput plainOut = dir.createOutput(segmentFileName);
+    ChecksumIndexOutput out = new ChecksumIndexOutput(plainOut);
+    return out;
+  }
+
+  /** Delegates to {@link ChecksumIndexOutput#prepareCommit()}; the argument is cast to {@link ChecksumIndexOutput}. */
+  @Override
+  public void prepareCommit(IndexOutput segmentOutput) throws IOException {
+    ((ChecksumIndexOutput)segmentOutput).prepareCommit();
+  }
+
+  /** Delegates to {@link ChecksumIndexOutput#finishCommit()} and then closes the output. */
+  @Override
+  public void finishCommit(IndexOutput out) throws IOException {
+    ((ChecksumIndexOutput)out).finishCommit();
+    out.close();
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosReader.java b/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosReader.java
new file mode 100644
index 00000000000..4a90fb93ac2
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosReader.java
@@ -0,0 +1,40 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Specifies an API for classes that can read {@link SegmentInfos} information.
+ * @lucene.experimental
+ */
+public abstract class SegmentInfosReader {
+
+  /**
+   * Read {@link SegmentInfos} data from a directory.
+   * @param directory directory to read from
+   * @param segmentsFileName name of the "segments_N" file
+   * @param codecs current codecs
+   * @param infos empty instance to be populated with data
+   * @throws IOException if the segments file cannot be read
+   */
+  public abstract void read(Directory directory, String segmentsFileName, CodecProvider codecs, SegmentInfos infos) throws IOException;
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosWriter.java
new file mode 100644
index 00000000000..19f2e5dc397
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosWriter.java
@@ -0,0 +1,63 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Specifies an API for classes that can write out {@link SegmentInfos} data.
+ * @lucene.experimental
+ */
+public abstract class SegmentInfosWriter {
+
+  /**
+   * Write {@link SegmentInfos} data without closing the output. The returned
+   * output will become finished only after a successful completion of
+   * "two phase commit" that first calls {@link #prepareCommit(IndexOutput)} and
+   * then {@link #finishCommit(IndexOutput)}.
+   * @param dir directory to write data to
+   * @param segmentsFileName name of the "segments_N" file to create
+   * @param infos data to write
+   * @return an instance of {@link IndexOutput} to be used in subsequent "two
+   * phase commit" operations as described above.
+   * @throws IOException if the output cannot be created or written
+   */
+  public abstract IndexOutput writeInfos(Directory dir, String segmentsFileName, SegmentInfos infos) throws IOException;
+  
+  /**
+   * First phase of the two-phase commit - ensure that all output can be
+   * successfully written out.
+   * @param out an instance of {@link IndexOutput} returned from a previous
+   * call to {@link #writeInfos(Directory, String, SegmentInfos)}.
+   * @throws IOException if the first commit phase fails
+   */
+  public abstract void prepareCommit(IndexOutput out) throws IOException;
+  
+  /**
+   * Second phase of the two-phase commit. In this step the output should be
+   * finalized and closed.
+   * @param out an instance of {@link IndexOutput} returned from a previous
+   * call to {@link #writeInfos(Directory, String, SegmentInfos)}.
+   * @throws IOException if the output cannot be finalized or closed
+   */
+  public abstract void finishCommit(IndexOutput out) throws IOException;
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
index 10b24a820c9..86426bb1513 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
@@ -86,6 +86,9 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
   private PagedBytes.Reader termBytesReader;
 
   final HashMap<FieldInfo,FieldIndexReader> fields = new HashMap<FieldInfo,FieldIndexReader>();
+  
+  // start of the field info data
+  protected long dirOffset;
 
   public SimpleStandardTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp)
     throws IOException {
@@ -97,10 +100,8 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
     boolean success = false;
 
     try {
-      CodecUtil.checkHeader(in, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START);
-
-      final long dirOffset = in.readLong();
-
+      
+      readHeader(in);
       indexInterval = in.readInt();
       this.indexDivisor = indexDivisor;
 
@@ -110,10 +111,10 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
         // In case terms index gets loaded, later, on demand
         totalIndexInterval = indexInterval * indexDivisor;
       }
+      
+      seekDir(in, dirOffset);
 
       // Read directory
-      in.seek(dirOffset);
-
       final int numFields = in.readInt();
 
       for(int i=0;i<numFields;i++) {
@@ -143,6 +144,11 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
       }
     }
   }
+  
+  protected void readHeader(IndexInput input) throws IOException {
+    CodecUtil.checkHeader(input, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START);
+    dirOffset = input.readLong(); // long stored right after the codec header; the constructor hands it to seekDir
+  }
 
   private final class FieldIndexReader extends FieldReader {
 
@@ -445,4 +451,8 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
       termBytesReader.close();
     }
   }
+
+  protected void seekDir(IndexInput input, long dirOffset) throws IOException {
+    input.seek(dirOffset); // position at the field directory; the constructor reads numFields next
+  }
 }
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java
index 51d5cb339ad..df7fd85c5c2 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java
@@ -33,7 +33,7 @@ import java.io.IOException;
 
 /** @lucene.experimental */
 public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
-  final private IndexOutput out;
+  protected final IndexOutput out;
 
   final static String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX";
   final static int VERSION_START = 0;
@@ -50,12 +50,15 @@ public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
     state.flushedFiles.add(indexFileName);
     termIndexInterval = state.termIndexInterval;
     out = state.directory.createOutput(indexFileName);
-    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
     fieldInfos = state.fieldInfos;
-
+    writeHeader(out);
+    out.writeInt(termIndexInterval);
+  }
+  
+  protected void writeHeader(IndexOutput out) throws IOException {
+    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); // writeTrailer seeks to headerLength(CODEC_NAME), i.e. just past this
     // Placeholder for dir offset
     out.writeLong(0);
-    out.writeInt(termIndexInterval);
   }
 
   @Override
@@ -179,8 +182,12 @@ public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
       out.writeLong(field.packedIndexStart);
       out.writeLong(field.packedOffsetsStart);
     }
-    out.seek(CodecUtil.headerLength(CODEC_NAME));
-    out.writeLong(dirStart);
+    writeTrailer(dirStart);
     out.close();
   }
-}
\ No newline at end of file
+
+  protected void writeTrailer(long dirStart) throws IOException {
+    out.seek(CodecUtil.headerLength(CODEC_NAME)); // back to the placeholder long that writeHeader left after the codec header
+    out.writeLong(dirStart); // overwrite the placeholder with the real directory start
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
index 483808e8907..3f6114ed0ea 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
@@ -71,6 +71,9 @@ public class StandardTermsDictReader extends FieldsProducer {
 
   // Reads the terms index
   private StandardTermsIndexReader indexReader;
+  
+  // keeps the dirStart offset
+  protected long dirOffset;
 
   // Used as key for the terms cache
   private static class FieldAndTerm extends DoubleBarrelLRUCache.CloneableKey {
@@ -116,15 +119,13 @@ public class StandardTermsDictReader extends FieldsProducer {
 
     boolean success = false;
     try {
-      CodecUtil.checkHeader(in, StandardTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
-
-      final long dirOffset = in.readLong();
+      readHeader(in);
 
       // Have PostingsReader init itself
       postingsReader.init(in);
 
       // Read per-field details
-      in.seek(dirOffset);
+      seekDir(in, dirOffset);
 
       final int numFields = in.readInt();
 
@@ -151,6 +152,16 @@ public class StandardTermsDictReader extends FieldsProducer {
     this.indexReader = indexReader;
   }
 
+  protected void readHeader(IndexInput input) throws IOException { // validates the codec header and records dirOffset
+    CodecUtil.checkHeader(input, StandardTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
+    dirOffset = input.readLong(); // read from the stream we were given, not the 'in' field; the constructor passes this to seekDir
+  }
+  
+  protected void seekDir(IndexInput input, long dirOffset)
+      throws IOException {
+    input.seek(dirOffset); // position at the per-field details; the constructor reads numFields next
+  }
+  
   @Override
   public void loadTermsIndex(int indexDivisor) throws IOException {
     indexReader.loadTermsIndex(indexDivisor);
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
index 9a001d2a56c..8c973041098 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
@@ -55,7 +55,7 @@ public class StandardTermsDictWriter extends FieldsConsumer {
 
   private final DeltaBytesWriter termWriter;
 
-  final IndexOutput out;
+  protected final IndexOutput out;
   final StandardPostingsWriter postingsWriter;
   final FieldInfos fieldInfos;
   FieldInfo currentField;
@@ -77,18 +77,20 @@ public class StandardTermsDictWriter extends FieldsConsumer {
     state.flushedFiles.add(termsFileName);
 
     fieldInfos = state.fieldInfos;
-
-    // Count indexed fields up front
-    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); 
-
-    out.writeLong(0);                             // leave space for end index pointer
-
+    writeHeader(out);
     termWriter = new DeltaBytesWriter(out);
     currentField = null;
     this.postingsWriter = postingsWriter;
 
     postingsWriter.start(out);                          // have consumer write its format/header
   }
+  
+  protected void writeHeader(IndexOutput out) throws IOException {
+    // Codec header first, then a placeholder long that writeTrailer later overwrites
+    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); 
+
+    out.writeLong(0);                             // leave space for end index pointer    
+  }
 
   @Override
   public TermsConsumer addField(FieldInfo field) {
@@ -115,8 +117,7 @@ public class StandardTermsDictWriter extends FieldsConsumer {
         out.writeLong(field.numTerms);
         out.writeLong(field.termsStartPointer);
       }
-      out.seek(CodecUtil.headerLength(CODEC_NAME));
-      out.writeLong(dirStart);
+      writeTrailer(dirStart);
     } finally {
       try {
         out.close();
@@ -130,6 +131,12 @@ public class StandardTermsDictWriter extends FieldsConsumer {
     }
   }
 
+  protected void writeTrailer(long dirStart) throws IOException {
+    // Seek back to the long reserved right after the codec header and store the directory start there
+    out.seek(CodecUtil.headerLength(CODEC_NAME));
+    out.writeLong(dirStart);    
+  }
+  
   class TermsWriter extends TermsConsumer {
     private final FieldInfo fieldInfo;
     private final StandardPostingsWriter postingsWriter;