diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a894549303b..d41a7965301 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -189,6 +189,10 @@ New features
* LUCENE-1810: Added FieldSelectorResult.LATENT to not cache lazy loaded fields
(Tim Smith, Grant Ingersoll)
+* LUCENE-2373: Extend CodecProvider to use SegmentInfosWriter and
+ SegmentInfosReader to allow customization of SegmentInfos data.
+ (Andrzej Bialecki)
+
Optimizations
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt
index 55e5d80b321..c87674ad77a 100644
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@@ -15,6 +15,11 @@ New Features
pages from the buffer cache, since fadvise/madvise do not seem.
(Michael McCandless)
+ * LUCENE-2373: Added a Codec implementation that works with append-only
+ filesystems (such as Hadoop DFS). SegmentInfos writing/reading
+ code is refactored to support append-only FS, and to allow for future
+ customization of per-segment information. (Andrzej Bialecki)
+
======================= Lucene 3.x (not yet released) =======================
Changes in backwards compatibility policy
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
new file mode 100644
index 00000000000..72c772d8742
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
@@ -0,0 +1,140 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReaderImpl;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriterImpl;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This codec extends {@link StandardCodec} to work on append-only outputs, such
+ * as plain output streams and append-only filesystems.
+ *
+ * <p>Note: the compound file format feature is not compatible with
+ * this codec. You must call both
+ * LogMergePolicy.setUseCompoundFile(false) and
+ * LogMergePolicy.setUseCompoundDocStore(false) to disable
+ * compound file format.
+ * @lucene.experimental
+ */
+public class AppendingCodec extends Codec {
+  /** Value assigned to the inherited {@code name} field; it never changes, hence final. */
+  public static final String CODEC_NAME = "Appending";
+
+  /** Creates the codec and sets its name to {@link #CODEC_NAME}. */
+  public AppendingCodec() {
+    name = CODEC_NAME;
+  }
+
+  /**
+   * Creates the postings writer, the terms index writer and finally the terms dictionary writer.
+   * If a later constructor throws, the writers opened so far are closed before the exception propagates.
+   */
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    StandardPostingsWriter docsWriter = new StandardPostingsWriterImpl(state);
+    boolean success = false;
+    AppendingTermsIndexWriter indexWriter = null;
+    try {
+      indexWriter = new AppendingTermsIndexWriter(state);
+      success = true;
+    } finally {
+      if (!success) {
+        docsWriter.close();
+      }
+    }
+    success = false;
+    try {
+      FieldsConsumer ret = new AppendingTermsDictWriter(indexWriter, state, docsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docsWriter.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  /** Read-side counterpart of {@link #fieldsConsumer}, with the same close-on-failure handling. */
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    StandardPostingsReader docsReader = new StandardPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize);
+    StandardTermsIndexReader indexReader;
+    boolean success = false;
+    try {
+      indexReader = new AppendingTermsIndexReader(state.dir, state.fieldInfos,
+          state.segmentInfo.name, state.termsIndexDivisor,
+          BytesRef.getUTF8SortedAsUnicodeComparator());
+      success = true;
+    } finally {
+      if (!success) {
+        docsReader.close();
+      }
+    }
+    success = false;
+    try {
+      FieldsProducer ret = new AppendingTermsDictReader(indexReader,
+          state.dir, state.fieldInfos, state.segmentInfo.name, docsReader, state.readBufferSize,
+          BytesRef.getUTF8SortedAsUnicodeComparator(), StandardCodec.TERMS_CACHE_SIZE);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docsReader.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  /** Delegates to the standard postings, terms dictionary and terms index readers to fill {@code files}. */
+  @Override
+  public void files(Directory dir, SegmentInfo segmentInfo, Set<String> files) throws IOException {
+    StandardPostingsReaderImpl.files(dir, segmentInfo, files);
+    StandardTermsDictReader.files(dir, segmentInfo, files);
+    SimpleStandardTermsIndexReader.files(dir, segmentInfo, files);
+  }
+
+  /** Reports the same file extensions as {@link StandardCodec}. */
+  @Override
+  public void getExtensions(Set<String> extensions) {
+    StandardCodec.getStandardExtensions(extensions);
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosReader.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosReader.java
new file mode 100644
index 00000000000..bd4b26c5c9a
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosReader.java
@@ -0,0 +1,41 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.codecs.DefaultSegmentInfosReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
+/** {@link DefaultSegmentInfosReader} variant whose input hooks only open and close the segments file. */
+public class AppendingSegmentInfosReader extends DefaultSegmentInfosReader {
+
+  /** Opens {@code segmentsFileName} straight from {@code dir}; the input is returned unwrapped. */
+  @Override
+  public IndexInput openInput(Directory dir, String segmentsFileName) throws IOException {
+    return dir.openInput(segmentsFileName);
+  }
+
+  /** Closes {@code input}; nothing else is read from or checked on it here. */
+  @Override
+  public void finalizeInput(IndexInput input) throws IOException, CorruptIndexException {
+    input.close();
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosWriter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosWriter.java
new file mode 100644
index 00000000000..45d53e01955
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingSegmentInfosWriter.java
@@ -0,0 +1,44 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+
+/** {@link DefaultSegmentInfosWriter} variant: plain output, no-op prepare phase, close-only finish. */
+public class AppendingSegmentInfosWriter extends DefaultSegmentInfosWriter {
+  /** Creates {@code segmentsFileName} directly in {@code dir}; the output is returned unwrapped. */
+  @Override
+  protected IndexOutput createOutput(Directory dir, String segmentsFileName) throws IOException {
+    return dir.createOutput(segmentsFileName);
+  }
+
+  @Override
+  public void prepareCommit(IndexOutput segmentOutput) throws IOException {
+    // noop: this writer does nothing in the prepare phase
+  }
+
+  /** Completes the commit by closing {@code out}. */
+  @Override
+  public void finishCommit(IndexOutput out) throws IOException {
+    out.close();
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
new file mode 100644
index 00000000000..370ddc1d2c7
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
@@ -0,0 +1,55 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+
+/** Terms dictionary reader for files written by {@link AppendingTermsDictWriter}. */
+public class AppendingTermsDictReader extends StandardTermsDictReader {
+  /** Passes every argument through to the {@link StandardTermsDictReader} constructor. */
+  public AppendingTermsDictReader(StandardTermsIndexReader indexReader,
+      Directory dir, FieldInfos fieldInfos, String segment,
+      StandardPostingsReader postingsReader, int readBufferSize,
+      Comparator<BytesRef> termComp, int termsCacheSize) throws IOException {
+    super(indexReader, dir, fieldInfos, segment, postingsReader, readBufferSize, termComp, termsCacheSize);
+  }
+
+  /** Checks the header against the codec name used by {@link AppendingTermsDictWriter}. */
+  @Override
+  protected void readHeader(IndexInput in) throws IOException {
+    CodecUtil.checkHeader(in, AppendingTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
+  }
+
+  /** Ignores {@code dirOffset}: reads the long stored in the file's last 8 bytes and seeks there. */
+  @Override
+  protected void seekDir(IndexInput in, long dirOffset) throws IOException {
+    in.seek(in.length() - Long.SIZE / 8);
+    in.seek(in.readLong());
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
new file mode 100644
index 00000000000..011687024d4
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
@@ -0,0 +1,49 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexWriter;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+
+/** Terms dictionary writer that writes its own header name and stores the directory start in a trailer. */
+public class AppendingTermsDictWriter extends StandardTermsDictWriter {
+  final static String CODEC_NAME = "APPENDING_TERMS_DICT";
+
+  public AppendingTermsDictWriter(StandardTermsIndexWriter indexWriter, SegmentWriteState state,
+      StandardPostingsWriter postingsWriter, Comparator<BytesRef> termComp) throws IOException {
+    super(indexWriter, state, postingsWriter, termComp);
+  }
+
+  @Override
+  protected void writeHeader(IndexOutput out) throws IOException {
+    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); // header carries this writer's own codec name
+  }
+
+  @Override
+  protected void writeTrailer(long dirStart) throws IOException {
+    out.writeLong(dirStart); // goes through the out field, which this class does not declare itself
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexReader.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexReader.java
new file mode 100644
index 00000000000..e61fe8c667e
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexReader.java
@@ -0,0 +1,49 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+
+/** Terms index reader for files written by {@link AppendingTermsIndexWriter}. */
+public class AppendingTermsIndexReader extends SimpleStandardTermsIndexReader {
+  public AppendingTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment,
+      int indexDivisor, Comparator<BytesRef> termComp) throws IOException {
+    super(dir, fieldInfos, segment, indexDivisor, termComp);
+  }
+
+  /** Checks the header against the writer's codec name and the version the writer currently emits. */
+  @Override
+  protected void readHeader(IndexInput input) throws IOException {
+    CodecUtil.checkHeader(input, AppendingTermsIndexWriter.CODEC_NAME, AppendingTermsIndexWriter.VERSION_CURRENT);
+  }
+
+  /** Ignores {@code dirOffset}: reads the long stored in the file's last 8 bytes and seeks there. */
+  @Override
+  protected void seekDir(IndexInput input, long dirOffset) throws IOException {
+    input.seek(input.length() - Long.SIZE / 8);
+    input.seek(input.readLong());
+  }
+}
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexWriter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexWriter.java
new file mode 100644
index 00000000000..6a3f728fc58
--- /dev/null
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsIndexWriter.java
@@ -0,0 +1,45 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexWriter;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.CodecUtil;
+
+public class AppendingTermsIndexWriter extends SimpleStandardTermsIndexWriter {
+  static final String CODEC_NAME = "APPENDING_TERMS_INDEX"; // codec name written into the file header
+  static final int VERSION_START = 0; // initial format version
+  static final int VERSION_CURRENT = VERSION_START; // version written by writeHeader
+
+  public AppendingTermsIndexWriter(SegmentWriteState state) throws IOException {
+    super(state);
+  }
+
+  @Override
+  protected void writeTrailer(long dirStart) throws IOException {
+    out.writeLong(dirStart); // goes through the out field, which this class does not declare itself
+  }
+
+  @Override
+  protected void writeHeader(IndexOutput out) throws IOException {
+    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
+  }
+}
diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
new file mode 100644
index 00000000000..cef9ece0b54
--- /dev/null
+++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
@@ -0,0 +1,170 @@
+package org.apache.lucene.index.codecs.appending;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.SegmentInfosReader;
+import org.apache.lucene.index.codecs.SegmentInfosWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
+
+/** Round-trips a two-document index through {@link AppendingCodec} on outputs that reject seek(). */
+public class TestAppendingCodec extends LuceneTestCase {
+
+  /** Provider that returns the appending codec and segment-infos reader/writer for every request. */
+  static class AppendingCodecProvider extends CodecProvider {
+    Codec appending = new AppendingCodec();
+    SegmentInfosWriter infosWriter = new AppendingSegmentInfosWriter();
+    SegmentInfosReader infosReader = new AppendingSegmentInfosReader();
+
+    @Override
+    public Codec lookup(String name) {
+      return appending;
+    }
+
+    @Override
+    public Codec getWriter(SegmentWriteState state) {
+      return appending;
+    }
+
+    @Override
+    public SegmentInfosReader getSegmentInfosReader() {
+      return infosReader;
+    }
+
+    @Override
+    public SegmentInfosWriter getSegmentInfosWriter() {
+      return infosWriter;
+    }
+  }
+
+  /** Forwards every call to the wrapped output except {@code seek}, which always throws. */
+  private static class AppendingIndexOutputWrapper extends IndexOutput {
+    IndexOutput delegate;
+
+    public AppendingIndexOutputWrapper(IndexOutput wrapped) {
+      this.delegate = wrapped;
+    }
+
+    @Override
+    public void writeByte(byte b) throws IOException {
+      delegate.writeByte(b);
+    }
+
+    @Override
+    public void writeBytes(byte[] b, int offset, int length) throws IOException {
+      delegate.writeBytes(b, offset, length);
+    }
+
+    @Override
+    public void flush() throws IOException {
+      delegate.flush();
+    }
+
+    @Override
+    public void close() throws IOException {
+      delegate.close();
+    }
+
+    @Override
+    public long getFilePointer() {
+      return delegate.getFilePointer();
+    }
+
+    @Override
+    public long length() throws IOException {
+      return delegate.length();
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+      throw new UnsupportedOperationException("seek() is unsupported");
+    }
+  }
+
+  /** RAMDirectory that wraps each created output in {@link AppendingIndexOutputWrapper}. */
+  @SuppressWarnings("serial")
+  private static class AppendingRAMDirectory extends RAMDirectory {
+    @Override
+    public IndexOutput createOutput(String name) throws IOException {
+      return new AppendingIndexOutputWrapper(super.createOutput(name));
+    }
+  }
+
+  private static final String text = "the quick brown fox jumped over the lazy dog";
+
+  /** Indexes {@link #text} twice, optimizes, then checks stored fields, terms and postings. */
+  public void testCodec() throws Exception {
+    Directory dir = new AppendingRAMDirectory();
+    IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());
+    cfg.setCodecProvider(new AppendingCodecProvider());
+    // both compound-file options are switched off before the writer is created
+    ((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false);
+    ((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundDocStore(false);
+    IndexWriter writer = new IndexWriter(dir, cfg);
+    Document doc = new Document();
+    doc.add(new Field("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+    writer.addDocument(doc);
+    writer.commit();
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+    IndexReader reader = IndexReader.open(dir, null, true, 1, new AppendingCodecProvider());
+    assertEquals(2, reader.numDocs());
+    doc = reader.document(0);
+    assertEquals(text, doc.get("f"));
+    Fields fields = MultiFields.getFields(reader);
+    Terms terms = fields.terms("f");
+    assertNotNull(terms);
+    TermsEnum te = terms.iterator();
+    // "the" is sought last, so the enum is positioned on it for the postings checks below
+    for (String term : new String[] {"quick", "brown", "fox", "jumped", "over", "lazy", "dog", "the"}) {
+      assertEquals(SeekStatus.FOUND, te.seek(new BytesRef(term)));
+    }
+    DocsEnum de = te.docs(null, null);
+    assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS);
+    assertEquals(2, de.freq());
+    assertTrue(de.advance(1) != DocsEnum.NO_MORE_DOCS);
+    assertTrue(de.advance(2) == DocsEnum.NO_MORE_DOCS);
+    reader.close();
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
index cf6c4716b56..f5f46133022 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
@@ -133,12 +133,14 @@ public final class SegmentInfo {
/**
* Construct a new SegmentInfo instance by reading a
* previously saved SegmentInfo from input.
+ * Note: this is public only to allow access from
+ * the codecs package.
*
* @param dir directory to load from
* @param format format of the segments info file
* @param input input handle to read segment info from
*/
- SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
+ public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
this.dir = dir;
name = input.readString();
docCount = input.readInt();
@@ -373,7 +375,7 @@ public final class SegmentInfo {
}
/** Save this segment's info. */
- void write(IndexOutput output)
+ public void write(IndexOutput output)
throws IOException {
assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
output.writeString(name);
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
index 3e75fa248d0..efcba816a4e 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -20,10 +20,10 @@ package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.ChecksumIndexOutput;
-import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.SegmentInfosReader;
+import org.apache.lucene.index.codecs.SegmentInfosWriter;
import org.apache.lucene.util.ThreadInterruptedException;
import java.io.FileNotFoundException;
@@ -65,7 +65,7 @@ public final class SegmentInfos extends Vector {
public static final int FORMAT_4_0 = -10;
/* This must always point to the most recent file format. */
- static final int CURRENT_FORMAT = FORMAT_4_0;
+ public static final int CURRENT_FORMAT = FORMAT_4_0;
public int counter = 0; // used to name new segments
@@ -73,20 +73,30 @@ public final class SegmentInfos extends Vector {
* counts how often the index has been changed by adding or deleting docs.
* starting with the current time in milliseconds forces to create unique version numbers.
*/
- private long version = System.currentTimeMillis();
+ public long version = System.currentTimeMillis();
private long generation = 0; // generation of the "segments_N" for the next commit
private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
// or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit
- private Map userData = Collections.emptyMap(); // Opaque Map that user can specify during IndexWriter.commit
+ public Map userData = Collections.emptyMap(); // Opaque Map that user can specify during IndexWriter.commit
+
+ private CodecProvider codecs;
/**
* If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream.
*/
private static PrintStream infoStream;
+
+ public SegmentInfos() {
+ this(CodecProvider.getDefault());
+ }
+
+ public SegmentInfos(CodecProvider codecs) {
+ this.codecs = codecs;
+ }
public final SegmentInfo info(int i) {
return get(i);
@@ -205,42 +215,22 @@ public final class SegmentInfos extends Vector {
*/
public final void read(Directory directory, String segmentFileName,
CodecProvider codecs) throws CorruptIndexException, IOException {
+ this.codecs = codecs;
boolean success = false;
// Clear any previous segments:
clear();
- ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName));
-
generation = generationFromSegmentsFileName(segmentFileName);
lastGeneration = generation;
try {
- int format = input.readInt();
-
- // check that it is a format we can understand
- if (format < CURRENT_FORMAT)
- throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
-
- version = input.readLong(); // read version
- counter = input.readInt(); // read counter
-
- for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
- add(new SegmentInfo(directory, format, input, codecs));
- }
-
- userData = input.readStringStringMap();
-
- final long checksumNow = input.getChecksum();
- final long checksumThen = input.readLong();
- if (checksumNow != checksumThen)
- throw new CorruptIndexException("checksum mismatch in segments file");
-
+ SegmentInfosReader infosReader = codecs.getSegmentInfosReader();
+ infosReader.read(directory, segmentFileName, codecs, this);
success = true;
}
finally {
- input.close();
if (!success) {
// Clear any segment infos we had loaded so we
// have a clean slate on retry:
@@ -261,6 +251,7 @@ public final class SegmentInfos extends Vector {
public final void read(Directory directory, final CodecProvider codecs) throws CorruptIndexException, IOException {
generation = lastGeneration = -1;
+ this.codecs = codecs;
new FindSegmentsFile(directory) {
@@ -274,7 +265,7 @@ public final class SegmentInfos extends Vector {
// Only non-null after prepareCommit has been called and
// before finishCommit is called
- ChecksumIndexOutput pendingSegnOutput;
+ IndexOutput pendingSegnOutput;
private void write(Directory directory) throws IOException {
@@ -287,21 +278,14 @@ public final class SegmentInfos extends Vector {
generation++;
}
- ChecksumIndexOutput segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName));
+ IndexOutput segnOutput = null;
boolean success = false;
try {
- segnOutput.writeInt(CURRENT_FORMAT); // write FORMAT
- segnOutput.writeLong(++version); // every write changes
- // the index
- segnOutput.writeInt(counter); // write counter
- segnOutput.writeInt(size()); // write infos
- for (SegmentInfo si : this) {
- si.write(segnOutput);
- }
- segnOutput.writeStringStringMap(userData);
- segnOutput.prepareCommit();
+ SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
+ segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
+ infosWriter.prepareCommit(segnOutput);
success = true;
pendingSegnOutput = segnOutput;
} finally {
@@ -785,8 +769,8 @@ public final class SegmentInfos extends Vector {
throw new IllegalStateException("prepareCommit was not called");
boolean success = false;
try {
- pendingSegnOutput.finishCommit();
- pendingSegnOutput.close();
+ SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
+ infosWriter.finishCommit(pendingSegnOutput);
pendingSegnOutput = null;
success = true;
} finally {
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
index a3ae4c4f8cb..71e6c8519ea 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
@@ -38,6 +38,8 @@ import org.apache.lucene.index.codecs.standard.StandardCodec;
* @lucene.experimental */
public abstract class CodecProvider {
+ private SegmentInfosWriter infosWriter = new DefaultSegmentInfosWriter();
+ private SegmentInfosReader infosReader = new DefaultSegmentInfosReader();
private final HashMap codecs = new HashMap();
@@ -72,6 +74,14 @@ public abstract class CodecProvider {
}
public abstract Codec getWriter(SegmentWriteState state);
+ /** Returns the {@link SegmentInfosWriter} held by this provider; the field is initialized to a {@link DefaultSegmentInfosWriter}. */
+ public SegmentInfosWriter getSegmentInfosWriter() {
+ return infosWriter;
+ }
+ /** Returns the {@link SegmentInfosReader} held by this provider; the field is initialized to a {@link DefaultSegmentInfosReader}. */
+ public SegmentInfosReader getSegmentInfosReader() {
+ return infosReader;
+ }
static private final CodecProvider defaultCodecs = new DefaultCodecProvider();
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
new file mode 100644
index 00000000000..bb13615d3da
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
@@ -0,0 +1,80 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * Default implementation of {@link SegmentInfosReader}.
+ * @lucene.experimental
+ */
+public class DefaultSegmentInfosReader extends SegmentInfosReader {
+
+  /** Populates {@code infos} from the named segments file, then calls {@link #finalizeInput}. */
+  @Override
+  public void read(Directory directory, String segmentsFileName, CodecProvider codecs,
+      SegmentInfos infos) throws IOException {
+    // Opened before the try: if this throws there is no input to close yet.
+    final IndexInput segmentsIn = openInput(directory, segmentsFileName);
+    try {
+      final int format = segmentsIn.readInt();
+      // Refuse formats this reader cannot parse.
+      // NOTE(review): presumably format ids get smaller as they get newer - confirm.
+      if (format < SegmentInfos.CURRENT_FORMAT) {
+        throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
+      }
+      infos.version = segmentsIn.readLong();
+      infos.counter = segmentsIn.readInt();
+      // A count comes first, followed by that many serialized SegmentInfo entries.
+      final int segmentCount = segmentsIn.readInt();
+      for (int seg = 0; seg < segmentCount; seg++) {
+        infos.add(new SegmentInfo(directory, format, segmentsIn, codecs));
+      }
+      infos.userData = segmentsIn.readStringStringMap();
+      finalizeInput(segmentsIn);
+    } finally {
+      segmentsIn.close();
+    }
+  }
+
+  /** Opens the named segments file and wraps it in a {@link ChecksumIndexInput}. */
+  public IndexInput openInput(Directory dir, String segmentsFileName) throws IOException {
+    return new ChecksumIndexInput(dir.openInput(segmentsFileName));
+  }
+
+  /**
+   * Takes the input's current checksum, reads the next long from it, and throws
+   * {@link CorruptIndexException} when the two differ. The input is cast to
+   * {@link ChecksumIndexInput}, so it has to be one (as {@link #openInput} returns here).
+   */
+  public void finalizeInput(IndexInput input) throws IOException, CorruptIndexException {
+    final ChecksumIndexInput checked = (ChecksumIndexInput) input;
+    final long computed = checked.getChecksum();
+    final long stored = checked.readLong();
+    if (computed != stored) {
+      throw new CorruptIndexException("checksum mismatch in segments file");
+    }
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
new file mode 100644
index 00000000000..ee71c93aa75
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
@@ -0,0 +1,105 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.ChecksumIndexOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Default implementation of {@link SegmentInfosWriter}.
+ * @lucene.experimental
+ */
+public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
+
+  /**
+   * Writes the format, version, counter, every {@link SegmentInfo} and the user
+   * data to a new output and returns that output still open.
+   * <p>
+   * {@code infos.version} is incremented as a side effect. If anything fails
+   * after the output was created, the output is closed here before the
+   * exception propagates, because the caller never receives a reference to it.
+   *
+   * @param dir directory to create the segments file in
+   * @param segmentFileName name of the segments file to create
+   * @param infos data to write
+   * @return the open output, to be passed to {@link #prepareCommit(IndexOutput)}
+   *         and {@link #finishCommit(IndexOutput)}
+   * @throws IOException if creating or writing the output fails
+   */
+  @Override
+  public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
+      throws IOException {
+    IndexOutput out = createOutput(dir, segmentFileName);
+    boolean success = false;
+    try {
+      out.writeInt(SegmentInfos.CURRENT_FORMAT); // write FORMAT
+      out.writeLong(++infos.version); // every write changes the index
+      out.writeInt(infos.counter); // write counter
+      out.writeInt(infos.size()); // write infos
+      for (SegmentInfo si : infos) {
+        si.write(out);
+      }
+      out.writeStringStringMap(infos.getUserData());
+      success = true;
+      return out;
+    } finally {
+      if (!success) {
+        try {
+          out.close();
+        } catch (Throwable ignored) {
+          // Suppress so the exception that caused the failure keeps propagating.
+        }
+      }
+    }
+  }
+
+  /**
+   * Creates the output that {@link #writeInfos} writes to: the directory's
+   * output for the file, wrapped in a {@link ChecksumIndexOutput}.
+   */
+  protected IndexOutput createOutput(Directory dir, String segmentFileName)
+      throws IOException {
+    IndexOutput plainOut = dir.createOutput(segmentFileName);
+    ChecksumIndexOutput out = new ChecksumIndexOutput(plainOut);
+    return out;
+  }
+
+  /**
+   * Delegates to {@link ChecksumIndexOutput#prepareCommit()}; the output must be
+   * the {@link ChecksumIndexOutput} returned by {@link #writeInfos}.
+   */
+  @Override
+  public void prepareCommit(IndexOutput segmentOutput) throws IOException {
+    ((ChecksumIndexOutput)segmentOutput).prepareCommit();
+  }
+
+  /**
+   * Delegates to {@link ChecksumIndexOutput#finishCommit()} and then closes the
+   * output; the output must be a {@link ChecksumIndexOutput}.
+   */
+  @Override
+  public void finishCommit(IndexOutput out) throws IOException {
+    ((ChecksumIndexOutput)out).finishCommit();
+    out.close();
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosReader.java b/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosReader.java
new file mode 100644
index 00000000000..4a90fb93ac2
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosReader.java
@@ -0,0 +1,40 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Specifies an API for classes that can read {@link SegmentInfos} information.
+ * @lucene.experimental
+ */
+public abstract class SegmentInfosReader {
+
+ /**
+ * Read {@link SegmentInfos} data from a directory into {@code infos}.
+ * @param directory directory to read from
+ * @param segmentsFileName name of the "segments_N" file to read
+ * @param codecs current codecs; the default implementation passes them to each SegmentInfo it constructs
+ * @param infos empty instance to be populated with data
+ * @throws IOException if the segments file cannot be read
+ */
+ public abstract void read(Directory directory, String segmentsFileName, CodecProvider codecs, SegmentInfos infos) throws IOException;
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosWriter.java
new file mode 100644
index 00000000000..19f2e5dc397
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/SegmentInfosWriter.java
@@ -0,0 +1,63 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Specifies an API for classes that can write out {@link SegmentInfos} data.
+ * @lucene.experimental
+ */
+public abstract class SegmentInfosWriter {
+
+ /**
+ * Write {@link SegmentInfos} data without closing the output. The returned
+ * output is complete only after the "two-phase commit" succeeds: the caller
+ * first calls {@link #prepareCommit(IndexOutput)} and
+ * then {@link #finishCommit(IndexOutput)}.
+ * @param dir directory to write data to
+ * @param segmentsFileName name of the "segments_N" file to create
+ * @param infos data to write
+ * @return an instance of {@link IndexOutput} to be used in subsequent
+ * two-phase commit operations as described above.
+ * @throws IOException if the output cannot be created or written
+ */
+ public abstract IndexOutput writeInfos(Directory dir, String segmentsFileName, SegmentInfos infos) throws IOException;
+
+ /**
+ * First phase of the two-phase commit - ensure that all output can be
+ * successfully written out.
+ * @param out an instance of {@link IndexOutput} returned from a previous
+ * call to {@link #writeInfos(Directory, String, SegmentInfos)}.
+ * @throws IOException if the output cannot be prepared for commit
+ */
+ public abstract void prepareCommit(IndexOutput out) throws IOException;
+
+ /**
+ * Second phase of the two-phase commit. In this step the output should be
+ * finalized and closed.
+ * @param out an instance of {@link IndexOutput} returned from a previous
+ * call to {@link #writeInfos(Directory, String, SegmentInfos)}.
+ * @throws IOException if finalizing or closing the output fails
+ */
+ public abstract void finishCommit(IndexOutput out) throws IOException;
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
index 10b24a820c9..86426bb1513 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
@@ -86,6 +86,9 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
private PagedBytes.Reader termBytesReader;
final HashMap fields = new HashMap();
+
+ // start of the field info data
+ protected long dirOffset;
public SimpleStandardTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator termComp)
throws IOException {
@@ -97,10 +100,8 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
boolean success = false;
try {
- CodecUtil.checkHeader(in, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START);
-
- final long dirOffset = in.readLong();
-
+
+ readHeader(in);
indexInterval = in.readInt();
this.indexDivisor = indexDivisor;
@@ -110,10 +111,10 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
// In case terms index gets loaded, later, on demand
totalIndexInterval = indexInterval * indexDivisor;
}
+
+ seekDir(in, dirOffset);
// Read directory
- in.seek(dirOffset);
-
final int numFields = in.readInt();
for(int i=0;i