mirror of https://github.com/apache/lucene.git
LUCENE-2373 Create a Codec to work with streaming and append-only filesystems.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@962694 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c5bc95a357
commit
b2eb10239e
|
@ -189,6 +189,10 @@ New features
|
|||
* LUCENE-1810: Added FieldSelectorResult.LATENT to not cache lazy loaded fields
|
||||
(Tim Smith, Grant Ingersoll)
|
||||
|
||||
* LUCENE-2373: Extend CodecProvider to use SegmentInfosWriter and
|
||||
SegmentInfosReader to allow customization of SegmentInfos data.
|
||||
(Andrzej Bialecki)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
|
||||
|
|
|
@ -15,6 +15,11 @@ New Features
|
|||
pages from the buffer cache, since fadvise/madvise do not seem.
|
||||
(Michael McCandless)
|
||||
|
||||
* LUCENE-2373: Added a Codec implementation that works with append-only
|
||||
filesystems (such as e.g. Hadoop DFS). SegmentInfos writing/reading
|
||||
code is refactored to support append-only FS, and to allow for future
|
||||
customization of per-segment information. (Andrzej Bialecki)
|
||||
|
||||
======================= Lucene 3.x (not yet released) =======================
|
||||
|
||||
Changes in backwards compatibility policy
|
||||
|
|
|
@ -0,0 +1,140 @@
|
|||
package org.apache.lucene.index.codecs.appending;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
|
||||
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
||||
import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
|
||||
import org.apache.lucene.index.codecs.standard.StandardPostingsReaderImpl;
|
||||
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
|
||||
import org.apache.lucene.index.codecs.standard.StandardPostingsWriterImpl;
|
||||
import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
|
||||
import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* This codec extends {@link StandardCodec} to work on append-only outputs, such
|
||||
* as plain output streams and append-only filesystems.
|
||||
*
|
||||
* <p>Note: compound file format feature is not compatible with
|
||||
* this codec. You must call both
|
||||
* LogMergePolicy.setUseCompoundFile(false) and
|
||||
* LogMergePolicy.setUseCompoundDocStore(false) to disable
|
||||
* compound file format.</p>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AppendingCodec extends Codec {
|
||||
public static String CODEC_NAME = "Appending";
|
||||
|
||||
public AppendingCodec() {
|
||||
name = CODEC_NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
|
||||
throws IOException {
|
||||
StandardPostingsWriter docsWriter = new StandardPostingsWriterImpl(state);
|
||||
boolean success = false;
|
||||
AppendingTermsIndexWriter indexWriter = null;
|
||||
try {
|
||||
indexWriter = new AppendingTermsIndexWriter(state);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
docsWriter.close();
|
||||
}
|
||||
}
|
||||
success = false;
|
||||
try {
|
||||
FieldsConsumer ret = new AppendingTermsDictWriter(indexWriter, state, docsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
success = true;
|
||||
return ret;
|
||||
} finally {
|
||||
if (!success) {
|
||||
try {
|
||||
docsWriter.close();
|
||||
} finally {
|
||||
indexWriter.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldsProducer fieldsProducer(SegmentReadState state)
|
||||
throws IOException {
|
||||
StandardPostingsReader docsReader = new StandardPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize);
|
||||
StandardTermsIndexReader indexReader;
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
indexReader = new AppendingTermsIndexReader(state.dir,
|
||||
state.fieldInfos,
|
||||
state.segmentInfo.name,
|
||||
state.termsIndexDivisor,
|
||||
BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
docsReader.close();
|
||||
}
|
||||
}
|
||||
success = false;
|
||||
try {
|
||||
FieldsProducer ret = new AppendingTermsDictReader(indexReader,
|
||||
state.dir, state.fieldInfos, state.segmentInfo.name,
|
||||
docsReader,
|
||||
state.readBufferSize,
|
||||
BytesRef.getUTF8SortedAsUnicodeComparator(),
|
||||
StandardCodec.TERMS_CACHE_SIZE);
|
||||
success = true;
|
||||
return ret;
|
||||
} finally {
|
||||
if (!success) {
|
||||
try {
|
||||
docsReader.close();
|
||||
} finally {
|
||||
indexReader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void files(Directory dir, SegmentInfo segmentInfo, Set<String> files)
|
||||
throws IOException {
|
||||
StandardPostingsReaderImpl.files(dir, segmentInfo, files);
|
||||
StandardTermsDictReader.files(dir, segmentInfo, files);
|
||||
SimpleStandardTermsIndexReader.files(dir, segmentInfo, files);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getExtensions(Set<String> extensions) {
|
||||
StandardCodec.getStandardExtensions(extensions);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
package org.apache.lucene.index.codecs.appending;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.codecs.DefaultSegmentInfosReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
public class AppendingSegmentInfosReader extends DefaultSegmentInfosReader {
|
||||
|
||||
@Override
|
||||
public void finalizeInput(IndexInput input) throws IOException,
|
||||
CorruptIndexException {
|
||||
input.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexInput openInput(Directory dir, String segmentsFileName)
|
||||
throws IOException {
|
||||
return dir.openInput(segmentsFileName);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
package org.apache.lucene.index.codecs.appending;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
public class AppendingSegmentInfosWriter extends DefaultSegmentInfosWriter {
|
||||
|
||||
@Override
|
||||
protected IndexOutput createOutput(Directory dir, String segmentsFileName)
|
||||
throws IOException {
|
||||
return dir.createOutput(segmentsFileName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishCommit(IndexOutput out) throws IOException {
|
||||
out.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepareCommit(IndexOutput segmentOutput) throws IOException {
|
||||
// noop
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
package org.apache.lucene.index.codecs.appending;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
|
||||
import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
|
||||
import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
|
||||
import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
|
||||
public class AppendingTermsDictReader extends StandardTermsDictReader {
|
||||
|
||||
public AppendingTermsDictReader(StandardTermsIndexReader indexReader,
|
||||
Directory dir, FieldInfos fieldInfos, String segment,
|
||||
StandardPostingsReader postingsReader, int readBufferSize,
|
||||
Comparator<BytesRef> termComp, int termsCacheSize) throws IOException {
|
||||
super(indexReader, dir, fieldInfos, segment, postingsReader, readBufferSize,
|
||||
termComp, termsCacheSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void readHeader(IndexInput in) throws IOException {
|
||||
CodecUtil.checkHeader(in, AppendingTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void seekDir(IndexInput in, long dirOffset) throws IOException {
|
||||
in.seek(in.length() - Long.SIZE / 8);
|
||||
long offset = in.readLong();
|
||||
in.seek(offset);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package org.apache.lucene.index.codecs.appending;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
|
||||
import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
|
||||
import org.apache.lucene.index.codecs.standard.StandardTermsIndexWriter;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
|
||||
public class AppendingTermsDictWriter extends StandardTermsDictWriter {
|
||||
final static String CODEC_NAME = "APPENDING_TERMS_DICT";
|
||||
|
||||
public AppendingTermsDictWriter(StandardTermsIndexWriter indexWriter,
|
||||
SegmentWriteState state, StandardPostingsWriter postingsWriter,
|
||||
Comparator<BytesRef> termComp) throws IOException {
|
||||
super(indexWriter, state, postingsWriter, termComp);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeHeader(IndexOutput out) throws IOException {
|
||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeTrailer(long dirStart) throws IOException {
|
||||
out.writeLong(dirStart);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package org.apache.lucene.index.codecs.appending;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
|
||||
public class AppendingTermsIndexReader extends SimpleStandardTermsIndexReader {
|
||||
|
||||
public AppendingTermsIndexReader(Directory dir, FieldInfos fieldInfos,
|
||||
String segment, int indexDivisor, Comparator<BytesRef> termComp)
|
||||
throws IOException {
|
||||
super(dir, fieldInfos, segment, indexDivisor, termComp);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void readHeader(IndexInput input) throws IOException {
|
||||
CodecUtil.checkHeader(input, AppendingTermsIndexWriter.CODEC_NAME, AppendingTermsIndexWriter.VERSION_START);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void seekDir(IndexInput input, long dirOffset) throws IOException {
|
||||
input.seek(input.length() - Long.SIZE / 8);
|
||||
long offset = input.readLong();
|
||||
input.seek(offset);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package org.apache.lucene.index.codecs.appending;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexWriter;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
|
||||
public class AppendingTermsIndexWriter extends SimpleStandardTermsIndexWriter {
|
||||
final static String CODEC_NAME = "APPENDING_TERMS_INDEX";
|
||||
final static int VERSION_START = 0;
|
||||
final static int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
public AppendingTermsIndexWriter(SegmentWriteState state) throws IOException {
|
||||
super(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeHeader(IndexOutput out) throws IOException {
|
||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeTrailer(long dirStart) throws IOException {
|
||||
out.writeLong(dirStart);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,170 @@
|
|||
package org.apache.lucene.index.codecs.appending;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Index;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field.TermVector;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LogMergePolicy;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.index.codecs.SegmentInfosReader;
|
||||
import org.apache.lucene.index.codecs.SegmentInfosWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
 * End-to-end test for {@link AppendingCodec}: indexes two copies of a small
 * document through a RAMDirectory whose outputs forbid seek(), then reads the
 * index back and verifies doc count, stored field, terms, and postings.
 */
public class TestAppendingCodec extends LuceneTestCase {

  /** CodecProvider that always hands out the appending codec and its
   *  append-only SegmentInfos reader/writer, regardless of the name asked for. */
  static class AppendingCodecProvider extends CodecProvider {
    Codec appending = new AppendingCodec();
    SegmentInfosWriter infosWriter = new AppendingSegmentInfosWriter();
    SegmentInfosReader infosReader = new AppendingSegmentInfosReader();

    // Ignores the requested name: this provider only knows one codec.
    @Override
    public Codec lookup(String name) {
      return appending;
    }
    @Override
    public Codec getWriter(SegmentWriteState state) {
      return appending;
    }
    @Override
    public SegmentInfosReader getSegmentInfosReader() {
      return infosReader;
    }
    @Override
    public SegmentInfosWriter getSegmentInfosWriter() {
      return infosWriter;
    }

  }

  /** Delegating IndexOutput that simulates an append-only filesystem by
   *  throwing from seek(); every other call is forwarded to the wrapped output. */
  private static class AppendingIndexOutputWrapper extends IndexOutput {
    IndexOutput wrapped;

    public AppendingIndexOutputWrapper(IndexOutput wrapped) {
      this.wrapped = wrapped;
    }

    @Override
    public void close() throws IOException {
      wrapped.close();
    }

    @Override
    public void flush() throws IOException {
      wrapped.flush();
    }

    @Override
    public long getFilePointer() {
      return wrapped.getFilePointer();
    }

    @Override
    public long length() throws IOException {
      return wrapped.length();
    }

    // The whole point of the wrapper: backwards seeks must never happen
    // when writing through the appending codec.
    @Override
    public void seek(long pos) throws IOException {
      throw new UnsupportedOperationException("seek() is unsupported");
    }

    @Override
    public void writeByte(byte b) throws IOException {
      wrapped.writeByte(b);
    }

    @Override
    public void writeBytes(byte[] b, int offset, int length) throws IOException {
      wrapped.writeBytes(b, offset, length);
    }

  }

  /** RAMDirectory whose every created output is wrapped to reject seek(). */
  @SuppressWarnings("serial")
  private static class AppendingRAMDirectory extends RAMDirectory {

    @Override
    public IndexOutput createOutput(String name) throws IOException {
      return new AppendingIndexOutputWrapper(super.createOutput(name));
    }

  }

  // Sample document text; each word becomes a term in field "f".
  private static final String text = "the quick brown fox jumped over the lazy dog";

  public void testCodec() throws Exception {
    Directory dir = new AppendingRAMDirectory();
    IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());

    cfg.setCodecProvider(new AppendingCodecProvider());
    // Compound files are incompatible with the appending codec (see
    // AppendingCodec javadoc), so both CFS options must be off.
    ((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false);
    ((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundDocStore(false);
    IndexWriter writer = new IndexWriter(dir, cfg);
    Document doc = new Document();
    doc.add(new Field("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    // commit() between the two adds forces a second segment, exercising the
    // segments-file write path; optimize() then exercises merging.
    writer.commit();
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
    // Re-open through the same provider so the append-only SegmentInfos
    // reader is used for reading back.
    IndexReader reader = IndexReader.open(dir, null, true, 1, new AppendingCodecProvider());
    assertEquals(2, reader.numDocs());
    doc = reader.document(0);
    assertEquals(text, doc.get("f"));
    Fields fields = MultiFields.getFields(reader);
    Terms terms = fields.terms("f");
    assertNotNull(terms);
    TermsEnum te = terms.iterator();
    // Every distinct word of the sample text must be findable via the
    // appending terms index/dictionary.
    assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("quick")));
    assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("brown")));
    assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("fox")));
    assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("jumped")));
    assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("over")));
    assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("lazy")));
    assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("dog")));
    assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("the")));
    // "the" occurs twice per document; both docs (0 and 1) must match,
    // and there must be no third match.
    DocsEnum de = te.docs(null, null);
    assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS);
    assertEquals(2, de.freq());
    assertTrue(de.advance(1) != DocsEnum.NO_MORE_DOCS);
    assertTrue(de.advance(2) == DocsEnum.NO_MORE_DOCS);
    reader.close();
  }
}
|
|
@ -133,12 +133,14 @@ public final class SegmentInfo {
|
|||
/**
|
||||
* Construct a new SegmentInfo instance by reading a
|
||||
* previously saved SegmentInfo from input.
|
||||
* <p>Note: this is public only to allow access from
|
||||
* the codecs package.</p>
|
||||
*
|
||||
* @param dir directory to load from
|
||||
* @param format format of the segments info file
|
||||
* @param input input handle to read segment info from
|
||||
*/
|
||||
SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
|
||||
public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
|
||||
this.dir = dir;
|
||||
name = input.readString();
|
||||
docCount = input.readInt();
|
||||
|
@ -373,7 +375,7 @@ public final class SegmentInfo {
|
|||
}
|
||||
|
||||
/** Save this segment's info. */
|
||||
void write(IndexOutput output)
|
||||
public void write(IndexOutput output)
|
||||
throws IOException {
|
||||
assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
|
||||
output.writeString(name);
|
||||
|
|
|
@ -20,10 +20,10 @@ package org.apache.lucene.index;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.ChecksumIndexOutput;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.NoSuchDirectoryException;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.index.codecs.SegmentInfosReader;
|
||||
import org.apache.lucene.index.codecs.SegmentInfosWriter;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
|
@ -65,7 +65,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
|||
public static final int FORMAT_4_0 = -10;
|
||||
|
||||
/* This must always point to the most recent file format. */
|
||||
static final int CURRENT_FORMAT = FORMAT_4_0;
|
||||
public static final int CURRENT_FORMAT = FORMAT_4_0;
|
||||
|
||||
public int counter = 0; // used to name new segments
|
||||
|
||||
|
@ -73,14 +73,16 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
|||
* counts how often the index has been changed by adding or deleting docs.
|
||||
* starting with the current time in milliseconds forces to create unique version numbers.
|
||||
*/
|
||||
private long version = System.currentTimeMillis();
|
||||
public long version = System.currentTimeMillis();
|
||||
|
||||
private long generation = 0; // generation of the "segments_N" for the next commit
|
||||
private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
|
||||
// or wrote; this is normally the same as generation except if
|
||||
// there was an IOException that had interrupted a commit
|
||||
|
||||
private Map<String,String> userData = Collections.<String,String>emptyMap(); // Opaque Map<String, String> that user can specify during IndexWriter.commit
|
||||
public Map<String,String> userData = Collections.<String,String>emptyMap(); // Opaque Map<String, String> that user can specify during IndexWriter.commit
|
||||
|
||||
private CodecProvider codecs;
|
||||
|
||||
/**
|
||||
* If non-null, information about loading segments_N files
|
||||
|
@ -88,6 +90,14 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
|||
*/
|
||||
private static PrintStream infoStream;
|
||||
|
||||
public SegmentInfos() {
|
||||
this(CodecProvider.getDefault());
|
||||
}
|
||||
|
||||
public SegmentInfos(CodecProvider codecs) {
|
||||
this.codecs = codecs;
|
||||
}
|
||||
|
||||
public final SegmentInfo info(int i) {
|
||||
return get(i);
|
||||
}
|
||||
|
@ -205,42 +215,22 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
|||
*/
|
||||
public final void read(Directory directory, String segmentFileName,
|
||||
CodecProvider codecs) throws CorruptIndexException, IOException {
|
||||
this.codecs = codecs;
|
||||
boolean success = false;
|
||||
|
||||
// Clear any previous segments:
|
||||
clear();
|
||||
|
||||
ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName));
|
||||
|
||||
generation = generationFromSegmentsFileName(segmentFileName);
|
||||
|
||||
lastGeneration = generation;
|
||||
|
||||
try {
|
||||
int format = input.readInt();
|
||||
|
||||
// check that it is a format we can understand
|
||||
if (format < CURRENT_FORMAT)
|
||||
throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
|
||||
|
||||
version = input.readLong(); // read version
|
||||
counter = input.readInt(); // read counter
|
||||
|
||||
for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
|
||||
add(new SegmentInfo(directory, format, input, codecs));
|
||||
}
|
||||
|
||||
userData = input.readStringStringMap();
|
||||
|
||||
final long checksumNow = input.getChecksum();
|
||||
final long checksumThen = input.readLong();
|
||||
if (checksumNow != checksumThen)
|
||||
throw new CorruptIndexException("checksum mismatch in segments file");
|
||||
|
||||
SegmentInfosReader infosReader = codecs.getSegmentInfosReader();
|
||||
infosReader.read(directory, segmentFileName, codecs, this);
|
||||
success = true;
|
||||
}
|
||||
finally {
|
||||
input.close();
|
||||
if (!success) {
|
||||
// Clear any segment infos we had loaded so we
|
||||
// have a clean slate on retry:
|
||||
|
@ -261,6 +251,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
|||
|
||||
public final void read(Directory directory, final CodecProvider codecs) throws CorruptIndexException, IOException {
|
||||
generation = lastGeneration = -1;
|
||||
this.codecs = codecs;
|
||||
|
||||
new FindSegmentsFile(directory) {
|
||||
|
||||
|
@ -274,7 +265,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
|||
|
||||
// Only non-null after prepareCommit has been called and
|
||||
// before finishCommit is called
|
||||
ChecksumIndexOutput pendingSegnOutput;
|
||||
IndexOutput pendingSegnOutput;
|
||||
|
||||
private void write(Directory directory) throws IOException {
|
||||
|
||||
|
@ -287,21 +278,14 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
|||
generation++;
|
||||
}
|
||||
|
||||
ChecksumIndexOutput segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName));
|
||||
IndexOutput segnOutput = null;
|
||||
|
||||
boolean success = false;
|
||||
|
||||
try {
|
||||
segnOutput.writeInt(CURRENT_FORMAT); // write FORMAT
|
||||
segnOutput.writeLong(++version); // every write changes
|
||||
// the index
|
||||
segnOutput.writeInt(counter); // write counter
|
||||
segnOutput.writeInt(size()); // write infos
|
||||
for (SegmentInfo si : this) {
|
||||
si.write(segnOutput);
|
||||
}
|
||||
segnOutput.writeStringStringMap(userData);
|
||||
segnOutput.prepareCommit();
|
||||
SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
|
||||
segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
|
||||
infosWriter.prepareCommit(segnOutput);
|
||||
success = true;
|
||||
pendingSegnOutput = segnOutput;
|
||||
} finally {
|
||||
|
@ -785,8 +769,8 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
|||
throw new IllegalStateException("prepareCommit was not called");
|
||||
boolean success = false;
|
||||
try {
|
||||
pendingSegnOutput.finishCommit();
|
||||
pendingSegnOutput.close();
|
||||
SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
|
||||
infosWriter.finishCommit(pendingSegnOutput);
|
||||
pendingSegnOutput = null;
|
||||
success = true;
|
||||
} finally {
|
||||
|
|
|
@ -38,6 +38,8 @@ import org.apache.lucene.index.codecs.standard.StandardCodec;
|
|||
* @lucene.experimental */
|
||||
|
||||
public abstract class CodecProvider {
|
||||
private SegmentInfosWriter infosWriter = new DefaultSegmentInfosWriter();
|
||||
private SegmentInfosReader infosReader = new DefaultSegmentInfosReader();
|
||||
|
||||
private final HashMap<String, Codec> codecs = new HashMap<String, Codec>();
|
||||
|
||||
|
@ -73,6 +75,14 @@ public abstract class CodecProvider {
|
|||
|
||||
public abstract Codec getWriter(SegmentWriteState state);
|
||||
|
||||
public SegmentInfosWriter getSegmentInfosWriter() {
|
||||
return infosWriter;
|
||||
}
|
||||
|
||||
public SegmentInfosReader getSegmentInfosReader() {
|
||||
return infosReader;
|
||||
}
|
||||
|
||||
static private final CodecProvider defaultCodecs = new DefaultCodecProvider();
|
||||
|
||||
public static CodecProvider getDefault() {
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
package org.apache.lucene.index.codecs;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/**
|
||||
* Default implementation of {@link SegmentInfosReader}.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DefaultSegmentInfosReader extends SegmentInfosReader {
|
||||
|
||||
@Override
|
||||
public void read(Directory directory, String segmentsFileName, CodecProvider codecs,
|
||||
SegmentInfos infos) throws IOException {
|
||||
IndexInput input = null;
|
||||
try {
|
||||
input = openInput(directory, segmentsFileName);
|
||||
int format = input.readInt();
|
||||
|
||||
// check that it is a format we can understand
|
||||
if (format < SegmentInfos.CURRENT_FORMAT)
|
||||
throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
|
||||
|
||||
infos.version = input.readLong(); // read version
|
||||
infos.counter = input.readInt(); // read counter
|
||||
|
||||
for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
|
||||
infos.add(new SegmentInfo(directory, format, input, codecs));
|
||||
}
|
||||
|
||||
infos.userData = input.readStringStringMap();
|
||||
finalizeInput(input);
|
||||
|
||||
} finally {
|
||||
if (input != null) {
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public IndexInput openInput(Directory dir, String segmentsFileName) throws IOException {
|
||||
IndexInput in = dir.openInput(segmentsFileName);
|
||||
return new ChecksumIndexInput(in);
|
||||
|
||||
}
|
||||
|
||||
public void finalizeInput(IndexInput input) throws IOException, CorruptIndexException {
|
||||
ChecksumIndexInput cksumInput = (ChecksumIndexInput)input;
|
||||
final long checksumNow = cksumInput.getChecksum();
|
||||
final long checksumThen = cksumInput.readLong();
|
||||
if (checksumNow != checksumThen)
|
||||
throw new CorruptIndexException("checksum mismatch in segments file");
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
package org.apache.lucene.index.codecs;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.store.ChecksumIndexOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
/**
|
||||
* Default implementation of {@link SegmentInfosWriter}.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
|
||||
|
||||
@Override
|
||||
public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
|
||||
throws IOException {
|
||||
IndexOutput out = createOutput(dir, segmentFileName);
|
||||
out.writeInt(SegmentInfos.CURRENT_FORMAT); // write FORMAT
|
||||
out.writeLong(++infos.version); // every write changes
|
||||
// the index
|
||||
out.writeInt(infos.counter); // write counter
|
||||
out.writeInt(infos.size()); // write infos
|
||||
for (SegmentInfo si : infos) {
|
||||
si.write(out);
|
||||
}
|
||||
out.writeStringStringMap(infos.getUserData());
|
||||
return out;
|
||||
}
|
||||
|
||||
protected IndexOutput createOutput(Directory dir, String segmentFileName)
|
||||
throws IOException {
|
||||
IndexOutput plainOut = dir.createOutput(segmentFileName);
|
||||
ChecksumIndexOutput out = new ChecksumIndexOutput(plainOut);
|
||||
return out;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepareCommit(IndexOutput segmentOutput) throws IOException {
|
||||
((ChecksumIndexOutput)segmentOutput).prepareCommit();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishCommit(IndexOutput out) throws IOException {
|
||||
((ChecksumIndexOutput)out).finishCommit();
|
||||
out.close();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
package org.apache.lucene.index.codecs;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
/**
|
||||
* Specifies an API for classes that can read {@link SegmentInfos} information.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class SegmentInfosReader {
|
||||
|
||||
/**
|
||||
* Read {@link SegmentInfos} data from a directory.
|
||||
* @param directory directory to read from
|
||||
* @param segmentsFileName name of the "segments_N" file
|
||||
* @param codecs current codecs
|
||||
* @param infos empty instance to be populated with data
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract void read(Directory directory, String segmentsFileName, CodecProvider codecs, SegmentInfos infos) throws IOException;
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
package org.apache.lucene.index.codecs;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
/**
|
||||
* Specifies an API for classes that can write out {@link SegmentInfos} data.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class SegmentInfosWriter {
|
||||
|
||||
/**
|
||||
* Write {@link SegmentInfos} data without closing the output. The returned
|
||||
* output will become finished only after a successful completion of
|
||||
* "two phase commit" that first calls {@link #prepareCommit(IndexOutput)} and
|
||||
* then {@link #finishCommit(IndexOutput)}.
|
||||
* @param dir directory to write data to
|
||||
* @param segmentsFileName name of the "segments_N" file to create
|
||||
* @param infos data to write
|
||||
* @return an instance of {@link IndexOutput} to be used in subsequent "two
|
||||
* phase commit" operations as described above.
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract IndexOutput writeInfos(Directory dir, String segmentsFileName, SegmentInfos infos) throws IOException;
|
||||
|
||||
/**
|
||||
* First phase of the two-phase commit - ensure that all output can be
|
||||
* successfully written out.
|
||||
* @param out an instance of {@link IndexOutput} returned from a previous
|
||||
* call to {@link #writeInfos(Directory, String, SegmentInfos)}.
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract void prepareCommit(IndexOutput out) throws IOException;
|
||||
|
||||
/**
|
||||
* Second phase of the two-phase commit. In this step the output should be
|
||||
* finalized and closed.
|
||||
* @param out an instance of {@link IndexOutput} returned from a previous
|
||||
* call to {@link #writeInfos(Directory, String, SegmentInfos)}.
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract void finishCommit(IndexOutput out) throws IOException;
|
||||
}
|
|
@ -87,6 +87,9 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
|
|||
|
||||
final HashMap<FieldInfo,FieldIndexReader> fields = new HashMap<FieldInfo,FieldIndexReader>();
|
||||
|
||||
// start of the field info data
|
||||
protected long dirOffset;
|
||||
|
||||
public SimpleStandardTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp)
|
||||
throws IOException {
|
||||
|
||||
|
@ -97,10 +100,8 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
|
|||
boolean success = false;
|
||||
|
||||
try {
|
||||
CodecUtil.checkHeader(in, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START);
|
||||
|
||||
final long dirOffset = in.readLong();
|
||||
|
||||
readHeader(in);
|
||||
indexInterval = in.readInt();
|
||||
this.indexDivisor = indexDivisor;
|
||||
|
||||
|
@ -111,9 +112,9 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
|
|||
totalIndexInterval = indexInterval * indexDivisor;
|
||||
}
|
||||
|
||||
// Read directory
|
||||
in.seek(dirOffset);
|
||||
seekDir(in, dirOffset);
|
||||
|
||||
// Read directory
|
||||
final int numFields = in.readInt();
|
||||
|
||||
for(int i=0;i<numFields;i++) {
|
||||
|
@ -144,6 +145,11 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
|
|||
}
|
||||
}
|
||||
|
||||
protected void readHeader(IndexInput input) throws IOException {
|
||||
CodecUtil.checkHeader(input, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START);
|
||||
dirOffset = input.readLong();
|
||||
}
|
||||
|
||||
private final class FieldIndexReader extends FieldReader {
|
||||
|
||||
final private FieldInfo fieldInfo;
|
||||
|
@ -445,4 +451,8 @@ public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
|
|||
termBytesReader.close();
|
||||
}
|
||||
}
|
||||
|
||||
protected void seekDir(IndexInput input, long dirOffset) throws IOException {
|
||||
input.seek(dirOffset);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ import java.io.IOException;
|
|||
|
||||
/** @lucene.experimental */
|
||||
public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
|
||||
final private IndexOutput out;
|
||||
protected final IndexOutput out;
|
||||
|
||||
final static String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX";
|
||||
final static int VERSION_START = 0;
|
||||
|
@ -50,12 +50,15 @@ public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
|
|||
state.flushedFiles.add(indexFileName);
|
||||
termIndexInterval = state.termIndexInterval;
|
||||
out = state.directory.createOutput(indexFileName);
|
||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
fieldInfos = state.fieldInfos;
|
||||
writeHeader(out);
|
||||
out.writeInt(termIndexInterval);
|
||||
}
|
||||
|
||||
protected void writeHeader(IndexOutput out) throws IOException {
|
||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
// Placeholder for dir offset
|
||||
out.writeLong(0);
|
||||
out.writeInt(termIndexInterval);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -179,8 +182,12 @@ public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
|
|||
out.writeLong(field.packedIndexStart);
|
||||
out.writeLong(field.packedOffsetsStart);
|
||||
}
|
||||
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
out.writeLong(dirStart);
|
||||
writeTrailer(dirStart);
|
||||
out.close();
|
||||
}
|
||||
|
||||
protected void writeTrailer(long dirStart) throws IOException {
|
||||
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
out.writeLong(dirStart);
|
||||
}
|
||||
}
|
|
@ -72,6 +72,9 @@ public class StandardTermsDictReader extends FieldsProducer {
|
|||
// Reads the terms index
|
||||
private StandardTermsIndexReader indexReader;
|
||||
|
||||
// keeps the dirStart offset
|
||||
protected long dirOffset;
|
||||
|
||||
// Used as key for the terms cache
|
||||
private static class FieldAndTerm extends DoubleBarrelLRUCache.CloneableKey {
|
||||
String field;
|
||||
|
@ -116,15 +119,13 @@ public class StandardTermsDictReader extends FieldsProducer {
|
|||
|
||||
boolean success = false;
|
||||
try {
|
||||
CodecUtil.checkHeader(in, StandardTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
|
||||
|
||||
final long dirOffset = in.readLong();
|
||||
readHeader(in);
|
||||
|
||||
// Have PostingsReader init itself
|
||||
postingsReader.init(in);
|
||||
|
||||
// Read per-field details
|
||||
in.seek(dirOffset);
|
||||
seekDir(in, dirOffset);
|
||||
|
||||
final int numFields = in.readInt();
|
||||
|
||||
|
@ -151,6 +152,16 @@ public class StandardTermsDictReader extends FieldsProducer {
|
|||
this.indexReader = indexReader;
|
||||
}
|
||||
|
||||
protected void readHeader(IndexInput input) throws IOException {
|
||||
CodecUtil.checkHeader(in, StandardTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
|
||||
dirOffset = in.readLong();
|
||||
}
|
||||
|
||||
protected void seekDir(IndexInput input, long dirOffset)
|
||||
throws IOException {
|
||||
input.seek(dirOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void loadTermsIndex(int indexDivisor) throws IOException {
|
||||
indexReader.loadTermsIndex(indexDivisor);
|
||||
|
|
|
@ -55,7 +55,7 @@ public class StandardTermsDictWriter extends FieldsConsumer {
|
|||
|
||||
private final DeltaBytesWriter termWriter;
|
||||
|
||||
final IndexOutput out;
|
||||
protected final IndexOutput out;
|
||||
final StandardPostingsWriter postingsWriter;
|
||||
final FieldInfos fieldInfos;
|
||||
FieldInfo currentField;
|
||||
|
@ -77,12 +77,7 @@ public class StandardTermsDictWriter extends FieldsConsumer {
|
|||
state.flushedFiles.add(termsFileName);
|
||||
|
||||
fieldInfos = state.fieldInfos;
|
||||
|
||||
// Count indexed fields up front
|
||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
|
||||
out.writeLong(0); // leave space for end index pointer
|
||||
|
||||
writeHeader(out);
|
||||
termWriter = new DeltaBytesWriter(out);
|
||||
currentField = null;
|
||||
this.postingsWriter = postingsWriter;
|
||||
|
@ -90,6 +85,13 @@ public class StandardTermsDictWriter extends FieldsConsumer {
|
|||
postingsWriter.start(out); // have consumer write its format/header
|
||||
}
|
||||
|
||||
protected void writeHeader(IndexOutput out) throws IOException {
|
||||
// Count indexed fields up front
|
||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
|
||||
out.writeLong(0); // leave space for end index pointer
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsConsumer addField(FieldInfo field) {
|
||||
assert currentField == null || currentField.name.compareTo(field.name) < 0;
|
||||
|
@ -115,8 +117,7 @@ public class StandardTermsDictWriter extends FieldsConsumer {
|
|||
out.writeLong(field.numTerms);
|
||||
out.writeLong(field.termsStartPointer);
|
||||
}
|
||||
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
out.writeLong(dirStart);
|
||||
writeTrailer(dirStart);
|
||||
} finally {
|
||||
try {
|
||||
out.close();
|
||||
|
@ -130,6 +131,12 @@ public class StandardTermsDictWriter extends FieldsConsumer {
|
|||
}
|
||||
}
|
||||
|
||||
protected void writeTrailer(long dirStart) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
out.writeLong(dirStart);
|
||||
}
|
||||
|
||||
class TermsWriter extends TermsConsumer {
|
||||
private final FieldInfo fieldInfo;
|
||||
private final StandardPostingsWriter postingsWriter;
|
||||
|
|
Loading…
Reference in New Issue