mirror of https://github.com/apache/lucene.git
LUCENE-5675: initial scaffolding for new IDVPF
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1594971 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ad7e4bd450
commit
a301ab5610
|
@ -0,0 +1,96 @@
|
|||
package org.apache.lucene.codecs.idversion;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/** A PostingsFormat for primary-key (ID) fields, that associates a
|
||||
* long version with each ID and enables fast (using only the terms index)
|
||||
* lookup for whether a given ID may have a version > N.
|
||||
*
|
||||
* The field is indexed as DOCS_ONLY, but the user must feed in the
|
||||
* version as a payload on the first token.
|
||||
*
|
||||
* The docID and version for each ID is inlined into the terms dict.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
|
||||
public abstract class IDVersionPostingsFormat extends PostingsFormat {
|
||||
|
||||
private final int minTermsInBlock;
|
||||
private final int maxTermsInBlock;
|
||||
|
||||
public IDVersionPostingsFormat() {
|
||||
this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
public IDVersionPostingsFormat(int minTermsInBlock, int maxTermsInBlock) {
|
||||
super("IDVersion");
|
||||
this.minTermsInBlock = minTermsInBlock;
|
||||
this.maxTermsInBlock = maxTermsInBlock;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
PostingsWriterBase postingsWriter = new IDVersionPostingsWriter();
|
||||
boolean success = false;
|
||||
try {
|
||||
FieldsConsumer ret = new BlockTreeTermsWriter(state,
|
||||
postingsWriter,
|
||||
minTermsInBlock,
|
||||
maxTermsInBlock);
|
||||
success = true;
|
||||
return ret;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(postingsWriter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||
PostingsReaderBase postingsReader = new IDVersionPostingsReader();
|
||||
boolean success = false;
|
||||
try {
|
||||
FieldsProducer ret = new BlockTreeTermsReader(state.directory,
|
||||
state.fieldInfos,
|
||||
state.segmentInfo,
|
||||
postingsReader,
|
||||
state.context,
|
||||
state.segmentSuffix);
|
||||
success = true;
|
||||
return ret;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(postingsReader);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
package org.apache.lucene.codecs.idversion;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.lucene.codecs.idversion.IDVersionPostingsWriter.IDVersionTermState;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
public final class IDVersionPostingsReader extends PostingsReaderBase {
|
||||
|
||||
@Override
|
||||
public void init(IndexInput termsIn) throws IOException {
|
||||
// Make sure we are talking to the matching postings writer
|
||||
CodecUtil.checkHeader(termsIn,
|
||||
IDVersionPostingsWriter.TERMS_CODEC,
|
||||
IDVersionPostingsWriter.VERSION_START,
|
||||
IDVersionPostingsWriter.VERSION_CURRENT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BlockTermState newTermState() {
|
||||
return new IDVersionTermState();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
|
||||
throws IOException {
|
||||
final IDVersionTermState termState = (IDVersionTermState) _termState;
|
||||
termState.idVersion = Long.MAX_VALUE - longs[0];
|
||||
termState.docID = in.readVInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
|
||||
// nocommit todo -- need a SingleDocDocsEnum
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
|
||||
DocsAndPositionsEnum reuse, int flags) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
}
|
||||
}
|
|
@ -0,0 +1,154 @@
|
|||
package org.apache.lucene.codecs.idversion;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.PushPostingsWriterBase;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
||||
|
||||
final static String TERMS_CODEC = "IDVersionPostingsWriterTerms";
|
||||
|
||||
// Increment version to change it
|
||||
final static int VERSION_START = 0;
|
||||
final static int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
final static IDVersionTermState emptyState = new IDVersionTermState();
|
||||
IDVersionTermState lastState;
|
||||
|
||||
private int lastDocID;
|
||||
private int lastPosition;
|
||||
private long lastVersion;
|
||||
|
||||
final static class IDVersionTermState extends BlockTermState {
|
||||
long idVersion;
|
||||
int docID;
|
||||
|
||||
@Override
|
||||
public IDVersionTermState clone() {
|
||||
IDVersionTermState other = new IDVersionTermState();
|
||||
other.copyFrom(this);
|
||||
return other;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyFrom(TermState _other) {
|
||||
super.copyFrom(_other);
|
||||
IDVersionTermState other = (IDVersionTermState) _other;
|
||||
idVersion = other.idVersion;
|
||||
docID = other.docID;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IDVersionTermState newTermState() {
|
||||
return new IDVersionTermState();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(IndexOutput termsOut) throws IOException {
|
||||
CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int setField(FieldInfo fieldInfo) {
|
||||
super.setField(fieldInfo);
|
||||
if (fieldInfo.getIndexOptions() != FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
throw new IllegalArgumentException("field must be index using IndexOptions.DOCS_AND_FREQS_AND_POSITIONS");
|
||||
}
|
||||
lastState = emptyState;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startTerm() {
|
||||
lastDocID = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startDoc(int docID, int termDocFreq) throws IOException {
|
||||
if (lastDocID != -1) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("term appears in more than one document");
|
||||
}
|
||||
if (termDocFreq != 1) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("term appears more than once in the document");
|
||||
}
|
||||
|
||||
lastDocID = docID;
|
||||
lastPosition = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
|
||||
if (lastPosition != -1) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("term appears more than once in document");
|
||||
}
|
||||
lastPosition = position;
|
||||
if (payload == null) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("missing payload");
|
||||
}
|
||||
if (payload.length == 0) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("payload.length == 0");
|
||||
}
|
||||
// nocommit decode payload to long here ... PayloadHelper!? or keep as byte[]?
|
||||
lastVersion = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishDoc() throws IOException {
|
||||
if (lastPosition == -1) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("missing addPosition");
|
||||
}
|
||||
}
|
||||
|
||||
/** Called when we are done adding docs to this term */
|
||||
@Override
|
||||
public void finishTerm(BlockTermState _state) throws IOException {
|
||||
IDVersionTermState state = (IDVersionTermState) _state;
|
||||
assert state.docFreq > 0;
|
||||
|
||||
assert lastDocID != -1;
|
||||
state.docID = lastDocID;
|
||||
state.idVersion = lastVersion;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
|
||||
IDVersionTermState state = (IDVersionTermState) _state;
|
||||
// nocommit must send version up to FST somehow ...
|
||||
out.writeVInt(state.docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue