LUCENE-5969: clean up constants

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1633385 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-10-21 14:08:04 +00:00
parent 784b2bd0cb
commit 03a6d8ce3c
3 changed files with 93 additions and 97 deletions

View File

@ -20,6 +20,7 @@ package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
@ -33,6 +34,7 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
@ -373,6 +375,21 @@ public final class Lucene50PostingsFormat extends PostingsFormat {
* See chapter: <a href="#Payloads">Payloads and Offsets</a>
*/
public static final String PAY_EXTENSION = "pay";
/**
 * Expert: upper bound on the number of skip levels. Lower values give
 * slightly smaller indexes, at the price of slower skipping in big posting lists.
 */
static final int MAX_SKIP_LEVELS = 10;

// Codec names used in the segment headers of the postings files; the reader
// passes each one to CodecUtil.checkSegmentHeader for the terms, doc, pos and
// pay inputs respectively.
static final String TERMS_CODEC = "Lucene50PostingsWriterTerms";
static final String DOC_CODEC = "Lucene50PostingsWriterDoc";
static final String POS_CODEC = "Lucene50PostingsWriterPos";
static final String PAY_CODEC = "Lucene50PostingsWriterPay";

// Format versioning: to change the format, add a new version and point
// VERSION_CURRENT at it. Both values are handed to CodecUtil.checkSegmentHeader
// as the version bounds.
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
private final int minTermBlockSize;
private final int maxTermBlockSize;
@ -440,4 +457,39 @@ public final class Lucene50PostingsFormat extends PostingsFormat {
}
}
}
/**
 * Per-term state carried by this postings format on top of {@link BlockTermState}.
 * All fields are plain mutable values; {@link #copyFrom(TermState)} transfers every one of them.
 */
final static class IntBlockTermState extends BlockTermState {
  // NOTE(review): the *FP fields look like start file pointers into the
  // doc / pos / pay files -- confirm against the writer.
  long docStartFP = 0;
  long posStartFP = 0;
  long payStartFP = 0;
  // -1 is the initial "unset" value for the two offsets below.
  long skipOffset = -1;
  long lastPosBlockOffset = -1;
  // docid when there is a single pulsed posting, otherwise -1
  // freq is always implicitly totalTermFreq in this case.
  int singletonDocID = -1;

  /**
   * Returns a fresh state holding the same values as this one.
   */
  @Override
  public IntBlockTermState clone() {
    IntBlockTermState other = new IntBlockTermState();
    other.copyFrom(this);
    return other;
  }

  /**
   * Overwrites this state with the values of {@code _other}.
   *
   * @param _other state to copy from; must be an {@code IntBlockTermState}
   * @throws ClassCastException if {@code _other} is of a different type
   */
  @Override
  public void copyFrom(TermState _other) {
    super.copyFrom(_other);
    IntBlockTermState other = (IntBlockTermState) _other;
    docStartFP = other.docStartFP;
    posStartFP = other.posStartFP;
    payStartFP = other.payStartFP;
    lastPosBlockOffset = other.lastPosBlockOffset;
    skipOffset = other.skipOffset;
    singletonDocID = other.singletonDocID;
  }

  /**
   * Debug representation listing every field of this state.
   * {@code skipOffset} is appended last so the previous output remains a prefix.
   */
  @Override
  public String toString() {
    return super.toString()
        + " docStartFP=" + docStartFP
        + " posStartFP=" + posStartFP
        + " payStartFP=" + payStartFP
        + " lastPosBlockOffset=" + lastPosBlockOffset
        + " singletonDocID=" + singletonDocID
        + " skipOffset=" + skipOffset;
  }
}
}

View File

@ -17,10 +17,16 @@ package org.apache.lucene.codecs.lucene50;
* limitations under the License.
*/
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_DATA_SIZE;
import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_ENCODED_SIZE;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter.IntBlockTermState;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.VERSION_START;
import java.io.IOException;
import java.util.Arrays;
@ -29,6 +35,7 @@ import java.util.Collections;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
@ -48,7 +55,6 @@ import org.apache.lucene.util.RamUsageEstimator;
* Concrete class that reads docId(maybe frq,pos,offset,payloads) list
* with postings format.
*
* @see Lucene50SkipReader for details
* @lucene.experimental
*/
public final class Lucene50PostingsReader extends PostingsReaderBase {
@ -68,42 +74,29 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
IndexInput docIn = null;
IndexInput posIn = null;
IndexInput payIn = null;
// NOTE: these data files are too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
// such as file truncation.
String docName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.DOC_EXTENSION);
try {
docIn = state.directory.openInput(docName, state.context);
version = CodecUtil.checkSegmentHeader(docIn,
Lucene50PostingsWriter.DOC_CODEC,
Lucene50PostingsWriter.VERSION_START,
Lucene50PostingsWriter.VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
version = CodecUtil.checkSegmentHeader(docIn, DOC_CODEC, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
forUtil = new ForUtil(docIn);
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
// such as file truncation.
CodecUtil.retrieveChecksum(docIn);
if (state.fieldInfos.hasProx()) {
String proxName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.POS_EXTENSION);
posIn = state.directory.openInput(proxName, state.context);
CodecUtil.checkSegmentHeader(posIn, Lucene50PostingsWriter.POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
// such as file truncation.
CodecUtil.checkSegmentHeader(posIn, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
CodecUtil.retrieveChecksum(posIn);
if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
String payName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.PAY_EXTENSION);
payIn = state.directory.openInput(payName, state.context);
CodecUtil.checkSegmentHeader(payIn, Lucene50PostingsWriter.PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
// such as file truncation.
CodecUtil.checkSegmentHeader(payIn, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
CodecUtil.retrieveChecksum(payIn);
}
}
@ -122,12 +115,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
@Override
public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
// Make sure we are talking to the matching postings writer
CodecUtil.checkSegmentHeader(termsIn,
Lucene50PostingsWriter.TERMS_CODEC,
Lucene50PostingsWriter.VERSION_START,
Lucene50PostingsWriter.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
CodecUtil.checkSegmentHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
final int indexBlockSize = termsIn.readVInt();
if (indexBlockSize != BLOCK_SIZE) {
throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
@ -414,11 +402,11 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper = new Lucene50SkipReader(docIn.clone(),
Lucene50PostingsWriter.MAX_SKIP_LEVELS,
BLOCK_SIZE,
indexHasPos,
indexHasOffsets,
indexHasPayloads);
MAX_SKIP_LEVELS,
BLOCK_SIZE,
indexHasPos,
indexHasOffsets,
indexHasPayloads);
}
if (!skipped) {
@ -692,11 +680,11 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper = new Lucene50SkipReader(docIn.clone(),
Lucene50PostingsWriter.MAX_SKIP_LEVELS,
BLOCK_SIZE,
true,
indexHasOffsets,
indexHasPayloads);
MAX_SKIP_LEVELS,
BLOCK_SIZE,
true,
indexHasOffsets,
indexHasPayloads);
}
if (!skipped) {
@ -1118,7 +1106,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper = new Lucene50SkipReader(docIn.clone(),
Lucene50PostingsWriter.MAX_SKIP_LEVELS,
MAX_SKIP_LEVELS,
BLOCK_SIZE,
true,
indexHasOffsets,

View File

@ -17,11 +17,22 @@ package org.apache.lucene.codecs.lucene50;
* limitations under the License.
*/
import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_DATA_SIZE;
import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_ENCODED_SIZE;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.VERSION_CURRENT;
import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
@ -34,11 +45,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_DATA_SIZE;
import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_ENCODED_SIZE;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE;
/**
* Concrete class that writes docId(maybe frq,pos,offset,payloads) list
* with postings format.
@ -50,21 +56,6 @@ import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZ
*/
public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
/**
 * Expert: upper bound on the number of skip levels. Lower values give
 * slightly smaller indexes, at the price of slower skipping in big posting lists.
 */
static final int MAX_SKIP_LEVELS = 10;

// Codec names used in the segment headers of the terms, doc, pos and pay
// files; the postings reader checks them via CodecUtil.checkSegmentHeader.
static final String TERMS_CODEC = "Lucene50PostingsWriterTerms";
static final String DOC_CODEC = "Lucene50PostingsWriterDoc";
static final String POS_CODEC = "Lucene50PostingsWriterPos";
static final String PAY_CODEC = "Lucene50PostingsWriterPay";

// Format versioning: to change the format, add a new version and point
// VERSION_CURRENT at it.
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
IndexOutput docOut;
IndexOutput posOut;
IndexOutput payOut;
@ -178,41 +169,6 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
encoded = new byte[MAX_ENCODED_SIZE];
}
/**
 * Per-term state carried by this postings writer on top of {@link BlockTermState}.
 * All fields are plain mutable values; {@link #copyFrom(TermState)} transfers every one of them.
 */
final static class IntBlockTermState extends BlockTermState {
  // NOTE(review): the *FP fields look like start file pointers into the
  // doc / pos / pay outputs -- confirm against their uses in the writer.
  long docStartFP = 0;
  long posStartFP = 0;
  long payStartFP = 0;
  // -1 is the initial "unset" value for the two offsets below.
  long skipOffset = -1;
  long lastPosBlockOffset = -1;
  // docid when there is a single pulsed posting, otherwise -1
  // freq is always implicitly totalTermFreq in this case.
  int singletonDocID = -1;

  /**
   * Returns a fresh state holding the same values as this one.
   */
  @Override
  public IntBlockTermState clone() {
    IntBlockTermState other = new IntBlockTermState();
    other.copyFrom(this);
    return other;
  }

  /**
   * Overwrites this state with the values of {@code _other}.
   *
   * @param _other state to copy from; must be an {@code IntBlockTermState}
   * @throws ClassCastException if {@code _other} is of a different type
   */
  @Override
  public void copyFrom(TermState _other) {
    super.copyFrom(_other);
    IntBlockTermState other = (IntBlockTermState) _other;
    docStartFP = other.docStartFP;
    posStartFP = other.posStartFP;
    payStartFP = other.payStartFP;
    lastPosBlockOffset = other.lastPosBlockOffset;
    skipOffset = other.skipOffset;
    singletonDocID = other.singletonDocID;
  }

  /**
   * Debug representation listing every field of this state.
   * {@code skipOffset} is appended last so the previous output remains a prefix.
   */
  @Override
  public String toString() {
    return super.toString()
        + " docStartFP=" + docStartFP
        + " posStartFP=" + posStartFP
        + " payStartFP=" + payStartFP
        + " lastPosBlockOffset=" + lastPosBlockOffset
        + " singletonDocID=" + singletonDocID
        + " skipOffset=" + skipOffset;
  }
}
@Override
public IntBlockTermState newTermState() {
return new IntBlockTermState();