mirror of https://github.com/apache/lucene.git
LUCENE-3069: move TermDict impls to package 'memory', nuke all 'Temp' symbols
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1520034 13f79535-47bb-0310-9956-ffa450edef68
parent 12f60cb535
commit aa1e8b37f4
org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java → org/apache/lucene/codecs/memory/FSTOrdTermsReader.java

@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -70,17 +70,17 @@ import org.apache.lucene.codecs.CodecUtil;
  *
  * @lucene.experimental
  */
-public class TempFSTOrdTermsReader extends FieldsProducer {
-  static final int INTERVAL = TempFSTOrdTermsWriter.SKIP_INTERVAL;
+public class FSTOrdTermsReader extends FieldsProducer {
+  static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL;
   final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();
   final PostingsReaderBase postingsReader;
   IndexInput indexIn = null;
   IndexInput blockIn = null;
   //static final boolean TEST = false;
 
-  public TempFSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
-    final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
-    final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
+  public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
+    final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
+    final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
 
     this.postingsReader = postingsReader;
     try {
@@ -113,9 +113,9 @@ public class TempFSTOrdTermsReader extends FieldsProducer {
   }
 
   private int readHeader(IndexInput in) throws IOException {
-    return CodecUtil.checkHeader(in, TempFSTOrdTermsWriter.TERMS_CODEC_NAME,
-                                     TempFSTOrdTermsWriter.TERMS_VERSION_START,
-                                     TempFSTOrdTermsWriter.TERMS_VERSION_CURRENT);
+    return CodecUtil.checkHeader(in, FSTOrdTermsWriter.TERMS_CODEC_NAME,
+                                     FSTOrdTermsWriter.TERMS_VERSION_START,
+                                     FSTOrdTermsWriter.TERMS_VERSION_CURRENT);
   }
   private void seekDir(IndexInput in) throws IOException {
     in.seek(in.length() - 8);
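
The seekDir helper visible at the end of this hunk relies on a convention both term dictionaries in this commit share: the writer ends the terms file with the absolute offset of the per-field directory, written as a fixed-width long. A minimal sketch of the whole pattern (the real reader splits this between seekDir and its constructor):

```java
// Sketch: jump to the per-field "directory" section appended at the end
// of the terms file. The last 8 bytes are a fixed-width long holding the
// directory's absolute start offset.
private void seekToDirectory(IndexInput in) throws IOException {
  in.seek(in.length() - 8);       // position on the trailing long
  long dirStart = in.readLong();  // absolute offset of the field summary
  in.seek(dirStart);              // subsequent reads decode per-field metadata
}
```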

org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java → org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java

@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -27,6 +27,7 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.ArrayUtil;
@@ -46,12 +47,104 @@ import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.codecs.CodecUtil;
 
 /**
- * FST based term dict, the FST maps each term and its ord.
+ * FST-based term dict, using ord as FST output.
  *
- * @lucene.experimental
+ * The FST holds the mapping between <term, ord>, and
+ * term's metadata is delta encoded into a single byte block.
+ *
+ * Typically the byte block consists of four parts:
+ * 1. term statistics: docFreq, totalTermFreq;
+ * 2. monotonic long[], e.g. the pointer to the postings list for that term;
+ * 3. generic byte[], e.g. other information customized by postings base.
+ * 4. single-level skip list to speed up metadata decoding by ord.
+ *
+ * <p>
+ * Files:
+ * <ul>
+ *  <li><tt>.tix</tt>: <a href="#Termindex">Term Index</a></li>
+ *  <li><tt>.tbk</tt>: <a href="#Termblock">Term Block</a></li>
+ * </ul>
+ * </p>
+ *
+ * <a name="Termindex" id="Termindex"></a>
+ * <h3>Term Index</h3>
+ * <p>
+ *  The .tix contains a list of FSTs, one for each field.
+ *  The FST maps a term to its corresponding order in current field.
+ * </p>
+ *
+ * <ul>
+ *  <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup></li>
+ *  <li>TermFST --> {@link FST FST<long>}</li>
+ *  <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ * </ul>
+ *
+ * <p>Notes:</p>
+ * <ul>
+ *  <li>
+ *   Since terms are already sorted before writing to <a href="#Termblock">Term Block</a>,
+ *   their ords can directly used to seek term metadata from term block.
+ *  </li>
+ * </ul>
+ *
+ * <a name="Termblock" id="Termblock"></a>
+ * <h3>Term Block</h3>
+ * <p>
+ *  The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
+ *  per-field data like number of documents in current field). For each field, there are four blocks:
+ *  <ul>
+ *   <li>statistics bytes block: contains term statistics; </li>
+ *   <li>metadata longs block: delta-encodes monotonical part of metadata; </li>
+ *   <li>metadata bytes block: encodes other parts of metadata; </li>
+ *   <li>skip block: contains skip data, to speed up metadata seeking and decoding</li>
+ *  </ul>
+ * </p>
+ *
+ * <p>File Format:</p>
+ * <ul>
+ *  <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
+ *  <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
+ *                       DocCount, LongsSize, DataBlock > <sup>NumFields</sup></li>
+ *
+ *  <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
+ *                    SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
+ *  <li>SkipBlock --> < StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
+ *                      MetaLongsSkipDelta<sup>LongsSize</sup> ><sup>NumTerms</sup>
+ *  <li>StatsBlock --> < DocFreq[Same?], (TotalTermFreq-DocFreq) ? > <sup>NumTerms</sup>
+ *  <li>MetaLongsBlock --> < LongDelta<sup>LongsSize</sup>, BytesSize > <sup>NumTerms</sup>
+ *  <li>MetaBytesBlock --> Byte <sup>MetaBytesBlockLength</sup>
+ *  <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ *  <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
+ *  <li>NumFields, FieldNumber, DocCount, DocFreq, LongsSize,
+ *      FieldNumber, DocCount --> {@link DataOutput#writeVInt VInt}</li>
+ *  <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
+ *      StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
+ *      LongDelta,--> {@link DataOutput#writeVLong VLong}</li>
+ * </ul>
+ * <p>Notes: </p>
+ * <ul>
+ *  <li>
+ *   The format of PostingsHeader and MetaBytes are customized by the specific postings implementation:
+ *   they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
+ *   (non-monotonical ones like pulsed postings data).
+ *  </li>
+ *  <li>
+ *   During initialization the reader will load all the blocks into memory. SkipBlock will be decoded, so that during seek
+ *   term dict can lookup file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are file offset
+ *   for every SkipInterval's term. MetaLongsSkipDelta is the difference from previous one, which indicates
+ *   the value of preceding metadata longs for every SkipInterval's term.
+ *  </li>
+ *  <li>
+ *   DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
+ *   Usually these two values are the same for long tail terms, therefore one bit is stole from DocFreq to check this case,
+ *   so that encoding of TotalTermFreq may be omitted.
+ *  </li>
+ * </ul>
+ *
+ * @lucene.experimental
  */
 
-public class TempFSTOrdTermsWriter extends FieldsConsumer {
+public class FSTOrdTermsWriter extends FieldsConsumer {
   static final String TERMS_INDEX_EXTENSION = "tix";
   static final String TERMS_BLOCK_EXTENSION = "tbk";
   static final String TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT";
@@ -65,7 +158,7 @@ public class TempFSTOrdTermsWriter extends FieldsConsumer {
   IndexOutput blockOut = null;
   IndexOutput indexOut = null;
 
-  public TempFSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
+  public FSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
     final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
     final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_BLOCK_EXTENSION);
 
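
The skip list described in the javadoc above is what keeps metadata decoding by ord cheap: every SKIP_INTERVAL-th term records file-pointer checkpoints, so the reader jumps to the nearest checkpoint and replays at most SKIP_INTERVAL - 1 delta-encoded entries. A sketch of that arithmetic under assumed names (skipInterval, statsFP, metaLongsFP, decodeNextEntry are illustrative, not the reader's actual members):

```java
// Sketch: position the stats/metadata streams for term number `ord`,
// using absolute file pointers decoded from SkipBlock at load time.
void seekToOrd(long ord) throws IOException {
  int block = (int) (ord / skipInterval);  // nearest preceding checkpoint
  statsIn.seek(statsFP[block]);            // one checkpointed fp per block
  metaLongsIn.seek(metaLongsFP[block]);
  long upto = (long) block * skipInterval;
  while (upto++ < ord) {
    decodeNextEntry();                     // replay delta-encoded entries
  }
}
```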

org/apache/lucene/codecs/temp/TempTermOutputs.java → org/apache/lucene/codecs/memory/FSTTermOutputs.java

@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -29,15 +29,15 @@ import org.apache.lucene.util.LongsRef;
 
 /**
  * An FST {@link Outputs} implementation for
- * {@link TempFSTPostingsFormat}.
+ * {@link FSTTermsWriter}.
  *
  * @lucene.experimental
  */
 
 // NOTE: outputs should be per-field, since
 // longsSize is fixed for each field
-public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
-  private final static TempTermData NO_OUTPUT = new TempTermData();
+class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
+  private final static TermData NO_OUTPUT = new TermData();
   //private static boolean TEST = false;
   private final boolean hasPos;
   private final int longsSize;
@@ -47,18 +47,18 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
    * On an FST, only long[] part is 'shared' and pushed towards root.
    * byte[] and term stats will be kept on deeper arcs.
    */
-  public static class TempTermData {
+  static class TermData {
     long[] longs;
     byte[] bytes;
     int docFreq;
     long totalTermFreq;
-    TempTermData() {
+    TermData() {
      this.longs = null;
       this.bytes = null;
       this.docFreq = 0;
       this.totalTermFreq = -1;
     }
-    TempTermData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) {
+    TermData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) {
       this.longs = longs;
       this.bytes = bytes;
       this.docFreq = docFreq;
@@ -92,10 +92,10 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
     public boolean equals(Object other_) {
       if (other_ == this) {
         return true;
-      } else if (!(other_ instanceof TempTermOutputs.TempTermData)) {
+      } else if (!(other_ instanceof FSTTermOutputs.TermData)) {
         return false;
       }
-      TempTermData other = (TempTermData) other_;
+      TermData other = (TermData) other_;
       return statsEqual(this, other) &&
              longsEqual(this, other) &&
              bytesEqual(this, other);
@@ -103,7 +103,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
     }
   }
 
-  protected TempTermOutputs(FieldInfo fieldInfo, int longsSize) {
+  protected FSTTermOutputs(FieldInfo fieldInfo, int longsSize) {
     this.hasPos = (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY);
     this.longsSize = longsSize;
   }
@@ -115,7 +115,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
   // 1. every value in t1 is not larger than in t2, or
   // 2. every value in t1 is not smaller than t2.
   //
-  public TempTermData common(TempTermData t1, TempTermData t2) {
+  public TermData common(TermData t1, TermData t2) {
     //if (TEST) System.out.print("common("+t1+", "+t2+") = ");
     if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
       //if (TEST) System.out.println("ret:"+NO_OUTPUT);
@@ -125,7 +125,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
 
     long[] min = t1.longs, max = t2.longs;
     int pos = 0;
-    TempTermData ret;
+    TermData ret;
 
     while (pos < longsSize && min[pos] == max[pos]) {
       pos++;
@@ -142,7 +142,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
       if (pos < longsSize || allZero(min)) {  // not comparable or all-zero
         ret = NO_OUTPUT;
       } else {
-        ret = new TempTermData(min, null, 0, -1);
+        ret = new TermData(min, null, 0, -1);
       }
     } else {  // equal long[]
       if (statsEqual(t1, t2) && bytesEqual(t1, t2)) {
@@ -150,7 +150,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
       } else if (allZero(min)) {
         ret = NO_OUTPUT;
       } else {
-        ret = new TempTermData(min, null, 0, -1);
+        ret = new TermData(min, null, 0, -1);
       }
     }
     //if (TEST) System.out.println("ret:"+ret);
@@ -158,7 +158,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
   }
 
   @Override
-  public TempTermData subtract(TempTermData t1, TempTermData t2) {
+  public TermData subtract(TermData t1, TermData t2) {
     //if (TEST) System.out.print("subtract("+t1+", "+t2+") = ");
     if (t2 == NO_OUTPUT) {
       //if (TEST) System.out.println("ret:"+t1);
@@ -176,21 +176,21 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
       pos++;
     }
 
-    TempTermData ret;
+    TermData ret;
     if (diff == 0 && statsEqual(t1, t2) && bytesEqual(t1, t2)) {
       ret = NO_OUTPUT;
     } else {
-      ret = new TempTermData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
+      ret = new TermData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
     }
     //if (TEST) System.out.println("ret:"+ret);
     return ret;
   }
 
-  // TODO: if we refactor a 'addSelf(TempMetaDat other)',
+  // TODO: if we refactor a 'addSelf(TermData other)',
   // we can gain about 5~7% for fuzzy queries, however this also
   // means we are putting too much stress on FST Outputs decoding?
   @Override
-  public TempTermData add(TempTermData t1, TempTermData t2) {
+  public TermData add(TermData t1, TermData t2) {
     //if (TEST) System.out.print("add("+t1+", "+t2+") = ");
     if (t1 == NO_OUTPUT) {
       //if (TEST) System.out.println("ret:"+t2);
@@ -209,18 +209,18 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
       pos++;
     }
 
-    TempTermData ret;
+    TermData ret;
     if (t2.bytes != null || t2.docFreq > 0) {
-      ret = new TempTermData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq);
+      ret = new TermData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq);
     } else {
-      ret = new TempTermData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
+      ret = new TermData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
     }
     //if (TEST) System.out.println("ret:"+ret);
     return ret;
   }
 
   @Override
-  public void write(TempTermData data, DataOutput out) throws IOException {
+  public void write(TermData data, DataOutput out) throws IOException {
     int bit0 = allZero(data.longs) ? 0 : 1;
     int bit1 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1) << 1;
     int bit2 = ((data.docFreq == 0) ? 0 : 1) << 2;
@@ -259,7 +259,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
   }
 
   @Override
-  public TempTermData read(DataInput in) throws IOException {
+  public TermData read(DataInput in) throws IOException {
     long[] longs = new long[longsSize];
     byte[] bytes = null;
     int docFreq = 0;
@@ -292,29 +292,29 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
         docFreq = code;
       }
     }
-    return new TempTermData(longs, bytes, docFreq, totalTermFreq);
+    return new TermData(longs, bytes, docFreq, totalTermFreq);
   }
 
   @Override
-  public TempTermData getNoOutput() {
+  public TermData getNoOutput() {
     return NO_OUTPUT;
   }
 
   @Override
-  public String outputToString(TempTermData data) {
+  public String outputToString(TermData data) {
     return data.toString();
   }
 
-  static boolean statsEqual(final TempTermData t1, final TempTermData t2) {
+  static boolean statsEqual(final TermData t1, final TermData t2) {
     return t1.docFreq == t2.docFreq && t1.totalTermFreq == t2.totalTermFreq;
   }
-  static boolean bytesEqual(final TempTermData t1, final TempTermData t2) {
+  static boolean bytesEqual(final TermData t1, final TermData t2) {
     if (t1.bytes == null && t2.bytes == null) {
       return true;
     }
     return t1.bytes != null && t2.bytes != null && Arrays.equals(t1.bytes, t2.bytes);
   }
-  static boolean longsEqual(final TempTermData t1, final TempTermData t2) {
+  static boolean longsEqual(final TermData t1, final TermData t2) {
    if (t1.longs == null && t2.longs == null) {
       return true;
     }
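
The common/subtract/add trio above implements the algebra every FST Outputs must satisfy: the builder stores common(a, b) on shared prefix arcs, leaves the residual subtract(a, common) on deeper arcs, and readers recover the original value with add. A toy version over plain longs (in the spirit of PositiveIntOutputs, not the TermData class itself) makes the invariant explicit:

```java
// Toy output algebra over longs; FSTTermOutputs generalizes the same
// contract to (long[] deltas + byte[] + term stats).
static long common(long a, long b)   { return Math.min(a, b); } // pushed toward root
static long subtract(long a, long c) { return a - c; }          // left on deep arcs
static long add(long c, long r)      { return c + r; }          // replayed at read time

static void checkInvariant(long a, long b) {
  long c = common(a, b);
  assert add(c, subtract(a, c)) == a;  // original outputs stay recoverable
  assert add(c, subtract(b, c)) == b;
}
```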

org/apache/lucene/codecs/temp/TempFSTTermsReader.java → org/apache/lucene/codecs/memory/FSTTermsReader.java

@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -68,14 +68,14 @@ import org.apache.lucene.codecs.CodecUtil;
  * @lucene.experimental
  */
 
-public class TempFSTTermsReader extends FieldsProducer {
+public class FSTTermsReader extends FieldsProducer {
   final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();
   final PostingsReaderBase postingsReader;
   final IndexInput in;
   //static boolean TEST = false;
 
-  public TempFSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
-    final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTTermsWriter.TERMS_EXTENSION);
+  public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
+    final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
 
     this.postingsReader = postingsReader;
     this.in = state.directory.openInput(termsFileName, state.context);
@@ -109,9 +109,9 @@ public class TempFSTTermsReader extends FieldsProducer {
   }
 
   private int readHeader(IndexInput in) throws IOException {
-    return CodecUtil.checkHeader(in, TempFSTTermsWriter.TERMS_CODEC_NAME,
-                                     TempFSTTermsWriter.TERMS_VERSION_START,
-                                     TempFSTTermsWriter.TERMS_VERSION_CURRENT);
+    return CodecUtil.checkHeader(in, FSTTermsWriter.TERMS_CODEC_NAME,
+                                     FSTTermsWriter.TERMS_VERSION_START,
+                                     FSTTermsWriter.TERMS_VERSION_CURRENT);
   }
   private void seekDir(IndexInput in) throws IOException {
     in.seek(in.length() - 8);
@@ -167,7 +167,7 @@ public class TempFSTTermsReader extends FieldsProducer {
     final long sumDocFreq;
     final int docCount;
     final int longsSize;
-    final FST<TempTermOutputs.TempTermData> dict;
+    final FST<FSTTermOutputs.TermData> dict;
 
     TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
       this.fieldInfo = fieldInfo;
@@ -176,7 +176,7 @@ public class TempFSTTermsReader extends FieldsProducer {
       this.sumDocFreq = sumDocFreq;
       this.docCount = docCount;
       this.longsSize = longsSize;
-      this.dict = new FST<TempTermOutputs.TempTermData>(in, new TempTermOutputs(fieldInfo, longsSize));
+      this.dict = new FST<FSTTermOutputs.TermData>(in, new FSTTermOutputs(fieldInfo, longsSize));
     }
 
     @Override
@@ -238,7 +238,7 @@ public class TempFSTTermsReader extends FieldsProducer {
       final BlockTermState state;
 
       /* Current term stats + undecoded metadata (long[] & byte[]) */
-      TempTermOutputs.TempTermData meta;
+      FSTTermOutputs.TermData meta;
       ByteArrayDataInput bytesReader;
 
       /** Decodes metadata into customized term state */
@@ -306,7 +306,7 @@ public class TempFSTTermsReader extends FieldsProducer {
 
     // Iterates through all terms in this field
     private final class SegmentTermsEnum extends BaseTermsEnum {
-      final BytesRefFSTEnum<TempTermOutputs.TempTermData> fstEnum;
+      final BytesRefFSTEnum<FSTTermOutputs.TermData> fstEnum;
 
       /* True when current term's metadata is decoded */
       boolean decoded;
@@ -316,7 +316,7 @@ public class TempFSTTermsReader extends FieldsProducer {
 
       SegmentTermsEnum() throws IOException {
         super();
-        this.fstEnum = new BytesRefFSTEnum<TempTermOutputs.TempTermData>(dict);
+        this.fstEnum = new BytesRefFSTEnum<FSTTermOutputs.TermData>(dict);
         this.decoded = false;
         this.seekPending = false;
         this.meta = null;
@@ -335,7 +335,7 @@ public class TempFSTTermsReader extends FieldsProducer {
       }
 
       // Update current enum according to FSTEnum
-      void updateEnum(final InputOutput<TempTermOutputs.TempTermData> pair) {
+      void updateEnum(final InputOutput<FSTTermOutputs.TermData> pair) {
         if (pair == null) {
           term = null;
         } else {
@@ -405,22 +405,22 @@ public class TempFSTTermsReader extends FieldsProducer {
       int metaUpto;
 
       /* term dict fst */
-      final FST<TempTermOutputs.TempTermData> fst;
+      final FST<FSTTermOutputs.TermData> fst;
       final FST.BytesReader fstReader;
-      final Outputs<TempTermOutputs.TempTermData> fstOutputs;
+      final Outputs<FSTTermOutputs.TermData> fstOutputs;
 
       /* query automaton to intersect with */
       final ByteRunAutomaton fsa;
 
       private final class Frame {
         /* fst stats */
-        FST.Arc<TempTermOutputs.TempTermData> fstArc;
+        FST.Arc<FSTTermOutputs.TermData> fstArc;
 
         /* automaton stats */
         int fsaState;
 
         Frame() {
-          this.fstArc = new FST.Arc<TempTermOutputs.TempTermData>();
+          this.fstArc = new FST.Arc<FSTTermOutputs.TermData>();
          this.fsaState = -1;
         }
 
@@ -475,7 +475,7 @@ public class TempFSTTermsReader extends FieldsProducer {
 
       /** Lazily accumulate meta data, when we got a accepted term */
       void loadMetaData() throws IOException {
-        FST.Arc<TempTermOutputs.TempTermData> last, next;
+        FST.Arc<FSTTermOutputs.TermData> last, next;
         last = stack[metaUpto].fstArc;
         while (metaUpto != level) {
           metaUpto++;
@@ -626,7 +626,7 @@ public class TempFSTTermsReader extends FieldsProducer {
      /** Load frame for target arc(node) on fst, so that
        *  arc.label >= label and !fsa.reject(arc.label) */
       Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
-        FST.Arc<TempTermOutputs.TempTermData> arc = frame.fstArc;
+        FST.Arc<FSTTermOutputs.TermData> arc = frame.fstArc;
         arc = Util.readCeilArc(label, fst, top.fstArc, arc, fstReader);
         if (arc == null) {
           return null;
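
SegmentTermsEnum above is a thin adapter over BytesRefFSTEnum, which walks the FST in sorted term order. The same traversal in isolation, using FST<Long> instead of TermData to stay self-contained (a sketch, not the reader's code):

```java
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;

// Visit every <term, output> pair in term order, the pattern
// SegmentTermsEnum.next() builds on.
void dumpTerms(FST<Long> dict) throws java.io.IOException {
  BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(dict);
  InputOutput<Long> pair;
  while ((pair = fstEnum.next()) != null) {
    BytesRef term = pair.input;   // current term bytes
    long output = pair.output;    // accumulated FST output for this term
    System.out.println(term.utf8ToString() + " -> " + output);
  }
}
```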

org/apache/lucene/codecs/temp/TempFSTTermsWriter.java → org/apache/lucene/codecs/memory/FSTTermsWriter.java

@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -27,6 +27,7 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.ArrayUtil;
@@ -44,13 +45,83 @@ import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.codecs.CodecUtil;
 
 /**
- * FST based term dict, the FST maps each term and its metadata.
+ * FST-based term dict, using metadata as FST output.
  *
- * @lucene.experimental
+ * The FST directly holds the mapping between <term, metadata>.
+ *
+ * Term metadata consists of three parts:
+ * 1. term statistics: docFreq, totalTermFreq;
+ * 2. monotonic long[], e.g. the pointer to the postings list for that term;
+ * 3. generic byte[], e.g. other information need by postings reader.
+ *
+ * <p>
+ * File:
+ * <ul>
+ *  <li><tt>.tst</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
+ * </ul>
+ * <p>
+ *
+ * <a name="Termdictionary" id="Termdictionary"></a>
+ * <h3>Term Dictionary</h3>
+ * <p>
+ *  The .tst contains a list of FSTs, one for each field.
+ *  The FST maps a term to its corresponding statistics (e.g. docfreq)
+ *  and metadata (e.g. information for postings list reader like file pointer
+ *  to postings list).
+ * </p>
+ * <p>
+ *  Typically the metadata is separated into two parts:
+ *  <ul>
+ *   <li>
+ *    Monotonical long array: Some metadata will always be ascending in order
+ *    with the corresponding term. This part is used by FST to share outputs between arcs.
+ *   </li>
+ *   <li>
+ *    Generic byte array: Used to store non-monotonical metadata.
+ *   </li>
+ *  </ul>
+ * </p>
+ *
+ * File format:
+ * <ul>
+ *  <li>TermsDict(.tst) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
+ *  <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?,
+ *                       SumDocFreq, DocCount, LongsSize, TermFST ><sup>NumFields</sup></li>
+ *  <li>TermFST --> {@link FST FST<TermData>}</li>
+ *  <li>TermData --> Flag, BytesSize?, LongDelta<sup>LongsSize</sup>?, Byte<sup>BytesSize</sup>?,
+ *                   < DocFreq[Same?], (TotalTermFreq-DocFreq) > ? </li>
+ *  <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ *  <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
+ *  <li>DocFreq, LongsSize, BytesSize, NumFields,
+ *      FieldNumber, DocCount --> {@link DataOutput#writeVInt VInt}</li>
+ *  <li>TotalTermFreq, NumTerms, SumTotalTermFreq, SumDocFreq, LongDelta -->
+ *      {@link DataOutput#writeVLong VLong}</li>
+ * </ul>
+ * <p>Notes:</p>
+ * <ul>
+ *  <li>
+ *   The format of PostingsHeader and generic meta bytes are customized by the specific postings implementation:
+ *   they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
+ *   (non-monotonical ones like pulsed postings data).
+ *  </li>
+ *  <li>
+ *   The format of TermData is determined by FST, typically monotonical metadata will be dense around shallow arcs,
+ *   while in deeper arcs only generic bytes and term statistics exist.
+ *  </li>
+ *  <li>
+ *   The byte Flag is used to indicate which part of metadata exists on current arc. Specially the monotonical part
+ *   is omitted when it is an array of 0s.
+ *  </li>
+ *  <li>
+ *   Since LongsSize is per-field fixed, it is only written once in field summary.
+ *  </li>
+ * </ul>
+ *
+ * @lucene.experimental
  */
 
-public class TempFSTTermsWriter extends FieldsConsumer {
+public class FSTTermsWriter extends FieldsConsumer {
   static final String TERMS_EXTENSION = "tmp";
   static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
   public static final int TERMS_VERSION_START = 0;
@@ -61,7 +132,7 @@ public class TempFSTTermsWriter extends FieldsConsumer {
   final IndexOutput out;
   final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
 
-  public TempFSTTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
+  public FSTTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
     final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
 
     this.postingsWriter = postingsWriter;
@@ -125,9 +196,9 @@ public class TempFSTTermsWriter extends FieldsConsumer {
     public final long sumDocFreq;
     public final int docCount;
     public final int longsSize;
-    public final FST<TempTermOutputs.TempTermData> dict;
+    public final FST<FSTTermOutputs.TermData> dict;
 
-    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<TempTermOutputs.TempTermData> fst) {
+    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<FSTTermOutputs.TermData> fst) {
       this.fieldInfo = fieldInfo;
       this.numTerms = numTerms;
       this.sumTotalTermFreq = sumTotalTermFreq;
@@ -139,8 +210,8 @@ public class TempFSTTermsWriter extends FieldsConsumer {
   }
 
   final class TermsWriter extends TermsConsumer {
-    private final Builder<TempTermOutputs.TempTermData> builder;
-    private final TempTermOutputs outputs;
+    private final Builder<FSTTermOutputs.TermData> builder;
+    private final FSTTermOutputs outputs;
     private final FieldInfo fieldInfo;
     private final int longsSize;
     private long numTerms;
@@ -153,8 +224,8 @@ public class TempFSTTermsWriter extends FieldsConsumer {
       this.numTerms = 0;
       this.fieldInfo = fieldInfo;
       this.longsSize = postingsWriter.setField(fieldInfo);
-      this.outputs = new TempTermOutputs(fieldInfo, longsSize);
-      this.builder = new Builder<TempTermOutputs.TempTermData>(FST.INPUT_TYPE.BYTE1, outputs);
+      this.outputs = new FSTTermOutputs(fieldInfo, longsSize);
+      this.builder = new Builder<FSTTermOutputs.TermData>(FST.INPUT_TYPE.BYTE1, outputs);
     }
 
     @Override
@@ -172,7 +243,7 @@ public class TempFSTTermsWriter extends FieldsConsumer {
     public void finishTerm(BytesRef text, TermStats stats) throws IOException {
       // write term meta data into fst
       final BlockTermState state = postingsWriter.newTermState();
-      final TempTermOutputs.TempTermData meta = new TempTermOutputs.TempTermData();
+      final FSTTermOutputs.TermData meta = new FSTTermOutputs.TermData();
       meta.longs = new long[longsSize];
       meta.bytes = null;
       meta.docFreq = state.docFreq = stats.docFreq;
@@ -193,7 +264,7 @@ public class TempFSTTermsWriter extends FieldsConsumer {
     public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
       // save FST dict
       if (numTerms > 0) {
-        final FST<TempTermOutputs.TempTermData> fst = builder.finish();
+        final FST<FSTTermOutputs.TermData> fst = builder.finish();
         fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
       }
     }
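
TermsWriter feeds each finished term into an FST Builder and finish() freezes the whole dictionary. The same Builder pattern in isolation, with simple positive-long outputs rather than TermData (a sketch under that simplification, not the writer's actual code):

```java
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

// Build a tiny term -> ord FST; inputs must arrive in sorted order,
// exactly the ordering TermsConsumer guarantees for finishTerm() calls.
FST<Long> buildOrdMap(BytesRef[] sortedTerms) throws java.io.IOException {
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
  IntsRef scratch = new IntsRef();
  long ord = 0;
  for (BytesRef term : sortedTerms) {
    builder.add(Util.toIntsRef(term, scratch), ord++);
  }
  return builder.finish();   // null if no terms were added
}
```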

org/apache/lucene/codecs/memory/package.html

@@ -20,6 +20,6 @@
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 </head>
 <body>
-Postings and DocValues formats that are read entirely into memory.
+Term dictionary, DocValues or Postings formats that are read entirely into memory.
 </body>
 </html>

org/apache/lucene/codecs/temp/TempFSTOrdPostingsFormat.java (deleted)

@@ -1,180 +0,0 @@
-package org.apache.lucene.codecs.temp;
-
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.codecs.CodecUtil; // javadocs
-import org.apache.lucene.store.DataOutput; // javadocs
-import org.apache.lucene.util.fst.FST; // javadocs
-
-/**
- * FST-based term dict, using ord as FST output.
- *
- * The FST holds the mapping between <term, ord>, and
- * term's metadata is delta encoded into a single byte block.
- *
- * Typically the byte block consists of four parts:
- * 1. term statistics: docFreq, totalTermFreq;
- * 2. monotonic long[], e.g. the pointer to the postings list for that term;
- * 3. generic byte[], e.g. other information customized by postings base.
- * 4. single-level skip list to speed up metadata decoding by ord.
- *
- * <p>
- * Files:
- * <ul>
- *  <li><tt>.tix</tt>: <a href="#Termindex">Term Index</a></li>
- *  <li><tt>.tbk</tt>: <a href="#Termblock">Term Block</a></li>
- * </ul>
- * </p>
- *
- * <a name="Termindex" id="Termindex"></a>
- * <h3>Term Index</h3>
- * <p>
- *  The .tix contains a list of FSTs, one for each field.
- *  The FST maps a term to its corresponding order in current field.
- * </p>
- *
- * <ul>
- *  <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup></li>
- *  <li>TermFST --> {@link FST FST<long>}</li>
- *  <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * </ul>
- *
- * <p>Notes:</p>
- * <ul>
- *  <li>
- *   Since terms are already sorted before writing to <a href="#Termblock">Term Block</a>,
- *   their ords can directly used to seek term metadata from term block.
- *  </li>
- * </ul>
- *
- * <a name="Termblock" id="Termblock"></a>
- * <h3>Term Block</h3>
- * <p>
- *  The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
- *  per-field data like number of documents in current field). For each field, there are four blocks:
- *  <ul>
- *   <li>statistics bytes block: contains term statistics; </li>
- *   <li>metadata longs block: delta-encodes monotonical part of metadata; </li>
- *   <li>metadata bytes block: encodes other parts of metadata; </li>
- *   <li>skip block: contains skip data, to speed up metadata seeking and decoding</li>
- *  </ul>
- * </p>
- *
- * <p>File Format:</p>
- * <ul>
- *  <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
- *  <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
- *                       DocCount, LongsSize, DataBlock > <sup>NumFields</sup></li>
- *
- *  <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
- *                    SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
- *  <li>SkipBlock --> < StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
- *                      MetaLongsSkipDelta<sup>LongsSize</sup> ><sup>NumTerms</sup>
- *  <li>StatsBlock --> < DocFreq[Same?], (TotalTermFreq-DocFreq) ? > <sup>NumTerms</sup>
- *  <li>MetaLongsBlock --> < LongDelta<sup>LongsSize</sup>, BytesSize > <sup>NumTerms</sup>
- *  <li>MetaBytesBlock --> Byte <sup>MetaBytesBlockLength</sup>
- *  <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- *  <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- *  <li>NumFields, FieldNumber, DocCount, DocFreq, LongsSize,
- *      FieldNumber, DocCount --> {@link DataOutput#writeVInt VInt}</li>
- *  <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
- *      StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
- *      LongDelta,--> {@link DataOutput#writeVLong VLong}</li>
- * </ul>
- * <p>Notes: </p>
- * <ul>
- *  <li>
- *   The format of PostingsHeader and MetaBytes are customized by the specific postings implementation:
- *   they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
- *   (non-monotonical ones like pulsed postings data).
- *  </li>
- *  <li>
- *   During initialization the reader will load all the blocks into memory. SkipBlock will be decoded, so that during seek
- *   term dict can lookup file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are file offset
- *   for every SkipInterval's term. MetaLongsSkipDelta is the difference from previous one, which indicates
- *   the value of preceding metadata longs for every SkipInterval's term.
- *  </li>
- *  <li>
- *   DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
- *   Usually these two values are the same for long tail terms, therefore one bit is stole from DocFreq to check this case,
- *   so that encoding of TotalTermFreq may be omitted.
- *  </li>
- * </ul>
- *
- * @lucene.experimental
- */
-
-public final class TempFSTOrdPostingsFormat extends PostingsFormat {
-  public TempFSTOrdPostingsFormat() {
-    super("TempFSTOrd");
-  }
-
-  @Override
-  public String toString() {
-    return getName();
-  }
-
-  @Override
-  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
-
-    boolean success = false;
-    try {
-      FieldsConsumer ret = new TempFSTOrdTermsWriter(state, postingsWriter);
-      success = true;
-      return ret;
-    } finally {
-      if (!success) {
-        IOUtils.closeWhileHandlingException(postingsWriter);
-      }
-    }
-  }
-
-  @Override
-  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
-    PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory,
-                                                                   state.fieldInfos,
-                                                                   state.segmentInfo,
-                                                                   state.context,
-                                                                   state.segmentSuffix);
-    boolean success = false;
-    try {
-      FieldsProducer ret = new TempFSTOrdTermsReader(state, postingsReader);
-      success = true;
-      return ret;
-    } finally {
-      if (!success) {
-        IOUtils.closeWhileHandlingException(postingsReader);
-      }
-    }
-  }
-}

org/apache/lucene/codecs/temp/TempFSTPostingsFormat.java (deleted)

@@ -1,158 +0,0 @@
-package org.apache.lucene.codecs.temp;
-
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.codecs.CodecUtil; // javadocs
-import org.apache.lucene.store.DataOutput; // javadocs
-import org.apache.lucene.util.fst.FST; // javadocs
-
-/**
- * FST-based term dict, using metadata as FST output.
- *
- * The FST directly holds the mapping between <term, metadata>.
- *
- * Term metadata consists of three parts:
- * 1. term statistics: docFreq, totalTermFreq;
- * 2. monotonic long[], e.g. the pointer to the postings list for that term;
- * 3. generic byte[], e.g. other information need by postings reader.
- *
- * <p>
- * File:
- * <ul>
- *  <li><tt>.tst</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
- * </ul>
- * <p>
- *
- * <a name="Termdictionary" id="Termdictionary"></a>
- * <h3>Term Dictionary</h3>
- * <p>
- *  The .tst contains a list of FSTs, one for each field.
- *  The FST maps a term to its corresponding statistics (e.g. docfreq)
- *  and metadata (e.g. information for postings list reader like file pointer
- *  to postings list).
- * </p>
- * <p>
- *  Typically the metadata is separated into two parts:
- *  <ul>
- *   <li>
- *    Monotonical long array: Some metadata will always be ascending in order
- *    with the corresponding term. This part is used by FST to share outputs between arcs.
- *   </li>
- *   <li>
- *    Generic byte array: Used to store non-monotonical metadata.
- *   </li>
- *  </ul>
- * </p>
- *
- * File format:
- * <ul>
- *  <li>TermsDict(.tst) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
- *  <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?,
- *                       SumDocFreq, DocCount, LongsSize, TermFST ><sup>NumFields</sup></li>
- *  <li>TermFST --> {@link FST FST<TermData>}</li>
- *  <li>TermData --> Flag, BytesSize?, LongDelta<sup>LongsSize</sup>?, Byte<sup>BytesSize</sup>?,
- *                   < DocFreq[Same?], (TotalTermFreq-DocFreq) > ? </li>
- *  <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- *  <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- *  <li>DocFreq, LongsSize, BytesSize, NumFields,
- *      FieldNumber, DocCount --> {@link DataOutput#writeVInt VInt}</li>
- *  <li>TotalTermFreq, NumTerms, SumTotalTermFreq, SumDocFreq, LongDelta -->
- *      {@link DataOutput#writeVLong VLong}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- *  <li>
- *   The format of PostingsHeader and generic meta bytes are customized by the specific postings implementation:
- *   they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
- *   (non-monotonical ones like pulsed postings data).
- *  </li>
- *  <li>
- *   The format of TermData is determined by FST, typically monotonical metadata will be dense around shallow arcs,
- *   while in deeper arcs only generic bytes and term statistics exist.
- *  </li>
- *  <li>
- *   The byte Flag is used to indicate which part of metadata exists on current arc. Specially the monotonical part
- *   is omitted when it is an array of 0s.
- *  </li>
- *  <li>
- *   Since LongsSize is per-field fixed, it is only written once in field summary.
- *  </li>
- * </ul>
- *
- * @lucene.experimental
- */
-
-public final class TempFSTPostingsFormat extends PostingsFormat {
-  public TempFSTPostingsFormat() {
-    super("TempFST");
-  }
-
-  @Override
-  public String toString() {
-    return getName();
-  }
-
-  @Override
-  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
-
-    boolean success = false;
-    try {
-      FieldsConsumer ret = new TempFSTTermsWriter(state, postingsWriter);
-      success = true;
-      return ret;
-    } finally {
-      if (!success) {
-        IOUtils.closeWhileHandlingException(postingsWriter);
-      }
-    }
-  }
-
-  @Override
-  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
-    PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory,
-                                                                   state.fieldInfos,
-                                                                   state.segmentInfo,
-                                                                   state.context,
-                                                                   state.segmentSuffix);
-    boolean success = false;
-    try {
-      FieldsProducer ret = new TempFSTTermsReader(state, postingsReader);
-      success = true;
-      return ret;
-    } finally {
-      if (!success) {
-        IOUtils.closeWhileHandlingException(postingsReader);
-      }
-    }
-  }
-}

META-INF/services/org.apache.lucene.codecs.PostingsFormat

@@ -18,7 +18,3 @@ org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
 org.apache.lucene.codecs.memory.MemoryPostingsFormat
 org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
 org.apache.lucene.codecs.memory.DirectPostingsFormat
-org.apache.lucene.codecs.temp.TempFSTPulsing41PostingsFormat
-org.apache.lucene.codecs.temp.TempFSTOrdPulsing41PostingsFormat
-org.apache.lucene.codecs.temp.TempFSTPostingsFormat
-org.apache.lucene.codecs.temp.TempFSTOrdPostingsFormat

org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java (new)

@@ -0,0 +1,83 @@
+package org.apache.lucene.codecs.memory;
+
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * FSTOrd term dict + Lucene41PBF
+ */
+
+public final class FSTOrdPostingsFormat extends PostingsFormat {
+  public FSTOrdPostingsFormat() {
+    super("FSTOrd41");
+  }
+
+  @Override
+  public String toString() {
+    return getName();
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
+
+    boolean success = false;
+    try {
+      FieldsConsumer ret = new FSTOrdTermsWriter(state, postingsWriter);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(postingsWriter);
+      }
+    }
+  }
+
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory,
+                                                                   state.fieldInfos,
+                                                                   state.segmentInfo,
+                                                                   state.context,
+                                                                   state.segmentSuffix);
+    boolean success = false;
+    try {
+      FieldsProducer ret = new FSTOrdTermsReader(state, postingsReader);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(postingsReader);
+      }
+    }
+  }
+}
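
A format like this is usually wired in per field through a codec. A sketch of how a test or application might select it (the analyzer variable is hypothetical; Lucene45Codec matches the branch this commit is on, per RandomCodec below):

```java
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene45.Lucene45Codec;
import org.apache.lucene.codecs.memory.FSTOrdPostingsFormat;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;

// Route every field's postings through the new FSTOrd term dictionary.
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45, analyzer);
iwc.setCodec(new Lucene45Codec() {
  @Override
  public PostingsFormat getPostingsFormatForField(String field) {
    return new FSTOrdPostingsFormat();
  }
});
```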

org/apache/lucene/codecs/temp/TempFSTOrdPulsing41PostingsFormat.java → org/apache/lucene/codecs/memory/FSTOrdPulsing41PostingsFormat.java

@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -35,19 +35,19 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.IOUtils;
 
-/** TempFSTOrd + Pulsing41
+/** FSTOrd + Pulsing41
  *  @lucene.experimental */
 
-public class TempFSTOrdPulsing41PostingsFormat extends PostingsFormat {
+public class FSTOrdPulsing41PostingsFormat extends PostingsFormat {
   private final PostingsBaseFormat wrappedPostingsBaseFormat;
   private final int freqCutoff;
 
-  public TempFSTOrdPulsing41PostingsFormat() {
+  public FSTOrdPulsing41PostingsFormat() {
     this(1);
   }
 
-  public TempFSTOrdPulsing41PostingsFormat(int freqCutoff) {
-    super("TempFSTOrdPulsing41");
+  public FSTOrdPulsing41PostingsFormat(int freqCutoff) {
+    super("FSTOrdPulsing41");
     this.wrappedPostingsBaseFormat = new Lucene41PostingsBaseFormat();
     this.freqCutoff = freqCutoff;
   }
@@ -61,7 +61,7 @@ public class TempFSTOrdPulsing41PostingsFormat extends PostingsFormat {
     try {
       docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
       pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
-      FieldsConsumer ret = new TempFSTOrdTermsWriter(state, pulsingWriter);
+      FieldsConsumer ret = new FSTOrdTermsWriter(state, pulsingWriter);
       success = true;
       return ret;
     } finally {
@@ -79,7 +79,7 @@ public class TempFSTOrdPulsing41PostingsFormat extends PostingsFormat {
     try {
       docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
       pulsingReader = new PulsingPostingsReader(state, docsReader);
-      FieldsProducer ret = new TempFSTOrdTermsReader(state, pulsingReader);
+      FieldsProducer ret = new FSTOrdTermsReader(state, pulsingReader);
       success = true;
       return ret;
     } finally {

org/apache/lucene/codecs/memory/FSTPostingsFormat.java (new)

@@ -0,0 +1,83 @@
+package org.apache.lucene.codecs.memory;
+
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * FST term dict + Lucene41PBF
+ */
+
+public final class FSTPostingsFormat extends PostingsFormat {
+  public FSTPostingsFormat() {
+    super("FST41");
+  }
+
+  @Override
+  public String toString() {
+    return getName();
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
+
+    boolean success = false;
+    try {
+      FieldsConsumer ret = new FSTTermsWriter(state, postingsWriter);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(postingsWriter);
+      }
+    }
+  }
+
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory,
+                                                                   state.fieldInfos,
+                                                                   state.segmentInfo,
+                                                                   state.context,
+                                                                   state.segmentSuffix);
+    boolean success = false;
+    try {
+      FieldsProducer ret = new FSTTermsReader(state, postingsReader);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(postingsReader);
+      }
+    }
+  }
+}

org/apache/lucene/codecs/temp/TempFSTPulsing41PostingsFormat.java → org/apache/lucene/codecs/memory/FSTPulsing41PostingsFormat.java

@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -35,20 +35,20 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.IOUtils;
 
-/** TempFST + Pulsing41, test only, since
+/** FST + Pulsing41, test only, since
  *  FST does no delta encoding here!
  *  @lucene.experimental */
 
-public class TempFSTPulsing41PostingsFormat extends PostingsFormat {
+public class FSTPulsing41PostingsFormat extends PostingsFormat {
   private final PostingsBaseFormat wrappedPostingsBaseFormat;
   private final int freqCutoff;
 
-  public TempFSTPulsing41PostingsFormat() {
+  public FSTPulsing41PostingsFormat() {
     this(1);
   }
 
-  public TempFSTPulsing41PostingsFormat(int freqCutoff) {
-    super("TempFSTPulsing41");
+  public FSTPulsing41PostingsFormat(int freqCutoff) {
+    super("FSTPulsing41");
     this.wrappedPostingsBaseFormat = new Lucene41PostingsBaseFormat();
     this.freqCutoff = freqCutoff;
   }
@@ -62,7 +62,7 @@ public class TempFSTPulsing41PostingsFormat extends PostingsFormat {
     try {
       docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
       pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
-      FieldsConsumer ret = new TempFSTTermsWriter(state, pulsingWriter);
+      FieldsConsumer ret = new FSTTermsWriter(state, pulsingWriter);
      success = true;
       return ret;
     } finally {
@@ -80,7 +80,7 @@ public class TempFSTPulsing41PostingsFormat extends PostingsFormat {
     try {
       docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
       pulsingReader = new PulsingPostingsReader(state, docsReader);
-      FieldsProducer ret = new TempFSTTermsReader(state, pulsingReader);
+      FieldsProducer ret = new FSTTermsReader(state, pulsingReader);
       success = true;
       return ret;
     } finally {
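
The Pulsing41 variants show the decorator layering these term dictionaries rely on: a PulsingPostingsWriter wraps the Lucene41 postings base (inlining postings for terms at or below freqCutoff), and the FST term dictionary sits on top. Condensed from the fieldsConsumer above, with error handling elided:

```java
// Layering inside fieldsConsumer(): Lucene41 postings base, wrapped by
// pulsing (low-freq postings inlined into term metadata), topped by the
// FST term dictionary.
PostingsWriterBase docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
PostingsWriterBase pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
FieldsConsumer fields = new FSTTermsWriter(state, pulsingWriter);
```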

package.html

@@ -20,6 +20,6 @@
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 </head>
 <body>
-FST term dict: FST-based term dictionary implementations.
+Codec to test FST-based term dictionary with some postings base format.
 </body>
 </html>

MockRandomPostingsFormat.java

@@ -50,10 +50,10 @@ import org.apache.lucene.codecs.sep.IntIndexOutput;
 import org.apache.lucene.codecs.sep.IntStreamFactory;
 import org.apache.lucene.codecs.sep.SepPostingsReader;
 import org.apache.lucene.codecs.sep.SepPostingsWriter;
-import org.apache.lucene.codecs.temp.TempFSTTermsWriter;
-import org.apache.lucene.codecs.temp.TempFSTTermsReader;
-import org.apache.lucene.codecs.temp.TempFSTOrdTermsWriter;
-import org.apache.lucene.codecs.temp.TempFSTOrdTermsReader;
+import org.apache.lucene.codecs.memory.FSTTermsWriter;
+import org.apache.lucene.codecs.memory.FSTTermsReader;
+import org.apache.lucene.codecs.memory.FSTOrdTermsWriter;
+import org.apache.lucene.codecs.memory.FSTOrdTermsReader;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentReadState;
@@ -196,7 +196,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
       if (t1 == 0) {
         boolean success = false;
         try {
-          fields = new TempFSTTermsWriter(state, postingsWriter);
+          fields = new FSTTermsWriter(state, postingsWriter);
           success = true;
         } finally {
           if (!success) {
@@ -206,7 +206,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
       } else if (t1 == 1) {
         boolean success = false;
         try {
-          fields = new TempFSTOrdTermsWriter(state, postingsWriter);
+          fields = new FSTOrdTermsWriter(state, postingsWriter);
           success = true;
         } finally {
           if (!success) {
@@ -355,7 +355,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
       if (t1 == 0) {
         boolean success = false;
         try {
-          fields = new TempFSTTermsReader(state, postingsReader);
+          fields = new FSTTermsReader(state, postingsReader);
           success = true;
         } finally {
           if (!success) {
@@ -365,7 +365,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
       } else if (t1 == 1) {
         boolean success = false;
         try {
-          fields = new TempFSTOrdTermsReader(state, postingsReader);
+          fields = new FSTOrdTermsReader(state, postingsReader);
           success = true;
         } finally {
           if (!success) {

RandomCodec.java

@@ -50,10 +50,10 @@ import org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat;
 import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
-import org.apache.lucene.codecs.temp.TempFSTOrdPostingsFormat;
-import org.apache.lucene.codecs.temp.TempFSTOrdPulsing41PostingsFormat;
-import org.apache.lucene.codecs.temp.TempFSTPostingsFormat;
-import org.apache.lucene.codecs.temp.TempFSTPulsing41PostingsFormat;
+import org.apache.lucene.codecs.memory.FSTOrdPostingsFormat;
+import org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat;
+import org.apache.lucene.codecs.memory.FSTPostingsFormat;
+import org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
@@ -129,10 +129,10 @@ public class RandomCodec extends Lucene45Codec {
 
     add(avoidCodecs,
         new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock),
-        new TempFSTPostingsFormat(),
-        new TempFSTOrdPostingsFormat(),
-        new TempFSTPulsing41PostingsFormat(1 + random.nextInt(20)),
-        new TempFSTOrdPulsing41PostingsFormat(1 + random.nextInt(20)),
+        new FSTPostingsFormat(),
+        new FSTOrdPostingsFormat(),
+        new FSTPulsing41PostingsFormat(1 + random.nextInt(20)),
+        new FSTOrdPulsing41PostingsFormat(1 + random.nextInt(20)),
         new DirectPostingsFormat(LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : maxItemsPerBlock),
                                  LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : lowFreqCutoff)),
         new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),

META-INF/services/org.apache.lucene.codecs.PostingsFormat (test-framework)

@@ -25,3 +25,7 @@ org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval
 org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings
 org.apache.lucene.codecs.asserting.AssertingPostingsFormat
 org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat
+org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat
+org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
+org.apache.lucene.codecs.memory.FSTPostingsFormat
+org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
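
Because the formats self-register through these META-INF/services entries, each one can be looked up by the name passed to super() in its constructor; for example:

```java
import org.apache.lucene.codecs.PostingsFormat;

// SPI lookup by registered name, resolved via the services entries above.
PostingsFormat fst = PostingsFormat.forName("FST41");
PostingsFormat fstOrd = PostingsFormat.forName("FSTOrd41");
```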