LUCENE-3069: javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1519909 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Han Jiang 2013-09-04 03:05:23 +00:00
parent ec07c84339
commit 054a95e182
10 changed files with 204 additions and 61 deletions

View File

@ -31,6 +31,9 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.codecs.CodecUtil; // javadocs
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.util.fst.FST; // javadocs
/** /**
* FST-based term dict, using ord as FST output. * FST-based term dict, using ord as FST output.
@ -44,7 +47,89 @@ import org.apache.lucene.util.IOUtils;
* 3. generic byte[], e.g. other information customized by postings base. * 3. generic byte[], e.g. other information customized by postings base.
* 4. single-level skip list to speed up metadata decoding by ord. * 4. single-level skip list to speed up metadata decoding by ord.
* *
* <!-- TODO: explain about the data format --> * <p>
* Files:
* <ul>
* <li><tt>.tix</tt>: <a href="#Termindex">Term Index</a></li>
* <li><tt>.tbk</tt>: <a href="#Termblock">Term Block</a></li>
* </ul>
* </p>
*
* <a name="Termindex" id="Termindex"></a>
* <h3>Term Index</h3>
* <p>
* The .tix contains a list of FSTs, one for each field.
* The FST maps a term to its corresponding order in current field.
* </p>
*
* <ul>
* <li>TermIndex(.tix) --&gt; Header, TermFST<sup>NumFields</sup></li>
* <li>TermFST --&gt; {@link FST FST&lt;long&gt;}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* </ul>
*
* <p>Notes:</p>
* <ul>
* <li>
* Since terms are already sorted before writing to <a href="#Termblock">Term Block</a>,
 * their ords can be directly used to seek term metadata from term block.
* </li>
* </ul>
*
* <a name="Termblock" id="Termblock"></a>
* <h3>Term Block</h3>
* <p>
* The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
* per-field data like number of documents in current field). For each field, there are four blocks:
* <ul>
* <li>statistics bytes block: contains term statistics; </li>
 * <li>metadata longs block: delta-encodes the monotonic part of metadata; </li>
* <li>metadata bytes block: encodes other parts of metadata; </li>
* <li>skip block: contains skip data, to speed up metadata seeking and decoding</li>
* </ul>
* </p>
*
* <p>File Format:</p>
* <ul>
* <li>TermBlock(.tbk) --&gt; Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
* <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
* DocCount, LongsSize, DataBlock &gt; <sup>NumFields</sup></li>
*
* <li>DataBlock --&gt; StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
* <li>SkipBlock --&gt; &lt; StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
* MetaLongsSkipDelta<sup>LongsSize</sup> &gt;<sup>NumTerms</sup>
* <li>StatsBlock --&gt; &lt; DocFreq[Same?], (TotalTermFreq-DocFreq) ? &gt; <sup>NumTerms</sup>
* <li>MetaLongsBlock --&gt; &lt; LongDelta<sup>LongsSize</sup>, BytesSize &gt; <sup>NumTerms</sup>
* <li>MetaBytesBlock --&gt; Byte <sup>MetaBytesBlockLength</sup>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>NumFields, FieldNumber, DocCount, DocFreq, LongsSize,
* FieldNumber, DocCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
 * LongDelta --&gt; {@link DataOutput#writeVLong VLong}</li>
* </ul>
* <p>Notes: </p>
* <ul>
* <li>
* The format of PostingsHeader and MetaBytes are customized by the specific postings implementation:
* they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
* (non-monotonical ones like pulsed postings data).
* </li>
* <li>
* During initialization the reader will load all the blocks into memory. SkipBlock will be decoded, so that during seek
 * term dict can lookup file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are file offsets
* for every SkipInterval's term. MetaLongsSkipDelta is the difference from previous one, which indicates
* the value of preceding metadata longs for every SkipInterval's term.
* </li>
* <li>
* DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
 * Usually these two values are the same for long tail terms, therefore one bit is stolen from DocFreq to check this case,
* so that encoding of TotalTermFreq may be omitted.
* </li>
* </ul>
*
* @lucene.experimental * @lucene.experimental
*/ */

View File

@ -89,7 +89,6 @@ public class TempFSTOrdTermsReader extends FieldsProducer {
readHeader(indexIn); readHeader(indexIn);
readHeader(blockIn); readHeader(blockIn);
this.postingsReader.init(blockIn); this.postingsReader.init(blockIn);
seekDir(indexIn);
seekDir(blockIn); seekDir(blockIn);
final FieldInfos fieldInfos = state.fieldInfos; final FieldInfos fieldInfos = state.fieldInfos;

View File

@ -120,7 +120,6 @@ public class TempFSTOrdTermsWriter extends FieldsConsumer {
field.metaBytesOut.writeTo(blockOut); field.metaBytesOut.writeTo(blockOut);
field.dict.save(indexOut); field.dict.save(indexOut);
} }
writeTrailer(indexOut, indexDirStart);
writeTrailer(blockOut, blockDirStart); writeTrailer(blockOut, blockDirStart);
} catch (IOException ioe2) { } catch (IOException ioe2) {
ioe = ioe2; ioe = ioe2;

View File

@ -31,6 +31,9 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.codecs.CodecUtil; // javadocs
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.util.fst.FST; // javadocs
/** /**
* FST-based term dict, using metadata as FST output. * FST-based term dict, using metadata as FST output.
@ -42,8 +45,69 @@ import org.apache.lucene.util.IOUtils;
* 2. monotonic long[], e.g. the pointer to the postings list for that term; * 2. monotonic long[], e.g. the pointer to the postings list for that term;
* 3. generic byte[], e.g. other information need by postings reader. * 3. generic byte[], e.g. other information need by postings reader.
* *
* <p>
* File:
* <ul>
* <li><tt>.tst</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
* </ul>
 * </p>
*
* <a name="Termdictionary" id="Termdictionary"></a>
* <h3>Term Dictionary</h3>
* <p>
* The .tst contains a list of FSTs, one for each field.
* The FST maps a term to its corresponding statistics (e.g. docfreq)
* and metadata (e.g. information for postings list reader like file pointer
* to postings list).
* </p>
* <p>
* Typically the metadata is separated into two parts:
* <ul>
* <li>
 * Monotonic long array: Some metadata will always be ascending in order
* with the corresponding term. This part is used by FST to share outputs between arcs.
* </li>
* <li>
 * Generic byte array: Used to store non-monotonic metadata.
* </li>
* </ul>
* </p>
*
* File format:
* <ul>
* <li>TermsDict(.tst) --&gt; Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
* <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, SumTotalTermFreq?,
* SumDocFreq, DocCount, LongsSize, TermFST &gt;<sup>NumFields</sup></li>
* <li>TermFST --&gt; {@link FST FST&lt;TermData&gt;}</li>
* <li>TermData --&gt; Flag, BytesSize?, LongDelta<sup>LongsSize</sup>?, Byte<sup>BytesSize</sup>?,
* &lt; DocFreq[Same?], (TotalTermFreq-DocFreq) &gt; ? </li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>DocFreq, LongsSize, BytesSize, NumFields,
* FieldNumber, DocCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>TotalTermFreq, NumTerms, SumTotalTermFreq, SumDocFreq, LongDelta --&gt;
* {@link DataOutput#writeVLong VLong}</li>
* </ul>
* <p>Notes:</p>
* <ul>
* <li>
* The format of PostingsHeader and generic meta bytes are customized by the specific postings implementation:
* they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
* (non-monotonical ones like pulsed postings data).
* </li>
* <li>
* The format of TermData is determined by FST, typically monotonical metadata will be dense around shallow arcs,
* while in deeper arcs only generic bytes and term statistics exist.
* </li>
* <li>
 * The byte Flag is used to indicate which part of metadata exists on the current arc. Specifically, the monotonic part
* is omitted when it is an array of 0s.
* </li>
* <li>
* Since LongsSize is per-field fixed, it is only written once in field summary.
* </li>
* </ul>
* *
* <!-- TODO: explain about the data format -->
* @lucene.experimental * @lucene.experimental
*/ */

View File

@ -167,7 +167,7 @@ public class TempFSTTermsReader extends FieldsProducer {
final long sumDocFreq; final long sumDocFreq;
final int docCount; final int docCount;
final int longsSize; final int longsSize;
final FST<TempTermOutputs.TempMetaData> dict; final FST<TempTermOutputs.TempTermData> dict;
TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException { TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
this.fieldInfo = fieldInfo; this.fieldInfo = fieldInfo;
@ -176,7 +176,7 @@ public class TempFSTTermsReader extends FieldsProducer {
this.sumDocFreq = sumDocFreq; this.sumDocFreq = sumDocFreq;
this.docCount = docCount; this.docCount = docCount;
this.longsSize = longsSize; this.longsSize = longsSize;
this.dict = new FST<TempTermOutputs.TempMetaData>(in, new TempTermOutputs(fieldInfo, longsSize)); this.dict = new FST<TempTermOutputs.TempTermData>(in, new TempTermOutputs(fieldInfo, longsSize));
} }
@Override @Override
@ -238,7 +238,7 @@ public class TempFSTTermsReader extends FieldsProducer {
final BlockTermState state; final BlockTermState state;
/* Current term stats + undecoded metadata (long[] & byte[]) */ /* Current term stats + undecoded metadata (long[] & byte[]) */
TempTermOutputs.TempMetaData meta; TempTermOutputs.TempTermData meta;
ByteArrayDataInput bytesReader; ByteArrayDataInput bytesReader;
/** Decodes metadata into customized term state */ /** Decodes metadata into customized term state */
@ -306,7 +306,7 @@ public class TempFSTTermsReader extends FieldsProducer {
// Iterates through all terms in this field // Iterates through all terms in this field
private final class SegmentTermsEnum extends BaseTermsEnum { private final class SegmentTermsEnum extends BaseTermsEnum {
final BytesRefFSTEnum<TempTermOutputs.TempMetaData> fstEnum; final BytesRefFSTEnum<TempTermOutputs.TempTermData> fstEnum;
/* True when current term's metadata is decoded */ /* True when current term's metadata is decoded */
boolean decoded; boolean decoded;
@ -316,7 +316,7 @@ public class TempFSTTermsReader extends FieldsProducer {
SegmentTermsEnum() throws IOException { SegmentTermsEnum() throws IOException {
super(); super();
this.fstEnum = new BytesRefFSTEnum<TempTermOutputs.TempMetaData>(dict); this.fstEnum = new BytesRefFSTEnum<TempTermOutputs.TempTermData>(dict);
this.decoded = false; this.decoded = false;
this.seekPending = false; this.seekPending = false;
this.meta = null; this.meta = null;
@ -335,7 +335,7 @@ public class TempFSTTermsReader extends FieldsProducer {
} }
// Update current enum according to FSTEnum // Update current enum according to FSTEnum
void updateEnum(final InputOutput<TempTermOutputs.TempMetaData> pair) { void updateEnum(final InputOutput<TempTermOutputs.TempTermData> pair) {
if (pair == null) { if (pair == null) {
term = null; term = null;
} else { } else {
@ -405,22 +405,22 @@ public class TempFSTTermsReader extends FieldsProducer {
int metaUpto; int metaUpto;
/* term dict fst */ /* term dict fst */
final FST<TempTermOutputs.TempMetaData> fst; final FST<TempTermOutputs.TempTermData> fst;
final FST.BytesReader fstReader; final FST.BytesReader fstReader;
final Outputs<TempTermOutputs.TempMetaData> fstOutputs; final Outputs<TempTermOutputs.TempTermData> fstOutputs;
/* query automaton to intersect with */ /* query automaton to intersect with */
final ByteRunAutomaton fsa; final ByteRunAutomaton fsa;
private final class Frame { private final class Frame {
/* fst stats */ /* fst stats */
FST.Arc<TempTermOutputs.TempMetaData> fstArc; FST.Arc<TempTermOutputs.TempTermData> fstArc;
/* automaton stats */ /* automaton stats */
int fsaState; int fsaState;
Frame() { Frame() {
this.fstArc = new FST.Arc<TempTermOutputs.TempMetaData>(); this.fstArc = new FST.Arc<TempTermOutputs.TempTermData>();
this.fsaState = -1; this.fsaState = -1;
} }
@ -475,7 +475,7 @@ public class TempFSTTermsReader extends FieldsProducer {
/** Lazily accumulate meta data, when we got a accepted term */ /** Lazily accumulate meta data, when we got a accepted term */
void loadMetaData() throws IOException { void loadMetaData() throws IOException {
FST.Arc<TempTermOutputs.TempMetaData> last, next; FST.Arc<TempTermOutputs.TempTermData> last, next;
last = stack[metaUpto].fstArc; last = stack[metaUpto].fstArc;
while (metaUpto != level) { while (metaUpto != level) {
metaUpto++; metaUpto++;
@ -626,7 +626,7 @@ public class TempFSTTermsReader extends FieldsProducer {
/** Load frame for target arc(node) on fst, so that /** Load frame for target arc(node) on fst, so that
* arc.label >= label and !fsa.reject(arc.label) */ * arc.label >= label and !fsa.reject(arc.label) */
Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException { Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
FST.Arc<TempTermOutputs.TempMetaData> arc = frame.fstArc; FST.Arc<TempTermOutputs.TempTermData> arc = frame.fstArc;
arc = Util.readCeilArc(label, fst, top.fstArc, arc, fstReader); arc = Util.readCeilArc(label, fst, top.fstArc, arc, fstReader);
if (arc == null) { if (arc == null) {
return null; return null;

View File

@ -125,9 +125,9 @@ public class TempFSTTermsWriter extends FieldsConsumer {
public final long sumDocFreq; public final long sumDocFreq;
public final int docCount; public final int docCount;
public final int longsSize; public final int longsSize;
public final FST<TempTermOutputs.TempMetaData> dict; public final FST<TempTermOutputs.TempTermData> dict;
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<TempTermOutputs.TempMetaData> fst) { public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<TempTermOutputs.TempTermData> fst) {
this.fieldInfo = fieldInfo; this.fieldInfo = fieldInfo;
this.numTerms = numTerms; this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq; this.sumTotalTermFreq = sumTotalTermFreq;
@ -139,7 +139,7 @@ public class TempFSTTermsWriter extends FieldsConsumer {
} }
final class TermsWriter extends TermsConsumer { final class TermsWriter extends TermsConsumer {
private final Builder<TempTermOutputs.TempMetaData> builder; private final Builder<TempTermOutputs.TempTermData> builder;
private final TempTermOutputs outputs; private final TempTermOutputs outputs;
private final FieldInfo fieldInfo; private final FieldInfo fieldInfo;
private final int longsSize; private final int longsSize;
@ -154,7 +154,7 @@ public class TempFSTTermsWriter extends FieldsConsumer {
this.fieldInfo = fieldInfo; this.fieldInfo = fieldInfo;
this.longsSize = postingsWriter.setField(fieldInfo); this.longsSize = postingsWriter.setField(fieldInfo);
this.outputs = new TempTermOutputs(fieldInfo, longsSize); this.outputs = new TempTermOutputs(fieldInfo, longsSize);
this.builder = new Builder<TempTermOutputs.TempMetaData>(FST.INPUT_TYPE.BYTE1, outputs); this.builder = new Builder<TempTermOutputs.TempTermData>(FST.INPUT_TYPE.BYTE1, outputs);
} }
@Override @Override
@ -172,7 +172,7 @@ public class TempFSTTermsWriter extends FieldsConsumer {
public void finishTerm(BytesRef text, TermStats stats) throws IOException { public void finishTerm(BytesRef text, TermStats stats) throws IOException {
// write term meta data into fst // write term meta data into fst
final BlockTermState state = postingsWriter.newTermState(); final BlockTermState state = postingsWriter.newTermState();
final TempTermOutputs.TempMetaData meta = new TempTermOutputs.TempMetaData(); final TempTermOutputs.TempTermData meta = new TempTermOutputs.TempTermData();
meta.longs = new long[longsSize]; meta.longs = new long[longsSize];
meta.bytes = null; meta.bytes = null;
meta.docFreq = state.docFreq = stats.docFreq; meta.docFreq = state.docFreq = stats.docFreq;
@ -193,7 +193,7 @@ public class TempFSTTermsWriter extends FieldsConsumer {
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException { public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
// save FST dict // save FST dict
if (numTerms > 0) { if (numTerms > 0) {
final FST<TempTermOutputs.TempMetaData> fst = builder.finish(); final FST<TempTermOutputs.TempTermData> fst = builder.finish();
fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst)); fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
} }
} }

View File

@ -36,8 +36,8 @@ import org.apache.lucene.util.LongsRef;
// NOTE: outputs should be per-field, since // NOTE: outputs should be per-field, since
// longsSize is fixed for each field // longsSize is fixed for each field
public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> { public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
private final static TempMetaData NO_OUTPUT = new TempMetaData(); private final static TempTermData NO_OUTPUT = new TempTermData();
//private static boolean TEST = false; //private static boolean TEST = false;
private final boolean hasPos; private final boolean hasPos;
private final int longsSize; private final int longsSize;
@ -47,18 +47,18 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
* On an FST, only long[] part is 'shared' and pushed towards root. * On an FST, only long[] part is 'shared' and pushed towards root.
* byte[] and term stats will be kept on deeper arcs. * byte[] and term stats will be kept on deeper arcs.
*/ */
public static class TempMetaData { public static class TempTermData {
long[] longs; long[] longs;
byte[] bytes; byte[] bytes;
int docFreq; int docFreq;
long totalTermFreq; long totalTermFreq;
TempMetaData() { TempTermData() {
this.longs = null; this.longs = null;
this.bytes = null; this.bytes = null;
this.docFreq = 0; this.docFreq = 0;
this.totalTermFreq = -1; this.totalTermFreq = -1;
} }
TempMetaData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) { TempTermData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) {
this.longs = longs; this.longs = longs;
this.bytes = bytes; this.bytes = bytes;
this.docFreq = docFreq; this.docFreq = docFreq;
@ -92,10 +92,10 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
public boolean equals(Object other_) { public boolean equals(Object other_) {
if (other_ == this) { if (other_ == this) {
return true; return true;
} else if (!(other_ instanceof TempTermOutputs.TempMetaData)) { } else if (!(other_ instanceof TempTermOutputs.TempTermData)) {
return false; return false;
} }
TempMetaData other = (TempMetaData) other_; TempTermData other = (TempTermData) other_;
return statsEqual(this, other) && return statsEqual(this, other) &&
longsEqual(this, other) && longsEqual(this, other) &&
bytesEqual(this, other); bytesEqual(this, other);
@ -115,7 +115,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
// 1. every value in t1 is not larger than in t2, or // 1. every value in t1 is not larger than in t2, or
// 2. every value in t1 is not smaller than t2. // 2. every value in t1 is not smaller than t2.
// //
public TempMetaData common(TempMetaData t1, TempMetaData t2) { public TempTermData common(TempTermData t1, TempTermData t2) {
//if (TEST) System.out.print("common("+t1+", "+t2+") = "); //if (TEST) System.out.print("common("+t1+", "+t2+") = ");
if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) { if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
//if (TEST) System.out.println("ret:"+NO_OUTPUT); //if (TEST) System.out.println("ret:"+NO_OUTPUT);
@ -125,7 +125,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
long[] min = t1.longs, max = t2.longs; long[] min = t1.longs, max = t2.longs;
int pos = 0; int pos = 0;
TempMetaData ret; TempTermData ret;
while (pos < longsSize && min[pos] == max[pos]) { while (pos < longsSize && min[pos] == max[pos]) {
pos++; pos++;
@ -142,7 +142,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
if (pos < longsSize || allZero(min)) { // not comparable or all-zero if (pos < longsSize || allZero(min)) { // not comparable or all-zero
ret = NO_OUTPUT; ret = NO_OUTPUT;
} else { } else {
ret = new TempMetaData(min, null, 0, -1); ret = new TempTermData(min, null, 0, -1);
} }
} else { // equal long[] } else { // equal long[]
if (statsEqual(t1, t2) && bytesEqual(t1, t2)) { if (statsEqual(t1, t2) && bytesEqual(t1, t2)) {
@ -150,7 +150,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
} else if (allZero(min)) { } else if (allZero(min)) {
ret = NO_OUTPUT; ret = NO_OUTPUT;
} else { } else {
ret = new TempMetaData(min, null, 0, -1); ret = new TempTermData(min, null, 0, -1);
} }
} }
//if (TEST) System.out.println("ret:"+ret); //if (TEST) System.out.println("ret:"+ret);
@ -158,7 +158,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
} }
@Override @Override
public TempMetaData subtract(TempMetaData t1, TempMetaData t2) { public TempTermData subtract(TempTermData t1, TempTermData t2) {
//if (TEST) System.out.print("subtract("+t1+", "+t2+") = "); //if (TEST) System.out.print("subtract("+t1+", "+t2+") = ");
if (t2 == NO_OUTPUT) { if (t2 == NO_OUTPUT) {
//if (TEST) System.out.println("ret:"+t1); //if (TEST) System.out.println("ret:"+t1);
@ -176,11 +176,11 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
pos++; pos++;
} }
TempMetaData ret; TempTermData ret;
if (diff == 0 && statsEqual(t1, t2) && bytesEqual(t1, t2)) { if (diff == 0 && statsEqual(t1, t2) && bytesEqual(t1, t2)) {
ret = NO_OUTPUT; ret = NO_OUTPUT;
} else { } else {
ret = new TempMetaData(share, t1.bytes, t1.docFreq, t1.totalTermFreq); ret = new TempTermData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
} }
//if (TEST) System.out.println("ret:"+ret); //if (TEST) System.out.println("ret:"+ret);
return ret; return ret;
@ -190,7 +190,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
// we can gain about 5~7% for fuzzy queries, however this also // we can gain about 5~7% for fuzzy queries, however this also
// means we are putting too much stress on FST Outputs decoding? // means we are putting too much stress on FST Outputs decoding?
@Override @Override
public TempMetaData add(TempMetaData t1, TempMetaData t2) { public TempTermData add(TempTermData t1, TempTermData t2) {
//if (TEST) System.out.print("add("+t1+", "+t2+") = "); //if (TEST) System.out.print("add("+t1+", "+t2+") = ");
if (t1 == NO_OUTPUT) { if (t1 == NO_OUTPUT) {
//if (TEST) System.out.println("ret:"+t2); //if (TEST) System.out.println("ret:"+t2);
@ -209,18 +209,18 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
pos++; pos++;
} }
TempMetaData ret; TempTermData ret;
if (t2.bytes != null || t2.docFreq > 0) { if (t2.bytes != null || t2.docFreq > 0) {
ret = new TempMetaData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq); ret = new TempTermData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq);
} else { } else {
ret = new TempMetaData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq); ret = new TempTermData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
} }
//if (TEST) System.out.println("ret:"+ret); //if (TEST) System.out.println("ret:"+ret);
return ret; return ret;
} }
@Override @Override
public void write(TempMetaData data, DataOutput out) throws IOException { public void write(TempTermData data, DataOutput out) throws IOException {
int bit0 = allZero(data.longs) ? 0 : 1; int bit0 = allZero(data.longs) ? 0 : 1;
int bit1 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1) << 1; int bit1 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1) << 1;
int bit2 = ((data.docFreq == 0) ? 0 : 1) << 2; int bit2 = ((data.docFreq == 0) ? 0 : 1) << 2;
@ -259,7 +259,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
} }
@Override @Override
public TempMetaData read(DataInput in) throws IOException { public TempTermData read(DataInput in) throws IOException {
long[] longs = new long[longsSize]; long[] longs = new long[longsSize];
byte[] bytes = null; byte[] bytes = null;
int docFreq = 0; int docFreq = 0;
@ -292,29 +292,29 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
docFreq = code; docFreq = code;
} }
} }
return new TempMetaData(longs, bytes, docFreq, totalTermFreq); return new TempTermData(longs, bytes, docFreq, totalTermFreq);
} }
@Override @Override
public TempMetaData getNoOutput() { public TempTermData getNoOutput() {
return NO_OUTPUT; return NO_OUTPUT;
} }
@Override @Override
public String outputToString(TempMetaData data) { public String outputToString(TempTermData data) {
return data.toString(); return data.toString();
} }
static boolean statsEqual(final TempMetaData t1, final TempMetaData t2) { static boolean statsEqual(final TempTermData t1, final TempTermData t2) {
return t1.docFreq == t2.docFreq && t1.totalTermFreq == t2.totalTermFreq; return t1.docFreq == t2.docFreq && t1.totalTermFreq == t2.totalTermFreq;
} }
static boolean bytesEqual(final TempMetaData t1, final TempMetaData t2) { static boolean bytesEqual(final TempTermData t1, final TempTermData t2) {
if (t1.bytes == null && t2.bytes == null) { if (t1.bytes == null && t2.bytes == null) {
return true; return true;
} }
return t1.bytes != null && t2.bytes != null && Arrays.equals(t1.bytes, t2.bytes); return t1.bytes != null && t2.bytes != null && Arrays.equals(t1.bytes, t2.bytes);
} }
static boolean longsEqual(final TempMetaData t1, final TempMetaData t2) { static boolean longsEqual(final TempTermData t1, final TempTermData t2) {
if (t1.longs == null && t2.longs == null) { if (t1.longs == null && t2.longs == null) {
return true; return true;
} }

View File

@ -104,14 +104,14 @@ import org.apache.lucene.util.packed.PackedInts;
* and decoding the Postings Metadata and Term Metadata sections.</p> * and decoding the Postings Metadata and Term Metadata sections.</p>
* *
* <ul> * <ul>
* <li>TermsDict (.tim) --&gt; Header, <i>Postings Header</i>, NodeBlock<sup>NumBlocks</sup>, * <li>TermsDict (.tim) --&gt; Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
* FieldSummary, DirOffset</li> * FieldSummary, DirOffset</li>
* <li>NodeBlock --&gt; (OuterNode | InnerNode)</li> * <li>NodeBlock --&gt; (OuterNode | InnerNode)</li>
* <li>OuterNode --&gt; EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>Term Metadata</i>&gt;<sup>EntryCount</sup></li> * <li>OuterNode --&gt; EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata</i>&gt;<sup>EntryCount</sup></li>
* <li>InnerNode --&gt; EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats ? &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>Term Metadata ? </i>&gt;<sup>EntryCount</sup></li> * <li>InnerNode --&gt; EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats ? &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata ? </i>&gt;<sup>EntryCount</sup></li>
* <li>TermStats --&gt; DocFreq, TotalTermFreq </li> * <li>TermStats --&gt; DocFreq, TotalTermFreq </li>
* <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, RootCodeLength, Byte<sup>RootCodeLength</sup>, * <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, RootCodeLength, Byte<sup>RootCodeLength</sup>,
* SumDocFreq, DocCount&gt;<sup>NumFields</sup></li> * SumTotalTermFreq?, SumDocFreq, DocCount&gt;<sup>NumFields</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li> * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li> * <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields, * <li>EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
@ -133,7 +133,7 @@ import org.apache.lucene.util.packed.PackedInts;
* <li>SumDocFreq is the total number of postings, the number of term-document pairs across * <li>SumDocFreq is the total number of postings, the number of term-document pairs across
* the entire field.</li> * the entire field.</li>
* <li>DocCount is the number of documents that have at least one posting for this field.</li> * <li>DocCount is the number of documents that have at least one posting for this field.</li>
* <li>PostingsMetadata and TermMetadata are plugged into by the specific postings implementation: * <li>PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
* these contain arbitrary per-file data (such as parameters or versioning information) * these contain arbitrary per-file data (such as parameters or versioning information)
* and per-term data (such as pointers to inverted files).</li> * and per-term data (such as pointers to inverted files).</li>
* <li>For inner nodes of the tree, every entry will steal one bit to mark whether it points * <li>For inner nodes of the tree, every entry will steal one bit to mark whether it points

View File

@ -123,11 +123,11 @@ import org.apache.lucene.util.packed.PackedInts;
* *
* <p>NOTE: The term dictionary can plug into different postings implementations: * <p>NOTE: The term dictionary can plug into different postings implementations:
* the postings writer/reader are actually responsible for encoding * the postings writer/reader are actually responsible for encoding
* and decoding the Postings Metadata and Term Metadata sections described here:</p> * and decoding the PostingsHeader and TermMetadata sections described here:</p>
* *
* <ul> * <ul>
* <li>Postings Metadata --&gt; Header, PackedBlockSize</li> * <li>PostingsHeader --&gt; Header, PackedBlockSize</li>
* <li>Term Metadata --&gt; (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?, * <li>TermMetadata --&gt; (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?,
* SkipFPDelta?</li> * SkipFPDelta?</li>
* <li>Header, --&gt; {@link CodecUtil#writeHeader CodecHeader}</li> * <li>Header, --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>PackedBlockSize, SingletonDocID --&gt; {@link DataOutput#writeVInt VInt}</li> * <li>PackedBlockSize, SingletonDocID --&gt; {@link DataOutput#writeVInt VInt}</li>

View File

@ -324,11 +324,7 @@ public abstract class LuceneTestCase extends Assert {
"MockFixedIntBlock", "MockFixedIntBlock",
"MockVariableIntBlock", "MockVariableIntBlock",
"MockSep", "MockSep",
"MockRandom", "MockRandom"
"TempSep",
"TempFixedIntBlock",
"TempVariableIntBlock",
"TempRandom"
)); ));
// ----------------------------------------------------------------- // -----------------------------------------------------------------