mirror of https://github.com/apache/lucene.git
add javadocs, pass precommit
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1519288 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
  parent e1f85e73ed
  commit 0d606b471b
@@ -164,7 +164,7 @@ public final class SepPostingsWriter extends PostingsWriterBase {
   }
 
   @Override
-  public SepTermState newTermState() {
+  public BlockTermState newTermState() {
     return new SepTermState();
   }

@@ -32,6 +32,22 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.IOUtils;
 
+/**
+ * FST-based term dict, using ord as FST output.
+ *
+ * The FST holds the mapping between <term, ord>, and
+ * term's metadata is delta encoded into a single byte block.
+ *
+ * Typically the byte block consists of four parts:
+ * 1. term statistics: docFreq, totalTermFreq;
+ * 2. monotonic long[], e.g. the pointer to the postings list for that term;
+ * 3. generic byte[], e.g. other information customized by postings base.
+ * 4. single-level skip list to speed up metadata decoding by ord.
+ *
+ * <!-- TODO: explain about the data format -->
+ * @lucene.experimental
+ */
+
 public final class TempFSTOrdPostingsFormat extends PostingsFormat {
   public TempFSTOrdPostingsFormat() {
     super("TempFSTOrd");

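The <term, ord> mapping described in the javadoc above is what Lucene's FST with PositiveIntOutputs provides; a minimal sketch, assuming the Lucene 4.x-era FST API (exact signatures drifted slightly across 4.x releases), of how such a dictionary could be built and probed:

    import java.io.IOException;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.IntsRef;
    import org.apache.lucene.util.fst.Builder;
    import org.apache.lucene.util.fst.FST;
    import org.apache.lucene.util.fst.PositiveIntOutputs;
    import org.apache.lucene.util.fst.Util;

    class TermOrdFstSketch {
      // Build an FST mapping term -> ord; terms must arrive in sorted
      // order, which is what a terms writer sees during flush.
      static FST<Long> build(Iterable<BytesRef> sortedTerms) throws IOException {
        PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
        Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
        IntsRef scratch = new IntsRef();
        long ord = 0;
        for (BytesRef term : sortedTerms) {
          builder.add(Util.toIntsRef(term, scratch), ord++);
        }
        return builder.finish();
      }

      // Seek: recover the ord for a term, then use it against the separate
      // metadata block rather than storing the metadata in the FST itself.
      static Long ordOf(FST<Long> fst, BytesRef term) throws IOException {
        return Util.get(fst, term);
      }
    }

Because ords are assigned in term-sorted order they grow monotonically along the FST, which is what lets the shared output prefixes stay compact.
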
@@ -61,6 +61,15 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.CodecUtil;
 
+/**
+ * FST-based terms dictionary reader.
+ *
+ * The FST index maps each term and its ord, and during seek
+ * the ord is used to fetch metadata from a single block.
+ * The term dictionary is fully memory resident.
+ *
+ * @lucene.experimental
+ */
 public class TempFSTOrdTermsReader extends FieldsProducer {
   static final int INTERVAL = TempFSTOrdTermsWriter.SKIP_INTERVAL;
   final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();

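For intuition about the INTERVAL constant above: a skip entry every INTERVAL terms bounds a metadata seek by ord to one jump plus at most INTERVAL - 1 sequential decodes. A hypothetical sketch (checkpointFP and the scan step are illustrative assumptions, not this reader's actual fields):

    // Locate term metadata by ord using a checkpoint every 'interval'
    // entries, then scan the delta-coded remainder from there.
    static long seekByOrd(long ord, long[] checkpointFP, int interval) {
      long fp = checkpointFP[(int) (ord / interval)]; // jump to nearest checkpoint
      long remaining = ord % interval;                // entries left to scan
      // a real reader would now decode 'remaining' delta-coded entries
      // starting at 'fp' to reach the exact metadata for 'ord'
      return fp;
    }
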
@@ -45,8 +45,11 @@ import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.codecs.CodecUtil;
 
-/** FST based term dict, only ords is hold in FST,
- *  other metadata encoded into single byte block */
+/**
+ * FST based term dict, the FST maps each term and its ord.
+ *
+ * @lucene.experimental
+ */
 public class TempFSTOrdTermsWriter extends FieldsConsumer {
   static final String TERMS_INDEX_EXTENSION = "tix";

@@ -32,6 +32,21 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.IOUtils;
 
+/**
+ * FST-based term dict, using metadata as FST output.
+ *
+ * The FST directly holds the mapping between <term, metadata>.
+ *
+ * Term metadata consists of three parts:
+ * 1. term statistics: docFreq, totalTermFreq;
+ * 2. monotonic long[], e.g. the pointer to the postings list for that term;
+ * 3. generic byte[], e.g. other information needed by postings reader.
+ *
+ * <!-- TODO: explain about the data format -->
+ * @lucene.experimental
+ */
+
 public final class TempFSTPostingsFormat extends PostingsFormat {
   public TempFSTPostingsFormat() {
     super("TempFST");

@@ -59,6 +59,15 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.CodecUtil;
 
+/**
+ * FST-based terms dictionary reader.
+ *
+ * The FST directly maps each term and its metadata,
+ * and it is memory resident.
+ *
+ * @lucene.experimental
+ */
+
 public class TempFSTTermsReader extends FieldsProducer {
   final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();
   final PostingsReaderBase postingsReader;

@@ -44,8 +44,11 @@ import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.codecs.CodecUtil;
 
-/** FST based term dict, all the metadata held
- *  as output of FST */
+/**
+ * FST based term dict, the FST maps each term and its metadata.
+ *
+ * @lucene.experimental
+ */
 public class TempFSTTermsWriter extends FieldsConsumer {
   static final String TERMS_EXTENSION = "tmp";

@@ -27,14 +27,26 @@ import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.fst.Outputs;
 import org.apache.lucene.util.LongsRef;
 
+/**
+ * An FST {@link Outputs} implementation for
+ * {@link TempFSTPostingsFormat}.
+ *
+ * @lucene.experimental
+ */
+
 // NOTE: outputs should be per-field, since
 // longsSize is fixed for each field
 public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
   private final static TempMetaData NO_OUTPUT = new TempMetaData();
-  private static boolean DEBUG = false;
+  //private static boolean TEST = false;
   private final boolean hasPos;
   private final int longsSize;
 
+  /**
+   * Represents the metadata for one term.
+   * On an FST, only the long[] part is 'shared' and pushed towards the root;
+   * the byte[] and term stats are kept on deeper arcs.
+   */
   public static class TempMetaData {
     long[] longs;
     byte[] bytes;

@@ -89,33 +101,6 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
         bytesEqual(this, other);
     }
 
-    public String toString() {
-      if (this == NO_OUTPUT) {
-        return "no_output";
-      }
-      StringBuffer sb = new StringBuffer();
-      if (longs != null) {
-        sb.append("[ ");
-        for (int i = 0; i < longs.length; i++) {
-          sb.append(longs[i]+" ");
-        }
-        sb.append("]");
-      } else {
-        sb.append("null");
-      }
-      if (bytes != null) {
-        sb.append(" [ ");
-        for (int i = 0; i < bytes.length; i++) {
-          sb.append(Integer.toHexString((int)bytes[i] & 0xff)+" ");
-        }
-        sb.append("]");
-      } else {
-        sb.append(" null");
-      }
-      sb.append(" "+docFreq);
-      sb.append(" "+totalTermFreq);
-      return sb.toString();
-    }
-
   }
 
   protected TempTermOutputs(FieldInfo fieldInfo, int longsSize) {

@@ -130,14 +115,10 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
   // 1. every value in t1 is not larger than in t2, or
   // 2. every value in t1 is not smaller than t2.
   //
-  // NOTE:
-  // Only long[] part is 'shared' and pushed towards root.
-  // byte[] and term stats will be kept on deeper arcs.
-  //
   public TempMetaData common(TempMetaData t1, TempMetaData t2) {
-    //if (DEBUG) System.out.print("common("+t1+", "+t2+") = ");
+    //if (TEST) System.out.print("common("+t1+", "+t2+") = ");
     if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
-      //if (DEBUG) System.out.println("ret:"+NO_OUTPUT);
+      //if (TEST) System.out.println("ret:"+NO_OUTPUT);
       return NO_OUTPUT;
     }
     assert t1.longs.length == t2.longs.length;

@@ -172,15 +153,15 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
         ret = new TempMetaData(min, null, 0, -1);
       }
     }
-    //if (DEBUG) System.out.println("ret:"+ret);
+    //if (TEST) System.out.println("ret:"+ret);
     return ret;
   }
 
   @Override
   public TempMetaData subtract(TempMetaData t1, TempMetaData t2) {
-    //if (DEBUG) System.out.print("subtract("+t1+", "+t2+") = ");
+    //if (TEST) System.out.print("subtract("+t1+", "+t2+") = ");
     if (t2 == NO_OUTPUT) {
-      //if (DEBUG) System.out.println("ret:"+t1);
+      //if (TEST) System.out.println("ret:"+t1);
       return t1;
     }
     assert t1.longs.length == t2.longs.length;

@@ -201,7 +182,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
     } else {
       ret = new TempMetaData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
     }
-    //if (DEBUG) System.out.println("ret:"+ret);
+    //if (TEST) System.out.println("ret:"+ret);
     return ret;
   }
 
@@ -210,12 +191,12 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
   // we seem to put much stress on FST Outputs decoding?
   @Override
   public TempMetaData add(TempMetaData t1, TempMetaData t2) {
-    //if (DEBUG) System.out.print("add("+t1+", "+t2+") = ");
+    //if (TEST) System.out.print("add("+t1+", "+t2+") = ");
     if (t1 == NO_OUTPUT) {
-      //if (DEBUG) System.out.println("ret:"+t2);
+      //if (TEST) System.out.println("ret:"+t2);
       return t2;
     } else if (t2 == NO_OUTPUT) {
-      //if (DEBUG) System.out.println("ret:"+t1);
+      //if (TEST) System.out.println("ret:"+t1);
      return t1;
    }
    assert t1.longs.length == t2.longs.length;

@@ -234,7 +215,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
     } else {
       ret = new TempMetaData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
     }
-    //if (DEBUG) System.out.println("ret:"+ret);
+    //if (TEST) System.out.println("ret:"+ret);
     return ret;
   }
 
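Context for the common/subtract/add methods touched above (an illustration, not code from the patch): any FST Outputs implementation must satisfy add(subtract(t1, common(t1, t2)), common(t1, t2)) == t1, so that shared prefixes can be pushed toward the root without losing information. With a single long output the algebra degenerates to min/minus/plus; TempTermOutputs applies the same idea per slot of the long[] part, which is why its common() first checks that one metadata dominates the other:

    // Outputs algebra on a single long, the simplest case of the contract
    // TempTermOutputs implements for long[] metadata.
    final class LongOutputsSketch {
      static long common(long a, long b)   { return Math.min(a, b); }
      static long subtract(long a, long b) { return a - b; } // requires b <= a
      static long add(long a, long b)      { return a + b; }

      public static void main(String[] args) {
        long t1 = 15, t2 = 12;
        long shared = common(t1, t2);      // 12, pushed toward the root
        long rest = subtract(t1, shared);  // 3, left on the deeper arc
        long restored = add(rest, shared); // 15 == t1, lossless round trip
        System.out.println(restored == t1);
      }
    }
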
@@ -0,0 +1,25 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+FST term dict: FST-based term dictionary implementations.
+</body>
+</html>

@@ -33,7 +33,7 @@ import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
  * Encode all values in normal area with fixed bit width,
  * which is determined by the max value in this block.
  */
-public final class ForUtil {
+final class ForUtil {
 
   /**
    * Special number of bits per value used whenever all values to encode are equal.

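ForUtil, now package-private, picks one bit width per block from that block's maximum value. A minimal sketch of the width computation, assuming non-negative values (the actual class delegates the packing itself to the PackedInts bulk codecs):

    // Bits needed to represent every non-negative value in a block:
    // OR all values together, then take the position of the highest set bit.
    static int bitsRequired(int[] block) {
      long accum = 0;
      for (int v : block) {
        accum |= v;
      }
      return Math.max(1, 64 - Long.numberOfLeadingZeros(accum));
    }
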
@@ -575,7 +575,8 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
     }
     lastState = state;
   }
-  public void _encodeTerm(DataOutput out, FieldInfo fieldInfo, IntBlockTermState state) throws IOException {
+
+  private void _encodeTerm(DataOutput out, FieldInfo fieldInfo, IntBlockTermState state) throws IOException {
     if (state.singletonDocID == -1) {
       out.writeVLong(state.docTermStartFP - lastState.docTermStartFP);
       lastState.docTermStartFP = state.docTermStartFP;

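The _encodeTerm change above only tightens visibility, but the body shows the encoding trick: each term's postings start file pointer is written as a delta against the previous term's, so the VLong stays small. A self-contained illustration with made-up pointer values:

    import java.io.IOException;
    import org.apache.lucene.store.DataOutput;

    final class DeltaFpSketch {
      // Consecutive terms' postings tend to start close together,
      // so the deltas mostly fit in one or two VLong bytes.
      static void writeStartFPs(DataOutput out, long[] startFPs) throws IOException {
        long lastFP = 0;
        for (long fp : startFPs) {
          out.writeVLong(fp - lastFP); // e.g. {1024, 1031, 1090} -> 1024, 7, 59
          lastFP = fp;
        }
      }
    }
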
@@ -50,7 +50,7 @@ import org.apache.lucene.store.IndexInput;
  * Therefore, we'll trim df before passing it to the interface. see trim(int)
  *
  */
-public final class Lucene41SkipReader extends MultiLevelSkipListReader {
+final class Lucene41SkipReader extends MultiLevelSkipListReader {
   // private boolean DEBUG = Lucene41PostingsReader.DEBUG;
   private final int blockSize;
 
@@ -43,7 +43,7 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter;
  * 4. start offset.
  *
  */
-public final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
+final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
   // private boolean DEBUG = Lucene41PostingsReader.DEBUG;
 
   private int[] lastSkipDoc;