add javadocs, pass precommit

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1519288 13f79535-47bb-0310-9956-ffa450edef68
Han Jiang 2013-09-01 16:32:17 +00:00
parent e1f85e73ed
commit 0d606b471b
13 changed files with 113 additions and 51 deletions

@@ -164,7 +164,7 @@ public final class SepPostingsWriter extends PostingsWriterBase {
}
@Override
public SepTermState newTermState() {
public BlockTermState newTermState() {
return new SepTermState();
}

@@ -32,6 +32,22 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/**
* FST-based term dict, using ord as FST output.
*
* The FST holds the mapping between &lt;term, ord&gt;, and
* each term's metadata is delta-encoded into a single byte block.
*
* Typically the byte block consists of four parts:
* 1. term statistics: docFreq, totalTermFreq;
* 2. monotonic long[], e.g. the pointer to the postings list for that term;
* 3. generic byte[], e.g. other information customized by the postings base;
* 4. single-level skip list to speed up metadata decoding by ord.
*
* <!-- TODO: explain about the data format -->
* @lucene.experimental
*/
public final class TempFSTOrdPostingsFormat extends PostingsFormat {
public TempFSTOrdPostingsFormat() {
super("TempFSTOrd");

@@ -61,6 +61,15 @@ import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.CodecUtil;
/**
* FST-based terms dictionary reader.
*
* The FST index maps each term to its ord, and during seek
* the ord is used to fetch metadata from a single block.
* The term dictionary is fully memory resident.
*
* @lucene.experimental
*/
public class TempFSTOrdTermsReader extends FieldsProducer {
static final int INTERVAL = TempFSTOrdTermsWriter.SKIP_INTERVAL;
final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();
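
To give a rough picture of how the single-level skip list bounds decoding cost: with a skip entry recorded every INTERVAL terms, a seek by ord costs at most INTERVAL - 1 sequential advances. The following self-contained sketch is illustrative only; the array-based representation is invented for the example, while the real reader decodes entries from an in-memory byte block:

class SkipSketch {
  // returns the address of the metadata entry for `ord`, given a skip entry
  // recorded every `interval` terms and the encoded length of each entry
  static long addressOfOrd(long ord, long[] skipTable, int[] entryLengths, int interval) {
    int skipPoint = (int) (ord / interval);      // nearest skip entry at or before ord
    long address = skipTable[skipPoint];         // absolute address stored at the skip point
    for (long i = (long) skipPoint * interval; i < ord; i++) {
      address += entryLengths[(int) i];          // walk forward over the variable-length entries
    }
    return address;
  }
}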

@@ -45,8 +45,11 @@ import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.CodecUtil;
/** FST based term dict, only ords is hold in FST,
* other metadata encoded into single byte block */
/**
* FST-based term dict, in which the FST maps each term to its ord.
*
* @lucene.experimental
*/
public class TempFSTOrdTermsWriter extends FieldsConsumer {
static final String TERMS_INDEX_EXTENSION = "tix";

@@ -32,6 +32,21 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/**
* FST-based term dict, using metadata as FST output.
*
* The FST directly holds the mapping between &lt;term, metadata&gt;.
*
* Term metadata consists of three parts:
* 1. term statistics: docFreq, totalTermFreq;
* 2. monotonic long[], e.g. the pointer to the postings list for that term;
* 3. generic byte[], e.g. other information needed by the postings reader.
*
* <!-- TODO: explain about the data format -->
* @lucene.experimental
*/
public final class TempFSTPostingsFormat extends PostingsFormat {
public TempFSTPostingsFormat() {
super("TempFST");

@@ -59,6 +59,15 @@ import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.CodecUtil;
/**
* FST-based terms dictionary reader.
*
* The FST directly maps each term to its metadata,
* and it is fully memory resident.
*
* @lucene.experimental
*/
public class TempFSTTermsReader extends FieldsProducer {
final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();
final PostingsReaderBase postingsReader;

@@ -44,8 +44,11 @@ import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.CodecUtil;
/** FST based term dict, all the metadata held
* as output of FST */
/**
* FST-based term dict, in which the FST maps each term to its metadata.
*
* @lucene.experimental
*/
public class TempFSTTermsWriter extends FieldsConsumer {
static final String TERMS_EXTENSION = "tmp";

@@ -27,14 +27,26 @@ import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.LongsRef;
/**
* An FST {@link Outputs} implementation for
* {@link TempFSTPostingsFormat}.
*
* @lucene.experimental
*/
// NOTE: outputs should be per-field, since
// longsSize is fixed for each field
public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
private final static TempMetaData NO_OUTPUT = new TempMetaData();
private static boolean DEBUG = false;
//private static boolean TEST = false;
private final boolean hasPos;
private final int longsSize;
/**
* Represents the metadata for one term.
* On an FST, only the long[] part is 'shared' and pushed towards the root;
* the byte[] and term stats are kept on deeper arcs.
*/
public static class TempMetaData {
long[] longs;
byte[] bytes;
@@ -89,33 +101,6 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
bytesEqual(this, other);
}
public String toString() {
if (this == NO_OUTPUT) {
return "no_output";
}
StringBuffer sb = new StringBuffer();
if (longs != null) {
sb.append("[ ");
for (int i = 0; i < longs.length; i++) {
sb.append(longs[i]+" ");
}
sb.append("]");
} else {
sb.append("null");
}
if (bytes != null) {
sb.append(" [ ");
for (int i = 0; i < bytes.length; i++) {
sb.append(Integer.toHexString((int)bytes[i] & 0xff)+" ");
}
sb.append("]");
} else {
sb.append(" null");
}
sb.append(" "+docFreq);
sb.append(" "+totalTermFreq);
return sb.toString();
}
}
protected TempTermOutputs(FieldInfo fieldInfo, int longsSize) {
@@ -130,14 +115,10 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
// 1. every value in t1 is not larger than in t2, or
// 2. every value in t1 is not smaller than in t2.
//
// NOTE:
// Only long[] part is 'shared' and pushed towards root.
// byte[] and term stats will be kept on deeper arcs.
//
public TempMetaData common(TempMetaData t1, TempMetaData t2) {
//if (DEBUG) System.out.print("common("+t1+", "+t2+") = ");
//if (TEST) System.out.print("common("+t1+", "+t2+") = ");
if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
//if (DEBUG) System.out.println("ret:"+NO_OUTPUT);
//if (TEST) System.out.println("ret:"+NO_OUTPUT);
return NO_OUTPUT;
}
assert t1.longs.length == t2.longs.length;
@@ -172,15 +153,15 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
ret = new TempMetaData(min, null, 0, -1);
}
}
//if (DEBUG) System.out.println("ret:"+ret);
//if (TEST) System.out.println("ret:"+ret);
return ret;
}
@Override
public TempMetaData subtract(TempMetaData t1, TempMetaData t2) {
//if (DEBUG) System.out.print("subtract("+t1+", "+t2+") = ");
//if (TEST) System.out.print("subtract("+t1+", "+t2+") = ");
if (t2 == NO_OUTPUT) {
//if (DEBUG) System.out.println("ret:"+t1);
//if (TEST) System.out.println("ret:"+t1);
return t1;
}
assert t1.longs.length == t2.longs.length;
@@ -201,7 +182,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
} else {
ret = new TempMetaData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
}
//if (DEBUG) System.out.println("ret:"+ret);
//if (TEST) System.out.println("ret:"+ret);
return ret;
}
@@ -210,12 +191,12 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
// we seem to put much stress on FST Outputs decoding?
@Override
public TempMetaData add(TempMetaData t1, TempMetaData t2) {
//if (DEBUG) System.out.print("add("+t1+", "+t2+") = ");
//if (TEST) System.out.print("add("+t1+", "+t2+") = ");
if (t1 == NO_OUTPUT) {
//if (DEBUG) System.out.println("ret:"+t2);
//if (TEST) System.out.println("ret:"+t2);
return t2;
} else if (t2 == NO_OUTPUT) {
//if (DEBUG) System.out.println("ret:"+t1);
//if (TEST) System.out.println("ret:"+t1);
return t1;
}
assert t1.longs.length == t2.longs.length;
@@ -234,7 +215,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
} else {
ret = new TempMetaData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
}
//if (DEBUG) System.out.println("ret:"+ret);
//if (TEST) System.out.println("ret:"+ret);
return ret;
}
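
A simplified model of the common/subtract/add algebra above, restricted to the long[] part; the real methods also handle the byte[] and term-stats cases, and only lift a shared prefix when one vector dominates the other:

class OutputsAlgebraSketch {
  // shareable prefix: element-wise minimum (valid to share when one vector dominates the other)
  static long[] common(long[] t1, long[] t2) {
    long[] min = new long[t1.length];
    for (int i = 0; i < t1.length; i++) {
      min[i] = Math.min(t1[i], t2[i]);
    }
    return min;
  }
  // remainder kept on the deeper arc after the shared prefix is pushed toward the root
  static long[] subtract(long[] t, long[] shared) {
    long[] rest = new long[t.length];
    for (int i = 0; i < t.length; i++) {
      rest[i] = t[i] - shared[i];
    }
    return rest;
  }
  // reading a term sums the outputs along its FST path
  static long[] add(long[] prefix, long[] suffix) {
    long[] sum = new long[prefix.length];
    for (int i = 0; i < prefix.length; i++) {
      sum[i] = prefix[i] + suffix[i];
    }
    return sum;
  }
}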

@@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
FST term dict: FST-based term dictionary implementations.
</body>
</html>

@@ -33,7 +33,7 @@ import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZ
* Encode all values in the normal area with a fixed bit width,
* which is determined by the max value in this block.
*/
public final class ForUtil {
final class ForUtil {
/**
* Special number of bits per value used whenever all values to encode are equal.
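
The bit-width selection this javadoc describes amounts to taking the number of bits needed by the block's largest value. A minimal self-contained sketch of that computation (the real class works with PackedInts-style utilities rather than this helper):

// number of bits per value for one block: just enough for the largest value
static int bitsRequired(long[] block) {
  long max = 0;
  for (long v : block) {
    max = Math.max(max, v);
  }
  return 64 - Long.numberOfLeadingZeros(max);  // 0 when every value is 0
}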

@@ -575,7 +575,8 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
}
lastState = state;
}
public void _encodeTerm(DataOutput out, FieldInfo fieldInfo, IntBlockTermState state) throws IOException {
private void _encodeTerm(DataOutput out, FieldInfo fieldInfo, IntBlockTermState state) throws IOException {
if (state.singletonDocID == -1) {
out.writeVLong(state.docTermStartFP - lastState.docTermStartFP);
lastState.docTermStartFP = state.docTermStartFP;
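
The writeVLong call above works because the file pointer is delta-encoded against the previous term's, keeping the written value small. For reference, a standalone sketch of the variable-length long scheme itself (7 data bits per byte, high bit as continuation), which is the idea behind Lucene's DataOutput.writeVLong:

import java.io.DataOutputStream;
import java.io.IOException;

class VLongSketch {
  static void writeVLong(DataOutputStream out, long v) throws IOException {
    while ((v & ~0x7FL) != 0) {
      out.writeByte((int) ((v & 0x7F) | 0x80));  // low 7 bits, continuation bit set
      v >>>= 7;
    }
    out.writeByte((int) v);                      // final byte, continuation bit clear
  }
}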

@@ -50,7 +50,7 @@ import org.apache.lucene.store.IndexInput;
* Therefore, we'll trim df before passing it to the interface. See trim(int).
*
*/
public final class Lucene41SkipReader extends MultiLevelSkipListReader {
final class Lucene41SkipReader extends MultiLevelSkipListReader {
// private boolean DEBUG = Lucene41PostingsReader.DEBUG;
private final int blockSize;

@@ -43,7 +43,7 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter;
* 4. start offset.
*
*/
public final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
// private boolean DEBUG = Lucene41PostingsReader.DEBUG;
private int[] lastSkipDoc;