mirror of https://github.com/apache/lucene.git
add javadocs, pass precommit
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1519288 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
  parent e1f85e73ed
  commit 0d606b471b
@@ -164,7 +164,7 @@ public final class SepPostingsWriter extends PostingsWriterBase {
   }
 
   @Override
-  public SepTermState newTermState() {
+  public BlockTermState newTermState() {
     return new SepTermState();
   }

@@ -32,6 +32,22 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.IOUtils;
 
+/**
+ * FST-based term dict, using ord as FST output.
+ *
+ * The FST holds the mapping between <term, ord>, and
+ * term's metadata is delta encoded into a single byte block.
+ *
+ * Typically the byte block consists of four parts:
+ * 1. term statistics: docFreq, totalTermFreq;
+ * 2. monotonic long[], e.g. the pointer to the postings list for that term;
+ * 3. generic byte[], e.g. other information customized by postings base.
+ * 4. single-level skip list to speed up metadata decoding by ord.
+ *
+ * <!-- TODO: explain about the data format -->
+ * @lucene.experimental
+ */
+
 public final class TempFSTOrdPostingsFormat extends PostingsFormat {
   public TempFSTOrdPostingsFormat() {
     super("TempFSTOrd");

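The <term, ord> mapping described in the javadoc above is what Lucene's FST with PositiveIntOutputs provides; a minimal sketch, assuming the Lucene 4.x-era FST API (exact signatures drifted slightly across 4.x releases), of how such a dictionary could be built and probed:

    import java.io.IOException;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.IntsRef;
    import org.apache.lucene.util.fst.Builder;
    import org.apache.lucene.util.fst.FST;
    import org.apache.lucene.util.fst.PositiveIntOutputs;
    import org.apache.lucene.util.fst.Util;

    class TermOrdFstSketch {
      // Build an FST mapping term -> ord; terms must arrive in sorted
      // order, which is what a terms writer sees during flush.
      static FST<Long> build(Iterable<BytesRef> sortedTerms) throws IOException {
        PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
        Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
        IntsRef scratch = new IntsRef();
        long ord = 0;
        for (BytesRef term : sortedTerms) {
          builder.add(Util.toIntsRef(term, scratch), ord++);
        }
        return builder.finish();
      }

      // Seek: recover the ord for a term, then use it against the separate
      // metadata block rather than storing the metadata in the FST itself.
      static Long ordOf(FST<Long> fst, BytesRef term) throws IOException {
        return Util.get(fst, term);
      }
    }

Because ords are assigned in term-sorted order they grow monotonically along the FST, which is what lets the shared output prefixes stay compact.
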
@@ -61,6 +61,15 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.CodecUtil;
 
+/**
+ * FST-based terms dictionary reader.
+ *
+ * The FST index maps each term and its ord, and during seek
+ * the ord is used to fetch metadata from a single block.
+ * The term dictionary is fully memory resident.
+ *
+ * @lucene.experimental
+ */
 public class TempFSTOrdTermsReader extends FieldsProducer {
   static final int INTERVAL = TempFSTOrdTermsWriter.SKIP_INTERVAL;
   final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();

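For intuition about the INTERVAL constant above: a skip entry every INTERVAL terms bounds a metadata seek by ord to one jump plus at most INTERVAL - 1 sequential decodes. A hypothetical sketch (checkpointFP and the scan step are illustrative assumptions, not this reader's actual fields):

    // Locate term metadata by ord using a checkpoint every 'interval'
    // entries, then scan the delta-coded remainder from there.
    static long seekByOrd(long ord, long[] checkpointFP, int interval) {
      long fp = checkpointFP[(int) (ord / interval)]; // jump to nearest checkpoint
      long remaining = ord % interval;                // entries left to scan
      // a real reader would now decode 'remaining' delta-coded entries
      // starting at 'fp' to reach the exact metadata for 'ord'
      return fp;
    }
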
@@ -45,8 +45,11 @@ import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.codecs.CodecUtil;
 
-/** FST based term dict, only ords is hold in FST,
- *  other metadata encoded into single byte block */
+/**
+ * FST based term dict, the FST maps each term and its ord.
+ *
+ * @lucene.experimental
+ */
 public class TempFSTOrdTermsWriter extends FieldsConsumer {
   static final String TERMS_INDEX_EXTENSION = "tix";

@@ -32,6 +32,21 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.IOUtils;
 
+/**
+ * FST-based term dict, using metadata as FST output.
+ *
+ * The FST directly holds the mapping between <term, metadata>.
+ *
+ * Term metadata consists of three parts:
+ * 1. term statistics: docFreq, totalTermFreq;
+ * 2. monotonic long[], e.g. the pointer to the postings list for that term;
+ * 3. generic byte[], e.g. other information needed by postings reader.
+ *
+ * <!-- TODO: explain about the data format -->
+ * @lucene.experimental
+ */
+
 public final class TempFSTPostingsFormat extends PostingsFormat {
   public TempFSTPostingsFormat() {
     super("TempFST");

@@ -59,6 +59,15 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.CodecUtil;
 
+/**
+ * FST-based terms dictionary reader.
+ *
+ * The FST directly maps each term and its metadata,
+ * and it is memory resident.
+ *
+ * @lucene.experimental
+ */
+
 public class TempFSTTermsReader extends FieldsProducer {
   final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();
   final PostingsReaderBase postingsReader;

@@ -44,8 +44,11 @@ import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.codecs.CodecUtil;
 
-/** FST based term dict, all the metadata held
- *  as output of FST */
+/**
+ * FST based term dict, the FST maps each term and its metadata.
+ *
+ * @lucene.experimental
+ */
 public class TempFSTTermsWriter extends FieldsConsumer {
   static final String TERMS_EXTENSION = "tmp";

@@ -27,14 +27,26 @@ import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.fst.Outputs;
 import org.apache.lucene.util.LongsRef;
 
+/**
+ * An FST {@link Outputs} implementation for
+ * {@link TempFSTPostingsFormat}.
+ *
+ * @lucene.experimental
+ */
+
 // NOTE: outputs should be per-field, since
 // longsSize is fixed for each field
 public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
   private final static TempMetaData NO_OUTPUT = new TempMetaData();
-  private static boolean DEBUG = false;
+  //private static boolean TEST = false;
   private final boolean hasPos;
   private final int longsSize;
 
+  /**
+   * Represents the metadata for one term.
+   * On an FST, only the long[] part is 'shared' and pushed towards the root;
+   * the byte[] and term stats are kept on deeper arcs.
+   */
   public static class TempMetaData {
     long[] longs;
     byte[] bytes;

@@ -89,33 +101,6 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
         bytesEqual(this, other);
     }
 
-    public String toString() {
-      if (this == NO_OUTPUT) {
-        return "no_output";
-      }
-      StringBuffer sb = new StringBuffer();
-      if (longs != null) {
-        sb.append("[ ");
-        for (int i = 0; i < longs.length; i++) {
-          sb.append(longs[i]+" ");
-        }
-        sb.append("]");
-      } else {
-        sb.append("null");
-      }
-      if (bytes != null) {
-        sb.append(" [ ");
-        for (int i = 0; i < bytes.length; i++) {
-          sb.append(Integer.toHexString((int)bytes[i] & 0xff)+" ");
-        }
-        sb.append("]");
-      } else {
-        sb.append(" null");
-      }
-      sb.append(" "+docFreq);
-      sb.append(" "+totalTermFreq);
-      return sb.toString();
-    }
-
   }
 
   protected TempTermOutputs(FieldInfo fieldInfo, int longsSize) {

@@ -130,14 +115,10 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
   // 1. every value in t1 is not larger than in t2, or
   // 2. every value in t1 is not smaller than t2.
   //
-  // NOTE:
-  // Only long[] part is 'shared' and pushed towards root.
-  // byte[] and term stats will be kept on deeper arcs.
-  //
   public TempMetaData common(TempMetaData t1, TempMetaData t2) {
-    //if (DEBUG) System.out.print("common("+t1+", "+t2+") = ");
+    //if (TEST) System.out.print("common("+t1+", "+t2+") = ");
     if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
-      //if (DEBUG) System.out.println("ret:"+NO_OUTPUT);
+      //if (TEST) System.out.println("ret:"+NO_OUTPUT);
       return NO_OUTPUT;
     }
     assert t1.longs.length == t2.longs.length;

@@ -172,15 +153,15 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
         ret = new TempMetaData(min, null, 0, -1);
       }
     }
-    //if (DEBUG) System.out.println("ret:"+ret);
+    //if (TEST) System.out.println("ret:"+ret);
     return ret;
   }
 
   @Override
   public TempMetaData subtract(TempMetaData t1, TempMetaData t2) {
-    //if (DEBUG) System.out.print("subtract("+t1+", "+t2+") = ");
+    //if (TEST) System.out.print("subtract("+t1+", "+t2+") = ");
     if (t2 == NO_OUTPUT) {
-      //if (DEBUG) System.out.println("ret:"+t1);
+      //if (TEST) System.out.println("ret:"+t1);
       return t1;
     }
     assert t1.longs.length == t2.longs.length;

@@ -201,7 +182,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
     } else {
       ret = new TempMetaData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
     }
-    //if (DEBUG) System.out.println("ret:"+ret);
+    //if (TEST) System.out.println("ret:"+ret);
     return ret;
   }
 
@@ -210,12 +191,12 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
   // we seem to put much stress on FST Outputs decoding?
   @Override
   public TempMetaData add(TempMetaData t1, TempMetaData t2) {
-    //if (DEBUG) System.out.print("add("+t1+", "+t2+") = ");
+    //if (TEST) System.out.print("add("+t1+", "+t2+") = ");
     if (t1 == NO_OUTPUT) {
-      //if (DEBUG) System.out.println("ret:"+t2);
+      //if (TEST) System.out.println("ret:"+t2);
       return t2;
     } else if (t2 == NO_OUTPUT) {
-      //if (DEBUG) System.out.println("ret:"+t1);
+      //if (TEST) System.out.println("ret:"+t1);
      return t1;
    }
    assert t1.longs.length == t2.longs.length;

@@ -234,7 +215,7 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
     } else {
       ret = new TempMetaData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
     }
-    //if (DEBUG) System.out.println("ret:"+ret);
+    //if (TEST) System.out.println("ret:"+ret);
     return ret;
   }
 
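Context for the common/subtract/add methods touched above (an illustration, not code from the patch): any FST Outputs implementation must satisfy add(subtract(t1, common(t1, t2)), common(t1, t2)) == t1, so that shared prefixes can be pushed toward the root without losing information. With a single long output the algebra degenerates to min/minus/plus; TempTermOutputs applies the same idea per slot of the long[] part, which is why its common() first checks that one metadata dominates the other:

    // Outputs algebra on a single long, the simplest case of the contract
    // TempTermOutputs implements for long[] metadata.
    final class LongOutputsSketch {
      static long common(long a, long b)   { return Math.min(a, b); }
      static long subtract(long a, long b) { return a - b; } // requires b <= a
      static long add(long a, long b)      { return a + b; }

      public static void main(String[] args) {
        long t1 = 15, t2 = 12;
        long shared = common(t1, t2);      // 12, pushed toward the root
        long rest = subtract(t1, shared);  // 3, left on the deeper arc
        long restored = add(rest, shared); // 15 == t1, lossless round trip
        System.out.println(restored == t1);
      }
    }
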
@@ -0,0 +1,25 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+FST term dict: FST-based term dictionary implementations.
+</body>
+</html>

@@ -33,7 +33,7 @@ import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
  * Encode all values in normal area with fixed bit width,
  * which is determined by the max value in this block.
  */
-public final class ForUtil {
+final class ForUtil {
 
   /**
    * Special number of bits per value used whenever all values to encode are equal.

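ForUtil, now package-private, picks one bit width per block from that block's maximum value. A minimal sketch of the width computation, assuming non-negative values (the actual class delegates the packing itself to the PackedInts bulk codecs):

    // Bits needed to represent every non-negative value in a block:
    // OR all values together, then take the position of the highest set bit.
    static int bitsRequired(int[] block) {
      long accum = 0;
      for (int v : block) {
        accum |= v;
      }
      return Math.max(1, 64 - Long.numberOfLeadingZeros(accum));
    }
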
@@ -575,7 +575,8 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
     }
     lastState = state;
   }
-  public void _encodeTerm(DataOutput out, FieldInfo fieldInfo, IntBlockTermState state) throws IOException {
+
+  private void _encodeTerm(DataOutput out, FieldInfo fieldInfo, IntBlockTermState state) throws IOException {
     if (state.singletonDocID == -1) {
       out.writeVLong(state.docTermStartFP - lastState.docTermStartFP);
       lastState.docTermStartFP = state.docTermStartFP;

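The _encodeTerm change above only tightens visibility, but the body shows the encoding trick: each term's postings start file pointer is written as a delta against the previous term's, so the VLong stays small. A self-contained illustration with made-up pointer values:

    import java.io.IOException;
    import org.apache.lucene.store.DataOutput;

    final class DeltaFpSketch {
      // Consecutive terms' postings tend to start close together,
      // so the deltas mostly fit in one or two VLong bytes.
      static void writeStartFPs(DataOutput out, long[] startFPs) throws IOException {
        long lastFP = 0;
        for (long fp : startFPs) {
          out.writeVLong(fp - lastFP); // e.g. {1024, 1031, 1090} -> 1024, 7, 59
          lastFP = fp;
        }
      }
    }
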
@@ -50,7 +50,7 @@ import org.apache.lucene.store.IndexInput;
  * Therefore, we'll trim df before passing it to the interface. see trim(int)
  *
  */
-public final class Lucene41SkipReader extends MultiLevelSkipListReader {
+final class Lucene41SkipReader extends MultiLevelSkipListReader {
   // private boolean DEBUG = Lucene41PostingsReader.DEBUG;
   private final int blockSize;
 
@@ -43,7 +43,7 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter;
  * 4. start offset.
  *
  */
-public final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
+final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
   // private boolean DEBUG = Lucene41PostingsReader.DEBUG;
 
   private int[] lastSkipDoc;