refer to these integers as termIDs when they are not yet ordinals, just unsorted unique values

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1451958 13f79535-47bb-0310-9956-ffa450edef68
commit 35f59181f5
parent 16b8796a25
Author: Robert Muir
Date:   2013-03-02 23:38:55 +00:00

2 changed files with 15 additions and 18 deletions

lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java

@@ -24,7 +24,6 @@ import java.util.Iterator;
 import java.util.NoSuchElementException;
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
@@ -33,7 +32,6 @@ import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.AppendingLongBuffer;
 
 /** Buffers up pending byte[] per doc, deref and sorting via
  *  int ord, then flushes when segment flushes. */
 class SortedDocValuesWriter extends DocValuesWriter {
@@ -85,9 +83,9 @@ class SortedDocValuesWriter extends DocValuesWriter {
   }
 
   private void addOneValue(BytesRef value) {
-    int ord = hash.add(value);
-    if (ord < 0) {
-      ord = -ord-1;
+    int termID = hash.add(value);
+    if (termID < 0) {
+      termID = -termID-1;
     } else {
       // reserve additional space for each unique value:
       // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
@@ -96,7 +94,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
       iwBytesUsed.addAndGet(2 * RamUsageEstimator.NUM_BYTES_INT);
     }
 
-    pending.add(ord);
+    pending.add(termID);
     updateBytesUsed();
   }
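
Both writers lean on the same BytesRefHash.add() contract: a first-seen value returns a fresh, insertion-ordered integer (the termID being named by this commit), while a duplicate returns that same ID encoded as -(termID)-1. A minimal standalone sketch of the contract and the decode step (the demo class and sample value are illustrative, not part of this commit):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class TermIdDemo {
  public static void main(String[] args) {
    BytesRefHash hash = new BytesRefHash();
    int first = hash.add(new BytesRef("apple")); // >= 0: new entry, its termID
    int dup = hash.add(new BytesRef("apple"));   // < 0: value already present
    int termID = dup < 0 ? -dup - 1 : dup;       // decode, as addOneValue() does
    System.out.println(first + " == " + termID); // same termID both times
  }
}
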
@@ -110,7 +108,6 @@ class SortedDocValuesWriter extends DocValuesWriter {
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
     final int maxDoc = state.segmentInfo.getDocCount();
-    final int emptyOrd;
     assert pending.size() == maxDoc;
     final int valueCount = hash.size();
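
The distinction the commit message draws shows up at flush time: termIDs are assigned in insertion order, and only when the segment flushes are the hashed terms sorted, so each termID can be translated to its ordinal. A self-contained sketch of that mapping under assumed names (termsByTermID, ordMap are invented here; the real flush() sorts the BytesRefHash itself rather than a String array):

import java.util.Arrays;

public class OrdMapDemo {
  public static void main(String[] args) {
    String[] termsByTermID = {"pear", "apple", "mango"};  // insertion order
    Integer[] sortedTermIDs = {0, 1, 2};
    Arrays.sort(sortedTermIDs, (a, b) -> termsByTermID[a].compareTo(termsByTermID[b]));
    int[] ordMap = new int[termsByTermID.length];
    for (int ord = 0; ord < sortedTermIDs.length; ord++) {
      ordMap[sortedTermIDs[ord]] = ord;  // termID -> ordinal in sorted order
    }
    System.out.println(Arrays.toString(ordMap));  // [2, 0, 1]: "pear" sorts last
  }
}
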

lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java

@@ -38,8 +38,8 @@ import org.apache.lucene.util.packed.AppendingLongBuffer;
  *  int ord, then flushes when segment flushes. */
 class SortedSetDocValuesWriter extends DocValuesWriter {
   final BytesRefHash hash;
-  private AppendingLongBuffer pending; // stream of all ords
-  private AppendingLongBuffer pendingCounts; // ords per doc
+  private AppendingLongBuffer pending; // stream of all termIDs
+  private AppendingLongBuffer pendingCounts; // termIDs per doc
   private final Counter iwBytesUsed;
   private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
   private final FieldInfo fieldInfo;
@@ -90,15 +90,15 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
     int lastValue = -1;
     int count = 0;
     for (int i = 0; i < currentUpto; i++) {
-      int v = currentValues[i];
+      int termID = currentValues[i];
       // if its not a duplicate
-      if (v != lastValue) {
-        pending.add(v); // record the ord
+      if (termID != lastValue) {
+        pending.add(termID); // record the term id
         count++;
       }
-      lastValue = v;
+      lastValue = termID;
     }
-    // record the number of unique ords for this doc
+    // record the number of unique term ids for this doc
     pendingCounts.add(count);
     maxCount = Math.max(maxCount, count);
     currentUpto = 0;
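
The loop above only works because the document's termIDs are sorted before deduplication, so equal IDs sit next to each other and a single lastValue comparison filters out the repeats. A standalone sketch of the same sort-then-dedup step (the buffer contents are invented for the example):

import java.util.Arrays;

public class DedupDemo {
  public static void main(String[] args) {
    int[] currentValues = {5, 2, 5, 2, 7};       // termIDs added for one document
    int currentUpto = currentValues.length;
    Arrays.sort(currentValues, 0, currentUpto);  // duplicates become adjacent
    int lastValue = -1;                          // termIDs are >= 0, so -1 never matches
    int count = 0;
    StringBuilder unique = new StringBuilder();
    for (int i = 0; i < currentUpto; i++) {
      int termID = currentValues[i];
      if (termID != lastValue) {                 // keep only the first of each run
        unique.append(termID).append(' ');
        count++;
      }
      lastValue = termID;
    }
    System.out.println(unique + "| count=" + count); // prints: 2 5 7 | count=3
  }
}
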
@@ -116,9 +116,9 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
   }
 
   private void addOneValue(BytesRef value) {
-    int ord = hash.add(value);
-    if (ord < 0) {
-      ord = -ord-1;
+    int termID = hash.add(value);
+    if (termID < 0) {
+      termID = -termID-1;
     } else {
       // reserve additional space for each unique value:
       // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
@@ -134,7 +134,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
       iwBytesUsed.addAndGet((currentValues.length - currentUpto) * 2 * RamUsageEstimator.NUM_BYTES_INT);
     }
 
-    currentValues[currentUpto] = ord;
+    currentValues[currentUpto] = termID;
     currentUpto++;
   }