refer to these integers as termIDs when they are not yet ordinals, just unsorted unique values

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1451958 13f79535-47bb-0310-9956-ffa450edef68
commit 35f59181f5
parent 16b8796a25
Author: Robert Muir
Date:   2013-03-02 23:38:55 +00:00

2 changed files with 15 additions and 18 deletions

lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java

@@ -24,7 +24,6 @@ import java.util.Iterator;
 import java.util.NoSuchElementException;
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
@@ -33,7 +32,6 @@ import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.AppendingLongBuffer;
 
 /** Buffers up pending byte[] per doc, deref and sorting via
  *  int ord, then flushes when segment flushes. */
 class SortedDocValuesWriter extends DocValuesWriter {
@@ -85,9 +83,9 @@ class SortedDocValuesWriter extends DocValuesWriter {
   }
 
   private void addOneValue(BytesRef value) {
-    int ord = hash.add(value);
-    if (ord < 0) {
-      ord = -ord-1;
+    int termID = hash.add(value);
+    if (termID < 0) {
+      termID = -termID-1;
     } else {
       // reserve additional space for each unique value:
       // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
@@ -96,7 +94,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
       iwBytesUsed.addAndGet(2 * RamUsageEstimator.NUM_BYTES_INT);
     }
 
-    pending.add(ord);
+    pending.add(termID);
     updateBytesUsed();
   }
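
Both writers lean on the same BytesRefHash.add() contract: a first-seen value returns a fresh, insertion-ordered integer (the termID being named by this commit), while a duplicate returns that same ID encoded as -(termID)-1. A minimal standalone sketch of the contract and the decode step (the demo class and sample value are illustrative, not part of this commit):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class TermIdDemo {
  public static void main(String[] args) {
    BytesRefHash hash = new BytesRefHash();
    int first = hash.add(new BytesRef("apple")); // >= 0: new entry, its termID
    int dup = hash.add(new BytesRef("apple"));   // < 0: value already present
    int termID = dup < 0 ? -dup - 1 : dup;       // decode, as addOneValue() does
    System.out.println(first + " == " + termID); // same termID both times
  }
}
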
@@ -110,7 +108,6 @@ class SortedDocValuesWriter extends DocValuesWriter {
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
     final int maxDoc = state.segmentInfo.getDocCount();
-    final int emptyOrd;
     assert pending.size() == maxDoc;
     final int valueCount = hash.size();
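
The distinction the commit message draws shows up at flush time: termIDs are assigned in insertion order, and only when the segment flushes are the hashed terms sorted, so each termID can be translated to its ordinal. A self-contained sketch of that mapping under assumed names (termsByTermID, ordMap are invented here; the real flush() sorts the BytesRefHash itself rather than a String array):

import java.util.Arrays;

public class OrdMapDemo {
  public static void main(String[] args) {
    String[] termsByTermID = {"pear", "apple", "mango"};  // insertion order
    Integer[] sortedTermIDs = {0, 1, 2};
    Arrays.sort(sortedTermIDs, (a, b) -> termsByTermID[a].compareTo(termsByTermID[b]));
    int[] ordMap = new int[termsByTermID.length];
    for (int ord = 0; ord < sortedTermIDs.length; ord++) {
      ordMap[sortedTermIDs[ord]] = ord;  // termID -> ordinal in sorted order
    }
    System.out.println(Arrays.toString(ordMap));  // [2, 0, 1]: "pear" sorts last
  }
}
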

lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java

@@ -38,8 +38,8 @@ import org.apache.lucene.util.packed.AppendingLongBuffer;
  *  int ord, then flushes when segment flushes. */
 class SortedSetDocValuesWriter extends DocValuesWriter {
   final BytesRefHash hash;
-  private AppendingLongBuffer pending; // stream of all ords
-  private AppendingLongBuffer pendingCounts; // ords per doc
+  private AppendingLongBuffer pending; // stream of all termIDs
+  private AppendingLongBuffer pendingCounts; // termIDs per doc
   private final Counter iwBytesUsed;
   private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
   private final FieldInfo fieldInfo;
@@ -90,15 +90,15 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
     int lastValue = -1;
     int count = 0;
     for (int i = 0; i < currentUpto; i++) {
-      int v = currentValues[i];
+      int termID = currentValues[i];
       // if its not a duplicate
-      if (v != lastValue) {
-        pending.add(v); // record the ord
+      if (termID != lastValue) {
+        pending.add(termID); // record the term id
         count++;
       }
-      lastValue = v;
+      lastValue = termID;
     }
-    // record the number of unique ords for this doc
+    // record the number of unique term ids for this doc
     pendingCounts.add(count);
     maxCount = Math.max(maxCount, count);
     currentUpto = 0;
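
The loop above only works because the document's termIDs are sorted before deduplication, so equal IDs sit next to each other and a single lastValue comparison filters out the repeats. A standalone sketch of the same sort-then-dedup step (the buffer contents are invented for the example):

import java.util.Arrays;

public class DedupDemo {
  public static void main(String[] args) {
    int[] currentValues = {5, 2, 5, 2, 7};       // termIDs added for one document
    int currentUpto = currentValues.length;
    Arrays.sort(currentValues, 0, currentUpto);  // duplicates become adjacent
    int lastValue = -1;                          // termIDs are >= 0, so -1 never matches
    int count = 0;
    StringBuilder unique = new StringBuilder();
    for (int i = 0; i < currentUpto; i++) {
      int termID = currentValues[i];
      if (termID != lastValue) {                 // keep only the first of each run
        unique.append(termID).append(' ');
        count++;
      }
      lastValue = termID;
    }
    System.out.println(unique + "| count=" + count); // prints: 2 5 7 | count=3
  }
}
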
@@ -116,9 +116,9 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
   }
 
   private void addOneValue(BytesRef value) {
-    int ord = hash.add(value);
-    if (ord < 0) {
-      ord = -ord-1;
+    int termID = hash.add(value);
+    if (termID < 0) {
+      termID = -termID-1;
     } else {
       // reserve additional space for each unique value:
       // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
@@ -134,7 +134,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
       iwBytesUsed.addAndGet((currentValues.length - currentUpto) * 2 * RamUsageEstimator.NUM_BYTES_INT);
     }
 
-    currentValues[currentUpto] = ord;
+    currentValues[currentUpto] = termID;
     currentUpto++;
   }