mirror of https://github.com/apache/lucene.git
refer to these integers as termIDs when they are not yet ordinals, just unsorted unique values
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1451958 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
16b8796a25
commit
35f59181f5
|
@ -24,7 +24,6 @@ import java.util.Iterator;
|
|||
import java.util.NoSuchElementException;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||
|
@ -33,7 +32,6 @@ import org.apache.lucene.util.Counter;
|
|||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.AppendingLongBuffer;
|
||||
|
||||
|
||||
/** Buffers up pending byte[] per doc, deref and sorting via
|
||||
* int ord, then flushes when segment flushes. */
|
||||
class SortedDocValuesWriter extends DocValuesWriter {
|
||||
|
@ -85,9 +83,9 @@ class SortedDocValuesWriter extends DocValuesWriter {
|
|||
}
|
||||
|
||||
private void addOneValue(BytesRef value) {
|
||||
int ord = hash.add(value);
|
||||
if (ord < 0) {
|
||||
ord = -ord-1;
|
||||
int termID = hash.add(value);
|
||||
if (termID < 0) {
|
||||
termID = -termID-1;
|
||||
} else {
|
||||
// reserve additional space for each unique value:
|
||||
// 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
|
||||
|
@ -96,7 +94,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
|
|||
iwBytesUsed.addAndGet(2 * RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
pending.add(ord);
|
||||
pending.add(termID);
|
||||
updateBytesUsed();
|
||||
}
|
||||
|
||||
|
@ -110,7 +108,6 @@ class SortedDocValuesWriter extends DocValuesWriter {
|
|||
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
|
||||
final int maxDoc = state.segmentInfo.getDocCount();
|
||||
|
||||
final int emptyOrd;
|
||||
assert pending.size() == maxDoc;
|
||||
final int valueCount = hash.size();
|
||||
|
||||
|
|
|
@ -38,8 +38,8 @@ import org.apache.lucene.util.packed.AppendingLongBuffer;
|
|||
* int ord, then flushes when segment flushes. */
|
||||
class SortedSetDocValuesWriter extends DocValuesWriter {
|
||||
final BytesRefHash hash;
|
||||
private AppendingLongBuffer pending; // stream of all ords
|
||||
private AppendingLongBuffer pendingCounts; // ords per doc
|
||||
private AppendingLongBuffer pending; // stream of all termIDs
|
||||
private AppendingLongBuffer pendingCounts; // termIDs per doc
|
||||
private final Counter iwBytesUsed;
|
||||
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
|
||||
private final FieldInfo fieldInfo;
|
||||
|
@ -90,15 +90,15 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
int lastValue = -1;
|
||||
int count = 0;
|
||||
for (int i = 0; i < currentUpto; i++) {
|
||||
int v = currentValues[i];
|
||||
int termID = currentValues[i];
|
||||
// if its not a duplicate
|
||||
if (v != lastValue) {
|
||||
pending.add(v); // record the ord
|
||||
if (termID != lastValue) {
|
||||
pending.add(termID); // record the term id
|
||||
count++;
|
||||
}
|
||||
lastValue = v;
|
||||
lastValue = termID;
|
||||
}
|
||||
// record the number of unique ords for this doc
|
||||
// record the number of unique term ids for this doc
|
||||
pendingCounts.add(count);
|
||||
maxCount = Math.max(maxCount, count);
|
||||
currentUpto = 0;
|
||||
|
@ -116,9 +116,9 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
}
|
||||
|
||||
private void addOneValue(BytesRef value) {
|
||||
int ord = hash.add(value);
|
||||
if (ord < 0) {
|
||||
ord = -ord-1;
|
||||
int termID = hash.add(value);
|
||||
if (termID < 0) {
|
||||
termID = -termID-1;
|
||||
} else {
|
||||
// reserve additional space for each unique value:
|
||||
// 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
|
||||
|
@ -134,7 +134,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
iwBytesUsed.addAndGet((currentValues.length - currentUpto) * 2 * RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
currentValues[currentUpto] = ord;
|
||||
currentValues[currentUpto] = termID;
|
||||
currentUpto++;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue