From c415bc8d1ddbb6477c83d559cb254a384aceb6df Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 10 Nov 2016 14:04:15 +0100 Subject: [PATCH] LUCENE-7545: Dense norms/doc-values should not consume memory for the IW buffer. --- .../lucene/index/BinaryDocValuesWriter.java | 15 ++-- .../apache/lucene/index/DocsWithFieldSet.java | 62 ++++++++++++++ .../apache/lucene/index/NormValuesWriter.java | 15 ++-- .../lucene/index/NumericDocValuesWriter.java | 15 ++-- .../lucene/index/SortedDocValuesWriter.java | 15 ++-- .../index/SortedNumericDocValuesWriter.java | 15 ++-- .../index/SortedSetDocValuesWriter.java | 15 ++-- .../lucene/index/TestDocsWithFieldSet.java | 80 +++++++++++++++++++ 8 files changed, 178 insertions(+), 54 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestDocsWithFieldSet.java diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java index 9611a03c265..0344c58b35c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java @@ -24,11 +24,9 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.Counter; -import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; @@ -48,7 +46,7 @@ class BinaryDocValuesWriter extends DocValuesWriter { private final Counter iwBytesUsed; private final PackedLongValues.Builder lengths; - private FixedBitSet docsWithField; + private DocsWithFieldSet docsWithField; private final FieldInfo fieldInfo; private long bytesUsed; private int lastDocID = -1; @@ -60,7 +58,7 @@ class BinaryDocValuesWriter extends DocValuesWriter { this.bytesOut = bytes.getDataOutput(); this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); this.iwBytesUsed = iwBytesUsed; - this.docsWithField = new FixedBitSet(64); + this.docsWithField = new DocsWithFieldSet(); this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed(); iwBytesUsed.addAndGet(bytesUsed); } @@ -84,8 +82,7 @@ class BinaryDocValuesWriter extends DocValuesWriter { // Should never happen! throw new RuntimeException(ioe); } - docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID); - docsWithField.set(docID); + docsWithField.add(docID); updateBytesUsed(); lastDocID = docID; @@ -112,7 +109,7 @@ class BinaryDocValuesWriter extends DocValuesWriter { if (fieldInfoIn != fieldInfo) { throw new IllegalArgumentException("wrong fieldInfo"); } - return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField); + return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField.iterator()); } }); } @@ -124,12 +121,12 @@ class BinaryDocValuesWriter extends DocValuesWriter { final DocIdSetIterator docsWithField; final DataInput bytesIterator; - BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, FixedBitSet docsWithFields) { + BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, DocIdSetIterator docsWithFields) { this.value = new BytesRefBuilder(); this.value.grow(maxLength); this.lengthsIterator = lengths.iterator(); this.bytesIterator = bytesIterator; - this.docsWithField = new BitSetIterator(docsWithFields, lengths.size()); + this.docsWithField = docsWithFields; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java b/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java new file mode 100644 index 00000000000..6c0d6ddefd6 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RamUsageEstimator; + +/** Accumulator for documents that have a value for a field. This is optimized + * for the case that all documents have a value. */ +final class DocsWithFieldSet extends DocIdSet { + + private static long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DocsWithFieldSet.class); + + private FixedBitSet set; + private int cost = 0; + private int lastDocId = -1; + + void add(int docID) { + if (docID <= lastDocId) { + throw new IllegalArgumentException("Out of order doc ids: last=" + lastDocId + ", next=" + docID); + } + if (set != null) { + set = FixedBitSet.ensureCapacity(set, docID); + set.set(docID); + } else if (docID != cost) { + // migrate to a sparse encoding using a bit set + set = new FixedBitSet(docID + 1); + set.set(0, cost); + set.set(docID); + } + lastDocId = docID; + cost++; + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + (set == null ? 0 : set.ramBytesUsed()); + } + + @Override + public DocIdSetIterator iterator() { + return set != null ? new BitSetIterator(set, cost) : DocIdSetIterator.all(cost); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java index b0d05e4859f..4923f3ba1f3 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java @@ -22,9 +22,7 @@ import java.io.IOException; import org.apache.lucene.codecs.NormsConsumer; import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.Counter; -import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; @@ -32,7 +30,7 @@ import org.apache.lucene.util.packed.PackedLongValues; * segment flushes. */ class NormValuesWriter { - private FixedBitSet docsWithField; + private DocsWithFieldSet docsWithField; private PackedLongValues.Builder pending; private final Counter iwBytesUsed; private long bytesUsed; @@ -40,7 +38,7 @@ class NormValuesWriter { private int lastDocID = -1; public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) { - docsWithField = new FixedBitSet(64); + docsWithField = new DocsWithFieldSet(); pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed(); this.fieldInfo = fieldInfo; @@ -54,8 +52,7 @@ class NormValuesWriter { } pending.add(value); - docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID); - docsWithField.set(docID); + docsWithField.add(docID); updateBytesUsed(); @@ -82,7 +79,7 @@ class NormValuesWriter { if (fieldInfo != NormValuesWriter.this.fieldInfo) { throw new IllegalArgumentException("wrong fieldInfo"); } - return new BufferedNorms(values, docsWithField); + return new BufferedNorms(values, docsWithField.iterator()); } @Override @@ -108,9 +105,9 @@ class NormValuesWriter { final DocIdSetIterator docsWithField; private long value; - BufferedNorms(PackedLongValues values, FixedBitSet docsWithFields) { + BufferedNorms(PackedLongValues values, DocIdSetIterator docsWithFields) { this.iter = values.iterator(); - this.docsWithField = new BitSetIterator(docsWithFields, values.size()); + this.docsWithField = docsWithFields; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java index 24a701064cb..d4dd66ae73e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java @@ -21,9 +21,7 @@ import java.io.IOException; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.Counter; -import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; @@ -34,13 +32,13 @@ class NumericDocValuesWriter extends DocValuesWriter { private PackedLongValues.Builder pending; private final Counter iwBytesUsed; private long bytesUsed; - private FixedBitSet docsWithField; + private DocsWithFieldSet docsWithField; private final FieldInfo fieldInfo; private int lastDocID = -1; public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) { pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); - docsWithField = new FixedBitSet(64); + docsWithField = new DocsWithFieldSet(); bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed(); this.fieldInfo = fieldInfo; this.iwBytesUsed = iwBytesUsed; @@ -53,8 +51,7 @@ class NumericDocValuesWriter extends DocValuesWriter { } pending.add(value); - docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID); - docsWithField.set(docID); + docsWithField.add(docID); updateBytesUsed(); @@ -83,7 +80,7 @@ class NumericDocValuesWriter extends DocValuesWriter { if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) { throw new IllegalArgumentException("wrong fieldInfo"); } - return new BufferedNumericDocValues(values, docsWithField); + return new BufferedNumericDocValues(values, docsWithField.iterator()); } }); } @@ -94,9 +91,9 @@ class NumericDocValuesWriter extends DocValuesWriter { final DocIdSetIterator docsWithField; private long value; - BufferedNumericDocValues(PackedLongValues values, FixedBitSet docsWithFields) { + BufferedNumericDocValues(PackedLongValues values, DocIdSetIterator docsWithFields) { this.iter = values.iterator(); - this.docsWithField = new BitSetIterator(docsWithFields, values.size()); + this.docsWithField = docsWithFields; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java index 7e43e497f7b..e439caf6bda 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java @@ -22,13 +22,11 @@ import java.io.IOException; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.Counter; -import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; @@ -37,7 +35,7 @@ import org.apache.lucene.util.packed.PackedLongValues; class SortedDocValuesWriter extends DocValuesWriter { final BytesRefHash hash; private PackedLongValues.Builder pending; - private FixedBitSet docsWithField; + private DocsWithFieldSet docsWithField; private final Counter iwBytesUsed; private long bytesUsed; // this currently only tracks differences in 'pending' private final FieldInfo fieldInfo; @@ -52,7 +50,7 @@ class SortedDocValuesWriter extends DocValuesWriter { BytesRefHash.DEFAULT_CAPACITY, new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed)); pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); - docsWithField = new FixedBitSet(64); + docsWithField = new DocsWithFieldSet(); bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed(); iwBytesUsed.addAndGet(bytesUsed); } @@ -69,8 +67,7 @@ class SortedDocValuesWriter extends DocValuesWriter { } addOneValue(value); - docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID); - docsWithField.set(docID); + docsWithField.add(docID); lastDocID = docID; } @@ -121,7 +118,7 @@ class SortedDocValuesWriter extends DocValuesWriter { if (fieldInfoIn != fieldInfo) { throw new IllegalArgumentException("wrong fieldInfo"); } - return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField); + return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField.iterator()); } }); } @@ -136,13 +133,13 @@ class SortedDocValuesWriter extends DocValuesWriter { final PackedLongValues.Iterator iter; final DocIdSetIterator docsWithField; - public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, FixedBitSet docsWithField) { + public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, DocIdSetIterator docsWithField) { this.hash = hash; this.valueCount = valueCount; this.sortedValues = sortedValues; this.iter = docToOrd.iterator(); this.ordMap = ordMap; - this.docsWithField = new BitSetIterator(docsWithField, docToOrd.size()); + this.docsWithField = docsWithField; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java index 3f50623d4ea..75236cc335a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java @@ -23,9 +23,7 @@ import java.util.Arrays; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.Counter; -import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; @@ -34,7 +32,7 @@ import org.apache.lucene.util.packed.PackedLongValues; class SortedNumericDocValuesWriter extends DocValuesWriter { private PackedLongValues.Builder pending; // stream of all values private PackedLongValues.Builder pendingCounts; // count of values per doc - private FixedBitSet docsWithField; + private DocsWithFieldSet docsWithField; private final Counter iwBytesUsed; private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts' private final FieldInfo fieldInfo; @@ -47,7 +45,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter { this.iwBytesUsed = iwBytesUsed; pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); - docsWithField = new FixedBitSet(64); + docsWithField = new DocsWithFieldSet(); bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues); iwBytesUsed.addAndGet(bytesUsed); } @@ -76,8 +74,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter { pendingCounts.add(currentUpto); currentUpto = 0; - docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc); - docsWithField.set(currentDoc); + docsWithField.add(currentDoc); } @Override @@ -112,7 +109,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter { if (fieldInfoIn != fieldInfo) { throw new IllegalArgumentException("wrong fieldInfo"); } - return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField); + return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField.iterator()); } }); } @@ -124,10 +121,10 @@ class SortedNumericDocValuesWriter extends DocValuesWriter { private int valueCount; private int valueUpto; - public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, FixedBitSet docsWithField) { + public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, DocIdSetIterator docsWithField) { valuesIter = values.iterator(); valueCountsIter = valueCounts.iterator(); - this.docsWithField = new BitSetIterator(docsWithField, values.size()); + this.docsWithField = docsWithField; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java index 35157d49c16..0f4fb5e5c71 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java @@ -24,13 +24,11 @@ import java.util.Arrays; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.Counter; -import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; @@ -40,7 +38,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter { final BytesRefHash hash; private PackedLongValues.Builder pending; // stream of all termIDs private PackedLongValues.Builder pendingCounts; // termIDs per doc - private FixedBitSet docsWithField; + private DocsWithFieldSet docsWithField; private final Counter iwBytesUsed; private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts' private final FieldInfo fieldInfo; @@ -59,7 +57,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter { new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed)); pending = PackedLongValues.packedBuilder(PackedInts.COMPACT); pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); - docsWithField = new FixedBitSet(64); + docsWithField = new DocsWithFieldSet(); bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed(); iwBytesUsed.addAndGet(bytesUsed); } @@ -103,8 +101,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter { pendingCounts.add(count); maxCount = Math.max(maxCount, count); currentUpto = 0; - docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc); - docsWithField.set(currentDoc); + docsWithField.add(currentDoc); } @Override @@ -158,7 +155,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter { if (fieldInfoIn != fieldInfo) { throw new IllegalArgumentException("wrong fieldInfo"); } - return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField); + return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField.iterator()); } }); } @@ -176,14 +173,14 @@ class SortedSetDocValuesWriter extends DocValuesWriter { private int ordCount; private int ordUpto; - public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, FixedBitSet docsWithField) { + public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, DocIdSetIterator docsWithField) { this.currentDoc = new int[maxCount]; this.sortedValues = sortedValues; this.ordMap = ordMap; this.hash = hash; this.ordsIter = ords.iterator(); this.ordCountsIter = ordCounts.iterator(); - this.docsWithField = new BitSetIterator(docsWithField, ordCounts.size()); + this.docsWithField = docsWithField; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocsWithFieldSet.java b/lucene/core/src/test/org/apache/lucene/index/TestDocsWithFieldSet.java new file mode 100644 index 00000000000..b719adfd7c2 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocsWithFieldSet.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +public class TestDocsWithFieldSet extends LuceneTestCase { + + public void testDense() throws IOException { + DocsWithFieldSet set = new DocsWithFieldSet(); + DocIdSetIterator it = set.iterator(); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc()); + + set.add(0); + it = set.iterator(); + assertEquals(0, it.nextDoc()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc()); + + long ramBytesUsed = set.ramBytesUsed(); + for (int i = 1; i < 1000; ++i) { + set.add(i); + } + assertEquals(ramBytesUsed, set.ramBytesUsed()); + it = set.iterator(); + for (int i = 0; i < 1000; ++i) { + assertEquals(i, it.nextDoc()); + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc()); + } + + public void testSparse() throws IOException { + DocsWithFieldSet set = new DocsWithFieldSet(); + int doc = random().nextInt(10000); + set.add(doc); + DocIdSetIterator it = set.iterator(); + assertEquals(doc, it.nextDoc()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc()); + int doc2 = doc + TestUtil.nextInt(random(), 1, 100); + set.add(doc2); + it = set.iterator(); + assertEquals(doc, it.nextDoc()); + assertEquals(doc2, it.nextDoc()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc()); + } + + public void testDenseThenSparse() throws IOException { + int denseCount = random().nextInt(10000); + int nextDoc = denseCount + random().nextInt(10000); + DocsWithFieldSet set = new DocsWithFieldSet(); + for (int i = 0; i < denseCount; ++i) { + set.add(i); + } + set.add(nextDoc); + DocIdSetIterator it = set.iterator(); + for (int i = 0; i < denseCount; ++i) { + assertEquals(i, it.nextDoc()); + } + assertEquals(nextDoc, it.nextDoc()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc()); + } + +}