diff --git a/lucene/core/src/java/org/apache/lucene/util/SortedVIntList.java b/lucene/core/src/java/org/apache/lucene/util/SortedVIntList.java deleted file mode 100644 index 135d0adcaee..00000000000 --- a/lucene/core/src/java/org/apache/lucene/util/SortedVIntList.java +++ /dev/null @@ -1,227 +0,0 @@ -package org.apache.lucene.util; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.BitSet; - -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; - -/** - * Stores and iterate on sorted integers in compressed form in RAM.
- * The code for compressing the differences between ascending integers was - * borrowed from {@link org.apache.lucene.store.IndexInput} and - * {@link org.apache.lucene.store.IndexOutput}. - *

- * NOTE: this class assumes the stored integers are doc Ids (hence why it - * extends {@link DocIdSet}). Therefore its {@link #iterator()} assumes {@link - * DocIdSetIterator#NO_MORE_DOCS} can be used as sentinel. If you intent to use - * this value, then make sure it's not used during search flow. - * - * @lucene.internal - */ -public class SortedVIntList extends DocIdSet { - /** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, - * a SortedVIntList representing the index numbers of the set bits - * will be smaller than that BitSet. - */ - final static int BITS2VINTLIST_SIZE = 8; - - private int size; - private byte[] bytes; - private int lastBytePos; - - /** - * Create a SortedVIntList from all elements of an array of integers. - * - * @param sortedInts A sorted array of non negative integers. - */ - public SortedVIntList(int... sortedInts) { - this(sortedInts, sortedInts.length); - } - - /** - * Create a SortedVIntList from an array of integers. - * @param sortedInts An array of sorted non negative integers. - * @param inputSize The number of integers to be used from the array. - */ - public SortedVIntList(int[] sortedInts, int inputSize) { - SortedVIntListBuilder builder = new SortedVIntListBuilder(); - for (int i = 0; i < inputSize; i++) { - builder.addInt(sortedInts[i]); - } - builder.done(); - } - - /** - * Create a SortedVIntList from a BitSet. - * @param bits A bit set representing a set of integers. - */ - public SortedVIntList(BitSet bits) { - SortedVIntListBuilder builder = new SortedVIntListBuilder(); - int nextInt = bits.nextSetBit(0); - while (nextInt != -1) { - builder.addInt(nextInt); - nextInt = bits.nextSetBit(nextInt + 1); - } - builder.done(); - } - - /** - * Create a SortedVIntList. - * @param docIdSetIterator An iterator providing document numbers as a set of integers. - * This DocIdSetIterator is iterated completely when this constructor - * is called and it must provide the integers in non - * decreasing order. - */ - public SortedVIntList(DocIdSetIterator docIdSetIterator) throws IOException { - SortedVIntListBuilder builder = new SortedVIntListBuilder(); - int doc; - while ((doc = docIdSetIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - builder.addInt(doc); - } - builder.done(); - } - - - private class SortedVIntListBuilder { - private int lastInt = 0; - - SortedVIntListBuilder() { - initBytes(); - lastInt = 0; - } - - void addInt(int nextInt) { - int diff = nextInt - lastInt; - if (diff < 0) { - throw new IllegalArgumentException( - "Input not sorted or first element negative."); - } - - if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) { - // Biggest possible int does not fit. - resizeBytes(ArrayUtil.oversize(lastBytePos + MAX_BYTES_PER_INT, 1)); - } - - // See org.apache.lucene.store.IndexOutput.writeVInt() - while ((diff & ~VB1) != 0) { // The high bit of the next byte needs to be set. - bytes[lastBytePos++] = (byte) ((diff & VB1) | ~VB1); - diff >>>= BIT_SHIFT; - } - bytes[lastBytePos++] = (byte) diff; // Last byte, high bit not set. - size++; - lastInt = nextInt; - } - - void done() { - resizeBytes(lastBytePos); - } - } - - - private void initBytes() { - size = 0; - bytes = new byte[128]; // initial byte size - lastBytePos = 0; - } - - private void resizeBytes(int newSize) { - if (newSize != bytes.length) { - byte[] newBytes = new byte[newSize]; - System.arraycopy(bytes, 0, newBytes, 0, lastBytePos); - bytes = newBytes; - } - } - - private static final int VB1 = 0x7F; - private static final int BIT_SHIFT = 7; - private final int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1; - - /** - * @return The total number of sorted integers. - */ - public int size() { - return size; - } - - /** - * @return The size of the byte array storing the compressed sorted integers. - */ - public int getByteSize() { - return bytes.length; - } - - /** This DocIdSet implementation is cacheable. */ - @Override - public boolean isCacheable() { - return true; - } - - /** - * @return An iterator over the sorted integers. - */ - @Override - public DocIdSetIterator iterator() { - return new DocIdSetIterator() { - int bytePos = 0; - int lastInt = 0; - int doc = -1; - - private void advance() { - // See org.apache.lucene.store.IndexInput.readVInt() - byte b = bytes[bytePos++]; - lastInt += b & VB1; - for (int s = BIT_SHIFT; (b & ~VB1) != 0; s += BIT_SHIFT) { - b = bytes[bytePos++]; - lastInt += (b & VB1) << s; - } - } - - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() { - if (bytePos >= lastBytePos) { - doc = NO_MORE_DOCS; - } else { - advance(); - doc = lastInt; - } - return doc; - } - - @Override - public int advance(int target) { - while (bytePos < lastBytePos) { - advance(); - if (lastInt >= target) { - return doc = lastInt; - } - } - return doc = NO_MORE_DOCS; - } - - }; - } -} - diff --git a/lucene/core/src/test/org/apache/lucene/util/TestSortedVIntList.java b/lucene/core/src/test/org/apache/lucene/util/TestSortedVIntList.java deleted file mode 100644 index b07e973a59e..00000000000 --- a/lucene/core/src/test/org/apache/lucene/util/TestSortedVIntList.java +++ /dev/null @@ -1,195 +0,0 @@ -package org.apache.lucene.util; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.BitSet; - - -import org.apache.lucene.search.DocIdSetIterator; - -public class TestSortedVIntList extends LuceneTestCase { - - void tstIterator ( - SortedVIntList vintList, - int[] ints) throws IOException { - for (int i = 0; i < ints.length; i++) { - if ((i > 0) && (ints[i-1] == ints[i])) { - return; // DocNrSkipper should not skip to same document. - } - } - DocIdSetIterator m = vintList.iterator(); - for (int i = 0; i < ints.length; i++) { - assertTrue("No end of Matcher at: " + i, m.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); - assertEquals(ints[i], m.docID()); - } - assertTrue("End of Matcher", m.nextDoc() == DocIdSetIterator.NO_MORE_DOCS); - } - - void tstVIntList( - SortedVIntList vintList, - int[] ints, - int expectedByteSize) throws IOException { - assertEquals("Size", ints.length, vintList.size()); - assertEquals("Byte size", expectedByteSize, vintList.getByteSize()); - tstIterator(vintList, ints); - } - - public void tstViaBitSet(int [] ints, int expectedByteSize) throws IOException { - final int MAX_INT_FOR_BITSET = 1024 * 1024; - BitSet bs = new BitSet(); - for (int i = 0; i < ints.length; i++) { - if (ints[i] > MAX_INT_FOR_BITSET) { - return; // BitSet takes too much memory - } - if ((i > 0) && (ints[i-1] == ints[i])) { - return; // BitSet cannot store duplicate. - } - bs.set(ints[i]); - } - SortedVIntList svil = new SortedVIntList(bs); - tstVIntList(svil, ints, expectedByteSize); - tstVIntList(new SortedVIntList(svil.iterator()), ints, expectedByteSize); - } - - private static final int VB1 = 0x7F; - private static final int BIT_SHIFT = 7; - private static final int VB2 = (VB1 << BIT_SHIFT) | VB1; - private static final int VB3 = (VB2 << BIT_SHIFT) | VB1; - private static final int VB4 = (VB3 << BIT_SHIFT) | VB1; - - private int vIntByteSize(int i) { - assert i >= 0; - if (i <= VB1) return 1; - if (i <= VB2) return 2; - if (i <= VB3) return 3; - if (i <= VB4) return 4; - return 5; - } - - private int vIntListByteSize(int [] ints) { - int byteSize = 0; - int last = 0; - for (int i = 0; i < ints.length; i++) { - byteSize += vIntByteSize(ints[i] - last); - last = ints[i]; - } - return byteSize; - } - - public void tstInts(int [] ints) { - int expectedByteSize = vIntListByteSize(ints); - try { - tstVIntList(new SortedVIntList(ints), ints, expectedByteSize); - tstViaBitSet(ints, expectedByteSize); - } catch (IOException ioe) { - throw new Error(ioe); - } - } - - public void tstIllegalArgExc(int [] ints) { - try { - new SortedVIntList(ints); - } - catch (IllegalArgumentException e) { - return; - } - fail("Expected IllegalArgumentException"); - } - - private int[] fibArray(int a, int b, int size) { - final int[] fib = new int[size]; - fib[0] = a; - fib[1] = b; - for (int i = 2; i < size; i++) { - fib[i] = fib[i-1] + fib[i-2]; - } - return fib; - } - - private int[] reverseDiffs(int []ints) { // reverse the order of the successive differences - final int[] res = new int[ints.length]; - for (int i = 0; i < ints.length; i++) { - res[i] = ints[ints.length - 1] + (ints[0] - ints[ints.length - 1 - i]); - } - return res; - } - - public void test01() { - tstInts(new int[] {}); - } - public void test02() { - tstInts(new int[] {0}); - } - public void test04a() { - tstInts(new int[] {0, VB2 - 1}); - } - public void test04b() { - tstInts(new int[] {0, VB2}); - } - public void test04c() { - tstInts(new int[] {0, VB2 + 1}); - } - public void test05() { - tstInts(fibArray(0,1,7)); // includes duplicate value 1 - } - public void test05b() { - tstInts(reverseDiffs(fibArray(0,1,7))); - } - public void test06() { - tstInts(fibArray(1,2,45)); // no duplicates, size 46 exceeds max int. - } - public void test06b() { - tstInts(reverseDiffs(fibArray(1,2,45))); - } - public void test07a() { - tstInts(new int[] {0, VB3}); - } - public void test07b() { - tstInts(new int[] {1, VB3 + 2}); - } - public void test07c() { - tstInts(new int[] {2, VB3 + 4}); - } - public void test08a() { - tstInts(new int[] {0, VB4 + 1}); - } - public void test08b() { - tstInts(new int[] {1, VB4 + 1}); - } - public void test08c() { - tstInts(new int[] {2, VB4 + 1}); - } - - public void test10() { - tstIllegalArgExc(new int[] {-1}); - } - public void test11() { - tstIllegalArgExc(new int[] {1,0}); - } - public void test12() { - tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0}); - } - public void test13Allocation() throws Exception { - int [] a = new int[2000]; // SortedVIntList initial byte size is 128 - for (int i = 0; i < a.length; i++) { - a[i] = (107 + i) * i; - } - tstIterator(new SortedVIntList(a), a); - } -}