mirror of https://github.com/apache/lucene.git
LUCENE-4392: Remove SortedVIntList (unused).
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1386525 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
41ab62aece
commit
a35f22558a
|
@ -1,227 +0,0 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
/**
|
||||
* Stores and iterate on sorted integers in compressed form in RAM. <br>
|
||||
* The code for compressing the differences between ascending integers was
|
||||
* borrowed from {@link org.apache.lucene.store.IndexInput} and
|
||||
* {@link org.apache.lucene.store.IndexOutput}.
|
||||
* <p>
|
||||
* <b>NOTE:</b> this class assumes the stored integers are doc Ids (hence why it
|
||||
* extends {@link DocIdSet}). Therefore its {@link #iterator()} assumes {@link
|
||||
* DocIdSetIterator#NO_MORE_DOCS} can be used as sentinel. If you intent to use
|
||||
* this value, then make sure it's not used during search flow.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class SortedVIntList extends DocIdSet {
|
||||
/** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set,
|
||||
* a SortedVIntList representing the index numbers of the set bits
|
||||
* will be smaller than that BitSet.
|
||||
*/
|
||||
final static int BITS2VINTLIST_SIZE = 8;
|
||||
|
||||
private int size;
|
||||
private byte[] bytes;
|
||||
private int lastBytePos;
|
||||
|
||||
/**
|
||||
* Create a SortedVIntList from all elements of an array of integers.
|
||||
*
|
||||
* @param sortedInts A sorted array of non negative integers.
|
||||
*/
|
||||
public SortedVIntList(int... sortedInts) {
|
||||
this(sortedInts, sortedInts.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a SortedVIntList from an array of integers.
|
||||
* @param sortedInts An array of sorted non negative integers.
|
||||
* @param inputSize The number of integers to be used from the array.
|
||||
*/
|
||||
public SortedVIntList(int[] sortedInts, int inputSize) {
|
||||
SortedVIntListBuilder builder = new SortedVIntListBuilder();
|
||||
for (int i = 0; i < inputSize; i++) {
|
||||
builder.addInt(sortedInts[i]);
|
||||
}
|
||||
builder.done();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a SortedVIntList from a BitSet.
|
||||
* @param bits A bit set representing a set of integers.
|
||||
*/
|
||||
public SortedVIntList(BitSet bits) {
|
||||
SortedVIntListBuilder builder = new SortedVIntListBuilder();
|
||||
int nextInt = bits.nextSetBit(0);
|
||||
while (nextInt != -1) {
|
||||
builder.addInt(nextInt);
|
||||
nextInt = bits.nextSetBit(nextInt + 1);
|
||||
}
|
||||
builder.done();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a SortedVIntList.
|
||||
* @param docIdSetIterator An iterator providing document numbers as a set of integers.
|
||||
* This DocIdSetIterator is iterated completely when this constructor
|
||||
* is called and it must provide the integers in non
|
||||
* decreasing order.
|
||||
*/
|
||||
public SortedVIntList(DocIdSetIterator docIdSetIterator) throws IOException {
|
||||
SortedVIntListBuilder builder = new SortedVIntListBuilder();
|
||||
int doc;
|
||||
while ((doc = docIdSetIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
builder.addInt(doc);
|
||||
}
|
||||
builder.done();
|
||||
}
|
||||
|
||||
|
||||
private class SortedVIntListBuilder {
|
||||
private int lastInt = 0;
|
||||
|
||||
SortedVIntListBuilder() {
|
||||
initBytes();
|
||||
lastInt = 0;
|
||||
}
|
||||
|
||||
void addInt(int nextInt) {
|
||||
int diff = nextInt - lastInt;
|
||||
if (diff < 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Input not sorted or first element negative.");
|
||||
}
|
||||
|
||||
if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) {
|
||||
// Biggest possible int does not fit.
|
||||
resizeBytes(ArrayUtil.oversize(lastBytePos + MAX_BYTES_PER_INT, 1));
|
||||
}
|
||||
|
||||
// See org.apache.lucene.store.IndexOutput.writeVInt()
|
||||
while ((diff & ~VB1) != 0) { // The high bit of the next byte needs to be set.
|
||||
bytes[lastBytePos++] = (byte) ((diff & VB1) | ~VB1);
|
||||
diff >>>= BIT_SHIFT;
|
||||
}
|
||||
bytes[lastBytePos++] = (byte) diff; // Last byte, high bit not set.
|
||||
size++;
|
||||
lastInt = nextInt;
|
||||
}
|
||||
|
||||
void done() {
|
||||
resizeBytes(lastBytePos);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void initBytes() {
|
||||
size = 0;
|
||||
bytes = new byte[128]; // initial byte size
|
||||
lastBytePos = 0;
|
||||
}
|
||||
|
||||
private void resizeBytes(int newSize) {
|
||||
if (newSize != bytes.length) {
|
||||
byte[] newBytes = new byte[newSize];
|
||||
System.arraycopy(bytes, 0, newBytes, 0, lastBytePos);
|
||||
bytes = newBytes;
|
||||
}
|
||||
}
|
||||
|
||||
private static final int VB1 = 0x7F;
|
||||
private static final int BIT_SHIFT = 7;
|
||||
private final int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;
|
||||
|
||||
/**
|
||||
* @return The total number of sorted integers.
|
||||
*/
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The size of the byte array storing the compressed sorted integers.
|
||||
*/
|
||||
public int getByteSize() {
|
||||
return bytes.length;
|
||||
}
|
||||
|
||||
/** This DocIdSet implementation is cacheable. */
|
||||
@Override
|
||||
public boolean isCacheable() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return An iterator over the sorted integers.
|
||||
*/
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return new DocIdSetIterator() {
|
||||
int bytePos = 0;
|
||||
int lastInt = 0;
|
||||
int doc = -1;
|
||||
|
||||
private void advance() {
|
||||
// See org.apache.lucene.store.IndexInput.readVInt()
|
||||
byte b = bytes[bytePos++];
|
||||
lastInt += b & VB1;
|
||||
for (int s = BIT_SHIFT; (b & ~VB1) != 0; s += BIT_SHIFT) {
|
||||
b = bytes[bytePos++];
|
||||
lastInt += (b & VB1) << s;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() {
|
||||
if (bytePos >= lastBytePos) {
|
||||
doc = NO_MORE_DOCS;
|
||||
} else {
|
||||
advance();
|
||||
doc = lastInt;
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) {
|
||||
while (bytePos < lastBytePos) {
|
||||
advance();
|
||||
if (lastInt >= target) {
|
||||
return doc = lastInt;
|
||||
}
|
||||
}
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -1,195 +0,0 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
public class TestSortedVIntList extends LuceneTestCase {
|
||||
|
||||
void tstIterator (
|
||||
SortedVIntList vintList,
|
||||
int[] ints) throws IOException {
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
if ((i > 0) && (ints[i-1] == ints[i])) {
|
||||
return; // DocNrSkipper should not skip to same document.
|
||||
}
|
||||
}
|
||||
DocIdSetIterator m = vintList.iterator();
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
assertTrue("No end of Matcher at: " + i, m.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertEquals(ints[i], m.docID());
|
||||
}
|
||||
assertTrue("End of Matcher", m.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
}
|
||||
|
||||
void tstVIntList(
|
||||
SortedVIntList vintList,
|
||||
int[] ints,
|
||||
int expectedByteSize) throws IOException {
|
||||
assertEquals("Size", ints.length, vintList.size());
|
||||
assertEquals("Byte size", expectedByteSize, vintList.getByteSize());
|
||||
tstIterator(vintList, ints);
|
||||
}
|
||||
|
||||
public void tstViaBitSet(int [] ints, int expectedByteSize) throws IOException {
|
||||
final int MAX_INT_FOR_BITSET = 1024 * 1024;
|
||||
BitSet bs = new BitSet();
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
if (ints[i] > MAX_INT_FOR_BITSET) {
|
||||
return; // BitSet takes too much memory
|
||||
}
|
||||
if ((i > 0) && (ints[i-1] == ints[i])) {
|
||||
return; // BitSet cannot store duplicate.
|
||||
}
|
||||
bs.set(ints[i]);
|
||||
}
|
||||
SortedVIntList svil = new SortedVIntList(bs);
|
||||
tstVIntList(svil, ints, expectedByteSize);
|
||||
tstVIntList(new SortedVIntList(svil.iterator()), ints, expectedByteSize);
|
||||
}
|
||||
|
||||
private static final int VB1 = 0x7F;
|
||||
private static final int BIT_SHIFT = 7;
|
||||
private static final int VB2 = (VB1 << BIT_SHIFT) | VB1;
|
||||
private static final int VB3 = (VB2 << BIT_SHIFT) | VB1;
|
||||
private static final int VB4 = (VB3 << BIT_SHIFT) | VB1;
|
||||
|
||||
private int vIntByteSize(int i) {
|
||||
assert i >= 0;
|
||||
if (i <= VB1) return 1;
|
||||
if (i <= VB2) return 2;
|
||||
if (i <= VB3) return 3;
|
||||
if (i <= VB4) return 4;
|
||||
return 5;
|
||||
}
|
||||
|
||||
private int vIntListByteSize(int [] ints) {
|
||||
int byteSize = 0;
|
||||
int last = 0;
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
byteSize += vIntByteSize(ints[i] - last);
|
||||
last = ints[i];
|
||||
}
|
||||
return byteSize;
|
||||
}
|
||||
|
||||
public void tstInts(int [] ints) {
|
||||
int expectedByteSize = vIntListByteSize(ints);
|
||||
try {
|
||||
tstVIntList(new SortedVIntList(ints), ints, expectedByteSize);
|
||||
tstViaBitSet(ints, expectedByteSize);
|
||||
} catch (IOException ioe) {
|
||||
throw new Error(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
public void tstIllegalArgExc(int [] ints) {
|
||||
try {
|
||||
new SortedVIntList(ints);
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
return;
|
||||
}
|
||||
fail("Expected IllegalArgumentException");
|
||||
}
|
||||
|
||||
private int[] fibArray(int a, int b, int size) {
|
||||
final int[] fib = new int[size];
|
||||
fib[0] = a;
|
||||
fib[1] = b;
|
||||
for (int i = 2; i < size; i++) {
|
||||
fib[i] = fib[i-1] + fib[i-2];
|
||||
}
|
||||
return fib;
|
||||
}
|
||||
|
||||
private int[] reverseDiffs(int []ints) { // reverse the order of the successive differences
|
||||
final int[] res = new int[ints.length];
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
res[i] = ints[ints.length - 1] + (ints[0] - ints[ints.length - 1 - i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public void test01() {
|
||||
tstInts(new int[] {});
|
||||
}
|
||||
public void test02() {
|
||||
tstInts(new int[] {0});
|
||||
}
|
||||
public void test04a() {
|
||||
tstInts(new int[] {0, VB2 - 1});
|
||||
}
|
||||
public void test04b() {
|
||||
tstInts(new int[] {0, VB2});
|
||||
}
|
||||
public void test04c() {
|
||||
tstInts(new int[] {0, VB2 + 1});
|
||||
}
|
||||
public void test05() {
|
||||
tstInts(fibArray(0,1,7)); // includes duplicate value 1
|
||||
}
|
||||
public void test05b() {
|
||||
tstInts(reverseDiffs(fibArray(0,1,7)));
|
||||
}
|
||||
public void test06() {
|
||||
tstInts(fibArray(1,2,45)); // no duplicates, size 46 exceeds max int.
|
||||
}
|
||||
public void test06b() {
|
||||
tstInts(reverseDiffs(fibArray(1,2,45)));
|
||||
}
|
||||
public void test07a() {
|
||||
tstInts(new int[] {0, VB3});
|
||||
}
|
||||
public void test07b() {
|
||||
tstInts(new int[] {1, VB3 + 2});
|
||||
}
|
||||
public void test07c() {
|
||||
tstInts(new int[] {2, VB3 + 4});
|
||||
}
|
||||
public void test08a() {
|
||||
tstInts(new int[] {0, VB4 + 1});
|
||||
}
|
||||
public void test08b() {
|
||||
tstInts(new int[] {1, VB4 + 1});
|
||||
}
|
||||
public void test08c() {
|
||||
tstInts(new int[] {2, VB4 + 1});
|
||||
}
|
||||
|
||||
public void test10() {
|
||||
tstIllegalArgExc(new int[] {-1});
|
||||
}
|
||||
public void test11() {
|
||||
tstIllegalArgExc(new int[] {1,0});
|
||||
}
|
||||
public void test12() {
|
||||
tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0});
|
||||
}
|
||||
public void test13Allocation() throws Exception {
|
||||
int [] a = new int[2000]; // SortedVIntList initial byte size is 128
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = (107 + i) * i;
|
||||
}
|
||||
tstIterator(new SortedVIntList(a), a);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue