mirror of https://github.com/apache/lucene.git
LUCENE-4881: javadocs for SentinelIntSet
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1463587 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
aefecd26a4
commit
061f31e910
|
@ -20,26 +20,42 @@ package org.apache.lucene.util;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A native int set where one value is reserved to mean "EMPTY"
|
* A native int hash-based set where one value is reserved to mean "EMPTY" internally. The space overhead is fairly low
|
||||||
|
* as there is only one power-of-two sized int[] to hold the values. The set is re-hashed when adding a value that
|
||||||
|
* would make it >= 75% full. Consider extending and over-riding {@link #hash(int)} if the values might be poor
|
||||||
|
* hash keys; Lucene docids should be fine.
|
||||||
|
* The internal fields are exposed publicly to enable more efficient use at the expense of better O-O principles.
|
||||||
|
* <p/>
|
||||||
|
* To iterate over the integers held in this set, simply use code like this:
|
||||||
|
* <pre class="prettyprint">
|
||||||
|
* SentinelIntSet set = ...
|
||||||
|
* for (int v : set.keys) {
|
||||||
|
* if (v == set.emptyVal)
|
||||||
|
* continue;
|
||||||
|
* //use v...
|
||||||
|
* }</pre>
|
||||||
*
|
*
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public class SentinelIntSet {
|
public class SentinelIntSet {
|
||||||
|
/** A power-of-2 over-sized array holding the integers in the set along with empty values. */
|
||||||
public int[] keys;
|
public int[] keys;
|
||||||
public int count;
|
public int count;
|
||||||
public final int emptyVal;
|
public final int emptyVal;
|
||||||
public int rehashCount; // the count at which a rehash should be done
|
/** the count at which a rehash should be done */
|
||||||
|
public int rehashCount;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param size The minimum number of elements this set should be able to hold without re-hashing (i.e. the slots are guaranteed not to change)
|
* @param size The minimum number of elements this set should be able to hold without rehashing
|
||||||
|
* (i.e. the slots are guaranteed not to change)
|
||||||
* @param emptyVal The integer value to use for EMPTY
|
* @param emptyVal The integer value to use for EMPTY
|
||||||
*/
|
*/
|
||||||
public SentinelIntSet(int size, int emptyVal) {
|
public SentinelIntSet(int size, int emptyVal) {
|
||||||
this.emptyVal = emptyVal;
|
this.emptyVal = emptyVal;
|
||||||
int tsize = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(size), 1);
|
int tsize = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(size), 1);
|
||||||
rehashCount = tsize - (tsize>>2);
|
rehashCount = tsize - (tsize>>2);
|
||||||
if (size >= rehashCount) { // should be able to hold "size" w/o rehashing
|
if (size >= rehashCount) { // should be able to hold "size" w/o re-hashing
|
||||||
tsize <<= 1;
|
tsize <<= 1;
|
||||||
rehashCount = tsize - (tsize>>2);
|
rehashCount = tsize - (tsize>>2);
|
||||||
}
|
}
|
||||||
|
@ -53,13 +69,17 @@ public class SentinelIntSet {
|
||||||
count = 0;
|
count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** (internal) Return the hash for the key. The default implementation just returns the key,
|
||||||
|
* which is not appropriate for general purpose use.
|
||||||
|
*/
|
||||||
public int hash(int key) {
|
public int hash(int key) {
|
||||||
return key;
|
return key;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** The number of integers in this set. */
|
||||||
public int size() { return count; }
|
public int size() { return count; }
|
||||||
|
|
||||||
/** returns the slot for this key */
|
/** (internal) Returns the slot for this key */
|
||||||
public int getSlot(int key) {
|
public int getSlot(int key) {
|
||||||
assert key != emptyVal;
|
assert key != emptyVal;
|
||||||
int h = hash(key);
|
int h = hash(key);
|
||||||
|
@ -73,7 +93,7 @@ public class SentinelIntSet {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the slot for this key, or -slot-1 if not found */
|
/** (internal) Returns the slot for this key, or -slot-1 if not found */
|
||||||
public int find(int key) {
|
public int find(int key) {
|
||||||
assert key != emptyVal;
|
assert key != emptyVal;
|
||||||
int h = hash(key);
|
int h = hash(key);
|
||||||
|
@ -89,10 +109,13 @@ public class SentinelIntSet {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Does this set contain the specified integer? */
|
||||||
public boolean exists(int key) {
|
public boolean exists(int key) {
|
||||||
return find(key) >= 0;
|
return find(key) >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Puts this integer (key) in the set, and returns the slot index it was added to.
|
||||||
|
* It rehashes if adding it would make the set more than 75% full. */
|
||||||
public int put(int key) {
|
public int put(int key) {
|
||||||
int s = find(key);
|
int s = find(key);
|
||||||
if (s < 0) {
|
if (s < 0) {
|
||||||
|
@ -108,14 +131,14 @@ public class SentinelIntSet {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** (internal) Rehashes by doubling {@code int[] key} and filling with the old values. */
|
||||||
public void rehash() {
|
public void rehash() {
|
||||||
int newSize = keys.length << 1;
|
int newSize = keys.length << 1;
|
||||||
int[] oldKeys = keys;
|
int[] oldKeys = keys;
|
||||||
keys = new int[newSize];
|
keys = new int[newSize];
|
||||||
if (emptyVal != 0) Arrays.fill(keys, emptyVal);
|
if (emptyVal != 0) Arrays.fill(keys, emptyVal);
|
||||||
|
|
||||||
for (int i=0; i<oldKeys.length; i++) {
|
for (int key : oldKeys) {
|
||||||
int key = oldKeys[i];
|
|
||||||
if (key == emptyVal) continue;
|
if (key == emptyVal) continue;
|
||||||
int newSlot = getSlot(key);
|
int newSlot = getSlot(key);
|
||||||
keys[newSlot] = key;
|
keys[newSlot] = key;
|
||||||
|
|
Loading…
Reference in New Issue