Replace Set<Integer> with IntHashSet and Set<Long> with LongHashSet (#13400)

Also rename lucene.document.LongHashSet to DocValuesLongHashSet.
This commit is contained in:
Bruno Roustant 2024-05-23 16:25:52 +02:00 committed by GitHub
parent 05f04aa08a
commit d078fb774d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
31 changed files with 2913 additions and 542 deletions

View File

@ -350,6 +350,8 @@ Optimizations
* GITHUB#13339: Add a MemorySegment Vector scorer - for scoring without copying on-heap (Chris Hegarty)
* GITHUB#13400: Replace Set<Integer> with IntHashSet and Set<Long> with LongHashSet. (Bruno Roustant)
Bug Fixes
---------------------

View File

@ -22,7 +22,6 @@ import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -39,6 +38,7 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.IntHashSet;
/**
* A map of synonyms, keys and values are phrases.
@ -228,10 +228,10 @@ public class SynonymMap {
BytesRefBuilder scratch = new BytesRefBuilder();
ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();
final Set<Integer> dedupSet;
final IntHashSet dedupSet;
if (dedup) {
dedupSet = new HashSet<>();
dedupSet = new IntHashSet();
} else {
dedupSet = null;
}
@ -260,8 +260,7 @@ public class SynonymMap {
int count = 0;
for (int i = 0; i < numEntries; i++) {
if (dedupSet != null) {
// box once
final Integer ent = output.ords.get(i);
int ent = output.ords.get(i);
if (dedupSet.contains(ent)) {
continue;
}

View File

@ -22,8 +22,6 @@ import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
@ -52,6 +50,7 @@ import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.MathUtil;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.hppc.LongHashSet;
import org.apache.lucene.util.hppc.LongIntHashMap;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.apache.lucene.util.packed.DirectWriter;
@ -198,7 +197,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
MinMaxTracker minMax = new MinMaxTracker();
MinMaxTracker blockMinMax = new MinMaxTracker();
long gcd = 0;
Set<Long> uniqueValues = ords ? null : new HashSet<>();
LongHashSet uniqueValues = ords ? null : new LongHashSet();
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
for (int i = 0, count = values.docValueCount(); i < count; ++i) {
long v = values.nextValue();
@ -282,10 +281,10 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
&& DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1)
< DirectWriter.unsignedBitsRequired((max - min) / gcd)) {
numBitsPerValue = DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1);
final Long[] sortedUniqueValues = uniqueValues.toArray(new Long[0]);
final long[] sortedUniqueValues = uniqueValues.toArray();
Arrays.sort(sortedUniqueValues);
meta.writeInt(sortedUniqueValues.length); // tablesize
for (Long v : sortedUniqueValues) {
for (long v : sortedUniqueValues) {
meta.writeLong(v); // table[] entry
}
encode = new LongIntHashMap();

View File

@ -26,8 +26,6 @@ import java.util.Collection;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
@ -53,6 +51,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.hppc.IntHashSet;
import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.DirectWriter;
import org.apache.lucene.util.packed.PackedInts;
@ -454,16 +453,18 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
/** Returns a sorted array containing unique field numbers */
private int[] flushFieldNums() throws IOException {
SortedSet<Integer> fieldNums = new TreeSet<>();
IntHashSet fieldNumsSet = new IntHashSet();
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
fieldNums.add(fd.fieldNum);
fieldNumsSet.add(fd.fieldNum);
}
}
int[] fieldNums = fieldNumsSet.toArray();
Arrays.sort(fieldNums);
final int numDistinctFields = fieldNums.size();
final int numDistinctFields = fieldNums.length;
assert numDistinctFields > 0;
final int bitsRequired = PackedInts.bitsRequired(fieldNums.last());
final int bitsRequired = PackedInts.bitsRequired(fieldNums[numDistinctFields - 1]);
final int token = (Math.min(numDistinctFields - 1, 0x07) << 5) | bitsRequired;
vectorsStream.writeByte((byte) token);
if (numDistinctFields - 1 >= 0x07) {
@ -471,18 +472,13 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
}
final PackedInts.Writer writer =
PackedInts.getWriterNoHeader(
vectorsStream, PackedInts.Format.PACKED, fieldNums.size(), bitsRequired, 1);
vectorsStream, PackedInts.Format.PACKED, numDistinctFields, bitsRequired, 1);
for (Integer fieldNum : fieldNums) {
writer.add(fieldNum);
}
writer.finish();
int[] fns = new int[fieldNums.size()];
int i = 0;
for (Integer key : fieldNums) {
fns[i++] = key;
}
return fns;
return fieldNums;
}
private void flushFields(int totalFields, int[] fieldNums) throws IOException {

View File

@ -25,9 +25,9 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
/** Set of longs, optimized for docvalues usage */
final class LongHashSet implements Accountable {
final class DocValuesLongHashSet implements Accountable {
private static final long BASE_RAM_BYTES =
RamUsageEstimator.shallowSizeOfInstance(LongHashSet.class);
RamUsageEstimator.shallowSizeOfInstance(DocValuesLongHashSet.class);
private static final long MISSING = Long.MIN_VALUE;
@ -43,7 +43,7 @@ final class LongHashSet implements Accountable {
final long maxValue;
/** Construct a set. Values must be in sorted order. */
LongHashSet(long[] values) {
DocValuesLongHashSet(long[] values) {
int tableSize = Math.toIntExact(values.length * 3L / 2);
tableSize = 1 << PackedInts.bitsRequired(tableSize); // make it a power of 2
assert tableSize >= values.length * 3L / 2;
@ -119,8 +119,8 @@ final class LongHashSet implements Accountable {
@Override
public boolean equals(Object obj) {
if (obj != null && obj instanceof LongHashSet) {
LongHashSet that = (LongHashSet) obj;
if (obj != null && obj instanceof DocValuesLongHashSet) {
DocValuesLongHashSet that = (DocValuesLongHashSet) obj;
return size == that.size
&& minValue == that.minValue
&& maxValue == that.maxValue

View File

@ -42,12 +42,12 @@ final class SortedNumericDocValuesSetQuery extends Query implements Accountable
RamUsageEstimator.shallowSizeOfInstance(SortedNumericDocValuesSetQuery.class);
private final String field;
private final LongHashSet numbers;
private final DocValuesLongHashSet numbers;
SortedNumericDocValuesSetQuery(String field, long[] numbers) {
this.field = Objects.requireNonNull(field);
Arrays.sort(numbers);
this.numbers = new LongHashSet(numbers);
this.numbers = new DocValuesLongHashSet(numbers);
}
@Override

View File

@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOConsumer;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.hppc.LongHashSet;
/**
* Tracks the stream of {@link FrozenBufferedUpdates}. When DocumentsWriterPerThread flushes, its
@ -323,7 +324,7 @@ final class BufferedUpdatesStream implements Accountable {
* This lets us track the "holes" in the current frontier of applying del gens; once the holes
* are filled in we can advance completedDelGen.
*/
private final Set<Long> finishedDelGens = new HashSet<>();
private final LongHashSet finishedDelGens = new LongHashSet();
private final InfoStream infoStream;

View File

@ -31,6 +31,7 @@ import org.apache.lucene.index.ImpactsSource;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.hppc.IntHashSet;
/**
* Find all slop-valid position-combinations (matches) encountered while traversing/hopping the
@ -556,8 +557,8 @@ public final class SloppyPhraseMatcher extends PhraseMatcher {
ArrayList<FixedBitSet> bb = ppTermsBitSets(rpp, rptTerms);
unionTermGroups(bb);
HashMap<Term, Integer> tg = termGroups(rptTerms, bb);
HashSet<Integer> distinctGroupIDs = new HashSet<>(tg.values());
for (int i = 0; i < distinctGroupIDs.size(); i++) {
int numDistinctGroupIds = new IntHashSet(tg.values()).size();
for (int i = 0; i < numDistinctGroupIds; i++) {
tmp.add(new HashSet<>());
}
for (PhrasePositions pp : rpp) {

View File

@ -18,14 +18,13 @@ package org.apache.lucene.util.automaton;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Sorter;
import org.apache.lucene.util.hppc.IntHashSet;
// TODO
// - could use packed int arrays instead
@ -618,7 +617,7 @@ public class Automaton implements Accountable, TransitionAccessor {
/** Returns sorted array of all interval start points. */
public int[] getStartPoints() {
Set<Integer> pointset = new HashSet<>();
IntHashSet pointset = new IntHashSet();
pointset.add(Character.MIN_CODE_POINT);
// System.out.println("getStartPoints");
for (int s = 0; s < nextState; s += 2) {
@ -636,11 +635,7 @@ public class Automaton implements Accountable, TransitionAccessor {
trans += 3;
}
}
int[] points = new int[pointset.size()];
int n = 0;
for (Integer m : pointset) {
points[n++] = m;
}
int[] points = pointset.toArray();
Arrays.sort(points);
return points;
}

View File

@ -16,10 +16,9 @@
*/
package org.apache.lucene.util.automaton;
import java.util.Iterator;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.Arrays;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.hppc.IntHashSet;
/**
* Class to construct DFAs that match a word within some edit distance.
@ -68,7 +67,7 @@ public class LevenshteinAutomata {
this.alphaMax = alphaMax;
// calculate the alphabet
SortedSet<Integer> set = new TreeSet<>();
IntHashSet set = new IntHashSet();
for (int i = 0; i < word.length; i++) {
int v = word[i];
if (v > alphaMax) {
@ -76,9 +75,8 @@ public class LevenshteinAutomata {
}
set.add(v);
}
alphabet = new int[set.size()];
Iterator<Integer> iterator = set.iterator();
for (int i = 0; i < alphabet.length; i++) alphabet[i] = iterator.next();
alphabet = set.toArray();
Arrays.sort(alphabet);
rangeLower = new int[alphabet.length + 2];
rangeUpper = new int[alphabet.length + 2];

View File

@ -48,6 +48,8 @@ import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.hppc.BitMixer;
import org.apache.lucene.util.hppc.IntCursor;
import org.apache.lucene.util.hppc.IntHashSet;
import org.apache.lucene.util.hppc.IntObjectHashMap;
/**
@ -249,14 +251,14 @@ public final class Operations {
b = concatenate(as);
}
Set<Integer> prevAcceptStates = toSet(b, 0);
IntHashSet prevAcceptStates = toSet(b, 0);
Automaton.Builder builder = new Automaton.Builder();
builder.copy(b);
for (int i = min; i < max; i++) {
int numStates = builder.getNumStates();
builder.copy(a);
for (int s : prevAcceptStates) {
builder.addEpsilon(s, numStates);
for (IntCursor s : prevAcceptStates) {
builder.addEpsilon(s.value, numStates);
}
prevAcceptStates = toSet(a, numStates);
}
@ -264,16 +266,15 @@ public final class Operations {
return builder.finish();
}
private static Set<Integer> toSet(Automaton a, int offset) {
private static IntHashSet toSet(Automaton a, int offset) {
int numStates = a.getNumStates();
BitSet isAccept = a.getAcceptStates();
Set<Integer> result = new HashSet<Integer>();
IntHashSet result = new IntHashSet();
int upto = 0;
while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) {
result.add(offset + upto);
upto++;
}
return result;
}
@ -1130,7 +1131,7 @@ public final class Operations {
throw new IllegalArgumentException("input automaton must be deterministic");
}
IntsRefBuilder builder = new IntsRefBuilder();
HashSet<Integer> visited = new HashSet<>();
IntHashSet visited = new IntHashSet();
int s = 0;
Transition t = new Transition();
while (true) {

View File

@ -17,6 +17,8 @@
package org.apache.lucene.util.hppc;
import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
import java.util.concurrent.atomic.AtomicInteger;
/** Constants for primitive maps. */
@ -42,4 +44,62 @@ public class HashContainers {
public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1;
static final AtomicInteger ITERATION_SEED = new AtomicInteger();
/** Derives a small odd slot increment (29..43) from {@code seed} so iteration order varies. */
static int iterationIncrement(int seed) {
  // Odd increments are co-prime with power-of-two table sizes, so every slot gets visited.
  return ((seed & 7) * 2) + 29;
}
/**
 * Returns the doubled buffer size to grow to, failing if the maximum hash array length has
 * already been reached.
 *
 * @param arraySize current (power-of-two) buffer size
 * @param elements current element count, used only in the failure message
 * @param loadFactor current load factor, used only in the failure message
 * @throws BufferAllocationException if the buffer cannot grow any further
 */
static int nextBufferSize(int arraySize, int elements, double loadFactor) {
  assert checkPowerOfTwo(arraySize);
  if (arraySize != MAX_HASH_ARRAY_LENGTH) {
    return arraySize << 1;
  }
  throw new BufferAllocationException(
      "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
      elements, loadFactor);
}
/** Computes the element count at which a hash buffer of {@code arraySize} must be expanded. */
static int expandAtCount(int arraySize, double loadFactor) {
  assert checkPowerOfTwo(arraySize);
  // Hash container invariant: keep at least one empty slot so that lookup loops always
  // terminate on either the element or an empty slot.
  final int byLoadFactor = (int) Math.ceil(arraySize * loadFactor);
  return Math.min(arraySize - 1, byLoadFactor);
}
/** Asserts (internal invariant) that {@code arraySize} is a power of two greater than 1. */
static boolean checkPowerOfTwo(int arraySize) {
  // Internal helper: assert rather than throw — always returns true so it can be used
  // inside an assert statement at call sites.
  assert arraySize > 1;
  assert nextHighestPowerOfTwo(arraySize) == arraySize;
  return true;
}
/**
 * Returns the smallest power-of-two buffer size able to hold {@code elements} entries at the
 * given load factor without rehashing.
 *
 * @throws IllegalArgumentException if {@code elements} is negative
 * @throws BufferAllocationException if the required size exceeds the maximum array length
 */
static int minBufferSize(int elements, double loadFactor) {
  if (elements < 0) {
    throw new IllegalArgumentException("Number of elements must be >= 0: " + elements);
  }
  long size = (long) Math.ceil(elements / loadFactor);
  if (size == elements) {
    // Guarantee at least one free slot even when exactly at capacity.
    size++;
  }
  size = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(size));
  if (size > MAX_HASH_ARRAY_LENGTH) {
    throw new BufferAllocationException(
        "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
        elements, loadFactor);
  }
  return (int) size;
}
/**
 * Validates that {@code loadFactor} lies inside the inclusive range
 * [{@code minAllowedInclusive}, {@code maxAllowedInclusive}].
 *
 * @throws BufferAllocationException if the load factor is out of range
 */
static void checkLoadFactor(
    double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) {
  // Keep the original comparison operators (not a negated range test) so that NaN load
  // factors slip through here exactly as before.
  final boolean tooSmall = loadFactor < minAllowedInclusive;
  final boolean tooLarge = loadFactor > maxAllowedInclusive;
  if (tooSmall || tooLarge) {
    throw new BufferAllocationException(
        "The load factor should be in range [%.2f, %.2f]: %f",
        minAllowedInclusive, maxAllowedInclusive, loadFactor);
  }
}
}

View File

@ -0,0 +1,688 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.hppc;
import static org.apache.lucene.util.hppc.HashContainers.*;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/**
* A hash set of <code>int</code>s, implemented using open addressing with linear probing for
* collision resolution.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.IntHashSet
*
* <p>github: https://github.com/carrotsearch/hppc release 0.9.0
*/
public class IntHashSet implements Iterable<IntCursor>, Accountable, Cloneable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(IntHashSet.class);
/** The hash array holding keys. */
public int[] keys;
/**
* The number of stored keys (assigned key slots), excluding the special "empty" key, if any.
*
* @see #size()
* @see #hasEmptyKey
*/
protected int assigned;
/** Mask for slot scans in {@link #keys}. */
protected int mask;
/** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */
protected int resizeAt;
/** Special treatment for the "empty slot" key marker. */
protected boolean hasEmptyKey;
/** The load factor for {@link #keys}. */
protected double loadFactor;
/** Seed used to ensure the hash iteration order is different from an iteration to another. */
protected int iterationSeed;
/** New instance with sane defaults. */
public IntHashSet() {
this(DEFAULT_EXPECTED_ELEMENTS);
}
/**
* New instance with sane defaults.
*
* @param expectedElements The expected number of elements guaranteed not to cause a rehash
* (inclusive).
*/
public IntHashSet(int expectedElements) {
this(expectedElements, DEFAULT_LOAD_FACTOR);
}
/**
* New instance with the provided defaults.
*
* @param expectedElements The expected number of elements guaranteed not to cause a rehash
* (inclusive).
* @param loadFactor The load factor for internal buffers. Insane load factors (zero, full
* capacity) are rejected by {@link #verifyLoadFactor(double)}.
*/
public IntHashSet(int expectedElements, double loadFactor) {
this.loadFactor = verifyLoadFactor(loadFactor);
iterationSeed = ITERATION_SEED.incrementAndGet();
ensureCapacity(expectedElements);
}
/** New instance copying elements from another set. */
public IntHashSet(IntHashSet set) {
this(set.size());
addAll(set);
}
/** New instance copying elements from another collection. */
public IntHashSet(Collection<Integer> collection) {
this(collection.size());
addAll(collection);
}
public boolean add(int key) {
if (((key) == 0)) {
assert ((keys[mask + 1]) == 0);
boolean added = !hasEmptyKey;
hasEmptyKey = true;
return added;
} else {
final int[] keys = this.keys;
final int mask = this.mask;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
return false;
}
slot = (slot + 1) & mask;
}
if (assigned == resizeAt) {
allocateThenInsertThenRehash(slot, key);
} else {
keys[slot] = key;
}
assigned++;
return true;
}
}
/**
* Adds all elements from the given list (vararg) to this set.
*
* @return Returns the number of elements actually added as a result of this call (not previously
* present in the set).
*/
public final int addAll(int... elements) {
ensureCapacity(elements.length);
int count = 0;
for (int e : elements) {
if (add(e)) {
count++;
}
}
return count;
}
/**
* Adds all elements from the given iterable to this set.
*
* @return Returns the number of elements actually added as a result of this call (not previously
* present in the set).
*/
public int addAll(Iterable<? extends IntCursor> iterable) {
int count = 0;
for (IntCursor cursor : iterable) {
if (add(cursor.value)) {
count++;
}
}
return count;
}
public int addAll(Collection<Integer> collection) {
int count = 0;
for (int element : collection) {
if (add(element)) {
count++;
}
}
return count;
}
public int[] toArray() {
final int[] cloned = (new int[size()]);
int j = 0;
if (hasEmptyKey) {
cloned[j++] = 0;
}
final int[] keys = this.keys;
int seed = nextIterationSeed();
int inc = iterationIncrement(seed);
for (int i = 0, mask = this.mask, slot = seed & mask;
i <= mask;
i++, slot = (slot + inc) & mask) {
int existing;
if (!((existing = keys[slot]) == 0)) {
cloned[j++] = existing;
}
}
return cloned;
}
/** An alias for the (preferred) {@link #removeAll}. */
public boolean remove(int key) {
if (((key) == 0)) {
boolean hadEmptyKey = hasEmptyKey;
hasEmptyKey = false;
return hadEmptyKey;
} else {
final int[] keys = this.keys;
final int mask = this.mask;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
shiftConflictingKeys(slot);
return true;
}
slot = (slot + 1) & mask;
}
return false;
}
}
/**
* Removes all keys present in a given container.
*
* @return Returns the number of elements actually removed as a result of this call.
*/
public int removeAll(IntHashSet other) {
final int before = size();
// Try to iterate over the smaller set or over the container that isn't implementing
// efficient contains() lookup.
if (other.size() >= size()) {
if (hasEmptyKey && other.contains(0)) {
hasEmptyKey = false;
}
final int[] keys = this.keys;
for (int slot = 0, max = this.mask; slot <= max; ) {
int existing;
if (!((existing = keys[slot]) == 0) && other.contains(existing)) {
// Shift, do not increment slot.
shiftConflictingKeys(slot);
} else {
slot++;
}
}
} else {
for (IntCursor c : other) {
remove(c.value);
}
}
return before - size();
}
public boolean contains(int key) {
if (((key) == 0)) {
return hasEmptyKey;
} else {
final int[] keys = this.keys;
final int mask = this.mask;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
return true;
}
slot = (slot + 1) & mask;
}
return false;
}
}
public void clear() {
assigned = 0;
hasEmptyKey = false;
Arrays.fill(keys, 0);
}
public void release() {
assigned = 0;
hasEmptyKey = false;
keys = null;
ensureCapacity(DEFAULT_EXPECTED_ELEMENTS);
}
public boolean isEmpty() {
return size() == 0;
}
/**
* Ensure this container can hold at least the given number of elements without resizing its
* buffers.
*
* @param expectedElements The total number of elements, inclusive.
*/
public void ensureCapacity(int expectedElements) {
if (expectedElements > resizeAt || keys == null) {
final int[] prevKeys = this.keys;
allocateBuffers(minBufferSize(expectedElements, loadFactor));
if (prevKeys != null && !isEmpty()) {
rehash(prevKeys);
}
}
}
public int size() {
return assigned + (hasEmptyKey ? 1 : 0);
}
@Override
public int hashCode() {
int h = hasEmptyKey ? 0xDEADBEEF : 0;
final int[] keys = this.keys;
for (int slot = mask; slot >= 0; slot--) {
int existing;
if (!((existing = keys[slot]) == 0)) {
h += BitMixer.mix(existing);
}
}
return h;
}
@Override
public boolean equals(Object obj) {
return (this == obj)
|| (obj != null && getClass() == obj.getClass() && sameKeys(getClass().cast(obj)));
}
/** Return true if all keys of some other container exist in this container. */
private boolean sameKeys(IntHashSet other) {
if (other.size() != size()) {
return false;
}
for (IntCursor c : other) {
if (!contains(c.value)) {
return false;
}
}
return true;
}
@Override
public IntHashSet clone() {
try {
/* */
IntHashSet cloned = (IntHashSet) super.clone();
cloned.keys = keys.clone();
cloned.hasEmptyKey = hasEmptyKey;
cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
return cloned;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
}
@Override
public Iterator<IntCursor> iterator() {
return new EntryIterator();
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys);
}
/**
* Provides the next iteration seed used to build the iteration starting slot and offset
* increment. This method does not need to be synchronized, what matters is that each thread gets
* a sequence of varying seeds.
*/
protected int nextIterationSeed() {
return iterationSeed = BitMixer.mixPhi(iterationSeed);
}
/** An iterator implementation for {@link #iterator}. */
protected final class EntryIterator extends AbstractIterator<IntCursor> {
private final IntCursor cursor;
private final int increment;
private int index;
private int slot;
public EntryIterator() {
cursor = new IntCursor();
int seed = nextIterationSeed();
increment = iterationIncrement(seed);
slot = seed & mask;
}
@Override
protected IntCursor fetch() {
final int mask = IntHashSet.this.mask;
while (index <= mask) {
int existing;
index++;
slot = (slot + increment) & mask;
if (!((existing = keys[slot]) == 0)) {
cursor.index = slot;
cursor.value = existing;
return cursor;
}
}
if (index == mask + 1 && hasEmptyKey) {
cursor.index = index++;
cursor.value = 0;
return cursor;
}
return done();
}
}
/**
* Create a set from a variable number of arguments or an array of <code>int</code>. The elements
* are copied from the argument to the internal buffer.
*/
/* */
public static IntHashSet from(int... elements) {
final IntHashSet set = new IntHashSet(elements.length);
set.addAll(elements);
return set;
}
/**
* Returns a hash code for the given key.
*
* <p>The output from this function should evenly distribute keys across the entire integer range.
*/
protected int hashKey(int key) {
assert !((key) == 0); // Handled as a special case (empty slot marker).
return BitMixer.mixPhi(key);
}
/**
* Returns a logical "index" of a given key that can be used to speed up follow-up logic in
* certain scenarios (conditional logic).
*
* <p>The semantics of "indexes" are not strictly defined. Indexes may (and typically won't be)
* contiguous.
*
* <p>The index is valid only between modifications (it will not be affected by read-only
* operations).
*
* @see #indexExists
* @see #indexGet
* @see #indexInsert
* @see #indexReplace
* @param key The key to locate in the set.
* @return A non-negative value of the logical "index" of the key in the set or a negative value
* if the key did not exist.
*/
public int indexOf(int key) {
final int mask = this.mask;
if (((key) == 0)) {
return hasEmptyKey ? mask + 1 : ~(mask + 1);
} else {
final int[] keys = this.keys;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
return slot;
}
slot = (slot + 1) & mask;
}
return ~slot;
}
}
/**
* @see #indexOf
* @param index The index of a given key, as returned from {@link #indexOf}.
* @return Returns <code>true</code> if the index corresponds to an existing key or false
* otherwise. This is equivalent to checking whether the index is a positive value (existing
* keys) or a negative value (non-existing keys).
*/
public boolean indexExists(int index) {
assert index < 0 || index <= mask || (index == mask + 1 && hasEmptyKey);
return index >= 0;
}
/**
* Returns the exact value of the existing key. This method makes sense for sets of objects which
* define custom key-equality relationship.
*
* @see #indexOf
* @param index The index of an existing key.
* @return Returns the equivalent key currently stored in the set.
* @throws AssertionError If assertions are enabled and the index does not correspond to an
* existing key.
*/
public int indexGet(int index) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);
return keys[index];
}
/**
* Replaces the existing equivalent key with the given one and returns any previous value stored
* for that key.
*
* @see #indexOf
* @param index The index of an existing key.
* @param equivalentKey The key to put in the set as a replacement. Must be equivalent to the key
* currently stored at the provided index.
* @return Returns the previous key stored in the set.
* @throws AssertionError If assertions are enabled and the index does not correspond to an
* existing key.
*/
public int indexReplace(int index, int equivalentKey) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);
assert ((keys[index]) == (equivalentKey));
int previousValue = keys[index];
keys[index] = equivalentKey;
return previousValue;
}
/**
* Inserts a key for an index that is not present in the set. This method may help in avoiding
* double recalculation of the key's hash.
*
* @see #indexOf
* @param index The index of a previously non-existing key, as returned from {@link #indexOf}.
* @throws AssertionError If assertions are enabled and the index does not correspond to an
* existing key.
*/
public void indexInsert(int index, int key) {
assert index < 0 : "The index must not point at an existing key.";
index = ~index;
if (((key) == 0)) {
assert index == mask + 1;
assert ((keys[index]) == 0);
hasEmptyKey = true;
} else {
assert ((keys[index]) == 0);
if (assigned == resizeAt) {
allocateThenInsertThenRehash(index, key);
} else {
keys[index] = key;
}
assigned++;
}
}
/**
* Removes a key at an index previously acquired from {@link #indexOf}.
*
* @see #indexOf
* @param index The index of the key to remove, as returned from {@link #indexOf}.
* @throws AssertionError If assertions are enabled and the index does not correspond to an
* existing key.
*/
public void indexRemove(int index) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);
if (index > mask) {
hasEmptyKey = false;
} else {
shiftConflictingKeys(index);
}
}
/**
* Validate load factor range and return it. Override and suppress if you need insane load
* factors.
*/
protected double verifyLoadFactor(double loadFactor) {
checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR);
return loadFactor;
}
/** Rehash from old buffers to new buffers. */
protected void rehash(int[] fromKeys) {
assert HashContainers.checkPowerOfTwo(fromKeys.length - 1);
// Rehash all stored keys into the new buffers.
final int[] keys = this.keys;
final int mask = this.mask;
int existing;
for (int i = fromKeys.length - 1; --i >= 0; ) {
if (!((existing = fromKeys[i]) == 0)) {
int slot = hashKey(existing) & mask;
while (!((keys[slot]) == 0)) {
slot = (slot + 1) & mask;
}
keys[slot] = existing;
}
}
}
/**
* Allocate new internal buffers. This method attempts to allocate and assign internal buffers
* atomically (either allocations succeed or not).
*/
protected void allocateBuffers(int arraySize) {
assert Integer.bitCount(arraySize) == 1;
// Ensure no change is done if we hit an OOM.
int[] prevKeys = this.keys;
try {
int emptyElementSlot = 1;
this.keys = (new int[arraySize + emptyElementSlot]);
} catch (OutOfMemoryError e) {
this.keys = prevKeys;
throw new BufferAllocationException(
"Not enough memory to allocate buffers for rehashing: %,d -> %,d",
e, this.keys == null ? 0 : size(), arraySize);
}
this.resizeAt = expandAtCount(arraySize, loadFactor);
this.mask = arraySize - 1;
}
/**
 * Handles insertion of a new key when the buffers have reached their fill limit.
 *
 * <p>Fresh, larger buffers are allocated first; only once that succeeds is the pending key
 * written into the old buffers (temporarily violating the invariant of keeping at least one
 * empty slot), after which every key is rehashed into the new buffers.
 */
protected void allocateThenInsertThenRehash(int slot, int pendingKey) {
  assert assigned == resizeAt && keys[slot] == 0 && pendingKey != 0;
  // Allocate first: if this throws (OOM), the set is left in a consistent state.
  final int[] oldKeys = this.keys;
  allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor));
  assert this.keys.length > oldKeys.length;
  // Allocation succeeded, so stash the pending key in the known-free slot of the old
  // buffers, then rehash everything (pending key included) into the new buffers.
  oldKeys[slot] = pendingKey;
  rehash(oldKeys);
}
/**
 * Shift all the slot-conflicting keys allocated to (and including) <code>slot</code>.
 *
 * <p>This is the standard backward-shift deletion for open addressing with linear probing: the
 * probe chain following the gap is walked and any entry whose ideal position lies at or before
 * the gap is moved back into it, so lookups never hit a spurious empty slot.
 */
protected void shiftConflictingKeys(int gapSlot) {
  final int[] keys = this.keys;
  final int mask = this.mask;
  // Perform shifts of conflicting keys to fill in the gap.
  int distance = 0;
  while (true) {
    // Probe the next slot after the gap, wrapping around the table.
    final int slot = (gapSlot + (++distance)) & mask;
    final int existing = keys[slot];
    if (((existing) == 0)) {
      // An empty slot terminates the probe chain: nothing beyond it can conflict.
      break;
    }
    final int idealSlot = hashKey(existing);
    // Masked distance from the entry's ideal slot to its current slot.
    final int shift = (slot - idealSlot) & mask;
    if (shift >= distance) {
      // Entry at this position was originally at or before the gap slot.
      // Move the conflict-shifted entry to the gap's position and repeat the procedure
      // for any entries to the right of the current position, treating it
      // as the new gap.
      keys[gapSlot] = existing;
      gapSlot = slot;
      distance = 0;
    }
  }
  // Mark the last found gap slot without a conflict as empty.
  keys[gapSlot] = 0;
  assigned--;
}
}

View File

@ -17,7 +17,6 @@
package org.apache.lucene.util.hppc;
import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
import static org.apache.lucene.util.hppc.HashContainers.*;
import java.util.Arrays;
@ -96,10 +95,10 @@ public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Acco
ensureCapacity(expectedElements);
}
/** Create a hash map from all key-value pairs of another container. */
public IntIntHashMap(Iterable<? extends IntIntCursor> container) {
this();
putAll(container);
/** Create a hash map from all key-value pairs of another map. */
public IntIntHashMap(IntIntHashMap map) {
this(map.size());
putAll(map);
}
public int put(int key, int value) {
@ -107,8 +106,8 @@ public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Acco
final int mask = this.mask;
if (((key) == 0)) {
int previousValue = hasEmptyKey ? values[mask + 1] : 0;
hasEmptyKey = true;
int previousValue = values[mask + 1];
values[mask + 1] = value;
return previousValue;
} else {
@ -205,6 +204,9 @@ public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Acco
public int remove(int key) {
final int mask = this.mask;
if (((key) == 0)) {
if (!hasEmptyKey) {
return 0;
}
hasEmptyKey = false;
int previousValue = values[mask + 1];
values[mask + 1] = 0;
@ -357,6 +359,7 @@ public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Acco
int previousValue = values[index];
if (index > mask) {
assert index == mask + 1;
hasEmptyKey = false;
values[index] = 0;
} else {
@ -402,7 +405,8 @@ public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Acco
@Override
public boolean equals(Object obj) {
return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj));
return (this == obj)
|| (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)));
}
/** Return true if all keys of some other container exist in this container. */
@ -625,7 +629,7 @@ public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Acco
cloned.keys = keys.clone();
cloned.values = values.clone();
cloned.hasEmptyKey = hasEmptyKey;
cloned.iterationSeed = nextIterationSeed();
cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
return cloned;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
@ -764,64 +768,6 @@ public class IntIntHashMap implements Iterable<IntIntHashMap.IntIntCursor>, Acco
rehash(prevKeys, prevValues);
}
static int nextBufferSize(int arraySize, int elements, double loadFactor) {
assert checkPowerOfTwo(arraySize);
if (arraySize == MAX_HASH_ARRAY_LENGTH) {
throw new BufferAllocationException(
"Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
elements, loadFactor);
}
return arraySize << 1;
}
static int expandAtCount(int arraySize, double loadFactor) {
assert checkPowerOfTwo(arraySize);
// Take care of hash container invariant (there has to be at least one empty slot to ensure
// the lookup loop finds either the element or an empty slot).
return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor));
}
static boolean checkPowerOfTwo(int arraySize) {
// These are internals, we can just assert without retrying.
assert arraySize > 1;
assert nextHighestPowerOfTwo(arraySize) == arraySize;
return true;
}
static int minBufferSize(int elements, double loadFactor) {
if (elements < 0) {
throw new IllegalArgumentException("Number of elements must be >= 0: " + elements);
}
long length = (long) Math.ceil(elements / loadFactor);
if (length == elements) {
length++;
}
length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length));
if (length > MAX_HASH_ARRAY_LENGTH) {
throw new BufferAllocationException(
"Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
elements, loadFactor);
}
return (int) length;
}
static void checkLoadFactor(
double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) {
if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) {
throw new BufferAllocationException(
"The load factor should be in range [%.2f, %.2f]: %f",
minAllowedInclusive, maxAllowedInclusive, loadFactor);
}
}
static int iterationIncrement(int seed) {
return 29 + ((seed & 7) << 1); // Small odd integer.
}
/**
* Shift all the slot-conflicting keys and values allocated to (and including) <code>slot</code>.
*/

View File

@ -17,7 +17,6 @@
package org.apache.lucene.util.hppc;
import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
import static org.apache.lucene.util.hppc.HashContainers.*;
import java.util.Arrays;
@ -98,10 +97,10 @@ public class IntObjectHashMap<VType>
ensureCapacity(expectedElements);
}
/** Create a hash map from all key-value pairs of another container. */
public IntObjectHashMap(Iterable<? extends IntObjectCursor<? extends VType>> container) {
this();
putAll(container);
/** Create a hash map from all key-value pairs of another map. */
public IntObjectHashMap(IntObjectHashMap<VType> map) {
this(map.size());
putAll(map);
}
public VType put(int key, VType value) {
@ -109,8 +108,8 @@ public class IntObjectHashMap<VType>
final int mask = this.mask;
if (((key) == 0)) {
VType previousValue = hasEmptyKey ? (VType) values[mask + 1] : null;
hasEmptyKey = true;
VType previousValue = (VType) values[mask + 1];
values[mask + 1] = value;
return previousValue;
} else {
@ -173,6 +172,9 @@ public class IntObjectHashMap<VType>
public VType remove(int key) {
final int mask = this.mask;
if (((key) == 0)) {
if (!hasEmptyKey) {
return null;
}
hasEmptyKey = false;
VType previousValue = (VType) values[mask + 1];
values[mask + 1] = 0;
@ -325,6 +327,7 @@ public class IntObjectHashMap<VType>
VType previousValue = (VType) values[index];
if (index > mask) {
assert index == mask + 1;
hasEmptyKey = false;
values[index] = 0;
} else {
@ -370,7 +373,8 @@ public class IntObjectHashMap<VType>
@Override
public boolean equals(Object obj) {
return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj));
return (this == obj)
|| (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)));
}
/** Return true if all keys of some other container exist in this container. */
@ -612,7 +616,7 @@ public class IntObjectHashMap<VType>
cloned.keys = keys.clone();
cloned.values = values.clone();
cloned.hasEmptyKey = hasEmptyKey;
cloned.iterationSeed = nextIterationSeed();
cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
return cloned;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
@ -751,64 +755,6 @@ public class IntObjectHashMap<VType>
rehash(prevKeys, prevValues);
}
static int nextBufferSize(int arraySize, int elements, double loadFactor) {
assert checkPowerOfTwo(arraySize);
if (arraySize == MAX_HASH_ARRAY_LENGTH) {
throw new BufferAllocationException(
"Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
elements, loadFactor);
}
return arraySize << 1;
}
static int expandAtCount(int arraySize, double loadFactor) {
assert checkPowerOfTwo(arraySize);
// Take care of hash container invariant (there has to be at least one empty slot to ensure
// the lookup loop finds either the element or an empty slot).
return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor));
}
static boolean checkPowerOfTwo(int arraySize) {
// These are internals, we can just assert without retrying.
assert arraySize > 1;
assert nextHighestPowerOfTwo(arraySize) == arraySize;
return true;
}
static int minBufferSize(int elements, double loadFactor) {
if (elements < 0) {
throw new IllegalArgumentException("Number of elements must be >= 0: " + elements);
}
long length = (long) Math.ceil(elements / loadFactor);
if (length == elements) {
length++;
}
length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length));
if (length > MAX_HASH_ARRAY_LENGTH) {
throw new BufferAllocationException(
"Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
elements, loadFactor);
}
return (int) length;
}
static void checkLoadFactor(
double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) {
if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) {
throw new BufferAllocationException(
"The load factor should be in range [%.2f, %.2f]: %f",
minAllowedInclusive, maxAllowedInclusive, loadFactor);
}
}
static int iterationIncrement(int seed) {
return 29 + ((seed & 7) << 1); // Small odd integer.
}
/**
* Shift all the slot-conflicting keys and values allocated to (and including) <code>slot</code>.
*/

View File

@ -0,0 +1,671 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.hppc;
import static org.apache.lucene.util.hppc.HashContainers.*;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/**
* A hash set of <code>long</code>s, implemented using open addressing with linear probing for
* collision resolution.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.LongHashSet
*
* <p>github: https://github.com/carrotsearch/hppc release 0.9.0
*/
public class LongHashSet implements Iterable<LongCursor>, Accountable, Cloneable {
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(LongHashSet.class);

  /** The hash array holding keys. Slot {@code mask + 1} is reserved for the special empty key. */
  public long[] keys;

  /**
   * The number of stored keys (assigned key slots), excluding the special "empty" key, if any.
   *
   * @see #size()
   * @see #hasEmptyKey
   */
  protected int assigned;

  /** Mask for slot scans in {@link #keys}. */
  protected int mask;

  /** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */
  protected int resizeAt;

  /** Special treatment for the "empty slot" key marker (the value {@code 0L}). */
  protected boolean hasEmptyKey;

  /** The load factor for {@link #keys}. */
  protected double loadFactor;

  /** Seed used to ensure the hash iteration order is different from an iteration to another. */
  protected int iterationSeed;

  /** New instance with sane defaults. */
  public LongHashSet() {
    this(DEFAULT_EXPECTED_ELEMENTS);
  }

  /**
   * New instance with sane defaults.
   *
   * @param expectedElements The expected number of elements guaranteed not to cause a rehash
   *     (inclusive).
   */
  public LongHashSet(int expectedElements) {
    this(expectedElements, DEFAULT_LOAD_FACTOR);
  }

  /**
   * New instance with the provided defaults.
   *
   * @param expectedElements The expected number of elements guaranteed not to cause a rehash
   *     (inclusive).
   * @param loadFactor The load factor for internal buffers. Insane load factors (zero, full
   *     capacity) are rejected by {@link #verifyLoadFactor(double)}.
   */
  public LongHashSet(int expectedElements, double loadFactor) {
    this.loadFactor = verifyLoadFactor(loadFactor);
    iterationSeed = ITERATION_SEED.incrementAndGet();
    ensureCapacity(expectedElements);
  }

  /** New instance copying elements from another set. */
  public LongHashSet(LongHashSet set) {
    this(set.size());
    addAll(set);
  }

  /**
   * Adds {@code key} to the set.
   *
   * @return Returns {@code true} if the key was not already present in the set.
   */
  public boolean add(long key) {
    if (((key) == 0)) {
      // 0 is the empty-slot marker and cannot be stored in the array; track it with a flag.
      assert ((keys[mask + 1]) == 0);
      boolean added = !hasEmptyKey;
      hasEmptyKey = true;
      return added;
    } else {
      final long[] keys = this.keys;
      final int mask = this.mask;
      int slot = hashKey(key) & mask;
      long existing;
      // Linear probing: scan until the key or an empty slot is found.
      while (!((existing = keys[slot]) == 0)) {
        if (((key) == (existing))) {
          return false;
        }
        slot = (slot + 1) & mask;
      }
      if (assigned == resizeAt) {
        // Buffers are full; grow them and insert the pending key during the rehash.
        allocateThenInsertThenRehash(slot, key);
      } else {
        keys[slot] = key;
      }
      assigned++;
      return true;
    }
  }

  /**
   * Adds all elements from the given list (vararg) to this set.
   *
   * @return Returns the number of elements actually added as a result of this call (not previously
   *     present in the set).
   */
  public final int addAll(long... elements) {
    ensureCapacity(elements.length);
    int count = 0;
    for (long e : elements) {
      if (add(e)) {
        count++;
      }
    }
    return count;
  }

  /**
   * Adds all elements from the given iterable to this set.
   *
   * @return Returns the number of elements actually added as a result of this call (not previously
   *     present in the set).
   */
  public int addAll(Iterable<? extends LongCursor> iterable) {
    int count = 0;
    for (LongCursor cursor : iterable) {
      if (add(cursor.value)) {
        count++;
      }
    }
    return count;
  }

  /** Returns all keys of this set as a newly allocated array, in randomized iteration order. */
  public long[] toArray() {
    final long[] cloned = (new long[size()]);
    int j = 0;
    if (hasEmptyKey) {
      cloned[j++] = 0L;
    }
    final long[] keys = this.keys;
    // Randomize the starting slot and stride so iteration order varies between iterations.
    int seed = nextIterationSeed();
    int inc = iterationIncrement(seed);
    for (int i = 0, mask = this.mask, slot = seed & mask;
        i <= mask;
        i++, slot = (slot + inc) & mask) {
      long existing;
      if (!((existing = keys[slot]) == 0)) {
        cloned[j++] = existing;
      }
    }
    return cloned;
  }

  /**
   * Removes a single key from the set.
   *
   * @return Returns {@code true} if the key was present and has been removed.
   */
  public boolean remove(long key) {
    if (((key) == 0)) {
      boolean hadEmptyKey = hasEmptyKey;
      hasEmptyKey = false;
      return hadEmptyKey;
    } else {
      final long[] keys = this.keys;
      final int mask = this.mask;
      int slot = hashKey(key) & mask;
      long existing;
      while (!((existing = keys[slot]) == 0)) {
        if (((key) == (existing))) {
          // Close the gap left by the removed key to keep probe chains intact.
          shiftConflictingKeys(slot);
          return true;
        }
        slot = (slot + 1) & mask;
      }
      return false;
    }
  }

  /**
   * Removes all keys present in a given container.
   *
   * @return Returns the number of elements actually removed as a result of this call.
   */
  public int removeAll(LongHashSet other) {
    final int before = size();
    // Try to iterate over the smaller set or over the container that isn't implementing
    // efficient contains() lookup.
    if (other.size() >= size()) {
      if (hasEmptyKey && other.contains(0L)) {
        hasEmptyKey = false;
      }
      final long[] keys = this.keys;
      for (int slot = 0, max = this.mask; slot <= max; ) {
        long existing;
        if (!((existing = keys[slot]) == 0) && other.contains(existing)) {
          // Shift, do not increment slot.
          shiftConflictingKeys(slot);
        } else {
          slot++;
        }
      }
    } else {
      for (LongCursor c : other) {
        remove(c.value);
      }
    }
    return before - size();
  }

  /** Returns {@code true} if this set contains {@code key}. */
  public boolean contains(long key) {
    if (((key) == 0)) {
      return hasEmptyKey;
    } else {
      final long[] keys = this.keys;
      final int mask = this.mask;
      int slot = hashKey(key) & mask;
      long existing;
      while (!((existing = keys[slot]) == 0)) {
        if (((key) == (existing))) {
          return true;
        }
        slot = (slot + 1) & mask;
      }
      return false;
    }
  }

  /** Removes all keys, keeping the currently allocated buffers. */
  public void clear() {
    assigned = 0;
    hasEmptyKey = false;
    Arrays.fill(keys, 0L);
  }

  /** Removes all keys and releases the internal buffers, reallocating at the default capacity. */
  public void release() {
    assigned = 0;
    hasEmptyKey = false;
    keys = null;
    ensureCapacity(DEFAULT_EXPECTED_ELEMENTS);
  }

  /** Returns {@code true} if this set contains no keys. */
  public boolean isEmpty() {
    return size() == 0;
  }

  /**
   * Ensure this container can hold at least the given number of elements without resizing its
   * buffers.
   *
   * @param expectedElements The total number of elements, inclusive.
   */
  public void ensureCapacity(int expectedElements) {
    if (expectedElements > resizeAt || keys == null) {
      final long[] prevKeys = this.keys;
      allocateBuffers(minBufferSize(expectedElements, loadFactor));
      if (prevKeys != null && !isEmpty()) {
        rehash(prevKeys);
      }
    }
  }

  /** Returns the number of keys in this set, including the special empty key if present. */
  public int size() {
    return assigned + (hasEmptyKey ? 1 : 0);
  }

  @Override
  public int hashCode() {
    // Order-independent sum of mixed key hashes; the empty key contributes a fixed constant.
    int h = hasEmptyKey ? 0xDEADBEEF : 0;
    final long[] keys = this.keys;
    for (int slot = mask; slot >= 0; slot--) {
      long existing;
      if (!((existing = keys[slot]) == 0)) {
        h += BitMixer.mix(existing);
      }
    }
    return h;
  }

  @Override
  public boolean equals(Object obj) {
    return (this == obj)
        || (obj != null && getClass() == obj.getClass() && sameKeys(getClass().cast(obj)));
  }

  /** Return true if all keys of some other container exist in this container. */
  private boolean sameKeys(LongHashSet other) {
    if (other.size() != size()) {
      return false;
    }
    for (LongCursor c : other) {
      if (!contains(c.value)) {
        return false;
      }
    }
    return true;
  }

  @Override
  public LongHashSet clone() {
    try {
      LongHashSet cloned = (LongHashSet) super.clone();
      cloned.keys = keys.clone();
      cloned.hasEmptyKey = hasEmptyKey;
      // Give the clone its own iteration order.
      cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
      return cloned;
    } catch (CloneNotSupportedException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public Iterator<LongCursor> iterator() {
    return new EntryIterator();
  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys);
  }

  /**
   * Provides the next iteration seed used to build the iteration starting slot and offset
   * increment. This method does not need to be synchronized, what matters is that each thread gets
   * a sequence of varying seeds.
   */
  protected int nextIterationSeed() {
    return iterationSeed = BitMixer.mixPhi(iterationSeed);
  }

  /** An iterator implementation for {@link #iterator}. */
  protected final class EntryIterator extends AbstractIterator<LongCursor> {
    // Reused cursor instance; callers must not hold on to it across next() calls.
    private final LongCursor cursor;
    // Randomized stride so iteration order differs between iterations.
    private final int increment;
    private int index;
    private int slot;

    public EntryIterator() {
      cursor = new LongCursor();
      int seed = nextIterationSeed();
      increment = iterationIncrement(seed);
      slot = seed & mask;
    }

    @Override
    protected LongCursor fetch() {
      final int mask = LongHashSet.this.mask;
      // Walk the table with the randomized stride until a non-empty slot is found.
      while (index <= mask) {
        long existing;
        index++;
        slot = (slot + increment) & mask;
        if (!((existing = keys[slot]) == 0)) {
          cursor.index = slot;
          cursor.value = existing;
          return cursor;
        }
      }
      // Emit the special empty key last, if present.
      if (index == mask + 1 && hasEmptyKey) {
        cursor.index = index++;
        cursor.value = 0L;
        return cursor;
      }
      return done();
    }
  }

  /**
   * Create a set from a variable number of arguments or an array of <code>long</code>. The elements
   * are copied from the argument to the internal buffer.
   */
  public static LongHashSet from(long... elements) {
    final LongHashSet set = new LongHashSet(elements.length);
    set.addAll(elements);
    return set;
  }

  /**
   * Returns a hash code for the given key.
   *
   * <p>The output from this function should evenly distribute keys across the entire integer range.
   */
  protected int hashKey(long key) {
    assert !((key) == 0); // Handled as a special case (empty slot marker).
    return BitMixer.mixPhi(key);
  }

  /**
   * Returns a logical "index" of a given key that can be used to speed up follow-up logic in
   * certain scenarios (conditional logic).
   *
   * <p>The semantics of "indexes" are not strictly defined. Indexes may (and typically won't be)
   * contiguous.
   *
   * <p>The index is valid only between modifications (it will not be affected by read-only
   * operations).
   *
   * @see #indexExists
   * @see #indexGet
   * @see #indexInsert
   * @see #indexReplace
   * @param key The key to locate in the set.
   * @return A non-negative value of the logical "index" of the key in the set or a negative value
   *     if the key did not exist.
   */
  public int indexOf(long key) {
    final int mask = this.mask;
    if (((key) == 0)) {
      // The empty key lives at the reserved slot mask + 1.
      return hasEmptyKey ? mask + 1 : ~(mask + 1);
    } else {
      final long[] keys = this.keys;
      int slot = hashKey(key) & mask;
      long existing;
      while (!((existing = keys[slot]) == 0)) {
        if (((key) == (existing))) {
          return slot;
        }
        slot = (slot + 1) & mask;
      }
      // Not found: return the complement of the insertion slot.
      return ~slot;
    }
  }

  /**
   * @see #indexOf
   * @param index The index of a given key, as returned from {@link #indexOf}.
   * @return Returns <code>true</code> if the index corresponds to an existing key or false
   *     otherwise. This is equivalent to checking whether the index is a positive value (existing
   *     keys) or a negative value (non-existing keys).
   */
  public boolean indexExists(int index) {
    assert index < 0 || index <= mask || (index == mask + 1 && hasEmptyKey);
    return index >= 0;
  }

  /**
   * Returns the exact value of the existing key. This method makes sense for sets of objects which
   * define custom key-equality relationship.
   *
   * @see #indexOf
   * @param index The index of an existing key.
   * @return Returns the equivalent key currently stored in the set.
   * @throws AssertionError If assertions are enabled and the index does not correspond to an
   *     existing key.
   */
  public long indexGet(int index) {
    assert index >= 0 : "The index must point at an existing key.";
    assert index <= mask || (index == mask + 1 && hasEmptyKey);
    return keys[index];
  }

  /**
   * Replaces the existing equivalent key with the given one and returns any previous value stored
   * for that key.
   *
   * @see #indexOf
   * @param index The index of an existing key.
   * @param equivalentKey The key to put in the set as a replacement. Must be equivalent to the key
   *     currently stored at the provided index.
   * @return Returns the previous key stored in the set.
   * @throws AssertionError If assertions are enabled and the index does not correspond to an
   *     existing key.
   */
  public long indexReplace(int index, long equivalentKey) {
    assert index >= 0 : "The index must point at an existing key.";
    assert index <= mask || (index == mask + 1 && hasEmptyKey);
    assert ((keys[index]) == (equivalentKey));
    long previousValue = keys[index];
    keys[index] = equivalentKey;
    return previousValue;
  }

  /**
   * Inserts a key for an index that is not present in the set. This method may help in avoiding
   * double recalculation of the key's hash.
   *
   * @see #indexOf
   * @param index The index of a previously non-existing key, as returned from {@link #indexOf}.
   * @throws AssertionError If assertions are enabled and the index corresponds to an existing key.
   */
  public void indexInsert(int index, long key) {
    assert index < 0 : "The index must not point at an existing key.";
    index = ~index;
    if (((key) == 0)) {
      assert index == mask + 1;
      assert ((keys[index]) == 0);
      hasEmptyKey = true;
    } else {
      assert ((keys[index]) == 0);
      if (assigned == resizeAt) {
        allocateThenInsertThenRehash(index, key);
      } else {
        keys[index] = key;
      }
      assigned++;
    }
  }

  /**
   * Removes a key at an index previously acquired from {@link #indexOf}.
   *
   * @see #indexOf
   * @param index The index of the key to remove, as returned from {@link #indexOf}.
   * @throws AssertionError If assertions are enabled and the index does not correspond to an
   *     existing key.
   */
  public void indexRemove(int index) {
    assert index >= 0 : "The index must point at an existing key.";
    assert index <= mask || (index == mask + 1 && hasEmptyKey);
    if (index > mask) {
      hasEmptyKey = false;
    } else {
      shiftConflictingKeys(index);
    }
  }

  /**
   * Validate load factor range and return it. Override and suppress if you need insane load
   * factors.
   */
  protected double verifyLoadFactor(double loadFactor) {
    checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR);
    return loadFactor;
  }

  /** Rehash from old buffers to new buffers. */
  protected void rehash(long[] fromKeys) {
    assert HashContainers.checkPowerOfTwo(fromKeys.length - 1);
    // Rehash all stored keys into the new buffers.
    final long[] keys = this.keys;
    final int mask = this.mask;
    long existing;
    for (int i = fromKeys.length - 1; --i >= 0; ) {
      if (!((existing = fromKeys[i]) == 0)) {
        int slot = hashKey(existing) & mask;
        while (!((keys[slot]) == 0)) {
          slot = (slot + 1) & mask;
        }
        keys[slot] = existing;
      }
    }
  }

  /**
   * Allocate new internal buffers. This method attempts to allocate and assign internal buffers
   * atomically (either allocations succeed or not).
   */
  protected void allocateBuffers(int arraySize) {
    assert Integer.bitCount(arraySize) == 1;
    // Ensure no change is done if we hit an OOM.
    long[] prevKeys = this.keys;
    try {
      // One extra slot is reserved for the special empty-key marker.
      int emptyElementSlot = 1;
      this.keys = (new long[arraySize + emptyElementSlot]);
    } catch (OutOfMemoryError e) {
      this.keys = prevKeys;
      throw new BufferAllocationException(
          "Not enough memory to allocate buffers for rehashing: %,d -> %,d",
          e, this.keys == null ? 0 : size(), arraySize);
    }
    this.resizeAt = expandAtCount(arraySize, loadFactor);
    this.mask = arraySize - 1;
  }

  /**
   * This method is invoked when there is a new key to be inserted into the buffer but there is not
   * enough empty slots to do so.
   *
   * <p>New buffers are allocated. If this succeeds, we know we can proceed with rehashing so we
   * assign the pending element to the previous buffer (possibly violating the invariant of having
   * at least one empty slot) and rehash all keys, substituting new buffers at the end.
   */
  protected void allocateThenInsertThenRehash(int slot, long pendingKey) {
    assert assigned == resizeAt && ((keys[slot]) == 0) && !((pendingKey) == 0);
    // Try to allocate new buffers first. If we OOM, we leave in a consistent state.
    final long[] prevKeys = this.keys;
    allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor));
    assert this.keys.length > prevKeys.length;
    // We have succeeded at allocating new data so insert the pending key/value at
    // the free slot in the old arrays before rehashing.
    prevKeys[slot] = pendingKey;
    // Rehash old keys, including the pending key.
    rehash(prevKeys);
  }

  /**
   * Shift all the slot-conflicting keys allocated to (and including) <code>slot</code>.
   *
   * <p>Standard backward-shift deletion for linear probing: entries following the gap are moved
   * back into it when their ideal slot lies at or before the gap, keeping probe chains unbroken.
   */
  protected void shiftConflictingKeys(int gapSlot) {
    final long[] keys = this.keys;
    final int mask = this.mask;
    // Perform shifts of conflicting keys to fill in the gap.
    int distance = 0;
    while (true) {
      final int slot = (gapSlot + (++distance)) & mask;
      final long existing = keys[slot];
      if (((existing) == 0)) {
        // An empty slot terminates the probe chain: nothing beyond it can conflict.
        break;
      }
      final int idealSlot = hashKey(existing);
      final int shift = (slot - idealSlot) & mask;
      if (shift >= distance) {
        // Entry at this position was originally at or before the gap slot.
        // Move the conflict-shifted entry to the gap's position and repeat the procedure
        // for any entries to the right of the current position, treating it
        // as the new gap.
        keys[gapSlot] = existing;
        gapSlot = slot;
        distance = 0;
      }
    }
    // Mark the last found gap slot without a conflict as empty.
    keys[gapSlot] = 0L;
    assigned--;
  }
}

View File

@ -17,14 +17,7 @@
package org.apache.lucene.util.hppc;
import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_EXPECTED_ELEMENTS;
import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_LOAD_FACTOR;
import static org.apache.lucene.util.hppc.HashContainers.ITERATION_SEED;
import static org.apache.lucene.util.hppc.HashContainers.MAX_HASH_ARRAY_LENGTH;
import static org.apache.lucene.util.hppc.HashContainers.MAX_LOAD_FACTOR;
import static org.apache.lucene.util.hppc.HashContainers.MIN_HASH_ARRAY_LENGTH;
import static org.apache.lucene.util.hppc.HashContainers.MIN_LOAD_FACTOR;
import static org.apache.lucene.util.hppc.HashContainers.*;
import java.util.Arrays;
import java.util.Iterator;
@ -103,10 +96,10 @@ public class LongIntHashMap
ensureCapacity(expectedElements);
}
/** Create a hash map from all key-value pairs of another container. */
public LongIntHashMap(Iterable<? extends LongIntCursor> container) {
this();
putAll(container);
/** Create a hash map from all key-value pairs of another map. */
public LongIntHashMap(LongIntHashMap map) {
this(map.size());
putAll(map);
}
public int put(long key, int value) {
@ -114,8 +107,8 @@ public class LongIntHashMap
final int mask = this.mask;
if (((key) == 0)) {
int previousValue = hasEmptyKey ? values[mask + 1] : 0;
hasEmptyKey = true;
int previousValue = values[mask + 1];
values[mask + 1] = value;
return previousValue;
} else {
@ -212,6 +205,9 @@ public class LongIntHashMap
public int remove(long key) {
final int mask = this.mask;
if (((key) == 0)) {
if (!hasEmptyKey) {
return 0;
}
hasEmptyKey = false;
int previousValue = values[mask + 1];
values[mask + 1] = 0;
@ -364,6 +360,7 @@ public class LongIntHashMap
int previousValue = values[index];
if (index > mask) {
assert index == mask + 1;
hasEmptyKey = false;
values[index] = 0;
} else {
@ -409,7 +406,8 @@ public class LongIntHashMap
@Override
public boolean equals(Object obj) {
return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj));
return (this == obj)
|| (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)));
}
/** Return true if all keys of some other container exist in this container. */
@ -643,7 +641,7 @@ public class LongIntHashMap
cloned.keys = keys.clone();
cloned.values = values.clone();
cloned.hasEmptyKey = hasEmptyKey;
cloned.iterationSeed = nextIterationSeed();
cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
return cloned;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
@ -782,64 +780,6 @@ public class LongIntHashMap
rehash(prevKeys, prevValues);
}
static int nextBufferSize(int arraySize, int elements, double loadFactor) {
assert checkPowerOfTwo(arraySize);
if (arraySize == MAX_HASH_ARRAY_LENGTH) {
throw new BufferAllocationException(
"Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
elements, loadFactor);
}
return arraySize << 1;
}
static int expandAtCount(int arraySize, double loadFactor) {
assert checkPowerOfTwo(arraySize);
// Take care of hash container invariant (there has to be at least one empty slot to ensure
// the lookup loop finds either the element or an empty slot).
return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor));
}
static boolean checkPowerOfTwo(int arraySize) {
// These are internals, we can just assert without retrying.
assert arraySize > 1;
assert nextHighestPowerOfTwo(arraySize) == arraySize;
return true;
}
static int minBufferSize(int elements, double loadFactor) {
if (elements < 0) {
throw new IllegalArgumentException("Number of elements must be >= 0: " + elements);
}
long length = (long) Math.ceil(elements / loadFactor);
if (length == elements) {
length++;
}
length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length));
if (length > MAX_HASH_ARRAY_LENGTH) {
throw new BufferAllocationException(
"Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
elements, loadFactor);
}
return (int) length;
}
static void checkLoadFactor(
double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) {
if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) {
throw new BufferAllocationException(
"The load factor should be in range [%.2f, %.2f]: %f",
minAllowedInclusive, maxAllowedInclusive, loadFactor);
}
}
static int iterationIncrement(int seed) {
return 29 + ((seed & 7) << 1); // Small odd integer.
}
/**
* Shift all the slot-conflicting keys and values allocated to (and including) <code>slot</code>.
*/

View File

@ -17,7 +17,6 @@
package org.apache.lucene.util.hppc;
import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo;
import static org.apache.lucene.util.hppc.HashContainers.*;
import java.util.Arrays;
@ -98,10 +97,10 @@ public class LongObjectHashMap<VType>
ensureCapacity(expectedElements);
}
/** Create a hash map from all key-value pairs of another container. */
public LongObjectHashMap(Iterable<? extends LongObjectCursor<? extends VType>> container) {
this();
putAll(container);
/** Create a hash map from all key-value pairs of another map. */
public LongObjectHashMap(LongObjectHashMap<VType> map) {
this(map.size());
putAll(map);
}
public VType put(long key, VType value) {
@ -109,8 +108,8 @@ public class LongObjectHashMap<VType>
final int mask = this.mask;
if (((key) == 0)) {
VType previousValue = hasEmptyKey ? (VType) values[mask + 1] : null;
hasEmptyKey = true;
VType previousValue = (VType) values[mask + 1];
values[mask + 1] = value;
return previousValue;
} else {
@ -173,6 +172,9 @@ public class LongObjectHashMap<VType>
public VType remove(long key) {
final int mask = this.mask;
if (((key) == 0)) {
if (!hasEmptyKey) {
return null;
}
hasEmptyKey = false;
VType previousValue = (VType) values[mask + 1];
values[mask + 1] = 0;
@ -325,6 +327,7 @@ public class LongObjectHashMap<VType>
VType previousValue = (VType) values[index];
if (index > mask) {
assert index == mask + 1;
hasEmptyKey = false;
values[index] = 0;
} else {
@ -370,7 +373,8 @@ public class LongObjectHashMap<VType>
@Override
public boolean equals(Object obj) {
return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj));
return (this == obj)
|| (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)));
}
/** Return true if all keys of some other container exist in this container. */
@ -612,7 +616,7 @@ public class LongObjectHashMap<VType>
cloned.keys = keys.clone();
cloned.values = values.clone();
cloned.hasEmptyKey = hasEmptyKey;
cloned.iterationSeed = nextIterationSeed();
cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
return cloned;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
@ -751,64 +755,6 @@ public class LongObjectHashMap<VType>
rehash(prevKeys, prevValues);
}
static int nextBufferSize(int arraySize, int elements, double loadFactor) {
assert checkPowerOfTwo(arraySize);
if (arraySize == MAX_HASH_ARRAY_LENGTH) {
throw new BufferAllocationException(
"Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
elements, loadFactor);
}
return arraySize << 1;
}
static int expandAtCount(int arraySize, double loadFactor) {
assert checkPowerOfTwo(arraySize);
// Take care of hash container invariant (there has to be at least one empty slot to ensure
// the lookup loop finds either the element or an empty slot).
return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor));
}
static boolean checkPowerOfTwo(int arraySize) {
// These are internals, we can just assert without retrying.
assert arraySize > 1;
assert nextHighestPowerOfTwo(arraySize) == arraySize;
return true;
}
static int minBufferSize(int elements, double loadFactor) {
if (elements < 0) {
throw new IllegalArgumentException("Number of elements must be >= 0: " + elements);
}
long length = (long) Math.ceil(elements / loadFactor);
if (length == elements) {
length++;
}
length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length));
if (length > MAX_HASH_ARRAY_LENGTH) {
throw new BufferAllocationException(
"Maximum array size exceeded for this load factor (elements: %d, load factor: %f)",
elements, loadFactor);
}
return (int) length;
}
static void checkLoadFactor(
double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) {
if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) {
throw new BufferAllocationException(
"The load factor should be in range [%.2f, %.2f]: %f",
minAllowedInclusive, maxAllowedInclusive, loadFactor);
}
}
static int iterationIncrement(int seed) {
return 29 + ((seed & 7) << 1); // Small odd integer.
}
/**
* Shift all the slot-conflicting keys and values allocated to (and including) <code>slot</code>.
*/

View File

@ -23,9 +23,9 @@ import java.util.stream.Collectors;
import java.util.stream.LongStream;
import org.apache.lucene.tests.util.LuceneTestCase;
public class TestLongHashSet extends LuceneTestCase {
public class TestDocValuesLongHashSet extends LuceneTestCase {
private void assertEquals(Set<Long> set1, LongHashSet longHashSet) {
private void assertEquals(Set<Long> set1, DocValuesLongHashSet longHashSet) {
assertEquals(set1.size(), longHashSet.size());
Set<Long> set2 = longHashSet.stream().boxed().collect(Collectors.toSet());
@ -47,12 +47,13 @@ public class TestLongHashSet extends LuceneTestCase {
assertTrue(set1.stream().allMatch(longHashSet::contains));
}
private void assertNotEquals(Set<Long> set1, LongHashSet longHashSet) {
private void assertNotEquals(Set<Long> set1, DocValuesLongHashSet longHashSet) {
Set<Long> set2 = longHashSet.stream().boxed().collect(Collectors.toSet());
LuceneTestCase.assertNotEquals(set1, set2);
LongHashSet set3 = new LongHashSet(set1.stream().mapToLong(Long::longValue).sorted().toArray());
DocValuesLongHashSet set3 =
new DocValuesLongHashSet(set1.stream().mapToLong(Long::longValue).sorted().toArray());
LuceneTestCase.assertNotEquals(set2, set3.stream().boxed().collect(Collectors.toSet()));
@ -61,7 +62,7 @@ public class TestLongHashSet extends LuceneTestCase {
public void testEmpty() {
Set<Long> set1 = new HashSet<>();
LongHashSet set2 = new LongHashSet(new long[] {});
DocValuesLongHashSet set2 = new DocValuesLongHashSet(new long[] {});
assertEquals(0, set2.size());
assertEquals(Long.MAX_VALUE, set2.minValue);
assertEquals(Long.MIN_VALUE, set2.maxValue);
@ -70,14 +71,14 @@ public class TestLongHashSet extends LuceneTestCase {
public void testOneValue() {
Set<Long> set1 = new HashSet<>(Arrays.asList(42L));
LongHashSet set2 = new LongHashSet(new long[] {42L});
DocValuesLongHashSet set2 = new DocValuesLongHashSet(new long[] {42L});
assertEquals(1, set2.size());
assertEquals(42L, set2.minValue);
assertEquals(42L, set2.maxValue);
assertEquals(set1, set2);
set1 = new HashSet<>(Arrays.asList(Long.MIN_VALUE));
set2 = new LongHashSet(new long[] {Long.MIN_VALUE});
set2 = new DocValuesLongHashSet(new long[] {Long.MIN_VALUE});
assertEquals(1, set2.size());
assertEquals(Long.MIN_VALUE, set2.minValue);
assertEquals(Long.MIN_VALUE, set2.maxValue);
@ -86,14 +87,14 @@ public class TestLongHashSet extends LuceneTestCase {
public void testTwoValues() {
Set<Long> set1 = new HashSet<>(Arrays.asList(42L, Long.MAX_VALUE));
LongHashSet set2 = new LongHashSet(new long[] {42L, Long.MAX_VALUE});
DocValuesLongHashSet set2 = new DocValuesLongHashSet(new long[] {42L, Long.MAX_VALUE});
assertEquals(2, set2.size());
assertEquals(42, set2.minValue);
assertEquals(Long.MAX_VALUE, set2.maxValue);
assertEquals(set1, set2);
set1 = new HashSet<>(Arrays.asList(Long.MIN_VALUE, 42L));
set2 = new LongHashSet(new long[] {Long.MIN_VALUE, 42L});
set2 = new DocValuesLongHashSet(new long[] {Long.MIN_VALUE, 42L});
assertEquals(2, set2.size());
assertEquals(Long.MIN_VALUE, set2.minValue);
assertEquals(42, set2.maxValue);
@ -101,14 +102,15 @@ public class TestLongHashSet extends LuceneTestCase {
}
public void testSameValue() {
LongHashSet set2 = new LongHashSet(new long[] {42L, 42L});
DocValuesLongHashSet set2 = new DocValuesLongHashSet(new long[] {42L, 42L});
assertEquals(1, set2.size());
assertEquals(42L, set2.minValue);
assertEquals(42L, set2.maxValue);
}
public void testSameMissingPlaceholder() {
LongHashSet set2 = new LongHashSet(new long[] {Long.MIN_VALUE, Long.MIN_VALUE});
DocValuesLongHashSet set2 =
new DocValuesLongHashSet(new long[] {Long.MIN_VALUE, Long.MIN_VALUE});
assertEquals(1, set2.size());
assertEquals(Long.MIN_VALUE, set2.minValue);
assertEquals(Long.MIN_VALUE, set2.maxValue);
@ -130,7 +132,7 @@ public class TestLongHashSet extends LuceneTestCase {
}
Set<Long> set1 = LongStream.of(values).boxed().collect(Collectors.toSet());
Arrays.sort(values);
LongHashSet set2 = new LongHashSet(values);
DocValuesLongHashSet set2 = new DocValuesLongHashSet(values);
assertEquals(set1, set2);
}
}

View File

@ -0,0 +1,469 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.hppc;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.lessThan;
import static org.hamcrest.Matchers.not;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.hamcrest.MatcherAssert;
import org.junit.Before;
import org.junit.Test;
/**
* Tests for {@link IntHashSet}.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.IntHashSetTest
*
* <p>github: https://github.com/carrotsearch/hppc release: 0.9.0
*/
public class TestIntHashSet extends LuceneTestCase {
private static final int EMPTY_KEY = 0;
private final int keyE = 0;
private final int key1 = cast(1);
private final int key2 = cast(2);
private final int key3 = cast(3);
private final int key4 = cast(4);
/** Per-test fresh initialized instance. */
private IntHashSet set;
/** Convert to target type from an integer used to test stuff. */
private int cast(int v) {
return v;
}
@Before
public void initialize() {
set = new IntHashSet();
}
@Test
public void testAddAllViaInterface() {
set.addAll(key1, key2);
IntHashSet iface = new IntHashSet();
iface.clear();
iface.addAll(set);
MatcherAssert.assertThat(set(iface.toArray()), is(equalTo(set(key1, key2))));
}
@Test
public void testIndexMethods() {
set.add(keyE);
set.add(key1);
MatcherAssert.assertThat(set.indexOf(keyE), is(greaterThanOrEqualTo(0)));
MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0)));
MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0)));
MatcherAssert.assertThat(set.indexExists(set.indexOf(keyE)), is(true));
MatcherAssert.assertThat(set.indexExists(set.indexOf(key1)), is(true));
MatcherAssert.assertThat(set.indexExists(set.indexOf(key2)), is(false));
MatcherAssert.assertThat(set.indexGet(set.indexOf(keyE)), is(equalTo(keyE)));
MatcherAssert.assertThat(set.indexGet(set.indexOf(key1)), is(equalTo(key1)));
expectThrows(
AssertionError.class,
() -> {
set.indexGet(set.indexOf(key2));
});
MatcherAssert.assertThat(set.indexReplace(set.indexOf(keyE), keyE), is(equalTo(keyE)));
MatcherAssert.assertThat(set.indexReplace(set.indexOf(key1), key1), is(equalTo(key1)));
set.indexInsert(set.indexOf(key2), key2);
MatcherAssert.assertThat(set.indexGet(set.indexOf(key2)), is(equalTo(key2)));
MatcherAssert.assertThat(set.size(), is(equalTo(3)));
set.indexRemove(set.indexOf(keyE));
MatcherAssert.assertThat(set.size(), is(equalTo(2)));
set.indexRemove(set.indexOf(key2));
MatcherAssert.assertThat(set.size(), is(equalTo(1)));
MatcherAssert.assertThat(set.indexOf(keyE), is(lessThan(0)));
MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0)));
MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0)));
}
@Test
public void testCursorIndexIsValid() {
set.add(keyE);
set.add(key1);
set.add(key2);
for (IntCursor c : set) {
MatcherAssert.assertThat(set.indexExists(c.index), is(true));
MatcherAssert.assertThat(set.indexGet(c.index), is(equalTo(c.value)));
}
}
@Test
public void testEmptyKey() {
IntHashSet set = new IntHashSet();
boolean b = set.add(EMPTY_KEY);
MatcherAssert.assertThat(b, is(true));
MatcherAssert.assertThat(set.add(EMPTY_KEY), is(false));
MatcherAssert.assertThat(set.size(), is(equalTo(1)));
MatcherAssert.assertThat(set.isEmpty(), is(false));
MatcherAssert.assertThat(set(set.toArray()), is(equalTo(set(EMPTY_KEY))));
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true));
int index = set.indexOf(EMPTY_KEY);
MatcherAssert.assertThat(set.indexExists(index), is(true));
MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY)));
MatcherAssert.assertThat(set.indexReplace(index, EMPTY_KEY), is(equalTo(EMPTY_KEY)));
if (random().nextBoolean()) {
b = set.remove(EMPTY_KEY);
MatcherAssert.assertThat(b, is(true));
} else {
set.indexRemove(index);
}
MatcherAssert.assertThat(set.size(), is(equalTo(0)));
MatcherAssert.assertThat(set.isEmpty(), is(true));
MatcherAssert.assertThat(set(set.toArray()), is(empty()));
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(false));
index = set.indexOf(EMPTY_KEY);
MatcherAssert.assertThat(set.indexExists(index), is(false));
set.indexInsert(index, EMPTY_KEY);
set.add(key1);
MatcherAssert.assertThat(set.size(), is(equalTo(2)));
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true));
index = set.indexOf(EMPTY_KEY);
MatcherAssert.assertThat(set.indexExists(index), is(true));
MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY)));
}
@Test
public void testEnsureCapacity() {
final AtomicInteger expands = new AtomicInteger();
IntHashSet set =
new IntHashSet(0) {
@Override
protected void allocateBuffers(int arraySize) {
super.allocateBuffers(arraySize);
expands.incrementAndGet();
}
};
// Add some elements.
final int max = rarely() ? 0 : randomIntBetween(0, 250);
for (int i = 0; i < max; i++) {
set.add(cast(i));
}
final int additions = randomIntBetween(max, max + 5000);
set.ensureCapacity(additions + set.size());
final int before = expands.get();
for (int i = 0; i < additions; i++) {
set.add(cast(i));
}
assertEquals(before, expands.get());
}
@Test
public void testInitiallyEmpty() {
assertEquals(0, set.size());
}
@Test
public void testAdd() {
assertTrue(set.add(key1));
assertFalse(set.add(key1));
assertEquals(1, set.size());
}
@Test
public void testAdd2() {
set.addAll(key1, key1);
assertEquals(1, set.size());
assertEquals(1, set.addAll(key1, key2));
assertEquals(2, set.size());
}
@Test
public void testAddVarArgs() {
set.addAll(asArray(0, 1, 2, 1, 0));
assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), 0, 1, 2);
}
@Test
public void testAddAll() {
IntHashSet set2 = new IntHashSet();
set2.addAll(asArray(1, 2));
set.addAll(asArray(0, 1));
assertEquals(1, set.addAll(set2));
assertEquals(0, set.addAll(set2));
assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), 0, 1, 2);
}
@Test
public void testRemove() {
set.addAll(asArray(0, 1, 2, 3, 4));
assertTrue(set.remove(key2));
assertFalse(set.remove(key2));
assertEquals(4, set.size());
assertSortedListEquals(set.toArray(), 0, 1, 3, 4);
}
@Test
public void testInitialCapacityAndGrowth() {
for (int i = 0; i < 256; i++) {
IntHashSet set = new IntHashSet(i);
for (int j = 0; j < i; j++) {
set.add(cast(j));
}
assertEquals(i, set.size());
}
}
@Test
public void testBug_HPPC73_FullCapacityGet() {
final AtomicInteger reallocations = new AtomicInteger();
final int elements = 0x7F;
set =
new IntHashSet(elements, 1f) {
@Override
protected double verifyLoadFactor(double loadFactor) {
// Skip load factor sanity range checking.
return loadFactor;
}
@Override
protected void allocateBuffers(int arraySize) {
super.allocateBuffers(arraySize);
reallocations.incrementAndGet();
}
};
int reallocationsBefore = reallocations.get();
assertEquals(reallocationsBefore, 1);
for (int i = 1; i <= elements; i++) {
set.add(cast(i));
}
// Non-existent key.
int outOfSet = cast(elements + 1);
set.remove(outOfSet);
assertFalse(set.contains(outOfSet));
assertEquals(reallocationsBefore, reallocations.get());
// Should not expand because we're replacing an existing element.
assertFalse(set.add(key1));
assertEquals(reallocationsBefore, reallocations.get());
// Remove from a full set.
set.remove(key1);
assertEquals(reallocationsBefore, reallocations.get());
set.add(key1);
// Check expand on "last slot of a full map" condition.
set.add(outOfSet);
assertEquals(reallocationsBefore + 1, reallocations.get());
}
@Test
public void testRemoveAllFromLookupContainer() {
set.addAll(asArray(0, 1, 2, 3, 4));
IntHashSet list2 = new IntHashSet();
list2.addAll(asArray(1, 3, 5));
assertEquals(2, set.removeAll(list2));
assertEquals(3, set.size());
assertSortedListEquals(set.toArray(), 0, 2, 4);
}
@Test
public void testClear() {
set.addAll(asArray(1, 2, 3));
set.clear();
assertEquals(0, set.size());
}
@Test
public void testRelease() {
set.addAll(asArray(1, 2, 3));
set.release();
assertEquals(0, set.size());
set.addAll(asArray(1, 2, 3));
assertEquals(3, set.size());
}
@Test
public void testIterable() {
set.addAll(asArray(1, 2, 2, 3, 4));
set.remove(key2);
assertEquals(3, set.size());
int count = 0;
for (IntCursor cursor : set) {
count++;
assertTrue(set.contains(cursor.value));
}
assertEquals(count, set.size());
set.clear();
assertFalse(set.iterator().hasNext());
}
/** Runs random insertions/deletions/clearing and compares the results against {@link HashSet}. */
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testAgainstHashSet() {
final Random rnd = RandomizedTest.getRandom();
final HashSet other = new HashSet();
for (int size = 1000; size < 20000; size += 4000) {
other.clear();
set.clear();
for (int round = 0; round < size * 20; round++) {
int key = cast(rnd.nextInt(size));
if (rnd.nextInt(50) == 0) {
key = 0;
}
if (rnd.nextBoolean()) {
if (rnd.nextBoolean()) {
int index = set.indexOf(key);
if (set.indexExists(index)) {
set.indexReplace(index, key);
} else {
set.indexInsert(index, key);
}
} else {
set.add(key);
}
other.add(key);
assertTrue(set.contains(key));
assertTrue(set.indexExists(set.indexOf(key)));
} else {
assertEquals(other.contains(key), set.contains(key));
boolean removed;
if (set.contains(key) && rnd.nextBoolean()) {
set.indexRemove(set.indexOf(key));
removed = true;
} else {
removed = set.remove(key);
}
assertEquals(other.remove(key), removed);
}
assertEquals(other.size(), set.size());
}
}
}
@Test
public void testHashCodeEquals() {
IntHashSet l0 = new IntHashSet();
assertEquals(0, l0.hashCode());
assertEquals(l0, new IntHashSet());
IntHashSet l1 = IntHashSet.from(key1, key2, key3);
IntHashSet l2 = IntHashSet.from(key1, key2);
l2.add(key3);
assertEquals(l1.hashCode(), l2.hashCode());
assertEquals(l1, l2);
}
@Test
public void testClone() {
this.set.addAll(key1, key2, key3);
IntHashSet cloned = set.clone();
cloned.remove(key1);
assertSortedListEquals(set.toArray(), key1, key2, key3);
assertSortedListEquals(cloned.toArray(), key2, key3);
}
@Test
public void testEqualsSameClass() {
IntHashSet l1 = IntHashSet.from(key1, key2, key3);
IntHashSet l2 = IntHashSet.from(key1, key2, key3);
IntHashSet l3 = IntHashSet.from(key1, key2, key4);
MatcherAssert.assertThat(l1, is(equalTo(l2)));
MatcherAssert.assertThat(l1.hashCode(), is(equalTo(l2.hashCode())));
MatcherAssert.assertThat(l1, is(not(equalTo(l3))));
}
@Test
public void testEqualsSubClass() {
class Sub extends IntHashSet {}
;
IntHashSet l1 = IntHashSet.from(key1, key2, key3);
IntHashSet l2 = new Sub();
IntHashSet l3 = new Sub();
l2.addAll(l1);
l3.addAll(l1);
MatcherAssert.assertThat(l2, is(equalTo(l3)));
MatcherAssert.assertThat(l1, is(not(equalTo(l2))));
}
private static int randomIntBetween(int min, int max) {
return min + random().nextInt(max + 1 - min);
}
private static Set<Integer> set(int... elements) {
Set<Integer> set = new HashSet<>();
for (int element : elements) {
set.add(element);
}
return set;
}
private static int[] asArray(int... elements) {
return elements;
}
/** Check if the array's content is identical to a given sequence of elements. */
private static void assertSortedListEquals(int[] array, int... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
assertArrayEquals(elements, array);
}
}

View File

@ -17,7 +17,9 @@
package org.apache.lucene.util.hppc;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
@ -35,51 +37,44 @@ import org.junit.Test;
public class TestIntIntHashMap extends LuceneTestCase {
/* Ready to use key values. */
protected int keyE = 0;
protected int key0 = cast(0), k0 = key0;
protected int key1 = cast(1), k1 = key1;
protected int key2 = cast(2), k2 = key2;
protected int key3 = cast(3), k3 = key3;
protected int key4 = cast(4), k4 = key4;
protected int key5 = cast(5), k5 = key5;
protected int key6 = cast(6), k6 = key6;
protected int key7 = cast(7), k7 = key7;
protected int key8 = cast(8), k8 = key8;
protected int key9 = cast(9), k9 = key9;
private final int keyE = 0;
private final int key1 = cast(1);
private final int key2 = cast(2);
private final int key3 = cast(3);
private final int key4 = cast(4);
/** Convert to target type from an integer used to test stuff. */
public int cast(int v) {
private int cast(int v) {
return v;
}
/** Create a new array of a given type and copy the arguments to this array. */
/* */
public final int[] newArray(int... elements) {
private int[] newArray(int... elements) {
return elements;
}
public static int randomIntBetween(int min, int max) {
private static int randomIntBetween(int min, int max) {
return min + random().nextInt(max + 1 - min);
}
/** Check if the array's content is identical to a given sequence of elements. */
public static void assertSortedListEquals(int[] array, int... elements) {
private static void assertSortedListEquals(int[] array, int... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
Arrays.sort(elements);
assertArrayEquals(elements, array);
}
protected int value0 = vcast(0);
protected int value1 = vcast(1);
protected int value2 = vcast(2);
protected int value3 = vcast(3);
protected int value4 = vcast(4);
private final int value0 = vcast(0);
private final int value1 = vcast(1);
private final int value2 = vcast(2);
private final int value3 = vcast(3);
private final int value4 = vcast(4);
/** Per-test fresh initialized instance. */
public IntIntHashMap map = newInstance();
private IntIntHashMap map = newInstance();
protected IntIntHashMap newInstance() {
private IntIntHashMap newInstance() {
return new IntIntHashMap();
}
@ -101,13 +96,12 @@ public class TestIntIntHashMap extends LuceneTestCase {
}
/** Convert to target type from an integer used to test stuff. */
protected int vcast(int value) {
private int vcast(int value) {
return value;
}
/** Create a new array of a given type and copy the arguments to this array. */
/* */
protected final int[] newvArray(int... elements) {
private int[] newvArray(int... elements) {
return elements;
}
@ -180,7 +174,6 @@ public class TestIntIntHashMap extends LuceneTestCase {
AssertionError.class,
() -> {
map.indexGet(map.indexOf(key2));
fail();
});
assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3));
@ -342,6 +335,15 @@ public class TestIntIntHashMap extends LuceneTestCase {
map.remove(empty);
assertEquals(0, map.get(empty));
assertEquals(0, map.size());
assertEquals(0, map.put(empty, value1));
assertEquals(value1, map.put(empty, value2));
map.clear();
assertFalse(map.indexExists(map.indexOf(empty)));
assertEquals(0, map.put(empty, value1));
map.clear();
assertEquals(0, map.remove(empty));
}
/* */
@ -380,6 +382,11 @@ public class TestIntIntHashMap extends LuceneTestCase {
// These are internals, but perhaps worth asserting too.
assertEquals(0, map.assigned);
// Check values are cleared.
assertEquals(0, map.put(key1, value1));
assertEquals(0, map.remove(key2));
map.clear();
// Check if the map behaves properly upon subsequent use.
testPutWithExpansions();
}
@ -455,13 +462,13 @@ public class TestIntIntHashMap extends LuceneTestCase {
assertEquals(reallocationsBefore, reallocations.get());
// Should not expand because we're replacing an existing element.
map.put(k1, value2);
map.put(key1, value2);
assertEquals(reallocationsBefore, reallocations.get());
// Remove from a full map.
map.remove(k1);
map.remove(key1);
assertEquals(reallocationsBefore, reallocations.get());
map.put(k1, value2);
map.put(key1, value2);
// Check expand on "last slot of a full map" condition.
map.put(outOfSet, value1);
@ -499,6 +506,61 @@ public class TestIntIntHashMap extends LuceneTestCase {
assertFalse(l2.equals(l1));
}
/** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testAgainstHashMap() {
final Random rnd = RandomizedTest.getRandom();
final HashMap other = new HashMap();
for (int size = 1000; size < 20000; size += 4000) {
other.clear();
map.clear();
for (int round = 0; round < size * 20; round++) {
int key = cast(rnd.nextInt(size));
if (rnd.nextInt(50) == 0) {
key = 0;
}
int value = vcast(rnd.nextInt());
boolean hadOldValue = map.containsKey(key);
if (rnd.nextBoolean()) {
int previousValue;
if (rnd.nextBoolean()) {
int index = map.indexOf(key);
if (map.indexExists(index)) {
previousValue = map.indexReplace(index, value);
} else {
map.indexInsert(index, key, value);
previousValue = 0;
}
} else {
previousValue = map.put(key, value);
}
assertEquals(
other.put(key, value), ((previousValue) == 0) && !hadOldValue ? null : previousValue);
assertEquals(value, map.get(key));
assertEquals(value, map.indexGet(map.indexOf(key)));
assertTrue(map.containsKey(key));
assertTrue(map.indexExists(map.indexOf(key)));
} else {
assertEquals(other.containsKey(key), map.containsKey(key));
int previousValue =
map.containsKey(key) && rnd.nextBoolean()
? map.indexRemove(map.indexOf(key))
: map.remove(key);
assertEquals(
other.remove(key), ((previousValue) == 0) && !hadOldValue ? null : previousValue);
}
assertEquals(other.size(), map.size());
}
}
}
/*
*
*/
@ -549,16 +611,16 @@ public class TestIntIntHashMap extends LuceneTestCase {
@Test
public void testEqualsSameClass() {
IntIntHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
l1.put(key1, value0);
l1.put(key2, value1);
l1.put(key3, value2);
IntIntHashMap l2 = new IntIntHashMap(l1);
l2.putAll(l1);
IntIntHashMap l3 = new IntIntHashMap(l2);
l3.putAll(l2);
l3.put(k4, value0);
l3.put(key4, value0);
assertEquals(l2, l1);
assertEquals(l2.hashCode(), l1.hashCode());
@ -571,13 +633,13 @@ public class TestIntIntHashMap extends LuceneTestCase {
class Sub extends IntIntHashMap {}
IntIntHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
l1.put(key1, value0);
l1.put(key2, value1);
l1.put(key3, value2);
IntIntHashMap l2 = new Sub();
l2.putAll(l1);
l2.put(k4, value3);
l2.put(key4, value3);
IntIntHashMap l3 = new Sub();
l3.putAll(l2);

View File

@ -17,7 +17,9 @@
package org.apache.lucene.util.hppc;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
@ -36,35 +38,28 @@ import org.junit.Test;
public class TestIntObjectHashMap extends LuceneTestCase {
/* Ready to use key values. */
protected int keyE = 0;
protected int key0 = cast(0), k0 = key0;
protected int key1 = cast(1), k1 = key1;
protected int key2 = cast(2), k2 = key2;
protected int key3 = cast(3), k3 = key3;
protected int key4 = cast(4), k4 = key4;
protected int key5 = cast(5), k5 = key5;
protected int key6 = cast(6), k6 = key6;
protected int key7 = cast(7), k7 = key7;
protected int key8 = cast(8), k8 = key8;
protected int key9 = cast(9), k9 = key9;
private final int keyE = 0;
private final int key1 = cast(1);
private final int key2 = cast(2);
private final int key3 = cast(3);
private final int key4 = cast(4);
/** Convert to target type from an integer used to test stuff. */
public int cast(int v) {
private int cast(int v) {
return v;
}
/** Create a new array of a given type and copy the arguments to this array. */
/* */
public final int[] newArray(int... elements) {
private int[] newArray(int... elements) {
return elements;
}
public static int randomIntBetween(int min, int max) {
private static int randomIntBetween(int min, int max) {
return min + random().nextInt(max + 1 - min);
}
/** Check if the array's content is identical to a given sequence of elements. */
public static void assertSortedListEquals(int[] array, int... elements) {
private static void assertSortedListEquals(int[] array, int... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
Arrays.sort(elements);
@ -72,22 +67,22 @@ public class TestIntObjectHashMap extends LuceneTestCase {
}
/** Check if the array's content is identical to a given sequence of elements. */
public static void assertSortedListEquals(Object[] array, Object... elements) {
private static void assertSortedListEquals(Object[] array, Object... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
assertArrayEquals(elements, array);
}
protected int value0 = vcast(0);
protected int value1 = vcast(1);
protected int value2 = vcast(2);
protected int value3 = vcast(3);
protected int value4 = vcast(4);
private final int value0 = vcast(0);
private final int value1 = vcast(1);
private final int value2 = vcast(2);
private final int value3 = vcast(3);
private final int value4 = vcast(4);
/** Per-test fresh initialized instance. */
public IntObjectHashMap<Object> map = newInstance();
private IntObjectHashMap<Object> map = newInstance();
protected IntObjectHashMap newInstance() {
private IntObjectHashMap newInstance() {
return new IntObjectHashMap();
}
@ -109,13 +104,13 @@ public class TestIntObjectHashMap extends LuceneTestCase {
}
/** Convert to target type from an integer used to test stuff. */
protected int vcast(int value) {
private int vcast(int value) {
return value;
}
/** Create a new array of a given type and copy the arguments to this array. */
/* */
protected final Object[] newvArray(Object... elements) {
private Object[] newvArray(Object... elements) {
return elements;
}
@ -188,7 +183,6 @@ public class TestIntObjectHashMap extends LuceneTestCase {
AssertionError.class,
() -> {
map.indexGet(map.indexOf(key2));
fail();
});
assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3));
@ -353,6 +347,7 @@ public class TestIntObjectHashMap extends LuceneTestCase {
map.remove(empty);
assertEquals(null, map.get(empty));
assertEquals(0, map.size());
map.put(empty, null);
assertEquals(1, map.size());
@ -363,6 +358,14 @@ public class TestIntObjectHashMap extends LuceneTestCase {
assertEquals(0, map.size());
assertFalse(map.containsKey(empty));
assertNull(map.get(empty));
assertEquals(null, map.put(empty, value1));
assertEquals(value1, map.put(empty, value2));
map.clear();
assertFalse(map.indexExists(map.indexOf(empty)));
assertEquals(null, map.put(empty, value1));
map.clear();
assertEquals(null, map.remove(empty));
}
/* */
@ -401,6 +404,11 @@ public class TestIntObjectHashMap extends LuceneTestCase {
// These are internals, but perhaps worth asserting too.
assertEquals(0, map.assigned);
// Check values are cleared.
assertEquals(null, map.put(key1, value1));
assertEquals(null, map.remove(key2));
map.clear();
// Check if the map behaves properly upon subsequent use.
testPutWithExpansions();
}
@ -476,13 +484,13 @@ public class TestIntObjectHashMap extends LuceneTestCase {
assertEquals(reallocationsBefore, reallocations.get());
// Should not expand because we're replacing an existing element.
map.put(k1, value2);
map.put(key1, value2);
assertEquals(reallocationsBefore, reallocations.get());
// Remove from a full map.
map.remove(k1);
map.remove(key1);
assertEquals(reallocationsBefore, reallocations.get());
map.put(k1, value2);
map.put(key1, value2);
// Check expand on "last slot of a full map" condition.
map.put(outOfSet, value1);
@ -520,6 +528,58 @@ public class TestIntObjectHashMap extends LuceneTestCase {
assertFalse(l2.equals(l1));
}
/** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testAgainstHashMap() {
final Random rnd = RandomizedTest.getRandom();
final HashMap other = new HashMap();
for (int size = 1000; size < 20000; size += 4000) {
other.clear();
map.clear();
for (int round = 0; round < size * 20; round++) {
int key = cast(rnd.nextInt(size));
if (rnd.nextInt(50) == 0) {
key = 0;
}
int value = vcast(rnd.nextInt());
if (rnd.nextBoolean()) {
Object previousValue;
if (rnd.nextBoolean()) {
int index = map.indexOf(key);
if (map.indexExists(index)) {
previousValue = map.indexReplace(index, value);
} else {
map.indexInsert(index, key, value);
previousValue = null;
}
} else {
previousValue = map.put(key, value);
}
assertEquals(other.put(key, value), previousValue);
assertEquals(value, map.get(key));
assertEquals(value, map.indexGet(map.indexOf(key)));
assertTrue(map.containsKey(key));
assertTrue(map.indexExists(map.indexOf(key)));
} else {
assertEquals(other.containsKey(key), map.containsKey(key));
Object previousValue =
map.containsKey(key) && rnd.nextBoolean()
? map.indexRemove(map.indexOf(key))
: map.remove(key);
assertEquals(other.remove(key), previousValue);
}
assertEquals(other.size(), map.size());
}
}
}
/*
*
*/
@ -570,16 +630,16 @@ public class TestIntObjectHashMap extends LuceneTestCase {
@Test
public void testEqualsSameClass() {
IntObjectHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
l1.put(key1, value0);
l1.put(key2, value1);
l1.put(key3, value2);
IntObjectHashMap l2 = new IntObjectHashMap(l1);
l2.putAll(l1);
IntObjectHashMap l3 = new IntObjectHashMap(l2);
l3.putAll(l2);
l3.put(k4, value0);
l3.put(key4, value0);
assertEquals(l2, l1);
assertEquals(l2.hashCode(), l1.hashCode());
@ -592,13 +652,13 @@ public class TestIntObjectHashMap extends LuceneTestCase {
class Sub extends IntObjectHashMap {}
IntObjectHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
l1.put(key1, value0);
l1.put(key2, value1);
l1.put(key3, value2);
IntObjectHashMap l2 = new Sub();
l2.putAll(l1);
l2.put(k4, value3);
l2.put(key4, value3);
IntObjectHashMap l3 = new Sub();
l3.putAll(l2);

View File

@ -0,0 +1,464 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.hppc;
import static org.hamcrest.Matchers.*;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.hamcrest.MatcherAssert;
import org.junit.Before;
import org.junit.Test;
/**
* Tests for {@link LongHashSet}.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.LongHashSetTest
*
* <p>github: https://github.com/carrotsearch/hppc release: 0.9.0
*/
public class TestLongHashSet extends LuceneTestCase {
  // 0L is the sentinel "empty" key of the open-addressing set; several tests exercise it
  // explicitly because it is stored out-of-band rather than in the hash table slots.
  private static final long EMPTY_KEY = 0L;

  // Ready-to-use key values; keyE aliases the empty/sentinel key.
  private final long keyE = 0;
  private final long key1 = cast(1);
  private final long key2 = cast(2);
  private final long key3 = cast(3);
  private final long key4 = cast(4);

  /** Per-test fresh initialized instance. */
  private LongHashSet set;

  /** Convert to target type from an integer used to test stuff. */
  private long cast(int v) {
    return v;
  }

  // Fresh, empty set before every test method.
  @Before
  public void initialize() {
    set = new LongHashSet();
  }

  /** addAll(LongHashSet) copies every element of another set. */
  @Test
  public void testAddAllViaInterface() {
    set.addAll(key1, key2);
    LongHashSet iface = new LongHashSet();
    iface.clear();
    iface.addAll(set);
    MatcherAssert.assertThat(set(iface.toArray()), is(equalTo(set(key1, key2))));
  }

  /**
   * Exercises the index-based access API (indexOf / indexExists / indexGet / indexReplace /
   * indexInsert / indexRemove): a non-negative index means "present", a negative index is the
   * insertion point for an absent key.
   */
  @Test
  public void testIndexMethods() {
    set.add(keyE);
    set.add(key1);
    MatcherAssert.assertThat(set.indexOf(keyE), is(greaterThanOrEqualTo(0)));
    MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0)));
    MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0)));
    MatcherAssert.assertThat(set.indexExists(set.indexOf(keyE)), is(true));
    MatcherAssert.assertThat(set.indexExists(set.indexOf(key1)), is(true));
    MatcherAssert.assertThat(set.indexExists(set.indexOf(key2)), is(false));
    MatcherAssert.assertThat(set.indexGet(set.indexOf(keyE)), is(equalTo(keyE)));
    MatcherAssert.assertThat(set.indexGet(set.indexOf(key1)), is(equalTo(key1)));
    // indexGet on a negative (absent) index trips an internal assertion.
    expectThrows(
        AssertionError.class,
        () -> {
          set.indexGet(set.indexOf(key2));
        });
    MatcherAssert.assertThat(set.indexReplace(set.indexOf(keyE), keyE), is(equalTo(keyE)));
    MatcherAssert.assertThat(set.indexReplace(set.indexOf(key1), key1), is(equalTo(key1)));
    set.indexInsert(set.indexOf(key2), key2);
    MatcherAssert.assertThat(set.indexGet(set.indexOf(key2)), is(equalTo(key2)));
    MatcherAssert.assertThat(set.size(), is(equalTo(3)));
    set.indexRemove(set.indexOf(keyE));
    MatcherAssert.assertThat(set.size(), is(equalTo(2)));
    set.indexRemove(set.indexOf(key2));
    MatcherAssert.assertThat(set.size(), is(equalTo(1)));
    MatcherAssert.assertThat(set.indexOf(keyE), is(lessThan(0)));
    MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0)));
    MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0)));
  }

  /** Each cursor returned by iteration carries an index usable with the index-based API. */
  @Test
  public void testCursorIndexIsValid() {
    set.add(keyE);
    set.add(key1);
    set.add(key2);
    for (LongCursor c : set) {
      MatcherAssert.assertThat(set.indexExists(c.index), is(true));
      MatcherAssert.assertThat(set.indexGet(c.index), is(equalTo(c.value)));
    }
  }

  /** The sentinel empty key must behave like any other key through the whole API surface. */
  @Test
  public void testEmptyKey() {
    LongHashSet set = new LongHashSet();
    boolean b = set.add(EMPTY_KEY);
    MatcherAssert.assertThat(b, is(true));
    MatcherAssert.assertThat(set.add(EMPTY_KEY), is(false));
    MatcherAssert.assertThat(set.size(), is(equalTo(1)));
    MatcherAssert.assertThat(set.isEmpty(), is(false));
    MatcherAssert.assertThat(set(set.toArray()), is(equalTo(set(EMPTY_KEY))));
    MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true));
    int index = set.indexOf(EMPTY_KEY);
    MatcherAssert.assertThat(set.indexExists(index), is(true));
    MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY)));
    MatcherAssert.assertThat(set.indexReplace(index, EMPTY_KEY), is(equalTo(EMPTY_KEY)));
    // Randomly remove via either API; both must leave the set empty.
    if (random().nextBoolean()) {
      b = set.remove(EMPTY_KEY);
      MatcherAssert.assertThat(b, is(true));
    } else {
      set.indexRemove(index);
    }
    MatcherAssert.assertThat(set.size(), is(equalTo(0)));
    MatcherAssert.assertThat(set.isEmpty(), is(true));
    MatcherAssert.assertThat(set(set.toArray()), is(empty()));
    MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(false));
    index = set.indexOf(EMPTY_KEY);
    MatcherAssert.assertThat(set.indexExists(index), is(false));
    // Re-insert through the index API and make sure it coexists with a regular key.
    set.indexInsert(index, EMPTY_KEY);
    set.add(key1);
    MatcherAssert.assertThat(set.size(), is(equalTo(2)));
    MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true));
    index = set.indexOf(EMPTY_KEY);
    MatcherAssert.assertThat(set.indexExists(index), is(true));
    MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY)));
  }

  /** After ensureCapacity(n), adding n elements must not trigger any further reallocation. */
  @Test
  public void testEnsureCapacity() {
    final AtomicInteger expands = new AtomicInteger();
    LongHashSet set =
        new LongHashSet(0) {
          @Override
          protected void allocateBuffers(int arraySize) {
            super.allocateBuffers(arraySize);
            expands.incrementAndGet();
          }
        };
    // Add some elements.
    final int max = rarely() ? 0 : randomIntBetween(0, 250);
    for (int i = 0; i < max; i++) {
      set.add(cast(i));
    }
    final int additions = randomIntBetween(max, max + 5000);
    set.ensureCapacity(additions + set.size());
    final int before = expands.get();
    for (int i = 0; i < additions; i++) {
      set.add(cast(i));
    }
    assertEquals(before, expands.get());
  }

  @Test
  public void testInitiallyEmpty() {
    assertEquals(0, set.size());
  }

  /** add returns true only the first time a key is inserted. */
  @Test
  public void testAdd() {
    assertTrue(set.add(key1));
    assertFalse(set.add(key1));
    assertEquals(1, set.size());
  }

  /** addAll returns the number of keys actually added (duplicates don't count). */
  @Test
  public void testAdd2() {
    set.addAll(key1, key1);
    assertEquals(1, set.size());
    assertEquals(1, set.addAll(key1, key2));
    assertEquals(2, set.size());
  }

  @Test
  public void testAddVarArgs() {
    set.addAll(asArray(0, 1, 2, 1, 0));
    assertEquals(3, set.size());
    assertSortedListEquals(set.toArray(), 0, 1, 2);
  }

  @Test
  public void testAddAll() {
    LongHashSet set2 = new LongHashSet();
    set2.addAll(asArray(1, 2));
    set.addAll(asArray(0, 1));
    // Only key 2 is new the first time; nothing new the second time.
    assertEquals(1, set.addAll(set2));
    assertEquals(0, set.addAll(set2));
    assertEquals(3, set.size());
    assertSortedListEquals(set.toArray(), 0, 1, 2);
  }

  @Test
  public void testRemove() {
    set.addAll(asArray(0, 1, 2, 3, 4));
    assertTrue(set.remove(key2));
    assertFalse(set.remove(key2));
    assertEquals(4, set.size());
    assertSortedListEquals(set.toArray(), 0, 1, 3, 4);
  }

  /** Any requested initial capacity must accommodate that many elements. */
  @Test
  public void testInitialCapacityAndGrowth() {
    for (int i = 0; i < 256; i++) {
      LongHashSet set = new LongHashSet(i);
      for (int j = 0; j < i; j++) {
        set.add(cast(j));
      }
      assertEquals(i, set.size());
    }
  }

  /**
   * Regression test for HPPC-73: operations on a set filled to full capacity (load factor 1)
   * must not loop forever or reallocate except when expansion is genuinely required.
   */
  @Test
  public void testBug_HPPC73_FullCapacityGet() {
    final AtomicInteger reallocations = new AtomicInteger();
    final int elements = 0x7F;
    set =
        new LongHashSet(elements, 1f) {
          @Override
          protected double verifyLoadFactor(double loadFactor) {
            // Skip load factor sanity range checking.
            return loadFactor;
          }

          @Override
          protected void allocateBuffers(int arraySize) {
            super.allocateBuffers(arraySize);
            reallocations.incrementAndGet();
          }
        };
    int reallocationsBefore = reallocations.get();
    assertEquals(reallocationsBefore, 1);
    for (int i = 1; i <= elements; i++) {
      set.add(cast(i));
    }
    // Non-existent key.
    long outOfSet = cast(elements + 1);
    set.remove(outOfSet);
    assertFalse(set.contains(outOfSet));
    assertEquals(reallocationsBefore, reallocations.get());
    // Should not expand because we're replacing an existing element.
    assertFalse(set.add(key1));
    assertEquals(reallocationsBefore, reallocations.get());
    // Remove from a full set.
    set.remove(key1);
    assertEquals(reallocationsBefore, reallocations.get());
    set.add(key1);
    // Check expand on "last slot of a full map" condition.
    set.add(outOfSet);
    assertEquals(reallocationsBefore + 1, reallocations.get());
  }

  /** removeAll(other) removes only the intersection and reports how many were removed. */
  @Test
  public void testRemoveAllFromLookupContainer() {
    set.addAll(asArray(0, 1, 2, 3, 4));
    LongHashSet list2 = new LongHashSet();
    list2.addAll(asArray(1, 3, 5));
    assertEquals(2, set.removeAll(list2));
    assertEquals(3, set.size());
    assertSortedListEquals(set.toArray(), 0, 2, 4);
  }

  @Test
  public void testClear() {
    set.addAll(asArray(1, 2, 3));
    set.clear();
    assertEquals(0, set.size());
  }

  /** release() frees buffers but the set must remain usable afterwards. */
  @Test
  public void testRelease() {
    set.addAll(asArray(1, 2, 3));
    set.release();
    assertEquals(0, set.size());
    set.addAll(asArray(1, 2, 3));
    assertEquals(3, set.size());
  }

  /** Iteration visits each contained key exactly once; an empty set yields no cursor. */
  @Test
  public void testIterable() {
    set.addAll(asArray(1, 2, 2, 3, 4));
    set.remove(key2);
    assertEquals(3, set.size());
    int count = 0;
    for (LongCursor cursor : set) {
      count++;
      assertTrue(set.contains(cursor.value));
    }
    assertEquals(count, set.size());
    set.clear();
    assertFalse(set.iterator().hasNext());
  }

  /** Runs random insertions/deletions/clearing and compares the results against {@link HashSet}. */
  @Test
  @SuppressWarnings({"rawtypes", "unchecked"})
  public void testAgainstHashSet() {
    final Random rnd = RandomizedTest.getRandom();
    final HashSet other = new HashSet();
    for (int size = 1000; size < 20000; size += 4000) {
      other.clear();
      set.clear();
      for (int round = 0; round < size * 20; round++) {
        long key = cast(rnd.nextInt(size));
        // Bias towards the sentinel empty key now and then to keep it well covered.
        if (rnd.nextInt(50) == 0) {
          key = 0L;
        }
        if (rnd.nextBoolean()) {
          // Insert via either the index-based API or plain add; both must agree with HashSet.
          if (rnd.nextBoolean()) {
            int index = set.indexOf(key);
            if (set.indexExists(index)) {
              set.indexReplace(index, key);
            } else {
              set.indexInsert(index, key);
            }
          } else {
            set.add(key);
          }
          other.add(key);
          assertTrue(set.contains(key));
          assertTrue(set.indexExists(set.indexOf(key)));
        } else {
          // Remove via either API and cross-check the reported "was present" result.
          assertEquals(other.contains(key), set.contains(key));
          boolean removed;
          if (set.contains(key) && rnd.nextBoolean()) {
            set.indexRemove(set.indexOf(key));
            removed = true;
          } else {
            removed = set.remove(key);
          }
          assertEquals(other.remove(key), removed);
        }
        assertEquals(other.size(), set.size());
      }
    }
  }

  /** Equal contents imply equal hash codes and equals(), regardless of insertion order. */
  @Test
  public void testHashCodeEquals() {
    LongHashSet l0 = new LongHashSet();
    assertEquals(0, l0.hashCode());
    assertEquals(l0, new LongHashSet());
    LongHashSet l1 = LongHashSet.from(key1, key2, key3);
    LongHashSet l2 = LongHashSet.from(key1, key2);
    l2.add(key3);
    assertEquals(l1.hashCode(), l2.hashCode());
    assertEquals(l1, l2);
  }

  /** clone() yields an independent copy; mutating the clone leaves the original intact. */
  @Test
  public void testClone() {
    this.set.addAll(key1, key2, key3);
    LongHashSet cloned = set.clone();
    cloned.remove(key1);
    assertSortedListEquals(set.toArray(), key1, key2, key3);
    assertSortedListEquals(cloned.toArray(), key2, key3);
  }

  @Test
  public void testEqualsSameClass() {
    LongHashSet l1 = LongHashSet.from(key1, key2, key3);
    LongHashSet l2 = LongHashSet.from(key1, key2, key3);
    LongHashSet l3 = LongHashSet.from(key1, key2, key4);
    MatcherAssert.assertThat(l1, is(equalTo(l2)));
    MatcherAssert.assertThat(l1.hashCode(), is(equalTo(l2.hashCode())));
    MatcherAssert.assertThat(l1, is(not(equalTo(l3))));
  }

  /** A subclass is never equal to a base-class instance, even with identical contents. */
  @Test
  public void testEqualsSubClass() {
    class Sub extends LongHashSet {}
    ;
    LongHashSet l1 = LongHashSet.from(key1, key2, key3);
    LongHashSet l2 = new Sub();
    LongHashSet l3 = new Sub();
    l2.addAll(l1);
    l3.addAll(l1);
    MatcherAssert.assertThat(l2, is(equalTo(l3)));
    MatcherAssert.assertThat(l1, is(not(equalTo(l2))));
  }

  // Uniform random int in [min, max], inclusive on both ends.
  private static int randomIntBetween(int min, int max) {
    return min + random().nextInt(max + 1 - min);
  }

  // Boxes a long[] into a java.util.Set for order-insensitive comparison.
  private static Set<Long> set(long... elements) {
    Set<Long> set = new HashSet<>();
    for (long element : elements) {
      set.add(element);
    }
    return set;
  }

  private static long[] asArray(long... elements) {
    return elements;
  }

  /** Check if the array's content is identical to a given sequence of elements. */
  private static void assertSortedListEquals(long[] array, long... elements) {
    assertEquals(elements.length, array.length);
    Arrays.sort(array);
    assertArrayEquals(elements, array);
  }
}

View File

@ -17,7 +17,9 @@
package org.apache.lucene.util.hppc;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
@ -35,35 +37,28 @@ import org.junit.Test;
public class TestLongIntHashMap extends LuceneTestCase {
/* Ready to use key values. */
protected long keyE = 0;
protected long key0 = cast(0), k0 = key0;
protected long key1 = cast(1), k1 = key1;
protected long key2 = cast(2), k2 = key2;
protected long key3 = cast(3), k3 = key3;
protected long key4 = cast(4), k4 = key4;
protected long key5 = cast(5), k5 = key5;
protected long key6 = cast(6), k6 = key6;
protected long key7 = cast(7), k7 = key7;
protected long key8 = cast(8), k8 = key8;
protected long key9 = cast(9), k9 = key9;
private final long keyE = 0;
private final long key1 = cast(1);
private final long key2 = cast(2);
private final long key3 = cast(3);
private final long key4 = cast(4);
/** Convert to target type from an integer used to test stuff. */
public long cast(int v) {
private long cast(int v) {
return v;
}
/** Create a new array of a given type and copy the arguments to this array. */
/* */
public final long[] newArray(long... elements) {
private long[] newArray(long... elements) {
return elements;
}
public static int randomIntBetween(int min, int max) {
private static int randomIntBetween(int min, int max) {
return min + random().nextInt(max + 1 - min);
}
/** Check if the array's content is identical to a given sequence of elements. */
public static void assertSortedListEquals(long[] array, long... elements) {
private static void assertSortedListEquals(long[] array, long... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
Arrays.sort(elements);
@ -71,23 +66,23 @@ public class TestLongIntHashMap extends LuceneTestCase {
}
/** Check if the array's content is identical to a given sequence of elements. */
public static void assertSortedListEquals(int[] array, int... elements) {
private static void assertSortedListEquals(int[] array, int... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
Arrays.sort(elements);
assertArrayEquals(elements, array);
}
protected int value0 = vcast(0);
protected int value1 = vcast(1);
protected int value2 = vcast(2);
protected int value3 = vcast(3);
protected int value4 = vcast(4);
private final int value0 = vcast(0);
private final int value1 = vcast(1);
private final int value2 = vcast(2);
private final int value3 = vcast(3);
private final int value4 = vcast(4);
/** Per-test fresh initialized instance. */
public LongIntHashMap map = newInstance();
private LongIntHashMap map = newInstance();
protected LongIntHashMap newInstance() {
private LongIntHashMap newInstance() {
return new LongIntHashMap();
}
@ -109,13 +104,13 @@ public class TestLongIntHashMap extends LuceneTestCase {
}
/** Convert to target type from an integer used to test stuff. */
protected int vcast(int value) {
private int vcast(int value) {
return value;
}
/** Create a new array of a given type and copy the arguments to this array. */
/* */
protected final int[] newvArray(int... elements) {
private int[] newvArray(int... elements) {
return elements;
}
@ -188,7 +183,6 @@ public class TestLongIntHashMap extends LuceneTestCase {
AssertionError.class,
() -> {
map.indexGet(map.indexOf(key2));
fail();
});
assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3));
@ -350,6 +344,15 @@ public class TestLongIntHashMap extends LuceneTestCase {
map.remove(empty);
assertEquals(0, map.get(empty));
assertEquals(0, map.size());
assertEquals(0, map.put(empty, value1));
assertEquals(value1, map.put(empty, value2));
map.clear();
assertFalse(map.indexExists(map.indexOf(empty)));
assertEquals(0, map.put(empty, value1));
map.clear();
assertEquals(0, map.remove(empty));
}
/* */
@ -388,6 +391,11 @@ public class TestLongIntHashMap extends LuceneTestCase {
// These are internals, but perhaps worth asserting too.
assertEquals(0, map.assigned);
// Check values are cleared.
assertEquals(0, map.put(key1, value1));
assertEquals(0, map.remove(key2));
map.clear();
// Check if the map behaves properly upon subsequent use.
testPutWithExpansions();
}
@ -463,13 +471,13 @@ public class TestLongIntHashMap extends LuceneTestCase {
assertEquals(reallocationsBefore, reallocations.get());
// Should not expand because we're replacing an existing element.
map.put(k1, value2);
map.put(key1, value2);
assertEquals(reallocationsBefore, reallocations.get());
// Remove from a full map.
map.remove(k1);
map.remove(key1);
assertEquals(reallocationsBefore, reallocations.get());
map.put(k1, value2);
map.put(key1, value2);
// Check expand on "last slot of a full map" condition.
map.put(outOfSet, value1);
@ -507,6 +515,61 @@ public class TestLongIntHashMap extends LuceneTestCase {
assertFalse(l2.equals(l1));
}
/** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testAgainstHashMap() {
final Random rnd = RandomizedTest.getRandom();
final HashMap other = new HashMap();
for (int size = 1000; size < 20000; size += 4000) {
other.clear();
map.clear();
for (int round = 0; round < size * 20; round++) {
long key = cast(rnd.nextInt(size));
if (rnd.nextInt(50) == 0) {
key = 0;
}
int value = vcast(rnd.nextInt());
boolean hadOldValue = map.containsKey(key);
if (rnd.nextBoolean()) {
int previousValue;
if (rnd.nextBoolean()) {
int index = map.indexOf(key);
if (map.indexExists(index)) {
previousValue = map.indexReplace(index, value);
} else {
map.indexInsert(index, key, value);
previousValue = 0;
}
} else {
previousValue = map.put(key, value);
}
assertEquals(
other.put(key, value), ((previousValue) == 0) && !hadOldValue ? null : previousValue);
assertEquals(value, map.get(key));
assertEquals(value, map.indexGet(map.indexOf(key)));
assertTrue(map.containsKey(key));
assertTrue(map.indexExists(map.indexOf(key)));
} else {
assertEquals(other.containsKey(key), map.containsKey(key));
int previousValue =
map.containsKey(key) && rnd.nextBoolean()
? map.indexRemove(map.indexOf(key))
: map.remove(key);
assertEquals(
other.remove(key), ((previousValue) == 0) && !hadOldValue ? null : previousValue);
}
assertEquals(other.size(), map.size());
}
}
}
/*
*
*/
@ -557,16 +620,16 @@ public class TestLongIntHashMap extends LuceneTestCase {
@Test
public void testEqualsSameClass() {
LongIntHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
l1.put(key1, value0);
l1.put(key2, value1);
l1.put(key3, value2);
LongIntHashMap l2 = new LongIntHashMap(l1);
l2.putAll(l1);
LongIntHashMap l3 = new LongIntHashMap(l2);
l3.putAll(l2);
l3.put(k4, value0);
l3.put(key4, value0);
assertEquals(l2, l1);
assertEquals(l2.hashCode(), l1.hashCode());
@ -579,13 +642,13 @@ public class TestLongIntHashMap extends LuceneTestCase {
class Sub extends LongIntHashMap {}
LongIntHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
l1.put(key1, value0);
l1.put(key2, value1);
l1.put(key3, value2);
LongIntHashMap l2 = new Sub();
l2.putAll(l1);
l2.put(k4, value3);
l2.put(key4, value3);
LongIntHashMap l3 = new Sub();
l3.putAll(l2);

View File

@ -17,7 +17,9 @@
package org.apache.lucene.util.hppc;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
@ -36,35 +38,28 @@ import org.junit.Test;
public class TestLongObjectHashMap extends LuceneTestCase {
/* Ready to use key values. */
protected long keyE = 0;
protected long key0 = cast(0), k0 = key0;
protected long key1 = cast(1), k1 = key1;
protected long key2 = cast(2), k2 = key2;
protected long key3 = cast(3), k3 = key3;
protected long key4 = cast(4), k4 = key4;
protected long key5 = cast(5), k5 = key5;
protected long key6 = cast(6), k6 = key6;
protected long key7 = cast(7), k7 = key7;
protected long key8 = cast(8), k8 = key8;
protected long key9 = cast(9), k9 = key9;
private final long keyE = 0;
private final long key1 = cast(1);
private final long key2 = cast(2);
private final long key3 = cast(3);
private final long key4 = cast(4);
/** Convert to target type from an integer used to test stuff. */
public long cast(int v) {
private long cast(int v) {
return v;
}
/** Create a new array of a given type and copy the arguments to this array. */
/* */
public final long[] newArray(long... elements) {
private long[] newArray(long... elements) {
return elements;
}
public static int randomIntBetween(int min, int max) {
private static int randomIntBetween(int min, int max) {
return min + random().nextInt(max + 1 - min);
}
/** Check if the array's content is identical to a given sequence of elements. */
public static void assertSortedListEquals(long[] array, long... elements) {
private static void assertSortedListEquals(long[] array, long... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
Arrays.sort(elements);
@ -72,22 +67,22 @@ public class TestLongObjectHashMap extends LuceneTestCase {
}
/** Check if the array's content is identical to a given sequence of elements. */
public static void assertSortedListEquals(Object[] array, Object... elements) {
private static void assertSortedListEquals(Object[] array, Object... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
assertArrayEquals(elements, array);
}
protected int value0 = vcast(0);
protected int value1 = vcast(1);
protected int value2 = vcast(2);
protected int value3 = vcast(3);
protected int value4 = vcast(4);
private final int value0 = vcast(0);
private final int value1 = vcast(1);
private final int value2 = vcast(2);
private final int value3 = vcast(3);
private final int value4 = vcast(4);
/** Per-test fresh initialized instance. */
public LongObjectHashMap<Object> map = newInstance();
private LongObjectHashMap<Object> map = newInstance();
protected LongObjectHashMap newInstance() {
private LongObjectHashMap newInstance() {
return new LongObjectHashMap();
}
@ -109,13 +104,13 @@ public class TestLongObjectHashMap extends LuceneTestCase {
}
/** Convert to target type from an integer used to test stuff. */
protected int vcast(int value) {
private int vcast(int value) {
return value;
}
/** Create a new array of a given type and copy the arguments to this array. */
/* */
protected final Object[] newvArray(Object... elements) {
private Object[] newvArray(Object... elements) {
return elements;
}
@ -189,7 +184,6 @@ public class TestLongObjectHashMap extends LuceneTestCase {
AssertionError.class,
() -> {
map.indexGet(map.indexOf(key2));
fail();
});
assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3));
@ -354,6 +348,7 @@ public class TestLongObjectHashMap extends LuceneTestCase {
map.remove(empty);
assertEquals(null, map.get(empty));
assertEquals(0, map.size());
map.put(empty, null);
assertEquals(1, map.size());
@ -364,6 +359,14 @@ public class TestLongObjectHashMap extends LuceneTestCase {
assertEquals(0, map.size());
assertFalse(map.containsKey(empty));
assertNull(map.get(empty));
assertEquals(null, map.put(empty, value1));
assertEquals(value1, map.put(empty, value2));
map.clear();
assertFalse(map.indexExists(map.indexOf(empty)));
assertEquals(null, map.put(empty, value1));
map.clear();
assertEquals(null, map.remove(empty));
}
/* */
@ -402,6 +405,11 @@ public class TestLongObjectHashMap extends LuceneTestCase {
// These are internals, but perhaps worth asserting too.
assertEquals(0, map.assigned);
// Check values are cleared.
assertEquals(null, map.put(key1, value1));
assertEquals(null, map.remove(key2));
map.clear();
// Check if the map behaves properly upon subsequent use.
testPutWithExpansions();
}
@ -477,13 +485,13 @@ public class TestLongObjectHashMap extends LuceneTestCase {
assertEquals(reallocationsBefore, reallocations.get());
// Should not expand because we're replacing an existing element.
map.put(k1, value2);
map.put(key1, value2);
assertEquals(reallocationsBefore, reallocations.get());
// Remove from a full map.
map.remove(k1);
map.remove(key1);
assertEquals(reallocationsBefore, reallocations.get());
map.put(k1, value2);
map.put(key1, value2);
// Check expand on "last slot of a full map" condition.
map.put(outOfSet, value1);
@ -521,6 +529,58 @@ public class TestLongObjectHashMap extends LuceneTestCase {
assertFalse(l2.equals(l1));
}
/** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testAgainstHashMap() {
final Random rnd = RandomizedTest.getRandom();
final HashMap other = new HashMap();
for (int size = 1000; size < 20000; size += 4000) {
other.clear();
map.clear();
for (int round = 0; round < size * 20; round++) {
long key = cast(rnd.nextInt(size));
if (rnd.nextInt(50) == 0) {
key = 0;
}
int value = vcast(rnd.nextInt());
if (rnd.nextBoolean()) {
Object previousValue;
if (rnd.nextBoolean()) {
int index = map.indexOf(key);
if (map.indexExists(index)) {
previousValue = map.indexReplace(index, value);
} else {
map.indexInsert(index, key, value);
previousValue = null;
}
} else {
previousValue = map.put(key, value);
}
assertEquals(other.put(key, value), previousValue);
assertEquals(value, map.get(key));
assertEquals(value, map.indexGet(map.indexOf(key)));
assertTrue(map.containsKey(key));
assertTrue(map.indexExists(map.indexOf(key)));
} else {
assertEquals(other.containsKey(key), map.containsKey(key));
Object previousValue =
map.containsKey(key) && rnd.nextBoolean()
? map.indexRemove(map.indexOf(key))
: map.remove(key);
assertEquals(other.remove(key), previousValue);
}
assertEquals(other.size(), map.size());
}
}
}
/*
*
*/
@ -571,16 +631,16 @@ public class TestLongObjectHashMap extends LuceneTestCase {
@Test
public void testEqualsSameClass() {
LongObjectHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
l1.put(key1, value0);
l1.put(key2, value1);
l1.put(key3, value2);
LongObjectHashMap l2 = new LongObjectHashMap(l1);
l2.putAll(l1);
LongObjectHashMap l3 = new LongObjectHashMap(l2);
l3.putAll(l2);
l3.put(k4, value0);
l3.put(key4, value0);
assertEquals(l2, l1);
assertEquals(l2.hashCode(), l1.hashCode());
@ -593,13 +653,13 @@ public class TestLongObjectHashMap extends LuceneTestCase {
class Sub extends LongObjectHashMap {}
LongObjectHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
l1.put(key1, value0);
l1.put(key2, value1);
l1.put(key3, value2);
LongObjectHashMap l2 = new Sub();
l2.putAll(l1);
l2.put(k4, value3);
l2.put(key4, value3);
LongObjectHashMap l3 = new Sub();
l3.putAll(l2);

View File

@ -18,12 +18,10 @@ package org.apache.lucene.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
@ -33,6 +31,8 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.hppc.IntCursor;
import org.apache.lucene.util.hppc.IntHashSet;
/**
* A {@link Query} for drill-down over facet categories. You should call {@link #add(String,
@ -56,7 +56,7 @@ public final class DrillDownQuery extends Query {
private final List<BooleanQuery.Builder> dimQueries = new ArrayList<>();
private final Map<String, Integer> drillDownDims = new LinkedHashMap<>();
private final List<Query> builtDimQueries = new ArrayList<>();
private final Set<Integer> dirtyDimQueryIndex = new HashSet<>();
private final IntHashSet dirtyDimQueryIndex = new IntHashSet();
/** Used by clone() and DrillSideways */
DrillDownQuery(
@ -202,8 +202,8 @@ public final class DrillDownQuery extends Query {
* @return The array of dimQueries
*/
public Query[] getDrillDownQueries() {
for (Integer dirtyDimIndex : dirtyDimQueryIndex) {
builtDimQueries.set(dirtyDimIndex, this.dimQueries.get(dirtyDimIndex).build());
for (IntCursor dirtyDimIndex : dirtyDimQueryIndex) {
builtDimQueries.set(dirtyDimIndex.value, this.dimQueries.get(dirtyDimIndex.value).build());
}
dirtyDimQueryIndex.clear();

View File

@ -1,9 +1,9 @@
{
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java": "7a8a8fd5b2ea78f9a17f54cbae8b0e4496e8988e",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java": "49bf5362c3f41a1f398284f05eede08fceec6d78",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj": "c9584bbe50c3c7479f72ea84145ebbf034a201ea",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java": "380daae0f6e27b3872d117fc4aef955b1e4296ca",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj": "968452b64449655b035fffb45944086c3032732b",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserConstants.java": "e59a3fd38b66a3d56779c55955c1e014225a1f50",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java": "56d191d6f3033dd554efcb38f536b5f7df2f1e06",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java": "dc99a1083bfa50e429d40e114fabe7dd5d434693",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java": "310665ba37d982327fcb55cc3523d629ef29ef54",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java": "7e2dd6ab7489048bb70f3077ca9fed90f925ec33"
}

View File

@ -4,11 +4,8 @@ package org.apache.lucene.queryparser.classic;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
@ -17,6 +14,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
import org.apache.lucene.util.hppc.IntHashSet;
/**
* This class is generated by JavaCC. The most important method is
@ -143,8 +141,8 @@ import org.apache.lucene.queryparser.charstream.FastCharStream;
}
private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
private static Set<Integer> disallowedPostMultiTerm
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
private static IntHashSet disallowedPostMultiTerm
= IntHashSet.from(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR);
/**
 * Returns true if a token of the given kind may directly follow a term within a
 * multi-term sequence. Kinds in {@code disallowedPostMultiTerm} (COLON, STAR,
 * FUZZY_SLOP, CARAT, AND, OR) terminate the sequence instead.
 *
 * @param tokenKind a token-kind constant from the generated parser constants
 */
private static boolean allowedPostMultiTerm(int tokenKind) {
return disallowedPostMultiTerm.contains(tokenKind) == false;
}
@ -708,19 +706,35 @@ if (splitOnWhitespace == false) {
finally { jj_save(2, xla); }
}
private boolean jj_3R_MultiTerm_391_3_6()
private boolean jj_3R_MultiTerm_381_3_3()
{
if (jj_scan_token(TERM)) return true;
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_MultiTerm_389_3_6()) return true;
Token xsp;
if (jj_3R_MultiTerm_391_5_7()) return true;
while (true) {
xsp = jj_scanpos;
if (jj_3R_MultiTerm_391_5_7()) { jj_scanpos = xsp; break; }
}
return false;
}
// JavaCC-generated lookahead stub. In these jj_3R_* routines a true return signals
// lookahead failure, so this stub always reports success; the real condition (the
// semantic lookahead jj_semLA) is evaluated by the caller before invoking it.
private boolean jj_3R_MultiTerm_389_3_6()
{
return false;
}
private boolean jj_3R_Clause_308_9_5()
private boolean jj_3R_Clause_306_9_5()
{
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
private boolean jj_3R_Clause_307_7_4()
private boolean jj_3R_Clause_305_7_4()
{
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
@ -729,17 +743,17 @@ if (splitOnWhitespace == false) {
private boolean jj_3_2()
{
if (jj_3R_MultiTerm_383_3_3()) return true;
if (jj_3R_MultiTerm_381_3_3()) return true;
return false;
}
private boolean jj_3_1()
{
if (jj_3R_MultiTerm_383_3_3()) return true;
if (jj_3R_MultiTerm_381_3_3()) return true;
return false;
}
private boolean jj_3R_MultiTerm_393_5_7()
private boolean jj_3R_MultiTerm_391_5_7()
{
if (jj_scan_token(TERM)) return true;
return false;
@ -749,25 +763,9 @@ if (splitOnWhitespace == false) {
{
Token xsp;
xsp = jj_scanpos;
if (jj_3R_Clause_307_7_4()) {
if (jj_3R_Clause_305_7_4()) {
jj_scanpos = xsp;
if (jj_3R_Clause_308_9_5()) return true;
}
return false;
}
private boolean jj_3R_MultiTerm_383_3_3()
{
if (jj_scan_token(TERM)) return true;
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_MultiTerm_391_3_6()) return true;
Token xsp;
if (jj_3R_MultiTerm_393_5_7()) return true;
while (true) {
xsp = jj_scanpos;
if (jj_3R_MultiTerm_393_5_7()) { jj_scanpos = xsp; break; }
if (jj_3R_Clause_306_9_5()) return true;
}
return false;
}

View File

@ -27,11 +27,8 @@ package org.apache.lucene.queryparser.classic;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
@ -40,6 +37,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
import org.apache.lucene.util.hppc.IntHashSet;
/**
* This class is generated by JavaCC. The most important method is
@ -166,8 +164,8 @@ public class QueryParser extends QueryParserBase {
}
private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
private static Set<Integer> disallowedPostMultiTerm
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
private static IntHashSet disallowedPostMultiTerm
= IntHashSet.from(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR);
/**
 * Returns true if a token of the given kind may directly follow a term within a
 * multi-term sequence. Kinds in {@code disallowedPostMultiTerm} (COLON, STAR,
 * FUZZY_SLOP, CARAT, AND, OR) terminate the sequence instead.
 *
 * @param tokenKind a token-kind constant from the generated parser constants
 */
private static boolean allowedPostMultiTerm(int tokenKind) {
return disallowedPostMultiTerm.contains(tokenKind) == false;
}

View File

@ -15,8 +15,6 @@ package org.apache.lucene.queryparser.classic;
/** Token Manager. */
@SuppressWarnings ("unused")
public class QueryParserTokenManager implements QueryParserConstants {

View File

@ -17,10 +17,9 @@
package org.apache.lucene.search.suggest.document;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryVisitor;
@ -37,6 +36,7 @@ import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.IntHashSet;
/**
* A {@link CompletionQuery} that matches documents specified by a wrapped {@link CompletionQuery}
@ -200,21 +200,29 @@ public class ContextQuery extends CompletionQuery implements Accountable {
Operations.determinize(contextsAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
final Map<IntsRef, Float> contextMap = CollectionUtil.newHashMap(contexts.size());
final TreeSet<Integer> contextLengths = new TreeSet<>();
final IntHashSet contextLengths = new IntHashSet();
for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
ContextMetaData contextMetaData = entry.getValue();
contextMap.put(entry.getKey(), contextMetaData.boost);
contextLengths.add(entry.getKey().length);
}
int[] contextLengthArray = new int[contextLengths.size()];
final Iterator<Integer> iterator = contextLengths.descendingIterator();
for (int i = 0; iterator.hasNext(); i++) {
contextLengthArray[i] = iterator.next();
}
int[] contextLengthArray = contextLengths.toArray();
sortDescending(contextLengthArray);
return new ContextCompletionWeight(
this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
}
/**
 * Sorts {@code array} into descending order in place: an ascending
 * {@link Arrays#sort(int[])} followed by an in-place reversal.
 *
 * @param array the array to reorder; modified in place
 */
private static void sortDescending(int[] array) {
  Arrays.sort(array);
  int left = 0;
  int right = array.length - 1;
  while (left < right) {
    int tmp = array[left];
    array[left] = array[right];
    array[right] = tmp;
    left++;
    right--;
  }
}
private static Automaton toContextAutomaton(
final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());