diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 725e64976e7..db5a4ea2677 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -350,6 +350,8 @@ Optimizations * GITHUB#13339: Add a MemorySegment Vector scorer - for scoring without copying on-heap (Chris Hegarty) +* GITHUB#13400: Replace Set<Integer> by IntHashSet and Set<Long> by LongHashSet. (Bruno Roustant) + Bug Fixes --------------------- diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java index 7d08424c27a..29ab08e895b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java @@ -22,7 +22,6 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.HashSet; import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -39,6 +38,7 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.Util; +import org.apache.lucene.util.hppc.IntHashSet; /** * A map of synonyms, keys and values are phrases.
@@ -228,10 +228,10 @@ public class SynonymMap { BytesRefBuilder scratch = new BytesRefBuilder(); ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput(); - final Set dedupSet; + final IntHashSet dedupSet; if (dedup) { - dedupSet = new HashSet<>(); + dedupSet = new IntHashSet(); } else { dedupSet = null; } @@ -260,8 +260,7 @@ public class SynonymMap { int count = 0; for (int i = 0; i < numEntries; i++) { if (dedupSet != null) { - // box once - final Integer ent = output.ords.get(i); + int ent = output.ords.get(i); if (dedupSet.contains(ent)) { continue; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java index 7c6a1b0a584..57caf34d1a6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java @@ -22,8 +22,6 @@ import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_ import java.io.IOException; import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; @@ -52,6 +50,7 @@ import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.MathUtil; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.compress.LZ4; +import org.apache.lucene.util.hppc.LongHashSet; import org.apache.lucene.util.hppc.LongIntHashMap; import org.apache.lucene.util.packed.DirectMonotonicWriter; import org.apache.lucene.util.packed.DirectWriter; @@ -198,7 +197,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { MinMaxTracker minMax = new MinMaxTracker(); MinMaxTracker blockMinMax = new MinMaxTracker(); long gcd = 0; - Set uniqueValues = ords ? 
null : new HashSet<>(); + LongHashSet uniqueValues = ords ? null : new LongHashSet(); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { for (int i = 0, count = values.docValueCount(); i < count; ++i) { long v = values.nextValue(); @@ -282,10 +281,10 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { && DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1) < DirectWriter.unsignedBitsRequired((max - min) / gcd)) { numBitsPerValue = DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1); - final Long[] sortedUniqueValues = uniqueValues.toArray(new Long[0]); + final long[] sortedUniqueValues = uniqueValues.toArray(); Arrays.sort(sortedUniqueValues); meta.writeInt(sortedUniqueValues.length); // tablesize - for (Long v : sortedUniqueValues) { + for (long v : sortedUniqueValues) { meta.writeLong(v); // table[] entry } encode = new LongIntHashMap(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java index 925aa68ca1a..7a37e8c9300 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java @@ -26,8 +26,6 @@ import java.util.Collection; import java.util.Deque; import java.util.Iterator; import java.util.List; -import java.util.SortedSet; -import java.util.TreeSet; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.codecs.TermVectorsWriter; @@ -53,6 +51,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.hppc.IntHashSet; import 
org.apache.lucene.util.packed.BlockPackedWriter; import org.apache.lucene.util.packed.DirectWriter; import org.apache.lucene.util.packed.PackedInts; @@ -454,16 +453,18 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite /** Returns a sorted array containing unique field numbers */ private int[] flushFieldNums() throws IOException { - SortedSet fieldNums = new TreeSet<>(); + IntHashSet fieldNumsSet = new IntHashSet(); for (DocData dd : pendingDocs) { for (FieldData fd : dd.fields) { - fieldNums.add(fd.fieldNum); + fieldNumsSet.add(fd.fieldNum); } } + int[] fieldNums = fieldNumsSet.toArray(); + Arrays.sort(fieldNums); - final int numDistinctFields = fieldNums.size(); + final int numDistinctFields = fieldNums.length; assert numDistinctFields > 0; - final int bitsRequired = PackedInts.bitsRequired(fieldNums.last()); + final int bitsRequired = PackedInts.bitsRequired(fieldNums[numDistinctFields - 1]); final int token = (Math.min(numDistinctFields - 1, 0x07) << 5) | bitsRequired; vectorsStream.writeByte((byte) token); if (numDistinctFields - 1 >= 0x07) { @@ -471,18 +472,13 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite } final PackedInts.Writer writer = PackedInts.getWriterNoHeader( - vectorsStream, PackedInts.Format.PACKED, fieldNums.size(), bitsRequired, 1); + vectorsStream, PackedInts.Format.PACKED, numDistinctFields, bitsRequired, 1); for (Integer fieldNum : fieldNums) { writer.add(fieldNum); } writer.finish(); - int[] fns = new int[fieldNums.size()]; - int i = 0; - for (Integer key : fieldNums) { - fns[i++] = key; - } - return fns; + return fieldNums; } private void flushFields(int totalFields, int[] fieldNums) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/document/LongHashSet.java b/lucene/core/src/java/org/apache/lucene/document/DocValuesLongHashSet.java similarity index 93% rename from lucene/core/src/java/org/apache/lucene/document/LongHashSet.java rename to 
lucene/core/src/java/org/apache/lucene/document/DocValuesLongHashSet.java index 5234193b047..7daa0e2c6d1 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LongHashSet.java +++ b/lucene/core/src/java/org/apache/lucene/document/DocValuesLongHashSet.java @@ -25,9 +25,9 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.PackedInts; /** Set of longs, optimized for docvalues usage */ -final class LongHashSet implements Accountable { +final class DocValuesLongHashSet implements Accountable { private static final long BASE_RAM_BYTES = - RamUsageEstimator.shallowSizeOfInstance(LongHashSet.class); + RamUsageEstimator.shallowSizeOfInstance(DocValuesLongHashSet.class); private static final long MISSING = Long.MIN_VALUE; @@ -43,7 +43,7 @@ final class LongHashSet implements Accountable { final long maxValue; /** Construct a set. Values must be in sorted order. */ - LongHashSet(long[] values) { + DocValuesLongHashSet(long[] values) { int tableSize = Math.toIntExact(values.length * 3L / 2); tableSize = 1 << PackedInts.bitsRequired(tableSize); // make it a power of 2 assert tableSize >= values.length * 3L / 2; @@ -119,8 +119,8 @@ final class LongHashSet implements Accountable { @Override public boolean equals(Object obj) { - if (obj != null && obj instanceof LongHashSet) { - LongHashSet that = (LongHashSet) obj; + if (obj != null && obj instanceof DocValuesLongHashSet) { + DocValuesLongHashSet that = (DocValuesLongHashSet) obj; return size == that.size && minValue == that.minValue && maxValue == that.maxValue diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesSetQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesSetQuery.java index ba76e6e01d5..b5596a67221 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesSetQuery.java @@ -42,12 +42,12 @@ final class 
SortedNumericDocValuesSetQuery extends Query implements Accountable RamUsageEstimator.shallowSizeOfInstance(SortedNumericDocValuesSetQuery.class); private final String field; - private final LongHashSet numbers; + private final DocValuesLongHashSet numbers; SortedNumericDocValuesSetQuery(String field, long[] numbers) { this.field = Objects.requireNonNull(field); Arrays.sort(numbers); - this.numbers = new LongHashSet(numbers); + this.numbers = new DocValuesLongHashSet(numbers); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java index 06c54d30b0e..78f862514c6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java +++ b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java @@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOConsumer; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.hppc.LongHashSet; /** * Tracks the stream of {@link FrozenBufferedUpdates}. When DocumentsWriterPerThread flushes, its @@ -323,7 +324,7 @@ final class BufferedUpdatesStream implements Accountable { * This lets us track the "holes" in the current frontier of applying del gens; once the holes * are filled in we can advance completedDelGen. 
*/ - private final Set finishedDelGens = new HashSet<>(); + private final LongHashSet finishedDelGens = new LongHashSet(); private final InfoStream infoStream; diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseMatcher.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseMatcher.java index 68e43642707..863035da17b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseMatcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseMatcher.java @@ -31,6 +31,7 @@ import org.apache.lucene.index.ImpactsSource; import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.hppc.IntHashSet; /** * Find all slop-valid position-combinations (matches) encountered while traversing/hopping the @@ -556,8 +557,8 @@ public final class SloppyPhraseMatcher extends PhraseMatcher { ArrayList bb = ppTermsBitSets(rpp, rptTerms); unionTermGroups(bb); HashMap tg = termGroups(rptTerms, bb); - HashSet distinctGroupIDs = new HashSet<>(tg.values()); - for (int i = 0; i < distinctGroupIDs.size(); i++) { + int numDistinctGroupIds = new IntHashSet(tg.values()).size(); + for (int i = 0; i < numDistinctGroupIds; i++) { tmp.add(new HashSet<>()); } for (PhrasePositions pp : rpp) { diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java index e26db82901c..0b3c444ab15 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java @@ -18,14 +18,13 @@ package org.apache.lucene.util.automaton; import java.util.Arrays; import java.util.BitSet; -import java.util.HashSet; import java.util.Objects; -import java.util.Set; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.ArrayUtil; import 
org.apache.lucene.util.InPlaceMergeSorter; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.Sorter; +import org.apache.lucene.util.hppc.IntHashSet; // TODO // - could use packed int arrays instead @@ -618,7 +617,7 @@ public class Automaton implements Accountable, TransitionAccessor { /** Returns sorted array of all interval start points. */ public int[] getStartPoints() { - Set pointset = new HashSet<>(); + IntHashSet pointset = new IntHashSet(); pointset.add(Character.MIN_CODE_POINT); // System.out.println("getStartPoints"); for (int s = 0; s < nextState; s += 2) { @@ -636,11 +635,7 @@ public class Automaton implements Accountable, TransitionAccessor { trans += 3; } } - int[] points = new int[pointset.size()]; - int n = 0; - for (Integer m : pointset) { - points[n++] = m; - } + int[] points = pointset.toArray(); Arrays.sort(points); return points; } diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java b/lucene/core/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java index 83c51cf56d5..b59bb80e8c5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java @@ -16,10 +16,9 @@ */ package org.apache.lucene.util.automaton; -import java.util.Iterator; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.Arrays; import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.util.hppc.IntHashSet; /** * Class to construct DFAs that match a word within some edit distance. 
@@ -68,7 +67,7 @@ public class LevenshteinAutomata { this.alphaMax = alphaMax; // calculate the alphabet - SortedSet set = new TreeSet<>(); + IntHashSet set = new IntHashSet(); for (int i = 0; i < word.length; i++) { int v = word[i]; if (v > alphaMax) { @@ -76,9 +75,8 @@ public class LevenshteinAutomata { } set.add(v); } - alphabet = new int[set.size()]; - Iterator iterator = set.iterator(); - for (int i = 0; i < alphabet.length; i++) alphabet[i] = iterator.next(); + alphabet = set.toArray(); + Arrays.sort(alphabet); rangeLower = new int[alphabet.length + 2]; rangeUpper = new int[alphabet.length + 2]; diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java index 9798934c22c..e8a9ad21a15 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java @@ -48,6 +48,8 @@ import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.hppc.BitMixer; +import org.apache.lucene.util.hppc.IntCursor; +import org.apache.lucene.util.hppc.IntHashSet; import org.apache.lucene.util.hppc.IntObjectHashMap; /** @@ -249,14 +251,14 @@ public final class Operations { b = concatenate(as); } - Set prevAcceptStates = toSet(b, 0); + IntHashSet prevAcceptStates = toSet(b, 0); Automaton.Builder builder = new Automaton.Builder(); builder.copy(b); for (int i = min; i < max; i++) { int numStates = builder.getNumStates(); builder.copy(a); - for (int s : prevAcceptStates) { - builder.addEpsilon(s, numStates); + for (IntCursor s : prevAcceptStates) { + builder.addEpsilon(s.value, numStates); } prevAcceptStates = toSet(a, numStates); } @@ -264,16 +266,15 @@ public final class Operations { return builder.finish(); } - private static Set toSet(Automaton a, int offset) { + private static IntHashSet 
toSet(Automaton a, int offset) { int numStates = a.getNumStates(); BitSet isAccept = a.getAcceptStates(); - Set result = new HashSet(); + IntHashSet result = new IntHashSet(); int upto = 0; while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) { result.add(offset + upto); upto++; } - return result; } @@ -1130,7 +1131,7 @@ public final class Operations { throw new IllegalArgumentException("input automaton must be deterministic"); } IntsRefBuilder builder = new IntsRefBuilder(); - HashSet visited = new HashSet<>(); + IntHashSet visited = new IntHashSet(); int s = 0; Transition t = new Transition(); while (true) { diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java b/lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java index f54abd34e73..08fef9f9606 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java @@ -17,6 +17,8 @@ package org.apache.lucene.util.hppc; +import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo; + import java.util.concurrent.atomic.AtomicInteger; /** Constants for primitive maps. */ @@ -42,4 +44,62 @@ public class HashContainers { public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1; static final AtomicInteger ITERATION_SEED = new AtomicInteger(); + + static int iterationIncrement(int seed) { + return 29 + ((seed & 7) << 1); // Small odd integer. 
+ } + + static int nextBufferSize(int arraySize, int elements, double loadFactor) { + assert checkPowerOfTwo(arraySize); + if (arraySize == MAX_HASH_ARRAY_LENGTH) { + throw new BufferAllocationException( + "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", + elements, loadFactor); + } + + return arraySize << 1; + } + + static int expandAtCount(int arraySize, double loadFactor) { + assert checkPowerOfTwo(arraySize); + // Take care of hash container invariant (there has to be at least one empty slot to ensure + // the lookup loop finds either the element or an empty slot). + return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor)); + } + + static boolean checkPowerOfTwo(int arraySize) { + // These are internals, we can just assert without retrying. + assert arraySize > 1; + assert nextHighestPowerOfTwo(arraySize) == arraySize; + return true; + } + + static int minBufferSize(int elements, double loadFactor) { + if (elements < 0) { + throw new IllegalArgumentException("Number of elements must be >= 0: " + elements); + } + + long length = (long) Math.ceil(elements / loadFactor); + if (length == elements) { + length++; + } + length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length)); + + if (length > MAX_HASH_ARRAY_LENGTH) { + throw new BufferAllocationException( + "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", + elements, loadFactor); + } + + return (int) length; + } + + static void checkLoadFactor( + double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) { + if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) { + throw new BufferAllocationException( + "The load factor should be in range [%.2f, %.2f]: %f", + minAllowedInclusive, maxAllowedInclusive, loadFactor); + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/IntHashSet.java b/lucene/core/src/java/org/apache/lucene/util/hppc/IntHashSet.java new file mode 
100644 index 00000000000..c2d72930a93 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/IntHashSet.java @@ -0,0 +1,688 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util.hppc; + +import static org.apache.lucene.util.hppc.HashContainers.*; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * A hash set of ints, implemented using open addressing with linear probing for + * collision resolution. + * + *

<p>Mostly forked and trimmed from com.carrotsearch.hppc.IntHashSet + * + *

github: https://github.com/carrotsearch/hppc release 0.9.0 + */ +public class IntHashSet implements Iterable, Accountable, Cloneable { + + private static final long BASE_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(IntHashSet.class); + + /** The hash array holding keys. */ + public int[] keys; + + /** + * The number of stored keys (assigned key slots), excluding the special "empty" key, if any. + * + * @see #size() + * @see #hasEmptyKey + */ + protected int assigned; + + /** Mask for slot scans in {@link #keys}. */ + protected int mask; + + /** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */ + protected int resizeAt; + + /** Special treatment for the "empty slot" key marker. */ + protected boolean hasEmptyKey; + + /** The load factor for {@link #keys}. */ + protected double loadFactor; + + /** Seed used to ensure the hash iteration order is different from an iteration to another. */ + protected int iterationSeed; + + /** New instance with sane defaults. */ + public IntHashSet() { + this(DEFAULT_EXPECTED_ELEMENTS); + } + + /** + * New instance with sane defaults. + * + * @param expectedElements The expected number of elements guaranteed not to cause a rehash + * (inclusive). + */ + public IntHashSet(int expectedElements) { + this(expectedElements, DEFAULT_LOAD_FACTOR); + } + + /** + * New instance with the provided defaults. + * + * @param expectedElements The expected number of elements guaranteed not to cause a rehash + * (inclusive). + * @param loadFactor The load factor for internal buffers. Insane load factors (zero, full + * capacity) are rejected by {@link #verifyLoadFactor(double)}. + */ + public IntHashSet(int expectedElements, double loadFactor) { + this.loadFactor = verifyLoadFactor(loadFactor); + iterationSeed = ITERATION_SEED.incrementAndGet(); + ensureCapacity(expectedElements); + } + + /** New instance copying elements from another set. 
*/ + public IntHashSet(IntHashSet set) { + this(set.size()); + addAll(set); + } + + /** New instance copying elements from another collection. */ + public IntHashSet(Collection collection) { + this(collection.size()); + addAll(collection); + } + + public boolean add(int key) { + if (((key) == 0)) { + assert ((keys[mask + 1]) == 0); + boolean added = !hasEmptyKey; + hasEmptyKey = true; + return added; + } else { + final int[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + int existing; + while (!((existing = keys[slot]) == 0)) { + if (((key) == (existing))) { + return false; + } + slot = (slot + 1) & mask; + } + + if (assigned == resizeAt) { + allocateThenInsertThenRehash(slot, key); + } else { + keys[slot] = key; + } + + assigned++; + return true; + } + } + + /** + * Adds all elements from the given list (vararg) to this set. + * + * @return Returns the number of elements actually added as a result of this call (not previously + * present in the set). + */ + public final int addAll(int... elements) { + ensureCapacity(elements.length); + int count = 0; + for (int e : elements) { + if (add(e)) { + count++; + } + } + return count; + } + + /** + * Adds all elements from the given iterable to this set. + * + * @return Returns the number of elements actually added as a result of this call (not previously + * present in the set). 
+ */ + public int addAll(Iterable iterable) { + int count = 0; + for (IntCursor cursor : iterable) { + if (add(cursor.value)) { + count++; + } + } + return count; + } + + public int addAll(Collection collection) { + int count = 0; + for (int element : collection) { + if (add(element)) { + count++; + } + } + return count; + } + + public int[] toArray() { + + final int[] cloned = (new int[size()]); + int j = 0; + if (hasEmptyKey) { + cloned[j++] = 0; + } + + final int[] keys = this.keys; + int seed = nextIterationSeed(); + int inc = iterationIncrement(seed); + for (int i = 0, mask = this.mask, slot = seed & mask; + i <= mask; + i++, slot = (slot + inc) & mask) { + int existing; + if (!((existing = keys[slot]) == 0)) { + cloned[j++] = existing; + } + } + + return cloned; + } + + /** Removes a single key from this set. Returns true if the key was present. */ + public boolean remove(int key) { + if (((key) == 0)) { + boolean hadEmptyKey = hasEmptyKey; + hasEmptyKey = false; + return hadEmptyKey; + } else { + final int[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + int existing; + while (!((existing = keys[slot]) == 0)) { + if (((key) == (existing))) { + shiftConflictingKeys(slot); + return true; + } + slot = (slot + 1) & mask; + } + return false; + } + } + + /** + * Removes all keys present in a given container. + * + * @return Returns the number of elements actually removed as a result of this call. + */ + public int removeAll(IntHashSet other) { + final int before = size(); + + // Try to iterate over the smaller set or over the container that isn't implementing + // efficient contains() lookup. + + if (other.size() >= size()) { + if (hasEmptyKey && other.contains(0)) { + hasEmptyKey = false; + } + + final int[] keys = this.keys; + for (int slot = 0, max = this.mask; slot <= max; ) { + int existing; + if (!((existing = keys[slot]) == 0) && other.contains(existing)) { + // Shift, do not increment slot.
+ shiftConflictingKeys(slot); + } else { + slot++; + } + } + } else { + for (IntCursor c : other) { + remove(c.value); + } + } + + return before - size(); + } + + public boolean contains(int key) { + if (((key) == 0)) { + return hasEmptyKey; + } else { + final int[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + int existing; + while (!((existing = keys[slot]) == 0)) { + if (((key) == (existing))) { + return true; + } + slot = (slot + 1) & mask; + } + return false; + } + } + + public void clear() { + assigned = 0; + hasEmptyKey = false; + Arrays.fill(keys, 0); + } + + public void release() { + assigned = 0; + hasEmptyKey = false; + keys = null; + ensureCapacity(DEFAULT_EXPECTED_ELEMENTS); + } + + public boolean isEmpty() { + return size() == 0; + } + + /** + * Ensure this container can hold at least the given number of elements without resizing its + * buffers. + * + * @param expectedElements The total number of elements, inclusive. + */ + public void ensureCapacity(int expectedElements) { + if (expectedElements > resizeAt || keys == null) { + final int[] prevKeys = this.keys; + allocateBuffers(minBufferSize(expectedElements, loadFactor)); + if (prevKeys != null && !isEmpty()) { + rehash(prevKeys); + } + } + } + + public int size() { + return assigned + (hasEmptyKey ? 1 : 0); + } + + @Override + public int hashCode() { + int h = hasEmptyKey ? 0xDEADBEEF : 0; + final int[] keys = this.keys; + for (int slot = mask; slot >= 0; slot--) { + int existing; + if (!((existing = keys[slot]) == 0)) { + h += BitMixer.mix(existing); + } + } + return h; + } + + @Override + public boolean equals(Object obj) { + return (this == obj) + || (obj != null && getClass() == obj.getClass() && sameKeys(getClass().cast(obj))); + } + + /** Return true if all keys of some other container exist in this container. 
*/ + private boolean sameKeys(IntHashSet other) { + if (other.size() != size()) { + return false; + } + + for (IntCursor c : other) { + if (!contains(c.value)) { + return false; + } + } + + return true; + } + + @Override + public IntHashSet clone() { + try { + /* */ + IntHashSet cloned = (IntHashSet) super.clone(); + cloned.keys = keys.clone(); + cloned.hasEmptyKey = hasEmptyKey; + cloned.iterationSeed = ITERATION_SEED.incrementAndGet(); + return cloned; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + @Override + public Iterator iterator() { + return new EntryIterator(); + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys); + } + + /** + * Provides the next iteration seed used to build the iteration starting slot and offset + * increment. This method does not need to be synchronized, what matters is that each thread gets + * a sequence of varying seeds. + */ + protected int nextIterationSeed() { + return iterationSeed = BitMixer.mixPhi(iterationSeed); + } + + /** An iterator implementation for {@link #iterator}. */ + protected final class EntryIterator extends AbstractIterator { + private final IntCursor cursor; + private final int increment; + private int index; + private int slot; + + public EntryIterator() { + cursor = new IntCursor(); + int seed = nextIterationSeed(); + increment = iterationIncrement(seed); + slot = seed & mask; + } + + @Override + protected IntCursor fetch() { + final int mask = IntHashSet.this.mask; + while (index <= mask) { + int existing; + index++; + slot = (slot + increment) & mask; + if (!((existing = keys[slot]) == 0)) { + cursor.index = slot; + cursor.value = existing; + return cursor; + } + } + + if (index == mask + 1 && hasEmptyKey) { + cursor.index = index++; + cursor.value = 0; + return cursor; + } + + return done(); + } + } + + /** + * Create a set from a variable number of arguments or an array of int. 
The elements + * are copied from the argument to the internal buffer. + */ + /* */ + public static IntHashSet from(int... elements) { + final IntHashSet set = new IntHashSet(elements.length); + set.addAll(elements); + return set; + } + + /** + * Returns a hash code for the given key. + * + *

<p>The output from this function should evenly distribute keys across the entire integer range. + */ + protected int hashKey(int key) { + assert !((key) == 0); // Handled as a special case (empty slot marker). + return BitMixer.mixPhi(key); + } + + /** + * Returns a logical "index" of a given key that can be used to speed up follow-up logic in + * certain scenarios (conditional logic). + * + *

<p>The semantics of "indexes" are not strictly defined. Indexes may not be (and typically + * won't be) contiguous. + * + *

<p>The index is valid only between modifications (it will not be affected by read-only + * operations). + * + * @see #indexExists + * @see #indexGet + * @see #indexInsert + * @see #indexReplace + * @param key The key to locate in the set. + * @return A non-negative value of the logical "index" of the key in the set or a negative value + * if the key did not exist. + */ + public int indexOf(int key) { + final int mask = this.mask; + if (((key) == 0)) { + return hasEmptyKey ? mask + 1 : ~(mask + 1); + } else { + final int[] keys = this.keys; + int slot = hashKey(key) & mask; + + int existing; + while (!((existing = keys[slot]) == 0)) { + if (((key) == (existing))) { + return slot; + } + slot = (slot + 1) & mask; + } + + return ~slot; + } + } + + /** + * @see #indexOf + * @param index The index of a given key, as returned from {@link #indexOf}. + * @return Returns true if the index corresponds to an existing key or false + * otherwise. This is equivalent to checking whether the index is a non-negative value + * (existing keys) or a negative value (non-existing keys). + */ + public boolean indexExists(int index) { + assert index < 0 || index <= mask || (index == mask + 1 && hasEmptyKey); + + return index >= 0; + } + + /** + * Returns the exact value of the existing key. This method makes sense for sets of objects which + * define custom key-equality relationship. + * + * @see #indexOf + * @param index The index of an existing key. + * @return Returns the equivalent key currently stored in the set. + * @throws AssertionError If assertions are enabled and the index does not correspond to an + * existing key. + */ + public int indexGet(int index) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + return keys[index]; + } + + /** + * Replaces the existing equivalent key with the given one and returns any previous value stored + * for that key.
+ * + * @see #indexOf + * @param index The index of an existing key. + * @param equivalentKey The key to put in the set as a replacement. Must be equivalent to the key + * currently stored at the provided index. + * @return Returns the previous key stored in the set. + * @throws AssertionError If assertions are enabled and the index does not correspond to an + * existing key. + */ + public int indexReplace(int index, int equivalentKey) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + assert ((keys[index]) == (equivalentKey)); + + int previousValue = keys[index]; + keys[index] = equivalentKey; + return previousValue; + } + + /** + * Inserts a key for an index that is not present in the set. This method may help in avoiding + * double recalculation of the key's hash. + * + * @see #indexOf + * @param index The index of a previously non-existing key, as returned from {@link #indexOf}. + * @throws AssertionError If assertions are enabled and the index does not correspond to an + * existing key. + */ + public void indexInsert(int index, int key) { + assert index < 0 : "The index must not point at an existing key."; + + index = ~index; + if (((key) == 0)) { + assert index == mask + 1; + assert ((keys[index]) == 0); + hasEmptyKey = true; + } else { + assert ((keys[index]) == 0); + + if (assigned == resizeAt) { + allocateThenInsertThenRehash(index, key); + } else { + keys[index] = key; + } + + assigned++; + } + } + + /** + * Removes a key at an index previously acquired from {@link #indexOf}. + * + * @see #indexOf + * @param index The index of the key to remove, as returned from {@link #indexOf}. + * @throws AssertionError If assertions are enabled and the index does not correspond to an + * existing key. 
+ */ + public void indexRemove(int index) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + if (index > mask) { + hasEmptyKey = false; + } else { + shiftConflictingKeys(index); + } + } + + /** + * Validate load factor range and return it. Override and suppress if you need insane load + * factors. + */ + protected double verifyLoadFactor(double loadFactor) { + checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR); + return loadFactor; + } + + /** Rehash from old buffers to new buffers. */ + protected void rehash(int[] fromKeys) { + assert HashContainers.checkPowerOfTwo(fromKeys.length - 1); + + // Rehash all stored keys into the new buffers. + final int[] keys = this.keys; + final int mask = this.mask; + int existing; + for (int i = fromKeys.length - 1; --i >= 0; ) { + if (!((existing = fromKeys[i]) == 0)) { + int slot = hashKey(existing) & mask; + while (!((keys[slot]) == 0)) { + slot = (slot + 1) & mask; + } + keys[slot] = existing; + } + } + } + + /** + * Allocate new internal buffers. This method attempts to allocate and assign internal buffers + * atomically (either allocations succeed or not). + */ + protected void allocateBuffers(int arraySize) { + assert Integer.bitCount(arraySize) == 1; + + // Ensure no change is done if we hit an OOM. + int[] prevKeys = this.keys; + try { + int emptyElementSlot = 1; + this.keys = (new int[arraySize + emptyElementSlot]); + } catch (OutOfMemoryError e) { + this.keys = prevKeys; + throw new BufferAllocationException( + "Not enough memory to allocate buffers for rehashing: %,d -> %,d", + e, this.keys == null ? 0 : size(), arraySize); + } + + this.resizeAt = expandAtCount(arraySize, loadFactor); + this.mask = arraySize - 1; + } + + /** + * This method is invoked when there is a new key to be inserted into the buffer but there is not + * enough empty slots to do so. + * + *

New buffers are allocated. If this succeeds, we know we can proceed with rehashing so we + * assign the pending element to the previous buffer (possibly violating the invariant of having + * at least one empty slot) and rehash all keys, substituting new buffers at the end. + */ + protected void allocateThenInsertThenRehash(int slot, int pendingKey) { + assert assigned == resizeAt && ((keys[slot]) == 0) && !((pendingKey) == 0); + + // Try to allocate new buffers first. If we OOM, we leave in a consistent state. + final int[] prevKeys = this.keys; + allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor)); + assert this.keys.length > prevKeys.length; + + // We have succeeded at allocating new data so insert the pending key/value at + // the free slot in the old arrays before rehashing. + prevKeys[slot] = pendingKey; + + // Rehash old keys, including the pending key. + rehash(prevKeys); + } + + /** Shift all the slot-conflicting keys allocated to (and including) slot. */ + protected void shiftConflictingKeys(int gapSlot) { + final int[] keys = this.keys; + final int mask = this.mask; + + // Perform shifts of conflicting keys to fill in the gap. + int distance = 0; + while (true) { + final int slot = (gapSlot + (++distance)) & mask; + final int existing = keys[slot]; + if (((existing) == 0)) { + break; + } + + final int idealSlot = hashKey(existing); + final int shift = (slot - idealSlot) & mask; + if (shift >= distance) { + // Entry at this position was originally at or before the gap slot. + // Move the conflict-shifted entry to the gap's position and repeat the procedure + // for any entries to the right of the current position, treating it + // as the new gap. + keys[gapSlot] = existing; + gapSlot = slot; + distance = 0; + } + } + + // Mark the last found gap slot without a conflict as empty. 
+ keys[gapSlot] = 0; + assigned--; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java index 856f7870968..bb9488ce9d5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java @@ -17,7 +17,6 @@ package org.apache.lucene.util.hppc; -import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo; import static org.apache.lucene.util.hppc.HashContainers.*; import java.util.Arrays; @@ -96,10 +95,10 @@ public class IntIntHashMap implements Iterable, Acco ensureCapacity(expectedElements); } - /** Create a hash map from all key-value pairs of another container. */ - public IntIntHashMap(Iterable container) { - this(); - putAll(container); + /** Create a hash map from all key-value pairs of another map. */ + public IntIntHashMap(IntIntHashMap map) { + this(map.size()); + putAll(map); } public int put(int key, int value) { @@ -107,8 +106,8 @@ public class IntIntHashMap implements Iterable, Acco final int mask = this.mask; if (((key) == 0)) { + int previousValue = hasEmptyKey ? 
values[mask + 1] : 0; hasEmptyKey = true; - int previousValue = values[mask + 1]; values[mask + 1] = value; return previousValue; } else { @@ -205,6 +204,9 @@ public class IntIntHashMap implements Iterable, Acco public int remove(int key) { final int mask = this.mask; if (((key) == 0)) { + if (!hasEmptyKey) { + return 0; + } hasEmptyKey = false; int previousValue = values[mask + 1]; values[mask + 1] = 0; @@ -357,6 +359,7 @@ public class IntIntHashMap implements Iterable, Acco int previousValue = values[index]; if (index > mask) { + assert index == mask + 1; hasEmptyKey = false; values[index] = 0; } else { @@ -402,7 +405,8 @@ public class IntIntHashMap implements Iterable, Acco @Override public boolean equals(Object obj) { - return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)); + return (this == obj) + || (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj))); } /** Return true if all keys of some other container exist in this container. 
*/ @@ -625,7 +629,7 @@ public class IntIntHashMap implements Iterable, Acco cloned.keys = keys.clone(); cloned.values = values.clone(); cloned.hasEmptyKey = hasEmptyKey; - cloned.iterationSeed = nextIterationSeed(); + cloned.iterationSeed = ITERATION_SEED.incrementAndGet(); return cloned; } catch (CloneNotSupportedException e) { throw new RuntimeException(e); @@ -764,64 +768,6 @@ public class IntIntHashMap implements Iterable, Acco rehash(prevKeys, prevValues); } - static int nextBufferSize(int arraySize, int elements, double loadFactor) { - assert checkPowerOfTwo(arraySize); - if (arraySize == MAX_HASH_ARRAY_LENGTH) { - throw new BufferAllocationException( - "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", - elements, loadFactor); - } - - return arraySize << 1; - } - - static int expandAtCount(int arraySize, double loadFactor) { - assert checkPowerOfTwo(arraySize); - // Take care of hash container invariant (there has to be at least one empty slot to ensure - // the lookup loop finds either the element or an empty slot). - return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor)); - } - - static boolean checkPowerOfTwo(int arraySize) { - // These are internals, we can just assert without retrying. 
- assert arraySize > 1; - assert nextHighestPowerOfTwo(arraySize) == arraySize; - return true; - } - - static int minBufferSize(int elements, double loadFactor) { - if (elements < 0) { - throw new IllegalArgumentException("Number of elements must be >= 0: " + elements); - } - - long length = (long) Math.ceil(elements / loadFactor); - if (length == elements) { - length++; - } - length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length)); - - if (length > MAX_HASH_ARRAY_LENGTH) { - throw new BufferAllocationException( - "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", - elements, loadFactor); - } - - return (int) length; - } - - static void checkLoadFactor( - double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) { - if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) { - throw new BufferAllocationException( - "The load factor should be in range [%.2f, %.2f]: %f", - minAllowedInclusive, maxAllowedInclusive, loadFactor); - } - } - - static int iterationIncrement(int seed) { - return 29 + ((seed & 7) << 1); // Small odd integer. - } - /** * Shift all the slot-conflicting keys and values allocated to (and including) slot. */ diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java index 003f9835175..b493312115f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java @@ -17,7 +17,6 @@ package org.apache.lucene.util.hppc; -import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo; import static org.apache.lucene.util.hppc.HashContainers.*; import java.util.Arrays; @@ -98,10 +97,10 @@ public class IntObjectHashMap ensureCapacity(expectedElements); } - /** Create a hash map from all key-value pairs of another container. 
*/ - public IntObjectHashMap(Iterable> container) { - this(); - putAll(container); + /** Create a hash map from all key-value pairs of another map. */ + public IntObjectHashMap(IntObjectHashMap map) { + this(map.size()); + putAll(map); } public VType put(int key, VType value) { @@ -109,8 +108,8 @@ public class IntObjectHashMap final int mask = this.mask; if (((key) == 0)) { + VType previousValue = hasEmptyKey ? (VType) values[mask + 1] : null; hasEmptyKey = true; - VType previousValue = (VType) values[mask + 1]; values[mask + 1] = value; return previousValue; } else { @@ -173,6 +172,9 @@ public class IntObjectHashMap public VType remove(int key) { final int mask = this.mask; if (((key) == 0)) { + if (!hasEmptyKey) { + return null; + } hasEmptyKey = false; VType previousValue = (VType) values[mask + 1]; values[mask + 1] = 0; @@ -325,6 +327,7 @@ public class IntObjectHashMap VType previousValue = (VType) values[index]; if (index > mask) { + assert index == mask + 1; hasEmptyKey = false; values[index] = 0; } else { @@ -370,7 +373,8 @@ public class IntObjectHashMap @Override public boolean equals(Object obj) { - return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)); + return (this == obj) + || (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj))); } /** Return true if all keys of some other container exist in this container. 
*/ @@ -612,7 +616,7 @@ public class IntObjectHashMap cloned.keys = keys.clone(); cloned.values = values.clone(); cloned.hasEmptyKey = hasEmptyKey; - cloned.iterationSeed = nextIterationSeed(); + cloned.iterationSeed = ITERATION_SEED.incrementAndGet(); return cloned; } catch (CloneNotSupportedException e) { throw new RuntimeException(e); @@ -751,64 +755,6 @@ public class IntObjectHashMap rehash(prevKeys, prevValues); } - static int nextBufferSize(int arraySize, int elements, double loadFactor) { - assert checkPowerOfTwo(arraySize); - if (arraySize == MAX_HASH_ARRAY_LENGTH) { - throw new BufferAllocationException( - "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", - elements, loadFactor); - } - - return arraySize << 1; - } - - static int expandAtCount(int arraySize, double loadFactor) { - assert checkPowerOfTwo(arraySize); - // Take care of hash container invariant (there has to be at least one empty slot to ensure - // the lookup loop finds either the element or an empty slot). - return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor)); - } - - static boolean checkPowerOfTwo(int arraySize) { - // These are internals, we can just assert without retrying. 
- assert arraySize > 1; - assert nextHighestPowerOfTwo(arraySize) == arraySize; - return true; - } - - static int minBufferSize(int elements, double loadFactor) { - if (elements < 0) { - throw new IllegalArgumentException("Number of elements must be >= 0: " + elements); - } - - long length = (long) Math.ceil(elements / loadFactor); - if (length == elements) { - length++; - } - length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length)); - - if (length > MAX_HASH_ARRAY_LENGTH) { - throw new BufferAllocationException( - "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", - elements, loadFactor); - } - - return (int) length; - } - - static void checkLoadFactor( - double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) { - if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) { - throw new BufferAllocationException( - "The load factor should be in range [%.2f, %.2f]: %f", - minAllowedInclusive, maxAllowedInclusive, loadFactor); - } - } - - static int iterationIncrement(int seed) { - return 29 + ((seed & 7) << 1); // Small odd integer. - } - /** * Shift all the slot-conflicting keys and values allocated to (and including) slot. */ diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/LongHashSet.java b/lucene/core/src/java/org/apache/lucene/util/hppc/LongHashSet.java new file mode 100644 index 00000000000..d131bfc7386 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/LongHashSet.java @@ -0,0 +1,671 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util.hppc; + +import static org.apache.lucene.util.hppc.HashContainers.*; + +import java.util.Arrays; +import java.util.Iterator; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * A hash set of longs, implemented using open addressing with linear probing for + * collision resolution. + * + *

Mostly forked and trimmed from com.carrotsearch.hppc.LongHashSet + * + *

github: https://github.com/carrotsearch/hppc release 0.9.0 + */ +public class LongHashSet implements Iterable, Accountable, Cloneable { + + private static final long BASE_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(LongHashSet.class); + + /** The hash array holding keys. */ + public long[] keys; + + /** + * The number of stored keys (assigned key slots), excluding the special "empty" key, if any. + * + * @see #size() + * @see #hasEmptyKey + */ + protected int assigned; + + /** Mask for slot scans in {@link #keys}. */ + protected int mask; + + /** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */ + protected int resizeAt; + + /** Special treatment for the "empty slot" key marker. */ + protected boolean hasEmptyKey; + + /** The load factor for {@link #keys}. */ + protected double loadFactor; + + /** Seed used to ensure the hash iteration order is different from an iteration to another. */ + protected int iterationSeed; + + /** New instance with sane defaults. */ + public LongHashSet() { + this(DEFAULT_EXPECTED_ELEMENTS); + } + + /** + * New instance with sane defaults. + * + * @param expectedElements The expected number of elements guaranteed not to cause a rehash + * (inclusive). + */ + public LongHashSet(int expectedElements) { + this(expectedElements, DEFAULT_LOAD_FACTOR); + } + + /** + * New instance with the provided defaults. + * + * @param expectedElements The expected number of elements guaranteed not to cause a rehash + * (inclusive). + * @param loadFactor The load factor for internal buffers. Insane load factors (zero, full + * capacity) are rejected by {@link #verifyLoadFactor(double)}. + */ + public LongHashSet(int expectedElements, double loadFactor) { + this.loadFactor = verifyLoadFactor(loadFactor); + iterationSeed = ITERATION_SEED.incrementAndGet(); + ensureCapacity(expectedElements); + } + + /** New instance copying elements from another set. 
*/ + public LongHashSet(LongHashSet set) { + this(set.size()); + addAll(set); + } + + public boolean add(long key) { + if (((key) == 0)) { + assert ((keys[mask + 1]) == 0); + boolean added = !hasEmptyKey; + hasEmptyKey = true; + return added; + } else { + final long[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((key) == (existing))) { + return false; + } + slot = (slot + 1) & mask; + } + + if (assigned == resizeAt) { + allocateThenInsertThenRehash(slot, key); + } else { + keys[slot] = key; + } + + assigned++; + return true; + } + } + + /** + * Adds all elements from the given list (vararg) to this set. + * + * @return Returns the number of elements actually added as a result of this call (not previously + * present in the set). + */ + public final int addAll(long... elements) { + ensureCapacity(elements.length); + int count = 0; + for (long e : elements) { + if (add(e)) { + count++; + } + } + return count; + } + + /** + * Adds all elements from the given iterable to this set. + * + * @return Returns the number of elements actually added as a result of this call (not previously + * present in the set). + */ + public int addAll(Iterable iterable) { + int count = 0; + for (LongCursor cursor : iterable) { + if (add(cursor.value)) { + count++; + } + } + return count; + } + + public long[] toArray() { + + final long[] cloned = (new long[size()]); + int j = 0; + if (hasEmptyKey) { + cloned[j++] = 0L; + } + + final long[] keys = this.keys; + int seed = nextIterationSeed(); + int inc = iterationIncrement(seed); + for (int i = 0, mask = this.mask, slot = seed & mask; + i <= mask; + i++, slot = (slot + inc) & mask) { + long existing; + if (!((existing = keys[slot]) == 0)) { + cloned[j++] = existing; + } + } + + return cloned; + } + + /** An alias for the (preferred) {@link #removeAll}. 
*/ + public boolean remove(long key) { + if (((key) == 0)) { + boolean hadEmptyKey = hasEmptyKey; + hasEmptyKey = false; + return hadEmptyKey; + } else { + final long[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((key) == (existing))) { + shiftConflictingKeys(slot); + return true; + } + slot = (slot + 1) & mask; + } + return false; + } + } + + /** + * Removes all keys present in a given container. + * + * @return Returns the number of elements actually removed as a result of this call. + */ + public int removeAll(LongHashSet other) { + final int before = size(); + + // Try to iterate over the smaller set or over the container that isn't implementing + // efficient contains() lookup. + + if (other.size() >= size()) { + if (hasEmptyKey && other.contains(0L)) { + hasEmptyKey = false; + } + + final long[] keys = this.keys; + for (int slot = 0, max = this.mask; slot <= max; ) { + long existing; + if (!((existing = keys[slot]) == 0) && other.contains(existing)) { + // Shift, do not increment slot. 
+ shiftConflictingKeys(slot); + } else { + slot++; + } + } + } else { + for (LongCursor c : other) { + remove(c.value); + } + } + + return before - size(); + } + + public boolean contains(long key) { + if (((key) == 0)) { + return hasEmptyKey; + } else { + final long[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((key) == (existing))) { + return true; + } + slot = (slot + 1) & mask; + } + return false; + } + } + + public void clear() { + assigned = 0; + hasEmptyKey = false; + Arrays.fill(keys, 0L); + } + + public void release() { + assigned = 0; + hasEmptyKey = false; + keys = null; + ensureCapacity(DEFAULT_EXPECTED_ELEMENTS); + } + + public boolean isEmpty() { + return size() == 0; + } + + /** + * Ensure this container can hold at least the given number of elements without resizing its + * buffers. + * + * @param expectedElements The total number of elements, inclusive. + */ + public void ensureCapacity(int expectedElements) { + if (expectedElements > resizeAt || keys == null) { + final long[] prevKeys = this.keys; + allocateBuffers(minBufferSize(expectedElements, loadFactor)); + if (prevKeys != null && !isEmpty()) { + rehash(prevKeys); + } + } + } + + public int size() { + return assigned + (hasEmptyKey ? 1 : 0); + } + + @Override + public int hashCode() { + int h = hasEmptyKey ? 0xDEADBEEF : 0; + final long[] keys = this.keys; + for (int slot = mask; slot >= 0; slot--) { + long existing; + if (!((existing = keys[slot]) == 0)) { + h += BitMixer.mix(existing); + } + } + return h; + } + + @Override + public boolean equals(Object obj) { + return (this == obj) + || (obj != null && getClass() == obj.getClass() && sameKeys(getClass().cast(obj))); + } + + /** Return true if all keys of some other container exist in this container. 
*/ + private boolean sameKeys(LongHashSet other) { + if (other.size() != size()) { + return false; + } + + for (LongCursor c : other) { + if (!contains(c.value)) { + return false; + } + } + + return true; + } + + @Override + public LongHashSet clone() { + try { + /* */ + LongHashSet cloned = (LongHashSet) super.clone(); + cloned.keys = keys.clone(); + cloned.hasEmptyKey = hasEmptyKey; + cloned.iterationSeed = ITERATION_SEED.incrementAndGet(); + return cloned; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + @Override + public Iterator iterator() { + return new EntryIterator(); + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys); + } + + /** + * Provides the next iteration seed used to build the iteration starting slot and offset + * increment. This method does not need to be synchronized, what matters is that each thread gets + * a sequence of varying seeds. + */ + protected int nextIterationSeed() { + return iterationSeed = BitMixer.mixPhi(iterationSeed); + } + + /** An iterator implementation for {@link #iterator}. */ + protected final class EntryIterator extends AbstractIterator { + private final LongCursor cursor; + private final int increment; + private int index; + private int slot; + + public EntryIterator() { + cursor = new LongCursor(); + int seed = nextIterationSeed(); + increment = iterationIncrement(seed); + slot = seed & mask; + } + + @Override + protected LongCursor fetch() { + final int mask = LongHashSet.this.mask; + while (index <= mask) { + long existing; + index++; + slot = (slot + increment) & mask; + if (!((existing = keys[slot]) == 0)) { + cursor.index = slot; + cursor.value = existing; + return cursor; + } + } + + if (index == mask + 1 && hasEmptyKey) { + cursor.index = index++; + cursor.value = 0L; + return cursor; + } + + return done(); + } + } + + /** + * Create a set from a variable number of arguments or an array of long. 
The elements + * are copied from the argument to the internal buffer. + */ + /* */ + public static LongHashSet from(long... elements) { + final LongHashSet set = new LongHashSet(elements.length); + set.addAll(elements); + return set; + } + + /** + * Returns a hash code for the given key. + * + *

The output from this function should evenly distribute keys across the entire integer range. + */ + protected int hashKey(long key) { + assert !((key) == 0); // Handled as a special case (empty slot marker). + return BitMixer.mixPhi(key); + } + + /** + * Returns a logical "index" of a given key that can be used to speed up follow-up logic in + * certain scenarios (conditional logic). + * + *

The semantics of "indexes" are not strictly defined. Indexes may not be (and typically + * won't be) contiguous. + * + *

The index is valid only between modifications (it will not be affected by read-only + * operations). + * + * @see #indexExists + * @see #indexGet + * @see #indexInsert + * @see #indexReplace + * @param key The key to locate in the set. + * @return A non-negative value of the logical "index" of the key in the set or a negative value + * if the key did not exist. + */ + public int indexOf(long key) { + final int mask = this.mask; + if (((key) == 0)) { + return hasEmptyKey ? mask + 1 : ~(mask + 1); + } else { + final long[] keys = this.keys; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((key) == (existing))) { + return slot; + } + slot = (slot + 1) & mask; + } + + return ~slot; + } + } + + /** + * @see #indexOf + * @param index The index of a given key, as returned from {@link #indexOf}. + * @return Returns true if the index corresponds to an existing key or false + * otherwise. This is equivalent to checking whether the index is a positive value (existing + * keys) or a negative value (non-existing keys). + */ + public boolean indexExists(int index) { + assert index < 0 || index <= mask || (index == mask + 1 && hasEmptyKey); + + return index >= 0; + } + + /** + * Returns the exact value of the existing key. This method makes sense for sets of objects which + * define custom key-equality relationship. + * + * @see #indexOf + * @param index The index of an existing key. + * @return Returns the equivalent key currently stored in the set. + * @throws AssertionError If assertions are enabled and the index does not correspond to an + * existing key. + */ + public long indexGet(int index) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + return keys[index]; + } + + /** + * Replaces the existing equivalent key with the given one and returns any previous value stored + * for that key. 
+ * + * @see #indexOf + * @param index The index of an existing key. + * @param equivalentKey The key to put in the set as a replacement. Must be equivalent to the key + * currently stored at the provided index. + * @return Returns the previous key stored in the set. + * @throws AssertionError If assertions are enabled and the index does not correspond to an + * existing key. + */ + public long indexReplace(int index, long equivalentKey) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + assert ((keys[index]) == (equivalentKey)); + + long previousValue = keys[index]; + keys[index] = equivalentKey; + return previousValue; + } + + /** + * Inserts a key for an index that is not present in the set. This method may help in avoiding + * double recalculation of the key's hash. + * + * @see #indexOf + * @param index The index of a previously non-existing key, as returned from {@link #indexOf}. + * @throws AssertionError If assertions are enabled and the index does not correspond to an + * existing key. + */ + public void indexInsert(int index, long key) { + assert index < 0 : "The index must not point at an existing key."; + + index = ~index; + if (((key) == 0)) { + assert index == mask + 1; + assert ((keys[index]) == 0); + hasEmptyKey = true; + } else { + assert ((keys[index]) == 0); + + if (assigned == resizeAt) { + allocateThenInsertThenRehash(index, key); + } else { + keys[index] = key; + } + + assigned++; + } + } + + /** + * Removes a key at an index previously acquired from {@link #indexOf}. + * + * @see #indexOf + * @param index The index of the key to remove, as returned from {@link #indexOf}. + * @throws AssertionError If assertions are enabled and the index does not correspond to an + * existing key. 
+ */ + public void indexRemove(int index) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + if (index > mask) { + hasEmptyKey = false; + } else { + shiftConflictingKeys(index); + } + } + + /** + * Validate load factor range and return it. Override and suppress if you need insane load + * factors. + */ + protected double verifyLoadFactor(double loadFactor) { + checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR); + return loadFactor; + } + + /** Rehash from old buffers to new buffers. */ + protected void rehash(long[] fromKeys) { + assert HashContainers.checkPowerOfTwo(fromKeys.length - 1); + + // Rehash all stored keys into the new buffers. + final long[] keys = this.keys; + final int mask = this.mask; + long existing; + for (int i = fromKeys.length - 1; --i >= 0; ) { + if (!((existing = fromKeys[i]) == 0)) { + int slot = hashKey(existing) & mask; + while (!((keys[slot]) == 0)) { + slot = (slot + 1) & mask; + } + keys[slot] = existing; + } + } + } + + /** + * Allocate new internal buffers. This method attempts to allocate and assign internal buffers + * atomically (either allocations succeed or not). + */ + protected void allocateBuffers(int arraySize) { + assert Integer.bitCount(arraySize) == 1; + + // Ensure no change is done if we hit an OOM. + long[] prevKeys = this.keys; + try { + int emptyElementSlot = 1; + this.keys = (new long[arraySize + emptyElementSlot]); + } catch (OutOfMemoryError e) { + this.keys = prevKeys; + throw new BufferAllocationException( + "Not enough memory to allocate buffers for rehashing: %,d -> %,d", + e, this.keys == null ? 0 : size(), arraySize); + } + + this.resizeAt = expandAtCount(arraySize, loadFactor); + this.mask = arraySize - 1; + } + + /** + * This method is invoked when there is a new key to be inserted into the buffer but there is not + * enough empty slots to do so. + * + *

New buffers are allocated. If this succeeds, we know we can proceed with rehashing so we + * assign the pending element to the previous buffer (possibly violating the invariant of having + * at least one empty slot) and rehash all keys, substituting new buffers at the end. + */ + protected void allocateThenInsertThenRehash(int slot, long pendingKey) { + assert assigned == resizeAt && ((keys[slot]) == 0) && !((pendingKey) == 0); + + // Try to allocate new buffers first. If we OOM, we leave in a consistent state. + final long[] prevKeys = this.keys; + allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor)); + assert this.keys.length > prevKeys.length; + + // We have succeeded at allocating new data so insert the pending key/value at + // the free slot in the old arrays before rehashing. + prevKeys[slot] = pendingKey; + + // Rehash old keys, including the pending key. + rehash(prevKeys); + } + + /** Shift all the slot-conflicting keys allocated to (and including) slot. */ + protected void shiftConflictingKeys(int gapSlot) { + final long[] keys = this.keys; + final int mask = this.mask; + + // Perform shifts of conflicting keys to fill in the gap. + int distance = 0; + while (true) { + final int slot = (gapSlot + (++distance)) & mask; + final long existing = keys[slot]; + if (((existing) == 0)) { + break; + } + + final int idealSlot = hashKey(existing); + final int shift = (slot - idealSlot) & mask; + if (shift >= distance) { + // Entry at this position was originally at or before the gap slot. + // Move the conflict-shifted entry to the gap's position and repeat the procedure + // for any entries to the right of the current position, treating it + // as the new gap. + keys[gapSlot] = existing; + gapSlot = slot; + distance = 0; + } + } + + // Mark the last found gap slot without a conflict as empty. 
+ keys[gapSlot] = 0L; + assigned--; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/LongIntHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/LongIntHashMap.java index 6c28f6b947e..5240beeeced 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hppc/LongIntHashMap.java +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/LongIntHashMap.java @@ -17,14 +17,7 @@ package org.apache.lucene.util.hppc; -import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo; -import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_EXPECTED_ELEMENTS; -import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_LOAD_FACTOR; -import static org.apache.lucene.util.hppc.HashContainers.ITERATION_SEED; -import static org.apache.lucene.util.hppc.HashContainers.MAX_HASH_ARRAY_LENGTH; -import static org.apache.lucene.util.hppc.HashContainers.MAX_LOAD_FACTOR; -import static org.apache.lucene.util.hppc.HashContainers.MIN_HASH_ARRAY_LENGTH; -import static org.apache.lucene.util.hppc.HashContainers.MIN_LOAD_FACTOR; +import static org.apache.lucene.util.hppc.HashContainers.*; import java.util.Arrays; import java.util.Iterator; @@ -103,10 +96,10 @@ public class LongIntHashMap ensureCapacity(expectedElements); } - /** Create a hash map from all key-value pairs of another container. */ - public LongIntHashMap(Iterable container) { - this(); - putAll(container); + /** Create a hash map from all key-value pairs of another map. */ + public LongIntHashMap(LongIntHashMap map) { + this(map.size()); + putAll(map); } public int put(long key, int value) { @@ -114,8 +107,8 @@ public class LongIntHashMap final int mask = this.mask; if (((key) == 0)) { + int previousValue = hasEmptyKey ? 
values[mask + 1] : 0; hasEmptyKey = true; - int previousValue = values[mask + 1]; values[mask + 1] = value; return previousValue; } else { @@ -212,6 +205,9 @@ public class LongIntHashMap public int remove(long key) { final int mask = this.mask; if (((key) == 0)) { + if (!hasEmptyKey) { + return 0; + } hasEmptyKey = false; int previousValue = values[mask + 1]; values[mask + 1] = 0; @@ -364,6 +360,7 @@ public class LongIntHashMap int previousValue = values[index]; if (index > mask) { + assert index == mask + 1; hasEmptyKey = false; values[index] = 0; } else { @@ -409,7 +406,8 @@ public class LongIntHashMap @Override public boolean equals(Object obj) { - return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)); + return (this == obj) + || (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj))); } /** Return true if all keys of some other container exist in this container. */ @@ -643,7 +641,7 @@ public class LongIntHashMap cloned.keys = keys.clone(); cloned.values = values.clone(); cloned.hasEmptyKey = hasEmptyKey; - cloned.iterationSeed = nextIterationSeed(); + cloned.iterationSeed = ITERATION_SEED.incrementAndGet(); return cloned; } catch (CloneNotSupportedException e) { throw new RuntimeException(e); @@ -782,64 +780,6 @@ public class LongIntHashMap rehash(prevKeys, prevValues); } - static int nextBufferSize(int arraySize, int elements, double loadFactor) { - assert checkPowerOfTwo(arraySize); - if (arraySize == MAX_HASH_ARRAY_LENGTH) { - throw new BufferAllocationException( - "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", - elements, loadFactor); - } - - return arraySize << 1; - } - - static int expandAtCount(int arraySize, double loadFactor) { - assert checkPowerOfTwo(arraySize); - // Take care of hash container invariant (there has to be at least one empty slot to ensure - // the lookup loop finds either the element or an empty slot). 
- return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor)); - } - - static boolean checkPowerOfTwo(int arraySize) { - // These are internals, we can just assert without retrying. - assert arraySize > 1; - assert nextHighestPowerOfTwo(arraySize) == arraySize; - return true; - } - - static int minBufferSize(int elements, double loadFactor) { - if (elements < 0) { - throw new IllegalArgumentException("Number of elements must be >= 0: " + elements); - } - - long length = (long) Math.ceil(elements / loadFactor); - if (length == elements) { - length++; - } - length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length)); - - if (length > MAX_HASH_ARRAY_LENGTH) { - throw new BufferAllocationException( - "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", - elements, loadFactor); - } - - return (int) length; - } - - static void checkLoadFactor( - double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) { - if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) { - throw new BufferAllocationException( - "The load factor should be in range [%.2f, %.2f]: %f", - minAllowedInclusive, maxAllowedInclusive, loadFactor); - } - } - - static int iterationIncrement(int seed) { - return 29 + ((seed & 7) << 1); // Small odd integer. - } - /** * Shift all the slot-conflicting keys and values allocated to (and including) slot. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/LongObjectHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/LongObjectHashMap.java index c72bd35b4bc..ae6c111789b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hppc/LongObjectHashMap.java +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/LongObjectHashMap.java @@ -17,7 +17,6 @@ package org.apache.lucene.util.hppc; -import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo; import static org.apache.lucene.util.hppc.HashContainers.*; import java.util.Arrays; @@ -98,10 +97,10 @@ public class LongObjectHashMap ensureCapacity(expectedElements); } - /** Create a hash map from all key-value pairs of another container. */ - public LongObjectHashMap(Iterable> container) { - this(); - putAll(container); + /** Create a hash map from all key-value pairs of another map. */ + public LongObjectHashMap(LongObjectHashMap map) { + this(map.size()); + putAll(map); } public VType put(long key, VType value) { @@ -109,8 +108,8 @@ public class LongObjectHashMap final int mask = this.mask; if (((key) == 0)) { + VType previousValue = hasEmptyKey ? 
(VType) values[mask + 1] : null; hasEmptyKey = true; - VType previousValue = (VType) values[mask + 1]; values[mask + 1] = value; return previousValue; } else { @@ -173,6 +172,9 @@ public class LongObjectHashMap public VType remove(long key) { final int mask = this.mask; if (((key) == 0)) { + if (!hasEmptyKey) { + return null; + } hasEmptyKey = false; VType previousValue = (VType) values[mask + 1]; values[mask + 1] = 0; @@ -325,6 +327,7 @@ public class LongObjectHashMap VType previousValue = (VType) values[index]; if (index > mask) { + assert index == mask + 1; hasEmptyKey = false; values[index] = 0; } else { @@ -370,7 +373,8 @@ public class LongObjectHashMap @Override public boolean equals(Object obj) { - return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)); + return (this == obj) + || (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj))); } /** Return true if all keys of some other container exist in this container. */ @@ -612,7 +616,7 @@ public class LongObjectHashMap cloned.keys = keys.clone(); cloned.values = values.clone(); cloned.hasEmptyKey = hasEmptyKey; - cloned.iterationSeed = nextIterationSeed(); + cloned.iterationSeed = ITERATION_SEED.incrementAndGet(); return cloned; } catch (CloneNotSupportedException e) { throw new RuntimeException(e); @@ -751,64 +755,6 @@ public class LongObjectHashMap rehash(prevKeys, prevValues); } - static int nextBufferSize(int arraySize, int elements, double loadFactor) { - assert checkPowerOfTwo(arraySize); - if (arraySize == MAX_HASH_ARRAY_LENGTH) { - throw new BufferAllocationException( - "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", - elements, loadFactor); - } - - return arraySize << 1; - } - - static int expandAtCount(int arraySize, double loadFactor) { - assert checkPowerOfTwo(arraySize); - // Take care of hash container invariant (there has to be at least one empty slot to ensure - // the lookup loop finds either 
the element or an empty slot). - return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor)); - } - - static boolean checkPowerOfTwo(int arraySize) { - // These are internals, we can just assert without retrying. - assert arraySize > 1; - assert nextHighestPowerOfTwo(arraySize) == arraySize; - return true; - } - - static int minBufferSize(int elements, double loadFactor) { - if (elements < 0) { - throw new IllegalArgumentException("Number of elements must be >= 0: " + elements); - } - - long length = (long) Math.ceil(elements / loadFactor); - if (length == elements) { - length++; - } - length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length)); - - if (length > MAX_HASH_ARRAY_LENGTH) { - throw new BufferAllocationException( - "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", - elements, loadFactor); - } - - return (int) length; - } - - static void checkLoadFactor( - double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) { - if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) { - throw new BufferAllocationException( - "The load factor should be in range [%.2f, %.2f]: %f", - minAllowedInclusive, maxAllowedInclusive, loadFactor); - } - } - - static int iterationIncrement(int seed) { - return 29 + ((seed & 7) << 1); // Small odd integer. - } - /** * Shift all the slot-conflicting keys and values allocated to (and including) slot. 
*/ diff --git a/lucene/core/src/test/org/apache/lucene/document/TestLongHashSet.java b/lucene/core/src/test/org/apache/lucene/document/TestDocValuesLongHashSet.java similarity index 80% rename from lucene/core/src/test/org/apache/lucene/document/TestLongHashSet.java rename to lucene/core/src/test/org/apache/lucene/document/TestDocValuesLongHashSet.java index 510d68d7a2b..f252d235541 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestLongHashSet.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestDocValuesLongHashSet.java @@ -23,9 +23,9 @@ import java.util.stream.Collectors; import java.util.stream.LongStream; import org.apache.lucene.tests.util.LuceneTestCase; -public class TestLongHashSet extends LuceneTestCase { +public class TestDocValuesLongHashSet extends LuceneTestCase { - private void assertEquals(Set set1, LongHashSet longHashSet) { + private void assertEquals(Set set1, DocValuesLongHashSet longHashSet) { assertEquals(set1.size(), longHashSet.size()); Set set2 = longHashSet.stream().boxed().collect(Collectors.toSet()); @@ -47,12 +47,13 @@ public class TestLongHashSet extends LuceneTestCase { assertTrue(set1.stream().allMatch(longHashSet::contains)); } - private void assertNotEquals(Set set1, LongHashSet longHashSet) { + private void assertNotEquals(Set set1, DocValuesLongHashSet longHashSet) { Set set2 = longHashSet.stream().boxed().collect(Collectors.toSet()); LuceneTestCase.assertNotEquals(set1, set2); - LongHashSet set3 = new LongHashSet(set1.stream().mapToLong(Long::longValue).sorted().toArray()); + DocValuesLongHashSet set3 = + new DocValuesLongHashSet(set1.stream().mapToLong(Long::longValue).sorted().toArray()); LuceneTestCase.assertNotEquals(set2, set3.stream().boxed().collect(Collectors.toSet())); @@ -61,7 +62,7 @@ public class TestLongHashSet extends LuceneTestCase { public void testEmpty() { Set set1 = new HashSet<>(); - LongHashSet set2 = new LongHashSet(new long[] {}); + DocValuesLongHashSet set2 = new 
DocValuesLongHashSet(new long[] {}); assertEquals(0, set2.size()); assertEquals(Long.MAX_VALUE, set2.minValue); assertEquals(Long.MIN_VALUE, set2.maxValue); @@ -70,14 +71,14 @@ public class TestLongHashSet extends LuceneTestCase { public void testOneValue() { Set set1 = new HashSet<>(Arrays.asList(42L)); - LongHashSet set2 = new LongHashSet(new long[] {42L}); + DocValuesLongHashSet set2 = new DocValuesLongHashSet(new long[] {42L}); assertEquals(1, set2.size()); assertEquals(42L, set2.minValue); assertEquals(42L, set2.maxValue); assertEquals(set1, set2); set1 = new HashSet<>(Arrays.asList(Long.MIN_VALUE)); - set2 = new LongHashSet(new long[] {Long.MIN_VALUE}); + set2 = new DocValuesLongHashSet(new long[] {Long.MIN_VALUE}); assertEquals(1, set2.size()); assertEquals(Long.MIN_VALUE, set2.minValue); assertEquals(Long.MIN_VALUE, set2.maxValue); @@ -86,14 +87,14 @@ public class TestLongHashSet extends LuceneTestCase { public void testTwoValues() { Set set1 = new HashSet<>(Arrays.asList(42L, Long.MAX_VALUE)); - LongHashSet set2 = new LongHashSet(new long[] {42L, Long.MAX_VALUE}); + DocValuesLongHashSet set2 = new DocValuesLongHashSet(new long[] {42L, Long.MAX_VALUE}); assertEquals(2, set2.size()); assertEquals(42, set2.minValue); assertEquals(Long.MAX_VALUE, set2.maxValue); assertEquals(set1, set2); set1 = new HashSet<>(Arrays.asList(Long.MIN_VALUE, 42L)); - set2 = new LongHashSet(new long[] {Long.MIN_VALUE, 42L}); + set2 = new DocValuesLongHashSet(new long[] {Long.MIN_VALUE, 42L}); assertEquals(2, set2.size()); assertEquals(Long.MIN_VALUE, set2.minValue); assertEquals(42, set2.maxValue); @@ -101,14 +102,15 @@ public class TestLongHashSet extends LuceneTestCase { } public void testSameValue() { - LongHashSet set2 = new LongHashSet(new long[] {42L, 42L}); + DocValuesLongHashSet set2 = new DocValuesLongHashSet(new long[] {42L, 42L}); assertEquals(1, set2.size()); assertEquals(42L, set2.minValue); assertEquals(42L, set2.maxValue); } public void testSameMissingPlaceholder() { 
- LongHashSet set2 = new LongHashSet(new long[] {Long.MIN_VALUE, Long.MIN_VALUE}); + DocValuesLongHashSet set2 = + new DocValuesLongHashSet(new long[] {Long.MIN_VALUE, Long.MIN_VALUE}); assertEquals(1, set2.size()); assertEquals(Long.MIN_VALUE, set2.minValue); assertEquals(Long.MIN_VALUE, set2.maxValue); @@ -130,7 +132,7 @@ public class TestLongHashSet extends LuceneTestCase { } Set set1 = LongStream.of(values).boxed().collect(Collectors.toSet()); Arrays.sort(values); - LongHashSet set2 = new LongHashSet(values); + DocValuesLongHashSet set2 = new DocValuesLongHashSet(values); assertEquals(set1, set2); } } diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntHashSet.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntHashSet.java new file mode 100644 index 00000000000..b911e0d8a4f --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntHashSet.java @@ -0,0 +1,469 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.util.hppc; + +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.not; + +import com.carrotsearch.randomizedtesting.RandomizedTest; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.hamcrest.MatcherAssert; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for {@link IntHashSet}. + * + *

Mostly forked and trimmed from com.carrotsearch.hppc.IntHashSetTest + * + *

github: https://github.com/carrotsearch/hppc release: 0.9.0 + */ +public class TestIntHashSet extends LuceneTestCase { + private static final int EMPTY_KEY = 0; + + private final int keyE = 0; + private final int key1 = cast(1); + private final int key2 = cast(2); + private final int key3 = cast(3); + private final int key4 = cast(4); + + /** Per-test fresh initialized instance. */ + private IntHashSet set; + + /** Convert to target type from an integer used to test stuff. */ + private int cast(int v) { + return v; + } + + @Before + public void initialize() { + set = new IntHashSet(); + } + + @Test + public void testAddAllViaInterface() { + set.addAll(key1, key2); + + IntHashSet iface = new IntHashSet(); + iface.clear(); + iface.addAll(set); + MatcherAssert.assertThat(set(iface.toArray()), is(equalTo(set(key1, key2)))); + } + + @Test + public void testIndexMethods() { + set.add(keyE); + set.add(key1); + + MatcherAssert.assertThat(set.indexOf(keyE), is(greaterThanOrEqualTo(0))); + MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0))); + MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0))); + + MatcherAssert.assertThat(set.indexExists(set.indexOf(keyE)), is(true)); + MatcherAssert.assertThat(set.indexExists(set.indexOf(key1)), is(true)); + MatcherAssert.assertThat(set.indexExists(set.indexOf(key2)), is(false)); + + MatcherAssert.assertThat(set.indexGet(set.indexOf(keyE)), is(equalTo(keyE))); + MatcherAssert.assertThat(set.indexGet(set.indexOf(key1)), is(equalTo(key1))); + + expectThrows( + AssertionError.class, + () -> { + set.indexGet(set.indexOf(key2)); + }); + + MatcherAssert.assertThat(set.indexReplace(set.indexOf(keyE), keyE), is(equalTo(keyE))); + MatcherAssert.assertThat(set.indexReplace(set.indexOf(key1), key1), is(equalTo(key1))); + + set.indexInsert(set.indexOf(key2), key2); + MatcherAssert.assertThat(set.indexGet(set.indexOf(key2)), is(equalTo(key2))); + MatcherAssert.assertThat(set.size(), is(equalTo(3))); + + 
set.indexRemove(set.indexOf(keyE)); + MatcherAssert.assertThat(set.size(), is(equalTo(2))); + set.indexRemove(set.indexOf(key2)); + MatcherAssert.assertThat(set.size(), is(equalTo(1))); + MatcherAssert.assertThat(set.indexOf(keyE), is(lessThan(0))); + MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0))); + MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0))); + } + + @Test + public void testCursorIndexIsValid() { + set.add(keyE); + set.add(key1); + set.add(key2); + + for (IntCursor c : set) { + MatcherAssert.assertThat(set.indexExists(c.index), is(true)); + MatcherAssert.assertThat(set.indexGet(c.index), is(equalTo(c.value))); + } + } + + @Test + public void testEmptyKey() { + IntHashSet set = new IntHashSet(); + + boolean b = set.add(EMPTY_KEY); + + MatcherAssert.assertThat(b, is(true)); + MatcherAssert.assertThat(set.add(EMPTY_KEY), is(false)); + MatcherAssert.assertThat(set.size(), is(equalTo(1))); + MatcherAssert.assertThat(set.isEmpty(), is(false)); + MatcherAssert.assertThat(set(set.toArray()), is(equalTo(set(EMPTY_KEY)))); + MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true)); + int index = set.indexOf(EMPTY_KEY); + MatcherAssert.assertThat(set.indexExists(index), is(true)); + MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY))); + MatcherAssert.assertThat(set.indexReplace(index, EMPTY_KEY), is(equalTo(EMPTY_KEY))); + + if (random().nextBoolean()) { + b = set.remove(EMPTY_KEY); + MatcherAssert.assertThat(b, is(true)); + } else { + set.indexRemove(index); + } + + MatcherAssert.assertThat(set.size(), is(equalTo(0))); + MatcherAssert.assertThat(set.isEmpty(), is(true)); + MatcherAssert.assertThat(set(set.toArray()), is(empty())); + MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(false)); + index = set.indexOf(EMPTY_KEY); + MatcherAssert.assertThat(set.indexExists(index), is(false)); + + set.indexInsert(index, EMPTY_KEY); + set.add(key1); + MatcherAssert.assertThat(set.size(), is(equalTo(2))); + 
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true)); + index = set.indexOf(EMPTY_KEY); + MatcherAssert.assertThat(set.indexExists(index), is(true)); + MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY))); + } + + @Test + public void testEnsureCapacity() { + final AtomicInteger expands = new AtomicInteger(); + IntHashSet set = + new IntHashSet(0) { + @Override + protected void allocateBuffers(int arraySize) { + super.allocateBuffers(arraySize); + expands.incrementAndGet(); + } + }; + + // Add some elements. + final int max = rarely() ? 0 : randomIntBetween(0, 250); + for (int i = 0; i < max; i++) { + set.add(cast(i)); + } + + final int additions = randomIntBetween(max, max + 5000); + set.ensureCapacity(additions + set.size()); + final int before = expands.get(); + for (int i = 0; i < additions; i++) { + set.add(cast(i)); + } + assertEquals(before, expands.get()); + } + + @Test + public void testInitiallyEmpty() { + assertEquals(0, set.size()); + } + + @Test + public void testAdd() { + assertTrue(set.add(key1)); + assertFalse(set.add(key1)); + assertEquals(1, set.size()); + } + + @Test + public void testAdd2() { + set.addAll(key1, key1); + assertEquals(1, set.size()); + assertEquals(1, set.addAll(key1, key2)); + assertEquals(2, set.size()); + } + + @Test + public void testAddVarArgs() { + set.addAll(asArray(0, 1, 2, 1, 0)); + assertEquals(3, set.size()); + assertSortedListEquals(set.toArray(), 0, 1, 2); + } + + @Test + public void testAddAll() { + IntHashSet set2 = new IntHashSet(); + set2.addAll(asArray(1, 2)); + set.addAll(asArray(0, 1)); + + assertEquals(1, set.addAll(set2)); + assertEquals(0, set.addAll(set2)); + + assertEquals(3, set.size()); + assertSortedListEquals(set.toArray(), 0, 1, 2); + } + + @Test + public void testRemove() { + set.addAll(asArray(0, 1, 2, 3, 4)); + + assertTrue(set.remove(key2)); + assertFalse(set.remove(key2)); + assertEquals(4, set.size()); + assertSortedListEquals(set.toArray(), 0, 1, 3, 4); + } + + @Test + 
public void testInitialCapacityAndGrowth() { + for (int i = 0; i < 256; i++) { + IntHashSet set = new IntHashSet(i); + + for (int j = 0; j < i; j++) { + set.add(cast(j)); + } + + assertEquals(i, set.size()); + } + } + + @Test + public void testBug_HPPC73_FullCapacityGet() { + final AtomicInteger reallocations = new AtomicInteger(); + final int elements = 0x7F; + set = + new IntHashSet(elements, 1f) { + @Override + protected double verifyLoadFactor(double loadFactor) { + // Skip load factor sanity range checking. + return loadFactor; + } + + @Override + protected void allocateBuffers(int arraySize) { + super.allocateBuffers(arraySize); + reallocations.incrementAndGet(); + } + }; + + int reallocationsBefore = reallocations.get(); + assertEquals(reallocationsBefore, 1); + for (int i = 1; i <= elements; i++) { + set.add(cast(i)); + } + + // Non-existent key. + int outOfSet = cast(elements + 1); + set.remove(outOfSet); + assertFalse(set.contains(outOfSet)); + assertEquals(reallocationsBefore, reallocations.get()); + + // Should not expand because we're replacing an existing element. + assertFalse(set.add(key1)); + assertEquals(reallocationsBefore, reallocations.get()); + + // Remove from a full set. + set.remove(key1); + assertEquals(reallocationsBefore, reallocations.get()); + set.add(key1); + + // Check expand on "last slot of a full map" condition. 
+ set.add(outOfSet); + assertEquals(reallocationsBefore + 1, reallocations.get()); + } + + @Test + public void testRemoveAllFromLookupContainer() { + set.addAll(asArray(0, 1, 2, 3, 4)); + + IntHashSet list2 = new IntHashSet(); + list2.addAll(asArray(1, 3, 5)); + + assertEquals(2, set.removeAll(list2)); + assertEquals(3, set.size()); + assertSortedListEquals(set.toArray(), 0, 2, 4); + } + + @Test + public void testClear() { + set.addAll(asArray(1, 2, 3)); + set.clear(); + assertEquals(0, set.size()); + } + + @Test + public void testRelease() { + set.addAll(asArray(1, 2, 3)); + set.release(); + assertEquals(0, set.size()); + set.addAll(asArray(1, 2, 3)); + assertEquals(3, set.size()); + } + + @Test + public void testIterable() { + set.addAll(asArray(1, 2, 2, 3, 4)); + set.remove(key2); + assertEquals(3, set.size()); + + int count = 0; + for (IntCursor cursor : set) { + count++; + assertTrue(set.contains(cursor.value)); + } + assertEquals(count, set.size()); + + set.clear(); + assertFalse(set.iterator().hasNext()); + } + + /** Runs random insertions/deletions/clearing and compares the results against {@link HashSet}. 
*/ + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + public void testAgainstHashSet() { + final Random rnd = RandomizedTest.getRandom(); + final HashSet other = new HashSet(); + + for (int size = 1000; size < 20000; size += 4000) { + other.clear(); + set.clear(); + + for (int round = 0; round < size * 20; round++) { + int key = cast(rnd.nextInt(size)); + if (rnd.nextInt(50) == 0) { + key = 0; + } + + if (rnd.nextBoolean()) { + if (rnd.nextBoolean()) { + int index = set.indexOf(key); + if (set.indexExists(index)) { + set.indexReplace(index, key); + } else { + set.indexInsert(index, key); + } + } else { + set.add(key); + } + other.add(key); + + assertTrue(set.contains(key)); + assertTrue(set.indexExists(set.indexOf(key))); + } else { + assertEquals(other.contains(key), set.contains(key)); + boolean removed; + if (set.contains(key) && rnd.nextBoolean()) { + set.indexRemove(set.indexOf(key)); + removed = true; + } else { + removed = set.remove(key); + } + assertEquals(other.remove(key), removed); + } + + assertEquals(other.size(), set.size()); + } + } + } + + @Test + public void testHashCodeEquals() { + IntHashSet l0 = new IntHashSet(); + assertEquals(0, l0.hashCode()); + assertEquals(l0, new IntHashSet()); + + IntHashSet l1 = IntHashSet.from(key1, key2, key3); + IntHashSet l2 = IntHashSet.from(key1, key2); + l2.add(key3); + + assertEquals(l1.hashCode(), l2.hashCode()); + assertEquals(l1, l2); + } + + @Test + public void testClone() { + this.set.addAll(key1, key2, key3); + + IntHashSet cloned = set.clone(); + cloned.remove(key1); + + assertSortedListEquals(set.toArray(), key1, key2, key3); + assertSortedListEquals(cloned.toArray(), key2, key3); + } + + @Test + public void testEqualsSameClass() { + IntHashSet l1 = IntHashSet.from(key1, key2, key3); + IntHashSet l2 = IntHashSet.from(key1, key2, key3); + IntHashSet l3 = IntHashSet.from(key1, key2, key4); + + MatcherAssert.assertThat(l1, is(equalTo(l2))); + MatcherAssert.assertThat(l1.hashCode(), 
is(equalTo(l2.hashCode()))); + MatcherAssert.assertThat(l1, is(not(equalTo(l3)))); + } + + @Test + public void testEqualsSubClass() { + class Sub extends IntHashSet {} + ; + + IntHashSet l1 = IntHashSet.from(key1, key2, key3); + IntHashSet l2 = new Sub(); + IntHashSet l3 = new Sub(); + l2.addAll(l1); + l3.addAll(l1); + + MatcherAssert.assertThat(l2, is(equalTo(l3))); + MatcherAssert.assertThat(l1, is(not(equalTo(l2)))); + } + + private static int randomIntBetween(int min, int max) { + return min + random().nextInt(max + 1 - min); + } + + private static Set set(int... elements) { + Set set = new HashSet<>(); + for (int element : elements) { + set.add(element); + } + return set; + } + + private static int[] asArray(int... elements) { + return elements; + } + + /** Check if the array's content is identical to a given sequence of elements. */ + private static void assertSortedListEquals(int[] array, int... elements) { + assertEquals(elements.length, array.length); + Arrays.sort(array); + assertArrayEquals(elements, array); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntIntHashMap.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntIntHashMap.java index d31afe49375..f1c036d2458 100644 --- a/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntIntHashMap.java +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntIntHashMap.java @@ -17,7 +17,9 @@ package org.apache.lucene.util.hppc; +import com.carrotsearch.randomizedtesting.RandomizedTest; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; @@ -35,51 +37,44 @@ import org.junit.Test; public class TestIntIntHashMap extends LuceneTestCase { /* Ready to use key values. 
*/ - protected int keyE = 0; - protected int key0 = cast(0), k0 = key0; - protected int key1 = cast(1), k1 = key1; - protected int key2 = cast(2), k2 = key2; - protected int key3 = cast(3), k3 = key3; - protected int key4 = cast(4), k4 = key4; - protected int key5 = cast(5), k5 = key5; - protected int key6 = cast(6), k6 = key6; - protected int key7 = cast(7), k7 = key7; - protected int key8 = cast(8), k8 = key8; - protected int key9 = cast(9), k9 = key9; + private final int keyE = 0; + private final int key1 = cast(1); + private final int key2 = cast(2); + private final int key3 = cast(3); + private final int key4 = cast(4); /** Convert to target type from an integer used to test stuff. */ - public int cast(int v) { + private int cast(int v) { return v; } /** Create a new array of a given type and copy the arguments to this array. */ - /* */ - public final int[] newArray(int... elements) { + private int[] newArray(int... elements) { return elements; } - public static int randomIntBetween(int min, int max) { + private static int randomIntBetween(int min, int max) { return min + random().nextInt(max + 1 - min); } /** Check if the array's content is identical to a given sequence of elements. */ - public static void assertSortedListEquals(int[] array, int... elements) { + private static void assertSortedListEquals(int[] array, int... elements) { assertEquals(elements.length, array.length); Arrays.sort(array); Arrays.sort(elements); assertArrayEquals(elements, array); } - protected int value0 = vcast(0); - protected int value1 = vcast(1); - protected int value2 = vcast(2); - protected int value3 = vcast(3); - protected int value4 = vcast(4); + private final int value0 = vcast(0); + private final int value1 = vcast(1); + private final int value2 = vcast(2); + private final int value3 = vcast(3); + private final int value4 = vcast(4); /** Per-test fresh initialized instance. 
*/ - public IntIntHashMap map = newInstance(); + private IntIntHashMap map = newInstance(); - protected IntIntHashMap newInstance() { + private IntIntHashMap newInstance() { return new IntIntHashMap(); } @@ -101,13 +96,12 @@ public class TestIntIntHashMap extends LuceneTestCase { } /** Convert to target type from an integer used to test stuff. */ - protected int vcast(int value) { + private int vcast(int value) { return value; } /** Create a new array of a given type and copy the arguments to this array. */ - /* */ - protected final int[] newvArray(int... elements) { + private int[] newvArray(int... elements) { return elements; } @@ -180,7 +174,6 @@ public class TestIntIntHashMap extends LuceneTestCase { AssertionError.class, () -> { map.indexGet(map.indexOf(key2)); - fail(); }); assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3)); @@ -342,6 +335,15 @@ public class TestIntIntHashMap extends LuceneTestCase { map.remove(empty); assertEquals(0, map.get(empty)); + assertEquals(0, map.size()); + + assertEquals(0, map.put(empty, value1)); + assertEquals(value1, map.put(empty, value2)); + map.clear(); + assertFalse(map.indexExists(map.indexOf(empty))); + assertEquals(0, map.put(empty, value1)); + map.clear(); + assertEquals(0, map.remove(empty)); } /* */ @@ -380,6 +382,11 @@ public class TestIntIntHashMap extends LuceneTestCase { // These are internals, but perhaps worth asserting too. assertEquals(0, map.assigned); + // Check values are cleared. + assertEquals(0, map.put(key1, value1)); + assertEquals(0, map.remove(key2)); + map.clear(); + // Check if the map behaves properly upon subsequent use. testPutWithExpansions(); } @@ -455,13 +462,13 @@ public class TestIntIntHashMap extends LuceneTestCase { assertEquals(reallocationsBefore, reallocations.get()); // Should not expand because we're replacing an existing element. - map.put(k1, value2); + map.put(key1, value2); assertEquals(reallocationsBefore, reallocations.get()); // Remove from a full map. 
- map.remove(k1); + map.remove(key1); assertEquals(reallocationsBefore, reallocations.get()); - map.put(k1, value2); + map.put(key1, value2); // Check expand on "last slot of a full map" condition. map.put(outOfSet, value1); @@ -499,6 +506,61 @@ public class TestIntIntHashMap extends LuceneTestCase { assertFalse(l2.equals(l1)); } + /** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */ + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + public void testAgainstHashMap() { + final Random rnd = RandomizedTest.getRandom(); + final HashMap other = new HashMap(); + + for (int size = 1000; size < 20000; size += 4000) { + other.clear(); + map.clear(); + + for (int round = 0; round < size * 20; round++) { + int key = cast(rnd.nextInt(size)); + if (rnd.nextInt(50) == 0) { + key = 0; + } + + int value = vcast(rnd.nextInt()); + + boolean hadOldValue = map.containsKey(key); + if (rnd.nextBoolean()) { + int previousValue; + if (rnd.nextBoolean()) { + int index = map.indexOf(key); + if (map.indexExists(index)) { + previousValue = map.indexReplace(index, value); + } else { + map.indexInsert(index, key, value); + previousValue = 0; + } + } else { + previousValue = map.put(key, value); + } + assertEquals( + other.put(key, value), ((previousValue) == 0) && !hadOldValue ? null : previousValue); + + assertEquals(value, map.get(key)); + assertEquals(value, map.indexGet(map.indexOf(key))); + assertTrue(map.containsKey(key)); + assertTrue(map.indexExists(map.indexOf(key))); + } else { + assertEquals(other.containsKey(key), map.containsKey(key)); + int previousValue = + map.containsKey(key) && rnd.nextBoolean() + ? map.indexRemove(map.indexOf(key)) + : map.remove(key); + assertEquals( + other.remove(key), ((previousValue) == 0) && !hadOldValue ? 
null : previousValue); + } + + assertEquals(other.size(), map.size()); + } + } + } + /* * */ @@ -549,16 +611,16 @@ public class TestIntIntHashMap extends LuceneTestCase { @Test public void testEqualsSameClass() { IntIntHashMap l1 = newInstance(); - l1.put(k1, value0); - l1.put(k2, value1); - l1.put(k3, value2); + l1.put(key1, value0); + l1.put(key2, value1); + l1.put(key3, value2); IntIntHashMap l2 = new IntIntHashMap(l1); l2.putAll(l1); IntIntHashMap l3 = new IntIntHashMap(l2); l3.putAll(l2); - l3.put(k4, value0); + l3.put(key4, value0); assertEquals(l2, l1); assertEquals(l2.hashCode(), l1.hashCode()); @@ -571,13 +633,13 @@ public class TestIntIntHashMap extends LuceneTestCase { class Sub extends IntIntHashMap {} IntIntHashMap l1 = newInstance(); - l1.put(k1, value0); - l1.put(k2, value1); - l1.put(k3, value2); + l1.put(key1, value0); + l1.put(key2, value1); + l1.put(key3, value2); IntIntHashMap l2 = new Sub(); l2.putAll(l1); - l2.put(k4, value3); + l2.put(key4, value3); IntIntHashMap l3 = new Sub(); l3.putAll(l2); diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntObjectHashMap.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntObjectHashMap.java index abfba94f573..e450e17f7cb 100644 --- a/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntObjectHashMap.java +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntObjectHashMap.java @@ -17,7 +17,9 @@ package org.apache.lucene.util.hppc; +import com.carrotsearch.randomizedtesting.RandomizedTest; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; @@ -36,35 +38,28 @@ import org.junit.Test; public class TestIntObjectHashMap extends LuceneTestCase { /* Ready to use key values. 
*/ - protected int keyE = 0; - protected int key0 = cast(0), k0 = key0; - protected int key1 = cast(1), k1 = key1; - protected int key2 = cast(2), k2 = key2; - protected int key3 = cast(3), k3 = key3; - protected int key4 = cast(4), k4 = key4; - protected int key5 = cast(5), k5 = key5; - protected int key6 = cast(6), k6 = key6; - protected int key7 = cast(7), k7 = key7; - protected int key8 = cast(8), k8 = key8; - protected int key9 = cast(9), k9 = key9; + private final int keyE = 0; + private final int key1 = cast(1); + private final int key2 = cast(2); + private final int key3 = cast(3); + private final int key4 = cast(4); /** Convert to target type from an integer used to test stuff. */ - public int cast(int v) { + private int cast(int v) { return v; } /** Create a new array of a given type and copy the arguments to this array. */ - /* */ - public final int[] newArray(int... elements) { + private int[] newArray(int... elements) { return elements; } - public static int randomIntBetween(int min, int max) { + private static int randomIntBetween(int min, int max) { return min + random().nextInt(max + 1 - min); } /** Check if the array's content is identical to a given sequence of elements. */ - public static void assertSortedListEquals(int[] array, int... elements) { + private static void assertSortedListEquals(int[] array, int... elements) { assertEquals(elements.length, array.length); Arrays.sort(array); Arrays.sort(elements); @@ -72,22 +67,22 @@ public class TestIntObjectHashMap extends LuceneTestCase { } /** Check if the array's content is identical to a given sequence of elements. */ - public static void assertSortedListEquals(Object[] array, Object... elements) { + private static void assertSortedListEquals(Object[] array, Object... 
elements) { assertEquals(elements.length, array.length); Arrays.sort(array); assertArrayEquals(elements, array); } - protected int value0 = vcast(0); - protected int value1 = vcast(1); - protected int value2 = vcast(2); - protected int value3 = vcast(3); - protected int value4 = vcast(4); + private final int value0 = vcast(0); + private final int value1 = vcast(1); + private final int value2 = vcast(2); + private final int value3 = vcast(3); + private final int value4 = vcast(4); /** Per-test fresh initialized instance. */ - public IntObjectHashMap map = newInstance(); + private IntObjectHashMap map = newInstance(); - protected IntObjectHashMap newInstance() { + private IntObjectHashMap newInstance() { return new IntObjectHashMap(); } @@ -109,13 +104,13 @@ public class TestIntObjectHashMap extends LuceneTestCase { } /** Convert to target type from an integer used to test stuff. */ - protected int vcast(int value) { + private int vcast(int value) { return value; } /** Create a new array of a given type and copy the arguments to this array. */ /* */ - protected final Object[] newvArray(Object... elements) { + private Object[] newvArray(Object... 
elements) { return elements; } @@ -188,7 +183,6 @@ public class TestIntObjectHashMap extends LuceneTestCase { AssertionError.class, () -> { map.indexGet(map.indexOf(key2)); - fail(); }); assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3)); @@ -353,6 +347,7 @@ public class TestIntObjectHashMap extends LuceneTestCase { map.remove(empty); assertEquals(null, map.get(empty)); + assertEquals(0, map.size()); map.put(empty, null); assertEquals(1, map.size()); @@ -363,6 +358,14 @@ public class TestIntObjectHashMap extends LuceneTestCase { assertEquals(0, map.size()); assertFalse(map.containsKey(empty)); assertNull(map.get(empty)); + + assertEquals(null, map.put(empty, value1)); + assertEquals(value1, map.put(empty, value2)); + map.clear(); + assertFalse(map.indexExists(map.indexOf(empty))); + assertEquals(null, map.put(empty, value1)); + map.clear(); + assertEquals(null, map.remove(empty)); } /* */ @@ -401,6 +404,11 @@ public class TestIntObjectHashMap extends LuceneTestCase { // These are internals, but perhaps worth asserting too. assertEquals(0, map.assigned); + // Check values are cleared. + assertEquals(null, map.put(key1, value1)); + assertEquals(null, map.remove(key2)); + map.clear(); + // Check if the map behaves properly upon subsequent use. testPutWithExpansions(); } @@ -476,13 +484,13 @@ public class TestIntObjectHashMap extends LuceneTestCase { assertEquals(reallocationsBefore, reallocations.get()); // Should not expand because we're replacing an existing element. - map.put(k1, value2); + map.put(key1, value2); assertEquals(reallocationsBefore, reallocations.get()); // Remove from a full map. - map.remove(k1); + map.remove(key1); assertEquals(reallocationsBefore, reallocations.get()); - map.put(k1, value2); + map.put(key1, value2); // Check expand on "last slot of a full map" condition. 
map.put(outOfSet, value1); @@ -520,6 +528,58 @@ public class TestIntObjectHashMap extends LuceneTestCase { assertFalse(l2.equals(l1)); } + /** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */ + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + public void testAgainstHashMap() { + final Random rnd = RandomizedTest.getRandom(); + final HashMap other = new HashMap(); + + for (int size = 1000; size < 20000; size += 4000) { + other.clear(); + map.clear(); + + for (int round = 0; round < size * 20; round++) { + int key = cast(rnd.nextInt(size)); + if (rnd.nextInt(50) == 0) { + key = 0; + } + + int value = vcast(rnd.nextInt()); + + if (rnd.nextBoolean()) { + Object previousValue; + if (rnd.nextBoolean()) { + int index = map.indexOf(key); + if (map.indexExists(index)) { + previousValue = map.indexReplace(index, value); + } else { + map.indexInsert(index, key, value); + previousValue = null; + } + } else { + previousValue = map.put(key, value); + } + assertEquals(other.put(key, value), previousValue); + + assertEquals(value, map.get(key)); + assertEquals(value, map.indexGet(map.indexOf(key))); + assertTrue(map.containsKey(key)); + assertTrue(map.indexExists(map.indexOf(key))); + } else { + assertEquals(other.containsKey(key), map.containsKey(key)); + Object previousValue = + map.containsKey(key) && rnd.nextBoolean() + ? 
map.indexRemove(map.indexOf(key)) + : map.remove(key); + assertEquals(other.remove(key), previousValue); + } + + assertEquals(other.size(), map.size()); + } + } + } + /* * */ @@ -570,16 +630,16 @@ public class TestIntObjectHashMap extends LuceneTestCase { @Test public void testEqualsSameClass() { IntObjectHashMap l1 = newInstance(); - l1.put(k1, value0); - l1.put(k2, value1); - l1.put(k3, value2); + l1.put(key1, value0); + l1.put(key2, value1); + l1.put(key3, value2); IntObjectHashMap l2 = new IntObjectHashMap(l1); l2.putAll(l1); IntObjectHashMap l3 = new IntObjectHashMap(l2); l3.putAll(l2); - l3.put(k4, value0); + l3.put(key4, value0); assertEquals(l2, l1); assertEquals(l2.hashCode(), l1.hashCode()); @@ -592,13 +652,13 @@ public class TestIntObjectHashMap extends LuceneTestCase { class Sub extends IntObjectHashMap {} IntObjectHashMap l1 = newInstance(); - l1.put(k1, value0); - l1.put(k2, value1); - l1.put(k3, value2); + l1.put(key1, value0); + l1.put(key2, value1); + l1.put(key3, value2); IntObjectHashMap l2 = new Sub(); l2.putAll(l1); - l2.put(k4, value3); + l2.put(key4, value3); IntObjectHashMap l3 = new Sub(); l3.putAll(l2); diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongHashSet.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongHashSet.java new file mode 100644 index 00000000000..14ea848ecb9 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongHashSet.java @@ -0,0 +1,464 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util.hppc; + +import static org.hamcrest.Matchers.*; + +import com.carrotsearch.randomizedtesting.RandomizedTest; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.hamcrest.MatcherAssert; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for {@link LongHashSet}. + * + *

Mostly forked and trimmed from com.carrotsearch.hppc.LongHashSetTest + * + *

github: https://github.com/carrotsearch/hppc release: 0.9.0 + */ +public class TestLongHashSet extends LuceneTestCase { + private static final long EMPTY_KEY = 0L; + + private final long keyE = 0; + private final long key1 = cast(1); + private final long key2 = cast(2); + private final long key3 = cast(3); + private final long key4 = cast(4); + + /** Per-test fresh initialized instance. */ + private LongHashSet set; + + /** Convert to target type from an integer used to test stuff. */ + private long cast(int v) { + return v; + } + + @Before + public void initialize() { + set = new LongHashSet(); + } + + @Test + public void testAddAllViaInterface() { + set.addAll(key1, key2); + + LongHashSet iface = new LongHashSet(); + iface.clear(); + iface.addAll(set); + MatcherAssert.assertThat(set(iface.toArray()), is(equalTo(set(key1, key2)))); + } + + @Test + public void testIndexMethods() { + set.add(keyE); + set.add(key1); + + MatcherAssert.assertThat(set.indexOf(keyE), is(greaterThanOrEqualTo(0))); + MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0))); + MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0))); + + MatcherAssert.assertThat(set.indexExists(set.indexOf(keyE)), is(true)); + MatcherAssert.assertThat(set.indexExists(set.indexOf(key1)), is(true)); + MatcherAssert.assertThat(set.indexExists(set.indexOf(key2)), is(false)); + + MatcherAssert.assertThat(set.indexGet(set.indexOf(keyE)), is(equalTo(keyE))); + MatcherAssert.assertThat(set.indexGet(set.indexOf(key1)), is(equalTo(key1))); + + expectThrows( + AssertionError.class, + () -> { + set.indexGet(set.indexOf(key2)); + }); + + MatcherAssert.assertThat(set.indexReplace(set.indexOf(keyE), keyE), is(equalTo(keyE))); + MatcherAssert.assertThat(set.indexReplace(set.indexOf(key1), key1), is(equalTo(key1))); + + set.indexInsert(set.indexOf(key2), key2); + MatcherAssert.assertThat(set.indexGet(set.indexOf(key2)), is(equalTo(key2))); + MatcherAssert.assertThat(set.size(), is(equalTo(3))); + + 
set.indexRemove(set.indexOf(keyE)); + MatcherAssert.assertThat(set.size(), is(equalTo(2))); + set.indexRemove(set.indexOf(key2)); + MatcherAssert.assertThat(set.size(), is(equalTo(1))); + MatcherAssert.assertThat(set.indexOf(keyE), is(lessThan(0))); + MatcherAssert.assertThat(set.indexOf(key1), is(greaterThanOrEqualTo(0))); + MatcherAssert.assertThat(set.indexOf(key2), is(lessThan(0))); + } + + @Test + public void testCursorIndexIsValid() { + set.add(keyE); + set.add(key1); + set.add(key2); + + for (LongCursor c : set) { + MatcherAssert.assertThat(set.indexExists(c.index), is(true)); + MatcherAssert.assertThat(set.indexGet(c.index), is(equalTo(c.value))); + } + } + + @Test + public void testEmptyKey() { + LongHashSet set = new LongHashSet(); + + boolean b = set.add(EMPTY_KEY); + + MatcherAssert.assertThat(b, is(true)); + MatcherAssert.assertThat(set.add(EMPTY_KEY), is(false)); + MatcherAssert.assertThat(set.size(), is(equalTo(1))); + MatcherAssert.assertThat(set.isEmpty(), is(false)); + MatcherAssert.assertThat(set(set.toArray()), is(equalTo(set(EMPTY_KEY)))); + MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true)); + int index = set.indexOf(EMPTY_KEY); + MatcherAssert.assertThat(set.indexExists(index), is(true)); + MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY))); + MatcherAssert.assertThat(set.indexReplace(index, EMPTY_KEY), is(equalTo(EMPTY_KEY))); + + if (random().nextBoolean()) { + b = set.remove(EMPTY_KEY); + MatcherAssert.assertThat(b, is(true)); + } else { + set.indexRemove(index); + } + + MatcherAssert.assertThat(set.size(), is(equalTo(0))); + MatcherAssert.assertThat(set.isEmpty(), is(true)); + MatcherAssert.assertThat(set(set.toArray()), is(empty())); + MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(false)); + index = set.indexOf(EMPTY_KEY); + MatcherAssert.assertThat(set.indexExists(index), is(false)); + + set.indexInsert(index, EMPTY_KEY); + set.add(key1); + MatcherAssert.assertThat(set.size(), is(equalTo(2))); + 
MatcherAssert.assertThat(set.contains(EMPTY_KEY), is(true)); + index = set.indexOf(EMPTY_KEY); + MatcherAssert.assertThat(set.indexExists(index), is(true)); + MatcherAssert.assertThat(set.indexGet(index), is(equalTo(EMPTY_KEY))); + } + + @Test + public void testEnsureCapacity() { + final AtomicInteger expands = new AtomicInteger(); + LongHashSet set = + new LongHashSet(0) { + @Override + protected void allocateBuffers(int arraySize) { + super.allocateBuffers(arraySize); + expands.incrementAndGet(); + } + }; + + // Add some elements. + final int max = rarely() ? 0 : randomIntBetween(0, 250); + for (int i = 0; i < max; i++) { + set.add(cast(i)); + } + + final int additions = randomIntBetween(max, max + 5000); + set.ensureCapacity(additions + set.size()); + final int before = expands.get(); + for (int i = 0; i < additions; i++) { + set.add(cast(i)); + } + assertEquals(before, expands.get()); + } + + @Test + public void testInitiallyEmpty() { + assertEquals(0, set.size()); + } + + @Test + public void testAdd() { + assertTrue(set.add(key1)); + assertFalse(set.add(key1)); + assertEquals(1, set.size()); + } + + @Test + public void testAdd2() { + set.addAll(key1, key1); + assertEquals(1, set.size()); + assertEquals(1, set.addAll(key1, key2)); + assertEquals(2, set.size()); + } + + @Test + public void testAddVarArgs() { + set.addAll(asArray(0, 1, 2, 1, 0)); + assertEquals(3, set.size()); + assertSortedListEquals(set.toArray(), 0, 1, 2); + } + + @Test + public void testAddAll() { + LongHashSet set2 = new LongHashSet(); + set2.addAll(asArray(1, 2)); + set.addAll(asArray(0, 1)); + + assertEquals(1, set.addAll(set2)); + assertEquals(0, set.addAll(set2)); + + assertEquals(3, set.size()); + assertSortedListEquals(set.toArray(), 0, 1, 2); + } + + @Test + public void testRemove() { + set.addAll(asArray(0, 1, 2, 3, 4)); + + assertTrue(set.remove(key2)); + assertFalse(set.remove(key2)); + assertEquals(4, set.size()); + assertSortedListEquals(set.toArray(), 0, 1, 3, 4); + } + + @Test 
+ public void testInitialCapacityAndGrowth() { + for (int i = 0; i < 256; i++) { + LongHashSet set = new LongHashSet(i); + + for (int j = 0; j < i; j++) { + set.add(cast(j)); + } + + assertEquals(i, set.size()); + } + } + + @Test + public void testBug_HPPC73_FullCapacityGet() { + final AtomicInteger reallocations = new AtomicInteger(); + final int elements = 0x7F; + set = + new LongHashSet(elements, 1f) { + @Override + protected double verifyLoadFactor(double loadFactor) { + // Skip load factor sanity range checking. + return loadFactor; + } + + @Override + protected void allocateBuffers(int arraySize) { + super.allocateBuffers(arraySize); + reallocations.incrementAndGet(); + } + }; + + int reallocationsBefore = reallocations.get(); + assertEquals(reallocationsBefore, 1); + for (int i = 1; i <= elements; i++) { + set.add(cast(i)); + } + + // Non-existent key. + long outOfSet = cast(elements + 1); + set.remove(outOfSet); + assertFalse(set.contains(outOfSet)); + assertEquals(reallocationsBefore, reallocations.get()); + + // Should not expand because we're replacing an existing element. + assertFalse(set.add(key1)); + assertEquals(reallocationsBefore, reallocations.get()); + + // Remove from a full set. + set.remove(key1); + assertEquals(reallocationsBefore, reallocations.get()); + set.add(key1); + + // Check expand on "last slot of a full map" condition. 
+ set.add(outOfSet); + assertEquals(reallocationsBefore + 1, reallocations.get()); + } + + @Test + public void testRemoveAllFromLookupContainer() { + set.addAll(asArray(0, 1, 2, 3, 4)); + + LongHashSet list2 = new LongHashSet(); + list2.addAll(asArray(1, 3, 5)); + + assertEquals(2, set.removeAll(list2)); + assertEquals(3, set.size()); + assertSortedListEquals(set.toArray(), 0, 2, 4); + } + + @Test + public void testClear() { + set.addAll(asArray(1, 2, 3)); + set.clear(); + assertEquals(0, set.size()); + } + + @Test + public void testRelease() { + set.addAll(asArray(1, 2, 3)); + set.release(); + assertEquals(0, set.size()); + set.addAll(asArray(1, 2, 3)); + assertEquals(3, set.size()); + } + + @Test + public void testIterable() { + set.addAll(asArray(1, 2, 2, 3, 4)); + set.remove(key2); + assertEquals(3, set.size()); + + int count = 0; + for (LongCursor cursor : set) { + count++; + assertTrue(set.contains(cursor.value)); + } + assertEquals(count, set.size()); + + set.clear(); + assertFalse(set.iterator().hasNext()); + } + + /** Runs random insertions/deletions/clearing and compares the results against {@link HashSet}. 
*/ + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + public void testAgainstHashSet() { + final Random rnd = RandomizedTest.getRandom(); + final HashSet other = new HashSet(); + + for (int size = 1000; size < 20000; size += 4000) { + other.clear(); + set.clear(); + + for (int round = 0; round < size * 20; round++) { + long key = cast(rnd.nextInt(size)); + if (rnd.nextInt(50) == 0) { + key = 0L; + } + + if (rnd.nextBoolean()) { + if (rnd.nextBoolean()) { + int index = set.indexOf(key); + if (set.indexExists(index)) { + set.indexReplace(index, key); + } else { + set.indexInsert(index, key); + } + } else { + set.add(key); + } + other.add(key); + + assertTrue(set.contains(key)); + assertTrue(set.indexExists(set.indexOf(key))); + } else { + assertEquals(other.contains(key), set.contains(key)); + boolean removed; + if (set.contains(key) && rnd.nextBoolean()) { + set.indexRemove(set.indexOf(key)); + removed = true; + } else { + removed = set.remove(key); + } + assertEquals(other.remove(key), removed); + } + + assertEquals(other.size(), set.size()); + } + } + } + + @Test + public void testHashCodeEquals() { + LongHashSet l0 = new LongHashSet(); + assertEquals(0, l0.hashCode()); + assertEquals(l0, new LongHashSet()); + + LongHashSet l1 = LongHashSet.from(key1, key2, key3); + LongHashSet l2 = LongHashSet.from(key1, key2); + l2.add(key3); + + assertEquals(l1.hashCode(), l2.hashCode()); + assertEquals(l1, l2); + } + + @Test + public void testClone() { + this.set.addAll(key1, key2, key3); + + LongHashSet cloned = set.clone(); + cloned.remove(key1); + + assertSortedListEquals(set.toArray(), key1, key2, key3); + assertSortedListEquals(cloned.toArray(), key2, key3); + } + + @Test + public void testEqualsSameClass() { + LongHashSet l1 = LongHashSet.from(key1, key2, key3); + LongHashSet l2 = LongHashSet.from(key1, key2, key3); + LongHashSet l3 = LongHashSet.from(key1, key2, key4); + + MatcherAssert.assertThat(l1, is(equalTo(l2))); + MatcherAssert.assertThat(l1.hashCode(), 
is(equalTo(l2.hashCode()))); + MatcherAssert.assertThat(l1, is(not(equalTo(l3)))); + } + + @Test + public void testEqualsSubClass() { + class Sub extends LongHashSet {} + ; + + LongHashSet l1 = LongHashSet.from(key1, key2, key3); + LongHashSet l2 = new Sub(); + LongHashSet l3 = new Sub(); + l2.addAll(l1); + l3.addAll(l1); + + MatcherAssert.assertThat(l2, is(equalTo(l3))); + MatcherAssert.assertThat(l1, is(not(equalTo(l2)))); + } + + private static int randomIntBetween(int min, int max) { + return min + random().nextInt(max + 1 - min); + } + + private static Set set(long... elements) { + Set set = new HashSet<>(); + for (long element : elements) { + set.add(element); + } + return set; + } + + private static long[] asArray(long... elements) { + return elements; + } + + /** Check if the array's content is identical to a given sequence of elements. */ + private static void assertSortedListEquals(long[] array, long... elements) { + assertEquals(elements.length, array.length); + Arrays.sort(array); + assertArrayEquals(elements, array); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongIntHashMap.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongIntHashMap.java index 7fe5561de1e..4dddb081ea5 100644 --- a/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongIntHashMap.java +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongIntHashMap.java @@ -17,7 +17,9 @@ package org.apache.lucene.util.hppc; +import com.carrotsearch.randomizedtesting.RandomizedTest; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; @@ -35,35 +37,28 @@ import org.junit.Test; public class TestLongIntHashMap extends LuceneTestCase { /* Ready to use key values. 
*/ - protected long keyE = 0; - protected long key0 = cast(0), k0 = key0; - protected long key1 = cast(1), k1 = key1; - protected long key2 = cast(2), k2 = key2; - protected long key3 = cast(3), k3 = key3; - protected long key4 = cast(4), k4 = key4; - protected long key5 = cast(5), k5 = key5; - protected long key6 = cast(6), k6 = key6; - protected long key7 = cast(7), k7 = key7; - protected long key8 = cast(8), k8 = key8; - protected long key9 = cast(9), k9 = key9; + private final long keyE = 0; + private final long key1 = cast(1); + private final long key2 = cast(2); + private final long key3 = cast(3); + private final long key4 = cast(4); /** Convert to target type from an integer used to test stuff. */ - public long cast(int v) { + private long cast(int v) { return v; } /** Create a new array of a given type and copy the arguments to this array. */ - /* */ - public final long[] newArray(long... elements) { + private long[] newArray(long... elements) { return elements; } - public static int randomIntBetween(int min, int max) { + private static int randomIntBetween(int min, int max) { return min + random().nextInt(max + 1 - min); } /** Check if the array's content is identical to a given sequence of elements. */ - public static void assertSortedListEquals(long[] array, long... elements) { + private static void assertSortedListEquals(long[] array, long... elements) { assertEquals(elements.length, array.length); Arrays.sort(array); Arrays.sort(elements); @@ -71,23 +66,23 @@ public class TestLongIntHashMap extends LuceneTestCase { } /** Check if the array's content is identical to a given sequence of elements. */ - public static void assertSortedListEquals(int[] array, int... elements) { + private static void assertSortedListEquals(int[] array, int... 
elements) { assertEquals(elements.length, array.length); Arrays.sort(array); Arrays.sort(elements); assertArrayEquals(elements, array); } - protected int value0 = vcast(0); - protected int value1 = vcast(1); - protected int value2 = vcast(2); - protected int value3 = vcast(3); - protected int value4 = vcast(4); + private final int value0 = vcast(0); + private final int value1 = vcast(1); + private final int value2 = vcast(2); + private final int value3 = vcast(3); + private final int value4 = vcast(4); /** Per-test fresh initialized instance. */ - public LongIntHashMap map = newInstance(); + private LongIntHashMap map = newInstance(); - protected LongIntHashMap newInstance() { + private LongIntHashMap newInstance() { return new LongIntHashMap(); } @@ -109,13 +104,13 @@ public class TestLongIntHashMap extends LuceneTestCase { } /** Convert to target type from an integer used to test stuff. */ - protected int vcast(int value) { + private int vcast(int value) { return value; } /** Create a new array of a given type and copy the arguments to this array. */ /* */ - protected final int[] newvArray(int... elements) { + private int[] newvArray(int... elements) { return elements; } @@ -188,7 +183,6 @@ public class TestLongIntHashMap extends LuceneTestCase { AssertionError.class, () -> { map.indexGet(map.indexOf(key2)); - fail(); }); assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3)); @@ -350,6 +344,15 @@ public class TestLongIntHashMap extends LuceneTestCase { map.remove(empty); assertEquals(0, map.get(empty)); + assertEquals(0, map.size()); + + assertEquals(0, map.put(empty, value1)); + assertEquals(value1, map.put(empty, value2)); + map.clear(); + assertFalse(map.indexExists(map.indexOf(empty))); + assertEquals(0, map.put(empty, value1)); + map.clear(); + assertEquals(0, map.remove(empty)); } /* */ @@ -388,6 +391,11 @@ public class TestLongIntHashMap extends LuceneTestCase { // These are internals, but perhaps worth asserting too. 
assertEquals(0, map.assigned); + // Check values are cleared. + assertEquals(0, map.put(key1, value1)); + assertEquals(0, map.remove(key2)); + map.clear(); + // Check if the map behaves properly upon subsequent use. testPutWithExpansions(); } @@ -463,13 +471,13 @@ public class TestLongIntHashMap extends LuceneTestCase { assertEquals(reallocationsBefore, reallocations.get()); // Should not expand because we're replacing an existing element. - map.put(k1, value2); + map.put(key1, value2); assertEquals(reallocationsBefore, reallocations.get()); // Remove from a full map. - map.remove(k1); + map.remove(key1); assertEquals(reallocationsBefore, reallocations.get()); - map.put(k1, value2); + map.put(key1, value2); // Check expand on "last slot of a full map" condition. map.put(outOfSet, value1); @@ -507,6 +515,61 @@ public class TestLongIntHashMap extends LuceneTestCase { assertFalse(l2.equals(l1)); } + /** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */ + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + public void testAgainstHashMap() { + final Random rnd = RandomizedTest.getRandom(); + final HashMap other = new HashMap(); + + for (int size = 1000; size < 20000; size += 4000) { + other.clear(); + map.clear(); + + for (int round = 0; round < size * 20; round++) { + long key = cast(rnd.nextInt(size)); + if (rnd.nextInt(50) == 0) { + key = 0; + } + + int value = vcast(rnd.nextInt()); + + boolean hadOldValue = map.containsKey(key); + if (rnd.nextBoolean()) { + int previousValue; + if (rnd.nextBoolean()) { + int index = map.indexOf(key); + if (map.indexExists(index)) { + previousValue = map.indexReplace(index, value); + } else { + map.indexInsert(index, key, value); + previousValue = 0; + } + } else { + previousValue = map.put(key, value); + } + assertEquals( + other.put(key, value), ((previousValue) == 0) && !hadOldValue ? 
null : previousValue); + + assertEquals(value, map.get(key)); + assertEquals(value, map.indexGet(map.indexOf(key))); + assertTrue(map.containsKey(key)); + assertTrue(map.indexExists(map.indexOf(key))); + } else { + assertEquals(other.containsKey(key), map.containsKey(key)); + int previousValue = + map.containsKey(key) && rnd.nextBoolean() + ? map.indexRemove(map.indexOf(key)) + : map.remove(key); + assertEquals( + other.remove(key), ((previousValue) == 0) && !hadOldValue ? null : previousValue); + } + + assertEquals(other.size(), map.size()); + } + } + } + /* * */ @@ -557,16 +620,16 @@ public class TestLongIntHashMap extends LuceneTestCase { @Test public void testEqualsSameClass() { LongIntHashMap l1 = newInstance(); - l1.put(k1, value0); - l1.put(k2, value1); - l1.put(k3, value2); + l1.put(key1, value0); + l1.put(key2, value1); + l1.put(key3, value2); LongIntHashMap l2 = new LongIntHashMap(l1); l2.putAll(l1); LongIntHashMap l3 = new LongIntHashMap(l2); l3.putAll(l2); - l3.put(k4, value0); + l3.put(key4, value0); assertEquals(l2, l1); assertEquals(l2.hashCode(), l1.hashCode()); @@ -579,13 +642,13 @@ public class TestLongIntHashMap extends LuceneTestCase { class Sub extends LongIntHashMap {} LongIntHashMap l1 = newInstance(); - l1.put(k1, value0); - l1.put(k2, value1); - l1.put(k3, value2); + l1.put(key1, value0); + l1.put(key2, value1); + l1.put(key3, value2); LongIntHashMap l2 = new Sub(); l2.putAll(l1); - l2.put(k4, value3); + l2.put(key4, value3); LongIntHashMap l3 = new Sub(); l3.putAll(l2); diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongObjectHashMap.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongObjectHashMap.java index d047b3d1091..7d368eed8d3 100644 --- a/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongObjectHashMap.java +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongObjectHashMap.java @@ -17,7 +17,9 @@ package org.apache.lucene.util.hppc; +import 
com.carrotsearch.randomizedtesting.RandomizedTest; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; @@ -36,35 +38,28 @@ import org.junit.Test; public class TestLongObjectHashMap extends LuceneTestCase { /* Ready to use key values. */ - protected long keyE = 0; - protected long key0 = cast(0), k0 = key0; - protected long key1 = cast(1), k1 = key1; - protected long key2 = cast(2), k2 = key2; - protected long key3 = cast(3), k3 = key3; - protected long key4 = cast(4), k4 = key4; - protected long key5 = cast(5), k5 = key5; - protected long key6 = cast(6), k6 = key6; - protected long key7 = cast(7), k7 = key7; - protected long key8 = cast(8), k8 = key8; - protected long key9 = cast(9), k9 = key9; + private final long keyE = 0; + private final long key1 = cast(1); + private final long key2 = cast(2); + private final long key3 = cast(3); + private final long key4 = cast(4); /** Convert to target type from an integer used to test stuff. */ - public long cast(int v) { + private long cast(int v) { return v; } /** Create a new array of a given type and copy the arguments to this array. */ - /* */ - public final long[] newArray(long... elements) { + private long[] newArray(long... elements) { return elements; } - public static int randomIntBetween(int min, int max) { + private static int randomIntBetween(int min, int max) { return min + random().nextInt(max + 1 - min); } /** Check if the array's content is identical to a given sequence of elements. */ - public static void assertSortedListEquals(long[] array, long... elements) { + private static void assertSortedListEquals(long[] array, long... elements) { assertEquals(elements.length, array.length); Arrays.sort(array); Arrays.sort(elements); @@ -72,22 +67,22 @@ public class TestLongObjectHashMap extends LuceneTestCase { } /** Check if the array's content is identical to a given sequence of elements. 
*/ - public static void assertSortedListEquals(Object[] array, Object... elements) { + private static void assertSortedListEquals(Object[] array, Object... elements) { assertEquals(elements.length, array.length); Arrays.sort(array); assertArrayEquals(elements, array); } - protected int value0 = vcast(0); - protected int value1 = vcast(1); - protected int value2 = vcast(2); - protected int value3 = vcast(3); - protected int value4 = vcast(4); + private final int value0 = vcast(0); + private final int value1 = vcast(1); + private final int value2 = vcast(2); + private final int value3 = vcast(3); + private final int value4 = vcast(4); /** Per-test fresh initialized instance. */ - public LongObjectHashMap map = newInstance(); + private LongObjectHashMap map = newInstance(); - protected LongObjectHashMap newInstance() { + private LongObjectHashMap newInstance() { return new LongObjectHashMap(); } @@ -109,13 +104,13 @@ public class TestLongObjectHashMap extends LuceneTestCase { } /** Convert to target type from an integer used to test stuff. */ - protected int vcast(int value) { + private int vcast(int value) { return value; } /** Create a new array of a given type and copy the arguments to this array. */ /* */ - protected final Object[] newvArray(Object... elements) { + private Object[] newvArray(Object... 
elements) { return elements; } @@ -189,7 +184,6 @@ public class TestLongObjectHashMap extends LuceneTestCase { AssertionError.class, () -> { map.indexGet(map.indexOf(key2)); - fail(); }); assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3)); @@ -354,6 +348,7 @@ public class TestLongObjectHashMap extends LuceneTestCase { map.remove(empty); assertEquals(null, map.get(empty)); + assertEquals(0, map.size()); map.put(empty, null); assertEquals(1, map.size()); @@ -364,6 +359,14 @@ public class TestLongObjectHashMap extends LuceneTestCase { assertEquals(0, map.size()); assertFalse(map.containsKey(empty)); assertNull(map.get(empty)); + + assertEquals(null, map.put(empty, value1)); + assertEquals(value1, map.put(empty, value2)); + map.clear(); + assertFalse(map.indexExists(map.indexOf(empty))); + assertEquals(null, map.put(empty, value1)); + map.clear(); + assertEquals(null, map.remove(empty)); } /* */ @@ -402,6 +405,11 @@ public class TestLongObjectHashMap extends LuceneTestCase { // These are internals, but perhaps worth asserting too. assertEquals(0, map.assigned); + // Check values are cleared. + assertEquals(null, map.put(key1, value1)); + assertEquals(null, map.remove(key2)); + map.clear(); + // Check if the map behaves properly upon subsequent use. testPutWithExpansions(); } @@ -477,13 +485,13 @@ public class TestLongObjectHashMap extends LuceneTestCase { assertEquals(reallocationsBefore, reallocations.get()); // Should not expand because we're replacing an existing element. - map.put(k1, value2); + map.put(key1, value2); assertEquals(reallocationsBefore, reallocations.get()); // Remove from a full map. - map.remove(k1); + map.remove(key1); assertEquals(reallocationsBefore, reallocations.get()); - map.put(k1, value2); + map.put(key1, value2); // Check expand on "last slot of a full map" condition. 
map.put(outOfSet, value1); @@ -521,6 +529,58 @@ public class TestLongObjectHashMap extends LuceneTestCase { assertFalse(l2.equals(l1)); } + /** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */ + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + public void testAgainstHashMap() { + final Random rnd = RandomizedTest.getRandom(); + final HashMap other = new HashMap(); + + for (int size = 1000; size < 20000; size += 4000) { + other.clear(); + map.clear(); + + for (int round = 0; round < size * 20; round++) { + long key = cast(rnd.nextInt(size)); + if (rnd.nextInt(50) == 0) { + key = 0; + } + + int value = vcast(rnd.nextInt()); + + if (rnd.nextBoolean()) { + Object previousValue; + if (rnd.nextBoolean()) { + int index = map.indexOf(key); + if (map.indexExists(index)) { + previousValue = map.indexReplace(index, value); + } else { + map.indexInsert(index, key, value); + previousValue = null; + } + } else { + previousValue = map.put(key, value); + } + assertEquals(other.put(key, value), previousValue); + + assertEquals(value, map.get(key)); + assertEquals(value, map.indexGet(map.indexOf(key))); + assertTrue(map.containsKey(key)); + assertTrue(map.indexExists(map.indexOf(key))); + } else { + assertEquals(other.containsKey(key), map.containsKey(key)); + Object previousValue = + map.containsKey(key) && rnd.nextBoolean() + ? 
map.indexRemove(map.indexOf(key)) + : map.remove(key); + assertEquals(other.remove(key), previousValue); + } + + assertEquals(other.size(), map.size()); + } + } + } + /* * */ @@ -571,16 +631,16 @@ public class TestLongObjectHashMap extends LuceneTestCase { @Test public void testEqualsSameClass() { LongObjectHashMap l1 = newInstance(); - l1.put(k1, value0); - l1.put(k2, value1); - l1.put(k3, value2); + l1.put(key1, value0); + l1.put(key2, value1); + l1.put(key3, value2); LongObjectHashMap l2 = new LongObjectHashMap(l1); l2.putAll(l1); LongObjectHashMap l3 = new LongObjectHashMap(l2); l3.putAll(l2); - l3.put(k4, value0); + l3.put(key4, value0); assertEquals(l2, l1); assertEquals(l2.hashCode(), l1.hashCode()); @@ -593,13 +653,13 @@ public class TestLongObjectHashMap extends LuceneTestCase { class Sub extends LongObjectHashMap {} LongObjectHashMap l1 = newInstance(); - l1.put(k1, value0); - l1.put(k2, value1); - l1.put(k3, value2); + l1.put(key1, value0); + l1.put(key2, value1); + l1.put(key3, value2); LongObjectHashMap l2 = new Sub(); l2.putAll(l1); - l2.put(k4, value3); + l2.put(key4, value3); LongObjectHashMap l3 = new Sub(); l3.putAll(l2); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java index 9fe4b15b907..d0120d47ec8 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java @@ -18,12 +18,10 @@ package org.apache.lucene.facet; import java.io.IOException; import java.util.ArrayList; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Set; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; @@ -33,6 +31,8 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import 
org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.hppc.IntCursor; +import org.apache.lucene.util.hppc.IntHashSet; /** * A {@link Query} for drill-down over facet categories. You should call {@link #add(String, @@ -56,7 +56,7 @@ public final class DrillDownQuery extends Query { private final List dimQueries = new ArrayList<>(); private final Map drillDownDims = new LinkedHashMap<>(); private final List builtDimQueries = new ArrayList<>(); - private final Set dirtyDimQueryIndex = new HashSet<>(); + private final IntHashSet dirtyDimQueryIndex = new IntHashSet(); /** Used by clone() and DrillSideways */ DrillDownQuery( @@ -202,8 +202,8 @@ public final class DrillDownQuery extends Query { * @return The array of dimQueries */ public Query[] getDrillDownQueries() { - for (Integer dirtyDimIndex : dirtyDimQueryIndex) { - builtDimQueries.set(dirtyDimIndex, this.dimQueries.get(dirtyDimIndex).build()); + for (IntCursor dirtyDimIndex : dirtyDimQueryIndex) { + builtDimQueries.set(dirtyDimIndex.value, this.dimQueries.get(dirtyDimIndex.value).build()); } dirtyDimQueryIndex.clear(); diff --git a/lucene/queryparser/src/generated/checksums/javaccParserClassic.json b/lucene/queryparser/src/generated/checksums/javaccParserClassic.json index b7e86a62517..eeb0d527027 100644 --- a/lucene/queryparser/src/generated/checksums/javaccParserClassic.json +++ b/lucene/queryparser/src/generated/checksums/javaccParserClassic.json @@ -1,9 +1,9 @@ { "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java": "7a8a8fd5b2ea78f9a17f54cbae8b0e4496e8988e", - "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java": "49bf5362c3f41a1f398284f05eede08fceec6d78", - "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj": "c9584bbe50c3c7479f72ea84145ebbf034a201ea", + "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java": 
"380daae0f6e27b3872d117fc4aef955b1e4296ca", + "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj": "968452b64449655b035fffb45944086c3032732b", "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserConstants.java": "e59a3fd38b66a3d56779c55955c1e014225a1f50", - "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java": "56d191d6f3033dd554efcb38f536b5f7df2f1e06", + "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java": "dc99a1083bfa50e429d40e114fabe7dd5d434693", "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java": "310665ba37d982327fcb55cc3523d629ef29ef54", "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java": "7e2dd6ab7489048bb70f3077ca9fed90f925ec33" } \ No newline at end of file diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java index c0ecc8912d5..9299377a908 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java @@ -4,11 +4,8 @@ package org.apache.lucene.queryparser.classic; import java.io.StringReader; import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; import java.util.List; import java.util.Locale; -import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.DateTools; @@ -17,6 +14,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.queryparser.charstream.CharStream; import org.apache.lucene.queryparser.charstream.FastCharStream; +import org.apache.lucene.util.hppc.IntHashSet; /** * This class is generated by JavaCC. 
The most important method is @@ -143,8 +141,8 @@ import org.apache.lucene.queryparser.charstream.FastCharStream; } private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE; - private static Set disallowedPostMultiTerm - = new HashSet(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR)); + private static IntHashSet disallowedPostMultiTerm + = IntHashSet.from(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR); private static boolean allowedPostMultiTerm(int tokenKind) { return disallowedPostMultiTerm.contains(tokenKind) == false; } @@ -708,19 +706,35 @@ if (splitOnWhitespace == false) { finally { jj_save(2, xla); } } - private boolean jj_3R_MultiTerm_391_3_6() + private boolean jj_3R_MultiTerm_381_3_3() + { + if (jj_scan_token(TERM)) return true; + jj_lookingAhead = true; + jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind); + jj_lookingAhead = false; + if (!jj_semLA || jj_3R_MultiTerm_389_3_6()) return true; + Token xsp; + if (jj_3R_MultiTerm_391_5_7()) return true; + while (true) { + xsp = jj_scanpos; + if (jj_3R_MultiTerm_391_5_7()) { jj_scanpos = xsp; break; } + } + return false; + } + + private boolean jj_3R_MultiTerm_389_3_6() { return false; } - private boolean jj_3R_Clause_308_9_5() + private boolean jj_3R_Clause_306_9_5() { if (jj_scan_token(STAR)) return true; if (jj_scan_token(COLON)) return true; return false; } - private boolean jj_3R_Clause_307_7_4() + private boolean jj_3R_Clause_305_7_4() { if (jj_scan_token(TERM)) return true; if (jj_scan_token(COLON)) return true; @@ -729,17 +743,17 @@ if (splitOnWhitespace == false) { private boolean jj_3_2() { - if (jj_3R_MultiTerm_383_3_3()) return true; + if (jj_3R_MultiTerm_381_3_3()) return true; return false; } private boolean jj_3_1() { - if (jj_3R_MultiTerm_383_3_3()) return true; + if (jj_3R_MultiTerm_381_3_3()) return true; return false; } - private boolean jj_3R_MultiTerm_393_5_7() + private boolean jj_3R_MultiTerm_391_5_7() { if (jj_scan_token(TERM)) return true; return false; 
@@ -749,25 +763,9 @@ if (splitOnWhitespace == false) { { Token xsp; xsp = jj_scanpos; - if (jj_3R_Clause_307_7_4()) { + if (jj_3R_Clause_305_7_4()) { jj_scanpos = xsp; - if (jj_3R_Clause_308_9_5()) return true; - } - return false; - } - - private boolean jj_3R_MultiTerm_383_3_3() - { - if (jj_scan_token(TERM)) return true; - jj_lookingAhead = true; - jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind); - jj_lookingAhead = false; - if (!jj_semLA || jj_3R_MultiTerm_391_3_6()) return true; - Token xsp; - if (jj_3R_MultiTerm_393_5_7()) return true; - while (true) { - xsp = jj_scanpos; - if (jj_3R_MultiTerm_393_5_7()) { jj_scanpos = xsp; break; } + if (jj_3R_Clause_306_9_5()) return true; } return false; } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj index 11ba876b332..af558d556ca 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj @@ -27,11 +27,8 @@ package org.apache.lucene.queryparser.classic; import java.io.StringReader; import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; import java.util.List; import java.util.Locale; -import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.DateTools; @@ -40,6 +37,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.queryparser.charstream.CharStream; import org.apache.lucene.queryparser.charstream.FastCharStream; +import org.apache.lucene.util.hppc.IntHashSet; /** * This class is generated by JavaCC. 
The most important method is @@ -166,8 +164,8 @@ public class QueryParser extends QueryParserBase { } private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE; - private static Set disallowedPostMultiTerm - = new HashSet(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR)); + private static IntHashSet disallowedPostMultiTerm + = IntHashSet.from(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR); private static boolean allowedPostMultiTerm(int tokenKind) { return disallowedPostMultiTerm.contains(tokenKind) == false; } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java index eabb0bc7e45..fb172fa6e36 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java @@ -15,8 +15,6 @@ package org.apache.lucene.queryparser.classic; - - /** Token Manager. 
*/ @SuppressWarnings ("unused") public class QueryParserTokenManager implements QueryParserConstants { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java index 924f8d91ab6..999cf97832e 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java @@ -17,10 +17,9 @@ package org.apache.lucene.search.suggest.document; import java.io.IOException; +import java.util.Arrays; import java.util.HashMap; -import java.util.Iterator; import java.util.Map; -import java.util.TreeSet; import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.QueryVisitor; @@ -37,6 +36,7 @@ import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.fst.Util; +import org.apache.lucene.util.hppc.IntHashSet; /** * A {@link CompletionQuery} that matches documents specified by a wrapped {@link CompletionQuery} @@ -200,21 +200,29 @@ public class ContextQuery extends CompletionQuery implements Accountable { Operations.determinize(contextsAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); final Map contextMap = CollectionUtil.newHashMap(contexts.size()); - final TreeSet contextLengths = new TreeSet<>(); + final IntHashSet contextLengths = new IntHashSet(); for (Map.Entry entry : contexts.entrySet()) { ContextMetaData contextMetaData = entry.getValue(); contextMap.put(entry.getKey(), contextMetaData.boost); contextLengths.add(entry.getKey().length); } - int[] contextLengthArray = new int[contextLengths.size()]; - final Iterator iterator = contextLengths.descendingIterator(); - for (int i = 0; iterator.hasNext(); i++) { - 
contextLengthArray[i] = iterator.next(); - } + int[] contextLengthArray = contextLengths.toArray(); + sortDescending(contextLengthArray); return new ContextCompletionWeight( this, contextsAutomaton, innerWeight, contextMap, contextLengthArray); } + /** Sorts and reverses the array. */ + private static void sortDescending(int[] array) { + Arrays.sort(array); + for (int i = 0, midLength = array.length / 2, last = array.length - 1; i < midLength; i++) { + int swapIndex = last - i; + int tmp = array[i]; + array[i] = array[swapIndex]; + array[swapIndex] = tmp; + } + } + private static Automaton toContextAutomaton( final Map contexts, final boolean matchAllContexts) { final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());