From f70999980c7d80443aa9e2fcded9c3cbb48638e5 Mon Sep 17 00:00:00 2001 From: Bruno Roustant <33934988+bruno-roustant@users.noreply.github.com> Date: Tue, 21 May 2024 17:11:34 +0200 Subject: [PATCH] Replace Map by primitive LongObjectHashMap. (#13392) Add LongObjectHashMap and replace Map. Add LongIntHashMap and replace Map. Add HPPC dependency to join and spatial modules for primitive values float and double. --- lucene/CHANGES.txt | 2 + .../analysis/cn/smart/hhmm/BiSegGraph.java | 4 +- .../lucene90/Lucene90DocValuesConsumer.java | 9 +- .../org/apache/lucene/index/IndexWriter.java | 16 +- .../apache/lucene/index/SegmentDocValues.java | 6 +- .../lucene/util/hppc/HashContainers.java | 45 + .../lucene/util/hppc/IntIntHashMap.java | 33 +- .../lucene/util/hppc/IntObjectHashMap.java | 62 +- .../apache/lucene/util/hppc/LongCursor.java | 35 + .../lucene/util/hppc/LongIntHashMap.java | 900 ++++++++++++++++++ .../lucene/util/hppc/LongObjectHashMap.java | 869 +++++++++++++++++ .../apache/lucene/util/hppc/ObjectCursor.java | 35 + .../lucene/util/hppc/TestIntIntHashMap.java | 47 +- .../util/hppc/TestIntObjectHashMap.java | 75 +- .../lucene/util/hppc/TestLongIntHashMap.java | 597 ++++++++++++ .../util/hppc/TestLongObjectHashMap.java | 611 ++++++++++++ .../range/OverlappingLongRangeCounter.java | 28 +- lucene/join/build.gradle | 2 + lucene/join/src/java/module-info.java | 2 + ...versifyingNearestChildrenKnnCollector.java | 13 +- .../apache/lucene/search/join/JoinUtil.java | 101 +- .../search/DiversifiedTopDocsCollector.java | 7 +- .../sandbox/search/TermAutomatonQuery.java | 28 +- lucene/spatial-extras/build.gradle | 2 + .../spatial-extras/src/java/module-info.java | 1 + .../util/CachingDoubleValueSource.java | 15 +- 26 files changed, 3280 insertions(+), 265 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/hppc/LongCursor.java create mode 100644 
lucene/core/src/java/org/apache/lucene/util/hppc/LongIntHashMap.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/hppc/LongObjectHashMap.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/hppc/ObjectCursor.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/hppc/TestLongIntHashMap.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/hppc/TestLongObjectHashMap.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a41ec633834..7a390dc990d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -346,6 +346,8 @@ Optimizations * GITHUB#13368: Replace Map by primitive IntObjectHashMap. (Bruno Roustant) +* GITHUB#13392: Replace Map by primitive LongObjectHashMap. (Bruno Roustant) + Bug Fixes --------------------- diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java index 548d8d41360..a57f6cfc20d 100644 --- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java +++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.cn.smart.Utility; import org.apache.lucene.util.hppc.IntObjectHashMap; +import org.apache.lucene.util.hppc.ObjectCursor; /** * Graph representing possible token pairs (bigrams) at each start offset in the sentence. 
@@ -218,8 +219,7 @@ class BiSegGraph { @Override public String toString() { StringBuilder sb = new StringBuilder(); - for (IntObjectHashMap.ObjectCursor> segList : - tokenPairListTable.values()) { + for (ObjectCursor> segList : tokenPairListTable.values()) { for (SegTokenPair pair : segList.value) { sb.append(pair).append("\n"); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java index c3d170447ae..7c6a1b0a584 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java @@ -22,9 +22,7 @@ import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_ import java.io.IOException; import java.util.Arrays; -import java.util.HashMap; import java.util.HashSet; -import java.util.Map; import java.util.Set; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; @@ -54,6 +52,7 @@ import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.MathUtil; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.compress.LZ4; +import org.apache.lucene.util.hppc.LongIntHashMap; import org.apache.lucene.util.packed.DirectMonotonicWriter; import org.apache.lucene.util.packed.DirectWriter; @@ -273,7 +272,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { meta.writeLong(numValues); final int numBitsPerValue; boolean doBlocks = false; - Map encode = null; + LongIntHashMap encode = null; if (min >= max) { // meta[-1]: All values are 0 numBitsPerValue = 0; meta.writeInt(-1); // tablesize @@ -289,7 +288,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { for (Long v : sortedUniqueValues) { meta.writeLong(v); // table[] entry } - encode = new HashMap<>(); + encode = new LongIntHashMap(); for (int i = 0; i < 
sortedUniqueValues.length; ++i) { encode.put(sortedUniqueValues[i], i); } @@ -339,7 +338,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { int numBitsPerValue, long min, long gcd, - Map encode) + LongIntHashMap encode) throws IOException { DirectWriter writer = DirectWriter.getInstance(data, numValues, numBitsPerValue); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 3cbf433e174..65e318a57c9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -90,6 +90,8 @@ import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.Version; +import org.apache.lucene.util.hppc.LongObjectHashMap; +import org.apache.lucene.util.hppc.ObjectCursor; /** * An IndexWriter creates and maintains an index. 
@@ -4379,7 +4381,7 @@ public class IndexWriter final ReadersAndUpdates mergedDeletesAndUpdates = getPooledInstance(merge.info, true); int numDeletesBefore = mergedDeletesAndUpdates.getDelCount(); // field -> delGen -> dv field updates - Map> mappedDVUpdates = new HashMap<>(); + Map> mappedDVUpdates = new HashMap<>(); boolean anyDVUpdates = false; @@ -4412,9 +4414,9 @@ public class IndexWriter String field = ent.getKey(); - Map mappedField = mappedDVUpdates.get(field); + LongObjectHashMap mappedField = mappedDVUpdates.get(field); if (mappedField == null) { - mappedField = new HashMap<>(); + mappedField = new LongObjectHashMap<>(); mappedDVUpdates.put(field, mappedField); } @@ -4470,10 +4472,10 @@ public class IndexWriter if (anyDVUpdates) { // Persist the merged DV updates onto the RAU for the merged segment: - for (Map d : mappedDVUpdates.values()) { - for (DocValuesFieldUpdates updates : d.values()) { - updates.finish(); - mergedDeletesAndUpdates.addDVUpdate(updates); + for (LongObjectHashMap d : mappedDVUpdates.values()) { + for (ObjectCursor updates : d.values()) { + updates.value.finish(); + mergedDeletesAndUpdates.addDVUpdate(updates.value); } } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java index 42e3daa3944..52340aa97b3 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java @@ -17,15 +17,14 @@ package org.apache.lucene.index; import java.io.IOException; -import java.util.HashMap; import java.util.List; -import java.util.Map; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RefCount; +import org.apache.lucene.util.hppc.LongObjectHashMap; /** * Manages the 
{@link DocValuesProducer} held by {@link SegmentReader} and keeps track of their @@ -33,7 +32,8 @@ import org.apache.lucene.util.RefCount; */ final class SegmentDocValues { - private final Map> genDVProducers = new HashMap<>(); + private final LongObjectHashMap> genDVProducers = + new LongObjectHashMap<>(); private RefCount newDocValuesProducer( SegmentCommitInfo si, Directory dir, final Long gen, FieldInfos infos) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java b/lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java new file mode 100644 index 00000000000..f54abd34e73 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/HashContainers.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util.hppc; + +import java.util.concurrent.atomic.AtomicInteger; + +/** Constants for primitive maps. */ +public class HashContainers { + + public static final int DEFAULT_EXPECTED_ELEMENTS = 4; + + public static final float DEFAULT_LOAD_FACTOR = 0.75f; + + /** Minimal sane load factor (99 empty slots per 100). 
*/ + public static final float MIN_LOAD_FACTOR = 1 / 100.0f; + + /** Maximum sane load factor (1 empty slot per 100). */ + public static final float MAX_LOAD_FACTOR = 99 / 100.0f; + + /** Minimum hash buffer size. */ + public static final int MIN_HASH_ARRAY_LENGTH = 4; + + /** + * Maximum array size for hash containers (power-of-two and still allocable in Java, not a + * negative int). + */ + public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1; + + static final AtomicInteger ITERATION_SEED = new AtomicInteger(); +} diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java index 3d37e306a71..856f7870968 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/IntIntHashMap.java @@ -18,10 +18,12 @@ package org.apache.lucene.util.hppc; import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo; +import static org.apache.lucene.util.hppc.HashContainers.*; import java.util.Arrays; import java.util.Iterator; -import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; /** * A hash map of int to int, implemented using open addressing with linear @@ -31,28 +33,10 @@ import java.util.concurrent.atomic.AtomicInteger; * *

github: https://github.com/carrotsearch/hppc release 0.9.0 */ -public class IntIntHashMap implements Iterable, Cloneable { +public class IntIntHashMap implements Iterable, Accountable, Cloneable { - public static final int DEFAULT_EXPECTED_ELEMENTS = 4; - - public static final float DEFAULT_LOAD_FACTOR = 0.75f; - - private static final AtomicInteger ITERATION_SEED = new AtomicInteger(); - - /** Minimal sane load factor (99 empty slots per 100). */ - public static final float MIN_LOAD_FACTOR = 1 / 100.0f; - - /** Maximum sane load factor (1 empty slot per 100). */ - public static final float MAX_LOAD_FACTOR = 99 / 100.0f; - - /** Minimum hash buffer size. */ - public static final int MIN_HASH_ARRAY_LENGTH = 4; - - /** - * Maximum array size for hash containers (power-of-two and still allocable in Java, not a - * negative int). - */ - public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1; + private static final long BASE_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(IntIntHashMap.class); /** The array holding keys. */ public int[] keys; @@ -463,6 +447,11 @@ public class IntIntHashMap implements Iterable, Clon return iterationSeed = BitMixer.mixPhi(iterationSeed); } + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys) + RamUsageEstimator.sizeOf(values); + } + /** An iterator implementation for {@link #iterator}. 
*/ private final class EntryIterator extends AbstractIterator { private final IntIntCursor cursor; diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java index f063a0e7f09..003f9835175 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/IntObjectHashMap.java @@ -18,14 +18,16 @@ package org.apache.lucene.util.hppc; import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo; +import static org.apache.lucene.util.hppc.HashContainers.*; import java.util.Arrays; import java.util.Iterator; -import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; /** * A hash map of int to Object, implemented using open addressing with - * linear probing for collision resolution. + * linear probing for collision resolution. Supports null values. * *

Mostly forked and trimmed from com.carrotsearch.hppc.IntObjectHashMap * @@ -33,28 +35,10 @@ import java.util.concurrent.atomic.AtomicInteger; */ @SuppressWarnings("unchecked") public class IntObjectHashMap - implements Iterable>, Cloneable { + implements Iterable>, Accountable, Cloneable { - public static final int DEFAULT_EXPECTED_ELEMENTS = 4; - - public static final float DEFAULT_LOAD_FACTOR = 0.75f; - - private static final AtomicInteger ITERATION_SEED = new AtomicInteger(); - - /** Minimal sane load factor (99 empty slots per 100). */ - public static final float MIN_LOAD_FACTOR = 1 / 100.0f; - - /** Maximum sane load factor (1 empty slot per 100). */ - public static final float MAX_LOAD_FACTOR = 99 / 100.0f; - - /** Minimum hash buffer size. */ - public static final int MIN_HASH_ARRAY_LENGTH = 4; - - /** - * Maximum array size for hash containers (power-of-two and still allocable in Java, not a - * negative int). - */ - public static final int MAX_HASH_ARRAY_LENGTH = 0x80000000 >>> 1; + private static final long BASE_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(IntObjectHashMap.class); /** The array holding keys. */ public int[] keys; @@ -304,7 +288,7 @@ public class IntObjectHashMap return (VType) values[index]; } - public VType indexReplace(int index, int newValue) { + public VType indexReplace(int index, VType newValue) { assert index >= 0 : "The index must point at an existing key."; assert index <= mask || (index == mask + 1 && hasEmptyKey); @@ -436,6 +420,19 @@ public class IntObjectHashMap return new EntryIterator(); } + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys) + sizeOfValues(); + } + + private long sizeOfValues() { + long size = RamUsageEstimator.shallowSizeOf(values); + for (ObjectCursor value : values()) { + size += RamUsageEstimator.sizeOfObject(value); + } + return size; + } + /** An iterator implementation for {@link #iterator}. 
*/ private final class EntryIterator extends AbstractIterator> { private final IntObjectCursor cursor; @@ -869,21 +866,4 @@ public class IntObjectHashMap return "[cursor, index: " + index + ", key: " + key + ", value: " + value + "]"; } } - - /** Forked from HPPC, holding int index and Object value */ - public static final class ObjectCursor { - /** - * The current value's index in the container this cursor belongs to. The meaning of this index - * is defined by the container (usually it will be an index in the underlying storage buffer). - */ - public int index; - - /** The current value. */ - public VType value; - - @Override - public String toString() { - return "[cursor, index: " + index + ", value: " + value + "]"; - } - } } diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/LongCursor.java b/lucene/core/src/java/org/apache/lucene/util/hppc/LongCursor.java new file mode 100644 index 00000000000..2a424254306 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/LongCursor.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.util.hppc; + +/** Forked from HPPC, holding int index and long value */ +public final class LongCursor { + /** + * The current value's index in the container this cursor belongs to. The meaning of this index is + * defined by the container (usually it will be an index in the underlying storage buffer). + */ + public int index; + + /** The current value. */ + public long value; + + @Override + public String toString() { + return "[cursor, index: " + index + ", value: " + value + "]"; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/LongIntHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/LongIntHashMap.java new file mode 100644 index 00000000000..6c28f6b947e --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/LongIntHashMap.java @@ -0,0 +1,900 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.util.hppc; + +import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo; +import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_EXPECTED_ELEMENTS; +import static org.apache.lucene.util.hppc.HashContainers.DEFAULT_LOAD_FACTOR; +import static org.apache.lucene.util.hppc.HashContainers.ITERATION_SEED; +import static org.apache.lucene.util.hppc.HashContainers.MAX_HASH_ARRAY_LENGTH; +import static org.apache.lucene.util.hppc.HashContainers.MAX_LOAD_FACTOR; +import static org.apache.lucene.util.hppc.HashContainers.MIN_HASH_ARRAY_LENGTH; +import static org.apache.lucene.util.hppc.HashContainers.MIN_LOAD_FACTOR; + +import java.util.Arrays; +import java.util.Iterator; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * A hash map of long to int, implemented using open addressing with + * linear probing for collision resolution. + * + *

Mostly forked and trimmed from com.carrotsearch.hppc.LongIntHashMap + * + *

github: https://github.com/carrotsearch/hppc release 0.9.0 + */ +public class LongIntHashMap + implements Iterable, Accountable, Cloneable { + + private static final long BASE_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(LongIntHashMap.class); + + /** The array holding keys. */ + public long[] keys; + + /** The array holding values. */ + public int[] values; + + /** + * The number of stored keys (assigned key slots), excluding the special "empty" key, if any (use + * {@link #size()} instead). + * + * @see #size() + */ + protected int assigned; + + /** Mask for slot scans in {@link #keys}. */ + protected int mask; + + /** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */ + protected int resizeAt; + + /** Special treatment for the "empty slot" key marker. */ + protected boolean hasEmptyKey; + + /** The load factor for {@link #keys}. */ + protected double loadFactor; + + /** Seed used to ensure the hash iteration order is different from an iteration to another. */ + protected int iterationSeed; + + /** New instance with sane defaults. */ + public LongIntHashMap() { + this(DEFAULT_EXPECTED_ELEMENTS); + } + + /** + * New instance with sane defaults. + * + * @param expectedElements The expected number of elements guaranteed not to cause buffer + * expansion (inclusive). + */ + public LongIntHashMap(int expectedElements) { + this(expectedElements, DEFAULT_LOAD_FACTOR); + } + + /** + * New instance with the provided defaults. + * + * @param expectedElements The expected number of elements guaranteed not to cause a rehash + * (inclusive). + * @param loadFactor The load factor for internal buffers. Insane load factors (zero, full + * capacity) are rejected by {@link #verifyLoadFactor(double)}. 
+ */ + public LongIntHashMap(int expectedElements, double loadFactor) { + this.loadFactor = verifyLoadFactor(loadFactor); + iterationSeed = ITERATION_SEED.incrementAndGet(); + ensureCapacity(expectedElements); + } + + /** Create a hash map from all key-value pairs of another container. */ + public LongIntHashMap(Iterable container) { + this(); + putAll(container); + } + + public int put(long key, int value) { + assert assigned < mask + 1; + + final int mask = this.mask; + if (((key) == 0)) { + hasEmptyKey = true; + int previousValue = values[mask + 1]; + values[mask + 1] = value; + return previousValue; + } else { + final long[] keys = this.keys; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + final int previousValue = values[slot]; + values[slot] = value; + return previousValue; + } + slot = (slot + 1) & mask; + } + + if (assigned == resizeAt) { + allocateThenInsertThenRehash(slot, key, value); + } else { + keys[slot] = key; + values[slot] = value; + } + + assigned++; + return 0; + } + } + + public int putAll(Iterable iterable) { + final int count = size(); + for (LongIntCursor c : iterable) { + put(c.key, c.value); + } + return size() - count; + } + + /** + * Trove-inspired API method. An equivalent of the + * following code: + * + *

+   * if (!map.containsKey(key)) map.put(key, value);
+   * 
+ * + * @param key The key of the value to check. + * @param value The value to put if key does not exist. + * @return true if key did not exist and value was placed + * in the map. + */ + public boolean putIfAbsent(long key, int value) { + int keyIndex = indexOf(key); + if (!indexExists(keyIndex)) { + indexInsert(keyIndex, key, value); + return true; + } else { + return false; + } + } + + /** + * If key does not exist, putValue is inserted into the map, otherwise any + * existing value is incremented by incrementValue. + * + * @param key The key of the value to adjust. + * @param putValue The value to put if key does not exist. + * @param incrementValue The value to add to the existing value if key exists. + * @return Returns the current value associated with key (after changes). 
+ */ + public int addTo(long key, int incrementValue) { + return putOrAdd(key, incrementValue, incrementValue); + } + + public int remove(long key) { + final int mask = this.mask; + if (((key) == 0)) { + hasEmptyKey = false; + int previousValue = values[mask + 1]; + values[mask + 1] = 0; + return previousValue; + } else { + final long[] keys = this.keys; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + final int previousValue = values[slot]; + shiftConflictingKeys(slot); + return previousValue; + } + slot = (slot + 1) & mask; + } + + return 0; + } + } + + public int get(long key) { + if (((key) == 0)) { + return hasEmptyKey ? values[mask + 1] : 0; + } else { + final long[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + return values[slot]; + } + slot = (slot + 1) & mask; + } + + return 0; + } + } + + public int getOrDefault(long key, int defaultValue) { + if (((key) == 0)) { + return hasEmptyKey ? values[mask + 1] : defaultValue; + } else { + final long[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + return values[slot]; + } + slot = (slot + 1) & mask; + } + + return defaultValue; + } + } + + public boolean containsKey(long key) { + if (((key) == 0)) { + return hasEmptyKey; + } else { + final long[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + return true; + } + slot = (slot + 1) & mask; + } + + return false; + } + } + + public int indexOf(long key) { + final int mask = this.mask; + if (((key) == 0)) { + return hasEmptyKey ? 
mask + 1 : ~(mask + 1); + } else { + final long[] keys = this.keys; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + return slot; + } + slot = (slot + 1) & mask; + } + + return ~slot; + } + } + + public boolean indexExists(int index) { + assert index < 0 || (index >= 0 && index <= mask) || (index == mask + 1 && hasEmptyKey); + + return index >= 0; + } + + public int indexGet(int index) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + return values[index]; + } + + public int indexReplace(int index, int newValue) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + int previousValue = values[index]; + values[index] = newValue; + return previousValue; + } + + public void indexInsert(int index, long key, int value) { + assert index < 0 : "The index must not point at an existing key."; + + index = ~index; + if (((key) == 0)) { + assert index == mask + 1; + values[index] = value; + hasEmptyKey = true; + } else { + assert ((keys[index]) == 0); + + if (assigned == resizeAt) { + allocateThenInsertThenRehash(index, key, value); + } else { + keys[index] = key; + values[index] = value; + } + + assigned++; + } + } + + public int indexRemove(int index) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + int previousValue = values[index]; + if (index > mask) { + hasEmptyKey = false; + values[index] = 0; + } else { + shiftConflictingKeys(index); + } + return previousValue; + } + + public void clear() { + assigned = 0; + hasEmptyKey = false; + + Arrays.fill(keys, 0); + + /* */ + } + + public void release() { + assigned = 0; + hasEmptyKey = false; + + keys = null; + values = null; + ensureCapacity(DEFAULT_EXPECTED_ELEMENTS); + } + + public int size() { + 
return assigned + (hasEmptyKey ? 1 : 0); + } + + public boolean isEmpty() { + return size() == 0; + } + + @Override + public int hashCode() { + int h = hasEmptyKey ? 0xDEADBEEF : 0; + for (LongIntCursor c : this) { + h += BitMixer.mix(c.key) + BitMixer.mix(c.value); + } + return h; + } + + @Override + public boolean equals(Object obj) { + return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)); + } + + /** Return true if all keys of some other container exist in this container. */ + protected boolean equalElements(LongIntHashMap other) { + if (other.size() != size()) { + return false; + } + + for (LongIntCursor c : other) { + long key = c.key; + if (!containsKey(key) || !((get(key)) == (c.value))) { + return false; + } + } + + return true; + } + + /** + * Ensure this container can hold at least the given number of keys (entries) without resizing its + * buffers. + * + * @param expectedElements The total number of keys, inclusive. + */ + public void ensureCapacity(int expectedElements) { + if (expectedElements > resizeAt || keys == null) { + final long[] prevKeys = this.keys; + final int[] prevValues = this.values; + allocateBuffers(minBufferSize(expectedElements, loadFactor)); + if (prevKeys != null && !isEmpty()) { + rehash(prevKeys, prevValues); + } + } + } + + /** + * Provides the next iteration seed used to build the iteration starting slot and offset + * increment. This method does not need to be synchronized, what matters is that each thread gets + * a sequence of varying seeds. + */ + protected int nextIterationSeed() { + return iterationSeed = BitMixer.mixPhi(iterationSeed); + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys) + RamUsageEstimator.sizeOf(values); + } + + /** An iterator implementation for {@link #iterator}. 
*/ + private final class EntryIterator extends AbstractIterator { + private final LongIntCursor cursor; + private final int increment; + private int index; + private int slot; + + public EntryIterator() { + cursor = new LongIntCursor(); + int seed = nextIterationSeed(); + increment = iterationIncrement(seed); + slot = seed & mask; + } + + @Override + protected LongIntCursor fetch() { + final int mask = LongIntHashMap.this.mask; + while (index <= mask) { + long existing; + index++; + slot = (slot + increment) & mask; + if (!((existing = keys[slot]) == 0)) { + cursor.index = slot; + cursor.key = existing; + cursor.value = values[slot]; + return cursor; + } + } + + if (index == mask + 1 && hasEmptyKey) { + cursor.index = index; + cursor.key = 0; + cursor.value = values[index++]; + return cursor; + } + + return done(); + } + } + + @Override + public Iterator iterator() { + return new EntryIterator(); + } + + /** Returns a specialized view of the keys of this associated container. */ + public KeysContainer keys() { + return new KeysContainer(); + } + + /** A view of the keys inside this hash map. */ + public final class KeysContainer implements Iterable { + + @Override + public Iterator iterator() { + return new KeysIterator(); + } + + public int size() { + return LongIntHashMap.this.size(); + } + + public long[] toArray() { + long[] array = new long[size()]; + int i = 0; + for (LongCursor cursor : this) { + array[i++] = cursor.value; + } + return array; + } + } + + /** An iterator over the set of assigned keys. 
*/ + private final class KeysIterator extends AbstractIterator { + private final LongCursor cursor; + private final int increment; + private int index; + private int slot; + + public KeysIterator() { + cursor = new LongCursor(); + int seed = nextIterationSeed(); + increment = iterationIncrement(seed); + slot = seed & mask; + } + + @Override + protected LongCursor fetch() { + final int mask = LongIntHashMap.this.mask; + while (index <= mask) { + long existing; + index++; + slot = (slot + increment) & mask; + if (!((existing = keys[slot]) == 0)) { + cursor.index = slot; + cursor.value = existing; + return cursor; + } + } + + if (index == mask + 1 && hasEmptyKey) { + cursor.index = index++; + cursor.value = 0; + return cursor; + } + + return done(); + } + } + + /** + * @return Returns a container with all values stored in this map. + */ + public ValuesContainer values() { + return new ValuesContainer(); + } + + /** A view over the set of values of this map. */ + public final class ValuesContainer implements Iterable { + + @Override + public Iterator iterator() { + return new ValuesIterator(); + } + + public int size() { + return LongIntHashMap.this.size(); + } + + public int[] toArray() { + int[] array = new int[size()]; + int i = 0; + for (IntCursor cursor : this) { + array[i++] = cursor.value; + } + return array; + } + } + + /** An iterator over the set of assigned values. 
*/ + private final class ValuesIterator extends AbstractIterator { + private final IntCursor cursor; + private final int increment; + private int index; + private int slot; + + public ValuesIterator() { + cursor = new IntCursor(); + int seed = nextIterationSeed(); + increment = iterationIncrement(seed); + slot = seed & mask; + } + + @Override + protected IntCursor fetch() { + final int mask = LongIntHashMap.this.mask; + while (index <= mask) { + index++; + slot = (slot + increment) & mask; + if (!((keys[slot]) == 0)) { + cursor.index = slot; + cursor.value = values[slot]; + return cursor; + } + } + + if (index == mask + 1 && hasEmptyKey) { + cursor.index = index; + cursor.value = values[index++]; + return cursor; + } + + return done(); + } + } + + @Override + public LongIntHashMap clone() { + try { + /* */ + LongIntHashMap cloned = (LongIntHashMap) super.clone(); + cloned.keys = keys.clone(); + cloned.values = values.clone(); + cloned.hasEmptyKey = hasEmptyKey; + cloned.iterationSeed = nextIterationSeed(); + return cloned; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + /** Convert the contents of this map to a human-friendly string. */ + @Override + public String toString() { + final StringBuilder buffer = new StringBuilder(); + buffer.append("["); + + boolean first = true; + for (LongIntCursor cursor : this) { + if (!first) { + buffer.append(", "); + } + buffer.append(cursor.key); + buffer.append("=>"); + buffer.append(cursor.value); + first = false; + } + buffer.append("]"); + return buffer.toString(); + } + + /** Creates a hash map from two index-aligned arrays of key-value pairs. 
*/ + public static LongIntHashMap from(long[] keys, int[] values) { + if (keys.length != values.length) { + throw new IllegalArgumentException( + "Arrays of keys and values must have an identical length."); + } + + LongIntHashMap map = new LongIntHashMap(keys.length); + for (int i = 0; i < keys.length; i++) { + map.put(keys[i], values[i]); + } + + return map; + } + + /** + * Returns a hash code for the given key. + * + *

The output from this function should evenly distribute keys across the entire integer range. + */ + protected int hashKey(long key) { + assert !((key) == 0); // Handled as a special case (empty slot marker). + return BitMixer.mixPhi(key); + } + + /** + * Validate load factor range and return it. Override and suppress if you need insane load + * factors. + */ + protected double verifyLoadFactor(double loadFactor) { + checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR); + return loadFactor; + } + + /** Rehash from old buffers to new buffers. */ + protected void rehash(long[] fromKeys, int[] fromValues) { + assert fromKeys.length == fromValues.length && checkPowerOfTwo(fromKeys.length - 1); + + // Rehash all stored key/value pairs into the new buffers. + final long[] keys = this.keys; + final int[] values = this.values; + final int mask = this.mask; + long existing; + + // Copy the zero element's slot, then rehash everything else. + int from = fromKeys.length - 1; + keys[keys.length - 1] = fromKeys[from]; + values[values.length - 1] = fromValues[from]; + while (--from >= 0) { + if (!((existing = fromKeys[from]) == 0)) { + int slot = hashKey(existing) & mask; + while (!((keys[slot]) == 0)) { + slot = (slot + 1) & mask; + } + keys[slot] = existing; + values[slot] = fromValues[from]; + } + } + } + + /** + * Allocate new internal buffers. This method attempts to allocate and assign internal buffers + * atomically (either allocations succeed or not). + */ + protected void allocateBuffers(int arraySize) { + assert Integer.bitCount(arraySize) == 1; + + // Ensure no change is done if we hit an OOM. 
+ long[] prevKeys = this.keys; + int[] prevValues = this.values; + try { + int emptyElementSlot = 1; + this.keys = (new long[arraySize + emptyElementSlot]); + this.values = (new int[arraySize + emptyElementSlot]); + } catch (OutOfMemoryError e) { + this.keys = prevKeys; + this.values = prevValues; + throw new BufferAllocationException( + "Not enough memory to allocate buffers for rehashing: %,d -> %,d", + e, this.mask + 1, arraySize); + } + + this.resizeAt = expandAtCount(arraySize, loadFactor); + this.mask = arraySize - 1; + } + + /** + * This method is invoked when there is a new key/ value pair to be inserted into the buffers but + * there is not enough empty slots to do so. + * + *

New buffers are allocated. If this succeeds, we know we can proceed with rehashing so we + * assign the pending element to the previous buffer (possibly violating the invariant of having + * at least one empty slot) and rehash all keys, substituting new buffers at the end. + */ + protected void allocateThenInsertThenRehash(int slot, long pendingKey, int pendingValue) { + assert assigned == resizeAt && ((keys[slot]) == 0) && !((pendingKey) == 0); + + // Try to allocate new buffers first. If we OOM, we leave in a consistent state. + final long[] prevKeys = this.keys; + final int[] prevValues = this.values; + allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor)); + assert this.keys.length > prevKeys.length; + + // We have succeeded at allocating new data so insert the pending key/value at + // the free slot in the old arrays before rehashing. + prevKeys[slot] = pendingKey; + prevValues[slot] = pendingValue; + + // Rehash old keys, including the pending key. + rehash(prevKeys, prevValues); + } + + static int nextBufferSize(int arraySize, int elements, double loadFactor) { + assert checkPowerOfTwo(arraySize); + if (arraySize == MAX_HASH_ARRAY_LENGTH) { + throw new BufferAllocationException( + "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", + elements, loadFactor); + } + + return arraySize << 1; + } + + static int expandAtCount(int arraySize, double loadFactor) { + assert checkPowerOfTwo(arraySize); + // Take care of hash container invariant (there has to be at least one empty slot to ensure + // the lookup loop finds either the element or an empty slot). + return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor)); + } + + static boolean checkPowerOfTwo(int arraySize) { + // These are internals, we can just assert without retrying. 
+ assert arraySize > 1; + assert nextHighestPowerOfTwo(arraySize) == arraySize; + return true; + } + + static int minBufferSize(int elements, double loadFactor) { + if (elements < 0) { + throw new IllegalArgumentException("Number of elements must be >= 0: " + elements); + } + + long length = (long) Math.ceil(elements / loadFactor); + if (length == elements) { + length++; + } + length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length)); + + if (length > MAX_HASH_ARRAY_LENGTH) { + throw new BufferAllocationException( + "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", + elements, loadFactor); + } + + return (int) length; + } + + static void checkLoadFactor( + double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) { + if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) { + throw new BufferAllocationException( + "The load factor should be in range [%.2f, %.2f]: %f", + minAllowedInclusive, maxAllowedInclusive, loadFactor); + } + } + + static int iterationIncrement(int seed) { + return 29 + ((seed & 7) << 1); // Small odd integer. + } + + /** + * Shift all the slot-conflicting keys and values allocated to (and including) slot. + */ + protected void shiftConflictingKeys(int gapSlot) { + final long[] keys = this.keys; + final int[] values = this.values; + final int mask = this.mask; + + // Perform shifts of conflicting keys to fill in the gap. + int distance = 0; + while (true) { + final int slot = (gapSlot + (++distance)) & mask; + final long existing = keys[slot]; + if (((existing) == 0)) { + break; + } + + final int idealSlot = hashKey(existing); + final int shift = (slot - idealSlot) & mask; + if (shift >= distance) { + // Entry at this position was originally at or before the gap slot. + // Move the conflict-shifted entry to the gap's position and repeat the procedure + // for any entries to the right of the current position, treating it + // as the new gap. 
+ keys[gapSlot] = existing; + values[gapSlot] = values[slot]; + gapSlot = slot; + distance = 0; + } + } + + // Mark the last found gap slot without a conflict as empty. + keys[gapSlot] = 0; + values[gapSlot] = 0; + assigned--; + } + + /** + * Forked from HPPC, holding int index, key and value. Declared static so that a cursor does not + * carry a hidden reference to the enclosing map. + */ + public static final class LongIntCursor { + /** + * The current key and value's index in the container this cursor belongs to. The meaning of + * this index is defined by the container (usually it will be an index in the underlying storage + * buffer). + */ + public int index; + + /** The current key. */ + public long key; + + /** The current value. */ + public int value; + + @Override + public String toString() { + return "[cursor, index: " + index + ", key: " + key + ", value: " + value + "]"; + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/LongObjectHashMap.java b/lucene/core/src/java/org/apache/lucene/util/hppc/LongObjectHashMap.java new file mode 100644 index 00000000000..c72bd35b4bc --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/LongObjectHashMap.java @@ -0,0 +1,869 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.util.hppc; + +import static org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo; +import static org.apache.lucene.util.hppc.HashContainers.*; + +import java.util.Arrays; +import java.util.Iterator; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * A hash map of long to Object, implemented using open addressing with + * linear probing for collision resolution. Supports null values. + * + *

Mostly forked and trimmed from com.carrotsearch.hppc.LongObjectHashMap + * + *

github: https://github.com/carrotsearch/hppc release 0.9.0 + */ +@SuppressWarnings("unchecked") +public class LongObjectHashMap + implements Iterable>, Accountable, Cloneable { + + private static final long BASE_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(LongObjectHashMap.class); + + /** The array holding keys. */ + public long[] keys; + + /** The array holding values. */ + public Object[] values; + + /** + * The number of stored keys (assigned key slots), excluding the special "empty" key, if any (use + * {@link #size()} instead). + * + * @see #size() + */ + protected int assigned; + + /** Mask for slot scans in {@link #keys}. */ + protected int mask; + + /** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */ + protected int resizeAt; + + /** Special treatment for the "empty slot" key marker. */ + protected boolean hasEmptyKey; + + /** The load factor for {@link #keys}. */ + protected double loadFactor; + + /** Seed used to ensure the hash iteration order is different from an iteration to another. */ + protected int iterationSeed; + + /** New instance with sane defaults. */ + public LongObjectHashMap() { + this(DEFAULT_EXPECTED_ELEMENTS); + } + + /** + * New instance with sane defaults. + * + * @param expectedElements The expected number of elements guaranteed not to cause buffer + * expansion (inclusive). + */ + public LongObjectHashMap(int expectedElements) { + this(expectedElements, DEFAULT_LOAD_FACTOR); + } + + /** + * New instance with the provided defaults. + * + * @param expectedElements The expected number of elements guaranteed not to cause a rehash + * (inclusive). + * @param loadFactor The load factor for internal buffers. Insane load factors (zero, full + * capacity) are rejected by {@link #verifyLoadFactor(double)}. 
+ */ + public LongObjectHashMap(int expectedElements, double loadFactor) { + this.loadFactor = verifyLoadFactor(loadFactor); + iterationSeed = ITERATION_SEED.incrementAndGet(); + ensureCapacity(expectedElements); + } + + /** Create a hash map from all key-value pairs of another container. */ + public LongObjectHashMap(Iterable> container) { + this(); + putAll(container); + } + + public VType put(long key, VType value) { + assert assigned < mask + 1; + + final int mask = this.mask; + if (((key) == 0)) { + hasEmptyKey = true; + VType previousValue = (VType) values[mask + 1]; + values[mask + 1] = value; + return previousValue; + } else { + final long[] keys = this.keys; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + final VType previousValue = (VType) values[slot]; + values[slot] = value; + return previousValue; + } + slot = (slot + 1) & mask; + } + + if (assigned == resizeAt) { + allocateThenInsertThenRehash(slot, key, value); + } else { + keys[slot] = key; + values[slot] = value; + } + + assigned++; + return null; + } + } + + public int putAll(Iterable> iterable) { + final int count = size(); + for (LongObjectCursor c : iterable) { + put(c.key, c.value); + } + return size() - count; + } + + /** + * Trove-inspired API method. An equivalent of the + * following code: + * + *

+   * if (!map.containsKey(key)) map.put(key, value);
+   * 
+ * + * @param key The key of the value to check. + * @param value The value to put if key does not exist. + * @return true if key did not exist and value was placed + * in the map. + */ + public boolean putIfAbsent(long key, VType value) { + int keyIndex = indexOf(key); + if (!indexExists(keyIndex)) { + indexInsert(keyIndex, key, value); + return true; + } else { + return false; + } + } + + /** + * Removes the mapping for the given key, if any. + * + * @param key The key to remove. + * @return The value previously associated with the key, or null if the key was not present (or + * was mapped to null). + */ + public VType remove(long key) { + final int mask = this.mask; + if (((key) == 0)) { + if (!hasEmptyKey) { + // The empty key is absent: never hand out whatever is left in its dedicated slot. + return null; + } + hasEmptyKey = false; + VType previousValue = (VType) values[mask + 1]; + // Clear the reference; assigning 0 here would autobox an Integer into the Object[]. + values[mask + 1] = null; + return previousValue; + } else { + final long[] keys = this.keys; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + final VType previousValue = (VType) values[slot]; + shiftConflictingKeys(slot); + return previousValue; + } + slot = (slot + 1) & mask; + } + + return null; + } + } + + /** + * Returns the value mapped to the given key. + * + * @param key The key to look up. + * @return The mapped value, or null if the key is absent (or mapped to null). + */ + public VType get(long key) { + if (((key) == 0)) { + return hasEmptyKey ? (VType) values[mask + 1] : null; + } else { + final long[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + return (VType) values[slot]; + } + slot = (slot + 1) & mask; + } + + return null; + } + } + + /** Returns the value mapped to the given key, or defaultValue when the key is absent. */ + public VType getOrDefault(long key, VType defaultValue) { + if (((key) == 0)) { + return hasEmptyKey ? 
(VType) values[mask + 1] : defaultValue; + } else { + final long[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + return (VType) values[slot]; + } + slot = (slot + 1) & mask; + } + + return defaultValue; + } + } + + public boolean containsKey(long key) { + if (((key) == 0)) { + return hasEmptyKey; + } else { + final long[] keys = this.keys; + final int mask = this.mask; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + return true; + } + slot = (slot + 1) & mask; + } + + return false; + } + } + + public int indexOf(long key) { + final int mask = this.mask; + if (((key) == 0)) { + return hasEmptyKey ? mask + 1 : ~(mask + 1); + } else { + final long[] keys = this.keys; + int slot = hashKey(key) & mask; + + long existing; + while (!((existing = keys[slot]) == 0)) { + if (((existing) == (key))) { + return slot; + } + slot = (slot + 1) & mask; + } + + return ~slot; + } + } + + public boolean indexExists(int index) { + assert index < 0 || (index >= 0 && index <= mask) || (index == mask + 1 && hasEmptyKey); + + return index >= 0; + } + + public VType indexGet(int index) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + return (VType) values[index]; + } + + public VType indexReplace(int index, VType newValue) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + VType previousValue = (VType) values[index]; + values[index] = newValue; + return previousValue; + } + + public void indexInsert(int index, long key, VType value) { + assert index < 0 : "The index must not point at an existing key."; + + index = ~index; + if (((key) == 0)) { + assert index == mask + 1; + values[index] = value; + hasEmptyKey = 
true; + } else { + assert ((keys[index]) == 0); + + if (assigned == resizeAt) { + allocateThenInsertThenRehash(index, key, value); + } else { + keys[index] = key; + values[index] = value; + } + + assigned++; + } + } + + /** + * Removes the entry stored at the given slot index. + * + * @param index A non-negative index pointing at an existing key. + * @return The value that was stored at that index. + */ + public VType indexRemove(int index) { + assert index >= 0 : "The index must point at an existing key."; + assert index <= mask || (index == mask + 1 && hasEmptyKey); + + VType previousValue = (VType) values[index]; + if (index > mask) { + hasEmptyKey = false; + // Clear the reference; assigning 0 here would autobox an Integer into the Object[]. + values[index] = null; + } else { + shiftConflictingKeys(index); + } + return previousValue; + } + + /** Removes all entries while keeping the currently allocated buffers. */ + public void clear() { + assigned = 0; + hasEmptyKey = false; + + Arrays.fill(keys, 0); + // Null out the values as well so removed objects are not kept reachable by this map. + Arrays.fill(values, null); + } + + /** Removes all entries and replaces the buffers with ones sized for the default capacity. */ + public void release() { + assigned = 0; + hasEmptyKey = false; + + keys = null; + values = null; + ensureCapacity(DEFAULT_EXPECTED_ELEMENTS); + } + + /** Returns the number of entries, counting the special empty (zero) key when it is present. */ + public int size() { + return assigned + (hasEmptyKey ? 1 : 0); + } + + /** Returns true if this map holds no entries. */ + public boolean isEmpty() { + return size() == 0; + } + + @Override + public int hashCode() { + int h = hasEmptyKey ? 0xDEADBEEF : 0; + for (LongObjectCursor c : this) { + h += BitMixer.mix(c.key) + BitMixer.mix(c.value); + } + return h; + } + + @Override + public boolean equals(Object obj) { + return obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)); + } + + /** + * Return true if the other container has the same size as this one and every key/value pair of + * the other container is present in this container. + */ + protected boolean equalElements(LongObjectHashMap other) { + if (other.size() != size()) { + return false; + } + + for (LongObjectCursor c : other) { + long key = c.key; + if (!containsKey(key) || !java.util.Objects.equals(c.value, get(key))) { + return false; + } + } + + return true; + } + + /** + * Ensure this container can hold at least the given number of keys (entries) without resizing its + * buffers. + * + * @param expectedElements The total number of keys, inclusive. 
+ */ + public void ensureCapacity(int expectedElements) { + if (expectedElements > resizeAt || keys == null) { + final long[] prevKeys = this.keys; + final VType[] prevValues = (VType[]) this.values; + allocateBuffers(minBufferSize(expectedElements, loadFactor)); + if (prevKeys != null && !isEmpty()) { + rehash(prevKeys, prevValues); + } + } + } + + /** + * Provides the next iteration seed used to build the iteration starting slot and offset + * increment. This method does not need to be synchronized, what matters is that each thread gets + * a sequence of varying seeds. + */ + protected int nextIterationSeed() { + return iterationSeed = BitMixer.mixPhi(iterationSeed); + } + + @Override + public Iterator> iterator() { + return new EntryIterator(); + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys) + sizeOfValues(); + } + + /** Returns the shallow size of the values array plus the estimated size of each stored value. */ + private long sizeOfValues() { + long size = RamUsageEstimator.shallowSizeOf(values); + for (ObjectCursor value : values()) { + // Measure the stored value itself, not the reusable cursor that wraps it. + size += RamUsageEstimator.sizeOfObject(value.value); + } + return size; + } + + /** An iterator implementation for {@link #iterator}. 
*/ + private final class EntryIterator extends AbstractIterator> { + private final LongObjectCursor cursor; + private final int increment; + private int index; + private int slot; + + public EntryIterator() { + cursor = new LongObjectCursor(); + int seed = nextIterationSeed(); + increment = iterationIncrement(seed); + slot = seed & mask; + } + + @Override + protected LongObjectCursor fetch() { + final int mask = LongObjectHashMap.this.mask; + while (index <= mask) { + long existing; + index++; + slot = (slot + increment) & mask; + if (!((existing = keys[slot]) == 0)) { + cursor.index = slot; + cursor.key = existing; + cursor.value = (VType) values[slot]; + return cursor; + } + } + + if (index == mask + 1 && hasEmptyKey) { + cursor.index = index; + cursor.key = 0; + cursor.value = (VType) values[index++]; + return cursor; + } + + return done(); + } + } + + /** Returns a specialized view of the keys of this associated container. */ + public KeysContainer keys() { + return new KeysContainer(); + } + + /** A view of the keys inside this hash map. */ + public final class KeysContainer implements Iterable { + + @Override + public Iterator iterator() { + return new KeysIterator(); + } + + public int size() { + return LongObjectHashMap.this.size(); + } + + public long[] toArray() { + long[] array = new long[size()]; + int i = 0; + for (LongCursor cursor : this) { + array[i++] = cursor.value; + } + return array; + } + } + + /** An iterator over the set of assigned keys. 
*/ + private final class KeysIterator extends AbstractIterator { + private final LongCursor cursor; + private final int increment; + private int index; + private int slot; + + public KeysIterator() { + cursor = new LongCursor(); + int seed = nextIterationSeed(); + increment = iterationIncrement(seed); + slot = seed & mask; + } + + @Override + protected LongCursor fetch() { + final int mask = LongObjectHashMap.this.mask; + while (index <= mask) { + long existing; + index++; + slot = (slot + increment) & mask; + if (!((existing = keys[slot]) == 0)) { + cursor.index = slot; + cursor.value = existing; + return cursor; + } + } + + if (index == mask + 1 && hasEmptyKey) { + cursor.index = index++; + cursor.value = 0; + return cursor; + } + + return done(); + } + } + + /** + * @return Returns a container with all values stored in this map. + */ + public ValuesContainer values() { + return new ValuesContainer(); + } + + /** A view over the set of values of this map. */ + public final class ValuesContainer implements Iterable> { + + @Override + public Iterator> iterator() { + return new ValuesIterator(); + } + + public int size() { + return LongObjectHashMap.this.size(); + } + + public VType[] toArray() { + VType[] array = (VType[]) new Object[size()]; + int i = 0; + for (ObjectCursor cursor : this) { + array[i++] = cursor.value; + } + return array; + } + } + + /** An iterator over the set of assigned values. 
*/ + private final class ValuesIterator extends AbstractIterator> { + private final ObjectCursor cursor; + private final int increment; + private int index; + private int slot; + + public ValuesIterator() { + cursor = new ObjectCursor<>(); + int seed = nextIterationSeed(); + increment = iterationIncrement(seed); + slot = seed & mask; + } + + @Override + protected ObjectCursor fetch() { + final int mask = LongObjectHashMap.this.mask; + while (index <= mask) { + index++; + slot = (slot + increment) & mask; + if (!((keys[slot]) == 0)) { + cursor.index = slot; + cursor.value = (VType) values[slot]; + return cursor; + } + } + + if (index == mask + 1 && hasEmptyKey) { + cursor.index = index; + cursor.value = (VType) values[index++]; + return cursor; + } + + return done(); + } + } + + @Override + public LongObjectHashMap clone() { + try { + /* */ + LongObjectHashMap cloned = (LongObjectHashMap) super.clone(); + cloned.keys = keys.clone(); + cloned.values = values.clone(); + cloned.hasEmptyKey = hasEmptyKey; + cloned.iterationSeed = nextIterationSeed(); + return cloned; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + /** Convert the contents of this map to a human-friendly string. */ + @Override + public String toString() { + final StringBuilder buffer = new StringBuilder(); + buffer.append("["); + + boolean first = true; + for (LongObjectCursor cursor : this) { + if (!first) { + buffer.append(", "); + } + buffer.append(cursor.key); + buffer.append("=>"); + buffer.append(cursor.value); + first = false; + } + buffer.append("]"); + return buffer.toString(); + } + + /** Creates a hash map from two index-aligned arrays of key-value pairs. 
*/ + public static LongObjectHashMap from(long[] keys, VType[] values) { + if (keys.length != values.length) { + throw new IllegalArgumentException( + "Arrays of keys and values must have an identical length."); + } + + LongObjectHashMap map = new LongObjectHashMap<>(keys.length); + for (int i = 0; i < keys.length; i++) { + map.put(keys[i], values[i]); + } + + return map; + } + + /** + * Returns a hash code for the given key. + * + *

The output from this function should evenly distribute keys across the entire integer range. + */ + protected int hashKey(long key) { + assert !((key) == 0); // Handled as a special case (empty slot marker). + return BitMixer.mixPhi(key); + } + + /** + * Validate load factor range and return it. Override and suppress if you need insane load + * factors. + */ + protected double verifyLoadFactor(double loadFactor) { + checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR); + return loadFactor; + } + + /** Rehash from old buffers to new buffers. */ + protected void rehash(long[] fromKeys, VType[] fromValues) { + assert fromKeys.length == fromValues.length && checkPowerOfTwo(fromKeys.length - 1); + + // Rehash all stored key/value pairs into the new buffers. + final long[] keys = this.keys; + final VType[] values = (VType[]) this.values; + final int mask = this.mask; + long existing; + + // Copy the zero element's slot, then rehash everything else. + int from = fromKeys.length - 1; + keys[keys.length - 1] = fromKeys[from]; + values[values.length - 1] = fromValues[from]; + while (--from >= 0) { + if (!((existing = fromKeys[from]) == 0)) { + int slot = hashKey(existing) & mask; + while (!((keys[slot]) == 0)) { + slot = (slot + 1) & mask; + } + keys[slot] = existing; + values[slot] = fromValues[from]; + } + } + } + + /** + * Allocate new internal buffers. This method attempts to allocate and assign internal buffers + * atomically (either allocations succeed or not). + */ + protected void allocateBuffers(int arraySize) { + assert Integer.bitCount(arraySize) == 1; + + // Ensure no change is done if we hit an OOM. 
+ long[] prevKeys = this.keys; + VType[] prevValues = (VType[]) this.values; + try { + int emptyElementSlot = 1; + this.keys = (new long[arraySize + emptyElementSlot]); + this.values = new Object[arraySize + emptyElementSlot]; + } catch (OutOfMemoryError e) { + this.keys = prevKeys; + this.values = prevValues; + throw new BufferAllocationException( + "Not enough memory to allocate buffers for rehashing: %,d -> %,d", + e, this.mask + 1, arraySize); + } + + this.resizeAt = expandAtCount(arraySize, loadFactor); + this.mask = arraySize - 1; + } + + /** + * This method is invoked when there is a new key/ value pair to be inserted into the buffers but + * there is not enough empty slots to do so. + * + *

New buffers are allocated. If this succeeds, we know we can proceed with rehashing so we + * assign the pending element to the previous buffer (possibly violating the invariant of having + * at least one empty slot) and rehash all keys, substituting new buffers at the end. + */ + protected void allocateThenInsertThenRehash(int slot, long pendingKey, VType pendingValue) { + assert assigned == resizeAt && ((keys[slot]) == 0) && !((pendingKey) == 0); + + // Try to allocate new buffers first. If we OOM, we leave in a consistent state. + final long[] prevKeys = this.keys; + final VType[] prevValues = (VType[]) this.values; + allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor)); + assert this.keys.length > prevKeys.length; + + // We have succeeded at allocating new data so insert the pending key/value at + // the free slot in the old arrays before rehashing. + prevKeys[slot] = pendingKey; + prevValues[slot] = pendingValue; + + // Rehash old keys, including the pending key. + rehash(prevKeys, prevValues); + } + + static int nextBufferSize(int arraySize, int elements, double loadFactor) { + assert checkPowerOfTwo(arraySize); + if (arraySize == MAX_HASH_ARRAY_LENGTH) { + throw new BufferAllocationException( + "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", + elements, loadFactor); + } + + return arraySize << 1; + } + + static int expandAtCount(int arraySize, double loadFactor) { + assert checkPowerOfTwo(arraySize); + // Take care of hash container invariant (there has to be at least one empty slot to ensure + // the lookup loop finds either the element or an empty slot). + return Math.min(arraySize - 1, (int) Math.ceil(arraySize * loadFactor)); + } + + static boolean checkPowerOfTwo(int arraySize) { + // These are internals, we can just assert without retrying. 
+ assert arraySize > 1; + assert nextHighestPowerOfTwo(arraySize) == arraySize; + return true; + } + + static int minBufferSize(int elements, double loadFactor) { + if (elements < 0) { + throw new IllegalArgumentException("Number of elements must be >= 0: " + elements); + } + + long length = (long) Math.ceil(elements / loadFactor); + if (length == elements) { + length++; + } + length = Math.max(MIN_HASH_ARRAY_LENGTH, nextHighestPowerOfTwo(length)); + + if (length > MAX_HASH_ARRAY_LENGTH) { + throw new BufferAllocationException( + "Maximum array size exceeded for this load factor (elements: %d, load factor: %f)", + elements, loadFactor); + } + + return (int) length; + } + + static void checkLoadFactor( + double loadFactor, double minAllowedInclusive, double maxAllowedInclusive) { + if (loadFactor < minAllowedInclusive || loadFactor > maxAllowedInclusive) { + throw new BufferAllocationException( + "The load factor should be in range [%.2f, %.2f]: %f", + minAllowedInclusive, maxAllowedInclusive, loadFactor); + } + } + + static int iterationIncrement(int seed) { + return 29 + ((seed & 7) << 1); // Small odd integer. + } + + /** + * Shift all the slot-conflicting keys and values allocated to (and including) slot. + */ + protected void shiftConflictingKeys(int gapSlot) { + final long[] keys = this.keys; + final VType[] values = (VType[]) this.values; + final int mask = this.mask; + + // Perform shifts of conflicting keys to fill in the gap. + int distance = 0; + while (true) { + final int slot = (gapSlot + (++distance)) & mask; + final long existing = keys[slot]; + if (((existing) == 0)) { + break; + } + + final int idealSlot = hashKey(existing); + final int shift = (slot - idealSlot) & mask; + if (shift >= distance) { + // Entry at this position was originally at or before the gap slot. + // Move the conflict-shifted entry to the gap's position and repeat the procedure + // for any entries to the right of the current position, treating it + // as the new gap. 
+ keys[gapSlot] = existing; + values[gapSlot] = values[slot]; + gapSlot = slot; + distance = 0; + } + } + + // Mark the last found gap slot without a conflict as empty. + keys[gapSlot] = 0; + values[gapSlot] = null; + assigned--; + } + + /** Forked from HPPC, holding int index,key and value */ + public static final class LongObjectCursor { + /** + * The current key and value's index in the container this cursor belongs to. The meaning of + * this index is defined by the container (usually it will be an index in the underlying storage + * buffer). + */ + public int index; + + /** The current key. */ + public long key; + + /** The current value. */ + public VType value; + + @Override + public String toString() { + return "[cursor, index: " + index + ", key: " + key + ", value: " + value + "]"; + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/hppc/ObjectCursor.java b/lucene/core/src/java/org/apache/lucene/util/hppc/ObjectCursor.java new file mode 100644 index 00000000000..45d0cb56b62 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/hppc/ObjectCursor.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.util.hppc; + +/** Forked from HPPC, holding int index and Object value */ +public final class ObjectCursor { + /** + * The current value's index in the container this cursor belongs to. The meaning of this index is + * defined by the container (usually it will be an index in the underlying storage buffer). + */ + public int index; + + /** The current value. */ + public VType value; + + @Override + public String toString() { + return "[cursor, index: " + index + ", value: " + value + "]"; + } +} diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntIntHashMap.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntIntHashMap.java index 84a4ce96533..d31afe49375 100644 --- a/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntIntHashMap.java +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntIntHashMap.java @@ -48,59 +48,16 @@ public class TestIntIntHashMap extends LuceneTestCase { protected int key9 = cast(9), k9 = key9; /** Convert to target type from an integer used to test stuff. */ - public int cast(Integer v) { - return v.intValue(); - } - - public int[] asArray(int... ints) { - int[] values = (new int[ints.length]); - for (int i = 0; i < ints.length; i++) values[i] = ints[i]; - return values; + public int cast(int v) { + return v; } /** Create a new array of a given type and copy the arguments to this array. */ /* */ public final int[] newArray(int... elements) { - return newArray0(elements); - } - - /* */ - private final int[] newArray0(int... 
elements) { return elements; } - public int[] newArray(int v0) { - return this.newArray0(v0); - } - - public int[] newArray(int v0, int v1) { - return this.newArray0(v0, v1); - } - - public int[] newArray(int v0, int v1, int v2) { - return this.newArray0(v0, v1, v2); - } - - public int[] newArray(int v0, int v1, int v2, int v3) { - return this.newArray0(v0, v1, v2, v3); - } - - public int[] newArray(int v0, int v1, int v2, int v3, int v4, int v5, int v6) { - return this.newArray0(v0, v1, v2, v3, v4, v5, v6); - } - - public int[] newArray(int v0, int v1, int v2, int v3, int v4, int v5) { - return this.newArray0(v0, v1, v2, v3, v4, v5); - } - - public int[] newArray(int v0, int v1, int v2, int v3, int v4) { - return this.newArray0(v0, v1, v2, v3, v4); - } - - public int[] newArray(int v0, int v1, int v2, int v3, int v4, int v5, int v6, int v7) { - return this.newArray0(v0, v1, v2, v3, v4, v5, v6, v7); - } - public static int randomIntBetween(int min, int max) { return min + random().nextInt(max + 1 - min); } diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntObjectHashMap.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntObjectHashMap.java index dcfea7ab095..abfba94f573 100644 --- a/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntObjectHashMap.java +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestIntObjectHashMap.java @@ -49,59 +49,16 @@ public class TestIntObjectHashMap extends LuceneTestCase { protected int key9 = cast(9), k9 = key9; /** Convert to target type from an integer used to test stuff. */ - public int cast(Integer v) { - return v.intValue(); - } - - public int[] asArray(int... ints) { - int[] values = (new int[ints.length]); - for (int i = 0; i < ints.length; i++) values[i] = ints[i]; - return values; + public int cast(int v) { + return v; } /** Create a new array of a given type and copy the arguments to this array. */ /* */ public final int[] newArray(int... 
elements) { - return newArray0(elements); - } - - /* */ - private final int[] newArray0(int... elements) { return elements; } - public int[] newArray(int v0) { - return this.newArray0(v0); - } - - public int[] newArray(int v0, int v1) { - return this.newArray0(v0, v1); - } - - public int[] newArray(int v0, int v1, int v2) { - return this.newArray0(v0, v1, v2); - } - - public int[] newArray(int v0, int v1, int v2, int v3) { - return this.newArray0(v0, v1, v2, v3); - } - - public int[] newArray(int v0, int v1, int v2, int v3, int v4, int v5, int v6) { - return this.newArray0(v0, v1, v2, v3, v4, v5, v6); - } - - public int[] newArray(int v0, int v1, int v2, int v3, int v4, int v5) { - return this.newArray0(v0, v1, v2, v3, v4, v5); - } - - public int[] newArray(int v0, int v1, int v2, int v3, int v4) { - return this.newArray0(v0, v1, v2, v3, v4); - } - - public int[] newArray(int v0, int v1, int v2, int v3, int v4, int v5, int v6, int v7) { - return this.newArray0(v0, v1, v2, v3, v4, v5, v6, v7); - } - public static int randomIntBetween(int min, int max) { return min + random().nextInt(max + 1 - min); } @@ -297,11 +254,26 @@ public class TestIntObjectHashMap extends LuceneTestCase { } /* */ + @Test + public void testNullValue() { + map.put(key1, null); + + assertTrue(map.containsKey(key1)); + assertNull(map.get(key1)); + } + @Test public void testPutOverExistingKey() { map.put(key1, value1); assertEquals(value1, map.put(key1, value3)); assertEquals(value3, map.get(key1)); + + assertEquals(value3, map.put(key1, null)); + assertTrue(map.containsKey(key1)); + assertNull(map.get(key1)); + + assertNull(map.put(key1, value1)); + assertEquals(value1, map.get(key1)); } /* */ @@ -381,6 +353,16 @@ public class TestIntObjectHashMap extends LuceneTestCase { map.remove(empty); assertEquals(null, map.get(empty)); + + map.put(empty, null); + assertEquals(1, map.size()); + assertTrue(map.containsKey(empty)); + assertNull(map.get(empty)); + + map.remove(empty); + assertEquals(0, 
map.size()); + assertFalse(map.containsKey(empty)); + assertNull(map.get(empty)); } /* */ @@ -577,7 +559,7 @@ public class TestIntObjectHashMap extends LuceneTestCase { map.put(key3, value1); int counted = 0; - for (IntObjectHashMap.ObjectCursor c : map.values()) { + for (ObjectCursor c : map.values()) { assertEquals(map.values[c.index], c.value); counted++; } @@ -608,7 +590,6 @@ public class TestIntObjectHashMap extends LuceneTestCase { @Test public void testEqualsSubClass() { class Sub extends IntObjectHashMap {} - ; IntObjectHashMap l1 = newInstance(); l1.put(k1, value0); diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongIntHashMap.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongIntHashMap.java new file mode 100644 index 00000000000..7fe5561de1e --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongIntHashMap.java @@ -0,0 +1,597 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.util.hppc; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Random; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.junit.After; +import org.junit.Test; + +/** + * Tests for {@link LongIntHashMap}. + * + *

<p>Mostly forked and trimmed from com.carrotsearch.hppc.LongIntHashMapTest + * + *

github: https://github.com/carrotsearch/hppc release: 0.9.0 + */ +public class TestLongIntHashMap extends LuceneTestCase { + /* Ready to use key values. */ + + protected long keyE = 0; + protected long key0 = cast(0), k0 = key0; + protected long key1 = cast(1), k1 = key1; + protected long key2 = cast(2), k2 = key2; + protected long key3 = cast(3), k3 = key3; + protected long key4 = cast(4), k4 = key4; + protected long key5 = cast(5), k5 = key5; + protected long key6 = cast(6), k6 = key6; + protected long key7 = cast(7), k7 = key7; + protected long key8 = cast(8), k8 = key8; + protected long key9 = cast(9), k9 = key9; + + /** Convert to target type from an integer used to test stuff. */ + public long cast(int v) { + return v; + } + + /** Create a new array of a given type and copy the arguments to this array. */ + /* */ + public final long[] newArray(long... elements) { + return elements; + } + + public static int randomIntBetween(int min, int max) { + return min + random().nextInt(max + 1 - min); + } + + /** Check if the array's content is identical to a given sequence of elements. */ + public static void assertSortedListEquals(long[] array, long... elements) { + assertEquals(elements.length, array.length); + Arrays.sort(array); + Arrays.sort(elements); + assertArrayEquals(elements, array); + } + + /** Check if the array's content is identical to a given sequence of elements. */ + public static void assertSortedListEquals(int[] array, int... elements) { + assertEquals(elements.length, array.length); + Arrays.sort(array); + Arrays.sort(elements); + assertArrayEquals(elements, array); + } + + protected int value0 = vcast(0); + protected int value1 = vcast(1); + protected int value2 = vcast(2); + protected int value3 = vcast(3); + protected int value4 = vcast(4); + + /** Per-test fresh initialized instance. 
*/ + public LongIntHashMap map = newInstance(); + + protected LongIntHashMap newInstance() { + return new LongIntHashMap(); + } + + @After + public void checkEmptySlotsUninitialized() { + if (map != null) { + int occupied = 0; + for (int i = 0; i <= map.mask; i++) { + if (((map.keys[i]) == 0)) { + + } else { + occupied++; + } + } + assertEquals(occupied, map.assigned); + + if (!map.hasEmptyKey) {} + } + } + + /** Convert to target type from an integer used to test stuff. */ + protected int vcast(int value) { + return value; + } + + /** Create a new array of a given type and copy the arguments to this array. */ + /* */ + protected final int[] newvArray(int... elements) { + return elements; + } + + private void assertSameMap(final LongIntHashMap c1, final LongIntHashMap c2) { + assertEquals(c1.size(), c2.size()); + + for (LongIntHashMap.LongIntCursor entry : c1) { + assertTrue(c2.containsKey(entry.key)); + assertEquals(entry.value, c2.get(entry.key)); + } + } + + /* */ + @Test + public void testEnsureCapacity() { + final AtomicInteger expands = new AtomicInteger(); + LongIntHashMap map = + new LongIntHashMap(0) { + @Override + protected void allocateBuffers(int arraySize) { + super.allocateBuffers(arraySize); + expands.incrementAndGet(); + } + }; + + // Add some elements. + final int max = rarely() ? 
0 : randomIntBetween(0, 250); + for (int i = 0; i < max; i++) { + map.put(cast(i), value0); + } + + final int additions = randomIntBetween(max, max + 5000); + map.ensureCapacity(additions + map.size()); + final int before = expands.get(); + for (int i = 0; i < additions; i++) { + map.put(cast(i), value0); + } + assertEquals(before, expands.get()); + } + + @Test + public void testCursorIndexIsValid() { + map.put(keyE, value1); + map.put(key1, value2); + map.put(key2, value3); + + for (LongIntHashMap.LongIntCursor c : map) { + assertTrue(map.indexExists(c.index)); + assertEquals(c.value, map.indexGet(c.index)); + } + } + + @Test + public void testIndexMethods() { + map.put(keyE, value1); + map.put(key1, value2); + + assertTrue(map.indexOf(keyE) >= 0); + assertTrue(map.indexOf(key1) >= 0); + assertTrue(map.indexOf(key2) < 0); + + assertTrue(map.indexExists(map.indexOf(keyE))); + assertTrue(map.indexExists(map.indexOf(key1))); + assertFalse(map.indexExists(map.indexOf(key2))); + + assertEquals(value1, map.indexGet(map.indexOf(keyE))); + assertEquals(value2, map.indexGet(map.indexOf(key1))); + + expectThrows( + AssertionError.class, + () -> { + map.indexGet(map.indexOf(key2)); + fail(); + }); + + assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3)); + assertEquals(value2, map.indexReplace(map.indexOf(key1), value4)); + assertEquals(value3, map.indexGet(map.indexOf(keyE))); + assertEquals(value4, map.indexGet(map.indexOf(key1))); + + map.indexInsert(map.indexOf(key2), key2, value1); + assertEquals(value1, map.indexGet(map.indexOf(key2))); + assertEquals(3, map.size()); + + assertEquals(value3, map.indexRemove(map.indexOf(keyE))); + assertEquals(2, map.size()); + assertEquals(value1, map.indexRemove(map.indexOf(key2))); + assertEquals(1, map.size()); + assertTrue(map.indexOf(keyE) < 0); + assertTrue(map.indexOf(key1) >= 0); + assertTrue(map.indexOf(key2) < 0); + } + + /* */ + @Test + public void testCloningConstructor() { + map.put(key1, value1); + 
map.put(key2, value2); + map.put(key3, value3); + + assertSameMap(map, new LongIntHashMap(map)); + } + + /* */ + @Test + public void testFromArrays() { + map.put(key1, value1); + map.put(key2, value2); + map.put(key3, value3); + + LongIntHashMap map2 = + LongIntHashMap.from(newArray(key1, key2, key3), newvArray(value1, value2, value3)); + + assertSameMap(map, map2); + } + + @Test + public void testGetOrDefault() { + map.put(key2, value2); + assertTrue(map.containsKey(key2)); + + map.put(key1, value1); + assertEquals(value1, map.getOrDefault(key1, value3)); + assertEquals(value3, map.getOrDefault(key3, value3)); + map.remove(key1); + assertEquals(value3, map.getOrDefault(key1, value3)); + } + + /* */ + @Test + public void testPut() { + map.put(key1, value1); + + assertTrue(map.containsKey(key1)); + assertEquals(value1, map.get(key1)); + } + + /* */ + @Test + public void testPutOverExistingKey() { + map.put(key1, value1); + assertEquals(value1, map.put(key1, value3)); + assertEquals(value3, map.get(key1)); + } + + /* */ + @Test + public void testPutWithExpansions() { + final int COUNT = 10000; + final Random rnd = new Random(random().nextLong()); + final HashSet values = new HashSet(); + + for (int i = 0; i < COUNT; i++) { + final int v = rnd.nextInt(); + final boolean hadKey = values.contains(cast(v)); + values.add(cast(v)); + + assertEquals(hadKey, map.containsKey(cast(v))); + map.put(cast(v), vcast(v)); + assertEquals(values.size(), map.size()); + } + assertEquals(values.size(), map.size()); + } + + /* */ + @Test + public void testPutAll() { + map.put(key1, value1); + map.put(key2, value1); + + LongIntHashMap map2 = newInstance(); + + map2.put(key2, value2); + map2.put(keyE, value1); + + // One new key (keyE). + assertEquals(1, map.putAll(map2)); + + // Assert the value under key2 has been replaced. + assertEquals(value2, map.get(key2)); + + // And keyE has been added. 
+ assertEquals(value1, map.get(keyE)); + assertEquals(3, map.size()); + } + + /* */ + @Test + public void testPutIfAbsent() { + assertTrue(map.putIfAbsent(key1, value1)); + assertFalse(map.putIfAbsent(key1, value2)); + assertEquals(value1, map.get(key1)); + } + + @Test + public void testPutOrAdd() { + assertEquals(value1, map.putOrAdd(key1, value1, value2)); + assertEquals(value3, map.putOrAdd(key1, value1, value2)); + } + + @Test + public void testAddTo() { + assertEquals(value1, map.addTo(key1, value1)); + assertEquals(value3, map.addTo(key1, value2)); + } + + /* */ + @Test + public void testRemove() { + map.put(key1, value1); + assertEquals(value1, map.remove(key1)); + assertEquals(0, map.remove(key1)); + assertEquals(0, map.size()); + + // These are internals, but perhaps worth asserting too. + assertEquals(0, map.assigned); + } + + /* */ + @Test + public void testEmptyKey() { + final int empty = 0; + + map.put(empty, value1); + assertEquals(1, map.size()); + assertEquals(false, map.isEmpty()); + assertEquals(value1, map.get(empty)); + assertEquals(value1, map.getOrDefault(empty, value2)); + assertEquals(true, map.iterator().hasNext()); + assertEquals(empty, map.iterator().next().key); + assertEquals(value1, map.iterator().next().value); + + map.remove(empty); + assertEquals(0, map.get(empty)); + } + + /* */ + @Test + public void testMapKeySet() { + map.put(key1, value3); + map.put(key2, value2); + map.put(key3, value1); + + assertSortedListEquals(map.keys().toArray(), key1, key2, key3); + } + + /* */ + @Test + public void testMapKeySetIterator() { + map.put(key1, value3); + map.put(key2, value2); + map.put(key3, value1); + + int counted = 0; + for (LongCursor c : map.keys()) { + assertEquals(map.keys[c.index], c.value); + counted++; + } + assertEquals(counted, map.size()); + } + + /* */ + @Test + public void testClear() { + map.put(key1, value1); + map.put(key2, value1); + map.clear(); + assertEquals(0, map.size()); + + // These are internals, but perhaps 
worth asserting too. + assertEquals(0, map.assigned); + + // Check if the map behaves properly upon subsequent use. + testPutWithExpansions(); + } + + /* */ + @Test + public void testRelease() { + map.put(key1, value1); + map.put(key2, value1); + map.release(); + assertEquals(0, map.size()); + + // These are internals, but perhaps worth asserting too. + assertEquals(0, map.assigned); + + // Check if the map behaves properly upon subsequent use. + testPutWithExpansions(); + } + + /* */ + @Test + public void testIterable() { + map.put(key1, value1); + map.put(key2, value2); + map.put(key3, value3); + map.remove(key2); + + int count = 0; + for (LongIntHashMap.LongIntCursor cursor : map) { + count++; + assertTrue(map.containsKey(cursor.key)); + assertEquals(cursor.value, map.get(cursor.key)); + + assertEquals(cursor.value, map.values[cursor.index]); + assertEquals(cursor.key, map.keys[cursor.index]); + } + assertEquals(count, map.size()); + + map.clear(); + assertFalse(map.iterator().hasNext()); + } + + /* */ + @Test + public void testBug_HPPC73_FullCapacityGet() { + final AtomicInteger reallocations = new AtomicInteger(); + final int elements = 0x7F; + map = + new LongIntHashMap(elements, 1f) { + @Override + protected double verifyLoadFactor(double loadFactor) { + // Skip load factor sanity range checking. + return loadFactor; + } + + @Override + protected void allocateBuffers(int arraySize) { + super.allocateBuffers(arraySize); + reallocations.incrementAndGet(); + } + }; + + int reallocationsBefore = reallocations.get(); + assertEquals(reallocationsBefore, 1); + for (int i = 1; i <= elements; i++) { + map.put(cast(i), value1); + } + + // Non-existent key. + long outOfSet = cast(elements + 1); + map.remove(outOfSet); + assertFalse(map.containsKey(outOfSet)); + assertEquals(reallocationsBefore, reallocations.get()); + + // Should not expand because we're replacing an existing element. 
+ map.put(k1, value2); + assertEquals(reallocationsBefore, reallocations.get()); + + // Remove from a full map. + map.remove(k1); + assertEquals(reallocationsBefore, reallocations.get()); + map.put(k1, value2); + + // Check expand on "last slot of a full map" condition. + map.put(outOfSet, value1); + assertEquals(reallocationsBefore + 1, reallocations.get()); + } + + @Test + public void testHashCodeEquals() { + LongIntHashMap l0 = newInstance(); + assertEquals(0, l0.hashCode()); + assertEquals(l0, newInstance()); + + LongIntHashMap l1 = + LongIntHashMap.from(newArray(key1, key2, key3), newvArray(value1, value2, value3)); + + LongIntHashMap l2 = + LongIntHashMap.from(newArray(key2, key1, key3), newvArray(value2, value1, value3)); + + LongIntHashMap l3 = LongIntHashMap.from(newArray(key1, key2), newvArray(value2, value1)); + + assertEquals(l1.hashCode(), l2.hashCode()); + assertEquals(l1, l2); + + assertFalse(l1.equals(l3)); + assertFalse(l2.equals(l3)); + } + + @Test + public void testBug_HPPC37() { + LongIntHashMap l1 = LongIntHashMap.from(newArray(key1), newvArray(value1)); + + LongIntHashMap l2 = LongIntHashMap.from(newArray(key2), newvArray(value1)); + + assertFalse(l1.equals(l2)); + assertFalse(l2.equals(l1)); + } + + /* + * + */ + @Test + public void testClone() { + this.map.put(key1, value1); + this.map.put(key2, value2); + this.map.put(key3, value3); + + LongIntHashMap cloned = map.clone(); + cloned.remove(key1); + + assertSortedListEquals(map.keys().toArray(), key1, key2, key3); + assertSortedListEquals(cloned.keys().toArray(), key2, key3); + } + + /* */ + @Test + public void testMapValues() { + map.put(key1, value3); + map.put(key2, value2); + map.put(key3, value1); + assertSortedListEquals(map.values().toArray(), value1, value2, value3); + + map.clear(); + map.put(key1, value1); + map.put(key2, value2); + map.put(key3, value2); + assertSortedListEquals(map.values().toArray(), value1, value2, value2); + } + + /* */ + @Test + public void 
testMapValuesIterator() { + map.put(key1, value3); + map.put(key2, value2); + map.put(key3, value1); + + int counted = 0; + for (IntCursor c : map.values()) { + assertEquals(map.values[c.index], c.value); + counted++; + } + assertEquals(counted, map.size()); + } + + /* */ + @Test + public void testEqualsSameClass() { + LongIntHashMap l1 = newInstance(); + l1.put(k1, value0); + l1.put(k2, value1); + l1.put(k3, value2); + + LongIntHashMap l2 = new LongIntHashMap(l1); + l2.putAll(l1); + + LongIntHashMap l3 = new LongIntHashMap(l2); + l3.putAll(l2); + l3.put(k4, value0); + + assertEquals(l2, l1); + assertEquals(l2.hashCode(), l1.hashCode()); + assertNotEquals(l1, l3); + } + + /* */ + @Test + public void testEqualsSubClass() { + class Sub extends LongIntHashMap {} + + LongIntHashMap l1 = newInstance(); + l1.put(k1, value0); + l1.put(k2, value1); + l1.put(k3, value2); + + LongIntHashMap l2 = new Sub(); + l2.putAll(l1); + l2.put(k4, value3); + + LongIntHashMap l3 = new Sub(); + l3.putAll(l2); + + assertNotEquals(l1, l2); + assertEquals(l3.hashCode(), l2.hashCode()); + assertEquals(l3, l2); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongObjectHashMap.java b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongObjectHashMap.java new file mode 100644 index 00000000000..d047b3d1091 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/hppc/TestLongObjectHashMap.java @@ -0,0 +1,611 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util.hppc; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Random; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.junit.After; +import org.junit.Test; + +/** + * Tests for {@link LongObjectHashMap}. + * + *

<p>Mostly forked and trimmed from com.carrotsearch.hppc.LongObjectHashMapTest + * + *

github: https://github.com/carrotsearch/hppc release: 0.9.0 + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class TestLongObjectHashMap extends LuceneTestCase { + /* Ready to use key values. */ + + protected long keyE = 0; + protected long key0 = cast(0), k0 = key0; + protected long key1 = cast(1), k1 = key1; + protected long key2 = cast(2), k2 = key2; + protected long key3 = cast(3), k3 = key3; + protected long key4 = cast(4), k4 = key4; + protected long key5 = cast(5), k5 = key5; + protected long key6 = cast(6), k6 = key6; + protected long key7 = cast(7), k7 = key7; + protected long key8 = cast(8), k8 = key8; + protected long key9 = cast(9), k9 = key9; + + /** Convert to target type from an integer used to test stuff. */ + public long cast(int v) { + return v; + } + + /** Create a new array of a given type and copy the arguments to this array. */ + /* */ + public final long[] newArray(long... elements) { + return elements; + } + + public static int randomIntBetween(int min, int max) { + return min + random().nextInt(max + 1 - min); + } + + /** Check if the array's content is identical to a given sequence of elements. */ + public static void assertSortedListEquals(long[] array, long... elements) { + assertEquals(elements.length, array.length); + Arrays.sort(array); + Arrays.sort(elements); + assertArrayEquals(elements, array); + } + + /** Check if the array's content is identical to a given sequence of elements. */ + public static void assertSortedListEquals(Object[] array, Object... elements) { + assertEquals(elements.length, array.length); + Arrays.sort(array); + assertArrayEquals(elements, array); + } + + protected int value0 = vcast(0); + protected int value1 = vcast(1); + protected int value2 = vcast(2); + protected int value3 = vcast(3); + protected int value4 = vcast(4); + + /** Per-test fresh initialized instance. 
*/ + public LongObjectHashMap map = newInstance(); + + protected LongObjectHashMap newInstance() { + return new LongObjectHashMap(); + } + + @After + public void checkEmptySlotsUninitialized() { + if (map != null) { + int occupied = 0; + for (int i = 0; i <= map.mask; i++) { + if (((map.keys[i]) == 0)) { + + } else { + occupied++; + } + } + assertEquals(occupied, map.assigned); + + if (!map.hasEmptyKey) {} + } + } + + /** Convert to target type from an integer used to test stuff. */ + protected int vcast(int value) { + return value; + } + + /** Create a new array of a given type and copy the arguments to this array. */ + /* */ + protected final Object[] newvArray(Object... elements) { + return elements; + } + + private void assertSameMap( + final LongObjectHashMap c1, final LongObjectHashMap c2) { + assertEquals(c1.size(), c2.size()); + + for (LongObjectHashMap.LongObjectCursor entry : c1) { + assertTrue(c2.containsKey(entry.key)); + assertEquals(entry.value, c2.get(entry.key)); + } + } + + /* */ + @Test + public void testEnsureCapacity() { + final AtomicInteger expands = new AtomicInteger(); + LongObjectHashMap map = + new LongObjectHashMap(0) { + @Override + protected void allocateBuffers(int arraySize) { + super.allocateBuffers(arraySize); + expands.incrementAndGet(); + } + }; + + // Add some elements. + final int max = rarely() ? 
0 : randomIntBetween(0, 250); + for (int i = 0; i < max; i++) { + map.put(cast(i), value0); + } + + final int additions = randomIntBetween(max, max + 5000); + map.ensureCapacity(additions + map.size()); + final int before = expands.get(); + for (int i = 0; i < additions; i++) { + map.put(cast(i), value0); + } + assertEquals(before, expands.get()); + } + + @Test + public void testCursorIndexIsValid() { + map.put(keyE, value1); + map.put(key1, value2); + map.put(key2, value3); + + for (LongObjectHashMap.LongObjectCursor c : map) { + assertTrue(map.indexExists(c.index)); + assertEquals(c.value, map.indexGet(c.index)); + } + } + + @Test + public void testIndexMethods() { + map.put(keyE, value1); + map.put(key1, value2); + + assertTrue(map.indexOf(keyE) >= 0); + assertTrue(map.indexOf(key1) >= 0); + assertTrue(map.indexOf(key2) < 0); + + assertTrue(map.indexExists(map.indexOf(keyE))); + assertTrue(map.indexExists(map.indexOf(key1))); + assertFalse(map.indexExists(map.indexOf(key2))); + + assertEquals(value1, map.indexGet(map.indexOf(keyE))); + assertEquals(value2, map.indexGet(map.indexOf(key1))); + + expectThrows( + AssertionError.class, + () -> { + map.indexGet(map.indexOf(key2)); + fail(); + }); + + assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3)); + assertEquals(value2, map.indexReplace(map.indexOf(key1), value4)); + assertEquals(value3, map.indexGet(map.indexOf(keyE))); + assertEquals(value4, map.indexGet(map.indexOf(key1))); + + map.indexInsert(map.indexOf(key2), key2, value1); + assertEquals(value1, map.indexGet(map.indexOf(key2))); + assertEquals(3, map.size()); + + assertEquals(value3, map.indexRemove(map.indexOf(keyE))); + assertEquals(2, map.size()); + assertEquals(value1, map.indexRemove(map.indexOf(key2))); + assertEquals(1, map.size()); + assertTrue(map.indexOf(keyE) < 0); + assertTrue(map.indexOf(key1) >= 0); + assertTrue(map.indexOf(key2) < 0); + } + + /* */ + @Test + public void testCloningConstructor() { + map.put(key1, value1); + 
map.put(key2, value2); + map.put(key3, value3); + + assertSameMap(map, new LongObjectHashMap(map)); + } + + /* */ + @Test + public void testFromArrays() { + map.put(key1, value1); + map.put(key2, value2); + map.put(key3, value3); + + LongObjectHashMap map2 = + LongObjectHashMap.from(newArray(key1, key2, key3), newvArray(value1, value2, value3)); + + assertSameMap(map, map2); + } + + @Test + public void testGetOrDefault() { + map.put(key2, value2); + assertTrue(map.containsKey(key2)); + + map.put(key1, value1); + assertEquals(value1, map.getOrDefault(key1, value3)); + assertEquals(value3, map.getOrDefault(key3, value3)); + map.remove(key1); + assertEquals(value3, map.getOrDefault(key1, value3)); + } + + /* */ + @Test + public void testPut() { + map.put(key1, value1); + + assertTrue(map.containsKey(key1)); + assertEquals(value1, map.get(key1)); + } + + /* */ + @Test + public void testNullValue() { + map.put(key1, null); + + assertTrue(map.containsKey(key1)); + assertNull(map.get(key1)); + } + + @Test + public void testPutOverExistingKey() { + map.put(key1, value1); + assertEquals(value1, map.put(key1, value3)); + assertEquals(value3, map.get(key1)); + + assertEquals(value3, map.put(key1, null)); + assertTrue(map.containsKey(key1)); + assertNull(map.get(key1)); + + assertNull(map.put(key1, value1)); + assertEquals(value1, map.get(key1)); + } + + /* */ + @Test + public void testPutWithExpansions() { + final int COUNT = 10000; + final Random rnd = new Random(random().nextLong()); + final HashSet values = new HashSet(); + + for (int i = 0; i < COUNT; i++) { + final int v = rnd.nextInt(); + final boolean hadKey = values.contains(cast(v)); + values.add(cast(v)); + + assertEquals(hadKey, map.containsKey(cast(v))); + map.put(cast(v), vcast(v)); + assertEquals(values.size(), map.size()); + } + assertEquals(values.size(), map.size()); + } + + /* */ + @Test + public void testPutAll() { + map.put(key1, value1); + map.put(key2, value1); + + LongObjectHashMap map2 = newInstance(); 
+ + map2.put(key2, value2); + map2.put(keyE, value1); + + // One new key (keyE). + assertEquals(1, map.putAll(map2)); + + // Assert the value under key2 has been replaced. + assertEquals(value2, map.get(key2)); + + // And keyE has been added. + assertEquals(value1, map.get(keyE)); + assertEquals(3, map.size()); + } + + /* */ + @Test + public void testPutIfAbsent() { + assertTrue(map.putIfAbsent(key1, value1)); + assertFalse(map.putIfAbsent(key1, value2)); + assertEquals(value1, map.get(key1)); + } + + /* */ + @Test + public void testRemove() { + map.put(key1, value1); + assertEquals(value1, map.remove(key1)); + assertEquals(null, map.remove(key1)); + assertEquals(0, map.size()); + + // These are internals, but perhaps worth asserting too. + assertEquals(0, map.assigned); + } + + /* */ + @Test + public void testEmptyKey() { + final int empty = 0; + + map.put(empty, value1); + assertEquals(1, map.size()); + assertEquals(false, map.isEmpty()); + assertEquals(value1, map.get(empty)); + assertEquals(value1, map.getOrDefault(empty, value2)); + assertEquals(true, map.iterator().hasNext()); + assertEquals(empty, map.iterator().next().key); + assertEquals(value1, map.iterator().next().value); + + map.remove(empty); + assertEquals(null, map.get(empty)); + + map.put(empty, null); + assertEquals(1, map.size()); + assertTrue(map.containsKey(empty)); + assertNull(map.get(empty)); + + map.remove(empty); + assertEquals(0, map.size()); + assertFalse(map.containsKey(empty)); + assertNull(map.get(empty)); + } + + /* */ + @Test + public void testMapKeySet() { + map.put(key1, value3); + map.put(key2, value2); + map.put(key3, value1); + + assertSortedListEquals(map.keys().toArray(), key1, key2, key3); + } + + /* */ + @Test + public void testMapKeySetIterator() { + map.put(key1, value3); + map.put(key2, value2); + map.put(key3, value1); + + int counted = 0; + for (LongCursor c : map.keys()) { + assertEquals(map.keys[c.index], c.value); + counted++; + } + assertEquals(counted, 
map.size()); + } + + /* */ + @Test + public void testClear() { + map.put(key1, value1); + map.put(key2, value1); + map.clear(); + assertEquals(0, map.size()); + + // These are internals, but perhaps worth asserting too. + assertEquals(0, map.assigned); + + // Check if the map behaves properly upon subsequent use. + testPutWithExpansions(); + } + + /* */ + @Test + public void testRelease() { + map.put(key1, value1); + map.put(key2, value1); + map.release(); + assertEquals(0, map.size()); + + // These are internals, but perhaps worth asserting too. + assertEquals(0, map.assigned); + + // Check if the map behaves properly upon subsequent use. + testPutWithExpansions(); + } + + /* */ + @Test + public void testIterable() { + map.put(key1, value1); + map.put(key2, value2); + map.put(key3, value3); + map.remove(key2); + + int count = 0; + for (LongObjectHashMap.LongObjectCursor cursor : map) { + count++; + assertTrue(map.containsKey(cursor.key)); + assertEquals(cursor.value, map.get(cursor.key)); + + assertEquals(cursor.value, map.values[cursor.index]); + assertEquals(cursor.key, map.keys[cursor.index]); + } + assertEquals(count, map.size()); + + map.clear(); + assertFalse(map.iterator().hasNext()); + } + + /* */ + @Test + public void testBug_HPPC73_FullCapacityGet() { + final AtomicInteger reallocations = new AtomicInteger(); + final int elements = 0x7F; + map = + new LongObjectHashMap(elements, 1f) { + @Override + protected double verifyLoadFactor(double loadFactor) { + // Skip load factor sanity range checking. + return loadFactor; + } + + @Override + protected void allocateBuffers(int arraySize) { + super.allocateBuffers(arraySize); + reallocations.incrementAndGet(); + } + }; + + int reallocationsBefore = reallocations.get(); + assertEquals(reallocationsBefore, 1); + for (int i = 1; i <= elements; i++) { + map.put(cast(i), value1); + } + + // Non-existent key. 
+ long outOfSet = cast(elements + 1); + map.remove(outOfSet); + assertFalse(map.containsKey(outOfSet)); + assertEquals(reallocationsBefore, reallocations.get()); + + // Should not expand because we're replacing an existing element. + map.put(k1, value2); + assertEquals(reallocationsBefore, reallocations.get()); + + // Remove from a full map. + map.remove(k1); + assertEquals(reallocationsBefore, reallocations.get()); + map.put(k1, value2); + + // Check expand on "last slot of a full map" condition. + map.put(outOfSet, value1); + assertEquals(reallocationsBefore + 1, reallocations.get()); + } + + @Test + public void testHashCodeEquals() { + LongObjectHashMap l0 = newInstance(); + assertEquals(0, l0.hashCode()); + assertEquals(l0, newInstance()); + + LongObjectHashMap l1 = + LongObjectHashMap.from(newArray(key1, key2, key3), newvArray(value1, value2, value3)); + + LongObjectHashMap l2 = + LongObjectHashMap.from(newArray(key2, key1, key3), newvArray(value2, value1, value3)); + + LongObjectHashMap l3 = LongObjectHashMap.from(newArray(key1, key2), newvArray(value2, value1)); + + assertEquals(l1.hashCode(), l2.hashCode()); + assertEquals(l1, l2); + + assertFalse(l1.equals(l3)); + assertFalse(l2.equals(l3)); + } + + @Test + public void testBug_HPPC37() { + LongObjectHashMap l1 = LongObjectHashMap.from(newArray(key1), newvArray(value1)); + + LongObjectHashMap l2 = LongObjectHashMap.from(newArray(key2), newvArray(value1)); + + assertFalse(l1.equals(l2)); + assertFalse(l2.equals(l1)); + } + + /* + * + */ + @Test + public void testClone() { + this.map.put(key1, value1); + this.map.put(key2, value2); + this.map.put(key3, value3); + + LongObjectHashMap cloned = map.clone(); + cloned.remove(key1); + + assertSortedListEquals(map.keys().toArray(), key1, key2, key3); + assertSortedListEquals(cloned.keys().toArray(), key2, key3); + } + + /* */ + @Test + public void testMapValues() { + map.put(key1, value3); + map.put(key2, value2); + map.put(key3, value1); + 
assertSortedListEquals(map.values().toArray(), value1, value2, value3); + + map.clear(); + map.put(key1, value1); + map.put(key2, value2); + map.put(key3, value2); + assertSortedListEquals(map.values().toArray(), value1, value2, value2); + } + + /* */ + @Test + public void testMapValuesIterator() { + map.put(key1, value3); + map.put(key2, value2); + map.put(key3, value1); + + int counted = 0; + for (ObjectCursor c : map.values()) { + assertEquals(map.values[c.index], c.value); + counted++; + } + assertEquals(counted, map.size()); + } + + /* */ + @Test + public void testEqualsSameClass() { + LongObjectHashMap l1 = newInstance(); + l1.put(k1, value0); + l1.put(k2, value1); + l1.put(k3, value2); + + LongObjectHashMap l2 = new LongObjectHashMap(l1); + l2.putAll(l1); + + LongObjectHashMap l3 = new LongObjectHashMap(l2); + l3.putAll(l2); + l3.put(k4, value0); + + assertEquals(l2, l1); + assertEquals(l2.hashCode(), l1.hashCode()); + assertNotEquals(l1, l3); + } + + /* */ + @Test + public void testEqualsSubClass() { + class Sub extends LongObjectHashMap {} + + LongObjectHashMap l1 = newInstance(); + l1.put(k1, value0); + l1.put(k2, value1); + l1.put(k3, value2); + + LongObjectHashMap l2 = new Sub(); + l2.putAll(l1); + l2.put(k4, value3); + + LongObjectHashMap l3 = new Sub(); + l3.putAll(l2); + + assertNotEquals(l1, l2); + assertEquals(l3.hashCode(), l2.hashCode()); + assertEquals(l3, l2); + } +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/OverlappingLongRangeCounter.java b/lucene/facet/src/java/org/apache/lucene/facet/range/OverlappingLongRangeCounter.java index 498cb6b4d11..a46adac7b86 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/OverlappingLongRangeCounter.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/OverlappingLongRangeCounter.java @@ -16,11 +16,11 @@ */ package org.apache.lucene.facet.range; +import com.carrotsearch.hppc.LongArrayList; +import com.carrotsearch.hppc.LongIntHashMap; import java.util.ArrayList; 
-import java.util.Collections; -import java.util.HashMap; +import java.util.Arrays; import java.util.List; -import java.util.Map; import org.apache.lucene.util.FixedBitSet; /** @@ -238,28 +238,28 @@ class OverlappingLongRangeCounter extends LongRangeCounter { // track the start vs end case separately because if a // given point is both, then it must be its own // elementary interval: - Map endsMap = new HashMap<>(); + LongIntHashMap endsMap = new LongIntHashMap(); endsMap.put(Long.MIN_VALUE, 1); endsMap.put(Long.MAX_VALUE, 2); for (LongRange range : ranges) { - Integer cur = endsMap.get(range.min); - if (cur == null) { - endsMap.put(range.min, 1); + int index = endsMap.indexOf(range.min); + if (index < 0) { + endsMap.indexInsert(index, range.min, 1); } else { - endsMap.put(range.min, cur | 1); + endsMap.indexReplace(index, endsMap.indexGet(index) | 1); } - cur = endsMap.get(range.max); - if (cur == null) { - endsMap.put(range.max, 2); + index = endsMap.indexOf(range.max); + if (index < 0) { + endsMap.indexInsert(index, range.max, 2); } else { - endsMap.put(range.max, cur | 2); + endsMap.indexReplace(index, endsMap.indexGet(index) | 2); } } - List endsList = new ArrayList<>(endsMap.keySet()); - Collections.sort(endsList); + LongArrayList endsList = new LongArrayList(endsMap.keys()); + Arrays.sort(endsList.buffer, 0, endsList.size()); // Build elementaryIntervals (a 1D Venn diagram): List elementaryIntervals = new ArrayList<>(); diff --git a/lucene/join/build.gradle b/lucene/join/build.gradle index 840b3bc8f65..5cf8f80bb79 100644 --- a/lucene/join/build.gradle +++ b/lucene/join/build.gradle @@ -21,5 +21,7 @@ description = 'Index-time and Query-time joins for normalized content' dependencies { moduleApi project(':lucene:core') + moduleImplementation 'com.carrotsearch:hppc' + moduleTestImplementation project(':lucene:test-framework') } \ No newline at end of file diff --git a/lucene/join/src/java/module-info.java b/lucene/join/src/java/module-info.java index 
80d2261b5a5..9ab3c8152dd 100644 --- a/lucene/join/src/java/module-info.java +++ b/lucene/join/src/java/module-info.java @@ -16,8 +16,10 @@ */ /** Index-time and Query-time joins for normalized content */ +@SuppressWarnings({"requires-automatic"}) module org.apache.lucene.join { requires org.apache.lucene.core; + requires com.carrotsearch.hppc; exports org.apache.lucene.search.join; } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java index 085c163847a..b71adb4f014 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java @@ -17,14 +17,13 @@ package org.apache.lucene.search.join; -import java.util.HashMap; -import java.util.Map; import org.apache.lucene.search.AbstractKnnCollector; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TotalHits; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BitSet; +import org.apache.lucene.util.hppc.IntIntHashMap; /** * This collects the nearest children vectors. Diversifying the results over the provided parent @@ -117,7 +116,7 @@ class DiversifyingNearestChildrenKnnCollector extends AbstractKnnCollector { // Used to keep track of nodeId -> positionInHeap. This way when new scores are added for a // node, the heap can be // updated efficiently. - private final Map nodeIdHeapIndex; + private final IntIntHashMap nodeIdHeapIndex; private boolean closed = false; public NodeIdCachingHeap(int maxSize) { @@ -130,8 +129,7 @@ class DiversifyingNearestChildrenKnnCollector extends AbstractKnnCollector { // NOTE: we add +1 because all access to heap is 1-based not 0-based. heap[0] is unused. 
heapSize = maxSize + 1; this.maxSize = maxSize; - this.nodeIdHeapIndex = - new HashMap<>(maxSize < 2 ? maxSize + 1 : (int) (maxSize / 0.75 + 1.0)); + this.nodeIdHeapIndex = new IntIntHashMap(maxSize); this.heapNodes = new ParentChildScore[heapSize]; } @@ -179,8 +177,9 @@ class DiversifyingNearestChildrenKnnCollector extends AbstractKnnCollector { if (closed) { throw new IllegalStateException(); } - Integer previousNodeIndex = nodeIdHeapIndex.get(parentNode); - if (previousNodeIndex != null) { + int index = nodeIdHeapIndex.indexOf(parentNode); + if (index >= 0) { + int previousNodeIndex = nodeIdHeapIndex.indexGet(index); if (heapNodes[previousNodeIndex].score < score) { updateElement(previousNodeIndex, node, parentNode, score); return true; diff --git a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java index 0bf84e3ed3d..93b25e42409 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java @@ -16,14 +16,16 @@ */ package org.apache.lucene.search.join; +import com.carrotsearch.hppc.LongArrayList; +import com.carrotsearch.hppc.LongFloatHashMap; +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongIntHashMap; +import com.carrotsearch.hppc.cursors.LongCursor; +import com.carrotsearch.hppc.procedures.LongFloatProcedure; import java.io.IOException; -import java.util.HashMap; +import java.util.Arrays; import java.util.Iterator; import java.util.Locale; -import java.util.Map; -import java.util.TreeSet; -import java.util.function.BiConsumer; -import java.util.function.LongFunction; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.IntPoint; @@ -147,46 +149,40 @@ public final class JoinUtil { IndexSearcher fromSearcher, ScoreMode scoreMode) throws IOException { - TreeSet joinValues = new TreeSet<>(); - Map 
aggregatedScores = new HashMap<>(); - Map occurrences = new HashMap<>(); + LongHashSet joinValues = new LongHashSet(); + LongFloatHashMap aggregatedScores = new LongFloatHashMap(); + LongIntHashMap occurrences = new LongIntHashMap(); boolean needsScore = scoreMode != ScoreMode.None; - BiConsumer scoreAggregator; + LongFloatProcedure scoreAggregator; if (scoreMode == ScoreMode.Max) { scoreAggregator = (key, score) -> { - Float currentValue = aggregatedScores.putIfAbsent(key, score); - if (currentValue != null) { - aggregatedScores.put(key, Math.max(currentValue, score)); + int index = aggregatedScores.indexOf(key); + if (index < 0) { + aggregatedScores.indexInsert(index, key, score); + } else { + float currentScore = aggregatedScores.indexGet(index); + aggregatedScores.indexReplace(index, Math.max(currentScore, score)); } }; } else if (scoreMode == ScoreMode.Min) { scoreAggregator = (key, score) -> { - Float currentValue = aggregatedScores.putIfAbsent(key, score); - if (currentValue != null) { - aggregatedScores.put(key, Math.min(currentValue, score)); + int index = aggregatedScores.indexOf(key); + if (index < 0) { + aggregatedScores.indexInsert(index, key, score); + } else { + float currentScore = aggregatedScores.indexGet(index); + aggregatedScores.indexReplace(index, Math.min(currentScore, score)); } }; } else if (scoreMode == ScoreMode.Total) { - scoreAggregator = - (key, score) -> { - Float currentValue = aggregatedScores.putIfAbsent(key, score); - if (currentValue != null) { - aggregatedScores.put(key, currentValue + score); - } - }; + scoreAggregator = aggregatedScores::addTo; } else if (scoreMode == ScoreMode.Avg) { scoreAggregator = (key, score) -> { - Float currentSore = aggregatedScores.putIfAbsent(key, score); - if (currentSore != null) { - aggregatedScores.put(key, currentSore + score); - } - Integer currentOccurrence = occurrences.putIfAbsent(key, 1); - if (currentOccurrence != null) { - occurrences.put(key, ++currentOccurrence); - } + 
aggregatedScores.addTo(key, score); + occurrences.addTo(key, 1); }; } else { scoreAggregator = @@ -195,12 +191,12 @@ public final class JoinUtil { }; } - LongFunction joinScorer; + LongFloatFunction joinScorer; if (scoreMode == ScoreMode.Avg) { joinScorer = (joinValue) -> { - Float aggregatedScore = aggregatedScores.get(joinValue); - Integer occurrence = occurrences.get(joinValue); + float aggregatedScore = aggregatedScores.get(joinValue); + int occurrence = occurrences.get(joinValue); return aggregatedScore / occurrence; }; } else { @@ -222,7 +218,7 @@ public final class JoinUtil { long value = sortedNumericDocValues.nextValue(); joinValues.add(value); if (needsScore) { - scoreAggregator.accept(value, scorer.score()); + scoreAggregator.apply(value, scorer.score()); } } } @@ -271,7 +267,7 @@ public final class JoinUtil { } joinValues.add(value); if (needsScore) { - scoreAggregator.accept(value, scorer.score()); + scoreAggregator.apply(value, scorer.score()); } } @@ -296,7 +292,9 @@ public final class JoinUtil { } fromSearcher.search(fromQuery, collector); - Iterator iterator = joinValues.iterator(); + LongArrayList joinValuesList = new LongArrayList(joinValues); + Arrays.sort(joinValuesList.buffer, 0, joinValuesList.size()); + Iterator iterator = joinValuesList.iterator(); final int bytesPerDim; final BytesRef encoded = new BytesRef(); @@ -308,10 +306,10 @@ public final class JoinUtil { @Override public BytesRef next() { if (iterator.hasNext()) { - long value = iterator.next(); - IntPoint.encodeDimension((int) value, encoded.bytes, 0); + LongCursor value = iterator.next(); + IntPoint.encodeDimension((int) value.value, encoded.bytes, 0); if (needsScore) { - score = joinScorer.apply(value); + score = joinScorer.apply(value.value); } return encoded; } else { @@ -326,10 +324,10 @@ public final class JoinUtil { @Override public BytesRef next() { if (iterator.hasNext()) { - long value = iterator.next(); - LongPoint.encodeDimension(value, encoded.bytes, 0); + LongCursor 
value = iterator.next(); + LongPoint.encodeDimension(value.value, encoded.bytes, 0); if (needsScore) { - score = joinScorer.apply(value); + score = joinScorer.apply(value.value); } return encoded; } else { @@ -344,10 +342,11 @@ public final class JoinUtil { @Override public BytesRef next() { if (iterator.hasNext()) { - long value = iterator.next(); - FloatPoint.encodeDimension(Float.intBitsToFloat((int) value), encoded.bytes, 0); + LongCursor value = iterator.next(); + FloatPoint.encodeDimension( + Float.intBitsToFloat((int) value.value), encoded.bytes, 0); if (needsScore) { - score = joinScorer.apply(value); + score = joinScorer.apply(value.value); } return encoded; } else { @@ -362,10 +361,10 @@ public final class JoinUtil { @Override public BytesRef next() { if (iterator.hasNext()) { - long value = iterator.next(); - DoublePoint.encodeDimension(Double.longBitsToDouble(value), encoded.bytes, 0); + LongCursor value = iterator.next(); + DoublePoint.encodeDimension(Double.longBitsToDouble(value.value), encoded.bytes, 0); if (needsScore) { - score = joinScorer.apply(value); + score = joinScorer.apply(value.value); } return encoded; } else { @@ -583,4 +582,10 @@ public final class JoinUtil { max, searcher.getTopReaderContext().id()); } + + /** Similar to {@link java.util.function.LongFunction} for primitive argument and result. 
*/ + private interface LongFloatFunction { + + float apply(long value); + } } diff --git a/lucene/misc/src/java/org/apache/lucene/misc/search/DiversifiedTopDocsCollector.java b/lucene/misc/src/java/org/apache/lucene/misc/search/DiversifiedTopDocsCollector.java index ceb92fa92d5..e7a24964c96 100644 --- a/lucene/misc/src/java/org/apache/lucene/misc/search/DiversifiedTopDocsCollector.java +++ b/lucene/misc/src/java/org/apache/lucene/misc/search/DiversifiedTopDocsCollector.java @@ -19,8 +19,6 @@ package org.apache.lucene.misc.search; import java.io.IOException; import java.util.ArrayDeque; import java.util.Deque; -import java.util.HashMap; -import java.util.Map; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.misc.search.DiversifiedTopDocsCollector.ScoreDocKey; @@ -32,6 +30,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TotalHits; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.hppc.LongObjectHashMap; /** * A {@link TopDocsCollector} that controls diversity in results by ensuring no more than @@ -69,7 +68,7 @@ public abstract class DiversifiedTopDocsCollector extends TopDocsCollector perKeyQueues; + private LongObjectHashMap perKeyQueues; protected int maxNumPerKey; private Deque sparePerKeyQueues = new ArrayDeque<>(); @@ -77,7 +76,7 @@ public abstract class DiversifiedTopDocsCollector extends TopDocsCollector(); + perKeyQueues = new LongObjectHashMap<>(); this.numHits = numHits; this.maxNumPerKey = maxHitsPerKey; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonQuery.java index 9886b9d4e5d..eafa8ed6a01 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonQuery.java +++ 
b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonQuery.java @@ -54,6 +54,7 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Transition; +import org.apache.lucene.util.hppc.IntObjectHashMap; // TODO // - compare perf to PhraseQuery exact and sloppy @@ -86,7 +87,7 @@ public class TermAutomatonQuery extends Query implements Accountable { private final Automaton.Builder builder; Automaton det; private final Map termToID = new HashMap<>(); - private final Map idToTerm = new HashMap<>(); + private final IntObjectHashMap idToTerm = new IntObjectHashMap<>(); private int anyTermID = -1; public TermAutomatonQuery(String field) { @@ -209,7 +210,7 @@ public class TermAutomatonQuery extends Query implements Accountable { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - Map termStates = new HashMap<>(); + IntObjectHashMap termStates = new IntObjectHashMap<>(); for (Map.Entry ent : termToID.entrySet()) { if (ent.getKey() != null) { @@ -360,14 +361,14 @@ public class TermAutomatonQuery extends Query implements Accountable { final class TermAutomatonWeight extends Weight { final Automaton automaton; - private final Map termStates; + private final IntObjectHashMap termStates; private final Similarity.SimScorer stats; private final Similarity similarity; public TermAutomatonWeight( Automaton automaton, IndexSearcher searcher, - Map termStates, + IntObjectHashMap termStates, float boost) throws IOException { super(TermAutomatonQuery.this); @@ -375,14 +376,13 @@ public class TermAutomatonQuery extends Query implements Accountable { this.termStates = termStates; this.similarity = searcher.getSimilarity(); List allTermStats = new ArrayList<>(); - for (Map.Entry ent : idToTerm.entrySet()) { - Integer termID = ent.getKey(); - if (ent.getValue() != null) { 
- TermStates ts = termStates.get(termID); + for (IntObjectHashMap.IntObjectCursor ent : idToTerm) { + if (ent.value != null) { + TermStates ts = termStates.get(ent.key); if (ts.docFreq() > 0) { allTermStats.add( searcher.termStatistics( - new Term(field, ent.getValue()), ts.docFreq(), ts.totalTermFreq())); + new Term(field, ent.value), ts.docFreq(), ts.totalTermFreq())); } } } @@ -410,18 +410,18 @@ public class TermAutomatonQuery extends Query implements Accountable { EnumAndScorer[] enums = new EnumAndScorer[idToTerm.size()]; boolean any = false; - for (Map.Entry ent : termStates.entrySet()) { - TermStates termStates = ent.getValue(); + for (IntObjectHashMap.IntObjectCursor ent : termStates) { + TermStates termStates = ent.value; assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); - BytesRef term = idToTerm.get(ent.getKey()); + BytesRef term = idToTerm.get(ent.key); TermState state = termStates.get(context); if (state != null) { TermsEnum termsEnum = context.reader().terms(field).iterator(); termsEnum.seekExact(term, state); - enums[ent.getKey()] = - new EnumAndScorer(ent.getKey(), termsEnum.postings(null, PostingsEnum.POSITIONS)); + enums[ent.key] = + new EnumAndScorer(ent.key, termsEnum.postings(null, PostingsEnum.POSITIONS)); any = true; } } diff --git a/lucene/spatial-extras/build.gradle b/lucene/spatial-extras/build.gradle index baa772f3053..7ba6648eede 100644 --- a/lucene/spatial-extras/build.gradle +++ b/lucene/spatial-extras/build.gradle @@ -30,6 +30,8 @@ dependencies { moduleApi 'org.locationtech.spatial4j:spatial4j' moduleApi 'io.sgr:s2-geometry-library-java' + moduleImplementation 'com.carrotsearch:hppc' + moduleTestImplementation project(':lucene:test-framework') moduleTestImplementation project(':lucene:spatial-test-fixtures') moduleTestImplementation 'org.locationtech.jts:jts-core' diff --git 
a/lucene/spatial-extras/src/java/module-info.java b/lucene/spatial-extras/src/java/module-info.java index 30fb97668ef..608684764fd 100644 --- a/lucene/spatial-extras/src/java/module-info.java +++ b/lucene/spatial-extras/src/java/module-info.java @@ -20,6 +20,7 @@ module org.apache.lucene.spatial_extras { requires spatial4j; requires s2.geometry.library.java; + requires com.carrotsearch.hppc; requires org.apache.lucene.core; requires org.apache.lucene.spatial3d; diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/util/CachingDoubleValueSource.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/util/CachingDoubleValueSource.java index b825ec4b39d..234efc45de5 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/util/CachingDoubleValueSource.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/util/CachingDoubleValueSource.java @@ -16,13 +16,13 @@ */ package org.apache.lucene.spatial.util; +import com.carrotsearch.hppc.IntDoubleHashMap; import java.io.IOException; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.DoubleValues; import org.apache.lucene.search.DoubleValuesSource; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.util.hppc.IntObjectHashMap; /** * Caches the doubleVal of another value source in a HashMap so that it is computed only once. 
@@ -32,11 +32,11 @@ import org.apache.lucene.util.hppc.IntObjectHashMap; public class CachingDoubleValueSource extends DoubleValuesSource { final DoubleValuesSource source; - final IntObjectHashMap cache; + final IntDoubleHashMap cache; public CachingDoubleValueSource(DoubleValuesSource source) { this.source = source; - cache = new IntObjectHashMap<>(); + cache = new IntDoubleHashMap(); } @Override @@ -53,11 +53,14 @@ public class CachingDoubleValueSource extends DoubleValuesSource { @Override public double doubleValue() throws IOException { + double v; int key = base + doc; - Double v = cache.get(key); - if (v == null) { + int index = cache.indexOf(key); + if (index < 0) { v = vals.doubleValue(); - cache.put(key, v); + cache.indexInsert(index, key, v); + } else { + v = cache.indexGet(index); } return v; }