Replace Map<Integer, Object> by primitive IntObjectHashMap. (#13368)

Bruno Roustant 2024-05-18 14:40:40 +02:00 committed by GitHub
parent 24fd426d98
commit 7db9c8c9bd
16 changed files with 63 additions and 59 deletions
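
The maps below come from Lucene's internal HPPC fork (org.apache.lucene.util.hppc); they keep int keys primitive instead of boxing them as Integer. As context for reading the hunks, here is a minimal sketch of the API surface this commit exercises — the class and method names are taken from the diff itself, everything else (keys, values, names) is illustrative:

    import org.apache.lucene.util.hppc.IntIntHashMap;
    import org.apache.lucene.util.hppc.IntObjectHashMap;

    class HppcMapSketch {
      static void demo() {
        IntObjectHashMap<String> byNumber = new IntObjectHashMap<>();
        byNumber.put(7, "seven");      // primitive int key, no Integer boxing
        String s = byNumber.get(7);    // null when the key is absent
        for (IntObjectHashMap.IntObjectCursor<String> c : byNumber) {
          System.out.println(c.key + " -> " + c.value); // cursors, not Map.Entry
        }

        IntIntHashMap positions = new IntIntHashMap();
        positions.put(42, 0);
        int[] keys = positions.keys().toArray(); // primitive snapshot, no Integer[]
      }
    }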

File: lucene/CHANGES.txt

@@ -341,6 +341,8 @@ Optimizations
 * GITHUB#13327: Reduce memory usage of field maps in FieldInfos and BlockTree TermsReader. (Bruno Roustant, David Smiley)
 
+* GITHUB#13368: Replace Map<Integer, Object> by primitive IntObjectHashMap. (Bruno Roustant)
+
 Bug Fixes
 ---------------------

File: org/apache/lucene/analysis/hunspell/SuggestibleEntryCache.java

@@ -16,12 +16,11 @@
  */
 package org.apache.lucene.analysis.hunspell;
 
-import java.util.HashMap;
-import java.util.Map;
 import java.util.function.Consumer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * A cache allowing for CPU-cache-friendlier iteration over {@link WordStorage} entries that can be
@@ -35,9 +34,7 @@ class SuggestibleEntryCache {
   private final Section[] sections;
 
-  private SuggestibleEntryCache(Map<Integer, SectionBuilder> builders) {
-    int maxLength =
-        builders.isEmpty() ? 0 : builders.keySet().stream().max(Integer::compare).orElseThrow();
+  private SuggestibleEntryCache(IntObjectHashMap<SectionBuilder> builders, int maxLength) {
     sections = new Section[maxLength + 1];
     for (int i = 0; i < sections.length; i++) {
       SectionBuilder builder = builders.get(i);
@@ -48,7 +45,8 @@ class SuggestibleEntryCache {
   static SuggestibleEntryCache buildCache(WordStorage storage) {
     var consumer =
         new Consumer<FlyweightEntry>() {
-          final Map<Integer, SectionBuilder> builders = new HashMap<>();
+          final IntObjectHashMap<SectionBuilder> builders = new IntObjectHashMap<>();
+          int maxLength;
 
           @Override
           public void accept(FlyweightEntry entry) {
@@ -56,14 +54,24 @@ class SuggestibleEntryCache {
             if (root.length > Short.MAX_VALUE) {
               throw new UnsupportedOperationException(
                   "Too long dictionary entry, please report this to dev@lucene.apache.org");
+            } else if (root.length > maxLength) {
+              maxLength = root.length;
             }
 
-            builders.computeIfAbsent(root.length, __ -> new SectionBuilder()).add(entry);
+            SectionBuilder builder;
+            int index = builders.indexOf(root.length);
+            if (index < 0) {
+              builder = new SectionBuilder();
+              builders.indexInsert(index, root.length, builder);
+            } else {
+              builder = builders.indexGet(index);
+            }
+            builder.add(entry);
           }
         };
 
     storage.processSuggestibleWords(1, Integer.MAX_VALUE, consumer);
-    return new SuggestibleEntryCache(consumer.builders);
+    return new SuggestibleEntryCache(consumer.builders, consumer.maxLength);
   }
 
   private static class SectionBuilder {
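
The accept() body above replaces Map.computeIfAbsent, which IntObjectHashMap does not provide, with the indexOf/indexInsert/indexGet triple: indexOf hashes the key once and returns either the slot holding it or a negative encoding of the free slot, so the get-or-create needs no second lookup. A standalone sketch of the same pattern (the key, value type and names here are illustrative):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.util.hppc.IntObjectHashMap;

    class GetOrCreateSketch {
      static List<String> getOrCreate(IntObjectHashMap<List<String>> groups, int key) {
        int slot = groups.indexOf(key);           // one hash probe
        if (slot < 0) {                           // negative: key absent
          List<String> created = new ArrayList<>();
          groups.indexInsert(slot, key, created); // reuses the probed slot
          return created;
        }
        return groups.indexGet(slot);             // non-negative: key present
      }
    }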

File: ViterbiNBest.java

@@ -21,10 +21,10 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumMap;
-import java.util.HashMap;
 import java.util.List;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.hppc.IntIntHashMap;
 
 /** {@link Viterbi} subclass for n-best path calculation. */
 public abstract class ViterbiNBest<T extends Token, U extends MorphData>
@@ -137,14 +137,14 @@ public abstract class ViterbiNBest<T extends Token, U extends MorphData>
     }
 
     // offset=>position map
-    HashMap<Integer, Integer> map = new HashMap<>();
+    IntIntHashMap map = new IntIntHashMap();
     for (Token t : pending) {
       map.put(t.getOffset(), 0);
       map.put(t.getOffset() + t.getLength(), 0);
     }
 
     // Get unique and sorted list of all edge position of tokens.
-    Integer[] offsets = map.keySet().toArray(new Integer[0]);
+    int[] offsets = map.keys().toArray();
     Arrays.sort(offsets);
 
     // setup all value of map. It specifies N-th position from begin.
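
In the hunk above the map is used purely as a set: each offset is put with a dummy value of 0 so duplicate offsets collapse onto one key, and keys().toArray() then yields an int[] that Arrays.sort handles without a boxing comparator. A small sketch of that idiom (the offsets are illustrative):

    import java.util.Arrays;
    import org.apache.lucene.util.hppc.IntIntHashMap;

    class OffsetDedupSketch {
      static int[] sortedUniqueOffsets() {
        IntIntHashMap edges = new IntIntHashMap();
        edges.put(3, 0);
        edges.put(3, 0); // overwrites: each offset is stored once
        edges.put(8, 0);
        int[] offsets = edges.keys().toArray(); // primitive keys, no Integer[]
        Arrays.sort(offsets);                   // primitive sort, no unboxing
        return offsets;
      }
    }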

File: ICUTokenizerFactory.java

@@ -24,7 +24,6 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.lucene.analysis.TokenizerFactory;
@@ -32,6 +31,7 @@ import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.ResourceLoader;
 import org.apache.lucene.util.ResourceLoaderAware;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Factory for {@link ICUTokenizer}. Words are broken across script boundaries, then segmented
@@ -74,7 +74,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware
   public static final String NAME = "icu";
 
   static final String RULEFILES = "rulefiles";
-  private final Map<Integer, String> tailored;
+  private final IntObjectHashMap<String> tailored;
   private ICUTokenizerConfig config;
   private final boolean cjkAsWords;
   private final boolean myanmarAsWords;
@@ -82,7 +82,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware
   /** Creates a new ICUTokenizerFactory */
   public ICUTokenizerFactory(Map<String, String> args) {
     super(args);
-    tailored = new HashMap<>();
+    tailored = new IntObjectHashMap<>();
    String rulefilesArg = get(args, RULEFILES);
    if (rulefilesArg != null) {
      List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);
@@ -113,9 +113,9 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware
     } else {
       final BreakIterator[] breakers =
           new BreakIterator[1 + UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)];
-      for (Map.Entry<Integer, String> entry : tailored.entrySet()) {
-        int code = entry.getKey();
-        String resourcePath = entry.getValue();
+      for (IntObjectHashMap.IntObjectCursor<String> entry : tailored) {
+        int code = entry.key;
+        String resourcePath = entry.value;
         breakers[code] = parseRules(resourcePath, loader);
       }
       config =
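
The entrySet() loop above becomes cursor iteration: iterating an IntObjectHashMap yields IntObjectCursor objects whose key field is a primitive int, so there is no Map.Entry allocation or getKey() unboxing. A sketch, assuming HPPC-style iteration where the same cursor instance may be reused between steps (so read it, don't store it); the script constant and resource name are illustrative:

    import com.ibm.icu.lang.UScript;
    import org.apache.lucene.util.hppc.IntObjectHashMap;

    class CursorIterationSketch {
      static void demo() {
        IntObjectHashMap<String> tailoredRules = new IntObjectHashMap<>();
        tailoredRules.put(UScript.LATIN, "Latin-rules.rbbi"); // illustrative entry
        for (IntObjectHashMap.IntObjectCursor<String> cur : tailoredRules) {
          int scriptCode = cur.key;        // primitive, no getKey() unboxing
          String resourcePath = cur.value;
          System.out.println(scriptCode + " -> " + resourcePath);
        }
      }
    }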

File: org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java

@@ -17,11 +17,9 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import org.apache.lucene.analysis.cn.smart.Utility;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Graph representing possible token pairs (bigrams) at each start offset in the sentence.
@@ -32,7 +30,7 @@
  */
 class BiSegGraph {
 
-  private Map<Integer, ArrayList<SegTokenPair>> tokenPairListTable = new HashMap<>();
+  private IntObjectHashMap<ArrayList<SegTokenPair>> tokenPairListTable = new IntObjectHashMap<>();
 
   private List<SegToken> segTokenList;
@@ -122,7 +120,7 @@
    * @return true if a token pair exists
    */
   public boolean isToExist(int to) {
-    return tokenPairListTable.get(Integer.valueOf(to)) != null;
+    return tokenPairListTable.get(to) != null;
   }
 
   /**
@@ -220,9 +218,9 @@
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
-    Collection<ArrayList<SegTokenPair>> values = tokenPairListTable.values();
-    for (ArrayList<SegTokenPair> segList : values) {
-      for (SegTokenPair pair : segList) {
+    for (IntObjectHashMap.ObjectCursor<ArrayList<SegTokenPair>> segList :
+        tokenPairListTable.values()) {
+      for (SegTokenPair pair : segList.value) {
         sb.append(pair).append("\n");
       }
     }

File: org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java

@@ -17,9 +17,8 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Graph representing possible tokens at each start offset in the sentence.
@@ -31,7 +30,7 @@
  */
 class SegGraph {
 
   /** Map of start offsets to ArrayList of tokens at that position */
-  private Map<Integer, ArrayList<SegToken>> tokenListTable = new HashMap<>();
+  private IntObjectHashMap<ArrayList<SegToken>> tokenListTable = new IntObjectHashMap<>();
 
   private int maxStart = -1;

File: org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java

@@ -17,7 +17,6 @@
 package org.apache.lucene.benchmark.byTask.feeds;
 
 import java.util.AbstractMap;
-import java.util.HashMap;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
@@ -31,6 +30,7 @@ import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
 import org.apache.lucene.spatial.serialized.SerializedDVStrategy;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 import org.locationtech.spatial4j.context.SpatialContext;
 import org.locationtech.spatial4j.context.SpatialContextFactory;
 import org.locationtech.spatial4j.shape.Point;
@@ -50,7 +50,7 @@ public class SpatialDocMaker extends DocMaker {
   public static final String SPATIAL_FIELD = "spatial";
 
   // cache spatialStrategy by round number
-  private static Map<Integer, SpatialStrategy> spatialStrategyCache = new HashMap<>();
+  private static IntObjectHashMap<SpatialStrategy> spatialStrategyCache = new IntObjectHashMap<>();
 
   private SpatialStrategy strategy;
   private ShapeConverter shapeConverter;

File: org/apache/lucene/analysis/AutomatonToTokenStream.java

@@ -19,16 +19,15 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.hppc.IntIntHashMap;
 
 /** Converts an Automaton into a TokenStream. */
 public class AutomatonToTokenStream {
@@ -61,7 +60,7 @@ public class AutomatonToTokenStream {
     }
 
     LinkedList<RemapNode> noIncomingEdges = new LinkedList<>();
-    Map<Integer, Integer> idToPos = new HashMap<>();
+    IntIntHashMap idToPos = new IntIntHashMap();
     noIncomingEdges.addLast(new RemapNode(0, 0));
     while (noIncomingEdges.isEmpty() == false) {
       RemapNode currState = noIncomingEdges.removeFirst();

File: Lucene90NormsProducer.java

@@ -20,8 +20,6 @@ import static org.apache.lucene.codecs.lucene90.Lucene90NormsFormat.VERSION_CURRENT;
 import static org.apache.lucene.codecs.lucene90.Lucene90NormsFormat.VERSION_START;
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.index.CorruptIndexException;
@@ -35,17 +33,18 @@ import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /** Reader for {@link Lucene90NormsFormat} */
 final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
   // metadata maps (just file pointers and minimal stuff)
-  private final Map<Integer, NormsEntry> norms = new HashMap<>();
+  private final IntObjectHashMap<NormsEntry> norms = new IntObjectHashMap<>();
   private final int maxDoc;
   private IndexInput data;
   private boolean merging;
-  private Map<Integer, IndexInput> disiInputs;
-  private Map<Integer, RandomAccessInput> disiJumpTables;
-  private Map<Integer, RandomAccessInput> dataInputs;
+  private IntObjectHashMap<IndexInput> disiInputs;
+  private IntObjectHashMap<RandomAccessInput> disiJumpTables;
+  private IntObjectHashMap<RandomAccessInput> dataInputs;
 
   Lucene90NormsProducer(
       SegmentReadState state,
@@ -121,9 +120,9 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
       throw new RuntimeException(e);
     }
     clone.data = data.clone();
-    clone.disiInputs = new HashMap<>();
-    clone.disiJumpTables = new HashMap<>();
-    clone.dataInputs = new HashMap<>();
+    clone.disiInputs = new IntObjectHashMap<>();
+    clone.disiJumpTables = new IntObjectHashMap<>();
+    clone.dataInputs = new IntObjectHashMap<>();
     clone.merging = true;
     return clone;
   }

File: org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java

@@ -17,8 +17,6 @@
 package org.apache.lucene.codecs.lucene90;
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PointsReader;
 import org.apache.lucene.index.CorruptIndexException;
@@ -31,12 +29,13 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /** Reads point values previously written with {@link Lucene90PointsWriter} */
 public class Lucene90PointsReader extends PointsReader {
   final IndexInput indexIn, dataIn;
   final SegmentReadState readState;
-  final Map<Integer, PointValues> readers = new HashMap<>();
+  final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();
 
   /** Sole constructor */
   public Lucene90PointsReader(SegmentReadState readState) throws IOException {

File: FieldInfos.java

@@ -34,6 +34,7 @@ import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Collection of {@link FieldInfo}s (accessible by number or by name).
@@ -374,7 +375,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
   static final class FieldNumbers {
 
-    private final Map<Integer, String> numberToName;
+    private final IntObjectHashMap<String> numberToName;
     private final Map<String, Integer> nameToNumber;
     private final Map<String, IndexOptions> indexOptions;
     // We use this to enforce that a given field never
@@ -401,7 +402,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
     FieldNumbers(String softDeletesFieldName, String parentFieldName) {
       this.nameToNumber = new HashMap<>();
-      this.numberToName = new HashMap<>();
+      this.numberToName = new IntObjectHashMap<>();
       this.indexOptions = new HashMap<>();
       this.docValuesType = new HashMap<>();
       this.dimensions = new HashMap<>();

File: Operations.java

@@ -48,6 +48,7 @@ import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.hppc.BitMixer;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Automata operations.
@@ -573,7 +574,7 @@
     PointTransitions[] points = new PointTransitions[5];
 
     private static final int HASHMAP_CUTOVER = 30;
-    private final HashMap<Integer, PointTransitions> map = new HashMap<>();
+    private final IntObjectHashMap<PointTransitions> map = new IntObjectHashMap<>();
     private boolean useHash = false;
 
     private PointTransitions next(int point) {

File: org/apache/lucene/search/grouping/TermGroupSelector.java

@@ -19,21 +19,20 @@ package org.apache.lucene.search.grouping;
 import java.io.IOException;
 import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.hppc.IntIntHashMap;
 
 /** A GroupSelector implementation that groups via SortedDocValues */
 public class TermGroupSelector extends GroupSelector<BytesRef> {
 
   private final String field;
   private final BytesRefHash values = new BytesRefHash();
-  private final Map<Integer, Integer> ordsToGroupIds = new HashMap<>();
+  private final IntIntHashMap ordsToGroupIds = new IntIntHashMap();
 
   private SortedDocValues docValues;
   private int groupId;
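
One behavioral difference worth noting when reading these IntIntHashMap conversions: java.util.Map.get returns null for a missing key, while an HPPC-style IntIntHashMap.get returns the default value 0, so code that must distinguish "absent" from "mapped to 0" has to ask explicitly. A sketch, assuming Lucene's hppc fork follows HPPC semantics here (the -1 marker is illustrative):

    import org.apache.lucene.util.hppc.IntIntHashMap;

    class DefaultValueSketch {
      static int groupFor(IntIntHashMap ordsToGroupIds, int ord) {
        if (ordsToGroupIds.containsKey(ord)) { // distinguish absent from 0
          return ordsToGroupIds.get(ord);
        }
        return -1; // illustrative "no group" marker
      }
    }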

File: org/apache/lucene/misc/document/LazyDocument.java

@@ -19,10 +19,8 @@ package org.apache.lucene.misc.document;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -34,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Defers actually loading a field's value until you ask for it. You must not use the returned Field
@@ -48,7 +47,7 @@ public class LazyDocument {
   // null until first field is loaded
   private Document doc;
 
-  private Map<Integer, List<LazyField>> fields = new HashMap<>();
+  private IntObjectHashMap<List<LazyField>> fields = new IntObjectHashMap<>();
   private Set<String> fieldNames = new HashSet<>();
 
   public LazyDocument(IndexReader reader, int docID) {

File: org/apache/lucene/spatial/util/CachingDoubleValueSource.java

@@ -17,13 +17,12 @@
 package org.apache.lucene.spatial.util;
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.DoubleValues;
 import org.apache.lucene.search.DoubleValuesSource;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Caches the doubleVal of another value source in a HashMap so that it is computed only once.
@@ -33,11 +32,11 @@ import org.apache.lucene.search.IndexSearcher;
 public class CachingDoubleValueSource extends DoubleValuesSource {
 
   final DoubleValuesSource source;
-  final Map<Integer, Double> cache;
+  final IntObjectHashMap<Double> cache;
 
   public CachingDoubleValueSource(DoubleValuesSource source) {
     this.source = source;
-    cache = new HashMap<>();
+    cache = new IntObjectHashMap<>();
   }
 
   @Override

File: org/apache/lucene/spatial3d/geom/StandardObjects.java

@@ -19,6 +19,7 @@ package org.apache.lucene.spatial3d.geom;
 import java.util.HashMap;
 import java.util.Map;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Lookup tables for classes that can be serialized using a code.
@@ -31,7 +32,7 @@ class StandardObjects {
   static Map<Class<?>, Integer> classRegsitry = new HashMap<>();
   /** Registry of codes to corresponding classes */
-  static Map<Integer, Class<?>> codeRegsitry = new HashMap<>();
+  static IntObjectHashMap<Class<?>> codeRegsitry = new IntObjectHashMap<>();
 
   static {
     classRegsitry.put(GeoPoint.class, 0);