Replace Map<Integer, Object> by primitive IntObjectHashMap. (#13368)

Bruno Roustant 2024-05-18 14:40:40 +02:00 committed by GitHub
parent 24fd426d98
commit 7db9c8c9bd
16 changed files with 63 additions and 59 deletions
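
The maps below come from Lucene's internal HPPC fork (org.apache.lucene.util.hppc); they keep int keys primitive instead of boxing them as Integer. As context for reading the hunks, here is a minimal sketch of the API surface this commit exercises — the class and method names are taken from the diff itself, everything else (keys, values, names) is illustrative:

    import org.apache.lucene.util.hppc.IntIntHashMap;
    import org.apache.lucene.util.hppc.IntObjectHashMap;

    class HppcMapSketch {
      static void demo() {
        IntObjectHashMap<String> byNumber = new IntObjectHashMap<>();
        byNumber.put(7, "seven");      // primitive int key, no Integer boxing
        String s = byNumber.get(7);    // null when the key is absent
        for (IntObjectHashMap.IntObjectCursor<String> c : byNumber) {
          System.out.println(c.key + " -> " + c.value); // cursors, not Map.Entry
        }

        IntIntHashMap positions = new IntIntHashMap();
        positions.put(42, 0);
        int[] keys = positions.keys().toArray(); // primitive snapshot, no Integer[]
      }
    }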

File: lucene/CHANGES.txt

@@ -341,6 +341,8 @@ Optimizations
 * GITHUB#13327: Reduce memory usage of field maps in FieldInfos and BlockTree TermsReader. (Bruno Roustant, David Smiley)
 
+* GITHUB#13368: Replace Map<Integer, Object> by primitive IntObjectHashMap. (Bruno Roustant)
+
 Bug Fixes
 ---------------------

File: org/apache/lucene/analysis/hunspell/SuggestibleEntryCache.java

@@ -16,12 +16,11 @@
  */
 package org.apache.lucene.analysis.hunspell;
 
-import java.util.HashMap;
-import java.util.Map;
 import java.util.function.Consumer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * A cache allowing for CPU-cache-friendlier iteration over {@link WordStorage} entries that can be
@@ -35,9 +34,7 @@ class SuggestibleEntryCache {
   private final Section[] sections;
 
-  private SuggestibleEntryCache(Map<Integer, SectionBuilder> builders) {
-    int maxLength =
-        builders.isEmpty() ? 0 : builders.keySet().stream().max(Integer::compare).orElseThrow();
+  private SuggestibleEntryCache(IntObjectHashMap<SectionBuilder> builders, int maxLength) {
     sections = new Section[maxLength + 1];
     for (int i = 0; i < sections.length; i++) {
       SectionBuilder builder = builders.get(i);
@@ -48,7 +45,8 @@ class SuggestibleEntryCache {
   static SuggestibleEntryCache buildCache(WordStorage storage) {
     var consumer =
         new Consumer<FlyweightEntry>() {
-          final Map<Integer, SectionBuilder> builders = new HashMap<>();
+          final IntObjectHashMap<SectionBuilder> builders = new IntObjectHashMap<>();
+          int maxLength;
 
           @Override
           public void accept(FlyweightEntry entry) {
@@ -56,14 +54,24 @@ class SuggestibleEntryCache {
             if (root.length > Short.MAX_VALUE) {
               throw new UnsupportedOperationException(
                   "Too long dictionary entry, please report this to dev@lucene.apache.org");
+            } else if (root.length > maxLength) {
+              maxLength = root.length;
             }
 
-            builders.computeIfAbsent(root.length, __ -> new SectionBuilder()).add(entry);
+            SectionBuilder builder;
+            int index = builders.indexOf(root.length);
+            if (index < 0) {
+              builder = new SectionBuilder();
+              builders.indexInsert(index, root.length, builder);
+            } else {
+              builder = builders.indexGet(index);
+            }
+            builder.add(entry);
           }
         };
 
     storage.processSuggestibleWords(1, Integer.MAX_VALUE, consumer);
-    return new SuggestibleEntryCache(consumer.builders);
+    return new SuggestibleEntryCache(consumer.builders, consumer.maxLength);
   }
 
   private static class SectionBuilder {
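
The accept() body above replaces Map.computeIfAbsent, which IntObjectHashMap does not provide, with the indexOf/indexInsert/indexGet triple: indexOf hashes the key once and returns either the slot holding it or a negative encoding of the free slot, so the get-or-create needs no second lookup. A standalone sketch of the same pattern (the key, value type and names here are illustrative):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.util.hppc.IntObjectHashMap;

    class GetOrCreateSketch {
      static List<String> getOrCreate(IntObjectHashMap<List<String>> groups, int key) {
        int slot = groups.indexOf(key);           // one hash probe
        if (slot < 0) {                           // negative: key absent
          List<String> created = new ArrayList<>();
          groups.indexInsert(slot, key, created); // reuses the probed slot
          return created;
        }
        return groups.indexGet(slot);             // non-negative: key present
      }
    }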

File: ViterbiNBest.java

@@ -21,10 +21,10 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumMap;
-import java.util.HashMap;
 import java.util.List;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.hppc.IntIntHashMap;
 
 /** {@link Viterbi} subclass for n-best path calculation. */
 public abstract class ViterbiNBest<T extends Token, U extends MorphData>
@@ -137,14 +137,14 @@ public abstract class ViterbiNBest<T extends Token, U extends MorphData>
     }
 
     // offset=>position map
-    HashMap<Integer, Integer> map = new HashMap<>();
+    IntIntHashMap map = new IntIntHashMap();
     for (Token t : pending) {
       map.put(t.getOffset(), 0);
       map.put(t.getOffset() + t.getLength(), 0);
     }
 
     // Get unique and sorted list of all edge position of tokens.
-    Integer[] offsets = map.keySet().toArray(new Integer[0]);
+    int[] offsets = map.keys().toArray();
     Arrays.sort(offsets);
 
     // setup all value of map. It specifies N-th position from begin.
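
In the hunk above the map is used purely as a set: each offset is put with a dummy value of 0 so duplicate offsets collapse onto one key, and keys().toArray() then yields an int[] that Arrays.sort handles without a boxing comparator. A small sketch of that idiom (the offsets are illustrative):

    import java.util.Arrays;
    import org.apache.lucene.util.hppc.IntIntHashMap;

    class OffsetDedupSketch {
      static int[] sortedUniqueOffsets() {
        IntIntHashMap edges = new IntIntHashMap();
        edges.put(3, 0);
        edges.put(3, 0); // overwrites: each offset is stored once
        edges.put(8, 0);
        int[] offsets = edges.keys().toArray(); // primitive keys, no Integer[]
        Arrays.sort(offsets);                   // primitive sort, no unboxing
        return offsets;
      }
    }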

File: ICUTokenizerFactory.java

@@ -24,7 +24,6 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.lucene.analysis.TokenizerFactory;
@@ -32,6 +31,7 @@ import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.ResourceLoader;
 import org.apache.lucene.util.ResourceLoaderAware;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Factory for {@link ICUTokenizer}. Words are broken across script boundaries, then segmented
@@ -74,7 +74,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware
   public static final String NAME = "icu";
 
   static final String RULEFILES = "rulefiles";
-  private final Map<Integer, String> tailored;
+  private final IntObjectHashMap<String> tailored;
   private ICUTokenizerConfig config;
   private final boolean cjkAsWords;
   private final boolean myanmarAsWords;
@@ -82,7 +82,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware
   /** Creates a new ICUTokenizerFactory */
   public ICUTokenizerFactory(Map<String, String> args) {
     super(args);
-    tailored = new HashMap<>();
+    tailored = new IntObjectHashMap<>();
    String rulefilesArg = get(args, RULEFILES);
    if (rulefilesArg != null) {
      List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);
@@ -113,9 +113,9 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware
     } else {
       final BreakIterator[] breakers =
           new BreakIterator[1 + UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)];
-      for (Map.Entry<Integer, String> entry : tailored.entrySet()) {
-        int code = entry.getKey();
-        String resourcePath = entry.getValue();
+      for (IntObjectHashMap.IntObjectCursor<String> entry : tailored) {
+        int code = entry.key;
+        String resourcePath = entry.value;
         breakers[code] = parseRules(resourcePath, loader);
       }
       config =
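
The entrySet() loop above becomes cursor iteration: iterating an IntObjectHashMap yields IntObjectCursor objects whose key field is a primitive int, so there is no Map.Entry allocation or getKey() unboxing. A sketch, assuming HPPC-style iteration where the same cursor instance may be reused between steps (so read it, don't store it); the script constant and resource name are illustrative:

    import com.ibm.icu.lang.UScript;
    import org.apache.lucene.util.hppc.IntObjectHashMap;

    class CursorIterationSketch {
      static void demo() {
        IntObjectHashMap<String> tailoredRules = new IntObjectHashMap<>();
        tailoredRules.put(UScript.LATIN, "Latin-rules.rbbi"); // illustrative entry
        for (IntObjectHashMap.IntObjectCursor<String> cur : tailoredRules) {
          int scriptCode = cur.key;        // primitive, no getKey() unboxing
          String resourcePath = cur.value;
          System.out.println(scriptCode + " -> " + resourcePath);
        }
      }
    }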

File: org/apache/lucene/analysis/cn/smart/hhmm/BiSegGraph.java

@@ -17,11 +17,9 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import org.apache.lucene.analysis.cn.smart.Utility;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Graph representing possible token pairs (bigrams) at each start offset in the sentence.
@@ -32,7 +30,7 @@
  */
 class BiSegGraph {
 
-  private Map<Integer, ArrayList<SegTokenPair>> tokenPairListTable = new HashMap<>();
+  private IntObjectHashMap<ArrayList<SegTokenPair>> tokenPairListTable = new IntObjectHashMap<>();
 
   private List<SegToken> segTokenList;
@@ -122,7 +120,7 @@
    * @return true if a token pair exists
    */
   public boolean isToExist(int to) {
-    return tokenPairListTable.get(Integer.valueOf(to)) != null;
+    return tokenPairListTable.get(to) != null;
   }
 
   /**
@@ -220,9 +218,9 @@
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
-    Collection<ArrayList<SegTokenPair>> values = tokenPairListTable.values();
-    for (ArrayList<SegTokenPair> segList : values) {
-      for (SegTokenPair pair : segList) {
+    for (IntObjectHashMap.ObjectCursor<ArrayList<SegTokenPair>> segList :
+        tokenPairListTable.values()) {
+      for (SegTokenPair pair : segList.value) {
         sb.append(pair).append("\n");
       }
     }

File: org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java

@@ -17,9 +17,8 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Graph representing possible tokens at each start offset in the sentence.
@@ -31,7 +30,7 @@
  */
 class SegGraph {
 
   /** Map of start offsets to ArrayList of tokens at that position */
-  private Map<Integer, ArrayList<SegToken>> tokenListTable = new HashMap<>();
+  private IntObjectHashMap<ArrayList<SegToken>> tokenListTable = new IntObjectHashMap<>();
 
   private int maxStart = -1;

File: org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java

@@ -17,7 +17,6 @@
 package org.apache.lucene.benchmark.byTask.feeds;
 
 import java.util.AbstractMap;
-import java.util.HashMap;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
@@ -31,6 +30,7 @@ import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
 import org.apache.lucene.spatial.serialized.SerializedDVStrategy;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 import org.locationtech.spatial4j.context.SpatialContext;
 import org.locationtech.spatial4j.context.SpatialContextFactory;
 import org.locationtech.spatial4j.shape.Point;
@@ -50,7 +50,7 @@ public class SpatialDocMaker extends DocMaker {
   public static final String SPATIAL_FIELD = "spatial";
 
   // cache spatialStrategy by round number
-  private static Map<Integer, SpatialStrategy> spatialStrategyCache = new HashMap<>();
+  private static IntObjectHashMap<SpatialStrategy> spatialStrategyCache = new IntObjectHashMap<>();
 
   private SpatialStrategy strategy;
   private ShapeConverter shapeConverter;

File: org/apache/lucene/analysis/AutomatonToTokenStream.java

@@ -19,16 +19,15 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.hppc.IntIntHashMap;
 
 /** Converts an Automaton into a TokenStream. */
 public class AutomatonToTokenStream {
@@ -61,7 +60,7 @@ public class AutomatonToTokenStream {
     }
 
     LinkedList<RemapNode> noIncomingEdges = new LinkedList<>();
-    Map<Integer, Integer> idToPos = new HashMap<>();
+    IntIntHashMap idToPos = new IntIntHashMap();
     noIncomingEdges.addLast(new RemapNode(0, 0));
     while (noIncomingEdges.isEmpty() == false) {
       RemapNode currState = noIncomingEdges.removeFirst();

File: Lucene90NormsProducer.java

@@ -20,8 +20,6 @@ import static org.apache.lucene.codecs.lucene90.Lucene90NormsFormat.VERSION_CURRENT;
 import static org.apache.lucene.codecs.lucene90.Lucene90NormsFormat.VERSION_START;
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.index.CorruptIndexException;
@@ -35,17 +33,18 @@ import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /** Reader for {@link Lucene90NormsFormat} */
 final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
   // metadata maps (just file pointers and minimal stuff)
-  private final Map<Integer, NormsEntry> norms = new HashMap<>();
+  private final IntObjectHashMap<NormsEntry> norms = new IntObjectHashMap<>();
   private final int maxDoc;
   private IndexInput data;
   private boolean merging;
-  private Map<Integer, IndexInput> disiInputs;
-  private Map<Integer, RandomAccessInput> disiJumpTables;
-  private Map<Integer, RandomAccessInput> dataInputs;
+  private IntObjectHashMap<IndexInput> disiInputs;
+  private IntObjectHashMap<RandomAccessInput> disiJumpTables;
+  private IntObjectHashMap<RandomAccessInput> dataInputs;
 
   Lucene90NormsProducer(
       SegmentReadState state,
@@ -121,9 +120,9 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
       throw new RuntimeException(e);
     }
     clone.data = data.clone();
-    clone.disiInputs = new HashMap<>();
-    clone.disiJumpTables = new HashMap<>();
-    clone.dataInputs = new HashMap<>();
+    clone.disiInputs = new IntObjectHashMap<>();
+    clone.disiJumpTables = new IntObjectHashMap<>();
+    clone.dataInputs = new IntObjectHashMap<>();
     clone.merging = true;
     return clone;
   }

File: org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java

@@ -17,8 +17,6 @@
 package org.apache.lucene.codecs.lucene90;
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PointsReader;
 import org.apache.lucene.index.CorruptIndexException;
@@ -31,12 +29,13 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /** Reads point values previously written with {@link Lucene90PointsWriter} */
 public class Lucene90PointsReader extends PointsReader {
   final IndexInput indexIn, dataIn;
   final SegmentReadState readState;
-  final Map<Integer, PointValues> readers = new HashMap<>();
+  final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();
 
   /** Sole constructor */
   public Lucene90PointsReader(SegmentReadState readState) throws IOException {

File: FieldInfos.java

@@ -34,6 +34,7 @@ import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Collection of {@link FieldInfo}s (accessible by number or by name).
@@ -374,7 +375,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
   static final class FieldNumbers {
 
-    private final Map<Integer, String> numberToName;
+    private final IntObjectHashMap<String> numberToName;
     private final Map<String, Integer> nameToNumber;
     private final Map<String, IndexOptions> indexOptions;
     // We use this to enforce that a given field never
@@ -401,7 +402,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
     FieldNumbers(String softDeletesFieldName, String parentFieldName) {
       this.nameToNumber = new HashMap<>();
-      this.numberToName = new HashMap<>();
+      this.numberToName = new IntObjectHashMap<>();
       this.indexOptions = new HashMap<>();
       this.docValuesType = new HashMap<>();
       this.dimensions = new HashMap<>();

File: Operations.java

@@ -48,6 +48,7 @@ import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.hppc.BitMixer;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Automata operations.
@@ -573,7 +574,7 @@
     PointTransitions[] points = new PointTransitions[5];
 
     private static final int HASHMAP_CUTOVER = 30;
-    private final HashMap<Integer, PointTransitions> map = new HashMap<>();
+    private final IntObjectHashMap<PointTransitions> map = new IntObjectHashMap<>();
     private boolean useHash = false;
 
     private PointTransitions next(int point) {

File: org/apache/lucene/search/grouping/TermGroupSelector.java

@@ -19,21 +19,20 @@ package org.apache.lucene.search.grouping;
 import java.io.IOException;
 import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.hppc.IntIntHashMap;
 
 /** A GroupSelector implementation that groups via SortedDocValues */
 public class TermGroupSelector extends GroupSelector<BytesRef> {
 
   private final String field;
   private final BytesRefHash values = new BytesRefHash();
-  private final Map<Integer, Integer> ordsToGroupIds = new HashMap<>();
+  private final IntIntHashMap ordsToGroupIds = new IntIntHashMap();
 
   private SortedDocValues docValues;
   private int groupId;
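
One behavioral difference worth noting when reading these IntIntHashMap conversions: java.util.Map.get returns null for a missing key, while an HPPC-style IntIntHashMap.get returns the default value 0, so code that must distinguish "absent" from "mapped to 0" has to ask explicitly. A sketch, assuming Lucene's hppc fork follows HPPC semantics here (the -1 marker is illustrative):

    import org.apache.lucene.util.hppc.IntIntHashMap;

    class DefaultValueSketch {
      static int groupFor(IntIntHashMap ordsToGroupIds, int ord) {
        if (ordsToGroupIds.containsKey(ord)) { // distinguish absent from 0
          return ordsToGroupIds.get(ord);
        }
        return -1; // illustrative "no group" marker
      }
    }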

File: org/apache/lucene/misc/document/LazyDocument.java

@@ -19,10 +19,8 @@ package org.apache.lucene.misc.document;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -34,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Defers actually loading a field's value until you ask for it. You must not use the returned Field
@@ -48,7 +47,7 @@ public class LazyDocument {
   // null until first field is loaded
   private Document doc;
 
-  private Map<Integer, List<LazyField>> fields = new HashMap<>();
+  private IntObjectHashMap<List<LazyField>> fields = new IntObjectHashMap<>();
   private Set<String> fieldNames = new HashSet<>();
 
   public LazyDocument(IndexReader reader, int docID) {

File: org/apache/lucene/spatial/util/CachingDoubleValueSource.java

@@ -17,13 +17,12 @@
 package org.apache.lucene.spatial.util;
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.DoubleValues;
 import org.apache.lucene.search.DoubleValuesSource;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Caches the doubleVal of another value source in a HashMap so that it is computed only once.
@@ -33,11 +32,11 @@ import org.apache.lucene.search.IndexSearcher;
 public class CachingDoubleValueSource extends DoubleValuesSource {
 
   final DoubleValuesSource source;
-  final Map<Integer, Double> cache;
+  final IntObjectHashMap<Double> cache;
 
   public CachingDoubleValueSource(DoubleValuesSource source) {
     this.source = source;
-    cache = new HashMap<>();
+    cache = new IntObjectHashMap<>();
   }
 
   @Override

File: org/apache/lucene/spatial3d/geom/StandardObjects.java

@@ -19,6 +19,7 @@ package org.apache.lucene.spatial3d.geom;
 import java.util.HashMap;
 import java.util.Map;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Lookup tables for classes that can be serialized using a code.
@@ -31,7 +32,7 @@ class StandardObjects {
   static Map<Class<?>, Integer> classRegsitry = new HashMap<>();
   /** Registry of codes to corresponding classes */
-  static Map<Integer, Class<?>> codeRegsitry = new HashMap<>();
+  static IntObjectHashMap<Class<?>> codeRegsitry = new IntObjectHashMap<>();
 
   static {
     classRegsitry.put(GeoPoint.class, 0);