From 4581add24bf8cf9750dc56217a7022225e2a3895 Mon Sep 17 00:00:00 2001 From: panguixin Date: Tue, 29 Oct 2024 23:30:15 +0800 Subject: [PATCH] replace Map with IntObjectHashMap for DV producer (#13961) --- lucene/CHANGES.txt | 2 + .../lucene80/Lucene80DocValuesProducer.java | 33 +++++----- .../lucene90/Lucene90DocValuesProducer.java | 63 +++++++++---------- .../perfield/PerFieldDocValuesFormat.java | 23 +++---- .../index/SegmentDocValuesProducer.java | 21 +++---- 5 files changed, 71 insertions(+), 71 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b861ffe6aff..49aa619ce43 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -49,6 +49,8 @@ Optimizations * GITHUB#13943: Removed shared `HitsThresholdChecker`, which reduces overhead but may delay a bit when dynamic pruning kicks in. (Adrien Grand) +* GITHUB#13961: Replace Map with IntObjectHashMap for DV producer. (Pan Guixin) + Bug Fixes --------------------- * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java index c5754e5d1e5..211267d4c03 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java @@ -17,8 +17,6 @@ package org.apache.lucene.backward_codecs.lucene80; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicReader; import org.apache.lucene.backward_codecs.packed.LegacyDirectReader; import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; @@ -41,6 +39,7 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; @@ -53,11 +52,11 @@ import org.apache.lucene.util.compress.LZ4; /** reader for {@link Lucene80DocValuesFormat} */ final class Lucene80DocValuesProducer extends DocValuesProducer { - private final Map numerics = new HashMap<>(); - private final Map binaries = new HashMap<>(); - private final Map sorted = new HashMap<>(); - private final Map sortedSets = new HashMap<>(); - private final Map sortedNumerics = new HashMap<>(); + private final IntObjectHashMap numerics = new IntObjectHashMap<>(); + private final IntObjectHashMap binaries = new IntObjectHashMap<>(); + private final IntObjectHashMap sorted = new IntObjectHashMap<>(); + private final IntObjectHashMap sortedSets = new IntObjectHashMap<>(); + private final IntObjectHashMap sortedNumerics = new IntObjectHashMap<>(); private final IndexInput data; private final int maxDoc; private int version = -1; @@ -139,7 +138,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer { } byte type = meta.readByte(); if (type == Lucene80DocValuesFormat.NUMERIC) { - numerics.put(info.name, readNumeric(meta)); + numerics.put(info.number, readNumeric(meta)); } else if (type == Lucene80DocValuesFormat.BINARY) { final boolean compressed; if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) { @@ -158,13 +157,13 @@ final class Lucene80DocValuesProducer extends DocValuesProducer { } else { compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED; } - binaries.put(info.name, readBinary(meta, compressed)); + binaries.put(info.number, readBinary(meta, compressed)); } else if (type == Lucene80DocValuesFormat.SORTED) { - sorted.put(info.name, readSorted(meta)); + sorted.put(info.number, readSorted(meta)); } else if (type == Lucene80DocValuesFormat.SORTED_SET) { - sortedSets.put(info.name, readSortedSet(meta)); + sortedSets.put(info.number, readSortedSet(meta)); } else if (type == Lucene80DocValuesFormat.SORTED_NUMERIC) { - sortedNumerics.put(info.name, readSortedNumeric(meta)); + sortedNumerics.put(info.number, readSortedNumeric(meta)); } else { throw new CorruptIndexException("invalid type: " + type, meta); } @@ -426,7 +425,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - NumericEntry entry = numerics.get(field.name); + NumericEntry entry = numerics.get(field.number); return getNumeric(entry); } @@ -915,7 +914,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer { @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - BinaryEntry entry = binaries.get(field.name); + BinaryEntry entry = binaries.get(field.number); if (entry.compressed) { return getCompressedBinary(entry); } else { @@ -973,7 +972,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer { @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - SortedEntry entry = sorted.get(field.name); + SortedEntry entry = sorted.get(field.number); return getSorted(entry); } @@ -1407,7 +1406,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - SortedNumericEntry entry = sortedNumerics.get(field.name); + SortedNumericEntry entry = sortedNumerics.get(field.number); if (entry.numValues == entry.numDocsWithField) { return DocValues.singleton(getNumeric(entry)); } @@ -1543,7 +1542,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer { @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - SortedSetEntry entry = sortedSets.get(field.name); + SortedSetEntry entry = sortedSets.get(field.number); if (entry.singleValueEntry != null) { return DocValues.singleton(getSorted(entry.singleValueEntry)); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index da027a35f17..11e83b3f03c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -21,8 +21,6 @@ import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_IND import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BaseTermsEnum; @@ -43,6 +41,7 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; @@ -59,12 +58,12 @@ import org.apache.lucene.util.packed.DirectReader; /** reader for {@link Lucene90DocValuesFormat} */ final class Lucene90DocValuesProducer extends DocValuesProducer { - private final Map numerics; - private final Map binaries; - private final Map sorted; - private final Map sortedSets; - private final Map sortedNumerics; - private final Map skippers; + private final IntObjectHashMap numerics; + private final IntObjectHashMap binaries; + private final IntObjectHashMap sorted; + private final IntObjectHashMap sortedSets; + private final IntObjectHashMap sortedNumerics; + private final IntObjectHashMap skippers; private final IndexInput data; private final int maxDoc; private int version = -1; @@ -81,12 +80,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); this.maxDoc = state.segmentInfo.maxDoc(); - numerics = new HashMap<>(); - binaries = new HashMap<>(); - sorted = new HashMap<>(); - sortedSets = new HashMap<>(); - sortedNumerics = new HashMap<>(); - skippers = new HashMap<>(); + numerics = new IntObjectHashMap<>(); + binaries = new IntObjectHashMap<>(); + sorted = new IntObjectHashMap<>(); + sortedSets = new IntObjectHashMap<>(); + sortedNumerics = new IntObjectHashMap<>(); + skippers = new IntObjectHashMap<>(); merging = false; // read in the entries from the metadata file. @@ -149,12 +148,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { // Used for cloning private Lucene90DocValuesProducer( - Map numerics, - Map binaries, - Map sorted, - Map sortedSets, - Map sortedNumerics, - Map skippers, + IntObjectHashMap numerics, + IntObjectHashMap binaries, + IntObjectHashMap sorted, + IntObjectHashMap sortedSets, + IntObjectHashMap sortedNumerics, + IntObjectHashMap skippers, IndexInput data, int maxDoc, int version, @@ -194,18 +193,18 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { } byte type = meta.readByte(); if (info.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) { - skippers.put(info.name, readDocValueSkipperMeta(meta)); + skippers.put(info.number, readDocValueSkipperMeta(meta)); } if (type == Lucene90DocValuesFormat.NUMERIC) { - numerics.put(info.name, readNumeric(meta)); + numerics.put(info.number, readNumeric(meta)); } else if (type == Lucene90DocValuesFormat.BINARY) { - binaries.put(info.name, readBinary(meta)); + binaries.put(info.number, readBinary(meta)); } else if (type == Lucene90DocValuesFormat.SORTED) { - sorted.put(info.name, readSorted(meta)); + sorted.put(info.number, readSorted(meta)); } else if (type == Lucene90DocValuesFormat.SORTED_SET) { - sortedSets.put(info.name, readSortedSet(meta)); + sortedSets.put(info.number, readSortedSet(meta)); } else if (type == Lucene90DocValuesFormat.SORTED_NUMERIC) { - sortedNumerics.put(info.name, readSortedNumeric(meta)); + sortedNumerics.put(info.number, readSortedNumeric(meta)); } else { throw new CorruptIndexException("invalid type: " + type, meta); } @@ -430,7 +429,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - NumericEntry entry = numerics.get(field.name); + NumericEntry entry = numerics.get(field.number); return getNumeric(entry); } @@ -786,7 +785,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - BinaryEntry entry = binaries.get(field.name); + BinaryEntry entry = binaries.get(field.number); if (entry.docsWithFieldOffset == -2) { return DocValues.emptyBinary(); @@ -887,7 +886,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - SortedEntry entry = sorted.get(field.name); + SortedEntry entry = sorted.get(field.number); return getSorted(entry); } @@ -1363,7 +1362,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - SortedNumericEntry entry = sortedNumerics.get(field.name); + SortedNumericEntry entry = sortedNumerics.get(field.number); return getSortedNumeric(entry); } @@ -1508,7 +1507,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - SortedSetEntry entry = sortedSets.get(field.name); + SortedSetEntry entry = sortedSets.get(field.number); if (entry.singleValueEntry != null) { return DocValues.singleton(getSorted(entry.singleValueEntry)); } @@ -1782,7 +1781,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { @Override public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { - final DocValuesSkipperEntry entry = skippers.get(field.name); + final DocValuesSkipperEntry entry = skippers.get(field.number); final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length); // Prefetch the first page of data. Following pages are expected to get prefetched through diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java index 2e45e232b5f..9350c016f67 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java @@ -38,6 +38,7 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.util.IOUtils; /** @@ -256,7 +257,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat { private static class FieldsReader extends DocValuesProducer { - private final Map fields = new HashMap<>(); + private final IntObjectHashMap fields = new IntObjectHashMap<>(); private final Map formats = new HashMap<>(); // clone for merge @@ -270,10 +271,10 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat { } // Then rebuild fields: - for (Map.Entry ent : other.fields.entrySet()) { - DocValuesProducer producer = oldToNew.get(ent.getValue()); + for (IntObjectHashMap.IntObjectCursor ent : other.fields) { + DocValuesProducer producer = oldToNew.get(ent.value); assert producer != null; - fields.put(ent.getKey(), producer); + fields.put(ent.key, producer); } } @@ -302,7 +303,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat { segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix))); } - fields.put(fieldName, formats.get(segmentSuffix)); + fields.put(fi.number, formats.get(segmentSuffix)); } } } @@ -316,37 +317,37 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getNumeric(field); } @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getBinary(field); } @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getSorted(field); } @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getSortedNumeric(field); } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getSortedSet(field); } @Override public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getSkipper(field); } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java index 1d9878fe0db..0f4df818ddc 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java @@ -18,11 +18,10 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.Collections; -import java.util.HashMap; import java.util.IdentityHashMap; -import java.util.Map; import java.util.Set; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.internal.hppc.LongArrayList; import org.apache.lucene.store.Directory; @@ -32,7 +31,7 @@ import org.apache.lucene.store.Directory; // producer? class SegmentDocValuesProducer extends DocValuesProducer { - final Map dvProducersByField = new HashMap<>(); + final IntObjectHashMap dvProducersByField = new IntObjectHashMap<>(); final Set dvProducers = Collections.newSetFromMap(new IdentityHashMap()); final LongArrayList dvGens = new LongArrayList(); @@ -67,7 +66,7 @@ class SegmentDocValuesProducer extends DocValuesProducer { dvGens.add(docValuesGen); dvProducers.add(baseProducer); } - dvProducersByField.put(fi.name, baseProducer); + dvProducersByField.put(fi.number, baseProducer); } else { assert !dvGens.contains(docValuesGen); // otherwise, producer sees only the one fieldinfo it wrote @@ -76,7 +75,7 @@ class SegmentDocValuesProducer extends DocValuesProducer { docValuesGen, si, dir, new FieldInfos(new FieldInfo[] {fi})); dvGens.add(docValuesGen); dvProducers.add(dvp); - dvProducersByField.put(fi.name, dvp); + dvProducersByField.put(fi.number, dvp); } } } catch (Throwable t) { @@ -91,42 +90,42 @@ class SegmentDocValuesProducer extends DocValuesProducer { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getNumeric(field); } @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getBinary(field); } @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getSorted(field); } @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getSortedNumeric(field); } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getSortedSet(field); } @Override public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getSkipper(field); }