Replace Map<String,Object> with IntObjectHashMap for doc values (DV) producers (#13961)

This commit is contained in:
panguixin 2024-10-29 23:30:15 +08:00 committed by Adrien Grand
parent af9177a598
commit 4581add24b
5 changed files with 71 additions and 71 deletions

View File

@ -49,6 +49,8 @@ Optimizations
* GITHUB#13943: Removed shared `HitsThresholdChecker`, which reduces overhead
but may slightly delay the point at which dynamic pruning kicks in. (Adrien Grand)
* GITHUB#13961: Replace Map<String,Object> with IntObjectHashMap for DV producer. (Pan Guixin)
Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended

View File

@ -17,8 +17,6 @@
package org.apache.lucene.backward_codecs.lucene80;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicReader;
import org.apache.lucene.backward_codecs.packed.LegacyDirectReader;
import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
@ -41,6 +39,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
@ -53,11 +52,11 @@ import org.apache.lucene.util.compress.LZ4;
/** reader for {@link Lucene80DocValuesFormat} */
final class Lucene80DocValuesProducer extends DocValuesProducer {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();
private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
private final Map<String, SortedNumericEntry> sortedNumerics = new HashMap<>();
private final IntObjectHashMap<NumericEntry> numerics = new IntObjectHashMap<>();
private final IntObjectHashMap<BinaryEntry> binaries = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedEntry> sorted = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedSetEntry> sortedSets = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedNumericEntry> sortedNumerics = new IntObjectHashMap<>();
private final IndexInput data;
private final int maxDoc;
private int version = -1;
@ -139,7 +138,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
}
byte type = meta.readByte();
if (type == Lucene80DocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta));
numerics.put(info.number, readNumeric(meta));
} else if (type == Lucene80DocValuesFormat.BINARY) {
final boolean compressed;
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
@ -158,13 +157,13 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
} else {
compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
}
binaries.put(info.name, readBinary(meta, compressed));
binaries.put(info.number, readBinary(meta, compressed));
} else if (type == Lucene80DocValuesFormat.SORTED) {
sorted.put(info.name, readSorted(meta));
sorted.put(info.number, readSorted(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_SET) {
sortedSets.put(info.name, readSortedSet(meta));
sortedSets.put(info.number, readSortedSet(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_NUMERIC) {
sortedNumerics.put(info.name, readSortedNumeric(meta));
sortedNumerics.put(info.number, readSortedNumeric(meta));
} else {
throw new CorruptIndexException("invalid type: " + type, meta);
}
@ -426,7 +425,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
/**
 * Returns numeric doc values for {@code field}: fetches the field's {@code NumericEntry} from
 * {@code numerics} and delegates to the {@code getNumeric} overload that takes the entry.
 */
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
NumericEntry entry = numerics.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
NumericEntry entry = numerics.get(field.number);
return getNumeric(entry);
}
@ -915,7 +914,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
BinaryEntry entry = binaries.get(field.name);
BinaryEntry entry = binaries.get(field.number);
if (entry.compressed) {
return getCompressedBinary(entry);
} else {
@ -973,7 +972,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
/**
 * Returns sorted doc values for {@code field}: fetches the field's {@code SortedEntry} from
 * {@code sorted} and delegates to the {@code getSorted} overload that takes the entry.
 */
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
SortedEntry entry = sorted.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
SortedEntry entry = sorted.get(field.number);
return getSorted(entry);
}
@ -1407,7 +1406,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
SortedNumericEntry entry = sortedNumerics.get(field.name);
SortedNumericEntry entry = sortedNumerics.get(field.number);
if (entry.numValues == entry.numDocsWithField) {
return DocValues.singleton(getNumeric(entry));
}
@ -1543,7 +1542,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
SortedSetEntry entry = sortedSets.get(field.name);
SortedSetEntry entry = sortedSets.get(field.number);
if (entry.singleValueEntry != null) {
return DocValues.singleton(getSorted(entry.singleValueEntry));
}

View File

@ -21,8 +21,6 @@ import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_IND
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BaseTermsEnum;
@ -43,6 +41,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
@ -59,12 +58,12 @@ import org.apache.lucene.util.packed.DirectReader;
/** reader for {@link Lucene90DocValuesFormat} */
final class Lucene90DocValuesProducer extends DocValuesProducer {
private final Map<String, NumericEntry> numerics;
private final Map<String, BinaryEntry> binaries;
private final Map<String, SortedEntry> sorted;
private final Map<String, SortedSetEntry> sortedSets;
private final Map<String, SortedNumericEntry> sortedNumerics;
private final Map<String, DocValuesSkipperEntry> skippers;
private final IntObjectHashMap<NumericEntry> numerics;
private final IntObjectHashMap<BinaryEntry> binaries;
private final IntObjectHashMap<SortedEntry> sorted;
private final IntObjectHashMap<SortedSetEntry> sortedSets;
private final IntObjectHashMap<SortedNumericEntry> sortedNumerics;
private final IntObjectHashMap<DocValuesSkipperEntry> skippers;
private final IndexInput data;
private final int maxDoc;
private int version = -1;
@ -81,12 +80,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
String metaName =
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
this.maxDoc = state.segmentInfo.maxDoc();
numerics = new HashMap<>();
binaries = new HashMap<>();
sorted = new HashMap<>();
sortedSets = new HashMap<>();
sortedNumerics = new HashMap<>();
skippers = new HashMap<>();
numerics = new IntObjectHashMap<>();
binaries = new IntObjectHashMap<>();
sorted = new IntObjectHashMap<>();
sortedSets = new IntObjectHashMap<>();
sortedNumerics = new IntObjectHashMap<>();
skippers = new IntObjectHashMap<>();
merging = false;
// read in the entries from the metadata file.
@ -149,12 +148,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
// Used for cloning
private Lucene90DocValuesProducer(
Map<String, NumericEntry> numerics,
Map<String, BinaryEntry> binaries,
Map<String, SortedEntry> sorted,
Map<String, SortedSetEntry> sortedSets,
Map<String, SortedNumericEntry> sortedNumerics,
Map<String, DocValuesSkipperEntry> skippers,
IntObjectHashMap<NumericEntry> numerics,
IntObjectHashMap<BinaryEntry> binaries,
IntObjectHashMap<SortedEntry> sorted,
IntObjectHashMap<SortedSetEntry> sortedSets,
IntObjectHashMap<SortedNumericEntry> sortedNumerics,
IntObjectHashMap<DocValuesSkipperEntry> skippers,
IndexInput data,
int maxDoc,
int version,
@ -194,18 +193,18 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
byte type = meta.readByte();
if (info.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
skippers.put(info.name, readDocValueSkipperMeta(meta));
skippers.put(info.number, readDocValueSkipperMeta(meta));
}
if (type == Lucene90DocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta));
numerics.put(info.number, readNumeric(meta));
} else if (type == Lucene90DocValuesFormat.BINARY) {
binaries.put(info.name, readBinary(meta));
binaries.put(info.number, readBinary(meta));
} else if (type == Lucene90DocValuesFormat.SORTED) {
sorted.put(info.name, readSorted(meta));
sorted.put(info.number, readSorted(meta));
} else if (type == Lucene90DocValuesFormat.SORTED_SET) {
sortedSets.put(info.name, readSortedSet(meta));
sortedSets.put(info.number, readSortedSet(meta));
} else if (type == Lucene90DocValuesFormat.SORTED_NUMERIC) {
sortedNumerics.put(info.name, readSortedNumeric(meta));
sortedNumerics.put(info.number, readSortedNumeric(meta));
} else {
throw new CorruptIndexException("invalid type: " + type, meta);
}
@ -430,7 +429,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
/**
 * Returns numeric doc values for {@code field}: fetches the field's {@code NumericEntry} from
 * {@code numerics} and delegates to the {@code getNumeric} overload that takes the entry.
 */
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
NumericEntry entry = numerics.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
NumericEntry entry = numerics.get(field.number);
return getNumeric(entry);
}
@ -786,7 +785,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
BinaryEntry entry = binaries.get(field.name);
BinaryEntry entry = binaries.get(field.number);
if (entry.docsWithFieldOffset == -2) {
return DocValues.emptyBinary();
@ -887,7 +886,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
/**
 * Returns sorted doc values for {@code field}: fetches the field's {@code SortedEntry} from
 * {@code sorted} and delegates to the {@code getSorted} overload that takes the entry.
 */
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
SortedEntry entry = sorted.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
SortedEntry entry = sorted.get(field.number);
return getSorted(entry);
}
@ -1363,7 +1362,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
/**
 * Returns sorted-numeric doc values for {@code field}: fetches the field's
 * {@code SortedNumericEntry} from {@code sortedNumerics} and delegates to the
 * {@code getSortedNumeric} overload that takes the entry.
 */
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
SortedNumericEntry entry = sortedNumerics.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
SortedNumericEntry entry = sortedNumerics.get(field.number);
return getSortedNumeric(entry);
}
@ -1508,7 +1507,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
SortedSetEntry entry = sortedSets.get(field.name);
SortedSetEntry entry = sortedSets.get(field.number);
if (entry.singleValueEntry != null) {
return DocValues.singleton(getSorted(entry.singleValueEntry));
}
@ -1782,7 +1781,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
final DocValuesSkipperEntry entry = skippers.get(field.name);
final DocValuesSkipperEntry entry = skippers.get(field.number);
final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length);
// Prefetch the first page of data. Following pages are expected to get prefetched through

View File

@ -38,6 +38,7 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.util.IOUtils;
/**
@ -256,7 +257,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
private static class FieldsReader extends DocValuesProducer {
private final Map<String, DocValuesProducer> fields = new HashMap<>();
private final IntObjectHashMap<DocValuesProducer> fields = new IntObjectHashMap<>();
private final Map<String, DocValuesProducer> formats = new HashMap<>();
// clone for merge
@ -270,10 +271,10 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
}
// Then rebuild fields:
for (Map.Entry<String, DocValuesProducer> ent : other.fields.entrySet()) {
DocValuesProducer producer = oldToNew.get(ent.getValue());
for (IntObjectHashMap.IntObjectCursor<DocValuesProducer> ent : other.fields) {
DocValuesProducer producer = oldToNew.get(ent.value);
assert producer != null;
fields.put(ent.getKey(), producer);
fields.put(ent.key, producer);
}
}
@ -302,7 +303,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
segmentSuffix,
format.fieldsProducer(new SegmentReadState(readState, segmentSuffix)));
}
fields.put(fieldName, formats.get(segmentSuffix));
fields.put(fi.number, formats.get(segmentSuffix));
}
}
}
@ -316,37 +317,37 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
/**
 * Looks up the {@code DocValuesProducer} mapped to {@code field} in {@code fields} and asks it
 * for the field's numeric doc values.
 *
 * @return the producer's result, or {@code null} when no producer is mapped to the field
 */
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer producer = fields.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer producer = fields.get(field.number);
return producer == null ? null : producer.getNumeric(field);
}
/**
 * Looks up the {@code DocValuesProducer} mapped to {@code field} in {@code fields} and asks it
 * for the field's binary doc values.
 *
 * @return the producer's result, or {@code null} when no producer is mapped to the field
 */
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer producer = fields.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer producer = fields.get(field.number);
return producer == null ? null : producer.getBinary(field);
}
/**
 * Looks up the {@code DocValuesProducer} mapped to {@code field} in {@code fields} and asks it
 * for the field's sorted doc values.
 *
 * @return the producer's result, or {@code null} when no producer is mapped to the field
 */
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer producer = fields.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer producer = fields.get(field.number);
return producer == null ? null : producer.getSorted(field);
}
/**
 * Looks up the {@code DocValuesProducer} mapped to {@code field} in {@code fields} and asks it
 * for the field's sorted-numeric doc values.
 *
 * @return the producer's result, or {@code null} when no producer is mapped to the field
 */
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer producer = fields.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer producer = fields.get(field.number);
return producer == null ? null : producer.getSortedNumeric(field);
}
/**
 * Looks up the {@code DocValuesProducer} mapped to {@code field} in {@code fields} and asks it
 * for the field's sorted-set doc values.
 *
 * @return the producer's result, or {@code null} when no producer is mapped to the field
 */
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer producer = fields.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer producer = fields.get(field.number);
return producer == null ? null : producer.getSortedSet(field);
}
/**
 * Looks up the {@code DocValuesProducer} mapped to {@code field} in {@code fields} and asks it
 * for the field's doc values skipper.
 *
 * @return the producer's result, or {@code null} when no producer is mapped to the field
 */
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer producer = fields.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer producer = fields.get(field.number);
return producer == null ? null : producer.getSkipper(field);
}

View File

@ -18,11 +18,10 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.internal.hppc.LongArrayList;
import org.apache.lucene.store.Directory;
@ -32,7 +31,7 @@ import org.apache.lucene.store.Directory;
// producer?
class SegmentDocValuesProducer extends DocValuesProducer {
final Map<String, DocValuesProducer> dvProducersByField = new HashMap<>();
final IntObjectHashMap<DocValuesProducer> dvProducersByField = new IntObjectHashMap<>();
final Set<DocValuesProducer> dvProducers =
Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer, Boolean>());
final LongArrayList dvGens = new LongArrayList();
@ -67,7 +66,7 @@ class SegmentDocValuesProducer extends DocValuesProducer {
dvGens.add(docValuesGen);
dvProducers.add(baseProducer);
}
dvProducersByField.put(fi.name, baseProducer);
dvProducersByField.put(fi.number, baseProducer);
} else {
assert !dvGens.contains(docValuesGen);
// otherwise, producer sees only the one fieldinfo it wrote
@ -76,7 +75,7 @@ class SegmentDocValuesProducer extends DocValuesProducer {
docValuesGen, si, dir, new FieldInfos(new FieldInfo[] {fi}));
dvGens.add(docValuesGen);
dvProducers.add(dvp);
dvProducersByField.put(fi.name, dvp);
dvProducersByField.put(fi.number, dvp);
}
}
} catch (Throwable t) {
@ -91,42 +90,42 @@ class SegmentDocValuesProducer extends DocValuesProducer {
/**
 * Delegates to the {@code DocValuesProducer} stored for {@code field} in
 * {@code dvProducersByField} to obtain the field's numeric doc values.
 */
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer dvProducer = dvProducersByField.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer dvProducer = dvProducersByField.get(field.number);
// Only checked when assertions are enabled; otherwise a missing producer fails on the call below.
assert dvProducer != null;
return dvProducer.getNumeric(field);
}
/**
 * Delegates to the {@code DocValuesProducer} stored for {@code field} in
 * {@code dvProducersByField} to obtain the field's binary doc values.
 */
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer dvProducer = dvProducersByField.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer dvProducer = dvProducersByField.get(field.number);
// Only checked when assertions are enabled; otherwise a missing producer fails on the call below.
assert dvProducer != null;
return dvProducer.getBinary(field);
}
/**
 * Delegates to the {@code DocValuesProducer} stored for {@code field} in
 * {@code dvProducersByField} to obtain the field's sorted doc values.
 */
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer dvProducer = dvProducersByField.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer dvProducer = dvProducersByField.get(field.number);
// Only checked when assertions are enabled; otherwise a missing producer fails on the call below.
assert dvProducer != null;
return dvProducer.getSorted(field);
}
/**
 * Delegates to the {@code DocValuesProducer} stored for {@code field} in
 * {@code dvProducersByField} to obtain the field's sorted-numeric doc values.
 */
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer dvProducer = dvProducersByField.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer dvProducer = dvProducersByField.get(field.number);
// Only checked when assertions are enabled; otherwise a missing producer fails on the call below.
assert dvProducer != null;
return dvProducer.getSortedNumeric(field);
}
/**
 * Delegates to the {@code DocValuesProducer} stored for {@code field} in
 * {@code dvProducersByField} to obtain the field's sorted-set doc values.
 */
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer dvProducer = dvProducersByField.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer dvProducer = dvProducersByField.get(field.number);
// Only checked when assertions are enabled; otherwise a missing producer fails on the call below.
assert dvProducer != null;
return dvProducer.getSortedSet(field);
}
/**
 * Delegates to the {@code DocValuesProducer} stored for {@code field} in
 * {@code dvProducersByField} to obtain the field's doc values skipper.
 */
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
// Diff view: this name-keyed lookup is the line removed by the change...
DocValuesProducer dvProducer = dvProducersByField.get(field.name);
// ...and this lookup keyed by the int field number is the line that replaces it.
DocValuesProducer dvProducer = dvProducersByField.get(field.number);
// Only checked when assertions are enabled; otherwise a missing producer fails on the call below.
assert dvProducer != null;
return dvProducer.getSkipper(field);
}