mirror of
https://github.com/apache/lucene.git
synced 2025-02-10 12:05:36 +00:00
LUCENE-9045: Do not use TreeMap/TreeSet in BlockTree and PerFieldPostingsFormat.
Closes #1007
This commit is contained in:
parent
58473a04b9
commit
bb600e3638
@ -61,6 +61,8 @@ Optimizations
|
||||
* LUCENE-9049: Remove FST cached root arcs now redundant with labels indexed by bitset.
|
||||
This frees some on-heap FST space. (Jack Conradson via Bruno Roustant)
|
||||
|
||||
* LUCENE-9045: Do not use TreeMap/TreeSet in BlockTree and PerFieldPostingsFormat. (Bruno Roustant)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-9001: Fix race condition in SetOnce. (Przemko Robakowski)
|
||||
|
@ -22,10 +22,10 @@ import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
@ -146,7 +146,8 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||
// produce DocsEnum on demand
|
||||
final PostingsReaderBase postingsReader;
|
||||
|
||||
private final TreeMap<String,FieldReader> fields = new TreeMap<>();
|
||||
private final Map<String,FieldReader> fieldMap;
|
||||
private final List<String> fieldList;
|
||||
|
||||
final String segment;
|
||||
|
||||
@ -195,6 +196,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||
if (numFields < 0) {
|
||||
throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
|
||||
}
|
||||
fieldMap = new HashMap<>((int) (numFields / 0.75f) + 1);
|
||||
for (int i = 0; i < numFields; ++i) {
|
||||
final int field = termsIn.readVInt();
|
||||
final long numTerms = termsIn.readVLong();
|
||||
@ -227,13 +229,16 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||
}
|
||||
final FSTLoadMode perFieldLoadMode = getLoadMode(state.readerAttributes, FST_MODE_KEY + "." + fieldInfo.name, fstLoadMode);
|
||||
final long indexStartFP = indexIn.readVLong();
|
||||
FieldReader previous = fields.put(fieldInfo.name,
|
||||
FieldReader previous = fieldMap.put(fieldInfo.name,
|
||||
new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
|
||||
indexStartFP, longsSize, indexIn, minTerm, maxTerm, state.openedFromWriter, perFieldLoadMode));
|
||||
if (previous != null) {
|
||||
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
|
||||
}
|
||||
}
|
||||
List<String> fieldList = new ArrayList<>(fieldMap.keySet());
|
||||
fieldList.sort(null);
|
||||
this.fieldList = Collections.unmodifiableList(fieldList);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
@ -289,24 +294,24 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||
} finally {
|
||||
// Clear so refs to terms index is GCable even if
|
||||
// app hangs onto us:
|
||||
fields.clear();
|
||||
fieldMap.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Collections.unmodifiableSet(fields.keySet()).iterator();
|
||||
return fieldList.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
assert field != null;
|
||||
return fields.get(field);
|
||||
return fieldMap.get(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return fields.size();
|
||||
return fieldMap.size();
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@ -328,7 +333,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long sizeInBytes = postingsReader.ramBytesUsed();
|
||||
for(FieldReader reader : fields.values()) {
|
||||
for(FieldReader reader : fieldMap.values()) {
|
||||
sizeInBytes += reader.ramBytesUsed();
|
||||
}
|
||||
return sizeInBytes;
|
||||
@ -336,7 +341,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||
|
||||
@Override
|
||||
public Collection<Accountable> getChildResources() {
|
||||
List<Accountable> resources = new ArrayList<>(Accountables.namedAccountables("field", fields));
|
||||
List<Accountable> resources = new ArrayList<>(Accountables.namedAccountables("field", fieldMap));
|
||||
resources.add(Accountables.namedAccountable("delegate", postingsReader));
|
||||
return Collections.unmodifiableList(resources);
|
||||
}
|
||||
@ -352,6 +357,6 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + "(fields=" + fields.size() + ",delegate=" + postingsReader + ")";
|
||||
return getClass().getSimpleName() + "(fields=" + fieldMap.size() + ",delegate=" + postingsReader + ")";
|
||||
}
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
@ -31,7 +32,6 @@ import java.util.Map;
|
||||
import java.util.ServiceLoader;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
@ -86,13 +86,41 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
|
||||
|
||||
/** Group of fields written by one PostingsFormat */
|
||||
static class FieldsGroup {
|
||||
final Set<String> fields = new TreeSet<>();
|
||||
int suffix;
|
||||
|
||||
final List<String> fields;
|
||||
final int suffix;
|
||||
/** Custom SegmentWriteState for this group of fields,
|
||||
* with the segmentSuffix uniqueified for this
|
||||
* PostingsFormat */
|
||||
SegmentWriteState state;
|
||||
final SegmentWriteState state;
|
||||
|
||||
private FieldsGroup(List<String> fields, int suffix, SegmentWriteState state) {
|
||||
this.fields = fields;
|
||||
this.suffix = suffix;
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
static class Builder {
|
||||
final Set<String> fields;
|
||||
final int suffix;
|
||||
final SegmentWriteState state;
|
||||
|
||||
Builder(int suffix, SegmentWriteState state) {
|
||||
this.suffix = suffix;
|
||||
this.state = state;
|
||||
fields = new HashSet<>();
|
||||
}
|
||||
|
||||
Builder addField(String field) {
|
||||
fields.add(field);
|
||||
return this;
|
||||
}
|
||||
|
||||
FieldsGroup build() {
|
||||
List<String> fieldList = new ArrayList<>(fields);
|
||||
fieldList.sort(null);
|
||||
return new FieldsGroup(fieldList, suffix, state);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static String getSuffix(String formatName, String suffix) {
|
||||
@ -178,9 +206,8 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
|
||||
}
|
||||
|
||||
private Map<PostingsFormat, FieldsGroup> buildFieldsGroupMapping(Iterable<String> indexedFieldNames) {
|
||||
// Maps a PostingsFormat instance to the suffix it
|
||||
// should use
|
||||
Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<>();
|
||||
// Maps a PostingsFormat instance to the suffix it should use
|
||||
Map<PostingsFormat,FieldsGroup.Builder> formatToGroupBuilders = new HashMap<>();
|
||||
|
||||
// Holds last suffix of each PostingFormat name
|
||||
Map<String,Integer> suffixes = new HashMap<>();
|
||||
@ -196,10 +223,9 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
|
||||
}
|
||||
String formatName = format.getName();
|
||||
|
||||
FieldsGroup group = formatToGroups.get(format);
|
||||
if (group == null) {
|
||||
// First time we are seeing this format; create a
|
||||
// new instance
|
||||
FieldsGroup.Builder groupBuilder = formatToGroupBuilders.get(format);
|
||||
if (groupBuilder == null) {
|
||||
// First time we are seeing this format; create a new instance
|
||||
|
||||
// bump the suffix
|
||||
Integer suffix = suffixes.get(formatName);
|
||||
@ -213,22 +239,23 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
|
||||
String segmentSuffix = getFullSegmentSuffix(field,
|
||||
writeState.segmentSuffix,
|
||||
getSuffix(formatName, Integer.toString(suffix)));
|
||||
group = new FieldsGroup();
|
||||
group.state = new SegmentWriteState(writeState, segmentSuffix);
|
||||
group.suffix = suffix;
|
||||
formatToGroups.put(format, group);
|
||||
groupBuilder = new FieldsGroup.Builder(suffix, new SegmentWriteState(writeState, segmentSuffix));
|
||||
formatToGroupBuilders.put(format, groupBuilder);
|
||||
} else {
|
||||
// we've already seen this format, so just grab its suffix
|
||||
if (!suffixes.containsKey(formatName)) {
|
||||
throw new IllegalStateException("no suffix for format name: " + formatName + ", expected: " + group.suffix);
|
||||
throw new IllegalStateException("no suffix for format name: " + formatName + ", expected: " + groupBuilder.suffix);
|
||||
}
|
||||
}
|
||||
|
||||
group.fields.add(field);
|
||||
groupBuilder.addField(field);
|
||||
|
||||
fieldInfo.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
|
||||
fieldInfo.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(group.suffix));
|
||||
fieldInfo.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(groupBuilder.suffix));
|
||||
}
|
||||
|
||||
Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<>((int) (formatToGroupBuilders.size() / 0.75f) + 1);
|
||||
formatToGroupBuilders.forEach((postingsFormat, builder) -> formatToGroups.put(postingsFormat, builder.build()));
|
||||
return formatToGroups;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user