LUCENE-9045: Do not use TreeMap/TreeSet in BlockTree and PerFieldPostingsFormat.

Closes #1007
Bruno Roustant 2019-12-02 11:41:56 +01:00
parent 58473a04b9
commit bb600e3638
3 changed files with 63 additions and 29 deletions

lucene/CHANGES.txt

@@ -61,6 +61,8 @@ Optimizations
* LUCENE-9049: Remove FST cached root arcs now redundant with labels indexed by bitset.
This frees some on-heap FST space. (Jack Conradson via Bruno Roustant)
* LUCENE-9045: Do not use TreeMap/TreeSet in BlockTree and PerFieldPostingsFormat. (Bruno Roustant)
Bug Fixes
* LUCENE-9001: Fix race condition in SetOnce. (Przemko Robakowski)
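
The commit's core idea is the same in both files: replace a TreeMap/TreeSet, which pays O(log n) per lookup to keep keys sorted at all times, with a HashMap/HashSet for O(1) average lookups plus a list of keys sorted once, up front, so iteration still happens in the order callers expect. A minimal standalone sketch of that pattern (the class name SortedKeyIndex is hypothetical and not part of the patch):

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class SortedKeyIndex<V> {
  private final Map<String, V> byName = new HashMap<>();
  private final List<String> sortedNames;

  SortedKeyIndex(Map<String, V> entries) {
    byName.putAll(entries);
    List<String> names = new ArrayList<>(byName.keySet());
    names.sort(null);  // natural String order, matching what TreeMap iteration provided
    sortedNames = Collections.unmodifiableList(names);
  }

  V get(String name) {      // point lookup: O(1) on average instead of O(log n)
    return byName.get(name);
  }

  List<String> names() {    // ordered, read-only view for iterator()-style callers
    return sortedNames;
  }
}

The trade-off works here because the key set is built once at construction time and never mutated afterwards, so the one-time sort is paid exactly once.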

BlockTreeTermsReader.java

@@ -22,10 +22,10 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
@@ -146,7 +146,8 @@ public final class BlockTreeTermsReader extends FieldsProducer {
// produce DocsEnum on demand
final PostingsReaderBase postingsReader;
private final TreeMap<String,FieldReader> fields = new TreeMap<>();
private final Map<String,FieldReader> fieldMap;
private final List<String> fieldList;
final String segment;
@@ -195,6 +196,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
if (numFields < 0) {
throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
}
fieldMap = new HashMap<>((int) (numFields / 0.75f) + 1);
for (int i = 0; i < numFields; ++i) {
final int field = termsIn.readVInt();
final long numTerms = termsIn.readVLong();
@@ -227,13 +229,16 @@
}
final FSTLoadMode perFieldLoadMode = getLoadMode(state.readerAttributes, FST_MODE_KEY + "." + fieldInfo.name, fstLoadMode);
final long indexStartFP = indexIn.readVLong();
FieldReader previous = fields.put(fieldInfo.name,
FieldReader previous = fieldMap.put(fieldInfo.name,
new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
indexStartFP, longsSize, indexIn, minTerm, maxTerm, state.openedFromWriter, perFieldLoadMode));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
}
}
List<String> fieldList = new ArrayList<>(fieldMap.keySet());
fieldList.sort(null);
this.fieldList = Collections.unmodifiableList(fieldList);
success = true;
} finally {
if (!success) {
@@ -289,24 +294,24 @@
} finally {
// Clear so refs to terms index is GCable even if
// app hangs onto us:
fields.clear();
fieldMap.clear();
}
}
@Override
public Iterator<String> iterator() {
return Collections.unmodifiableSet(fields.keySet()).iterator();
return fieldList.iterator();
}
@Override
public Terms terms(String field) throws IOException {
assert field != null;
return fields.get(field);
return fieldMap.get(field);
}
@Override
public int size() {
return fields.size();
return fieldMap.size();
}
// for debugging
@@ -328,7 +333,7 @@
@Override
public long ramBytesUsed() {
long sizeInBytes = postingsReader.ramBytesUsed();
for(FieldReader reader : fields.values()) {
for(FieldReader reader : fieldMap.values()) {
sizeInBytes += reader.ramBytesUsed();
}
return sizeInBytes;
@@ -336,7 +341,7 @@
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>(Accountables.namedAccountables("field", fields));
List<Accountable> resources = new ArrayList<>(Accountables.namedAccountables("field", fieldMap));
resources.add(Accountables.namedAccountable("delegate", postingsReader));
return Collections.unmodifiableList(resources);
}
@@ -352,6 +357,6 @@
@Override
public String toString() {
return getClass().getSimpleName() + "(fields=" + fields.size() + ",delegate=" + postingsReader + ")";
return getClass().getSimpleName() + "(fields=" + fieldMap.size() + ",delegate=" + postingsReader + ")";
}
}
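
A note on the fieldMap allocation in the constructor above: a java.util.HashMap rehashes once its size exceeds capacity times load factor (0.75 by default), so requesting (numFields / 0.75f) + 1 slots lets all numFields entries be inserted without any resize. A small illustration with a hypothetical field count (the helper class below is for explanation only, not part of the patch):

import java.util.HashMap;
import java.util.Map;

class FieldMapSizing {
  // Mirrors the sizing expression used for fieldMap: ask for enough capacity that
  // expectedEntries puts stay below the resize threshold (capacity * 0.75).
  static <V> Map<String, V> newPresizedMap(int expectedEntries) {
    return new HashMap<>((int) (expectedEntries / 0.75f) + 1);
  }

  public static void main(String[] args) {
    // 100 fields -> requested capacity 134; HashMap rounds that up to the next
    // power of two (256), whose threshold 256 * 0.75 = 192 covers all 100 puts.
    Map<String, Long> fieldMap = newPresizedMap(100);
    fieldMap.put("title", 1L);
    System.out.println(fieldMap.size());
  }
}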

PerFieldPostingsFormat.java

@@ -24,6 +24,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
@@ -31,7 +32,6 @@ import java.util.Map;
import java.util.ServiceLoader;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
@@ -86,13 +86,41 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
/** Group of fields written by one PostingsFormat */
static class FieldsGroup {
final Set<String> fields = new TreeSet<>();
int suffix;
final List<String> fields;
final int suffix;
/** Custom SegmentWriteState for this group of fields,
* with the segmentSuffix uniqueified for this
* PostingsFormat */
SegmentWriteState state;
final SegmentWriteState state;
private FieldsGroup(List<String> fields, int suffix, SegmentWriteState state) {
this.fields = fields;
this.suffix = suffix;
this.state = state;
}
static class Builder {
final Set<String> fields;
final int suffix;
final SegmentWriteState state;
Builder(int suffix, SegmentWriteState state) {
this.suffix = suffix;
this.state = state;
fields = new HashSet<>();
}
Builder addField(String field) {
fields.add(field);
return this;
}
FieldsGroup build() {
List<String> fieldList = new ArrayList<>(fields);
fieldList.sort(null);
return new FieldsGroup(fieldList, suffix, state);
}
}
};
static String getSuffix(String formatName, String suffix) {
@@ -178,9 +206,8 @@
}
private Map<PostingsFormat, FieldsGroup> buildFieldsGroupMapping(Iterable<String> indexedFieldNames) {
// Maps a PostingsFormat instance to the suffix it
// should use
Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<>();
// Maps a PostingsFormat instance to the suffix it should use
Map<PostingsFormat,FieldsGroup.Builder> formatToGroupBuilders = new HashMap<>();
// Holds last suffix of each PostingFormat name
Map<String,Integer> suffixes = new HashMap<>();
@@ -196,10 +223,9 @@
}
String formatName = format.getName();
FieldsGroup group = formatToGroups.get(format);
if (group == null) {
// First time we are seeing this format; create a
// new instance
FieldsGroup.Builder groupBuilder = formatToGroupBuilders.get(format);
if (groupBuilder == null) {
// First time we are seeing this format; create a new instance
// bump the suffix
Integer suffix = suffixes.get(formatName);
@@ -213,22 +239,23 @@
String segmentSuffix = getFullSegmentSuffix(field,
writeState.segmentSuffix,
getSuffix(formatName, Integer.toString(suffix)));
group = new FieldsGroup();
group.state = new SegmentWriteState(writeState, segmentSuffix);
group.suffix = suffix;
formatToGroups.put(format, group);
groupBuilder = new FieldsGroup.Builder(suffix, new SegmentWriteState(writeState, segmentSuffix));
formatToGroupBuilders.put(format, groupBuilder);
} else {
// we've already seen this format, so just grab its suffix
if (!suffixes.containsKey(formatName)) {
throw new IllegalStateException("no suffix for format name: " + formatName + ", expected: " + group.suffix);
throw new IllegalStateException("no suffix for format name: " + formatName + ", expected: " + groupBuilder.suffix);
}
}
group.fields.add(field);
groupBuilder.addField(field);
fieldInfo.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
fieldInfo.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(group.suffix));
fieldInfo.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(groupBuilder.suffix));
}
Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<>((int) (formatToGroupBuilders.size() / 0.75f) + 1);
formatToGroupBuilders.forEach((postingsFormat, builder) -> formatToGroups.put(postingsFormat, builder.build()));
return formatToGroups;
}
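
With the Builder in place, fields are accumulated in any order into a plain HashSet and sorted only once, when build() copies them into a sorted list, so the per-format field order stays deterministic without keeping a TreeSet. A hedged usage sketch from a hypothetical helper class in the same package (the null SegmentWriteState is only to keep the snippet minimal; real callers pass the group's actual write state):

package org.apache.lucene.codecs.perfield;

class FieldsGroupBuilderSketch {
  static PerFieldPostingsFormat.FieldsGroup buildExample() {
    PerFieldPostingsFormat.FieldsGroup.Builder builder =
        new PerFieldPostingsFormat.FieldsGroup.Builder(0, null); // null state: illustration only
    builder.addField("title").addField("body").addField("author"); // any insertion order
    PerFieldPostingsFormat.FieldsGroup group = builder.build();
    // group.fields is now ["author", "body", "title"]: sorted once at build() time,
    // so later iteration order is stable regardless of HashSet ordering.
    return group;
  }
}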