Cleanup redundant allocations and code around Comparator use (#13795)

Noticed some visible allocations in CompetitiveImpactAccumulator
during benchmarking and fixed the needless allocation for the comparator
in that class as well as a couple other similar spots where needless
classes and/or objects could easily be replaced by more lightweight
solutions.
This commit is contained in:
Armin Braun 2024-09-17 14:34:31 +02:00 committed by GitHub
parent a817426511
commit 644feeb02a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 113 additions and 263 deletions

View File

@@ -61,8 +61,7 @@ class GeneratingSuggester {
private List<Weighted<Root<String>>> findSimilarDictionaryEntries(
String word, WordCase originalCase) {
Comparator<Weighted<Root<String>>> natural = Comparator.naturalOrder();
PriorityQueue<Weighted<Root<String>>> roots = new PriorityQueue<>(natural.reversed());
PriorityQueue<Weighted<Root<String>>> roots = new PriorityQueue<>(Comparator.reverseOrder());
char[] excludeFlags = dictionary.allNonSuggestibleFlags();
FlagEnumerator.Lookup flagLookup = dictionary.flagLookup;

View File

@@ -17,8 +17,6 @@
package org.apache.lucene.analysis.hunspell;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -117,7 +115,16 @@ public final class HunspellStemFilter extends TokenFilter {
}
if (longestOnly && buffer.size() > 1) {
Collections.sort(buffer, lengthComparator);
buffer.sort(
(o1, o2) -> {
int cmp = Integer.compare(o2.length, o1.length);
if (cmp == 0) {
// tie break on text
return o2.compareTo(o1);
} else {
return cmp;
}
});
}
CharsRef stem = buffer.remove(0);
@ -139,18 +146,4 @@ public final class HunspellStemFilter extends TokenFilter {
super.reset();
buffer = null;
}
static final Comparator<CharsRef> lengthComparator =
new Comparator<CharsRef>() {
@Override
public int compare(CharsRef o1, CharsRef o2) {
int cmp = Integer.compare(o2.length, o1.length);
if (cmp == 0) {
// tie break on text
return o2.compareTo(o1);
} else {
return cmp;
}
}
};
}

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -147,26 +146,23 @@ public class FingerprintFilter extends TokenFilter {
Arrays.sort(
items,
new Comparator<Object>() {
@Override
public int compare(Object o1, Object o2) {
char[] v1 = (char[]) o1;
char[] v2 = (char[]) o2;
int len1 = v1.length;
int len2 = v2.length;
int lim = Math.min(len1, len2);
(o1, o2) -> {
char[] v1 = (char[]) o1;
char[] v2 = (char[]) o2;
int len1 = v1.length;
int len2 = v2.length;
int lim = Math.min(len1, len2);
int k = 0;
while (k < lim) {
char c1 = v1[k];
char c2 = v2[k];
if (c1 != c2) {
return c1 - c2;
}
k++;
int k = 0;
while (k < lim) {
char c1 = v1[k];
char c2 = v2[k];
if (c1 != c2) {
return c1 - c2;
}
return len1 - len2;
k++;
}
return len1 - len2;
});
// TODO lets append directly to termAttribute?

View File

@@ -20,8 +20,6 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.morph.Dictionary;
@@ -83,14 +81,7 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
// TODO: should we allow multiple segmentations per input 'phrase'?
// the old treemap didn't support this either, and i'm not sure if it's needed/useful?
Collections.sort(
featureEntries,
new Comparator<String[]>() {
@Override
public int compare(String[] left, String[] right) {
return left[0].compareTo(right[0]);
}
});
featureEntries.sort((left, right) -> left[0].compareTo(right[0]));
List<String> data = new ArrayList<>(featureEntries.size());
List<int[]> segmentations = new ArrayList<>(featureEntries.size());

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.codecs;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
@@ -39,20 +38,17 @@ public final class CompetitiveImpactAccumulator {
/** Sole constructor. */
public CompetitiveImpactAccumulator() {
maxFreqs = new int[256];
Comparator<Impact> comparator =
new Comparator<Impact>() {
@Override
public int compare(Impact o1, Impact o2) {
// greater freqs compare greater
int cmp = Integer.compare(o1.freq, o2.freq);
if (cmp == 0) {
// greater norms compare lower
cmp = Long.compareUnsigned(o2.norm, o1.norm);
}
return cmp;
}
};
otherFreqNormPairs = new TreeSet<>(comparator);
otherFreqNormPairs =
new TreeSet<>(
(o1, o2) -> {
// greater freqs compare greater
int cmp = Integer.compare(o1.freq, o2.freq);
if (cmp == 0) {
// greater norms compare lower
cmp = Long.compareUnsigned(o2.norm, o1.norm);
}
return cmp;
});
}
/** Reset to the same state it was in after creation. */

View File

@@ -17,7 +17,6 @@
package org.apache.lucene.document;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.MatchNoDocsQuery;
@@ -238,14 +237,7 @@ public final class BinaryPoint extends Field {
// Don't unexpectedly change the user's incoming values array:
byte[][] sortedValues = values.clone();
Arrays.sort(
sortedValues,
new Comparator<byte[]>() {
@Override
public int compare(byte[] a, byte[] b) {
return Arrays.compareUnsigned(a, 0, a.length, b, 0, b.length);
}
});
Arrays.sort(sortedValues, (a, b) -> Arrays.compareUnsigned(a, 0, a.length, b, 0, b.length));
final BytesRef encoded = new BytesRef(new byte[bytesPerDim]);

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.document;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery;
@@ -288,14 +287,7 @@ public class InetAddressPoint extends Field {
sortedValues[i] = encode(values[i]);
}
Arrays.sort(
sortedValues,
new Comparator<byte[]>() {
@Override
public int compare(byte[] a, byte[] b) {
return Arrays.compareUnsigned(a, 0, BYTES, b, 0, BYTES);
}
});
Arrays.sort(sortedValues, (a, b) -> Arrays.compareUnsigned(a, 0, BYTES, b, 0, BYTES));
final BytesRef encoded = new BytesRef(new byte[BYTES]);

View File

@@ -20,7 +20,6 @@ import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;
import org.apache.lucene.geo.Rectangle;
@@ -240,18 +239,15 @@ class NearestNeighbor {
final PriorityQueue<NearestHit> hitQueue =
new PriorityQueue<>(
n,
new Comparator<NearestHit>() {
@Override
public int compare(NearestHit a, NearestHit b) {
// sort by opposite distanceSortKey natural order
int cmp = Double.compare(a.distanceSortKey, b.distanceSortKey);
if (cmp != 0) {
return -cmp;
}
// tie-break by higher docID:
return b.docID - a.docID;
(a, b) -> {
// sort by opposite distanceSortKey natural order
int cmp = Double.compare(a.distanceSortKey, b.distanceSortKey);
if (cmp != 0) {
return -cmp;
}
// tie-break by higher docID:
return b.docID - a.docID;
});
// Holds all cells, sorted by closest to the point:

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -32,14 +31,6 @@ import org.apache.lucene.util.PriorityQueue;
*/
public final class MultiTermsEnum extends BaseTermsEnum {
private static final Comparator<TermsEnumWithSlice> INDEX_COMPARATOR =
new Comparator<TermsEnumWithSlice>() {
@Override
public int compare(TermsEnumWithSlice o1, TermsEnumWithSlice o2) {
return o1.subIndex - o2.subIndex;
}
};
private final TermMergeQueue queue;
// all of our subs (one per sub-reader)
private final TermsEnumWithSlice[] subs;
@@ -338,7 +329,7 @@ public final class MultiTermsEnum extends BaseTermsEnum {
int upto = 0;
ArrayUtil.timSort(top, 0, numTop, INDEX_COMPARATOR);
ArrayUtil.timSort(top, 0, numTop, (o1, o2) -> o1.subIndex - o2.subIndex);
for (int i = 0; i < numTop; i++) {

View File

@@ -40,7 +40,7 @@ final class DisjunctionScoreBlockBoundaryPropagator {
throw new RuntimeException(e);
}
})
.thenComparing(Comparator.comparing(s -> s.iterator().cost()));
.thenComparing(s -> s.iterator().cost());
private final Scorer[] scorers;
private final float[] maxScores;

View File

@@ -50,14 +50,7 @@ public abstract class QueryRescorer extends Rescorer {
throws IOException {
ScoreDoc[] hits = firstPassTopDocs.scoreDocs.clone();
Arrays.sort(
hits,
new Comparator<ScoreDoc>() {
@Override
public int compare(ScoreDoc a, ScoreDoc b) {
return a.doc - b.doc;
}
});
Arrays.sort(hits, (a, b) -> a.doc - b.doc);
List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
@ -111,19 +104,16 @@ public abstract class QueryRescorer extends Rescorer {
}
Comparator<ScoreDoc> sortDocComparator =
new Comparator<ScoreDoc>() {
@Override
public int compare(ScoreDoc a, ScoreDoc b) {
// Sort by score descending, then docID ascending:
if (a.score > b.score) {
return -1;
} else if (a.score < b.score) {
return 1;
} else {
// This subtraction can't overflow int
// because docIDs are >= 0:
return a.doc - b.doc;
}
(a, b) -> {
// Sort by score descending, then docID ascending:
if (a.score > b.score) {
return -1;
} else if (a.score < b.score) {
return 1;
} else {
// This subtraction can't overflow int
// because docIDs are >= 0:
return a.doc - b.doc;
}
};

View File

@@ -17,7 +17,6 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
@@ -160,7 +159,7 @@ public abstract class TopTermsRewrite<B> extends TermCollectingRewrite<B> {
final B b = getTopLevelBuilder();
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);
ArrayUtil.timSort(scoreTerms, (st1, st2) -> st1.bytes.get().compareTo(st2.bytes.get()));
for (final ScoreTerm st : scoreTerms) {
final Term term = new Term(query.field, st.bytes.toBytesRef());
@ -188,14 +187,6 @@ public abstract class TopTermsRewrite<B> extends TermCollectingRewrite<B> {
return true;
}
private static final Comparator<ScoreTerm> scoreTermSortByTermComp =
new Comparator<ScoreTerm>() {
@Override
public int compare(ScoreTerm st1, ScoreTerm st2) {
return st1.bytes.get().compareTo(st2.bytes.get());
}
};
static final class ScoreTerm implements Comparable<ScoreTerm> {
public final BytesRefBuilder bytes = new BytesRefBuilder();
public float boost;

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
@@ -98,14 +97,7 @@ public class Accountables {
for (Map.Entry<?, ? extends Accountable> kv : in.entrySet()) {
resources.add(namedAccountable(prefix + " '" + kv.getKey() + "'", kv.getValue()));
}
Collections.sort(
resources,
new Comparator<Accountable>() {
@Override
public int compare(Accountable o1, Accountable o2) {
return o1.toString().compareTo(o2.toString());
}
});
resources.sort((o1, o2) -> o1.toString().compareTo(o2.toString()));
return Collections.unmodifiableList(resources);
}

View File

@@ -20,7 +20,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -40,20 +39,6 @@ import org.apache.lucene.util.PriorityQueue;
/** Base class for SSDV faceting implementations. */
abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
private static final Comparator<FacetResult> FACET_RESULT_COMPARATOR =
new Comparator<>() {
@Override
public int compare(FacetResult a, FacetResult b) {
if (a.value.intValue() > b.value.intValue()) {
return -1;
} else if (b.value.intValue() > a.value.intValue()) {
return 1;
} else {
return a.dim.compareTo(b.dim);
}
}
};
final SortedSetDocValuesReaderState state;
final FacetsConfig stateConfig;
final SortedSetDocValues dv;
@@ -140,7 +125,16 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
}
// Sort by highest count:
results.sort(FACET_RESULT_COMPARATOR);
results.sort(
(a, b) -> {
if (a.value.intValue() > b.value.intValue()) {
return -1;
} else if (b.value.intValue() > a.value.intValue()) {
return 1;
} else {
return a.dim.compareTo(b.dim);
}
});
return results;
}

View File

@@ -55,20 +55,6 @@ abstract class TaxonomyFacets extends Facets {
}
}
private static final Comparator<FacetResult> BY_VALUE_THEN_DIM =
new Comparator<FacetResult>() {
@Override
public int compare(FacetResult a, FacetResult b) {
if (a.value.doubleValue() > b.value.doubleValue()) {
return -1;
} else if (b.value.doubleValue() > a.value.doubleValue()) {
return 1;
} else {
return a.dim.compareTo(b.dim);
}
}
};
/** Index field name provided to the constructor. */
final String indexFieldName;
@@ -591,7 +577,16 @@ abstract class TaxonomyFacets extends Facets {
}
// Sort by highest value, tie break by dim:
results.sort(BY_VALUE_THEN_DIM);
results.sort(
(a, b) -> {
if (a.value.doubleValue() > b.value.doubleValue()) {
return -1;
} else if (b.value.doubleValue() > a.value.doubleValue()) {
return 1;
} else {
return a.dim.compareTo(b.dim);
}
});
return results;
}

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.search.grouping;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.NavigableSet;
@@ -126,28 +125,6 @@ public abstract class GroupFacetCollector extends SimpleCollector {
*/
public static class GroupedFacetResult {
private static final Comparator<FacetEntry> orderByCountAndValue =
new Comparator<FacetEntry>() {
@Override
public int compare(FacetEntry a, FacetEntry b) {
int cmp = b.count - a.count; // Highest count first!
if (cmp != 0) {
return cmp;
}
return a.value.compareTo(b.value);
}
};
private static final Comparator<FacetEntry> orderByValue =
new Comparator<FacetEntry>() {
@Override
public int compare(FacetEntry a, FacetEntry b) {
return a.value.compareTo(b.value);
}
};
private final int maxSize;
private final NavigableSet<FacetEntry> facetEntries;
private final int totalMissingCount;
@@ -157,7 +134,17 @@ public abstract class GroupFacetCollector extends SimpleCollector {
public GroupedFacetResult(
int size, int minCount, boolean orderByCount, int totalCount, int totalMissingCount) {
this.facetEntries = new TreeSet<>(orderByCount ? orderByCountAndValue : orderByValue);
this.facetEntries =
new TreeSet<>(
orderByCount
? (a, b) -> {
int cmp = b.count - a.count; // Highest count first!
if (cmp != 0) {
return cmp;
}
return a.value.compareTo(b.value);
}
: (a, b) -> a.value.compareTo(b.value));
this.totalMissingCount = totalMissingCount;
this.totalCount = totalCount;
maxSize = size;

View File

@@ -18,8 +18,6 @@ package org.apache.lucene.search.vectorhighlight;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -377,16 +375,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
for (List<WeightedFragInfo> weightedFragInfos : fieldNameToFragInfos.values()) {
result.addAll(weightedFragInfos);
}
Collections.sort(
result,
new Comparator<WeightedFragInfo>() {
@Override
public int compare(
FieldFragList.WeightedFragInfo info1, FieldFragList.WeightedFragInfo info2) {
return info1.getStartOffset() - info2.getStartOffset();
}
});
result.sort((info1, info2) -> info1.getStartOffset() - info2.getStartOffset());
return result;
}

View File

@@ -615,7 +615,7 @@ public class FreeTextSuggester extends Lookup {
// Must do num+seen.size() for queue depth because we may
// reject up to seen.size() paths in acceptResult():
Util.TopNSearcher<Long> searcher =
new Util.TopNSearcher<Long>(fst, num, num + seen.size(), weightComparator) {
new Util.TopNSearcher<>(fst, num, num + seen.size(), Comparator.naturalOrder()) {
BytesRefBuilder scratchBytes = new BytesRefBuilder();
@ -702,19 +702,15 @@ public class FreeTextSuggester extends Lookup {
backoff *= ALPHA;
}
Collections.sort(
results,
new Comparator<LookupResult>() {
@Override
public int compare(LookupResult a, LookupResult b) {
if (a.value > b.value) {
return -1;
} else if (a.value < b.value) {
return 1;
} else {
// Tie break by UTF16 sort order:
return ((String) a.key).compareTo((String) b.key);
}
results.sort(
(a, b) -> {
if (a.value > b.value) {
return -1;
} else if (a.value < b.value) {
return 1;
} else {
// Tie break by UTF16 sort order:
return ((String) a.key).compareTo((String) b.key);
}
});
@ -761,14 +757,6 @@ public class FreeTextSuggester extends Lookup {
return output;
}
static final Comparator<Long> weightComparator =
new Comparator<Long>() {
@Override
public int compare(Long left, Long right) {
return left.compareTo(right);
}
};
/** Returns the weight associated with an input string, or null if it does not exist. */
public Object get(CharSequence key) {
throw new UnsupportedOperationException();

View File

@@ -146,10 +146,13 @@ public final class NRTSuggester implements Accountable {
final CharsRefBuilder spare = new CharsRefBuilder();
Comparator<Pair<Long, BytesRef>> comparator = getComparator();
Util.TopNSearcher<Pair<Long, BytesRef>> searcher =
new Util.TopNSearcher<Pair<Long, BytesRef>>(
fst, topN, queueSize, comparator, new ScoringPathComparator(scorer)) {
new Util.TopNSearcher<>(
fst,
topN,
queueSize,
(o1, o2) -> Long.compare(o1.output1, o2.output1),
new ScoringPathComparator(scorer)) {
private final ByteArrayDataInput scratchInput = new ByteArrayDataInput();
@ -275,15 +278,6 @@ public final class NRTSuggester implements Accountable {
}
}
private static Comparator<Pair<Long, BytesRef>> getComparator() {
return new Comparator<Pair<Long, BytesRef>>() {
@Override
public int compare(Pair<Long, BytesRef> o1, Pair<Long, BytesRef> o2) {
return Long.compare(o1.output1, o2.output1);
}
};
}
/**
* Simple heuristics to try to avoid over-pruning potential suggestions by the TopNSearcher. Since
* suggestion entries can be rejected if they belong to a deleted document, the length of the

View File

@@ -194,7 +194,8 @@ public class WFSTCompletionLookup extends Lookup {
// complete top-N
TopResults<Long> completions = null;
try {
completions = Util.shortestPaths(fst, arc, prefixOutput, weightComparator, num, !exactFirst);
completions =
Util.shortestPaths(fst, arc, prefixOutput, Comparator.naturalOrder(), num, !exactFirst);
assert completions.isComplete;
} catch (IOException bogus) {
throw new RuntimeException(bogus);
@ -301,14 +302,6 @@ public class WFSTCompletionLookup extends Lookup {
}
}
static final Comparator<Long> weightComparator =
new Comparator<Long>() {
@Override
public int compare(Long left, Long right) {
return left.compareTo(right);
}
};
/** Returns byte size of the underlying FST. */
@Override
public long ramBytesUsed() {

View File

@@ -18,8 +18,6 @@ package org.apache.lucene.tests.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
@@ -49,14 +47,7 @@ public class PerThreadPKLookup {
List<LeafReaderContext> leaves = new ArrayList<>(r.leaves());
// Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
Collections.sort(
leaves,
new Comparator<LeafReaderContext>() {
@Override
public int compare(LeafReaderContext c1, LeafReaderContext c2) {
return c2.reader().numDocs() - c1.reader().numDocs();
}
});
leaves.sort((c1, c2) -> c2.reader().numDocs() - c1.reader().numDocs());
termsEnums = new TermsEnum[leaves.size()];
postingsEnums = new PostingsEnum[leaves.size()];