mirror of https://github.com/apache/druid.git
StringComparators: No need to convert to UTF-8 for lexicographic comparison. (#11171)
The lexicographic orderings of UTF-8 byte sequences and of in-memory UTF-16 strings are equivalent. So, we can skip the (expensive) conversion and get an equivalent result. Thank you, Unicode!
This commit is contained in:
parent
fdab95ea99
commit
6d82c3cbf1
|
@ -21,9 +21,7 @@ package org.apache.druid.query.ordering;
|
|||
|
||||
import com.google.common.collect.Ordering;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.google.common.primitives.UnsignedBytes;
|
||||
import org.apache.druid.common.guava.GuavaUtils;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.maven.artifact.versioning.DefaultArtifactVersion;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
@ -51,20 +49,13 @@ public class StringComparators
|
|||
|
||||
public static class LexicographicComparator extends StringComparator
|
||||
{
|
||||
private static final Ordering<String> ORDERING = Ordering.from(new Comparator<String>()
|
||||
{
|
||||
@Override
|
||||
public int compare(String s, String s2)
|
||||
{
|
||||
return UnsignedBytes.lexicographicalComparator().compare(
|
||||
StringUtils.toUtf8(s), StringUtils.toUtf8(s2));
|
||||
}
|
||||
}).nullsFirst();
|
||||
|
||||
// Equivalent to comparing UTF-8 encoded strings as byte arrays.
|
||||
private static final Ordering<String> ORDERING = Ordering.from(String::compareTo).nullsFirst();
|
||||
|
||||
@Override
|
||||
public int compare(String s, String s2)
|
||||
{
|
||||
// Avoid conversion to bytes for equal references
|
||||
// Avoid comparisons for equal references
|
||||
// Assuming we mostly compare different strings, checking s.equals(s2) will only make the comparison slower.
|
||||
//noinspection StringEquality
|
||||
if (s == s2) {
|
||||
|
|
Loading…
Reference in New Issue