StringComparators: No need to convert to UTF-8 for lexicographic comparison. (#11171)

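The lexicographic orderings of UTF-8 byte sequences and of in-memory UTF-16
strings are equivalent. So, we can skip the (expensive) conversion and
get an equivalent result. Thank you, Unicode!
Note: the equivalence holds for code-unit comparison only when no supplementary
character is involved; a surrogate pair (code units U+D800-U+DFFF) sorts before
U+E000-U+FFFF in UTF-16 but after it in UTF-8.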
This commit is contained in:
Gian Merlino 2021-04-30 10:54:20 -07:00 committed by GitHub
parent fdab95ea99
commit 6d82c3cbf1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 4 additions and 13 deletions

View File

@ -21,9 +21,7 @@ package org.apache.druid.query.ordering;
import com.google.common.collect.Ordering; import com.google.common.collect.Ordering;
import com.google.common.primitives.Ints; import com.google.common.primitives.Ints;
import com.google.common.primitives.UnsignedBytes;
import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.common.guava.GuavaUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.maven.artifact.versioning.DefaultArtifactVersion; import org.apache.maven.artifact.versioning.DefaultArtifactVersion;
import java.math.BigDecimal; import java.math.BigDecimal;
@ -51,20 +49,13 @@ public class StringComparators
public static class LexicographicComparator extends StringComparator public static class LexicographicComparator extends StringComparator
{ {
private static final Ordering<String> ORDERING = Ordering.from(new Comparator<String>() // Equivalent to comparing UTF-8 encoded strings as byte arrays.
{ private static final Ordering<String> ORDERING = Ordering.from(String::compareTo).nullsFirst();
@Override
public int compare(String s, String s2)
{
return UnsignedBytes.lexicographicalComparator().compare(
StringUtils.toUtf8(s), StringUtils.toUtf8(s2));
}
}).nullsFirst();
@Override @Override
public int compare(String s, String s2) public int compare(String s, String s2)
{ {
// Avoid conversion to bytes for equal references // Avoid comparisons for equal references
// Assuming we mostly compare different strings, checking s.equals(s2) will only make the comparison slower. // Assuming we mostly compare different strings, checking s.equals(s2) will only make the comparison slower.
//noinspection StringEquality //noinspection StringEquality
if (s == s2) { if (s == s2) {