mirror of https://github.com/apache/druid.git
StringComparators: No need to convert to UTF-8 for lexicographic comparison. (#11171)
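The lexicographic orderings of UTF-8 byte sequences and of in-memory UTF-16 strings are equivalent, so we can skip the (expensive) conversion and get an equivalent result. (Caveat: this equivalence holds only when no supplementary characters are involved; surrogate pairs sort below U+E000–U+FFFF in UTF-16 code-unit order but above them in UTF-8 byte order.) Thank you, Unicode!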
This commit is contained in:
parent
fdab95ea99
commit
6d82c3cbf1
|
@ -21,9 +21,7 @@ package org.apache.druid.query.ordering;
|
||||||
|
|
||||||
import com.google.common.collect.Ordering;
|
import com.google.common.collect.Ordering;
|
||||||
import com.google.common.primitives.Ints;
|
import com.google.common.primitives.Ints;
|
||||||
import com.google.common.primitives.UnsignedBytes;
|
|
||||||
import org.apache.druid.common.guava.GuavaUtils;
|
import org.apache.druid.common.guava.GuavaUtils;
|
||||||
import org.apache.druid.java.util.common.StringUtils;
|
|
||||||
import org.apache.maven.artifact.versioning.DefaultArtifactVersion;
|
import org.apache.maven.artifact.versioning.DefaultArtifactVersion;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
import java.math.BigDecimal;
|
||||||
|
@ -51,20 +49,13 @@ public class StringComparators
|
||||||
|
|
||||||
public static class LexicographicComparator extends StringComparator
|
public static class LexicographicComparator extends StringComparator
|
||||||
{
|
{
|
||||||
private static final Ordering<String> ORDERING = Ordering.from(new Comparator<String>()
|
// Equivalent to comparing UTF-8 encoded strings as byte arrays.
|
||||||
{
|
private static final Ordering<String> ORDERING = Ordering.from(String::compareTo).nullsFirst();
|
||||||
@Override
|
|
||||||
public int compare(String s, String s2)
|
|
||||||
{
|
|
||||||
return UnsignedBytes.lexicographicalComparator().compare(
|
|
||||||
StringUtils.toUtf8(s), StringUtils.toUtf8(s2));
|
|
||||||
}
|
|
||||||
}).nullsFirst();
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(String s, String s2)
|
public int compare(String s, String s2)
|
||||||
{
|
{
|
||||||
// Avoid conversion to bytes for equal references
|
// Avoid comparisons for equal references
|
||||||
// Assuming we mostly compare different strings, checking s.equals(s2) will only make the comparison slower.
|
// Assuming we mostly compare different strings, checking s.equals(s2) will only make the comparison slower.
|
||||||
//noinspection StringEquality
|
//noinspection StringEquality
|
||||||
if (s == s2) {
|
if (s == s2) {
|
||||||
|
|
Loading…
Reference in New Issue