From 78d0b0abce2b27b88a53790684d81530a2318fc9 Mon Sep 17 00:00:00 2001
From: Gian Merlino <gianmerlino@gmail.com>
Date: Wed, 16 Nov 2022 07:15:00 -0800
Subject: [PATCH]  Add string comparison methods to StringUtils, fix dictionary
 comparisons. (#13364)

* Add string comparison methods to StringUtils, fix dictionary comparisons.

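There are various places in Druid code where we assume that String.compareTo
is consistent with Unicode code-point ordering. Sadly this is not the case:
String.compareTo compares UTF-16 code units, so supplementary characters
(encoded as surrogates in the range 0xD800-0xDFFF) sort before characters in
U+E000-U+FFFF, even though their code points are higher.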

To help deal with this, this patch introduces the following helpers:

1) compareUnicode: Compares two Strings in Unicode code-point order.
2) compareUtf8: Compares two UTF-8 byte arrays in Unicode code-point order.
   Equivalent to comparison as unsigned bytes.
3) compareUtf8UsingJavaStringOrdering: Compares two UTF-8 byte arrays, or
   ByteBuffers, in a manner consistent with String.compareTo.

There is no helper for comparing two Strings in a manner consistent
with String.compareTo, because for that we can use compareTo directly.

The patch also fixes an inconsistency between the String and UTF-8
dictionary GenericIndexed flavors of string-typed columns: they were
formerly using incompatible comparators.

* Adjust test.

* FrontCodedIndexed updates.

* Add test.

* Fix comments.
---
 .../druid/benchmark/BoundFilterBenchmark.java |   2 +-
 ...ryEncodedStringIndexSupplierBenchmark.java |   2 +-
 .../DimensionPredicateFilterBenchmark.java    |   2 +-
 .../benchmark/FrontCodedIndexedBenchmark.java |   2 +-
 .../druid/benchmark/InFilterBenchmark.java    |   2 +-
 .../druid/benchmark/LikeFilterBenchmark.java  |   2 +-
 .../java/util/common/ByteBufferUtils.java     |  52 ++-----
 .../druid/java/util/common/StringUtils.java   | 142 ++++++++++++++++++
 .../java/util/common/ByteBufferUtilsTest.java |  59 ++++++--
 .../java/util/common/StringUtilsTest.java     |  96 ++++++++++++
 .../druid/frame/write/FrameWriterUtils.java   |   2 +-
 .../druid/query/filter/InDimFilter.java       |   2 +-
 .../query/ordering/StringComparators.java     |   8 +-
 .../org/apache/druid/segment/IndexIO.java     |   2 +-
 .../column/IndexedUtf8ValueSetIndex.java      |   2 +-
 .../segment/column/Utf8ValueSetIndex.java     |   2 +-
 .../druid/segment/data/FrontCodedIndexed.java |  20 +--
 .../segment/data/FrontCodedIndexedWriter.java |  24 ++-
 .../druid/segment/data/GenericIndexed.java    |  13 +-
 .../nested/NestedDataColumnSupplier.java      |   4 +-
 .../DictionaryEncodedColumnPartSerde.java     |   2 +-
 .../segment/data/FrontCodedIndexedTest.java   |   4 +-
 .../filter/ExtractionDimFilterTest.java       |   2 +-
 .../PredicateValueMatcherFactoryTest.java     |   4 +-
 .../segment/filter/ValueMatchersTest.java     |   6 +-
 ...edFieldLiteralColumnIndexSupplierTest.java |   2 +-
 ...tionaryEncodedStringIndexSupplierTest.java |   4 +-
 27 files changed, 359 insertions(+), 105 deletions(-)
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
index cdb3cf2f7ca..819c528198b 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
@@ -179,7 +179,7 @@ public class BoundFilterBenchmark
     final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
         FluentIterable.from(ints)
                       .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
-        GenericIndexed.BYTE_BUFFER_STRATEGY
+        GenericIndexed.UTF8_STRATEGY
     );
     selector = new MockColumnIndexSelector(
         bitmapFactory,
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java
index 282b25e1986..1806f28ad1d 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java
@@ -101,7 +101,7 @@ public class DictionaryEncodedStringIndexSupplierBenchmark
       final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
           FluentIterable.from(ints)
                         .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
-          GenericIndexed.BYTE_BUFFER_STRATEGY
+          GenericIndexed.UTF8_STRATEGY
       );
       final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.fromIterable(
           () -> IntStream.range(0, dictionarySize)
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
index f01b09a79fe..34a35b559a0 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
@@ -130,7 +130,7 @@ public class DimensionPredicateFilterBenchmark
     final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
         FluentIterable.from(ints)
                       .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
-        GenericIndexed.BYTE_BUFFER_STRATEGY
+        GenericIndexed.UTF8_STRATEGY
     );
     final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.fromIterable(
         FluentIterable.from(ints)
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/FrontCodedIndexedBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/FrontCodedIndexedBenchmark.java
index 2dba1ba5c0e..30656630656 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/FrontCodedIndexedBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/FrontCodedIndexedBenchmark.java
@@ -174,7 +174,7 @@ public class FrontCodedIndexedBenchmark
 
     genericIndexed = GenericIndexed.read(
         byteBufferGeneric,
-        GenericIndexed.BYTE_BUFFER_STRATEGY,
+        GenericIndexed.UTF8_STRATEGY,
         SmooshedFileMapper.load(smooshDirFrontCoded)
     );
     frontCodedIndexed = FrontCodedIndexed.read(
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java
index 09c3253af08..0a97367493d 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java
@@ -93,7 +93,7 @@ public class InFilterBenchmark
     final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
         FluentIterable.from(ints)
                       .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
-        GenericIndexed.BYTE_BUFFER_STRATEGY
+        GenericIndexed.UTF8_STRATEGY
     );
     final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.fromIterable(
         () -> IntStream.range(0, dictionarySize)
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
index 1369d69787d..bb85422792f 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
@@ -130,7 +130,7 @@ public class LikeFilterBenchmark
     final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
         FluentIterable.from(ints)
                       .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
-        GenericIndexed.BYTE_BUFFER_STRATEGY
+        GenericIndexed.UTF8_STRATEGY
     );
     final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.fromIterable(
         FluentIterable.from(ints)
diff --git a/core/src/main/java/org/apache/druid/java/util/common/ByteBufferUtils.java b/core/src/main/java/org/apache/druid/java/util/common/ByteBufferUtils.java
index 8209049b844..fb67c323834 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/ByteBufferUtils.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/ByteBufferUtils.java
@@ -47,7 +47,7 @@ public class ByteBufferUtils
   // null if unmap is supported
   private static final RuntimeException UNMAP_NOT_SUPPORTED_EXCEPTION;
 
-  private static final Comparator<ByteBuffer> COMPARATOR_UNSIGNED = new UnsignedByteBufferComparator();
+  private static final Comparator<ByteBuffer> COMPARATOR_UTF8 = new Utf8ByteBufferComparator();
 
   static {
     Object unmap = null;
@@ -214,40 +214,12 @@ public class ByteBufferUtils
   }
 
   /**
-   * Compares two ByteBuffer ranges using unsigned byte ordering.
+   * Compares two ByteBuffers from their positions to their limits using ordering consistent with
+   * {@link String#compareTo(String)}. Null buffers are accepted, and are ordered earlier than any nonnull buffer.
    *
-   * Different from {@link ByteBuffer#compareTo}, which uses signed ordering.
+   * Different from {@link ByteBuffer#compareTo}, which uses signed-bytes ordering.
    */
-  public static int compareByteBuffers(
-      final ByteBuffer buf1,
-      final int position1,
-      final int length1,
-      final ByteBuffer buf2,
-      final int position2,
-      final int length2
-  )
-  {
-    final int commonLength = Math.min(length1, length2);
-
-    for (int i = 0; i < commonLength; i++) {
-      final byte byte1 = buf1.get(position1 + i);
-      final byte byte2 = buf2.get(position2 + i);
-      final int cmp = (byte1 & 0xFF) - (byte2 & 0xFF); // Unsigned comparison
-      if (cmp != 0) {
-        return cmp;
-      }
-    }
-
-    return Integer.compare(length1, length2);
-  }
-
-  /**
-   * Compares two ByteBuffers from their positions to their limits using unsigned byte ordering. Accepts null
-   * buffers, which are ordered earlier than any nonnull buffer.
-   *
-   * Different from {@link ByteBuffer#compareTo}, which uses signed ordering.
-   */
-  public static int compareByteBuffers(
+  public static int compareUtf8ByteBuffers(
       @Nullable final ByteBuffer buf1,
       @Nullable final ByteBuffer buf2
   )
@@ -260,7 +232,7 @@ public class ByteBufferUtils
       return 1;
     }
 
-    return ByteBufferUtils.compareByteBuffers(
+    return StringUtils.compareUtf8UsingJavaStringOrdering(
         buf1,
         buf1.position(),
         buf1.remaining(),
@@ -271,20 +243,20 @@ public class ByteBufferUtils
   }
 
   /**
-   * Comparator that compares two {@link ByteBuffer} using unsigned ordering. Null buffers are accepted, and
-   * are ordered earlier than any nonnull buffer.
+   * Comparator that compares two {@link ByteBuffer} using ordering consistent with {@link String#compareTo(String)}.
+   * Null buffers are accepted, and are ordered earlier than any nonnull buffer.
    */
-  public static Comparator<ByteBuffer> unsignedComparator()
+  public static Comparator<ByteBuffer> utf8Comparator()
   {
-    return COMPARATOR_UNSIGNED;
+    return COMPARATOR_UTF8;
   }
 
-  private static class UnsignedByteBufferComparator implements Comparator<ByteBuffer>
+  private static class Utf8ByteBufferComparator implements Comparator<ByteBuffer>
   {
     @Override
     public int compare(@Nullable ByteBuffer o1, @Nullable ByteBuffer o2)
     {
-      return ByteBufferUtils.compareByteBuffers(o1, o2);
+      return compareUtf8ByteBuffers(o1, o2);
     }
   }
 }
diff --git a/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java b/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java
index bd17f42c403..41078961a0c 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/StringUtils.java
@@ -77,6 +77,148 @@ public class StringUtils
     return string == null ? EMPTY_BYTES : toUtf8(string);
   }
 
+  /**
+   * Compares two nonnull Java Strings in Unicode code-point order. Returns a negative, zero, or positive value.
+   *
+   * Order is consistent with {@link #compareUtf8(byte[], byte[])}, but is not consistent with
+   * {@link String#compareTo(String)}, which orders by UTF-16 code unit (surrogates sort before U+E000..U+FFFF).
+   */
+  public static int compareUnicode(final String a, final String b)
+  {
+    final int commonLength = Math.min(a.length(), b.length());
+
+    for (int i = 0; i < commonLength; i++) {
+      int char1 = a.charAt(i) & 0xFFFF; // UTF-16 code unit widened to a nonnegative int
+      int char2 = b.charAt(i) & 0xFFFF; // UTF-16 code unit widened to a nonnegative int
+
+      if (char1 != char2 && char1 >= 0xd800 && char2 >= 0xd800) {
+        // Fixup logic for code units at or above the surrogate range, based on logic described at
+        // https://www.icu-project.org/docs/papers/utf16_code_point_order.html.
+        //
+        // If both code units are at or above the surrogate range (>= 0xd800) then adjust non-surrogates (legitimate
+        // single-code-unit characters) to be below the surrogate range, so they compare earlier than surrogates.
+
+        if (!Character.isSurrogate((char) char1)) {
+          char1 -= 0x2800; // 0xe000..0xffff -> 0xb800..0xd7ff
+        }
+
+        if (!Character.isSurrogate((char) char2)) {
+          char2 -= 0x2800; // 0xe000..0xffff -> 0xb800..0xd7ff
+        }
+      }
+
+      final int cmp = char1 - char2; // Both operands fit in 16 bits, so the subtraction cannot overflow
+      if (cmp != 0) {
+        return cmp;
+      }
+    }
+
+    return Integer.compare(a.length(), b.length()); // Common prefix equal: shorter string sorts first
+  }
+
+  /**
+   * Compares two nonnull UTF-8 byte strings in Unicode code-point order.
+   *
+   * Equivalent to a lexicographic comparison of the two byte arrays as if they were unsigned bytes.
+   *
+   * Order is consistent with {@link #compareUnicode(String, String)}, but is not consistent with
+   * {@link String#compareTo(String)}. For an ordering consistent with {@link String#compareTo(String)}, use
+   * {@link #compareUtf8UsingJavaStringOrdering(byte[], byte[])} instead.
+   */
+  public static int compareUtf8(final byte[] a, final byte[] b)
+  {
+    final int commonLength = Math.min(a.length, b.length);
+
+    for (int i = 0; i < commonLength; i++) {
+      final byte byte1 = a[i];
+      final byte byte2 = b[i];
+      final int cmp = (byte1 & 0xFF) - (byte2 & 0xFF); // Unsigned comparison; operands are 0..255, no overflow
+      if (cmp != 0) {
+        return cmp;
+      }
+    }
+
+    return Integer.compare(a.length, b.length); // Common prefix equal: shorter array sorts first
+  }
+
+  /**
+   * Compares two nonnull UTF-8 byte strings in UTF-16 code-unit order, one byte at a time.
+   *
+   * Order is consistent with {@link String#compareTo(String)}, but is not consistent with
+   * {@link #compareUnicode(String, String)} or {@link #compareUtf8(byte[], byte[])}.
+   */
+  public static int compareUtf8UsingJavaStringOrdering(final byte[] a, final byte[] b)
+  {
+    final int commonLength = Math.min(a.length, b.length);
+
+    for (int i = 0; i < commonLength; i++) {
+      final int cmp = compareUtf8UsingJavaStringOrdering(a[i], b[i]); // Per-byte overload
+      if (cmp != 0) {
+        return cmp;
+      }
+    }
+
+    return Integer.compare(a.length, b.length); // Common prefix equal: shorter array sorts first
+  }
+
+  /**
+   * Compares two UTF-8 byte strings in UTF-16 code-unit order. Each string occupies "length" bytes starting at
+   * absolute index "position" of its buffer; absolute gets are used, so buffer positions are left unchanged.
+   * Order is consistent with {@link String#compareTo(String)}, but is not consistent with
+   * {@link #compareUnicode(String, String)} or {@link #compareUtf8(byte[], byte[])}.
+   */
+  public static int compareUtf8UsingJavaStringOrdering(
+      final ByteBuffer buf1,
+      final int position1,
+      final int length1,
+      final ByteBuffer buf2,
+      final int position2,
+      final int length2
+  )
+  {
+    final int commonLength = Math.min(length1, length2);
+
+    for (int i = 0; i < commonLength; i++) {
+      final int cmp = compareUtf8UsingJavaStringOrdering(buf1.get(position1 + i), buf2.get(position2 + i));
+      if (cmp != 0) {
+        return cmp;
+      }
+    }
+
+    return Integer.compare(length1, length2); // Common prefix equal: shorter range sorts first
+  }
+
+  /**
+   * Compares two bytes from UTF-8 strings in such a way that the entire byte arrays are compared in UTF-16
+   * code-unit order. Returns a negative, zero, or positive value; only the sign is meaningful.
+   *
+   * Compatible with {@link #compareUtf8UsingJavaStringOrdering(byte[], byte[])} and
+   * {@link #compareUtf8UsingJavaStringOrdering(ByteBuffer, int, int, ByteBuffer, int, int)}.
+   */
+  public static int compareUtf8UsingJavaStringOrdering(byte byte1, byte byte2)
+  {
+    // Treat as unsigned bytes (0..255).
+    int ubyte1 = byte1 & 0xFF;
+    int ubyte2 = byte2 & 0xFF;
+
+    if (ubyte1 != ubyte2 && ubyte1 >= 0xEE && ubyte2 >= 0xEE) {
+      // Fixup logic for lead bytes for U+E000 ... U+FFFF, based on logic described at
+      // https://www.icu-project.org/docs/papers/utf16_code_point_order.html.
+      // In UTF-16 these characters sort after surrogate pairs, whose UTF-8 lead bytes are 0xF0 and above.
+      // Move possible lead bytes for this range (0xEE and 0xEF) above all other bytes, so they compare later.
+
+      if (ubyte1 == 0xEE || ubyte1 == 0xEF) {
+        ubyte1 += 0xFF; // 0xEE -> 0x1ED, 0xEF -> 0x1EE: above every unsigned byte value
+      }
+
+      if (ubyte2 == 0xEE || ubyte2 == 0xEF) {
+        ubyte2 += 0xFF; // 0xEE -> 0x1ED, 0xEF -> 0x1EE: above every unsigned byte value
+      }
+    }
+
+    return ubyte1 - ubyte2; // Both operands are small nonnegative ints, so the subtraction cannot overflow
+  }
+
   public static String fromUtf8(final byte[] bytes)
   {
     try {
diff --git a/core/src/test/java/org/apache/druid/java/util/common/ByteBufferUtilsTest.java b/core/src/test/java/org/apache/druid/java/util/common/ByteBufferUtilsTest.java
index f5acb6e0308..c32f29bfdd3 100644
--- a/core/src/test/java/org/apache/druid/java/util/common/ByteBufferUtilsTest.java
+++ b/core/src/test/java/org/apache/druid/java/util/common/ByteBufferUtilsTest.java
@@ -19,6 +19,7 @@
 
 package org.apache.druid.java.util.common;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.io.Files;
 import org.apache.druid.collections.ResourceHolder;
 import org.hamcrest.MatcherAssert;
@@ -36,9 +37,28 @@ import java.nio.ByteBuffer;
 import java.nio.MappedByteBuffer;
 import java.util.Arrays;
 import java.util.Comparator;
+import java.util.List;
 
 public class ByteBufferUtilsTest
 {
+  private static final List<String> COMPARE_TEST_STRINGS = ImmutableList.of(
+      "（請參見已被刪除版本）",
+      "請參見已被刪除版本",
+      "שָׁלוֹם",
+      "＋{{[[Template:別名重定向|別名重定向]]}}",
+      "\uD83D\uDC4D\uD83D\uDC4D\uD83D\uDC4D",
+      "\uD83D\uDCA9",
+      "",
+      "f",
+      "fo",
+      "\uD83D\uDE42",
+      "\uD83E\uDEE5",
+      "\uD83E\uDD20",
+      "quick",
+      "brown",
+      "fox"
+  );
+
   @Rule
   public TemporaryFolder temporaryFolder = new TemporaryFolder();
 
@@ -82,9 +102,9 @@ public class ByteBufferUtilsTest
 
   @Test
   @SuppressWarnings("EqualsWithItself")
-  public void testUnsignedComparator()
+  public void testUtf8Comparator()
   {
-    final Comparator<ByteBuffer> comparator = ByteBufferUtils.unsignedComparator();
+    final Comparator<ByteBuffer> comparator = ByteBufferUtils.utf8Comparator();
 
     // Tests involving null
     MatcherAssert.assertThat(comparator.compare(null, null), Matchers.equalTo(0));
@@ -112,18 +132,33 @@ public class ByteBufferUtilsTest
         Matchers.greaterThan(0)
     );
 
-    // Tests involving the full range of bytes
-    for (byte i = Byte.MIN_VALUE; i < Byte.MAX_VALUE; i++) {
-      for (byte j = Byte.MIN_VALUE; j < Byte.MAX_VALUE; j++) {
-        final int cmp = Integer.compare(Byte.toUnsignedInt(i), Byte.toUnsignedInt(j));
+    for (final String string1 : COMPARE_TEST_STRINGS) {
+      for (final String string2 : COMPARE_TEST_STRINGS) {
+        final byte[] utf8Bytes1 = StringUtils.toUtf8(string1);
+        final byte[] utf8Bytes2 = StringUtils.toUtf8(string2);
+        final ByteBuffer utf8ByteBuffer1 = ByteBuffer.allocate(utf8Bytes1.length + 2);
+        final ByteBuffer utf8ByteBuffer2 = ByteBuffer.allocate(utf8Bytes2.length + 2);
+        utf8ByteBuffer1.position(1);
+        utf8ByteBuffer1.put(utf8Bytes1, 0, utf8Bytes1.length).position(utf8Bytes1.length);
+        utf8ByteBuffer1.position(1).limit(1 + utf8Bytes1.length);
+        utf8ByteBuffer2.position(1);
+        utf8ByteBuffer2.put(utf8Bytes2, 0, utf8Bytes2.length).position(utf8Bytes2.length);
+        utf8ByteBuffer2.position(1).limit(1 + utf8Bytes2.length);
 
-        MatcherAssert.assertThat(
-            StringUtils.format("comparison of %s to %s", Byte.toUnsignedInt(i), Byte.toUnsignedInt(j)),
-            comparator.compare(
-                ByteBuffer.wrap(new byte[]{i}),
-                ByteBuffer.wrap(new byte[]{j})
+        final int compareByteBufferUtilsUtf8 = ByteBufferUtils.utf8Comparator().compare(
+            utf8ByteBuffer1,
+            utf8ByteBuffer2
+        );
+
+        Assert.assertEquals(
+            StringUtils.format(
+                "compareByteBufferUtilsUtf8(byte[]) (actual) "
+                + "matches compareJavaString (expected) for [%s] vs [%s]",
+                string1,
+                string2
             ),
-            cmp < 0 ? Matchers.lessThan(0) : cmp > 0 ? Matchers.greaterThan(0) : Matchers.equalTo(0)
+            (int) Math.signum(string1.compareTo(string2)),
+            (int) Math.signum(compareByteBufferUtilsUtf8)
         );
       }
     }
diff --git a/core/src/test/java/org/apache/druid/java/util/common/StringUtilsTest.java b/core/src/test/java/org/apache/druid/java/util/common/StringUtilsTest.java
index 754e7237c76..3f2d5713c2f 100644
--- a/core/src/test/java/org/apache/druid/java/util/common/StringUtilsTest.java
+++ b/core/src/test/java/org/apache/druid/java/util/common/StringUtilsTest.java
@@ -19,6 +19,7 @@
 
 package org.apache.druid.java.util.common;
 
+import com.google.common.collect.ImmutableList;
 import org.apache.druid.collections.ResourceHolder;
 import org.junit.Assert;
 import org.junit.Rule;
@@ -28,12 +29,31 @@ import org.junit.rules.ExpectedException;
 import java.io.UnsupportedEncodingException;
 import java.nio.BufferUnderflowException;
 import java.nio.ByteBuffer;
+import java.util.List;
 
 /**
  *
  */
 public class StringUtilsTest
 {
+  private static final List<String> COMPARE_TEST_STRINGS = ImmutableList.of(
+      "（請參見已被刪除版本）",
+      "請參見已被刪除版本",
+      "שָׁלוֹם",
+      "＋{{[[Template:別名重定向|別名重定向]]}}",
+      "\uD83D\uDC4D\uD83D\uDC4D\uD83D\uDC4D",
+      "\uD83D\uDCA9",
+      "",
+      "f",
+      "fo",
+      "\uD83D\uDE42",
+      "\uD83E\uDEE5",
+      "\uD83E\uDD20",
+      "quick",
+      "brown",
+      "fox"
+  );
+
   @Rule
   public ExpectedException expectedException = ExpectedException.none();
 
@@ -290,4 +310,80 @@ public class StringUtilsTest
     Assert.assertEquals("smile ", StringUtils.fastLooseChop("smile 🙂 for the camera", 6));
     Assert.assertEquals("smile", StringUtils.fastLooseChop("smile 🙂 for the camera", 5));
   }
+
+  @Test
+  public void testUnicodeStringCompare()
+  {
+    for (final String string1 : COMPARE_TEST_STRINGS) { // All ordered pairs, including each string with itself
+      for (final String string2 : COMPARE_TEST_STRINGS) {
+        final int compareUnicode = StringUtils.compareUnicode(string1, string2);
+        final int compareUtf8 = StringUtils.compareUtf8( // Unsigned-byte order of the UTF-8 form is the reference
+            StringUtils.toUtf8(string1),
+            StringUtils.toUtf8(string2)
+        );
+
+        Assert.assertEquals(
+            StringUtils.format(
+                "compareUnicode (actual) matches compareUtf8 (expected) for [%s] vs [%s]",
+                string1,
+                string2
+            ),
+            (int) Math.signum(compareUtf8), // Only the sign is compared; magnitudes may differ
+            (int) Math.signum(compareUnicode)
+        );
+      }
+    }
+  }
+
+  @Test
+  public void testJavaStringCompare()
+  {
+    for (final String string1 : COMPARE_TEST_STRINGS) { // All ordered pairs, including each string with itself
+      for (final String string2 : COMPARE_TEST_STRINGS) {
+        final int compareJavaString = string1.compareTo(string2); // Reference ordering for both assertions below
+
+        final byte[] utf8Bytes1 = StringUtils.toUtf8(string1);
+        final byte[] utf8Bytes2 = StringUtils.toUtf8(string2);
+        final int compareByteArrayUtf8UsingJavaStringOrdering =
+            StringUtils.compareUtf8UsingJavaStringOrdering(utf8Bytes1, utf8Bytes2);
+
+        final ByteBuffer utf8ByteBuffer1 = ByteBuffer.allocate(utf8Bytes1.length + 2); // One spare byte each side
+        final ByteBuffer utf8ByteBuffer2 = ByteBuffer.allocate(utf8Bytes2.length + 2); // One spare byte each side
+        utf8ByteBuffer1.position(1); // Data is written starting at index 1, not 0
+        utf8ByteBuffer1.put(utf8Bytes1, 0, utf8Bytes1.length).position(utf8Bytes1.length);
+        utf8ByteBuffer2.position(1); // Data is written starting at index 1, not 0
+        utf8ByteBuffer2.put(utf8Bytes2, 0, utf8Bytes2.length).position(utf8Bytes2.length);
+        final int compareByteBufferUtf8UsingJavaStringOrdering = StringUtils.compareUtf8UsingJavaStringOrdering(
+            utf8ByteBuffer1,
+            1, // Explicit start index of the data in buffer 1
+            utf8Bytes1.length,
+            utf8ByteBuffer2,
+            1, // Explicit start index of the data in buffer 2
+            utf8Bytes2.length
+        );
+
+        Assert.assertEquals(
+            StringUtils.format(
+                "compareUtf8UsingJavaStringOrdering(byte[]) (actual) "
+                + "matches compareJavaString (expected) for [%s] vs [%s]",
+                string1,
+                string2
+            ),
+            (int) Math.signum(compareJavaString), // Only the sign is compared; magnitudes may differ
+            (int) Math.signum(compareByteArrayUtf8UsingJavaStringOrdering)
+        );
+
+        Assert.assertEquals(
+            StringUtils.format(
+                "compareByteBufferUtf8UsingJavaStringOrdering(ByteBuffer) (actual) "
+                + "matches compareJavaString (expected) for [%s] vs [%s]",
+                string1,
+                string2
+            ),
+            (int) Math.signum(compareJavaString), // Only the sign is compared; magnitudes may differ
+            (int) Math.signum(compareByteBufferUtf8UsingJavaStringOrdering)
+        );
+      }
+    }
+  }
 }
diff --git a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
index 961e99a3f0d..ac6d9bfe651 100644
--- a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
+++ b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
@@ -205,7 +205,7 @@ public class FrameWriterUtils
   /**
    * Copies "len" bytes from {@code src.position()} to "dstPosition" in "memory". Does not update the position of src.
    *
-   * @throws InvalidNullByteException "allowNullBytes" is true and a null byte is encountered
+   * @throws InvalidNullByteException if "allowNullBytes" is false and a null byte is encountered
    */
   public static void copyByteBufferToMemory(
       final ByteBuffer src,
diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
index 2dec044cf19..afddb0e42af 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
@@ -674,7 +674,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
 
     public SortedSet<ByteBuffer> toUtf8()
     {
-      final TreeSet<ByteBuffer> valuesUtf8 = new TreeSet<>(ByteBufferUtils.unsignedComparator());
+      final TreeSet<ByteBuffer> valuesUtf8 = new TreeSet<>(ByteBufferUtils.utf8Comparator());
 
       for (final String value : values) {
         if (value == null) {
diff --git a/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java b/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java
index 58e228ad2c6..4fdcc5c6f3a 100644
--- a/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java
+++ b/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java
@@ -47,9 +47,15 @@ public class StringComparators
   public static final int STRLEN_CACHE_ID = 0x04;
   public static final int VERSION_CACHE_ID = 0x05;
 
+  /**
+   * Comparison using the natural comparator of {@link String}.
+   *
+   * Note that this is not equivalent to comparing UTF-8 byte arrays; see javadocs for
+   * {@link org.apache.druid.java.util.common.StringUtils#compareUnicode(String, String)} and
+   * {@link org.apache.druid.java.util.common.StringUtils#compareUtf8UsingJavaStringOrdering(byte[], byte[])}.
+   */
   public static class LexicographicComparator extends StringComparator
   {
-    // Equivalent to comparing UTF-8 encoded strings as byte arrays.
     private static final Ordering<String> ORDERING = Ordering.from(String::compareTo).nullsFirst();
 
     @Override
diff --git a/processing/src/main/java/org/apache/druid/segment/IndexIO.java b/processing/src/main/java/org/apache/druid/segment/IndexIO.java
index 9698ebdc2be..9b74f71768b 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexIO.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexIO.java
@@ -379,7 +379,7 @@ public class IndexIO
 
         // Duplicate the first buffer since we are reading the dictionary twice.
         dimValueLookups.put(dimension, GenericIndexed.read(dimBuffer.duplicate(), GenericIndexed.STRING_STRATEGY));
-        dimValueUtf8Lookups.put(dimension, GenericIndexed.read(dimBuffer, GenericIndexed.BYTE_BUFFER_STRATEGY));
+        dimValueUtf8Lookups.put(dimension, GenericIndexed.read(dimBuffer, GenericIndexed.UTF8_STRATEGY));
         dimColumns.put(dimension, VSizeColumnarMultiInts.readFromByteBuffer(dimBuffer));
       }
 
diff --git a/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8ValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8ValueSetIndex.java
index 5680a1400f0..c568e78d9b0 100644
--- a/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8ValueSetIndex.java
+++ b/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8ValueSetIndex.java
@@ -47,7 +47,7 @@ public final class IndexedUtf8ValueSetIndex<TDictionary extends Indexed<ByteBuff
   // sorted merge instead of binary-search based algorithm.
   private static final double SORTED_MERGE_RATIO_THRESHOLD = 0.12D;
   private static final int SIZE_WORTH_CHECKING_MIN = 8;
-  private static final Comparator<ByteBuffer> COMPARATOR = ByteBufferUtils.unsignedComparator();
+  private static final Comparator<ByteBuffer> COMPARATOR = ByteBufferUtils.utf8Comparator();
 
   private final BitmapFactory bitmapFactory;
   private final TDictionary dictionary;
diff --git a/processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java b/processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java
index ef0d08ee0a3..6598e36f206 100644
--- a/processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java
+++ b/processing/src/main/java/org/apache/druid/segment/column/Utf8ValueSetIndex.java
@@ -29,7 +29,7 @@ public interface Utf8ValueSetIndex
   /**
    * Get an {@link Iterable} of {@link ImmutableBitmap} corresponding to the specified set of values (if they are
    * contained in the underlying column). The set must be sorted using
-   * {@link org.apache.druid.java.util.common.ByteBufferUtils#unsignedComparator()}.
+   * {@link org.apache.druid.java.util.common.ByteBufferUtils#utf8Comparator()}.
    */
   BitmapColumnIndex forSortedValuesUtf8(SortedSet<ByteBuffer> valuesUtf8);
 }
diff --git a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java
index d2d6c28d340..2596f7ec2bf 100644
--- a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java
+++ b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexed.java
@@ -23,6 +23,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.base.Supplier;
 import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
 
 import javax.annotation.Nullable;
@@ -340,10 +341,13 @@ public final class FrontCodedIndexed implements Indexed<ByteBuffer>
 
 
   /**
-   * Performs an unsigned byte comparison of the first value in a bucket with the specified value. Note that this method
+   * Performs byte-by-byte comparison of the first value in a bucket with the specified value. Note that this method
    * MUST be prepared before calling, as it expects the length of the first value to have already been read externally,
    * and the buffer position to be at the start of the first bucket value. The final buffer position will be the
-   * 'shared prefix length' of the first value in the bucket and the value to compare
+   * 'shared prefix length' of the first value in the bucket and the value to compare.
+   *
+   * Bytes are compared using {@link StringUtils#compareUtf8UsingJavaStringOrdering(byte, byte)}. Therefore, when the
+   * values are UTF-8 encoded strings, the ordering is compatible with {@link String#compareTo(String)}.
    */
   private static int compareBucketFirstValue(ByteBuffer bucketBuffer, int length, ByteBuffer value)
   {
@@ -355,7 +359,7 @@ public final class FrontCodedIndexed implements Indexed<ByteBuffer>
     int sharedPrefix;
     int comparison = 0;
     for (sharedPrefix = 0; sharedPrefix < commonLength; sharedPrefix++) {
-      comparison = unsignedByteCompare(bucketBuffer.get(), value.get(sharedPrefix));
+      comparison = StringUtils.compareUtf8UsingJavaStringOrdering(bucketBuffer.get(), value.get(sharedPrefix));
       if (comparison != 0) {
         bucketBuffer.position(startOffset + sharedPrefix);
         break;
@@ -403,7 +407,10 @@ public final class FrontCodedIndexed implements Indexed<ByteBuffer>
         final int common = Math.min(fragmentLength, value.remaining() - prefixLength);
         int fragmentComparison = 0;
         for (int i = 0; i < common; i++) {
-          fragmentComparison = unsignedByteCompare(buffer.get(buffer.position() + i), value.get(prefixLength + i));
+          fragmentComparison = StringUtils.compareUtf8UsingJavaStringOrdering(
+              buffer.get(buffer.position() + i),
+              value.get(prefixLength + i)
+          );
           if (fragmentComparison != 0) {
             break;
           }
@@ -502,9 +509,4 @@ public final class FrontCodedIndexed implements Indexed<ByteBuffer>
     }
     return bucketBuffers;
   }
-
-  public static int unsignedByteCompare(byte b1, byte b2)
-  {
-    return (b1 & 0xFF) - (b2 & 0xFF);
-  }
 }
diff --git a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java
index b6120d6c123..bcbe47db624 100644
--- a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java
+++ b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java
@@ -44,8 +44,8 @@ import java.nio.channels.WritableByteChannel;
  * the bucket is written entirely, and remaining values are stored as pairs of an integer which indicates how much
  * of the first byte array of the bucket to use as a prefix, followed by the remaining value bytes after the prefix.
  *
- * This is valid to use for any values which can be compared byte by byte with unsigned comparison. Otherwise, this
- * is not the collection for you.
+ * This writer is designed for use with UTF-8 encoded strings that are written in an order compatible with
+ * {@link String#compareTo(String)}.
  *
  * @see FrontCodedIndexed for additional details.
  */
@@ -99,7 +99,7 @@ public class FrontCodedIndexedWriter implements DictionaryWriter<byte[]>
   @Override
   public void write(@Nullable byte[] value) throws IOException
   {
-    if (prevObject != null && unsignedCompare(prevObject, value) >= 0) {
+    if (prevObject != null && compareNullableUtf8UsingJavaStringOrdering(prevObject, value) >= 0) {
       throw new ISE(
           "Values must be sorted and unique. Element [%s] with value [%s] is before or equivalent to [%s]",
           numWritten,
@@ -283,7 +283,7 @@ public class FrontCodedIndexedWriter implements DictionaryWriter<byte[]>
         // all other values must be partitioned into a prefix length and suffix bytes
         int prefixLength = 0;
         for (; prefixLength < first.length; prefixLength++) {
-          final int cmp = FrontCodedIndexed.unsignedByteCompare(first[prefixLength], next[prefixLength]);
+          final int cmp = StringUtils.compareUtf8UsingJavaStringOrdering(first[prefixLength], next[prefixLength]);
           if (cmp != 0) {
             break;
           }
@@ -325,7 +325,11 @@ public class FrontCodedIndexedWriter implements DictionaryWriter<byte[]>
     return buffer.position() - pos;
   }
 
-  public static int unsignedCompare(
+  /**
+   * Same as {@link StringUtils#compareUtf8UsingJavaStringOrdering(byte[], byte[])}, but accepts nulls. Nulls are
+   * sorted first.
+   */
+  private static int compareNullableUtf8UsingJavaStringOrdering(
       @Nullable final byte[] b1,
       @Nullable final byte[] b2
   )
@@ -337,15 +341,7 @@ public class FrontCodedIndexedWriter implements DictionaryWriter<byte[]>
     if (b2 == null) {
       return 1;
     }
-    final int commonLength = Math.min(b1.length, b2.length);
 
-    for (int i = 0; i < commonLength; i++) {
-      final int cmp = FrontCodedIndexed.unsignedByteCompare(b1[i], b2[i]);
-      if (cmp != 0) {
-        return cmp;
-      }
-    }
-
-    return Integer.compare(b1.length, b2.length);
+    return StringUtils.compareUtf8UsingJavaStringOrdering(b1, b2);
   }
 }
diff --git a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java
index ff1c570b03a..62f50b0dc60 100644
--- a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java
+++ b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexed.java
@@ -100,13 +100,16 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
   private static final SerializerUtils SERIALIZER_UTILS = new SerializerUtils();
 
   /**
-   * An ObjectStrategy that returns a big-endian ByteBuffer pointing to the original data.
+   * An ObjectStrategy that returns a big-endian ByteBuffer pointing to the original data.
    *
    * The returned ByteBuffer is a fresh read-only instance, so it is OK for callers to modify its position, limit, etc.
    * However, it does point to the original data, so callers must take care not to use it if the original data may
    * have been freed.
+   *
+   * The compare method of this instance uses {@link StringUtils#compareUtf8UsingJavaStringOrdering(byte[], byte[])}
+   * so that behavior is consistent with {@link #STRING_STRATEGY}.
    */
-  public static final ObjectStrategy<ByteBuffer> BYTE_BUFFER_STRATEGY = new ObjectStrategy<ByteBuffer>()
+  public static final ObjectStrategy<ByteBuffer> UTF8_STRATEGY = new ObjectStrategy<ByteBuffer>()
   {
     @Override
     public Class<ByteBuffer> getClazz()
@@ -140,7 +143,7 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
     @Override
     public int compare(@Nullable ByteBuffer o1, @Nullable ByteBuffer o2)
     {
-      return ByteBufferUtils.unsignedComparator().compare(o1, o2);
+      return ByteBufferUtils.utf8Comparator().compare(o1, o2);
     }
   };
 
@@ -541,7 +544,7 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
       }
 
       //noinspection ObjectEquality
-      final boolean isByteBufferStrategy = strategy == BYTE_BUFFER_STRATEGY;
+      final boolean isByteBufferStrategy = strategy == UTF8_STRATEGY;
 
       int minIndex = 0;
       int maxIndex = size - 1;
@@ -553,7 +556,7 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
         if (isByteBufferStrategy) {
           // Specialization avoids ByteBuffer allocation in strategy.fromByteBuffer.
           ByteBuffer currValue = getByteBuffer(currIndex);
-          comparison = ByteBufferUtils.compareByteBuffers(currValue, (ByteBuffer) value);
+          comparison = ByteBufferUtils.compareUtf8ByteBuffers(currValue, (ByteBuffer) value);
         } else {
           T currValue = get(currIndex);
           comparison = strategy.compare(currValue, value);
diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java
index 8594cab8cab..39d8cf081bd 100644
--- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java
+++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java
@@ -107,7 +107,7 @@ public class NestedDataColumnSupplier implements Supplier<ComplexColumn>
             // this cannot happen naturally right now since generic indexed is written in the 'legacy' format, but
             // this provides backwards compatibility should we switch at some point in the future to always
             // writing dictionaryVersion
-            dictionary = GenericIndexed.read(stringDictionaryBuffer, GenericIndexed.BYTE_BUFFER_STRATEGY, mapper);
+            dictionary = GenericIndexed.read(stringDictionaryBuffer, GenericIndexed.UTF8_STRATEGY, mapper);
             frontCodedDictionarySupplier = null;
           } else {
             throw new ISE("impossible, unknown encoding strategy id: %s", encodingId);
@@ -117,7 +117,7 @@ public class NestedDataColumnSupplier implements Supplier<ComplexColumn>
           // as dictionaryVersion is actually also the GenericIndexed version, so we reset start position so the
           // GenericIndexed version can be correctly read
           stringDictionaryBuffer.position(dictionaryStartPosition);
-          dictionary = GenericIndexed.read(stringDictionaryBuffer, GenericIndexed.BYTE_BUFFER_STRATEGY, mapper);
+          dictionary = GenericIndexed.read(stringDictionaryBuffer, GenericIndexed.UTF8_STRATEGY, mapper);
           frontCodedDictionarySupplier = null;
         }
         final ByteBuffer longDictionaryBuffer = loadInternalFile(
diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
index ef130f5428a..b1eef3307b6 100644
--- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
+++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
@@ -353,7 +353,7 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
 
         final GenericIndexed<ByteBuffer> rDictionaryUtf8 = GenericIndexed.read(
             buffer,
-            GenericIndexed.BYTE_BUFFER_STRATEGY,
+            GenericIndexed.UTF8_STRATEGY,
             builder.getFileMapper()
         );
 
diff --git a/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIndexedTest.java
index f1bd478c954..c9e56135365 100644
--- a/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIndexedTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIndexedTest.java
@@ -241,7 +241,9 @@ public class FrontCodedIndexedTest extends InitializedNullHandlingTest
   public void testFrontCodedIndexedUnicodes() throws IOException
   {
     ByteBuffer buffer = ByteBuffer.allocate(1 << 12).order(order);
-    List<String> theList = ImmutableList.of("Győ-Moson-Sopron", "Győr");
+
+    // "\uD83D\uDCA9" and "（請參見已被刪除版本）" are a regression test for https://github.com/apache/druid/pull/13364
+    List<String> theList = ImmutableList.of("Győ-Moson-Sopron", "Győr", "\uD83D\uDCA9", "（請參見已被刪除版本）");
     fillBuffer(buffer, theList, 4);
 
     buffer.position(0);
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
index 268a4a2c971..eaf161036da 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
@@ -121,7 +121,7 @@ public class ExtractionDimFilterTest extends InitializedNullHandlingTest
             GenericIndexed.fromIterable(Collections.singletonList("foo1"), GenericIndexed.STRING_STRATEGY),
             GenericIndexed.fromIterable(
                 Collections.singletonList(ByteBuffer.wrap(StringUtils.toUtf8("foo1"))),
-                GenericIndexed.BYTE_BUFFER_STRATEGY
+                GenericIndexed.UTF8_STRATEGY
             ),
             GenericIndexed.fromIterable(Collections.singletonList(foo1BitMap), serdeFactory.getObjectStrategy()),
             null
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/PredicateValueMatcherFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/filter/PredicateValueMatcherFactoryTest.java
index e10d64241f2..f7525e3f9c4 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/PredicateValueMatcherFactoryTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/PredicateValueMatcherFactoryTest.java
@@ -75,7 +75,7 @@ public class PredicateValueMatcherFactoryTest extends InitializedNullHandlingTes
                 ByteBuffer.wrap(StringUtils.toUtf8("v2")),
                 ByteBuffer.wrap(StringUtils.toUtf8("v3"))
             ),
-            GenericIndexed.BYTE_BUFFER_STRATEGY
+            GenericIndexed.UTF8_STRATEGY
         ),
         null,
         () -> VSizeColumnarMultiInts.fromIterable(ImmutableList.of(VSizeColumnarInts.fromArray(new int[]{1}))),
@@ -98,7 +98,7 @@ public class PredicateValueMatcherFactoryTest extends InitializedNullHandlingTes
                 ByteBuffer.wrap(StringUtils.toUtf8("v2")),
                 ByteBuffer.wrap(StringUtils.toUtf8("v3"))
             ),
-            GenericIndexed.BYTE_BUFFER_STRATEGY
+            GenericIndexed.UTF8_STRATEGY
         ),
         null,
         () -> VSizeColumnarMultiInts.fromIterable(ImmutableList.of(VSizeColumnarInts.fromArray(new int[]{1}))),
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ValueMatchersTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ValueMatchersTest.java
index cb5459226b2..98631d3dc9c 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/ValueMatchersTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/ValueMatchersTest.java
@@ -49,7 +49,7 @@ public class ValueMatchersTest extends InitializedNullHandlingTest
         GenericIndexed.fromIterable(ImmutableList.of("value"), GenericIndexed.STRING_STRATEGY),
         GenericIndexed.fromIterable(
             ImmutableList.of(ByteBuffer.wrap(StringUtils.toUtf8("value"))),
-            GenericIndexed.BYTE_BUFFER_STRATEGY
+            GenericIndexed.UTF8_STRATEGY
         ),
         () -> VSizeColumnarInts.fromArray(new int[]{0}),
         null,
@@ -62,7 +62,7 @@ public class ValueMatchersTest extends InitializedNullHandlingTest
                 ByteBuffer.wrap(StringUtils.toUtf8("value")),
                 ByteBuffer.wrap(StringUtils.toUtf8("value2"))
             ),
-            GenericIndexed.BYTE_BUFFER_STRATEGY
+            GenericIndexed.UTF8_STRATEGY
         ),
         () -> VSizeColumnarInts.fromArray(new int[]{0, 0, 1, 0, 1}),
         null,
@@ -72,7 +72,7 @@ public class ValueMatchersTest extends InitializedNullHandlingTest
         GenericIndexed.fromIterable(ImmutableList.of("value"), GenericIndexed.STRING_STRATEGY),
         GenericIndexed.fromIterable(
             ImmutableList.of(ByteBuffer.wrap(StringUtils.toUtf8("value"))),
-            GenericIndexed.BYTE_BUFFER_STRATEGY
+            GenericIndexed.UTF8_STRATEGY
         ),
         null,
         () -> VSizeColumnarMultiInts.fromIterable(
diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
index 6d66464eee8..15b40b729f0 100644
--- a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java
@@ -127,7 +127,7 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
     doubleWriter.write(9.9);
     writeToBuffer(doubleBuffer, doubleWriter);
 
-    GenericIndexed<ByteBuffer> strings = GenericIndexed.read(stringBuffer, GenericIndexed.BYTE_BUFFER_STRATEGY);
+    GenericIndexed<ByteBuffer> strings = GenericIndexed.read(stringBuffer, GenericIndexed.UTF8_STRATEGY);
     globalStrings = () -> strings.singleThreaded();
     globalLongs = FixedIndexed.read(longBuffer, TypeStrategies.LONG, ByteOrder.nativeOrder(), Long.BYTES);
     globalDoubles = FixedIndexed.read(doubleBuffer, TypeStrategies.DOUBLE, ByteOrder.nativeOrder(), Double.BYTES);
diff --git a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java
index 36d5ba76a05..a9ab64a378a 100644
--- a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java
@@ -115,7 +115,7 @@ public class DictionaryEncodedStringIndexSupplierTest extends InitializedNullHan
     GenericIndexedWriter<ByteBuffer> byteBufferWriter = new GenericIndexedWriter<>(
         new OnHeapMemorySegmentWriteOutMedium(),
         "byteBuffers",
-        GenericIndexed.BYTE_BUFFER_STRATEGY
+        GenericIndexed.UTF8_STRATEGY
     );
 
     stringWriter.open();
@@ -167,7 +167,7 @@ public class DictionaryEncodedStringIndexSupplierTest extends InitializedNullHan
     return new DictionaryEncodedStringIndexSupplier(
         roaringFactory.getBitmapFactory(),
         GenericIndexed.read(stringBuffer, GenericIndexed.STRING_STRATEGY),
-        GenericIndexed.read(byteBuffer, GenericIndexed.BYTE_BUFFER_STRATEGY),
+        GenericIndexed.read(byteBuffer, GenericIndexed.UTF8_STRATEGY),
         bitmaps,
         null
     );