From 8147e491ce3905bb3543f2c7e34a4ecb60382b49 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 17 Jan 2020 13:35:05 -0500 Subject: [PATCH] LUCENE-9053: improve FST's package-info.java comment to clarify required (Unicode code point) sort order for FST.Builder --- .../src/java/org/apache/lucene/util/fst/package-info.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java b/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java index d9845861528..8ab7b66b8ac 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java @@ -34,7 +34,9 @@ *

* FST Construction example: *

- *     // Input values (keys). These must be provided to Builder in Unicode sorted order!
+ *     // Input values (keys). These must be provided to Builder in Unicode code point (UTF-8 or UTF-32) sorted order.
+ *     // Note that sorting by Java's String.compareTo, which uses UTF-16 code unit order (and so differs from code point
+ *     // order when supplementary characters are present), is not correct and can lead to exceptions while building the FST:
  *     String inputValues[] = {"cat", "dog", "dogs"};
  *     long outputValues[] = {5, 7, 12};
  *