LUCENE-8476: Optimizations in UserDictionary (KoreanAnalyzer)

Signed-off-by: Namgyu Kim <kng0828@gmail.com> Signed-off-by: Jim Ferenczi <jimczi@apache.org>
2018-09-05 00:12:10 +09:00 · 2018-09-05 00:12:10 +09:00 · 97ccbc734b
parent 3b1a335fb3
commit 97ccbc734b
2 changed files with 5 additions and 4 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -316,6 +316,9 @@ Other:

 * LUCENE-765: Improved org.apache.lucene.index javadocs. (Mike Sokolov)

+* LUCENE-8476: Remove redundant null check and switch to optimized List.sort in the
+  Korean analyzer's user dictionary. (Namgyu Kim)
+
 ======================= Lucene 7.4.1 =======================

 Bug Fixes:
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
@@ -20,7 +20,6 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;

@@ -81,8 +80,7 @@ public final class UserDictionary implements Dictionary {

  private UserDictionary(List<String> entries) throws IOException {
    final CharacterDefinition charDef = CharacterDefinition.getInstance();
-    Collections.sort(entries,
-        Comparator.comparing(e -> e.split("\\s+")[0]));
+    entries.sort(Comparator.comparing(e -> e.split("\\s+")[0]));

    PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput);
@@ -95,7 +93,7 @@ public final class UserDictionary implements Dictionary {
    for (String entry : entries) {
      String[] splits = entry.split("\\s+");
      String token = splits[0];
-      if (lastToken != null && token.equals(lastToken)) {
+      if (token.equals(lastToken)) {
        continue;
      }
      char lastChar = entry.charAt(entry.length()-1);