LUCENE-8476: Optimizations in UserDictionary (KoreanAnalyzer)

Signed-off-by: Namgyu Kim <kng0828@gmail.com>
Signed-off-by: Jim Ferenczi <jimczi@apache.org>
This commit is contained in:
Namgyu Kim 2018-09-05 00:12:10 +09:00 committed by Jim Ferenczi
parent 3b1a335fb3
commit 97ccbc734b
2 changed files with 5 additions and 4 deletions

View File

@ -316,6 +316,9 @@ Other:
* LUCENE-765: Improved org.apache.lucene.index javadocs. (Mike Sokolov) * LUCENE-765: Improved org.apache.lucene.index javadocs. (Mike Sokolov)
* LUCENE-8476: Remove redundant null check and switch to optimized List.sort in the
Korean analyzer's user dictionary. (Namgyu Kim)
======================= Lucene 7.4.1 ======================= ======================= Lucene 7.4.1 =======================
Bug Fixes: Bug Fixes:

View File

@ -20,7 +20,6 @@ import java.io.BufferedReader;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import java.util.List;
@ -81,8 +80,7 @@ public final class UserDictionary implements Dictionary {
private UserDictionary(List<String> entries) throws IOException { private UserDictionary(List<String> entries) throws IOException {
final CharacterDefinition charDef = CharacterDefinition.getInstance(); final CharacterDefinition charDef = CharacterDefinition.getInstance();
Collections.sort(entries, entries.sort(Comparator.comparing(e -> e.split("\\s+")[0]));
Comparator.comparing(e -> e.split("\\s+")[0]));
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(); PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput); Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput);
@ -95,7 +93,7 @@ public final class UserDictionary implements Dictionary {
for (String entry : entries) { for (String entry : entries) {
String[] splits = entry.split("\\s+"); String[] splits = entry.split("\\s+");
String token = splits[0]; String token = splits[0];
if (lastToken != null && token.equals(lastToken)) { if (token.equals(lastToken)) {
continue; continue;
} }
char lastChar = entry.charAt(entry.length()-1); char lastChar = entry.charAt(entry.length()-1);