LUCENE-8476: Optimizations in UserDictionary (KoreanAnalyzer)

Signed-off-by: Namgyu Kim <kng0828@gmail.com>
Signed-off-by: Jim Ferenczi <jimczi@apache.org>
This commit is contained in:
Namgyu Kim 2018-09-05 00:12:10 +09:00 committed by Jim Ferenczi
parent 3b1a335fb3
commit 97ccbc734b
2 changed files with 5 additions and 4 deletions

View File

@ -316,6 +316,9 @@ Other:
* LUCENE-765: Improved org.apache.lucene.index javadocs. (Mike Sokolov)
* LUCENE-8476: Remove a redundant null check and switch to the optimized List.sort in the
Korean analyzer's user dictionary. (Namgyu Kim)
======================= Lucene 7.4.1 =======================
Bug Fixes:

View File

@ -20,7 +20,6 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
@ -81,8 +80,7 @@ public final class UserDictionary implements Dictionary {
private UserDictionary(List<String> entries) throws IOException {
final CharacterDefinition charDef = CharacterDefinition.getInstance();
Collections.sort(entries,
Comparator.comparing(e -> e.split("\\s+")[0]));
entries.sort(Comparator.comparing(e -> e.split("\\s+")[0]));
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput);
@ -95,7 +93,7 @@ public final class UserDictionary implements Dictionary {
for (String entry : entries) {
String[] splits = entry.split("\\s+");
String token = splits[0];
if (lastToken != null && token.equals(lastToken)) {
if (token.equals(lastToken)) {
continue;
}
char lastChar = entry.charAt(entry.length()-1);