diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java
index 9feb354f56a..a6c0e4bb1e5 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java
@@ -157,17 +157,14 @@ public class GraphvizFormatter {
   }
 
   private String formatHeader() {
-    StringBuilder sb = new StringBuilder();
-    sb.append("digraph viterbi {\n");
-    sb.append("  graph [ fontsize=30 labelloc=\"t\" label=\"\" splines=true overlap=false rankdir = \"LR\"];\n");
-    //sb.append("  // A2 paper size\n");
-    //sb.append("  size = \"34.4,16.5\";\n");
-    //sb.append("  // try to fill paper\n");
-    //sb.append("  ratio = fill;\n");
-    sb.append("  edge [ fontname=\"" + FONT_NAME + "\" fontcolor=\"red\" color=\"#606060\" ]\n");
-    sb.append("  node [ style=\"filled\" fillcolor=\"#e8e8f0\" shape=\"Mrecord\" fontname=\"" + FONT_NAME + "\" ]\n");
-
-    return sb.toString();
+    return "digraph viterbi {\n" +
+        "  graph [ fontsize=30 labelloc=\"t\" label=\"\" splines=true overlap=false rankdir = \"LR\"];\n" +
+        //sb.append("  // A2 paper size\n");
+        //sb.append("  size = \"34.4,16.5\";\n");
+        //sb.append("  // try to fill paper\n");
+        //sb.append("  ratio = fill;\n");
+        "  edge [ fontname=\"" + FONT_NAME + "\" fontcolor=\"red\" color=\"#606060\" ]\n" +
+        "  node [ style=\"filled\" fillcolor=\"#e8e8f0\" shape=\"Mrecord\" fontname=\"" + FONT_NAME + "\" ]\n";
   }
 
   private String formatTrailer() {
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
index b0c26aae155..3f8769f018b 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
@@ -74,7 +74,6 @@ public class KoreanAnalyzer extends Analyzer {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new LowerCaseFilter(in);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanNumberFilter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanNumberFilter.java
index a953a210528..732aeb36e06 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanNumberFilter.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanNumberFilter.java
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ko;
 
 import java.io.IOException;
 import java.math.BigDecimal;
+import java.util.Arrays;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -98,9 +99,7 @@ public class KoreanNumberFilter extends TokenFilter {
 
   static {
     numerals = new char[0x10000];
-    for (int i = 0; i < numerals.length; i++) {
-      numerals[i] = NO_NUMERAL;
-    }
+    Arrays.fill(numerals, NO_NUMERAL);
     numerals['영'] = 0; // 영 U+C601 0
     numerals['일'] = 1; // 일 U+C77C 1
     numerals['이'] = 2; // 이 U+C774 2
@@ -113,9 +112,7 @@ public class KoreanNumberFilter extends TokenFilter {
     numerals['구'] = 9; // 구 U+AD6C 9
 
     exponents = new char[0x10000];
-    for (int i = 0; i < exponents.length; i++) {
-      exponents[i] = 0;
-    }
+    Arrays.fill(exponents, (char) 0);
     exponents['십'] = 1; // 십 U+C2ED 10
     exponents['백'] = 2; // 백 U+BC31 100
     exponents['천'] = 3; // 천 U+CC9C 1,000
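Aside: `Arrays.fill` is behaviorally identical to the per-index loops it replaces above; the whole lookup table is seeded with a sentinel before the individual numeral entries are written. A minimal standalone sketch of the idiom (class, field, and sentinel names are illustrative, not taken from this patch):

    import java.util.Arrays;

    public class FillDemo {
      // Sentinel meaning "this char is not a numeral"; the real filter defines its own constant.
      private static final char NO_NUMERAL = Character.MAX_VALUE;
      private static final char[] numerals = new char[0x10000];

      static {
        Arrays.fill(numerals, NO_NUMERAL); // one call instead of a hand-written for loop
        numerals['일'] = 1;                // then overwrite the handful of real entries
      }

      public static void main(String[] args) {
        System.out.println((int) numerals['일']);        // 1
        System.out.println(numerals['a'] == NO_NUMERAL); // true
      }
    }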
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilter.java
index 4fa75241a71..a5ab4d89ac3 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilter.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilter.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.ko;
 
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -36,7 +37,7 @@ public final class KoreanPartOfSpeechStopFilter extends FilteringTokenFilter {
   /**
    * Default list of tags to filter.
    */
-  public static final Set<POS.Tag> DEFAULT_STOP_TAGS = Arrays.asList(
+  public static final Set<POS.Tag> DEFAULT_STOP_TAGS = new HashSet<>(Arrays.asList(
       POS.Tag.E,
       POS.Tag.IC,
       POS.Tag.J,
@@ -55,7 +56,7 @@ public final class KoreanPartOfSpeechStopFilter extends FilteringTokenFilter {
       POS.Tag.UNA,
      POS.Tag.NA,
      POS.Tag.VSV
-  ).stream().collect(Collectors.toSet());
+  ));
 
   /**
    * Create a new {@link KoreanPartOfSpeechStopFilter} with the default
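Aside: `new HashSet<>(Arrays.asList(...))` builds exactly the same set as the `stream().collect(Collectors.toSet())` pipeline it replaces, with one less layer of machinery. A self-contained comparison (the `Tag` enum is a stand-in for `POS.Tag`):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.stream.Collectors;

    public class SetInitDemo {
      enum Tag { E, IC, J }

      public static void main(String[] args) {
        // Before: a stream pipeline used only to copy a fixed list into a set.
        Set<Tag> viaStream =
            Arrays.asList(Tag.E, Tag.IC, Tag.J).stream().collect(Collectors.toSet());
        // After: the HashSet copy constructor states the intent directly.
        Set<Tag> viaConstructor = new HashSet<>(Arrays.asList(Tag.E, Tag.IC, Tag.J));
        System.out.println(viaStream.equals(viaConstructor)); // true
      }
    }

Note that both variants are mutable; wrapping the result in `Collections.unmodifiableSet` would be needed to actually freeze a shared default like `DEFAULT_STOP_TAGS`.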
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
index b408aa75820..3efccaf575e 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
@@ -340,7 +340,7 @@ public final class KoreanTokenizer extends Tokenizer {
 
   }
 
-  private void add(Dictionary dict, Position fromPosData, int wordPos, int endPos, int wordID, Type type) throws IOException {
+  private void add(Dictionary dict, Position fromPosData, int wordPos, int endPos, int wordID, Type type) {
     final POS.Tag leftPOS = dict.getLeftPOS(wordID);
     final int wordCost = dict.getWordCost(wordID);
     final int leftID = dict.getLeftId(wordID);
@@ -533,15 +533,9 @@ public final class KoreanTokenizer extends Tokenizer {
     int userWordMaxPosAhead = -1;
 
     // Advances over each position (character):
-    while (true) {
-
-      if (buffer.get(pos) == -1) {
-        // End
-        break;
-      }
-
+    while (buffer.get(pos) != -1) {
       final Position posData = positions.get(pos);
-      final boolean isFrontier = positions.getNextPos() == pos+1;
+      final boolean isFrontier = positions.getNextPos() == pos + 1;
 
       if (posData.count == 0) {
         // No arcs arrive here; move to next position:
@@ -585,9 +579,9 @@ public final class KoreanTokenizer extends Tokenizer {
       int leastIDX = -1;
       int leastCost = Integer.MAX_VALUE;
       Position leastPosData = null;
-      for(int pos2=pos;pos2 clazz, String suffix) throws IOException {
+  public static InputStream getClassResource(Class<?> clazz, String suffix) throws IOException {
     final InputStream is = clazz.getResourceAsStream(clazz.getSimpleName() + suffix);
     if (is == null) {
       throw new FileNotFoundException("Not in classpath: " + clazz.getName().replace('.', '/') + suffix);
     }
@@ -236,7 +230,7 @@
     int offset = wordId + 6;
     boolean hasSinglePos = hasSinglePOS(wordId);
     if (hasSinglePos == false) {
-      offset ++; // skip rightPOS
+      offset++; // skip rightPOS
     }
     int length = buffer.get(offset++);
     if (length == 0) {
@@ -264,7 +258,7 @@
   private String readString(int offset) {
     int strOffset = offset;
     int len = buffer.get(strOffset++);
-    char text[] = new char[len];
+    char[] text = new char[len];
     for (int i = 0; i < len; i++) {
       text[i] = buffer.getChar(strOffset + (i<<1));
     }
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java
index ac5230c844d..59d4dacd1fb 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java
@@ -38,7 +38,7 @@ public final class CharacterDefinition {
 
   // only used internally for lookup:
   enum CharacterClass {
-    NGRAM, DEFAULT, SPACE, SYMBOL, NUMERIC, ALPHA, CYRILLIC, GREEK, HIRAGANA, KATAKANA, KANJI, HANGUL, HANJA, HANJANUMERIC;
+    NGRAM, DEFAULT, SPACE, SYMBOL, NUMERIC, ALPHA, CYRILLIC, GREEK, HIRAGANA, KATAKANA, KANJI, HANGUL, HANJA, HANJANUMERIC
   }
 
   private final byte[] characterCategoryMap = new byte[0x10000];
@@ -108,11 +108,7 @@ public final class CharacterDefinition {
   }
 
   public boolean hasCoda(char ch){
-    if (((ch - 0xAC00) % 0x001C) == 0) {
-      return false;
-    } else {
-      return true;
-    }
+    return ((ch - 0xAC00) % 0x001C) != 0;
   }
 
   public static byte lookupCharacterClass(String characterClassName) {
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
index 95d0e8b6111..36cbe1519d5 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
@@ -40,7 +40,7 @@ public final class ConnectionCosts {
 
   private ConnectionCosts() throws IOException {
     InputStream is = null;
-    ByteBuffer buffer = null;
+    ByteBuffer buffer;
     boolean success = false;
     try {
       is = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX);
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java
index 7f9bec6265d..a79b35b4dfe 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java
@@ -28,7 +28,7 @@ public final class TokenInfoFST {
   private final FST<Long> fst;
 
   private final int cacheCeiling;
-  private final FST.Arc<Long> rootCache[];
+  private final FST.Arc<Long>[] rootCache;
 
   public final Long NO_OUTPUT;
 
@@ -41,7 +41,7 @@ public final class TokenInfoFST {
 
   @SuppressWarnings({"rawtypes","unchecked"})
   private FST.Arc<Long>[] cacheRootArcs() throws IOException {
-    FST.Arc<Long> rootCache[] = new FST.Arc[1+(cacheCeiling-0xAC00)];
+    FST.Arc<Long>[] rootCache = new FST.Arc[1+(cacheCeiling-0xAC00)];
     FST.Arc<Long> firstArc = new FST.Arc<>();
     fst.getFirstArc(firstArc);
     FST.Arc<Long> arc = new FST.Arc<>();
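Aside: several hunks here and below (`rootCache`, `text` in `readString`, `segmentations` in the next file) move C-style array brackets off the variable name and onto the type. Both forms compile to the same thing; the change is purely declarative style. A tiny sketch with illustrative field names:

    public class ArrayDeclDemo {
      int segmentations[][] = new int[2][3]; // C-style: legal Java, but discouraged
      int[][] idiomatic = new int[2][3];     // Java-style: the type is "int[][]", so say so

      public static void main(String[] args) {
        ArrayDeclDemo d = new ArrayDeclDemo();
        System.out.println(d.segmentations.length == d.idiomatic.length); // true
      }
    }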
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
index e04d133335e..186990e0b57 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
@@ -37,26 +37,26 @@ public final class UserDictionary implements Dictionary {
 
   // text -> wordID
   private final TokenInfoFST fst;
 
-  public static final int WORD_COST = -100000;
+  private static final int WORD_COST = -100000;
 
   // NNG left
-  public static final short LEFT_ID = 1781;
+  private static final short LEFT_ID = 1781;
 
   // NNG right
-  public static final short RIGHT_ID = 3533;
+  private static final short RIGHT_ID = 3533;
 
   // NNG right with hangul and a coda on the last char
-  public static final short RIGHT_ID_T = 3535;
+  private static final short RIGHT_ID_T = 3535;
 
   // NNG right with hangul and no coda on the last char
-  public static final short RIGHT_ID_F = 3534;
+  private static final short RIGHT_ID_F = 3534;
 
   // length, length... indexed by compound ID or null for simple noun
-  private final int segmentations[][];
+  private final int[][] segmentations;
   private final short[] rightIds;
 
   public static UserDictionary open(Reader reader) throws IOException {
 
     BufferedReader br = new BufferedReader(reader);
-    String line = null;
+    String line;
     List<String[]> entries = new ArrayList<>();
 
     // text + optional segmentations
@@ -127,7 +127,7 @@ public final class UserDictionary implements Dictionary {
       scratch.grow(token.length());
       scratch.setLength(token.length());
       for (int i = 0; i < token.length(); i++) {
-        scratch.setIntAt(i, (int) token.charAt(i));
+        scratch.setIntAt(i, token.charAt(i));
      }
      fstBuilder.add(scratch.get(), ord);
      lastToken = token;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java
index 6a19b1b56a8..fec02dbcae0 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java
@@ -117,8 +117,8 @@ abstract class BinaryDictionaryWriter {
     boolean hasSinglePOS = (leftPOS == rightPOS);
     if (posType != POS.Type.MORPHEME && expression.length() > 0) {
       String[] exprTokens = expression.split("\\+");
-      for (int i = 0; i < exprTokens.length; i++) {
-        String[] tokenSplit = exprTokens[i].split("/");
+      for (String exprToken : exprTokens) {
+        String[] tokenSplit = exprToken.split("/");
         assert tokenSplit.length == 3;
         String surfaceForm = tokenSplit[0].trim();
         if (surfaceForm.isEmpty() == false) {
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CSVUtil.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CSVUtil.java
index f911b5585c2..27380e94bd6 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CSVUtil.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CSVUtil.java
@@ -70,7 +70,7 @@ public final class CSVUtil {
       return new String[0];
     }
 
-    return result.toArray(new String[result.size()]);
+    return result.toArray(new String[0]);
   }
 
   private static String unQuoteUnEscape(String original) {
@@ -84,7 +84,7 @@ public final class CSVUtil {
       }
 
       // Unescape
-      if (result.indexOf(ESCAPED_QUOTE) >= 0) {
+      if (result.contains(ESCAPED_QUOTE)) {
         result = result.replace(ESCAPED_QUOTE, "\"");
       }
     }
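Aside on `result.toArray(new String[0])`: passing a zero-length seed array lets the JDK allocate a correctly sized array internally, and on current JVMs this is at least as fast as presizing with `new String[result.size()]`. A quick standalone check (list contents invented):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class ToArrayDemo {
      public static void main(String[] args) {
        List<String> result = new ArrayList<>(Arrays.asList("가", "나", "다"));
        String[] out = result.toArray(new String[0]); // JDK sizes the array for us
        System.out.println(out.length); // 3
      }
    }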
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java
index e4c288b9b2c..4f4f0b7be01 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java
@@ -114,7 +114,7 @@ class TokenInfoDictionaryBuilder {
       scratch.grow(surfaceForm.length());
       scratch.setLength(surfaceForm.length());
       for (int i = 0; i < surfaceForm.length(); i++) {
-        scratch.setIntAt(i, (int) surfaceForm.charAt(i));
+        scratch.setIntAt(i, surfaceForm.charAt(i));
       }
       fstBuilder.add(scratch.get(), ord);
     }
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java
index cc1ee00e725..8e7cd7b9e48 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.util.ResourceLoader;
 
 /** Fake resource loader for tests: works if you want to fake reading a single file */
 class StringMockResourceLoader implements ResourceLoader {
-  String text;
+  private String text;
 
   public StringMockResourceLoader(String text) {
     this.text = text;
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanAnalyzer.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanAnalyzer.java
index 2ba2f37c8fe..d82409fc9c0 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanAnalyzer.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanAnalyzer.java
@@ -18,9 +18,9 @@ package org.apache.lucene.analysis.ko;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Random;
 import java.util.Set;
-import java.util.stream.Collectors;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -41,7 +41,7 @@ public class TestKoreanAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testStopTags() throws IOException {
-    Set<POS.Tag> stopTags = Arrays.asList(POS.Tag.NNP, POS.Tag.NNG).stream().collect(Collectors.toSet());
+    Set<POS.Tag> stopTags = new HashSet<>(Arrays.asList(POS.Tag.NNP, POS.Tag.NNG));
     Analyzer a = new KoreanAnalyzer(null, KoreanTokenizer.DecompoundMode.DISCARD, stopTags, false);
     assertAnalyzesTo(a, "한국은 대단한 나라입니다.",
         new String[]{"은", "대단", "하", "ᆫ", "이", "ᄇ니다"},
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanNumberFilterFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanNumberFilterFactory.java
index d5499330b34..8564521d701 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanNumberFilterFactory.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanNumberFilterFactory.java
@@ -50,12 +50,12 @@ public class TestKoreanNumberFilterFactory extends BaseTokenStreamTestCase {
   }
 
   /** Test that bogus arguments result in exception */
-  public void testBogusArguments() throws Exception {
-    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
-      new KoreanNumberFilterFactory(new HashMap<String, String>() {{
-        put("bogusArg", "bogusValue");
-      }});
-    });
+  public void testBogusArguments() {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () ->
+        new KoreanNumberFilterFactory(new HashMap<String, String>() {{
+          put("bogusArg", "bogusValue");
+        }})
+    );
     assertTrue(expected.getMessage().contains("Unknown parameters"));
   }
 }
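Aside: the `testBogusArguments` rewrites in these test files all follow one pattern. The statement lambda `() -> { new Factory(...); }` becomes an expression lambda, which is why the braces and inner semicolon disappear, and the unused `throws Exception` clause is dropped. Schematically, with JUnit 4.13's `assertThrows` standing in for LuceneTestCase's `expectThrows`, and a hypothetical `configure` helper in place of the real factory constructor:

    import static org.junit.Assert.assertThrows;
    import static org.junit.Assert.assertTrue;

    public class ExpectThrowsDemo {
      static void configure(String key) {
        throw new IllegalArgumentException("Unknown parameters: " + key);
      }

      public static void main(String[] args) {
        // Expression lambda: the body is a single expression, so no braces are needed.
        IllegalArgumentException expected =
            assertThrows(IllegalArgumentException.class, () -> configure("bogusArg"));
        assertTrue(expected.getMessage().contains("Unknown parameters"));
      }
    }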
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java
index 5486f3fc277..72e5c58ec13 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java
@@ -32,7 +32,7 @@ import org.apache.lucene.util.Version;
  */
 public class TestKoreanPartOfSpeechStopFilterFactory extends BaseTokenStreamTestCase {
   public void testStopTags() throws IOException {
-    KoreanTokenizerFactory tokenizerFactory = new KoreanTokenizerFactory(new HashMap<String, String>());
+    KoreanTokenizerFactory tokenizerFactory = new KoreanTokenizerFactory(new HashMap<>());
     tokenizerFactory.inform(new StringMockResourceLoader(""));
     TokenStream ts = tokenizerFactory.create();
     ((Tokenizer)ts).setReader(new StringReader(" 한국은 대단한 나라입니다."));
@@ -47,13 +47,13 @@ public class TestKoreanPartOfSpeechStopFilterFactory extends BaseTokenStreamTest
   }
 
   /** Test that bogus arguments result in exception */
-  public void testBogusArguments() throws Exception {
-    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
-      new KoreanPartOfSpeechStopFilterFactory(new HashMap<String, String>() {{
-        put("luceneMatchVersion", Version.LATEST.toString());
-        put("bogusArg", "bogusValue");
-      }});
-    });
+  public void testBogusArguments() {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () ->
+        new KoreanPartOfSpeechStopFilterFactory(new HashMap<String, String>() {{
+          put("luceneMatchVersion", Version.LATEST.toString());
+          put("bogusArg", "bogusValue");
+        }})
+    );
     assertTrue(expected.getMessage().contains("Unknown parameters"));
   }
 }
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java
index f058a44d0df..ca9a8ea43d6 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java
@@ -31,8 +31,8 @@ public class TestKoreanReadingFormFilterFactory extends BaseTokenStreamTestCase
   public void testReadings() throws IOException {
     KoreanTokenizerFactory tokenizerFactory = new KoreanTokenizerFactory(new HashMap<>());
     tokenizerFactory.inform(new StringMockResourceLoader(""));
-    TokenStream tokenStream = tokenizerFactory.create();
-    ((Tokenizer)tokenStream).setReader(new StringReader("丞相"));
+    Tokenizer tokenStream = tokenizerFactory.create();
+    tokenStream.setReader(new StringReader("丞相"));
     KoreanReadingFormFilterFactory filterFactory = new KoreanReadingFormFilterFactory(new HashMap<>());
     assertTokenStreamContents(filterFactory.create(tokenStream),
         new String[] { "승상" }
@@ -40,12 +40,12 @@
     );
   }
 
   /** Test that bogus arguments result in exception */
-  public void testBogusArguments() throws Exception {
-    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
-      new KoreanReadingFormFilterFactory(new HashMap<String, String>() {{
-        put("bogusArg", "bogusValue");
-      }});
-    });
+  public void testBogusArguments() {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () ->
+        new KoreanReadingFormFilterFactory(new HashMap<String, String>() {{
+          put("bogusArg", "bogusValue");
+        }})
+    );
     assertTrue(expected.getMessage().contains("Unknown parameters"));
   }
 }
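Aside: the recurring `TokenStream ts` → `Tokenizer ts` change works because `setReader` is declared on `Tokenizer`, not on `TokenStream`; declaring the variable with the type the factory actually returns removes the downcast. The shape of the change, with minimal stand-in classes rather than the real Lucene API:

    import java.io.Reader;
    import java.io.StringReader;

    public class CastDemo {
      static class TokenStream {}
      static class Tokenizer extends TokenStream {
        void setReader(Reader r) { /* consume input */ }
      }
      static Tokenizer create() { return new Tokenizer(); }

      public static void main(String[] args) {
        TokenStream ts = create();                          // before: supertype declaration...
        ((Tokenizer) ts).setReader(new StringReader("가")); // ...forces a cast
        Tokenizer t = create();                             // after: most specific type
        t.setReader(new StringReader("나"));                // no cast needed
      }
    }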
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java
index 9ed6566a8e8..132f244ff2f 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java
@@ -33,8 +33,8 @@ public class TestKoreanTokenizerFactory extends BaseTokenStreamTestCase {
   public void testSimple() throws IOException {
     KoreanTokenizerFactory factory = new KoreanTokenizerFactory(Collections.emptyMap());
     factory.inform(new StringMockResourceLoader(""));
-    TokenStream ts = factory.create(newAttributeFactory());
-    ((Tokenizer)ts).setReader(new StringReader("안녕하세요"));
+    Tokenizer ts = factory.create(newAttributeFactory());
+    ts.setReader(new StringReader("안녕하세요"));
     assertTokenStreamContents(ts,
         new String[] { "안녕", "하", "시", "어요" },
         new int[] { 0, 2, 3, 3 },
@@ -50,8 +50,8 @@ public class TestKoreanTokenizerFactory extends BaseTokenStreamTestCase {
     args.put("decompoundMode", "discard");
     KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
     factory.inform(new StringMockResourceLoader(""));
-    TokenStream ts = factory.create(newAttributeFactory());
-    ((Tokenizer)ts).setReader(new StringReader("갠지스강"));
+    Tokenizer ts = factory.create(newAttributeFactory());
+    ts.setReader(new StringReader("갠지스강"));
     assertTokenStreamContents(ts,
         new String[] { "갠지스", "강" }
     );
@@ -62,8 +62,8 @@ public class TestKoreanTokenizerFactory extends BaseTokenStreamTestCase {
     args.put("decompoundMode", "none");
     KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
     factory.inform(new StringMockResourceLoader(""));
-    TokenStream ts = factory.create(newAttributeFactory());
-    ((Tokenizer)ts).setReader(new StringReader("갠지스강"));
+    Tokenizer ts = factory.create(newAttributeFactory());
+    ts.setReader(new StringReader("갠지스강"));
     assertTokenStreamContents(ts,
         new String[] { "갠지스강" }
     );
@@ -74,8 +74,8 @@ public class TestKoreanTokenizerFactory extends BaseTokenStreamTestCase {
     args.put("decompoundMode", "mixed");
     KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
     factory.inform(new StringMockResourceLoader(""));
-    TokenStream ts = factory.create(newAttributeFactory());
-    ((Tokenizer)ts).setReader(new StringReader("갠지스강"));
+    Tokenizer ts = factory.create(newAttributeFactory());
+    ts.setReader(new StringReader("갠지스강"));
     assertTokenStreamContents(ts,
         new String[] { "갠지스강", "갠지스", "강" }
     );
@@ -94,8 +94,8 @@ public class TestKoreanTokenizerFactory extends BaseTokenStreamTestCase {
     args.put("userDictionary", "userdict.txt");
     KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
     factory.inform(new StringMockResourceLoader(userDict));
-    TokenStream ts = factory.create(newAttributeFactory());
-    ((Tokenizer)ts).setReader(new StringReader("세종시"));
+    Tokenizer ts = factory.create(newAttributeFactory());
+    ts.setReader(new StringReader("세종시"));
     assertTokenStreamContents(ts,
         new String[] { "세종", "시" }
     );
@@ -109,8 +109,8 @@ public class TestKoreanTokenizerFactory extends BaseTokenStreamTestCase {
     args.put("discardPunctuation", "true");
     KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
     factory.inform(new StringMockResourceLoader(""));
-    TokenStream ts = factory.create(newAttributeFactory());
-    ((Tokenizer)ts).setReader(new StringReader("10.1 인치 모니터"));
+    Tokenizer ts = factory.create(newAttributeFactory());
+    ts.setReader(new StringReader("10.1 인치 모니터"));
     assertTokenStreamContents(ts,
         new String[] { "10", "1", "인치", "모니터" }
     );
@@ -124,20 +124,20 @@ public class TestKoreanTokenizerFactory extends BaseTokenStreamTestCase {
     args.put("discardPunctuation", "false");
     KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
     factory.inform(new StringMockResourceLoader(""));
-    TokenStream ts = factory.create(newAttributeFactory());
-    ((Tokenizer)ts).setReader(new StringReader("10.1 인치 모니터"));
+    Tokenizer ts = factory.create(newAttributeFactory());
+    ts.setReader(new StringReader("10.1 인치 모니터"));
     assertTokenStreamContents(ts,
         new String[] { "10", ".", "1", " ", "인치", " ", "모니터" }
     );
   }
 
   /** Test that bogus arguments result in exception */
-  public void testBogusArguments() throws Exception {
-    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
-      new KoreanTokenizerFactory(new HashMap<String, String>() {{
-        put("bogusArg", "bogusValue");
-      }});
-    });
+  public void testBogusArguments() {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () ->
+        new KoreanTokenizerFactory(new HashMap<String, String>() {{
+          put("bogusArg", "bogusValue");
+        }})
+    );
     assertTrue(expected.getMessage().contains("Unknown parameters"));
   }
 }
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryTest.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryTest.java
index 9bbf258ab45..bbbc07ee8a5 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryTest.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryTest.java
@@ -136,11 +136,11 @@ public class TokenInfoDictionaryTest extends LuceneTestCase {
       POS.Tag rightPOS = tid.getRightPOS(wordId);
 
       if (type == POS.Type.MORPHEME) {
-        assertTrue(leftPOS == rightPOS);
+        assertSame(leftPOS, rightPOS);
         String reading = tid.getReading(wordId);
         boolean isHanja = charDef.isHanja(surfaceForm.charAt(0));
         if (isHanja) {
-          assertTrue(reading != null);
+          assertNotNull(reading);
           for (int j = 0; j < reading.length(); j++) {
             assertTrue(charDef.isHangul(reading.charAt(j)));
           }
@@ -150,7 +150,7 @@
         }
       } else {
         if (type == POS.Type.COMPOUND) {
-          assertTrue(leftPOS == rightPOS);
+          assertSame(leftPOS, rightPOS);
           assertTrue(leftPOS == POS.Tag.NNG || rightPOS == POS.Tag.NNP);
         }
         Dictionary.Morpheme[] decompound = tid.getMorphemes(wordId, chars, 0, chars.length);
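Aside: the assertion swaps (`assertTrue(x == y)` → `assertSame`, `assertTrue(x != null)` → `assertNotNull`, and below, `assertTrue(len == 2)` → `assertEquals`) verify exactly the same conditions, but the specialized asserts report expected vs. actual values on failure instead of a bare `false`. A compact illustration with plain JUnit asserts (values invented):

    import static org.junit.Assert.assertEquals;
    import static org.junit.Assert.assertNotNull;
    import static org.junit.Assert.assertSame;

    public class AssertStyleDemo {
      enum Tag { NNG, NNP }

      public static void main(String[] args) {
        Tag leftPOS = Tag.NNG;
        Tag rightPOS = Tag.NNG;
        assertSame(leftPOS, rightPOS);        // was: assertTrue(leftPOS == rightPOS)
        String[] decompound = { "세종", "시" };
        assertNotNull(decompound);            // guards the length check below
        assertEquals(2, decompound.length);   // was: assertTrue(decompound.length == 2)
      }
    }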
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/UserDictionaryTest.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/UserDictionaryTest.java
index b008cf39ca6..2f12ba41800 100644
--- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/UserDictionaryTest.java
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/UserDictionaryTest.java
@@ -41,7 +41,8 @@ public class UserDictionaryTest extends LuceneTestCase {
     assertNull(dictionary.getMorphemes(wordIds.get(0), sArray, 0, s.length()));
 
     Dictionary.Morpheme[] decompound = dictionary.getMorphemes(wordIds.get(1), sArray, 0, s.length());
-    assertTrue(decompound.length == 2);
+    assertNotNull(decompound);
+    assertEquals(2, decompound.length);
     assertEquals(decompound[0].posTag, POS.Tag.NNG);
     assertEquals(decompound[0].surfaceForm, "세종");
     assertEquals(decompound[1].posTag, POS.Tag.NNG);
@@ -55,7 +56,7 @@ public class UserDictionaryTest extends LuceneTestCase {
   }
 
   @Test
-  public void testRead() throws IOException {
+  public void testRead() {
     UserDictionary dictionary = TestKoreanTokenizer.readDict();
     assertNotNull(dictionary);
   }