diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java index 1aec333aabc..2cd4cca3031 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java @@ -34,7 +34,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.analysis.ja.dict.BinaryDictionary; -public abstract class BinaryDictionaryWriter { +abstract class BinaryDictionaryWriter { private final static int ID_LIMIT = 8192; private final Class implClazz; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CharacterDefinitionWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CharacterDefinitionWriter.java index 60edabed0d9..cf5a8311a95 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CharacterDefinitionWriter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CharacterDefinitionWriter.java @@ -30,7 +30,7 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.OutputStreamDataOutput; -public final class CharacterDefinitionWriter { +final class CharacterDefinitionWriter { private final byte[] characterCategoryMap = new byte[0x10000]; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java index d1bc8467110..38063c34954 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java @@ -24,7 +24,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -public class ConnectionCostsBuilder { +class ConnectionCostsBuilder { private ConnectionCostsBuilder() { } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java index a629fffa3b4..746a4f74144 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java @@ -29,7 +29,7 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.OutputStreamDataOutput; -public final class ConnectionCostsWriter { +final class ConnectionCostsWriter { private final short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter. private final int forwardSize; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java index bdf83683ea4..373ce0970db 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java @@ -20,10 +20,20 @@ package org.apache.lucene.analysis.ja.util; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Locale; +/** + * Tool to build dictionaries. + */ public class DictionaryBuilder { - public enum DictionaryFormat { IPADIC, UNIDIC } + /** Format of the dictionary. */ + public enum DictionaryFormat { + /** IPADIC format */ + IPADIC, + /** UNIDIC format */ + UNIDIC + } private DictionaryBuilder() { } @@ -42,7 +52,7 @@ public class DictionaryBuilder { } public static void main(String[] args) throws IOException { - DictionaryFormat format = DictionaryFormat.valueOf(args[0].toUpperCase()); + DictionaryFormat format = DictionaryFormat.valueOf(args[0].toUpperCase(Locale.ROOT)); String inputDirName = args[1]; String outputDirName = args[2]; String inputEncoding = args[3]; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java index bbed37b0692..3274fefb15f 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java @@ -37,7 +37,7 @@ import org.apache.lucene.util.fst.PositiveIntOutputs; /** */ -public class TokenInfoDictionaryBuilder { +class TokenInfoDictionaryBuilder { private final String encoding; private final Normalizer.Form normalForm; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java index 81cad4f8aba..8487bd725cc 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java @@ -25,7 +25,7 @@ import java.util.Objects; import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary; import org.apache.lucene.util.fst.FST; -public class TokenInfoDictionaryWriter extends BinaryDictionaryWriter { +class TokenInfoDictionaryWriter extends BinaryDictionaryWriter { private FST fst; TokenInfoDictionaryWriter(int size) { diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java index c3abd456038..4316add3a6c 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java @@ -29,7 +29,7 @@ import java.util.List; import org.apache.lucene.analysis.ja.dict.CharacterDefinition; -public class UnknownDictionaryBuilder { +class UnknownDictionaryBuilder { private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*"; private final String encoding; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryWriter.java index 6809825629d..1144d23f483 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryWriter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryWriter.java @@ -19,12 +19,11 @@ package org.apache.lucene.analysis.ja.util; import java.io.IOException; import java.nio.file.Path; -import java.nio.file.Paths; import org.apache.lucene.analysis.ja.dict.CharacterDefinition; import org.apache.lucene.analysis.ja.dict.UnknownDictionary; -public class UnknownDictionaryWriter extends BinaryDictionaryWriter { +class UnknownDictionaryWriter extends BinaryDictionaryWriter { private final CharacterDefinitionWriter characterDefinition = new CharacterDefinitionWriter(); public UnknownDictionaryWriter(int size) { diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryTest.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryTest.java index 69328d85813..4b9d6bd8622 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryTest.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryTest.java @@ -23,10 +23,9 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import org.apache.lucene.analysis.ja.util.DictionaryBuilder; import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat; import org.apache.lucene.analysis.ja.util.ToStringUtil; -import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryBuilder; -import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryWriter; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.LuceneTestCase; @@ -68,9 +67,13 @@ public class TokenInfoDictionaryTest extends LuceneTestCase { printer.println(entry); } } - TokenInfoDictionaryBuilder builder = new TokenInfoDictionaryBuilder(DictionaryFormat.IPADIC, "utf-8", true); - TokenInfoDictionaryWriter writer = builder.build(dir); - writer.write(dir); + Files.createFile(dir.resolve("unk.def")); + Files.createFile(dir.resolve("char.def")); + try (OutputStream out = Files.newOutputStream(dir.resolve("matrix.def")); + PrintWriter printer = new PrintWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))) { + printer.println("1 1"); + } + DictionaryBuilder.build(DictionaryFormat.IPADIC, dir, dir, "utf-8", true); String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/'); // We must also load the other files (in BinaryDictionary) from the correct path return new TokenInfoDictionary(ResourceScheme.FILE, dir.resolve(dictionaryPath).toString()); diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/UnknownDictionaryTest.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/UnknownDictionaryTest.java similarity index 93% rename from lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/UnknownDictionaryTest.java rename to lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/UnknownDictionaryTest.java index 6330f41cea8..9a0fbc9e76e 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/UnknownDictionaryTest.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/UnknownDictionaryTest.java @@ -14,11 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.analysis.ja.dict; +package org.apache.lucene.analysis.ja.util; - -import org.apache.lucene.analysis.ja.util.CSVUtil; -import org.apache.lucene.analysis.ja.util.UnknownDictionaryWriter; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test;