mirror of https://github.com/apache/lucene.git
LUCENE-8863: enhance Kuromoji DictionaryBuilder tool
added tests enabled ids up to 8191 support loading custom system dictionary from filesystem or classpath
This commit is contained in:
parent
df3011a539
commit
4502065f03
|
@ -136,8 +136,8 @@
|
||||||
</compile>
|
</compile>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="test-tools" depends="compile-tools-tests">
|
<target name="test-tools" depends="install-junit4-taskdef, compile-tools-tests">
|
||||||
<test-macro dataDir="src/tools/test" junit.classpath="tools.test.classpath"/>
|
<test-macro testsDir="${build.dir}/classes/tools-test" workDir="src/tools/test" junit.classpath="tools.test.classpath"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="compile-test" depends="module-build.compile-test, compile-tools-tests"/>
|
<target name="compile-test" depends="module-build.compile-test, compile-tools-tests"/>
|
||||||
|
|
|
@ -23,6 +23,8 @@ import java.io.IOException;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Paths;
|
||||||
import java.nio.channels.Channels;
|
import java.nio.channels.Channels;
|
||||||
import java.nio.channels.ReadableByteChannel;
|
import java.nio.channels.ReadableByteChannel;
|
||||||
|
|
||||||
|
@ -37,6 +39,10 @@ import org.apache.lucene.util.IOUtils;
|
||||||
*/
|
*/
|
||||||
public abstract class BinaryDictionary implements Dictionary {
|
public abstract class BinaryDictionary implements Dictionary {
|
||||||
|
|
||||||
|
enum ResourceScheme {
|
||||||
|
CLASSPATH, FILE
|
||||||
|
}
|
||||||
|
|
||||||
public static final String DICT_FILENAME_SUFFIX = "$buffer.dat";
|
public static final String DICT_FILENAME_SUFFIX = "$buffer.dat";
|
||||||
public static final String TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat";
|
public static final String TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat";
|
||||||
public static final String POSDICT_FILENAME_SUFFIX = "$posDict.dat";
|
public static final String POSDICT_FILENAME_SUFFIX = "$posDict.dat";
|
||||||
|
@ -46,6 +52,8 @@ public abstract class BinaryDictionary implements Dictionary {
|
||||||
public static final String POSDICT_HEADER = "kuromoji_dict_pos";
|
public static final String POSDICT_HEADER = "kuromoji_dict_pos";
|
||||||
public static final int VERSION = 1;
|
public static final int VERSION = 1;
|
||||||
|
|
||||||
|
private final ResourceScheme resourceScheme;
|
||||||
|
private final String resourcePath;
|
||||||
private final ByteBuffer buffer;
|
private final ByteBuffer buffer;
|
||||||
private final int[] targetMapOffsets, targetMap;
|
private final int[] targetMapOffsets, targetMap;
|
||||||
private final String[] posDict;
|
private final String[] posDict;
|
||||||
|
@ -53,6 +61,24 @@ public abstract class BinaryDictionary implements Dictionary {
|
||||||
private final String[] inflFormDict;
|
private final String[] inflFormDict;
|
||||||
|
|
||||||
protected BinaryDictionary() throws IOException {
|
protected BinaryDictionary() throws IOException {
|
||||||
|
this(ResourceScheme.CLASSPATH, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param resourceScheme - scheme for loading resources (FILE or CLASSPATH).
|
||||||
|
* @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH scheme only, use
|
||||||
|
* this class's name as the path.
|
||||||
|
*/
|
||||||
|
protected BinaryDictionary(ResourceScheme resourceScheme, String resourcePath) throws IOException {
|
||||||
|
this.resourceScheme = resourceScheme;
|
||||||
|
if (resourcePath == null) {
|
||||||
|
if (resourceScheme != ResourceScheme.CLASSPATH) {
|
||||||
|
throw new IllegalArgumentException("resourcePath must be supplied with FILE resource scheme");
|
||||||
|
}
|
||||||
|
this.resourcePath = getClass().getName().replace('.', '/');
|
||||||
|
} else {
|
||||||
|
this.resourcePath = resourcePath;
|
||||||
|
}
|
||||||
InputStream mapIS = null, dictIS = null, posIS = null;
|
InputStream mapIS = null, dictIS = null, posIS = null;
|
||||||
int[] targetMapOffsets = null, targetMap = null;
|
int[] targetMapOffsets = null, targetMap = null;
|
||||||
String[] posDict = null;
|
String[] posDict = null;
|
||||||
|
@ -78,7 +104,9 @@ public abstract class BinaryDictionary implements Dictionary {
|
||||||
targetMap[ofs] = accum;
|
targetMap[ofs] = accum;
|
||||||
}
|
}
|
||||||
if (sourceId + 1 != targetMapOffsets.length)
|
if (sourceId + 1 != targetMapOffsets.length)
|
||||||
throw new IOException("targetMap file format broken");
|
throw new IOException("targetMap file format broken; targetMap.length=" + targetMap.length
|
||||||
|
+ ", targetMapOffsets.length=" + targetMapOffsets.length
|
||||||
|
+ ", sourceId=" + sourceId);
|
||||||
targetMapOffsets[sourceId] = targetMap.length;
|
targetMapOffsets[sourceId] = targetMap.length;
|
||||||
mapIS.close(); mapIS = null;
|
mapIS.close(); mapIS = null;
|
||||||
|
|
||||||
|
@ -135,17 +163,33 @@ public abstract class BinaryDictionary implements Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final InputStream getResource(String suffix) throws IOException {
|
protected final InputStream getResource(String suffix) throws IOException {
|
||||||
return getClassResource(getClass(), suffix);
|
switch(resourceScheme) {
|
||||||
|
case CLASSPATH:
|
||||||
|
return getClassResource(resourcePath + suffix);
|
||||||
|
case FILE:
|
||||||
|
return Files.newInputStream(Paths.get(resourcePath + suffix));
|
||||||
|
default:
|
||||||
|
throw new IllegalStateException("unknown resource scheme " + resourceScheme);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// util, reused by ConnectionCosts and CharacterDefinition
|
// util, reused by ConnectionCosts and CharacterDefinition
|
||||||
public static final InputStream getClassResource(Class<?> clazz, String suffix) throws IOException {
|
public static final InputStream getClassResource(Class<?> clazz, String suffix) throws IOException {
|
||||||
final InputStream is = clazz.getResourceAsStream(clazz.getSimpleName() + suffix);
|
final InputStream is = clazz.getResourceAsStream(clazz.getSimpleName() + suffix);
|
||||||
if (is == null)
|
if (is == null) {
|
||||||
throw new FileNotFoundException("Not in classpath: " + clazz.getName().replace('.','/') + suffix);
|
throw new FileNotFoundException("Not in classpath: " + clazz.getName().replace('.','/') + suffix);
|
||||||
|
}
|
||||||
return is;
|
return is;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private InputStream getClassResource(String path) throws IOException {
|
||||||
|
final InputStream is = BinaryDictionary.class.getClassLoader().getResourceAsStream(path);
|
||||||
|
if (is == null) {
|
||||||
|
throw new FileNotFoundException("Not in classpath: " + path);
|
||||||
|
}
|
||||||
|
return is;
|
||||||
|
}
|
||||||
|
|
||||||
public void lookupWordIds(int sourceId, IntsRef ref) {
|
public void lookupWordIds(int sourceId, IntsRef ref) {
|
||||||
ref.ints = targetMap;
|
ref.ints = targetMap;
|
||||||
ref.offset = targetMapOffsets[sourceId];
|
ref.offset = targetMapOffsets[sourceId];
|
||||||
|
@ -155,12 +199,12 @@ public abstract class BinaryDictionary implements Dictionary {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getLeftId(int wordId) {
|
public int getLeftId(int wordId) {
|
||||||
return buffer.getShort(wordId) >>> 3;
|
return (buffer.getShort(wordId) & 0xffff) >>> 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getRightId(int wordId) {
|
public int getRightId(int wordId) {
|
||||||
return buffer.getShort(wordId) >>> 3;
|
return (buffer.getShort(wordId) & 0xffff) >>> 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -36,10 +36,15 @@ public final class TokenInfoDictionary extends BinaryDictionary {
|
||||||
|
|
||||||
private final TokenInfoFST fst;
|
private final TokenInfoFST fst;
|
||||||
|
|
||||||
private TokenInfoDictionary() throws IOException {
|
/**
|
||||||
super();
|
* @param resourceScheme - scheme for loading resources (FILE or CLASSPATH).
|
||||||
|
* @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH scheme only, use
|
||||||
|
* this class's name as the path.
|
||||||
|
*/
|
||||||
|
TokenInfoDictionary(ResourceScheme resourceScheme, String resourcePath) throws IOException {
|
||||||
|
super(resourceScheme, resourcePath);
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
FST<Long> fst = null;
|
FST<Long> fst;
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
is = getResource(FST_FILENAME_SUFFIX);
|
is = getResource(FST_FILENAME_SUFFIX);
|
||||||
|
@ -56,6 +61,10 @@ public final class TokenInfoDictionary extends BinaryDictionary {
|
||||||
// TODO: some way to configure?
|
// TODO: some way to configure?
|
||||||
this.fst = new TokenInfoFST(fst, true);
|
this.fst = new TokenInfoFST(fst, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private TokenInfoDictionary() throws IOException {
|
||||||
|
this(ResourceScheme.CLASSPATH, null);
|
||||||
|
}
|
||||||
|
|
||||||
public TokenInfoFST getFST() {
|
public TokenInfoFST getFST() {
|
||||||
return fst;
|
return fst;
|
||||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.analysis.ja.dict.BinaryDictionary;
|
import org.apache.lucene.analysis.ja.dict.BinaryDictionary;
|
||||||
|
|
||||||
public abstract class BinaryDictionaryWriter {
|
public abstract class BinaryDictionaryWriter {
|
||||||
|
private final static int ID_LIMIT = 8192;
|
||||||
|
|
||||||
protected final Class<? extends BinaryDictionary> implClazz;
|
protected final Class<? extends BinaryDictionary> implClazz;
|
||||||
protected ByteBuffer buffer;
|
protected ByteBuffer buffer;
|
||||||
private int targetMapEndOffset = 0, lastWordId = -1, lastSourceId = -1;
|
private int targetMapEndOffset = 0, lastWordId = -1, lastSourceId = -1;
|
||||||
|
@ -71,7 +73,9 @@ public abstract class BinaryDictionaryWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
String posData = sb.toString();
|
String posData = sb.toString();
|
||||||
|
if (posData.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("POS fields are empty");
|
||||||
|
}
|
||||||
sb.setLength(0);
|
sb.setLength(0);
|
||||||
sb.append(CSVUtil.quoteEscape(posData));
|
sb.append(CSVUtil.quoteEscape(posData));
|
||||||
sb.append(',');
|
sb.append(',');
|
||||||
|
@ -100,6 +104,9 @@ public abstract class BinaryDictionaryWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
|
if (baseForm.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("base form is empty");
|
||||||
|
}
|
||||||
if (!("*".equals(baseForm) || baseForm.equals(entry[0]))) {
|
if (!("*".equals(baseForm) || baseForm.equals(entry[0]))) {
|
||||||
flags |= BinaryDictionary.HAS_BASEFORM;
|
flags |= BinaryDictionary.HAS_BASEFORM;
|
||||||
}
|
}
|
||||||
|
@ -110,8 +117,12 @@ public abstract class BinaryDictionaryWriter {
|
||||||
flags |= BinaryDictionary.HAS_PRONUNCIATION;
|
flags |= BinaryDictionary.HAS_PRONUNCIATION;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert leftId == rightId;
|
if (leftId != rightId) {
|
||||||
assert leftId < 4096; // there are still unused bits
|
throw new IllegalArgumentException("rightId != leftId: " + rightId + " " +leftId);
|
||||||
|
}
|
||||||
|
if (leftId >= ID_LIMIT) {
|
||||||
|
throw new IllegalArgumentException("leftId >= " + ID_LIMIT + ": " + leftId);
|
||||||
|
}
|
||||||
// add pos mapping
|
// add pos mapping
|
||||||
int toFill = 1+leftId - posDict.size();
|
int toFill = 1+leftId - posDict.size();
|
||||||
for (int i = 0; i < toFill; i++) {
|
for (int i = 0; i < toFill; i++) {
|
||||||
|
@ -119,14 +130,19 @@ public abstract class BinaryDictionaryWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
String existing = posDict.get(leftId);
|
String existing = posDict.get(leftId);
|
||||||
assert existing == null || existing.equals(fullPOSData);
|
if (existing != null && existing.equals(fullPOSData) == false) {
|
||||||
|
// TODO: test me
|
||||||
|
throw new IllegalArgumentException("Multiple entries found for leftID=" + leftId);
|
||||||
|
}
|
||||||
posDict.set(leftId, fullPOSData);
|
posDict.set(leftId, fullPOSData);
|
||||||
|
|
||||||
buffer.putShort((short)(leftId << 3 | flags));
|
buffer.putShort((short)(leftId << 3 | flags));
|
||||||
buffer.putShort(wordCost);
|
buffer.putShort(wordCost);
|
||||||
|
|
||||||
if ((flags & BinaryDictionary.HAS_BASEFORM) != 0) {
|
if ((flags & BinaryDictionary.HAS_BASEFORM) != 0) {
|
||||||
assert baseForm.length() < 16;
|
if (baseForm.length() >= 16) {
|
||||||
|
throw new IllegalArgumentException("Length of base form " + baseForm + " is >= 16");
|
||||||
|
}
|
||||||
int shared = sharedPrefix(entry[0], baseForm);
|
int shared = sharedPrefix(entry[0], baseForm);
|
||||||
int suffix = baseForm.length() - shared;
|
int suffix = baseForm.length() - shared;
|
||||||
buffer.put((byte) (shared << 4 | suffix));
|
buffer.put((byte) (shared << 4 | suffix));
|
||||||
|
@ -204,16 +220,17 @@ public abstract class BinaryDictionaryWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addMapping(int sourceId, int wordId) {
|
public void addMapping(int sourceId, int wordId) {
|
||||||
assert wordId > lastWordId : "words out of order: " + wordId + " vs lastID: " + lastWordId;
|
if (wordId <= lastWordId) {
|
||||||
|
throw new IllegalStateException("words out of order: " + wordId + " vs lastID: " + lastWordId);
|
||||||
|
}
|
||||||
|
|
||||||
if (sourceId > lastSourceId) {
|
if (sourceId > lastSourceId) {
|
||||||
assert sourceId > lastSourceId : "source ids out of order: lastSourceId=" + lastSourceId + " vs sourceId=" + sourceId;
|
|
||||||
targetMapOffsets = ArrayUtil.grow(targetMapOffsets, sourceId + 1);
|
targetMapOffsets = ArrayUtil.grow(targetMapOffsets, sourceId + 1);
|
||||||
for (int i = lastSourceId + 1; i <= sourceId; i++) {
|
for (int i = lastSourceId + 1; i <= sourceId; i++) {
|
||||||
targetMapOffsets[i] = targetMapEndOffset;
|
targetMapOffsets[i] = targetMapEndOffset;
|
||||||
}
|
}
|
||||||
} else {
|
} else if (sourceId != lastSourceId) {
|
||||||
assert sourceId == lastSourceId;
|
throw new IllegalStateException("source ids not in increasing order: lastSourceId=" + lastSourceId + " vs sourceId=" + sourceId);
|
||||||
}
|
}
|
||||||
|
|
||||||
targetMap = ArrayUtil.grow(targetMap, targetMapEndOffset + 1);
|
targetMap = ArrayUtil.grow(targetMap, targetMapEndOffset + 1);
|
||||||
|
@ -265,7 +282,9 @@ public abstract class BinaryDictionaryWriter {
|
||||||
}
|
}
|
||||||
prev += delta;
|
prev += delta;
|
||||||
}
|
}
|
||||||
assert sourceId == numSourceIds : "sourceId:"+sourceId+" != numSourceIds:"+numSourceIds;
|
if (sourceId != numSourceIds) {
|
||||||
|
throw new IllegalStateException("sourceId:" + sourceId + " != numSourceIds:" + numSourceIds);
|
||||||
|
}
|
||||||
} finally {
|
} finally {
|
||||||
os.close();
|
os.close();
|
||||||
}
|
}
|
||||||
|
@ -286,7 +305,9 @@ public abstract class BinaryDictionaryWriter {
|
||||||
out.writeByte((byte)0);
|
out.writeByte((byte)0);
|
||||||
} else {
|
} else {
|
||||||
String data[] = CSVUtil.parse(s);
|
String data[] = CSVUtil.parse(s);
|
||||||
assert data.length == 3 : "malformed pos/inflection: " + s;
|
if (data.length != 3) {
|
||||||
|
throw new IllegalArgumentException("Malformed pos/inflection: " + s + "; expected 3 characters");
|
||||||
|
}
|
||||||
out.writeString(data[0]);
|
out.writeString(data[0]);
|
||||||
out.writeString(data[1]);
|
out.writeString(data[1]);
|
||||||
out.writeString(data[2]);
|
out.writeString(data[2]);
|
||||||
|
|
|
@ -96,8 +96,7 @@ public class TokenInfoDictionaryBuilder {
|
||||||
String[] entry = CSVUtil.parse(line);
|
String[] entry = CSVUtil.parse(line);
|
||||||
|
|
||||||
if(entry.length < 13) {
|
if(entry.length < 13) {
|
||||||
System.out.println("Entry in CSV is not valid: " + line);
|
throw new IllegalArgumentException("Entry in CSV is not valid (13 field values expected): " + line);
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] formatted = formatEntry(entry);
|
String[] formatted = formatEntry(entry);
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.analysis.ja.dict;
|
||||||
|
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
|
||||||
|
import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryBuilder;
|
||||||
|
import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryWriter;
|
||||||
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
import org.apache.lucene.util.IntsRefBuilder;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
import static java.io.File.separatorChar;
|
||||||
|
import static org.apache.lucene.analysis.ja.dict.BinaryDictionary.ResourceScheme;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests of TokenInfoDictionary build tools; run using ant test-tools
|
||||||
|
*/
|
||||||
|
public class TokenInfoDictionaryTest extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testPut() throws Exception {
|
||||||
|
TokenInfoDictionary dict = newDictionary("名詞,1,1,2,名詞,一般,*,*,*,*,*,*,*",
|
||||||
|
// "large" id
|
||||||
|
"一般,5000,5000,3,名詞,一般,*,*,*,*,*,*,*");
|
||||||
|
IntsRef wordIdRef = new IntsRefBuilder().get();
|
||||||
|
|
||||||
|
dict.lookupWordIds(0, wordIdRef);
|
||||||
|
int wordId = wordIdRef.ints[wordIdRef.offset];
|
||||||
|
assertEquals(5000, dict.getLeftId(wordId));
|
||||||
|
assertEquals(5000, dict.getRightId(wordId));
|
||||||
|
assertEquals(3, dict.getWordCost(wordId));
|
||||||
|
|
||||||
|
dict.lookupWordIds(1, wordIdRef);
|
||||||
|
wordId = wordIdRef.ints[wordIdRef.offset];
|
||||||
|
assertEquals(1, dict.getLeftId(wordId));
|
||||||
|
assertEquals(1, dict.getRightId(wordId));
|
||||||
|
assertEquals(2, dict.getWordCost(wordId));
|
||||||
|
}
|
||||||
|
|
||||||
|
private TokenInfoDictionary newDictionary(String... entries) throws Exception {
|
||||||
|
Path dir = createTempDir();
|
||||||
|
try (OutputStream out = Files.newOutputStream(dir.resolve("test.csv"));
|
||||||
|
PrintWriter printer = new PrintWriter(new OutputStreamWriter(out, "utf-8"))) {
|
||||||
|
for (String entry : entries) {
|
||||||
|
printer.println(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TokenInfoDictionaryBuilder builder = new TokenInfoDictionaryBuilder(DictionaryFormat.IPADIC, "utf-8", true);
|
||||||
|
TokenInfoDictionaryWriter writer = builder.build(dir.toString());
|
||||||
|
writer.write(dir.toString());
|
||||||
|
String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', separatorChar);
|
||||||
|
// We must also load the other files (in BinaryDictionary) from the correct path
|
||||||
|
return new TokenInfoDictionary(ResourceScheme.FILE, dir.resolve(dictionaryPath).toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPutException() throws Exception {
|
||||||
|
// too few columns
|
||||||
|
expectThrows(IllegalArgumentException.class, () -> newDictionary("KANJI,1,1,1,名詞,一般,*,*,*,*,*"));
|
||||||
|
// left id != right id
|
||||||
|
expectThrows(IllegalArgumentException.class, () -> newDictionary("KANJI,1285,1,1,名詞,一般,*,*,*,*,*,*,*"));
|
||||||
|
// left id != right id
|
||||||
|
expectThrows(IllegalArgumentException.class, () -> newDictionary("KANJI,1285,1,1,名詞,一般,*,*,*,*,*,*,*"));
|
||||||
|
// id too large
|
||||||
|
expectThrows(IllegalArgumentException.class, () -> newDictionary("KANJI,8192,8192,1,名詞,一般,*,*,*,*,*,*,*"));
|
||||||
|
}
|
||||||
|
}
|
|
@ -29,19 +29,9 @@ public class UnknownDictionaryTest extends LuceneTestCase {
|
||||||
public void testPutCharacterCategory() {
|
public void testPutCharacterCategory() {
|
||||||
UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
|
UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
|
||||||
|
|
||||||
try{
|
expectThrows(Exception.class, () -> unkDic.putCharacterCategory(0, "DUMMY_NAME"));
|
||||||
unkDic.putCharacterCategory(0, "DUMMY_NAME");
|
|
||||||
fail();
|
|
||||||
} catch(Exception e) {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
try{
|
expectThrows(Exception.class, () -> unkDic.putCharacterCategory(-1, "KATAKANA"));
|
||||||
unkDic.putCharacterCategory(-1, "KATAKANA");
|
|
||||||
fail();
|
|
||||||
} catch(Exception e) {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
unkDic.putCharacterCategory(0, "DEFAULT");
|
unkDic.putCharacterCategory(0, "DEFAULT");
|
||||||
unkDic.putCharacterCategory(1, "GREEK");
|
unkDic.putCharacterCategory(1, "GREEK");
|
||||||
|
@ -53,12 +43,8 @@ public class UnknownDictionaryTest extends LuceneTestCase {
|
||||||
@Test
|
@Test
|
||||||
public void testPut() {
|
public void testPut() {
|
||||||
UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
|
UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
|
||||||
try{
|
expectThrows(NumberFormatException.class, () ->
|
||||||
unkDic.put(CSVUtil.parse("KANJI,1285,11426,名詞,一般,*,*,*,*,*,*,*"));
|
unkDic.put(CSVUtil.parse("KANJI,1285,11426,名詞,一般,*,*,*,*,*,*,*")));
|
||||||
fail();
|
|
||||||
} catch(Exception e){
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
String entry1 = "ALPHA,1285,1285,13398,名詞,一般,*,*,*,*,*,*,*";
|
String entry1 = "ALPHA,1285,1285,13398,名詞,一般,*,*,*,*,*,*,*";
|
||||||
String entry2 = "HIRAGANA,1285,1285,13069,名詞,一般,*,*,*,*,*,*,*";
|
String entry2 = "HIRAGANA,1285,1285,13069,名詞,一般,*,*,*,*,*,*,*";
|
||||||
|
|
Loading…
Reference in New Issue