LUCENE-8871: Fix precommit failures.

commit 23b6a3cd3a
parent 754ce1f437
Author: Adrien Grand
Date:   2019-06-27 11:56:15 +02:00

11 changed files with 29 additions and 20 deletions

@@ -34,7 +34,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.analysis.ja.dict.BinaryDictionary;
-public abstract class BinaryDictionaryWriter {
+abstract class BinaryDictionaryWriter {
   private final static int ID_LIMIT = 8192;
   private final Class<? extends BinaryDictionary> implClazz;
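
With the writer and builder classes in this commit narrowed to package-private, DictionaryBuilder remains the public entry point for building a dictionary. A minimal sketch of driving it programmatically; the class name BuildKuromojiDictionary and the paths are illustrative, and the build(...) signature is assumed to match the call used by the TokenInfoDictionaryTest change further down:

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.lucene.analysis.ja.util.DictionaryBuilder;
import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;

public class BuildKuromojiDictionary {
  public static void main(String[] args) throws IOException {
    // Directory holding the MeCab-style source files (*.csv, matrix.def, char.def, unk.def).
    Path inputDir = Paths.get("mecab-ipadic");
    // Directory the binary dictionary is written to (illustrative path).
    Path outputDir = Paths.get("kuromoji-out");
    // Arguments as used by the test in this commit: format, input dir, output dir,
    // input encoding, and whether to normalize entries.
    DictionaryBuilder.build(DictionaryFormat.IPADIC, inputDir, outputDir, "utf-8", true);
  }
}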

@@ -30,7 +30,7 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.OutputStreamDataOutput;
-public final class CharacterDefinitionWriter {
+final class CharacterDefinitionWriter {
   private final byte[] characterCategoryMap = new byte[0x10000];

@@ -24,7 +24,7 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
-public class ConnectionCostsBuilder {
+class ConnectionCostsBuilder {
   private ConnectionCostsBuilder() {
   }

@@ -29,7 +29,7 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.OutputStreamDataOutput;
-public final class ConnectionCostsWriter {
+final class ConnectionCostsWriter {
   private final short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
   private final int forwardSize;

@@ -20,10 +20,20 @@ package org.apache.lucene.analysis.ja.util;
 import java.io.IOException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.Locale;
+/**
+ * Tool to build dictionaries.
+ */
 public class DictionaryBuilder {
-  public enum DictionaryFormat { IPADIC, UNIDIC }
+  /** Format of the dictionary. */
+  public enum DictionaryFormat {
+    /** IPADIC format */
+    IPADIC,
+    /** UNIDIC format */
+    UNIDIC
+  }
   private DictionaryBuilder() {
   }
@@ -42,7 +52,7 @@ public class DictionaryBuilder {
   }
   public static void main(String[] args) throws IOException {
-    DictionaryFormat format = DictionaryFormat.valueOf(args[0].toUpperCase());
+    DictionaryFormat format = DictionaryFormat.valueOf(args[0].toUpperCase(Locale.ROOT));
     String inputDirName = args[1];
     String outputDirName = args[2];
     String inputEncoding = args[3];
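
For context on the Locale.ROOT change above: the precommit forbidden-apis check rejects the no-argument String.toUpperCase() because its result depends on the JVM's default locale. A minimal sketch of the failure mode it guards against; the class name LocaleUpperCaseDemo is illustrative and not part of this commit:

import java.util.Locale;

public class LocaleUpperCaseDemo {
  public static void main(String[] args) {
    // Under a Turkish locale, 'i' uppercases to the dotted capital 'İ' (U+0130),
    // so the result no longer matches the enum constant name "IPADIC" and
    // DictionaryFormat.valueOf(...) would throw IllegalArgumentException.
    System.out.println("ipadic".toUpperCase(new Locale("tr")));  // İPADİC
    // Locale.ROOT applies locale-independent case mapping.
    System.out.println("ipadic".toUpperCase(Locale.ROOT));       // IPADIC
  }
}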

@@ -37,7 +37,7 @@ import org.apache.lucene.util.fst.PositiveIntOutputs;
 /**
  */
-public class TokenInfoDictionaryBuilder {
+class TokenInfoDictionaryBuilder {
   private final String encoding;
   private final Normalizer.Form normalForm;

@@ -25,7 +25,7 @@ import java.util.Objects;
 import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary;
 import org.apache.lucene.util.fst.FST;
-public class TokenInfoDictionaryWriter extends BinaryDictionaryWriter {
+class TokenInfoDictionaryWriter extends BinaryDictionaryWriter {
   private FST<Long> fst;
   TokenInfoDictionaryWriter(int size) {

@@ -29,7 +29,7 @@ import java.util.List;
 import org.apache.lucene.analysis.ja.dict.CharacterDefinition;
-public class UnknownDictionaryBuilder {
+class UnknownDictionaryBuilder {
   private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";
   private final String encoding;

@@ -19,12 +19,11 @@ package org.apache.lucene.analysis.ja.util;
 import java.io.IOException;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import org.apache.lucene.analysis.ja.dict.CharacterDefinition;
 import org.apache.lucene.analysis.ja.dict.UnknownDictionary;
-public class UnknownDictionaryWriter extends BinaryDictionaryWriter {
+class UnknownDictionaryWriter extends BinaryDictionaryWriter {
   private final CharacterDefinitionWriter characterDefinition = new CharacterDefinitionWriter();
   public UnknownDictionaryWriter(int size) {

@@ -23,10 +23,9 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import org.apache.lucene.analysis.ja.util.DictionaryBuilder;
 import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
 import org.apache.lucene.analysis.ja.util.ToStringUtil;
-import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryBuilder;
-import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryWriter;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.LuceneTestCase;
@@ -68,9 +67,13 @@ public class TokenInfoDictionaryTest extends LuceneTestCase {
         printer.println(entry);
       }
     }
-    TokenInfoDictionaryBuilder builder = new TokenInfoDictionaryBuilder(DictionaryFormat.IPADIC, "utf-8", true);
-    TokenInfoDictionaryWriter writer = builder.build(dir);
-    writer.write(dir);
+    Files.createFile(dir.resolve("unk.def"));
+    Files.createFile(dir.resolve("char.def"));
+    try (OutputStream out = Files.newOutputStream(dir.resolve("matrix.def"));
+         PrintWriter printer = new PrintWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))) {
+      printer.println("1 1");
+    }
+    DictionaryBuilder.build(DictionaryFormat.IPADIC, dir, dir, "utf-8", true);
     String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
     // We must also load the other files (in BinaryDictionary) from the correct path
     return new TokenInfoDictionary(ResourceScheme.FILE, dir.resolve(dictionaryPath).toString());
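
Note on the test change above: because TokenInfoDictionaryBuilder and TokenInfoDictionaryWriter are no longer public, the test now runs the full DictionaryBuilder.build pipeline, which also consumes unk.def, char.def and matrix.def. The test therefore creates empty unk.def and char.def files and a matrix.def whose single line, "1 1", is presumably just the forward/backward size header of an otherwise empty connection-cost matrix.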

@@ -14,11 +14,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.analysis.ja.dict;
+package org.apache.lucene.analysis.ja.util;
-import org.apache.lucene.analysis.ja.util.CSVUtil;
-import org.apache.lucene.analysis.ja.util.UnknownDictionaryWriter;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Test;