mirror of https://github.com/apache/lucene.git
LUCENE-8871: Fix precommit failures.
This commit is contained in:
parent
754ce1f437
commit
23b6a3cd3a
|
@ -34,7 +34,7 @@ import org.apache.lucene.util.ArrayUtil;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.ja.dict.BinaryDictionary;
|
import org.apache.lucene.analysis.ja.dict.BinaryDictionary;
|
||||||
|
|
||||||
public abstract class BinaryDictionaryWriter {
|
abstract class BinaryDictionaryWriter {
|
||||||
private final static int ID_LIMIT = 8192;
|
private final static int ID_LIMIT = 8192;
|
||||||
|
|
||||||
private final Class<? extends BinaryDictionary> implClazz;
|
private final Class<? extends BinaryDictionary> implClazz;
|
||||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||||
|
|
||||||
public final class CharacterDefinitionWriter {
|
final class CharacterDefinitionWriter {
|
||||||
|
|
||||||
private final byte[] characterCategoryMap = new byte[0x10000];
|
private final byte[] characterCategoryMap = new byte[0x10000];
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,7 @@ import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
public class ConnectionCostsBuilder {
|
class ConnectionCostsBuilder {
|
||||||
|
|
||||||
private ConnectionCostsBuilder() {
|
private ConnectionCostsBuilder() {
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||||
|
|
||||||
public final class ConnectionCostsWriter {
|
final class ConnectionCostsWriter {
|
||||||
|
|
||||||
private final short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
|
private final short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
|
||||||
private final int forwardSize;
|
private final int forwardSize;
|
||||||
|
|
|
@ -20,10 +20,20 @@ package org.apache.lucene.analysis.ja.util;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tool to build dictionaries.
|
||||||
|
*/
|
||||||
public class DictionaryBuilder {
|
public class DictionaryBuilder {
|
||||||
|
|
||||||
public enum DictionaryFormat { IPADIC, UNIDIC }
|
/** Format of the dictionary. */
|
||||||
|
public enum DictionaryFormat {
|
||||||
|
/** IPADIC format */
|
||||||
|
IPADIC,
|
||||||
|
/** UNIDIC format */
|
||||||
|
UNIDIC
|
||||||
|
}
|
||||||
|
|
||||||
private DictionaryBuilder() {
|
private DictionaryBuilder() {
|
||||||
}
|
}
|
||||||
|
@ -42,7 +52,7 @@ public class DictionaryBuilder {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws IOException {
|
public static void main(String[] args) throws IOException {
|
||||||
DictionaryFormat format = DictionaryFormat.valueOf(args[0].toUpperCase());
|
DictionaryFormat format = DictionaryFormat.valueOf(args[0].toUpperCase(Locale.ROOT));
|
||||||
String inputDirName = args[1];
|
String inputDirName = args[1];
|
||||||
String outputDirName = args[2];
|
String outputDirName = args[2];
|
||||||
String inputEncoding = args[3];
|
String inputEncoding = args[3];
|
||||||
|
|
|
@ -37,7 +37,7 @@ import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*/
|
*/
|
||||||
public class TokenInfoDictionaryBuilder {
|
class TokenInfoDictionaryBuilder {
|
||||||
|
|
||||||
private final String encoding;
|
private final String encoding;
|
||||||
private final Normalizer.Form normalForm;
|
private final Normalizer.Form normalForm;
|
||||||
|
|
|
@ -25,7 +25,7 @@ import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary;
|
import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary;
|
||||||
import org.apache.lucene.util.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
|
|
||||||
public class TokenInfoDictionaryWriter extends BinaryDictionaryWriter {
|
class TokenInfoDictionaryWriter extends BinaryDictionaryWriter {
|
||||||
private FST<Long> fst;
|
private FST<Long> fst;
|
||||||
|
|
||||||
TokenInfoDictionaryWriter(int size) {
|
TokenInfoDictionaryWriter(int size) {
|
||||||
|
|
|
@ -29,7 +29,7 @@ import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.ja.dict.CharacterDefinition;
|
import org.apache.lucene.analysis.ja.dict.CharacterDefinition;
|
||||||
|
|
||||||
public class UnknownDictionaryBuilder {
|
class UnknownDictionaryBuilder {
|
||||||
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";
|
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";
|
||||||
|
|
||||||
private final String encoding;
|
private final String encoding;
|
||||||
|
|
|
@ -19,12 +19,11 @@ package org.apache.lucene.analysis.ja.util;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.ja.dict.CharacterDefinition;
|
import org.apache.lucene.analysis.ja.dict.CharacterDefinition;
|
||||||
import org.apache.lucene.analysis.ja.dict.UnknownDictionary;
|
import org.apache.lucene.analysis.ja.dict.UnknownDictionary;
|
||||||
|
|
||||||
public class UnknownDictionaryWriter extends BinaryDictionaryWriter {
|
class UnknownDictionaryWriter extends BinaryDictionaryWriter {
|
||||||
private final CharacterDefinitionWriter characterDefinition = new CharacterDefinitionWriter();
|
private final CharacterDefinitionWriter characterDefinition = new CharacterDefinitionWriter();
|
||||||
|
|
||||||
public UnknownDictionaryWriter(int size) {
|
public UnknownDictionaryWriter(int size) {
|
||||||
|
|
|
@ -23,10 +23,9 @@ import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.ja.util.DictionaryBuilder;
|
||||||
import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
|
import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
|
||||||
import org.apache.lucene.analysis.ja.util.ToStringUtil;
|
import org.apache.lucene.analysis.ja.util.ToStringUtil;
|
||||||
import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryBuilder;
|
|
||||||
import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryWriter;
|
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
import org.apache.lucene.util.IntsRefBuilder;
|
import org.apache.lucene.util.IntsRefBuilder;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -68,9 +67,13 @@ public class TokenInfoDictionaryTest extends LuceneTestCase {
|
||||||
printer.println(entry);
|
printer.println(entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TokenInfoDictionaryBuilder builder = new TokenInfoDictionaryBuilder(DictionaryFormat.IPADIC, "utf-8", true);
|
Files.createFile(dir.resolve("unk.def"));
|
||||||
TokenInfoDictionaryWriter writer = builder.build(dir);
|
Files.createFile(dir.resolve("char.def"));
|
||||||
writer.write(dir);
|
try (OutputStream out = Files.newOutputStream(dir.resolve("matrix.def"));
|
||||||
|
PrintWriter printer = new PrintWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))) {
|
||||||
|
printer.println("1 1");
|
||||||
|
}
|
||||||
|
DictionaryBuilder.build(DictionaryFormat.IPADIC, dir, dir, "utf-8", true);
|
||||||
String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
|
String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
|
||||||
// We must also load the other files (in BinaryDictionary) from the correct path
|
// We must also load the other files (in BinaryDictionary) from the correct path
|
||||||
return new TokenInfoDictionary(ResourceScheme.FILE, dir.resolve(dictionaryPath).toString());
|
return new TokenInfoDictionary(ResourceScheme.FILE, dir.resolve(dictionaryPath).toString());
|
||||||
|
|
|
@ -14,11 +14,8 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.ja.dict;
|
package org.apache.lucene.analysis.ja.util;
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.ja.util.CSVUtil;
|
|
||||||
import org.apache.lucene.analysis.ja.util.UnknownDictionaryWriter;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
Loading…
Reference in New Issue