mirror of https://github.com/apache/lucene.git
Ensure Nori/Kuromoji shipped binary FST is the latest version (#12933)
* Ensure the shipped Nori/Kuromoji binary FST is the latest version (closes #12911) * Fold in feedback from @uschindler: sharpen the test failure messages to give the specific gradlew command that regenerates the precise FST (not everything) * Add javadoc for FSTMetadata.getVersion
This commit is contained in:
parent
3965319441
commit
ebf9e29570
|
@ -111,7 +111,7 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
|
|||
this.fst = new TokenInfoFST(fst, true);
|
||||
}
|
||||
|
||||
private static InputStream getClassResource(String suffix) throws IOException {
|
||||
static InputStream getClassResource(String suffix) throws IOException {
|
||||
final String resourcePath = TokenInfoDictionary.class.getSimpleName() + suffix;
|
||||
return IOUtils.requireResourceNonNull(
|
||||
TokenInfoDictionary.class.getResourceAsStream(resourcePath), resourcePath);
|
||||
|
|
|
@ -21,18 +21,22 @@ import static org.apache.lucene.analysis.morph.BinaryDictionary.DICT_FILENAME_SU
|
|||
import static org.apache.lucene.analysis.morph.BinaryDictionary.POSDICT_FILENAME_SUFFIX;
|
||||
import static org.apache.lucene.analysis.morph.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.IntsRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
|
||||
/** Tests of TokenInfoDictionary build tools; run using ant test-tools */
|
||||
public class TestTokenInfoDictionary extends LuceneTestCase {
|
||||
|
@ -178,4 +182,25 @@ public class TestTokenInfoDictionary extends LuceneTestCase {
|
|||
System.out.println("checked " + numTerms + " terms, " + numWords + " words.");
|
||||
}
|
||||
}
|
||||
|
||||
// #12911: make sure our shipped binary FST for TokenInfoDictionary is the latest & greatest
|
||||
// format
|
||||
public void testBinaryFSTIsLatestFormat() throws Exception {
|
||||
try (InputStream is =
|
||||
new BufferedInputStream(
|
||||
TokenInfoDictionary.getClassResource(TokenInfoDictionary.FST_FILENAME_SUFFIX))) {
|
||||
// we only need to load the FSTMetadata to check version:
|
||||
int actualVersion =
|
||||
FST.readMetadata(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton())
|
||||
.getVersion();
|
||||
assertEquals(
|
||||
"TokenInfoDictionary's FST is not the latest version: expected "
|
||||
+ FST.VERSION_CURRENT
|
||||
+ " but got: "
|
||||
+ actualVersion
|
||||
+ "; run \"./gradlew :lucene:analysis:kuromoji:regenerate\" to regenerate this FST",
|
||||
FST.VERSION_CURRENT,
|
||||
actualVersion);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -109,7 +109,7 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
|
|||
this.fst = new TokenInfoFST(fst);
|
||||
}
|
||||
|
||||
private static InputStream getClassResource(String suffix) throws IOException {
|
||||
static InputStream getClassResource(String suffix) throws IOException {
|
||||
final String resourcePath = TokenInfoDictionary.class.getSimpleName() + suffix;
|
||||
return IOUtils.requireResourceNonNull(
|
||||
TokenInfoDictionary.class.getResourceAsStream(resourcePath), resourcePath);
|
||||
|
|
|
@ -21,6 +21,8 @@ import static org.apache.lucene.analysis.morph.BinaryDictionary.DICT_FILENAME_SU
|
|||
import static org.apache.lucene.analysis.morph.BinaryDictionary.POSDICT_FILENAME_SUFFIX;
|
||||
import static org.apache.lucene.analysis.morph.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
|
@ -28,12 +30,14 @@ import java.nio.charset.StandardCharsets;
|
|||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import org.apache.lucene.analysis.ko.POS;
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.IntsRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
|
||||
/** Tests of TokenInfoDictionary build tools; run using ant test-tools */
|
||||
public class TestTokenInfoDictionary extends LuceneTestCase {
|
||||
|
@ -185,4 +189,25 @@ public class TestTokenInfoDictionary extends LuceneTestCase {
|
|||
System.out.println("checked " + numTerms + " terms, " + numWords + " words.");
|
||||
}
|
||||
}
|
||||
|
||||
// #12911: make sure our shipped binary FST for TokenInfoDictionary is the latest & greatest
|
||||
// format
|
||||
public void testBinaryFSTIsLatestFormat() throws Exception {
|
||||
try (InputStream is =
|
||||
new BufferedInputStream(
|
||||
TokenInfoDictionary.getClassResource(TokenInfoDictionary.FST_FILENAME_SUFFIX))) {
|
||||
// we only need to load the FSTMetadata to check version:
|
||||
int actualVersion =
|
||||
FST.readMetadata(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton())
|
||||
.getVersion();
|
||||
assertEquals(
|
||||
"TokenInfoDictionary's FST is not the latest version: expected "
|
||||
+ FST.VERSION_CURRENT
|
||||
+ " but got: "
|
||||
+ actualVersion
|
||||
+ "; run \"./gradlew :lucene:analysis:nori:regenerate\" to regenerate this FST",
|
||||
FST.VERSION_CURRENT,
|
||||
actualVersion);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1208,7 +1208,7 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Represent the FST metadata
|
||||
* Represents the FST metadata.
|
||||
*
|
||||
* @param <T> the FST output type
|
||||
*/
|
||||
|
@ -1236,5 +1236,14 @@ public final class FST<T> implements Accountable {
|
|||
this.version = version;
|
||||
this.numBytes = numBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the version constant of the binary format this FST was written in. See the {@code
|
||||
* static final int VERSION} constants in FST's javadoc, e.g. {@link
|
||||
* FST#VERSION_CONTINUOUS_ARCS}.
|
||||
*/
|
||||
public int getVersion() {
|
||||
return version;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue