mirror of https://github.com/apache/lucene.git
LUCENE-9707: Hunspell: check Lucene's implementation against Hunspell's test data (#2267)
This commit is contained in:
parent
2da7a4a86d
commit
b48d5beb34
|
@ -90,6 +90,9 @@ grant {
|
||||||
|
|
||||||
// allows LuceneTestCase#runWithRestrictedPermissions to execute with lower (or no) permission
|
// allows LuceneTestCase#runWithRestrictedPermissions to execute with lower (or no) permission
|
||||||
permission java.security.SecurityPermission "createAccessControlContext";
|
permission java.security.SecurityPermission "createAccessControlContext";
|
||||||
|
|
||||||
|
// Some Hunspell tests may read from external files specified in system properties
|
||||||
|
permission java.io.FilePermission "${hunspell.repo.path}${/}-", "read";
|
||||||
};
|
};
|
||||||
|
|
||||||
// Permissions to support ant build
|
// Permissions to support ant build
|
||||||
|
|
|
@ -16,35 +16,31 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.hunspell;
|
package org.apache.lucene.analysis.hunspell;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.net.URL;
|
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.text.ParseException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import org.apache.lucene.store.ByteBuffersDirectory;
|
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class SpellCheckerTest extends StemmerTestBase {
|
public class SpellCheckerTest extends StemmerTestBase {
|
||||||
@Test
|
|
||||||
public void base() throws Exception {
|
public void testBase() throws Exception {
|
||||||
doTest("base");
|
doTest("base");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testBaseUtf() throws Exception {
|
||||||
public void baseUtf() throws Exception {
|
|
||||||
doTest("base_utf");
|
doTest("base_utf");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testKeepcase() throws Exception {
|
||||||
public void keepcase() throws Exception {
|
|
||||||
doTest("keepcase");
|
doTest("keepcase");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testAllcaps() throws Exception {
|
||||||
public void allcaps() throws Exception {
|
|
||||||
doTest("allcaps");
|
doTest("allcaps");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -52,63 +48,51 @@ public class SpellCheckerTest extends StemmerTestBase {
|
||||||
doTest("rep");
|
doTest("rep");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testForceUCase() throws Exception {
|
||||||
public void forceUCase() throws Exception {
|
|
||||||
doTest("forceucase");
|
doTest("forceucase");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testCheckSharpS() throws Exception {
|
||||||
public void checkSharpS() throws Exception {
|
|
||||||
doTest("checksharps");
|
doTest("checksharps");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testIJ() throws Exception {
|
||||||
public void IJ() throws Exception {
|
|
||||||
doTest("IJ");
|
doTest("IJ");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testI53643_numbersWithSeparators() throws Exception {
|
||||||
public void i53643_numbersWithSeparators() throws Exception {
|
|
||||||
doTest("i53643");
|
doTest("i53643");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testDotless_i() throws Exception {
|
||||||
public void dotless_i() throws Exception {
|
|
||||||
doTest("dotless_i");
|
doTest("dotless_i");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testNeedAffixOnAffixes() throws Exception {
|
||||||
public void needAffixOnAffixes() throws Exception {
|
|
||||||
doTest("needaffix5");
|
doTest("needaffix5");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testCompoundFlag() throws Exception {
|
||||||
public void compoundFlag() throws Exception {
|
|
||||||
doTest("compoundflag");
|
doTest("compoundflag");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testCheckCompoundCase() throws Exception {
|
||||||
public void checkCompoundCase() throws Exception {
|
|
||||||
doTest("checkcompoundcase");
|
doTest("checkcompoundcase");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testCheckCompoundDup() throws Exception {
|
||||||
public void checkCompoundDup() throws Exception {
|
|
||||||
doTest("checkcompounddup");
|
doTest("checkcompounddup");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testCheckCompoundTriple() throws Exception {
|
||||||
public void checkCompoundTriple() throws Exception {
|
|
||||||
doTest("checkcompoundtriple");
|
doTest("checkcompoundtriple");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testSimplifiedTriple() throws Exception {
|
||||||
public void simplifiedTriple() throws Exception {
|
|
||||||
doTest("simplifiedtriple");
|
doTest("simplifiedtriple");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
public void testCompoundForbid() throws Exception {
|
||||||
public void compoundForbid() throws Exception {
|
|
||||||
doTest("compoundforbid");
|
doTest("compoundforbid");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,10 +145,14 @@ public class SpellCheckerTest extends StemmerTestBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void doTest(String name) throws Exception {
|
protected void doTest(String name) throws Exception {
|
||||||
InputStream affixStream =
|
checkSpellCheckerExpectations(
|
||||||
Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name);
|
Path.of(getClass().getResource(name + ".aff").toURI()).getParent().resolve(name), true);
|
||||||
InputStream dictStream =
|
}
|
||||||
Objects.requireNonNull(getClass().getResourceAsStream(name + ".dic"), name);
|
|
||||||
|
static void checkSpellCheckerExpectations(Path basePath, boolean checkSuggestions)
|
||||||
|
throws IOException, ParseException {
|
||||||
|
InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
|
||||||
|
InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
|
||||||
|
|
||||||
SpellChecker speller;
|
SpellChecker speller;
|
||||||
try {
|
try {
|
||||||
|
@ -176,30 +164,30 @@ public class SpellCheckerTest extends StemmerTestBase {
|
||||||
IOUtils.closeWhileHandlingException(dictStream);
|
IOUtils.closeWhileHandlingException(dictStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
URL good = StemmerTestBase.class.getResource(name + ".good");
|
Path good = Path.of(basePath + ".good");
|
||||||
if (good != null) {
|
if (Files.exists(good)) {
|
||||||
for (String word : Files.readAllLines(Path.of(good.toURI()))) {
|
for (String word : Files.readAllLines(good)) {
|
||||||
assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word));
|
assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word.trim()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
URL wrong = StemmerTestBase.class.getResource(name + ".wrong");
|
Path wrong = Path.of(basePath + ".wrong");
|
||||||
URL sug = StemmerTestBase.class.getResource(name + ".sug");
|
Path sug = Path.of(basePath + ".sug");
|
||||||
if (wrong != null) {
|
if (Files.exists(wrong)) {
|
||||||
List<String> wrongWords = Files.readAllLines(Path.of(wrong.toURI()));
|
List<String> wrongWords = Files.readAllLines(wrong);
|
||||||
for (String word : wrongWords) {
|
for (String word : wrongWords) {
|
||||||
assertFalse("Unexpectedly considered correct: " + word, speller.spell(word));
|
assertFalse("Unexpectedly considered correct: " + word, speller.spell(word.trim()));
|
||||||
}
|
}
|
||||||
if (sug != null) {
|
if (Files.exists(sug) && checkSuggestions) {
|
||||||
String suggestions =
|
String suggestions =
|
||||||
wrongWords.stream()
|
wrongWords.stream()
|
||||||
.map(s -> String.join(", ", speller.suggest(s)))
|
.map(s -> String.join(", ", speller.suggest(s)))
|
||||||
.filter(s -> !s.isEmpty())
|
.filter(s -> !s.isEmpty())
|
||||||
.collect(Collectors.joining("\n"));
|
.collect(Collectors.joining("\n"));
|
||||||
assertEquals(Files.readString(Path.of(sug.toURI())).trim(), suggestions);
|
assertEquals(Files.readString(sug).trim(), suggestions);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assertNull(".sug file without .wrong file!", sug);
|
assertFalse(".sug file without .wrong file!", Files.exists(sug));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.analysis.hunspell;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.DirectoryStream;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import org.junit.AssumptionViolatedException;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.runner.RunWith;
|
||||||
|
import org.junit.runners.Parameterized;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as {@link SpellCheckerTest}, but checks all of Hunspell's test data. The path to the checked
|
||||||
|
* out Hunspell repository should be given in the {@code -Dhunspell.repo.path=...} system property.
|
||||||
|
*/
|
||||||
|
@RunWith(Parameterized.class)
|
||||||
|
public class TestHunspellRepositoryTestCases {
|
||||||
|
private final Path pathPrefix;
|
||||||
|
|
||||||
|
public TestHunspellRepositoryTestCases(String testName, Path pathPrefix) {
|
||||||
|
this.pathPrefix = pathPrefix;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Parameterized.Parameters(name = "{0}")
|
||||||
|
public static Collection<Object[]> data() throws IOException {
|
||||||
|
String hunspellRepo = System.getProperty("hunspell.repo.path");
|
||||||
|
if (hunspellRepo == null) {
|
||||||
|
throw new AssumptionViolatedException("hunspell.repo.path property not specified.");
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<String> names = new TreeSet<>();
|
||||||
|
Path tests = Path.of(hunspellRepo).resolve("tests");
|
||||||
|
try (DirectoryStream<Path> files = Files.newDirectoryStream(tests)) {
|
||||||
|
for (Path file : files) {
|
||||||
|
String name = file.getFileName().toString();
|
||||||
|
if (name.endsWith(".aff")) {
|
||||||
|
names.add(name.substring(0, name.length() - 4));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return names.stream().map(s -> new Object[] {s, tests.resolve(s)}).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test() throws IOException, ParseException {
|
||||||
|
SpellCheckerTest.checkSpellCheckerExpectations(pathPrefix, false);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue