mirror of https://github.com/apache/lucene.git
LUCENE-9707: Hunspell: check Lucene's implementation against Hunspell's test data (#2267)
This commit is contained in:
parent
2da7a4a86d
commit
b48d5beb34
|
@ -90,6 +90,9 @@ grant {
|
|||
|
||||
// allows LuceneTestCase#runWithRestrictedPermissions to execute with lower (or no) permission
|
||||
permission java.security.SecurityPermission "createAccessControlContext";
|
||||
|
||||
// Some Hunspell tests may read from external files specified in system properties
|
||||
permission java.io.FilePermission "${hunspell.repo.path}${/}-", "read";
|
||||
};
|
||||
|
||||
// Permissions to support ant build
|
||||
|
|
|
@ -16,35 +16,31 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.text.ParseException;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
public class SpellCheckerTest extends StemmerTestBase {
|
||||
@Test
|
||||
public void base() throws Exception {
|
||||
|
||||
public void testBase() throws Exception {
|
||||
doTest("base");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void baseUtf() throws Exception {
|
||||
public void testBaseUtf() throws Exception {
|
||||
doTest("base_utf");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void keepcase() throws Exception {
|
||||
public void testKeepcase() throws Exception {
|
||||
doTest("keepcase");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void allcaps() throws Exception {
|
||||
public void testAllcaps() throws Exception {
|
||||
doTest("allcaps");
|
||||
}
|
||||
|
||||
|
@ -52,63 +48,51 @@ public class SpellCheckerTest extends StemmerTestBase {
|
|||
doTest("rep");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void forceUCase() throws Exception {
|
||||
public void testForceUCase() throws Exception {
|
||||
doTest("forceucase");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void checkSharpS() throws Exception {
|
||||
public void testCheckSharpS() throws Exception {
|
||||
doTest("checksharps");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void IJ() throws Exception {
|
||||
public void testIJ() throws Exception {
|
||||
doTest("IJ");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void i53643_numbersWithSeparators() throws Exception {
|
||||
public void testI53643_numbersWithSeparators() throws Exception {
|
||||
doTest("i53643");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void dotless_i() throws Exception {
|
||||
public void testDotless_i() throws Exception {
|
||||
doTest("dotless_i");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void needAffixOnAffixes() throws Exception {
|
||||
public void testNeedAffixOnAffixes() throws Exception {
|
||||
doTest("needaffix5");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void compoundFlag() throws Exception {
|
||||
public void testCompoundFlag() throws Exception {
|
||||
doTest("compoundflag");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void checkCompoundCase() throws Exception {
|
||||
public void testCheckCompoundCase() throws Exception {
|
||||
doTest("checkcompoundcase");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void checkCompoundDup() throws Exception {
|
||||
public void testCheckCompoundDup() throws Exception {
|
||||
doTest("checkcompounddup");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void checkCompoundTriple() throws Exception {
|
||||
public void testCheckCompoundTriple() throws Exception {
|
||||
doTest("checkcompoundtriple");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void simplifiedTriple() throws Exception {
|
||||
public void testSimplifiedTriple() throws Exception {
|
||||
doTest("simplifiedtriple");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void compoundForbid() throws Exception {
|
||||
public void testCompoundForbid() throws Exception {
|
||||
doTest("compoundforbid");
|
||||
}
|
||||
|
||||
|
@ -161,10 +145,14 @@ public class SpellCheckerTest extends StemmerTestBase {
|
|||
}
|
||||
|
||||
protected void doTest(String name) throws Exception {
|
||||
InputStream affixStream =
|
||||
Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name);
|
||||
InputStream dictStream =
|
||||
Objects.requireNonNull(getClass().getResourceAsStream(name + ".dic"), name);
|
||||
checkSpellCheckerExpectations(
|
||||
Path.of(getClass().getResource(name + ".aff").toURI()).getParent().resolve(name), true);
|
||||
}
|
||||
|
||||
static void checkSpellCheckerExpectations(Path basePath, boolean checkSuggestions)
|
||||
throws IOException, ParseException {
|
||||
InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
|
||||
InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
|
||||
|
||||
SpellChecker speller;
|
||||
try {
|
||||
|
@ -176,30 +164,30 @@ public class SpellCheckerTest extends StemmerTestBase {
|
|||
IOUtils.closeWhileHandlingException(dictStream);
|
||||
}
|
||||
|
||||
URL good = StemmerTestBase.class.getResource(name + ".good");
|
||||
if (good != null) {
|
||||
for (String word : Files.readAllLines(Path.of(good.toURI()))) {
|
||||
assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word));
|
||||
Path good = Path.of(basePath + ".good");
|
||||
if (Files.exists(good)) {
|
||||
for (String word : Files.readAllLines(good)) {
|
||||
assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word.trim()));
|
||||
}
|
||||
}
|
||||
|
||||
URL wrong = StemmerTestBase.class.getResource(name + ".wrong");
|
||||
URL sug = StemmerTestBase.class.getResource(name + ".sug");
|
||||
if (wrong != null) {
|
||||
List<String> wrongWords = Files.readAllLines(Path.of(wrong.toURI()));
|
||||
Path wrong = Path.of(basePath + ".wrong");
|
||||
Path sug = Path.of(basePath + ".sug");
|
||||
if (Files.exists(wrong)) {
|
||||
List<String> wrongWords = Files.readAllLines(wrong);
|
||||
for (String word : wrongWords) {
|
||||
assertFalse("Unexpectedly considered correct: " + word, speller.spell(word));
|
||||
assertFalse("Unexpectedly considered correct: " + word, speller.spell(word.trim()));
|
||||
}
|
||||
if (sug != null) {
|
||||
if (Files.exists(sug) && checkSuggestions) {
|
||||
String suggestions =
|
||||
wrongWords.stream()
|
||||
.map(s -> String.join(", ", speller.suggest(s)))
|
||||
.filter(s -> !s.isEmpty())
|
||||
.collect(Collectors.joining("\n"));
|
||||
assertEquals(Files.readString(Path.of(sug.toURI())).trim(), suggestions);
|
||||
assertEquals(Files.readString(sug).trim(), suggestions);
|
||||
}
|
||||
} else {
|
||||
assertNull(".sug file without .wrong file!", sug);
|
||||
assertFalse(".sug file without .wrong file!", Files.exists(sug));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.text.ParseException;
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.stream.Collectors;
|
||||
import org.junit.AssumptionViolatedException;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
||||
/**
|
||||
* Same as {@link SpellCheckerTest}, but checks all Hunspell's test data. The path to the checked
|
||||
* out Hunspell repository should be in {@code -Dhunspell.repo.path=...} system property.
|
||||
*/
|
||||
@RunWith(Parameterized.class)
|
||||
public class TestHunspellRepositoryTestCases {
|
||||
private final Path pathPrefix;
|
||||
|
||||
public TestHunspellRepositoryTestCases(String testName, Path pathPrefix) {
|
||||
this.pathPrefix = pathPrefix;
|
||||
}
|
||||
|
||||
@Parameterized.Parameters(name = "{0}")
|
||||
public static Collection<Object[]> data() throws IOException {
|
||||
String hunspellRepo = System.getProperty("hunspell.repo.path");
|
||||
if (hunspellRepo == null) {
|
||||
throw new AssumptionViolatedException("hunspell.repo.path property not specified.");
|
||||
}
|
||||
|
||||
Set<String> names = new TreeSet<>();
|
||||
Path tests = Path.of(hunspellRepo).resolve("tests");
|
||||
try (DirectoryStream<Path> files = Files.newDirectoryStream(tests)) {
|
||||
for (Path file : files) {
|
||||
String name = file.getFileName().toString();
|
||||
if (name.endsWith(".aff")) {
|
||||
names.add(name.substring(0, name.length() - 4));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return names.stream().map(s -> new Object[] {s, tests.resolve(s)}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test() throws IOException, ParseException {
|
||||
SpellCheckerTest.checkSpellCheckerExpectations(pathPrefix, false);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue