LUCENE-9707: Hunspell: check Lucene's implementation against Hunspell's test data (#2267)

This commit is contained in:
Peter Gromov 2021-02-02 10:46:14 +01:00 committed by GitHub
parent 2da7a4a86d
commit b48d5beb34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 112 additions and 51 deletions

View File

@ -90,6 +90,9 @@ grant {
// allows LuceneTestCase#runWithRestrictedPermissions to execute with lower (or no) permission
permission java.security.SecurityPermission "createAccessControlContext";
// Some Hunspell tests may read from external files specified in system properties
permission java.io.FilePermission "${hunspell.repo.path}${/}-", "read";
};
// Permissions to support ant build

View File

@ -16,35 +16,31 @@
*/
package org.apache.lucene.analysis.hunspell;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.util.IOUtils;
import org.junit.Test;
public class SpellCheckerTest extends StemmerTestBase {
@Test
public void base() throws Exception {
public void testBase() throws Exception {
doTest("base");
}
@Test
public void baseUtf() throws Exception {
public void testBaseUtf() throws Exception {
doTest("base_utf");
}
@Test
public void keepcase() throws Exception {
public void testKeepcase() throws Exception {
doTest("keepcase");
}
@Test
public void allcaps() throws Exception {
public void testAllcaps() throws Exception {
doTest("allcaps");
}
@ -52,63 +48,51 @@ public class SpellCheckerTest extends StemmerTestBase {
doTest("rep");
}
@Test
public void forceUCase() throws Exception {
public void testForceUCase() throws Exception {
doTest("forceucase");
}
@Test
public void checkSharpS() throws Exception {
public void testCheckSharpS() throws Exception {
doTest("checksharps");
}
@Test
public void IJ() throws Exception {
public void testIJ() throws Exception {
doTest("IJ");
}
@Test
public void i53643_numbersWithSeparators() throws Exception {
public void testI53643_numbersWithSeparators() throws Exception {
doTest("i53643");
}
@Test
public void dotless_i() throws Exception {
public void testDotless_i() throws Exception {
doTest("dotless_i");
}
@Test
public void needAffixOnAffixes() throws Exception {
public void testNeedAffixOnAffixes() throws Exception {
doTest("needaffix5");
}
@Test
public void compoundFlag() throws Exception {
public void testCompoundFlag() throws Exception {
doTest("compoundflag");
}
@Test
public void checkCompoundCase() throws Exception {
public void testCheckCompoundCase() throws Exception {
doTest("checkcompoundcase");
}
@Test
public void checkCompoundDup() throws Exception {
public void testCheckCompoundDup() throws Exception {
doTest("checkcompounddup");
}
@Test
public void checkCompoundTriple() throws Exception {
public void testCheckCompoundTriple() throws Exception {
doTest("checkcompoundtriple");
}
@Test
public void simplifiedTriple() throws Exception {
public void testSimplifiedTriple() throws Exception {
doTest("simplifiedtriple");
}
@Test
public void compoundForbid() throws Exception {
public void testCompoundForbid() throws Exception {
doTest("compoundforbid");
}
@ -161,10 +145,14 @@ public class SpellCheckerTest extends StemmerTestBase {
}
protected void doTest(String name) throws Exception {
InputStream affixStream =
Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name);
InputStream dictStream =
Objects.requireNonNull(getClass().getResourceAsStream(name + ".dic"), name);
checkSpellCheckerExpectations(
Path.of(getClass().getResource(name + ".aff").toURI()).getParent().resolve(name), true);
}
static void checkSpellCheckerExpectations(Path basePath, boolean checkSuggestions)
throws IOException, ParseException {
InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
SpellChecker speller;
try {
@ -176,30 +164,30 @@ public class SpellCheckerTest extends StemmerTestBase {
IOUtils.closeWhileHandlingException(dictStream);
}
URL good = StemmerTestBase.class.getResource(name + ".good");
if (good != null) {
for (String word : Files.readAllLines(Path.of(good.toURI()))) {
assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word));
Path good = Path.of(basePath + ".good");
if (Files.exists(good)) {
for (String word : Files.readAllLines(good)) {
assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word.trim()));
}
}
URL wrong = StemmerTestBase.class.getResource(name + ".wrong");
URL sug = StemmerTestBase.class.getResource(name + ".sug");
if (wrong != null) {
List<String> wrongWords = Files.readAllLines(Path.of(wrong.toURI()));
Path wrong = Path.of(basePath + ".wrong");
Path sug = Path.of(basePath + ".sug");
if (Files.exists(wrong)) {
List<String> wrongWords = Files.readAllLines(wrong);
for (String word : wrongWords) {
assertFalse("Unexpectedly considered correct: " + word, speller.spell(word));
assertFalse("Unexpectedly considered correct: " + word, speller.spell(word.trim()));
}
if (sug != null) {
if (Files.exists(sug) && checkSuggestions) {
String suggestions =
wrongWords.stream()
.map(s -> String.join(", ", speller.suggest(s)))
.filter(s -> !s.isEmpty())
.collect(Collectors.joining("\n"));
assertEquals(Files.readString(Path.of(sug.toURI())).trim(), suggestions);
assertEquals(Files.readString(sug).trim(), suggestions);
}
} else {
assertNull(".sug file without .wrong file!", sug);
assertFalse(".sug file without .wrong file!", Files.exists(sug));
}
}
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.hunspell;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.Collection;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.junit.AssumptionViolatedException;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
/**
 * Runs the same spell-checking expectations as {@link SpellCheckerTest}, but against every test
 * case found in a checked-out Hunspell repository. The repository location is read from the
 * {@code hunspell.repo.path} system property (e.g. {@code -Dhunspell.repo.path=...}); its {@code
 * tests} subdirectory is scanned for {@code .aff} files, one parameterized run per file.
 */
@RunWith(Parameterized.class)
public class TestHunspellRepositoryTestCases {
  /** Extension identifying an affix file; each such file marks one test case. */
  private static final String AFF_SUFFIX = ".aff";

  /** Path of the test case files with the extension stripped. */
  private final Path pathPrefix;

  /**
   * @param testName test case name; not stored, it only feeds the {@code "{0}"} display name
   * @param pathPrefix extension-less path of the test case files
   */
  public TestHunspellRepositoryTestCases(String testName, Path pathPrefix) {
    this.pathPrefix = pathPrefix;
  }

  /**
   * Collects one {@code {name, pathPrefix}} pair per {@code .aff} file in the repository's {@code
   * tests} directory, ordered by name.
   *
   * @throws AssumptionViolatedException if the {@code hunspell.repo.path} property is not set
   * @throws IOException if the directory cannot be listed
   */
  @Parameterized.Parameters(name = "{0}")
  public static Collection<Object[]> data() throws IOException {
    String repoLocation = System.getProperty("hunspell.repo.path");
    if (repoLocation == null) {
      throw new AssumptionViolatedException("hunspell.repo.path property not specified.");
    }

    Path testDir = Path.of(repoLocation).resolve("tests");
    // A sorted set keeps the parameter order stable regardless of directory iteration order.
    Set<String> sortedNames = new TreeSet<>();
    try (DirectoryStream<Path> entries = Files.newDirectoryStream(testDir)) {
      for (Path entry : entries) {
        String fileName = entry.getFileName().toString();
        if (!fileName.endsWith(AFF_SUFFIX)) {
          continue;
        }
        sortedNames.add(fileName.substring(0, fileName.length() - AFF_SUFFIX.length()));
      }
    }

    return sortedNames.stream()
        .map(baseName -> new Object[] {baseName, testDir.resolve(baseName)})
        .collect(Collectors.toList());
  }

  /** Checks the spelling expectations for this test case; suggestion checks are disabled. */
  @Test
  public void test() throws IOException, ParseException {
    SpellCheckerTest.checkSpellCheckerExpectations(pathPrefix, false);
  }
}