Simplify asserts in TestWordBreakSpellChecker (#13007)

This commit is contained in:
sabi0 2024-01-10 20:33:40 +01:00 committed by GitHub
parent 89a02fa4e3
commit f67b1b3d1f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 67 additions and 90 deletions

View File

@ -38,7 +38,6 @@ import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.hamcrest.MatcherAssert; import org.hamcrest.MatcherAssert;
import org.junit.Assert;
public class TestWordBreakSpellChecker extends LuceneTestCase { public class TestWordBreakSpellChecker extends LuceneTestCase {
private Directory dir; private Directory dir;
@ -134,30 +133,16 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
wbsp.setMinSuggestionFrequency(1); wbsp.setMinSuggestionFrequency(1);
CombineSuggestion[] cs = CombineSuggestion[] cs =
wbsp.suggestWordCombinations(terms, 10, ir, SuggestMode.SUGGEST_ALWAYS); wbsp.suggestWordCombinations(terms, 10, ir, SuggestMode.SUGGEST_ALWAYS);
Assert.assertTrue(cs.length == 5); assertEquals(5, cs.length);
Assert.assertTrue(cs[0].originalTermIndexes.length == 2); assertSuggestionEquals(cs[0], "hundred", 1.0f, 1, 2);
Assert.assertTrue(cs[0].originalTermIndexes[0] == 1); assertSuggestionEquals(cs[1], "eighty", 1.0f, 3, 4);
Assert.assertTrue(cs[0].originalTermIndexes[1] == 2); assertSuggestionEquals(cs[2], "yeight", 1.0f, 4, 5);
Assert.assertTrue(cs[0].suggestion.string.equals("hundred"));
Assert.assertTrue(cs[0].suggestion.score == 1);
Assert.assertTrue(cs[1].originalTermIndexes.length == 2);
Assert.assertTrue(cs[1].originalTermIndexes[0] == 3);
Assert.assertTrue(cs[1].originalTermIndexes[1] == 4);
Assert.assertTrue(cs[1].suggestion.string.equals("eighty"));
Assert.assertTrue(cs[1].suggestion.score == 1);
Assert.assertTrue(cs[2].originalTermIndexes.length == 2);
Assert.assertTrue(cs[2].originalTermIndexes[0] == 4);
Assert.assertTrue(cs[2].originalTermIndexes[1] == 5);
Assert.assertTrue(cs[2].suggestion.string.equals("yeight"));
Assert.assertTrue(cs[2].suggestion.score == 1);
for (int i = 3; i < 5; i++) { for (int i = 3; i < 5; i++) {
Assert.assertTrue(cs[i].originalTermIndexes.length == 3); assertEquals(3, cs[i].originalTermIndexes.length);
Assert.assertTrue(cs[i].suggestion.score == 2); assertEquals(2, cs[i].suggestion.score, 0);
Assert.assertTrue( assertTrue(
(cs[i].originalTermIndexes[0] == 1 (cs[i].originalTermIndexes[0] == 1
&& cs[i].originalTermIndexes[1] == 2 && cs[i].originalTermIndexes[1] == 2
&& cs[i].originalTermIndexes[2] == 3 && cs[i].originalTermIndexes[2] == 3
@ -169,20 +154,10 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
} }
cs = wbsp.suggestWordCombinations(terms, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); cs = wbsp.suggestWordCombinations(terms, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
Assert.assertTrue(cs.length == 2); assertEquals(2, cs.length);
Assert.assertTrue(cs[0].originalTermIndexes.length == 2);
Assert.assertTrue(cs[0].suggestion.score == 1);
Assert.assertTrue(cs[0].originalTermIndexes[0] == 1);
Assert.assertTrue(cs[0].originalTermIndexes[1] == 2);
Assert.assertTrue(cs[0].suggestion.string.equals("hundred"));
Assert.assertTrue(cs[0].suggestion.score == 1);
Assert.assertTrue(cs[1].originalTermIndexes.length == 3); assertSuggestionEquals(cs[0], "hundred", 1.0f, 1, 2);
Assert.assertTrue(cs[1].suggestion.score == 2); assertSuggestionEquals(cs[1], "hundredeight", 2.0f, 1, 2, 3);
Assert.assertTrue(cs[1].originalTermIndexes[0] == 1);
Assert.assertTrue(cs[1].originalTermIndexes[1] == 2);
Assert.assertTrue(cs[1].originalTermIndexes[2] == 3);
Assert.assertTrue(cs[1].suggestion.string.equals("hundredeight"));
} }
ir.close(); ir.close();
} }
@ -203,12 +178,10 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
ir, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
Assert.assertTrue(sw.length == 1); assertEquals(1, sw.length);
Assert.assertTrue(sw[0].length == 2); assertEquals(2, sw[0].length);
Assert.assertTrue(sw[0][0].string.equals("ninety")); assertSuggestionEquals(sw[0][0], "ninety", 1.0f);
Assert.assertTrue(sw[0][1].string.equals("nine")); assertSuggestionEquals(sw[0][1], "nine", 1.0f);
Assert.assertTrue(sw[0][0].score == 1);
Assert.assertTrue(sw[0][1].score == 1);
} }
{ {
Term term = new Term("numbers", "onethousand"); Term term = new Term("numbers", "onethousand");
@ -222,12 +195,10 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
ir, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
Assert.assertTrue(sw.length == 1); assertEquals(1, sw.length);
Assert.assertTrue(sw[0].length == 2); assertEquals(2, sw[0].length);
Assert.assertTrue(sw[0][0].string.equals("one")); assertSuggestionEquals(sw[0][0], "one", 1.0f);
Assert.assertTrue(sw[0][1].string.equals("thousand")); assertSuggestionEquals(sw[0][1], "thousand", 1.0f);
Assert.assertTrue(sw[0][0].score == 1);
Assert.assertTrue(sw[0][1].score == 1);
wbsp.setMaxChanges(2); wbsp.setMaxChanges(2);
wbsp.setMinSuggestionFrequency(1); wbsp.setMinSuggestionFrequency(1);
@ -238,8 +209,8 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
ir, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
Assert.assertTrue(sw.length == 1); assertEquals(1, sw.length);
Assert.assertTrue(sw[0].length == 2); assertEquals(2, sw[0].length);
wbsp.setMaxChanges(2); wbsp.setMaxChanges(2);
wbsp.setMinSuggestionFrequency(2); wbsp.setMinSuggestionFrequency(2);
@ -250,8 +221,8 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
ir, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
Assert.assertTrue(sw.length == 1); assertEquals(1, sw.length);
Assert.assertTrue(sw[0].length == 2); assertEquals(2, sw[0].length);
wbsp.setMaxChanges(2); wbsp.setMaxChanges(2);
wbsp.setMinSuggestionFrequency(1); wbsp.setMinSuggestionFrequency(1);
@ -262,24 +233,20 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
ir, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
Assert.assertTrue(sw.length == 2); assertEquals(2, sw.length);
Assert.assertTrue(sw[0].length == 2); assertEquals(2, sw[0].length);
Assert.assertTrue(sw[0][0].string.equals("one")); assertSuggestionEquals(sw[0][0], "one", 1.0f);
Assert.assertTrue(sw[0][1].string.equals("thousand")); assertSuggestionEquals(sw[0][1], "thousand", 1.0f);
Assert.assertTrue(sw[0][0].score == 1); MatcherAssert.assertThat(sw[0][1].freq, greaterThan(1));
Assert.assertTrue(sw[0][1].score == 1); MatcherAssert.assertThat(sw[0][0].freq, greaterThan(sw[0][1].freq));
Assert.assertTrue(sw[0][1].freq > 1);
Assert.assertTrue(sw[0][0].freq > sw[0][1].freq); assertEquals(3, sw[1].length);
Assert.assertTrue(sw[1].length == 3); assertSuggestionEquals(sw[1][0], "one", 2.0f);
Assert.assertTrue(sw[1][0].string.equals("one")); assertSuggestionEquals(sw[1][1], "thou", 2.0f);
Assert.assertTrue(sw[1][1].string.equals("thou")); assertSuggestionEquals(sw[1][2], "sand", 2.0f);
Assert.assertTrue(sw[1][2].string.equals("sand")); MatcherAssert.assertThat(sw[1][0].freq, greaterThan(1));
Assert.assertTrue(sw[1][0].score == 2); assertEquals(1, sw[1][1].freq);
Assert.assertTrue(sw[1][1].score == 2); assertEquals(1, sw[1][2].freq);
Assert.assertTrue(sw[1][2].score == 2);
Assert.assertTrue(sw[1][0].freq > 1);
Assert.assertTrue(sw[1][1].freq == 1);
Assert.assertTrue(sw[1][2].freq == 1);
} }
{ {
Term term = new Term("numbers", "onethousandonehundredeleven"); Term term = new Term("numbers", "onethousandonehundredeleven");
@ -293,7 +260,7 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
ir, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
Assert.assertTrue(sw.length == 0); assertEquals(0, sw.length);
wbsp.setMaxChanges(4); wbsp.setMaxChanges(4);
sw = sw =
@ -303,8 +270,8 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
ir, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
Assert.assertTrue(sw.length == 1); assertEquals(1, sw.length);
Assert.assertTrue(sw[0].length == 5); assertEquals(5, sw[0].length);
wbsp.setMaxChanges(5); wbsp.setMaxChanges(5);
sw = sw =
@ -314,12 +281,12 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
ir, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
Assert.assertTrue(sw.length == 2); assertEquals(2, sw.length);
Assert.assertTrue(sw[0].length == 5); assertEquals(5, sw[0].length);
Assert.assertTrue(sw[0][1].string.equals("thousand")); assertEquals("thousand", sw[0][1].string);
Assert.assertTrue(sw[1].length == 6); assertEquals(6, sw[1].length);
Assert.assertTrue(sw[1][1].string.equals("thou")); assertEquals("thou", sw[1][1].string);
Assert.assertTrue(sw[1][2].string.equals("sand")); assertEquals("sand", sw[1][2].string);
} }
{ {
// make sure we can handle 2-char codepoints // make sure we can handle 2-char codepoints
@ -334,7 +301,7 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
ir, ir,
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
Assert.assertTrue(sw.length == 0); assertEquals(0, sw.length);
} }
ir.close(); ir.close();
@ -352,14 +319,15 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
for (int i = 0; i < numDocs; i++) { for (int i = 0; i < numDocs; i++) {
String orig = ""; String orig = "";
if (random().nextBoolean()) { if (random().nextBoolean()) {
while (!goodTestString(orig)) { while (badTestString(orig)) {
orig = TestUtil.randomSimpleString(random(), maxLength); orig = TestUtil.randomSimpleString(random(), maxLength);
} }
} else { } else {
while (!goodTestString(orig)) { while (badTestString(orig)) {
orig = TestUtil.randomUnicodeString(random(), maxLength); orig = TestUtil.randomUnicodeString(random(), maxLength);
} }
} }
originals.add(orig); originals.add(orig);
int totalLength = orig.codePointCount(0, orig.length()); int totalLength = orig.codePointCount(0, orig.length());
int breakAt = orig.offsetByCodePoints(0, TestUtil.nextInt(random(), 1, totalLength - 1)); int breakAt = orig.offsetByCodePoints(0, TestUtil.nextInt(random(), 1, totalLength - 1));
@ -397,12 +365,12 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
boolean failed = true; boolean failed = true;
for (SuggestWord[] sw1 : sw) { for (SuggestWord[] sw1 : sw) {
Assert.assertTrue(sw1.length == 2); assertEquals(2, sw1.length);
if (sw1[0].string.equals(left) && sw1[1].string.equals(right)) { if (sw1[0].string.equals(left) && sw1[1].string.equals(right)) {
failed = false; failed = false;
} }
} }
Assert.assertFalse( assertFalse(
"Failed getting break suggestions\n >Original: " "Failed getting break suggestions\n >Original: "
+ orig + orig
+ "\n >Left: " + "\n >Left: "
@ -417,12 +385,12 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
wbsp.suggestWordCombinations(terms, originals.size(), ir, SuggestMode.SUGGEST_ALWAYS); wbsp.suggestWordCombinations(terms, originals.size(), ir, SuggestMode.SUGGEST_ALWAYS);
boolean failed = true; boolean failed = true;
for (CombineSuggestion cs1 : cs) { for (CombineSuggestion cs1 : cs) {
Assert.assertTrue(cs1.originalTermIndexes.length == 2); assertEquals(2, cs1.originalTermIndexes.length);
if (cs1.suggestion.string.equals(left + right)) { if (cs1.suggestion.string.equals(left + right)) {
failed = false; failed = false;
} }
} }
Assert.assertFalse( assertFalse(
"Failed getting combine suggestions\n >Original: " "Failed getting combine suggestions\n >Original: "
+ orig + orig
+ "\n >Left: " + "\n >Left: "
@ -435,12 +403,21 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
IOUtils.close(ir, dir, analyzer); IOUtils.close(ir, dir, analyzer);
} }
/**
 * Verifies a {@code CombineSuggestion} against the expected suggestion text, score and original
 * term indexes.
 *
 * @param cs the combine suggestion under test
 * @param word expected value of {@code cs.suggestion.string}
 * @param score expected value of {@code cs.suggestion.score}; compared with a delta of 0
 * @param termIndexes expected contents of {@code cs.originalTermIndexes}, compared as an array
 */
private static void assertSuggestionEquals(
    CombineSuggestion cs, String word, float score, int... termIndexes) {
  // Pull the observed values out first so each assertion reads as "expected vs. actual".
  final String actualWord = cs.suggestion.string;
  final float actualScore = cs.suggestion.score;
  final int[] actualTermIndexes = cs.originalTermIndexes;

  assertEquals(word, actualWord);
  assertEquals(score, actualScore, 0f);
  assertArrayEquals(termIndexes, actualTermIndexes);
}
/**
 * Verifies a single {@code SuggestWord} against the expected text and score.
 *
 * @param sw the suggested word under test
 * @param word expected value of {@code sw.string}
 * @param score expected value of {@code sw.score}; compared with a delta of 0
 */
private static void assertSuggestionEquals(SuggestWord sw, String word, float score) {
  final String actualWord = sw.string;
  final float actualScore = sw.score;

  assertEquals(word, actualWord);
  assertEquals(score, actualScore, 0f);
}
private static final Pattern mockTokenizerWhitespacePattern = Pattern.compile("[ \\t\\r\\n]"); private static final Pattern mockTokenizerWhitespacePattern = Pattern.compile("[ \\t\\r\\n]");
private boolean goodTestString(String s) { private boolean badTestString(String s) {
if (s.codePointCount(0, s.length()) < 2 || mockTokenizerWhitespacePattern.matcher(s).find()) { return s.codePointCount(0, s.length()) < 2 || mockTokenizerWhitespacePattern.matcher(s).find();
return false;
}
return true;
} }
} }