mirror of https://github.com/apache/lucene.git
Simplify asserts in TestWordBreakSpellChecker (#13007)
This commit is contained in:
parent
89a02fa4e3
commit
f67b1b3d1f
|
@ -38,7 +38,6 @@ import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
import org.apache.lucene.tests.util.TestUtil;
|
import org.apache.lucene.tests.util.TestUtil;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.hamcrest.MatcherAssert;
|
import org.hamcrest.MatcherAssert;
|
||||||
import org.junit.Assert;
|
|
||||||
|
|
||||||
public class TestWordBreakSpellChecker extends LuceneTestCase {
|
public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
private Directory dir;
|
private Directory dir;
|
||||||
|
@ -134,30 +133,16 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
wbsp.setMinSuggestionFrequency(1);
|
wbsp.setMinSuggestionFrequency(1);
|
||||||
CombineSuggestion[] cs =
|
CombineSuggestion[] cs =
|
||||||
wbsp.suggestWordCombinations(terms, 10, ir, SuggestMode.SUGGEST_ALWAYS);
|
wbsp.suggestWordCombinations(terms, 10, ir, SuggestMode.SUGGEST_ALWAYS);
|
||||||
Assert.assertTrue(cs.length == 5);
|
assertEquals(5, cs.length);
|
||||||
|
|
||||||
Assert.assertTrue(cs[0].originalTermIndexes.length == 2);
|
assertSuggestionEquals(cs[0], "hundred", 1.0f, 1, 2);
|
||||||
Assert.assertTrue(cs[0].originalTermIndexes[0] == 1);
|
assertSuggestionEquals(cs[1], "eighty", 1.0f, 3, 4);
|
||||||
Assert.assertTrue(cs[0].originalTermIndexes[1] == 2);
|
assertSuggestionEquals(cs[2], "yeight", 1.0f, 4, 5);
|
||||||
Assert.assertTrue(cs[0].suggestion.string.equals("hundred"));
|
|
||||||
Assert.assertTrue(cs[0].suggestion.score == 1);
|
|
||||||
|
|
||||||
Assert.assertTrue(cs[1].originalTermIndexes.length == 2);
|
|
||||||
Assert.assertTrue(cs[1].originalTermIndexes[0] == 3);
|
|
||||||
Assert.assertTrue(cs[1].originalTermIndexes[1] == 4);
|
|
||||||
Assert.assertTrue(cs[1].suggestion.string.equals("eighty"));
|
|
||||||
Assert.assertTrue(cs[1].suggestion.score == 1);
|
|
||||||
|
|
||||||
Assert.assertTrue(cs[2].originalTermIndexes.length == 2);
|
|
||||||
Assert.assertTrue(cs[2].originalTermIndexes[0] == 4);
|
|
||||||
Assert.assertTrue(cs[2].originalTermIndexes[1] == 5);
|
|
||||||
Assert.assertTrue(cs[2].suggestion.string.equals("yeight"));
|
|
||||||
Assert.assertTrue(cs[2].suggestion.score == 1);
|
|
||||||
|
|
||||||
for (int i = 3; i < 5; i++) {
|
for (int i = 3; i < 5; i++) {
|
||||||
Assert.assertTrue(cs[i].originalTermIndexes.length == 3);
|
assertEquals(3, cs[i].originalTermIndexes.length);
|
||||||
Assert.assertTrue(cs[i].suggestion.score == 2);
|
assertEquals(2, cs[i].suggestion.score, 0);
|
||||||
Assert.assertTrue(
|
assertTrue(
|
||||||
(cs[i].originalTermIndexes[0] == 1
|
(cs[i].originalTermIndexes[0] == 1
|
||||||
&& cs[i].originalTermIndexes[1] == 2
|
&& cs[i].originalTermIndexes[1] == 2
|
||||||
&& cs[i].originalTermIndexes[2] == 3
|
&& cs[i].originalTermIndexes[2] == 3
|
||||||
|
@ -169,20 +154,10 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
cs = wbsp.suggestWordCombinations(terms, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
cs = wbsp.suggestWordCombinations(terms, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
|
||||||
Assert.assertTrue(cs.length == 2);
|
assertEquals(2, cs.length);
|
||||||
Assert.assertTrue(cs[0].originalTermIndexes.length == 2);
|
|
||||||
Assert.assertTrue(cs[0].suggestion.score == 1);
|
|
||||||
Assert.assertTrue(cs[0].originalTermIndexes[0] == 1);
|
|
||||||
Assert.assertTrue(cs[0].originalTermIndexes[1] == 2);
|
|
||||||
Assert.assertTrue(cs[0].suggestion.string.equals("hundred"));
|
|
||||||
Assert.assertTrue(cs[0].suggestion.score == 1);
|
|
||||||
|
|
||||||
Assert.assertTrue(cs[1].originalTermIndexes.length == 3);
|
assertSuggestionEquals(cs[0], "hundred", 1.0f, 1, 2);
|
||||||
Assert.assertTrue(cs[1].suggestion.score == 2);
|
assertSuggestionEquals(cs[1], "hundredeight", 2.0f, 1, 2, 3);
|
||||||
Assert.assertTrue(cs[1].originalTermIndexes[0] == 1);
|
|
||||||
Assert.assertTrue(cs[1].originalTermIndexes[1] == 2);
|
|
||||||
Assert.assertTrue(cs[1].originalTermIndexes[2] == 3);
|
|
||||||
Assert.assertTrue(cs[1].suggestion.string.equals("hundredeight"));
|
|
||||||
}
|
}
|
||||||
ir.close();
|
ir.close();
|
||||||
}
|
}
|
||||||
|
@ -203,12 +178,10 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
ir,
|
ir,
|
||||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
Assert.assertTrue(sw.length == 1);
|
assertEquals(1, sw.length);
|
||||||
Assert.assertTrue(sw[0].length == 2);
|
assertEquals(2, sw[0].length);
|
||||||
Assert.assertTrue(sw[0][0].string.equals("ninety"));
|
assertSuggestionEquals(sw[0][0], "ninety", 1.0f);
|
||||||
Assert.assertTrue(sw[0][1].string.equals("nine"));
|
assertSuggestionEquals(sw[0][1], "nine", 1.0f);
|
||||||
Assert.assertTrue(sw[0][0].score == 1);
|
|
||||||
Assert.assertTrue(sw[0][1].score == 1);
|
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
Term term = new Term("numbers", "onethousand");
|
Term term = new Term("numbers", "onethousand");
|
||||||
|
@ -222,12 +195,10 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
ir,
|
ir,
|
||||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
Assert.assertTrue(sw.length == 1);
|
assertEquals(1, sw.length);
|
||||||
Assert.assertTrue(sw[0].length == 2);
|
assertEquals(2, sw[0].length);
|
||||||
Assert.assertTrue(sw[0][0].string.equals("one"));
|
assertSuggestionEquals(sw[0][0], "one", 1.0f);
|
||||||
Assert.assertTrue(sw[0][1].string.equals("thousand"));
|
assertSuggestionEquals(sw[0][1], "thousand", 1.0f);
|
||||||
Assert.assertTrue(sw[0][0].score == 1);
|
|
||||||
Assert.assertTrue(sw[0][1].score == 1);
|
|
||||||
|
|
||||||
wbsp.setMaxChanges(2);
|
wbsp.setMaxChanges(2);
|
||||||
wbsp.setMinSuggestionFrequency(1);
|
wbsp.setMinSuggestionFrequency(1);
|
||||||
|
@ -238,8 +209,8 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
ir,
|
ir,
|
||||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
Assert.assertTrue(sw.length == 1);
|
assertEquals(1, sw.length);
|
||||||
Assert.assertTrue(sw[0].length == 2);
|
assertEquals(2, sw[0].length);
|
||||||
|
|
||||||
wbsp.setMaxChanges(2);
|
wbsp.setMaxChanges(2);
|
||||||
wbsp.setMinSuggestionFrequency(2);
|
wbsp.setMinSuggestionFrequency(2);
|
||||||
|
@ -250,8 +221,8 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
ir,
|
ir,
|
||||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
Assert.assertTrue(sw.length == 1);
|
assertEquals(1, sw.length);
|
||||||
Assert.assertTrue(sw[0].length == 2);
|
assertEquals(2, sw[0].length);
|
||||||
|
|
||||||
wbsp.setMaxChanges(2);
|
wbsp.setMaxChanges(2);
|
||||||
wbsp.setMinSuggestionFrequency(1);
|
wbsp.setMinSuggestionFrequency(1);
|
||||||
|
@ -262,24 +233,20 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
ir,
|
ir,
|
||||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
Assert.assertTrue(sw.length == 2);
|
assertEquals(2, sw.length);
|
||||||
Assert.assertTrue(sw[0].length == 2);
|
assertEquals(2, sw[0].length);
|
||||||
Assert.assertTrue(sw[0][0].string.equals("one"));
|
assertSuggestionEquals(sw[0][0], "one", 1.0f);
|
||||||
Assert.assertTrue(sw[0][1].string.equals("thousand"));
|
assertSuggestionEquals(sw[0][1], "thousand", 1.0f);
|
||||||
Assert.assertTrue(sw[0][0].score == 1);
|
MatcherAssert.assertThat(sw[0][1].freq, greaterThan(1));
|
||||||
Assert.assertTrue(sw[0][1].score == 1);
|
MatcherAssert.assertThat(sw[0][0].freq, greaterThan(sw[0][1].freq));
|
||||||
Assert.assertTrue(sw[0][1].freq > 1);
|
|
||||||
Assert.assertTrue(sw[0][0].freq > sw[0][1].freq);
|
assertEquals(3, sw[1].length);
|
||||||
Assert.assertTrue(sw[1].length == 3);
|
assertSuggestionEquals(sw[1][0], "one", 2.0f);
|
||||||
Assert.assertTrue(sw[1][0].string.equals("one"));
|
assertSuggestionEquals(sw[1][1], "thou", 2.0f);
|
||||||
Assert.assertTrue(sw[1][1].string.equals("thou"));
|
assertSuggestionEquals(sw[1][2], "sand", 2.0f);
|
||||||
Assert.assertTrue(sw[1][2].string.equals("sand"));
|
MatcherAssert.assertThat(sw[1][0].freq, greaterThan(1));
|
||||||
Assert.assertTrue(sw[1][0].score == 2);
|
assertEquals(1, sw[1][1].freq);
|
||||||
Assert.assertTrue(sw[1][1].score == 2);
|
assertEquals(1, sw[1][2].freq);
|
||||||
Assert.assertTrue(sw[1][2].score == 2);
|
|
||||||
Assert.assertTrue(sw[1][0].freq > 1);
|
|
||||||
Assert.assertTrue(sw[1][1].freq == 1);
|
|
||||||
Assert.assertTrue(sw[1][2].freq == 1);
|
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
Term term = new Term("numbers", "onethousandonehundredeleven");
|
Term term = new Term("numbers", "onethousandonehundredeleven");
|
||||||
|
@ -293,7 +260,7 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
ir,
|
ir,
|
||||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
Assert.assertTrue(sw.length == 0);
|
assertEquals(0, sw.length);
|
||||||
|
|
||||||
wbsp.setMaxChanges(4);
|
wbsp.setMaxChanges(4);
|
||||||
sw =
|
sw =
|
||||||
|
@ -303,8 +270,8 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
ir,
|
ir,
|
||||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
Assert.assertTrue(sw.length == 1);
|
assertEquals(1, sw.length);
|
||||||
Assert.assertTrue(sw[0].length == 5);
|
assertEquals(5, sw[0].length);
|
||||||
|
|
||||||
wbsp.setMaxChanges(5);
|
wbsp.setMaxChanges(5);
|
||||||
sw =
|
sw =
|
||||||
|
@ -314,12 +281,12 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
ir,
|
ir,
|
||||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
Assert.assertTrue(sw.length == 2);
|
assertEquals(2, sw.length);
|
||||||
Assert.assertTrue(sw[0].length == 5);
|
assertEquals(5, sw[0].length);
|
||||||
Assert.assertTrue(sw[0][1].string.equals("thousand"));
|
assertEquals("thousand", sw[0][1].string);
|
||||||
Assert.assertTrue(sw[1].length == 6);
|
assertEquals(6, sw[1].length);
|
||||||
Assert.assertTrue(sw[1][1].string.equals("thou"));
|
assertEquals("thou", sw[1][1].string);
|
||||||
Assert.assertTrue(sw[1][2].string.equals("sand"));
|
assertEquals("sand", sw[1][2].string);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
// make sure we can handle 2-char codepoints
|
// make sure we can handle 2-char codepoints
|
||||||
|
@ -334,7 +301,7 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
ir,
|
ir,
|
||||||
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
Assert.assertTrue(sw.length == 0);
|
assertEquals(0, sw.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
ir.close();
|
ir.close();
|
||||||
|
@ -352,14 +319,15 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
for (int i = 0; i < numDocs; i++) {
|
for (int i = 0; i < numDocs; i++) {
|
||||||
String orig = "";
|
String orig = "";
|
||||||
if (random().nextBoolean()) {
|
if (random().nextBoolean()) {
|
||||||
while (!goodTestString(orig)) {
|
while (badTestString(orig)) {
|
||||||
orig = TestUtil.randomSimpleString(random(), maxLength);
|
orig = TestUtil.randomSimpleString(random(), maxLength);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
while (!goodTestString(orig)) {
|
while (badTestString(orig)) {
|
||||||
orig = TestUtil.randomUnicodeString(random(), maxLength);
|
orig = TestUtil.randomUnicodeString(random(), maxLength);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
originals.add(orig);
|
originals.add(orig);
|
||||||
int totalLength = orig.codePointCount(0, orig.length());
|
int totalLength = orig.codePointCount(0, orig.length());
|
||||||
int breakAt = orig.offsetByCodePoints(0, TestUtil.nextInt(random(), 1, totalLength - 1));
|
int breakAt = orig.offsetByCodePoints(0, TestUtil.nextInt(random(), 1, totalLength - 1));
|
||||||
|
@ -397,12 +365,12 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
|
||||||
boolean failed = true;
|
boolean failed = true;
|
||||||
for (SuggestWord[] sw1 : sw) {
|
for (SuggestWord[] sw1 : sw) {
|
||||||
Assert.assertTrue(sw1.length == 2);
|
assertEquals(2, sw1.length);
|
||||||
if (sw1[0].string.equals(left) && sw1[1].string.equals(right)) {
|
if (sw1[0].string.equals(left) && sw1[1].string.equals(right)) {
|
||||||
failed = false;
|
failed = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Assert.assertFalse(
|
assertFalse(
|
||||||
"Failed getting break suggestions\n >Original: "
|
"Failed getting break suggestions\n >Original: "
|
||||||
+ orig
|
+ orig
|
||||||
+ "\n >Left: "
|
+ "\n >Left: "
|
||||||
|
@ -417,12 +385,12 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
wbsp.suggestWordCombinations(terms, originals.size(), ir, SuggestMode.SUGGEST_ALWAYS);
|
wbsp.suggestWordCombinations(terms, originals.size(), ir, SuggestMode.SUGGEST_ALWAYS);
|
||||||
boolean failed = true;
|
boolean failed = true;
|
||||||
for (CombineSuggestion cs1 : cs) {
|
for (CombineSuggestion cs1 : cs) {
|
||||||
Assert.assertTrue(cs1.originalTermIndexes.length == 2);
|
assertEquals(2, cs1.originalTermIndexes.length);
|
||||||
if (cs1.suggestion.string.equals(left + right)) {
|
if (cs1.suggestion.string.equals(left + right)) {
|
||||||
failed = false;
|
failed = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Assert.assertFalse(
|
assertFalse(
|
||||||
"Failed getting combine suggestions\n >Original: "
|
"Failed getting combine suggestions\n >Original: "
|
||||||
+ orig
|
+ orig
|
||||||
+ "\n >Left: "
|
+ "\n >Left: "
|
||||||
|
@ -435,12 +403,21 @@ public class TestWordBreakSpellChecker extends LuceneTestCase {
|
||||||
IOUtils.close(ir, dir, analyzer);
|
IOUtils.close(ir, dir, analyzer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void assertSuggestionEquals(
|
||||||
|
CombineSuggestion cs, String word, float score, int... termIndexes) {
|
||||||
|
assertEquals(word, cs.suggestion.string);
|
||||||
|
assertEquals(score, cs.suggestion.score, 0);
|
||||||
|
assertArrayEquals(termIndexes, cs.originalTermIndexes);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertSuggestionEquals(SuggestWord sw, String word, float score) {
|
||||||
|
assertEquals(word, sw.string);
|
||||||
|
assertEquals(score, sw.score, 0);
|
||||||
|
}
|
||||||
|
|
||||||
private static final Pattern mockTokenizerWhitespacePattern = Pattern.compile("[ \\t\\r\\n]");
|
private static final Pattern mockTokenizerWhitespacePattern = Pattern.compile("[ \\t\\r\\n]");
|
||||||
|
|
||||||
private boolean goodTestString(String s) {
|
private boolean badTestString(String s) {
|
||||||
if (s.codePointCount(0, s.length()) < 2 || mockTokenizerWhitespacePattern.matcher(s).find()) {
|
return s.codePointCount(0, s.length()) < 2 || mockTokenizerWhitespacePattern.matcher(s).find();
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue