Adjust middle of sentence calculation and add test verifying correct behavior (#13170)

This commit is contained in:
Tim Grein 2024-03-11 13:21:22 +01:00 committed by GitHub
parent 3f4413567d
commit 44fa35b65f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 12 additions and 1 deletion

View File

@@ -148,7 +148,7 @@ public final class OpenNLPSentenceBreakIterator extends BreakIterator {
currentSentence = 0;
return DONE;
} else {
currentSentence = sentenceStarts.length / 2; // start search from the middle
currentSentence = (sentenceStarts.length - 1) / 2; // start search from the middle
moveToSentenceAt(pos, 0, sentenceStarts.length - 1);
if (0 == currentSentence) {
text.setIndex(text.getBeginIndex());

View File

@@ -203,6 +203,17 @@ public class TestOpenNLPSentenceBreakIterator extends LuceneTestCase {
test0Sentences(bi);
}
/**
 * Checks {@code preceding(int)} on a text made of two sentences: when asked from an offset that
 * lies inside the second sentence, the iterator is expected to report boundary {@code 0}, i.e.
 * the very beginning of the text.
 *
 * @throws IOException if the sentence detector cannot be obtained from the model file
 */
public void testPrecedingWithTwoSentences() throws IOException {
  final String twoSentences = "This is sentence one. This is sentence two.";
  // The second sentence begins at index 22, so offset 25 falls a few characters into it.
  final int offsetInSecondSentence = 25;

  BreakIterator iterator =
      new OpenNLPSentenceBreakIterator(OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile));
  iterator.setText(twoSentences);

  assertEquals(0, iterator.preceding(offsetInSecondSentence));
}
private void test0Sentences(BreakIterator bi) {
assertEquals(0, bi.current());
assertEquals(0, bi.first());