remove UOE and make this breakiterator well-behaved

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-03-22 01:56:37 +00:00
parent a5fd71f0ee
commit e3150fc20c
2 changed files with 64 additions and 24 deletions

View File

@ -20,11 +20,11 @@ package org.apache.lucene.search.postingshighlight;
import java.text.BreakIterator;
import java.text.CharacterIterator;
/** Just produces one single fragment for the entire
* string. */
/** Just produces one single fragment for the entire text */
final class WholeBreakIterator extends BreakIterator {
private CharacterIterator text;
private int len;
private int start;
private int end;
private int current;
@Override
@ -34,17 +34,17 @@ final class WholeBreakIterator extends BreakIterator {
@Override
public int first() {
return (current = 0);
return (current = start);
}
@Override
public int following(int pos) {
if (pos < 0 || pos > len) {
if (pos < start || pos > end) {
throw new IllegalArgumentException("offset out of bounds");
} else if (pos == len) {
} else if (pos == end) {
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
current = len;
current = end;
return DONE;
} else {
return last();
@ -58,12 +58,12 @@ final class WholeBreakIterator extends BreakIterator {
@Override
public int last() {
return (current = len);
return (current = end);
}
@Override
public int next() {
if (current == len) {
if (current == end) {
return DONE;
} else {
return last();
@ -86,12 +86,12 @@ final class WholeBreakIterator extends BreakIterator {
@Override
public int preceding(int pos) {
if (pos < 0 || pos > len) {
if (pos < start || pos > end) {
throw new IllegalArgumentException("offset out of bounds");
} else if (pos == 0) {
} else if (pos == start) {
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
current = 0;
current = start;
return DONE;
} else {
return first();
@ -100,7 +100,7 @@ final class WholeBreakIterator extends BreakIterator {
@Override
public int previous() {
if (current == 0) {
if (current == start) {
return DONE;
} else {
return first();
@ -109,11 +109,9 @@ final class WholeBreakIterator extends BreakIterator {
@Override
public void setText(CharacterIterator newText) {
if (newText.getBeginIndex() != 0) {
throw new UnsupportedOperationException();
}
len = newText.getEndIndex();
start = newText.getBeginIndex();
end = newText.getEndIndex();
text = newText;
current = 0;
current = newText.getIndex();
}
}

View File

@ -18,6 +18,8 @@ package org.apache.lucene.search.postingshighlight;
*/
import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Locale;
import org.apache.lucene.util.LuceneTestCase;
@ -34,11 +36,51 @@ public class TestWholeBreakIterator extends LuceneTestCase {
assertSameBreaks("", expected, actual);
}
/** Asserts that two breakiterators break the text the same way */
// TODO: change this to use offsets with non-zero start/end
public void testSliceEnd() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new WholeBreakIterator();
assertSameBreaks("a000", 0, 1, expected, actual);
assertSameBreaks("ab000", 0, 1, expected, actual);
assertSameBreaks("abc000", 0, 1, expected, actual);
assertSameBreaks("000", 0, 0, expected, actual);
}
public void testSliceStart() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new WholeBreakIterator();
assertSameBreaks("000a", 3, 1, expected, actual);
assertSameBreaks("000ab", 3, 2, expected, actual);
assertSameBreaks("000abc", 3, 3, expected, actual);
assertSameBreaks("000", 3, 0, expected, actual);
}
public void testSliceMiddle() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new WholeBreakIterator();
assertSameBreaks("000a000", 3, 1, expected, actual);
assertSameBreaks("000ab000", 3, 2, expected, actual);
assertSameBreaks("000abc000", 3, 3, expected, actual);
assertSameBreaks("000000", 3, 0, expected, actual);
}
public void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
expected.setText(text);
actual.setText(text);
assertSameBreaks(new StringCharacterIterator(text),
new StringCharacterIterator(text),
expected,
actual);
}
public void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) {
assertSameBreaks(new StringCharacterIterator(text, offset, offset+length, offset),
new StringCharacterIterator(text, offset, offset+length, offset),
expected,
actual);
}
/** Asserts that two breakiterators break the text the same way */
public void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) {
expected.setText(one);
actual.setText(two);
assertEquals(expected.current(), actual.current());
@ -64,7 +106,7 @@ public class TestWholeBreakIterator extends LuceneTestCase {
}
// following()
for (int i = 0; i <= text.length(); i++) {
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
expected.first();
actual.first();
assertEquals(expected.following(i), actual.following(i));
@ -72,7 +114,7 @@ public class TestWholeBreakIterator extends LuceneTestCase {
}
// preceding()
for (int i = 0; i <= text.length(); i++) {
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
expected.last();
actual.last();
assertEquals(expected.preceding(i), actual.preceding(i));