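Remove the UnsupportedOperationException (UOE) thrown by WholeBreakIterator.setText for text with a non-zero begin index, and make the break iterator well-behaved for arbitrary start/end offsets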

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-03-22 01:56:37 +00:00
parent a5fd71f0ee
commit e3150fc20c
2 changed files with 64 additions and 24 deletions

View File

@ -20,11 +20,11 @@ package org.apache.lucene.search.postingshighlight;
import java.text.BreakIterator; import java.text.BreakIterator;
import java.text.CharacterIterator; import java.text.CharacterIterator;
/** Just produces one single fragment for the entire /** Just produces one single fragment for the entire text */
* string. */
final class WholeBreakIterator extends BreakIterator { final class WholeBreakIterator extends BreakIterator {
private CharacterIterator text; private CharacterIterator text;
private int len; private int start;
private int end;
private int current; private int current;
@Override @Override
@ -34,17 +34,17 @@ final class WholeBreakIterator extends BreakIterator {
@Override @Override
public int first() { public int first() {
return (current = 0); return (current = start);
} }
@Override @Override
public int following(int pos) { public int following(int pos) {
if (pos < 0 || pos > len) { if (pos < start || pos > end) {
throw new IllegalArgumentException("offset out of bounds"); throw new IllegalArgumentException("offset out of bounds");
} else if (pos == len) { } else if (pos == end) {
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something) // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
current = len; current = end;
return DONE; return DONE;
} else { } else {
return last(); return last();
@ -58,12 +58,12 @@ final class WholeBreakIterator extends BreakIterator {
@Override @Override
public int last() { public int last() {
return (current = len); return (current = end);
} }
@Override @Override
public int next() { public int next() {
if (current == len) { if (current == end) {
return DONE; return DONE;
} else { } else {
return last(); return last();
@ -86,12 +86,12 @@ final class WholeBreakIterator extends BreakIterator {
@Override @Override
public int preceding(int pos) { public int preceding(int pos) {
if (pos < 0 || pos > len) { if (pos < start || pos > end) {
throw new IllegalArgumentException("offset out of bounds"); throw new IllegalArgumentException("offset out of bounds");
} else if (pos == 0) { } else if (pos == start) {
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something) // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
current = 0; current = start;
return DONE; return DONE;
} else { } else {
return first(); return first();
@ -100,7 +100,7 @@ final class WholeBreakIterator extends BreakIterator {
@Override @Override
public int previous() { public int previous() {
if (current == 0) { if (current == start) {
return DONE; return DONE;
} else { } else {
return first(); return first();
@ -109,11 +109,9 @@ final class WholeBreakIterator extends BreakIterator {
@Override @Override
public void setText(CharacterIterator newText) { public void setText(CharacterIterator newText) {
if (newText.getBeginIndex() != 0) { start = newText.getBeginIndex();
throw new UnsupportedOperationException(); end = newText.getEndIndex();
}
len = newText.getEndIndex();
text = newText; text = newText;
current = 0; current = newText.getIndex();
} }
} }

View File

@ -18,6 +18,8 @@ package org.apache.lucene.search.postingshighlight;
*/ */
import java.text.BreakIterator; import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Locale; import java.util.Locale;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
@ -34,11 +36,51 @@ public class TestWholeBreakIterator extends LuceneTestCase {
assertSameBreaks("", expected, actual); assertSameBreaks("", expected, actual);
} }
/** Asserts that two breakiterators break the text the same way */ public void testSliceEnd() throws Exception {
// TODO: change this to use offsets with non-zero start/end BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new WholeBreakIterator();
assertSameBreaks("a000", 0, 1, expected, actual);
assertSameBreaks("ab000", 0, 1, expected, actual);
assertSameBreaks("abc000", 0, 1, expected, actual);
assertSameBreaks("000", 0, 0, expected, actual);
}
public void testSliceStart() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new WholeBreakIterator();
assertSameBreaks("000a", 3, 1, expected, actual);
assertSameBreaks("000ab", 3, 2, expected, actual);
assertSameBreaks("000abc", 3, 3, expected, actual);
assertSameBreaks("000", 3, 0, expected, actual);
}
public void testSliceMiddle() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new WholeBreakIterator();
assertSameBreaks("000a000", 3, 1, expected, actual);
assertSameBreaks("000ab000", 3, 2, expected, actual);
assertSameBreaks("000abc000", 3, 3, expected, actual);
assertSameBreaks("000000", 3, 0, expected, actual);
}
public void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) { public void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
expected.setText(text); assertSameBreaks(new StringCharacterIterator(text),
actual.setText(text); new StringCharacterIterator(text),
expected,
actual);
}
public void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) {
assertSameBreaks(new StringCharacterIterator(text, offset, offset+length, offset),
new StringCharacterIterator(text, offset, offset+length, offset),
expected,
actual);
}
/** Asserts that two breakiterators break the text the same way */
public void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) {
expected.setText(one);
actual.setText(two);
assertEquals(expected.current(), actual.current()); assertEquals(expected.current(), actual.current());
@ -64,7 +106,7 @@ public class TestWholeBreakIterator extends LuceneTestCase {
} }
// following() // following()
for (int i = 0; i <= text.length(); i++) { for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
expected.first(); expected.first();
actual.first(); actual.first();
assertEquals(expected.following(i), actual.following(i)); assertEquals(expected.following(i), actual.following(i));
@ -72,7 +114,7 @@ public class TestWholeBreakIterator extends LuceneTestCase {
} }
// preceding() // preceding()
for (int i = 0; i <= text.length(); i++) { for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
expected.last(); expected.last();
actual.last(); actual.last();
assertEquals(expected.preceding(i), actual.preceding(i)); assertEquals(expected.preceding(i), actual.preceding(i));