mirror of https://github.com/apache/lucene.git
remove UOE and make this breakiterator well-behaved
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a5fd71f0ee
commit
e3150fc20c
|
@ -20,11 +20,11 @@ package org.apache.lucene.search.postingshighlight;
|
|||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
/** Just produces one single fragment for the entire
|
||||
* string. */
|
||||
/** Just produces one single fragment for the entire text */
|
||||
final class WholeBreakIterator extends BreakIterator {
|
||||
private CharacterIterator text;
|
||||
private int len;
|
||||
private int start;
|
||||
private int end;
|
||||
private int current;
|
||||
|
||||
@Override
|
||||
|
@ -34,17 +34,17 @@ final class WholeBreakIterator extends BreakIterator {
|
|||
|
||||
@Override
|
||||
public int first() {
|
||||
return (current = 0);
|
||||
return (current = start);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int following(int pos) {
|
||||
if (pos < 0 || pos > len) {
|
||||
if (pos < start || pos > end) {
|
||||
throw new IllegalArgumentException("offset out of bounds");
|
||||
} else if (pos == len) {
|
||||
} else if (pos == end) {
|
||||
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
|
||||
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
|
||||
current = len;
|
||||
current = end;
|
||||
return DONE;
|
||||
} else {
|
||||
return last();
|
||||
|
@ -58,12 +58,12 @@ final class WholeBreakIterator extends BreakIterator {
|
|||
|
||||
@Override
|
||||
public int last() {
|
||||
return (current = len);
|
||||
return (current = end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next() {
|
||||
if (current == len) {
|
||||
if (current == end) {
|
||||
return DONE;
|
||||
} else {
|
||||
return last();
|
||||
|
@ -86,12 +86,12 @@ final class WholeBreakIterator extends BreakIterator {
|
|||
|
||||
@Override
|
||||
public int preceding(int pos) {
|
||||
if (pos < 0 || pos > len) {
|
||||
if (pos < start || pos > end) {
|
||||
throw new IllegalArgumentException("offset out of bounds");
|
||||
} else if (pos == 0) {
|
||||
} else if (pos == start) {
|
||||
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
|
||||
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
|
||||
current = 0;
|
||||
current = start;
|
||||
return DONE;
|
||||
} else {
|
||||
return first();
|
||||
|
@ -100,7 +100,7 @@ final class WholeBreakIterator extends BreakIterator {
|
|||
|
||||
@Override
|
||||
public int previous() {
|
||||
if (current == 0) {
|
||||
if (current == start) {
|
||||
return DONE;
|
||||
} else {
|
||||
return first();
|
||||
|
@ -109,11 +109,9 @@ final class WholeBreakIterator extends BreakIterator {
|
|||
|
||||
@Override
|
||||
public void setText(CharacterIterator newText) {
|
||||
if (newText.getBeginIndex() != 0) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
len = newText.getEndIndex();
|
||||
start = newText.getBeginIndex();
|
||||
end = newText.getEndIndex();
|
||||
text = newText;
|
||||
current = 0;
|
||||
current = newText.getIndex();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,8 @@ package org.apache.lucene.search.postingshighlight;
|
|||
*/
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
import java.text.StringCharacterIterator;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -33,12 +35,52 @@ public class TestWholeBreakIterator extends LuceneTestCase {
|
|||
assertSameBreaks("abc", expected, actual);
|
||||
assertSameBreaks("", expected, actual);
|
||||
}
|
||||
|
||||
public void testSliceEnd() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new WholeBreakIterator();
|
||||
assertSameBreaks("a000", 0, 1, expected, actual);
|
||||
assertSameBreaks("ab000", 0, 1, expected, actual);
|
||||
assertSameBreaks("abc000", 0, 1, expected, actual);
|
||||
assertSameBreaks("000", 0, 0, expected, actual);
|
||||
}
|
||||
|
||||
public void testSliceStart() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new WholeBreakIterator();
|
||||
assertSameBreaks("000a", 3, 1, expected, actual);
|
||||
assertSameBreaks("000ab", 3, 2, expected, actual);
|
||||
assertSameBreaks("000abc", 3, 3, expected, actual);
|
||||
assertSameBreaks("000", 3, 0, expected, actual);
|
||||
}
|
||||
|
||||
public void testSliceMiddle() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new WholeBreakIterator();
|
||||
assertSameBreaks("000a000", 3, 1, expected, actual);
|
||||
assertSameBreaks("000ab000", 3, 2, expected, actual);
|
||||
assertSameBreaks("000abc000", 3, 3, expected, actual);
|
||||
assertSameBreaks("000000", 3, 0, expected, actual);
|
||||
}
|
||||
|
||||
public void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
|
||||
assertSameBreaks(new StringCharacterIterator(text),
|
||||
new StringCharacterIterator(text),
|
||||
expected,
|
||||
actual);
|
||||
}
|
||||
|
||||
public void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) {
|
||||
assertSameBreaks(new StringCharacterIterator(text, offset, offset+length, offset),
|
||||
new StringCharacterIterator(text, offset, offset+length, offset),
|
||||
expected,
|
||||
actual);
|
||||
}
|
||||
|
||||
/** Asserts that two breakiterators break the text the same way */
|
||||
// TODO: change this to use offsets with non-zero start/end
|
||||
public void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
|
||||
expected.setText(text);
|
||||
actual.setText(text);
|
||||
public void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) {
|
||||
expected.setText(one);
|
||||
actual.setText(two);
|
||||
|
||||
assertEquals(expected.current(), actual.current());
|
||||
|
||||
|
@ -64,7 +106,7 @@ public class TestWholeBreakIterator extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// following()
|
||||
for (int i = 0; i <= text.length(); i++) {
|
||||
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
|
||||
expected.first();
|
||||
actual.first();
|
||||
assertEquals(expected.following(i), actual.following(i));
|
||||
|
@ -72,7 +114,7 @@ public class TestWholeBreakIterator extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// preceding()
|
||||
for (int i = 0; i <= text.length(); i++) {
|
||||
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
|
||||
expected.last();
|
||||
actual.last();
|
||||
assertEquals(expected.preceding(i), actual.preceding(i));
|
||||
|
|
Loading…
Reference in New Issue