mirror of https://github.com/apache/lucene.git
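Remove the UnsupportedOperationException (UOE) from WholeBreakIterator.setText and make this BreakIterator well-behaved for text whose begin index is not 0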
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a5fd71f0ee
commit
e3150fc20c
|
@ -20,11 +20,11 @@ package org.apache.lucene.search.postingshighlight;
|
||||||
import java.text.BreakIterator;
|
import java.text.BreakIterator;
|
||||||
import java.text.CharacterIterator;
|
import java.text.CharacterIterator;
|
||||||
|
|
||||||
/** Just produces one single fragment for the entire
|
/** Just produces one single fragment for the entire text */
|
||||||
* string. */
|
|
||||||
final class WholeBreakIterator extends BreakIterator {
|
final class WholeBreakIterator extends BreakIterator {
|
||||||
private CharacterIterator text;
|
private CharacterIterator text;
|
||||||
private int len;
|
private int start;
|
||||||
|
private int end;
|
||||||
private int current;
|
private int current;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -34,17 +34,17 @@ final class WholeBreakIterator extends BreakIterator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int first() {
|
public int first() {
|
||||||
return (current = 0);
|
return (current = start);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int following(int pos) {
|
public int following(int pos) {
|
||||||
if (pos < 0 || pos > len) {
|
if (pos < start || pos > end) {
|
||||||
throw new IllegalArgumentException("offset out of bounds");
|
throw new IllegalArgumentException("offset out of bounds");
|
||||||
} else if (pos == len) {
|
} else if (pos == end) {
|
||||||
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
|
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
|
||||||
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
|
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
|
||||||
current = len;
|
current = end;
|
||||||
return DONE;
|
return DONE;
|
||||||
} else {
|
} else {
|
||||||
return last();
|
return last();
|
||||||
|
@ -58,12 +58,12 @@ final class WholeBreakIterator extends BreakIterator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int last() {
|
public int last() {
|
||||||
return (current = len);
|
return (current = end);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int next() {
|
public int next() {
|
||||||
if (current == len) {
|
if (current == end) {
|
||||||
return DONE;
|
return DONE;
|
||||||
} else {
|
} else {
|
||||||
return last();
|
return last();
|
||||||
|
@ -86,12 +86,12 @@ final class WholeBreakIterator extends BreakIterator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int preceding(int pos) {
|
public int preceding(int pos) {
|
||||||
if (pos < 0 || pos > len) {
|
if (pos < start || pos > end) {
|
||||||
throw new IllegalArgumentException("offset out of bounds");
|
throw new IllegalArgumentException("offset out of bounds");
|
||||||
} else if (pos == 0) {
|
} else if (pos == start) {
|
||||||
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
|
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
|
||||||
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
|
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
|
||||||
current = 0;
|
current = start;
|
||||||
return DONE;
|
return DONE;
|
||||||
} else {
|
} else {
|
||||||
return first();
|
return first();
|
||||||
|
@ -100,7 +100,7 @@ final class WholeBreakIterator extends BreakIterator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int previous() {
|
public int previous() {
|
||||||
if (current == 0) {
|
if (current == start) {
|
||||||
return DONE;
|
return DONE;
|
||||||
} else {
|
} else {
|
||||||
return first();
|
return first();
|
||||||
|
@ -109,11 +109,9 @@ final class WholeBreakIterator extends BreakIterator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setText(CharacterIterator newText) {
|
public void setText(CharacterIterator newText) {
|
||||||
if (newText.getBeginIndex() != 0) {
|
start = newText.getBeginIndex();
|
||||||
throw new UnsupportedOperationException();
|
end = newText.getEndIndex();
|
||||||
}
|
|
||||||
len = newText.getEndIndex();
|
|
||||||
text = newText;
|
text = newText;
|
||||||
current = 0;
|
current = newText.getIndex();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,8 @@ package org.apache.lucene.search.postingshighlight;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.text.BreakIterator;
|
import java.text.BreakIterator;
|
||||||
|
import java.text.CharacterIterator;
|
||||||
|
import java.text.StringCharacterIterator;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -34,11 +36,51 @@ public class TestWholeBreakIterator extends LuceneTestCase {
|
||||||
assertSameBreaks("", expected, actual);
|
assertSameBreaks("", expected, actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Asserts that two breakiterators break the text the same way */
|
public void testSliceEnd() throws Exception {
|
||||||
// TODO: change this to use offsets with non-zero start/end
|
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||||
|
BreakIterator actual = new WholeBreakIterator();
|
||||||
|
assertSameBreaks("a000", 0, 1, expected, actual);
|
||||||
|
assertSameBreaks("ab000", 0, 1, expected, actual);
|
||||||
|
assertSameBreaks("abc000", 0, 1, expected, actual);
|
||||||
|
assertSameBreaks("000", 0, 0, expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSliceStart() throws Exception {
|
||||||
|
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||||
|
BreakIterator actual = new WholeBreakIterator();
|
||||||
|
assertSameBreaks("000a", 3, 1, expected, actual);
|
||||||
|
assertSameBreaks("000ab", 3, 2, expected, actual);
|
||||||
|
assertSameBreaks("000abc", 3, 3, expected, actual);
|
||||||
|
assertSameBreaks("000", 3, 0, expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSliceMiddle() throws Exception {
|
||||||
|
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||||
|
BreakIterator actual = new WholeBreakIterator();
|
||||||
|
assertSameBreaks("000a000", 3, 1, expected, actual);
|
||||||
|
assertSameBreaks("000ab000", 3, 2, expected, actual);
|
||||||
|
assertSameBreaks("000abc000", 3, 3, expected, actual);
|
||||||
|
assertSameBreaks("000000", 3, 0, expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
public void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
|
public void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
|
||||||
expected.setText(text);
|
assertSameBreaks(new StringCharacterIterator(text),
|
||||||
actual.setText(text);
|
new StringCharacterIterator(text),
|
||||||
|
expected,
|
||||||
|
actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) {
|
||||||
|
assertSameBreaks(new StringCharacterIterator(text, offset, offset+length, offset),
|
||||||
|
new StringCharacterIterator(text, offset, offset+length, offset),
|
||||||
|
expected,
|
||||||
|
actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Asserts that two breakiterators break the text the same way */
|
||||||
|
public void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) {
|
||||||
|
expected.setText(one);
|
||||||
|
actual.setText(two);
|
||||||
|
|
||||||
assertEquals(expected.current(), actual.current());
|
assertEquals(expected.current(), actual.current());
|
||||||
|
|
||||||
|
@ -64,7 +106,7 @@ public class TestWholeBreakIterator extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// following()
|
// following()
|
||||||
for (int i = 0; i <= text.length(); i++) {
|
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
|
||||||
expected.first();
|
expected.first();
|
||||||
actual.first();
|
actual.first();
|
||||||
assertEquals(expected.following(i), actual.following(i));
|
assertEquals(expected.following(i), actual.following(i));
|
||||||
|
@ -72,7 +114,7 @@ public class TestWholeBreakIterator extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// preceding()
|
// preceding()
|
||||||
for (int i = 0; i <= text.length(); i++) {
|
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
|
||||||
expected.last();
|
expected.last();
|
||||||
actual.last();
|
actual.last();
|
||||||
assertEquals(expected.preceding(i), actual.preceding(i));
|
assertEquals(expected.preceding(i), actual.preceding(i));
|
||||||
|
|
Loading…
Reference in New Issue