From e3150fc20c2cfc5a0128f765693eeee949cdfc73 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 22 Mar 2013 01:56:37 +0000 Subject: [PATCH] remove UOE and make this breakiterator well-behaved git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459607 13f79535-47bb-0310-9956-ffa450edef68 --- .../postingshighlight/WholeBreakIterator.java | 34 ++++++------ .../TestWholeBreakIterator.java | 54 ++++++++++++++++--- 2 files changed, 64 insertions(+), 24 deletions(-) diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java index 5250f6956fe..d88205a92c1 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java @@ -20,11 +20,11 @@ package org.apache.lucene.search.postingshighlight; import java.text.BreakIterator; import java.text.CharacterIterator; -/** Just produces one single fragment for the entire - * string. */ +/** Just produces one single fragment for the entire text */ final class WholeBreakIterator extends BreakIterator { private CharacterIterator text; - private int len; + private int start; + private int end; private int current; @Override @@ -34,17 +34,17 @@ final class WholeBreakIterator extends BreakIterator { @Override public int first() { - return (current = 0); + return (current = start); } @Override public int following(int pos) { - if (pos < 0 || pos > len) { + if (pos < start || pos > end) { throw new IllegalArgumentException("offset out of bounds"); - } else if (pos == len) { + } else if (pos == end) { // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something) // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909 - current = len; + current = end; return DONE; } else { return last(); @@ -58,12 +58,12 @@ final class WholeBreakIterator extends BreakIterator { @Override public int last() { - return (current = len); + return (current = end); } @Override public int next() { - if (current == len) { + if (current == end) { return DONE; } else { return last(); @@ -86,12 +86,12 @@ final class WholeBreakIterator extends BreakIterator { @Override public int preceding(int pos) { - if (pos < 0 || pos > len) { + if (pos < start || pos > end) { throw new IllegalArgumentException("offset out of bounds"); - } else if (pos == 0) { + } else if (pos == start) { // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something) // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909 - current = 0; + current = start; return DONE; } else { return first(); @@ -100,7 +100,7 @@ final class WholeBreakIterator extends BreakIterator { @Override public int previous() { - if (current == 0) { + if (current == start) { return DONE; } else { return first(); @@ -109,11 +109,9 @@ final class WholeBreakIterator extends BreakIterator { @Override public void setText(CharacterIterator newText) { - if (newText.getBeginIndex() != 0) { - throw new UnsupportedOperationException(); - } - len = newText.getEndIndex(); + start = newText.getBeginIndex(); + end = newText.getEndIndex(); text = newText; - current = 0; + current = newText.getIndex(); } } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestWholeBreakIterator.java b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestWholeBreakIterator.java index d5964a62935..e93c759a642 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestWholeBreakIterator.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestWholeBreakIterator.java @@ -18,6 +18,8 @@ package org.apache.lucene.search.postingshighlight; */ import java.text.BreakIterator; +import java.text.CharacterIterator; +import java.text.StringCharacterIterator; import java.util.Locale; import org.apache.lucene.util.LuceneTestCase; @@ -33,12 +35,52 @@ public class TestWholeBreakIterator extends LuceneTestCase { assertSameBreaks("abc", expected, actual); assertSameBreaks("", expected, actual); } + + public void testSliceEnd() throws Exception { + BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT); + BreakIterator actual = new WholeBreakIterator(); + assertSameBreaks("a000", 0, 1, expected, actual); + assertSameBreaks("ab000", 0, 1, expected, actual); + assertSameBreaks("abc000", 0, 1, expected, actual); + assertSameBreaks("000", 0, 0, expected, actual); + } + + public void testSliceStart() throws Exception { + BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT); + BreakIterator actual = new WholeBreakIterator(); + assertSameBreaks("000a", 3, 1, expected, actual); + assertSameBreaks("000ab", 3, 2, expected, actual); + assertSameBreaks("000abc", 3, 3, expected, actual); + assertSameBreaks("000", 3, 0, expected, actual); + } + + public void testSliceMiddle() throws Exception { + BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT); + BreakIterator actual = new WholeBreakIterator(); + assertSameBreaks("000a000", 3, 1, expected, actual); + assertSameBreaks("000ab000", 3, 2, expected, actual); + assertSameBreaks("000abc000", 3, 3, expected, actual); + assertSameBreaks("000000", 3, 0, expected, actual); + } + + public void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) { + assertSameBreaks(new StringCharacterIterator(text), + new StringCharacterIterator(text), + expected, + actual); + } + + public void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) { + assertSameBreaks(new StringCharacterIterator(text, offset, offset+length, offset), + new StringCharacterIterator(text, offset, offset+length, offset), + expected, + actual); + } /** Asserts that two breakiterators break the text the same way */ - // TODO: change this to use offsets with non-zero start/end - public void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) { - expected.setText(text); - actual.setText(text); + public void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) { + expected.setText(one); + actual.setText(two); assertEquals(expected.current(), actual.current()); @@ -64,7 +106,7 @@ public class TestWholeBreakIterator extends LuceneTestCase { } // following() - for (int i = 0; i <= text.length(); i++) { + for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) { expected.first(); actual.first(); assertEquals(expected.following(i), actual.following(i)); @@ -72,7 +114,7 @@ public class TestWholeBreakIterator extends LuceneTestCase { } // preceding() - for (int i = 0; i <= text.length(); i++) { + for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) { expected.last(); actual.last(); assertEquals(expected.preceding(i), actual.preceding(i));