LUCENE-7815: Remove more PostingsHighlighter remnants

This commit is contained in:
David Smiley 2017-05-23 17:13:03 -04:00
parent 2319d69fd3
commit 85c3ae2040
4 changed files with 0 additions and 514 deletions

View File

@ -1,150 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.postingshighlight;
import java.text.BreakIterator;
import java.text.CharacterIterator;
/**
 * A {@link BreakIterator} that breaks the text whenever a certain separator, provided as a
 * constructor argument, is found.
 *
 * <p>The boundaries reported by this iterator are the begin index of the text, the index
 * immediately after every occurrence of the separator, and the end index of the text.
 *
 * <p>{@link #setText(CharacterIterator)} must be called before any other method; until then there
 * is no text to iterate over. After every operation the index of the underlying
 * {@link CharacterIterator} is left at {@link #current()}.
 */
public final class CustomSeparatorBreakIterator extends BreakIterator {

  /** The character after which a boundary is reported. */
  private final char separator;

  /** The text being scanned; assigned by {@link #setText(CharacterIterator)}. */
  private CharacterIterator text;

  /** The most recently returned boundary; this is the authoritative iterator position. */
  private int current;

  /**
   * Creates a break iterator that reports a boundary right after each occurrence of the given
   * character.
   *
   * @param separator the character that terminates a fragment
   */
  public CustomSeparatorBreakIterator(char separator) {
    this.separator = separator;
  }

  @Override
  public int current() {
    return current;
  }

  @Override
  public int first() {
    return moveTo(text.getBeginIndex());
  }

  @Override
  public int last() {
    return moveTo(text.getEndIndex());
  }

  @Override
  public int next() {
    if (current == text.getEndIndex()) {
      return DONE;
    }
    return moveTo(boundaryAfter(current));
  }

  /**
   * Returns the first boundary strictly greater than {@code pos} and makes it the current one.
   *
   * @throws IllegalArgumentException if {@code pos} is outside the text range
   */
  @Override
  public int following(int pos) {
    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
      throw new IllegalArgumentException("offset out of bounds");
    } else if (pos == text.getEndIndex()) {
      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
      // https://bugs.openjdk.java.net/browse/JDK-8015110
      moveTo(text.getEndIndex());
      return DONE;
    } else {
      return moveTo(boundaryAfter(pos));
    }
  }

  @Override
  public int previous() {
    if (current == text.getBeginIndex()) {
      return DONE;
    }
    return moveTo(boundaryBefore(current));
  }

  /**
   * Returns the last boundary strictly smaller than {@code pos} and makes it the current one.
   *
   * @throws IllegalArgumentException if {@code pos} is outside the text range
   */
  @Override
  public int preceding(int pos) {
    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
      throw new IllegalArgumentException("offset out of bounds");
    } else if (pos == text.getBeginIndex()) {
      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
      // https://bugs.openjdk.java.net/browse/JDK-8015110
      moveTo(text.getBeginIndex());
      return DONE;
    } else {
      return moveTo(boundaryBefore(pos));
    }
  }

  /**
   * Moves {@code n} boundaries forward ({@code n > 0}) or backward ({@code n < 0}).
   *
   * @return the position the iterator ends up at, i.e. {@link #current()}; when either end of the
   *     text is hit early this is that end, not {@link #DONE}
   */
  @Override
  public int next(int n) {
    if (n < 0) {
      // Counting up from n avoids the overflow of -n for Integer.MIN_VALUE.
      for (int i = n; i < 0; i++) {
        if (previous() == DONE) {
          break; // already at the first boundary, further steps cannot move
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        if (next() == DONE) {
          break; // already at the last boundary, further steps cannot move
        }
      }
    }
    return current();
  }

  @Override
  public CharacterIterator getText() {
    return text;
  }

  @Override
  public void setText(CharacterIterator newText) {
    text = newText;
    // The incoming index of newText is ignored: iteration always starts at the first boundary.
    moveTo(text.getBeginIndex());
  }

  /** Makes {@code boundary} the current position and keeps the text index in sync with it. */
  private int moveTo(int boundary) {
    text.setIndex(boundary);
    return current = boundary;
  }

  /**
   * Finds the first boundary strictly greater than {@code pos}. The character at {@code pos}
   * itself is inspected, because a separator there produces the boundary {@code pos + 1}.
   */
  private int boundaryAfter(int pos) {
    final int end = text.getEndIndex();
    char c = text.setIndex(pos);
    // Loop on the index rather than on CharacterIterator.DONE so that a literal '\uFFFF' inside
    // the text cannot end the scan early.
    while (text.getIndex() < end) {
      if (c == separator) {
        return text.getIndex() + 1;
      }
      c = text.next();
    }
    return end;
  }

  /**
   * Finds the last boundary strictly smaller than {@code pos}; callers guarantee that
   * {@code pos} is greater than the begin index. The character at {@code pos - 1} is skipped on
   * purpose: a separator there produces the boundary {@code pos} itself, which is not smaller.
   */
  private int boundaryBefore(int pos) {
    final int begin = text.getBeginIndex();
    text.setIndex(pos - 1);
    while (text.getIndex() > begin) {
      if (text.previous() == separator) {
        return text.getIndex() + 1;
      }
    }
    return begin;
  }
}

View File

@ -1,116 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.postingshighlight;
import java.text.BreakIterator;
import java.text.CharacterIterator;
/**
 * A {@link BreakIterator} that treats the entire text as one single fragment: the only boundaries
 * it ever reports are the begin index and the end index of the text.
 */
public final class WholeBreakIterator extends BreakIterator {

  /** The text last passed to {@link #setText(CharacterIterator)}. */
  private CharacterIterator text;

  /** Begin index of the text (the first boundary). */
  private int start;

  /** End index of the text (the last boundary). */
  private int end;

  /** The boundary the iterator is currently positioned on. */
  private int current;

  @Override
  public void setText(CharacterIterator newText) {
    start = newText.getBeginIndex();
    end = newText.getEndIndex();
    text = newText;
    // always begin on the first boundary, whatever index newText carries
    current = start;
  }

  @Override
  public CharacterIterator getText() {
    return text;
  }

  @Override
  public int current() {
    return current;
  }

  @Override
  public int first() {
    current = start;
    return current;
  }

  @Override
  public int last() {
    current = end;
    return current;
  }

  @Override
  public int next() {
    if (current == end) {
      return DONE;
    }
    current = end;
    return end;
  }

  @Override
  public int previous() {
    if (current == start) {
      return DONE;
    }
    current = start;
    return start;
  }

  /**
   * Steps {@code n} boundaries forward (positive) or backward (negative) and reports the position
   * the iterator ends up on.
   */
  @Override
  public int next(int n) {
    if (n < 0) {
      int remaining = -n;
      while (remaining-- > 0) {
        previous();
      }
    } else {
      int remaining = n;
      while (remaining-- > 0) {
        next();
      }
    }
    return current;
  }

  /**
   * Positions the iterator on the end of the text.
   *
   * @return the end index, or {@link #DONE} when {@code pos} already is the end index
   * @throws IllegalArgumentException if {@code pos} lies outside the text
   */
  @Override
  public int following(int pos) {
    if (pos < start || pos > end) {
      throw new IllegalArgumentException("offset out of bounds");
    }
    // Returning DONE for pos == end conflicts with the javadocs, but matches actual behavior
    // (Oracle has a bug in something): https://bugs.openjdk.java.net/browse/JDK-8015110
    final boolean alreadyAtEnd = (pos == end);
    current = end;
    return alreadyAtEnd ? DONE : end;
  }

  /**
   * Positions the iterator on the start of the text.
   *
   * @return the begin index, or {@link #DONE} when {@code pos} already is the begin index
   * @throws IllegalArgumentException if {@code pos} lies outside the text
   */
  @Override
  public int preceding(int pos) {
    if (pos < start || pos > end) {
      throw new IllegalArgumentException("offset out of bounds");
    }
    // Returning DONE for pos == start conflicts with the javadocs, but matches actual behavior
    // (Oracle has a bug in something): https://bugs.openjdk.java.net/browse/JDK-8015110
    final boolean alreadyAtStart = (pos == start);
    current = start;
    return alreadyAtStart ? DONE : start;
  }
}

View File

@ -1,114 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.postingshighlight;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.util.LuceneTestCase;
import java.text.BreakIterator;
import java.util.Locale;
import static org.apache.lucene.search.postingshighlight.TestWholeBreakIterator.assertSameBreaks;
import static org.hamcrest.CoreMatchers.equalTo;
/**
 * Tests for {@link CustomSeparatorBreakIterator}: explicit boundary checks on a text joined with a
 * randomly picked separator, plus comparisons against the JDK sentence iterator on texts that
 * contain none of the candidate separators.
 */
public class TestCustomSeparatorBreakIterator extends LuceneTestCase {

  /** Candidate separators: space, NUL and 8233 (U+2029 PARAGRAPH SEPARATOR). */
  private static final Character[] SEPARATORS = new Character[]{' ', '\u0000', 8233};

  /** Walks the text forwards and backwards and checks every fragment between two boundaries. */
  public void testBreakOnCustomSeparator() throws Exception {
    Character separator = randomSeparator();
    BreakIterator bi = new CustomSeparatorBreakIterator(separator);
    String source = "this" + separator + "is" + separator + "the" + separator + "first" + separator + "sentence";
    bi.setText(source);
    assertThat(bi.current(), equalTo(0));
    assertThat(bi.first(), equalTo(0));

    // forward: each fragment ends right after its separator, the last one at the end of the text
    assertThat(source.substring(bi.current(), bi.next()), equalTo("this" + separator));
    assertThat(source.substring(bi.current(), bi.next()), equalTo("is" + separator));
    assertThat(source.substring(bi.current(), bi.next()), equalTo("the" + separator));
    assertThat(source.substring(bi.current(), bi.next()), equalTo("first" + separator));
    assertThat(source.substring(bi.current(), bi.next()), equalTo("sentence"));
    assertThat(bi.next(), equalTo(BreakIterator.DONE));

    // backward: the same fragments in reverse order
    assertThat(bi.last(), equalTo(source.length()));
    int current = bi.current();
    assertThat(source.substring(bi.previous(), current), equalTo("sentence"));
    current = bi.current();
    assertThat(source.substring(bi.previous(), current), equalTo("first" + separator));
    current = bi.current();
    assertThat(source.substring(bi.previous(), current), equalTo("the" + separator));
    current = bi.current();
    assertThat(source.substring(bi.previous(), current), equalTo("is" + separator));
    current = bi.current();
    assertThat(source.substring(bi.previous(), current), equalTo("this" + separator));
    assertThat(bi.previous(), equalTo(BreakIterator.DONE));
    assertThat(bi.current(), equalTo(0));

    // random access: offset 9 lies inside "the"
    assertThat(source.substring(0, bi.following(9)), equalTo("this" + separator + "is" + separator + "the" + separator));
    assertThat(source.substring(0, bi.preceding(9)), equalTo("this" + separator + "is" + separator));

    // multi-step move
    assertThat(bi.first(), equalTo(0));
    assertThat(source.substring(0, bi.next(3)), equalTo("this" + separator + "is" + separator + "the" + separator));
  }

  /** Texts without any separator must break like a single sentence. */
  public void testSingleSentences() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
    assertSameBreaks("a", expected, actual);
    assertSameBreaks("ab", expected, actual);
    assertSameBreaks("abc", expected, actual);
    assertSameBreaks("", expected, actual);
  }

  /** The slice is a prefix of the backing string; the characters after it must be ignored. */
  public void testSliceEnd() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
    // slice lengths cover the letters in front of the "000" padding (1, 2, 3, 0), mirroring
    // testSliceStart and testSliceMiddle
    assertSameBreaks("a000", 0, 1, expected, actual);
    assertSameBreaks("ab000", 0, 2, expected, actual);
    assertSameBreaks("abc000", 0, 3, expected, actual);
    assertSameBreaks("000", 0, 0, expected, actual);
  }

  /** The slice is a suffix of the backing string; the characters before it must be ignored. */
  public void testSliceStart() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
    assertSameBreaks("000a", 3, 1, expected, actual);
    assertSameBreaks("000ab", 3, 2, expected, actual);
    assertSameBreaks("000abc", 3, 3, expected, actual);
    assertSameBreaks("000", 3, 0, expected, actual);
  }

  /** The slice sits in the middle of the backing string; both sides must be ignored. */
  public void testSliceMiddle() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
    assertSameBreaks("000a000", 3, 1, expected, actual);
    assertSameBreaks("000ab000", 3, 2, expected, actual);
    assertSameBreaks("000abc000", 3, 3, expected, actual);
    assertSameBreaks("000000", 3, 0, expected, actual);
  }

  /** the current position must be ignored, initial position is always first() */
  public void testFirstPosition() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
    assertSameBreaks("000ab000", 3, 2, 4, expected, actual);
  }

  /** Picks one of {@link #SEPARATORS} using the test's random source. */
  private static char randomSeparator() {
    return RandomPicks.randomFrom(random(), SEPARATORS);
  }
}

View File

@ -1,134 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.postingshighlight;
import org.apache.lucene.util.LuceneTestCase;
import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Locale;
/**
 * Tests for {@link WholeBreakIterator}. Also hosts the shared {@code assertSameBreaks} helpers
 * that compare two {@link BreakIterator}s over the same text.
 */
public class TestWholeBreakIterator extends LuceneTestCase {

  /** For single sentences, we know WholeBreakIterator should break the same as a sentence iterator */
  public void testSingleSentences() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new WholeBreakIterator();
    assertSameBreaks("a", expected, actual);
    assertSameBreaks("ab", expected, actual);
    assertSameBreaks("abc", expected, actual);
    assertSameBreaks("", expected, actual);
  }

  /** The slice is a prefix of the backing string; the characters after it must be ignored. */
  public void testSliceEnd() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new WholeBreakIterator();
    // slice lengths cover the letters in front of the "000" padding (1, 2, 3, 0), mirroring
    // testSliceStart and testSliceMiddle
    assertSameBreaks("a000", 0, 1, expected, actual);
    assertSameBreaks("ab000", 0, 2, expected, actual);
    assertSameBreaks("abc000", 0, 3, expected, actual);
    assertSameBreaks("000", 0, 0, expected, actual);
  }

  /** The slice is a suffix of the backing string; the characters before it must be ignored. */
  public void testSliceStart() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new WholeBreakIterator();
    assertSameBreaks("000a", 3, 1, expected, actual);
    assertSameBreaks("000ab", 3, 2, expected, actual);
    assertSameBreaks("000abc", 3, 3, expected, actual);
    assertSameBreaks("000", 3, 0, expected, actual);
  }

  /** The slice sits in the middle of the backing string; both sides must be ignored. */
  public void testSliceMiddle() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new WholeBreakIterator();
    assertSameBreaks("000a000", 3, 1, expected, actual);
    assertSameBreaks("000ab000", 3, 2, expected, actual);
    assertSameBreaks("000abc000", 3, 3, expected, actual);
    assertSameBreaks("000000", 3, 0, expected, actual);
  }

  /** the current position must be ignored, initial position is always first() */
  public void testFirstPosition() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new WholeBreakIterator();
    assertSameBreaks("000ab000", 3, 2, 4, expected, actual);
  }

  /** Compares both iterators over the whole of {@code text}. */
  public static void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
    assertSameBreaks(new StringCharacterIterator(text),
                     new StringCharacterIterator(text),
                     expected,
                     actual);
  }

  /**
   * Compares both iterators over the slice {@code [offset, offset + length)} of {@code text}, with
   * the character iterators initially positioned at {@code offset}.
   */
  public static void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) {
    assertSameBreaks(text, offset, length, offset, expected, actual);
  }

  /**
   * Compares both iterators over the slice {@code [offset, offset + length)} of {@code text}, with
   * the character iterators initially positioned at {@code current}.
   */
  public static void assertSameBreaks(String text, int offset, int length, int current, BreakIterator expected, BreakIterator actual) {
    // each break iterator gets its own character iterator so they cannot disturb each other
    assertSameBreaks(new StringCharacterIterator(text, offset, offset+length, current),
                     new StringCharacterIterator(text, offset, offset+length, current),
                     expected,
                     actual);
  }

  /** Asserts that two breakiterators break the text the same way */
  public static void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) {
    expected.setText(one);
    actual.setText(two);

    assertEquals(expected.current(), actual.current());

    // next(): walk forward until the reference iterator reports DONE, comparing each step
    int v = expected.current();
    while (v != BreakIterator.DONE) {
      assertEquals(v = expected.next(), actual.next());
      assertEquals(expected.current(), actual.current());
    }

    // first()
    assertEquals(expected.first(), actual.first());
    assertEquals(expected.current(), actual.current());
    // last()
    assertEquals(expected.last(), actual.last());
    assertEquals(expected.current(), actual.current());

    // previous(): walk backward from the end until the reference iterator reports DONE
    v = expected.current();
    while (v != BreakIterator.DONE) {
      assertEquals(v = expected.previous(), actual.previous());
      assertEquals(expected.current(), actual.current());
    }

    // following(): probe every offset of the text, each time starting from first()
    for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
      expected.first();
      actual.first();
      assertEquals(expected.following(i), actual.following(i));
      assertEquals(expected.current(), actual.current());
    }

    // preceding(): probe every offset of the text, each time starting from last()
    for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
      expected.last();
      actual.last();
      assertEquals(expected.preceding(i), actual.preceding(i));
      assertEquals(expected.current(), actual.current());
    }
  }
}