mirror of https://github.com/apache/lucene.git
LUCENE-7815: Remove more PostingsHighlighter remnants
This commit is contained in:
parent
2319d69fd3
commit
85c3ae2040
|
@ -1,150 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.postingshighlight;
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
/**
 * A {@link BreakIterator} that breaks the text whenever a certain separator, provided as a
 * constructor argument, is found.
 * <p>
 * The boundaries reported are the begin index of the text, the end index of the text, and the
 * position immediately after every occurrence of the separator character.
 * <p>
 * The position of the underlying {@link CharacterIterator} is always kept equal to
 * {@link #current()}.
 */
public final class CustomSeparatorBreakIterator extends BreakIterator {

  private final char separator;
  private CharacterIterator text;
  private int current;

  /**
   * Creates an iterator that places a boundary after each occurrence of {@code separator}.
   *
   * @param separator the character after which the text is broken
   */
  public CustomSeparatorBreakIterator(char separator) {
    this.separator = separator;
  }

  /** Returns the boundary most recently returned by a navigation method. */
  @Override
  public int current() {
    return current;
  }

  /** Moves to, and returns, the begin index of the text. */
  @Override
  public int first() {
    return moveTo(text.getBeginIndex());
  }

  /** Moves to, and returns, the end index of the text. */
  @Override
  public int last() {
    return moveTo(text.getEndIndex());
  }

  /**
   * Advances to the boundary after the current one.
   *
   * @return the new boundary, or {@link #DONE} if the current boundary is already the end of
   *         the text
   */
  @Override
  public int next() {
    if (current == text.getEndIndex()) {
      return DONE;
    }
    return moveTo(boundaryAfter(current));
  }

  /**
   * Moves to the first boundary strictly greater than {@code pos}.
   *
   * @param pos an offset between the begin and end index of the text (inclusive)
   * @return the boundary found, or {@link #DONE} if {@code pos} is the end index
   * @throws IllegalArgumentException if {@code pos} lies outside the text
   */
  @Override
  public int following(int pos) {
    checkOffset(pos);
    if (pos == text.getEndIndex()) {
      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
      // https://bugs.openjdk.java.net/browse/JDK-8015110
      moveTo(pos);
      return DONE;
    }
    return moveTo(boundaryAfter(pos));
  }

  /**
   * Moves back to the boundary before the current one.
   *
   * @return the new boundary, or {@link #DONE} if the current boundary is already the begin
   *         index of the text
   */
  @Override
  public int previous() {
    if (current == text.getBeginIndex()) {
      return DONE;
    }
    return moveTo(boundaryBefore(current));
  }

  /**
   * Moves to the last boundary strictly less than {@code pos}.
   *
   * @param pos an offset between the begin and end index of the text (inclusive)
   * @return the boundary found, or {@link #DONE} if {@code pos} is the begin index
   * @throws IllegalArgumentException if {@code pos} lies outside the text
   */
  @Override
  public int preceding(int pos) {
    checkOffset(pos);
    if (pos == text.getBeginIndex()) {
      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
      // https://bugs.openjdk.java.net/browse/JDK-8015110
      moveTo(pos);
      return DONE;
    }
    return moveTo(boundaryBefore(pos));
  }

  /**
   * Moves {@code n} boundaries forward (or {@code -n} boundaries backward when {@code n} is
   * negative).
   *
   * @param n the number of boundaries to move; {@code 0} leaves the position untouched
   * @return the boundary reached, or {@link #DONE} if the first or last boundary was hit before
   *         {@code n} steps could be taken (the position then stays on that boundary)
   */
  @Override
  public int next(int n) {
    int result = current;
    for (int remaining = n; remaining > 0 && result != DONE; remaining--) {
      result = next();
    }
    for (int remaining = n; remaining < 0 && result != DONE; remaining++) {
      result = previous();
    }
    return result;
  }

  /** Returns the text currently being scanned. */
  @Override
  public CharacterIterator getText() {
    return text;
  }

  /**
   * Sets the text to scan and resets the position to its begin index, regardless of where the
   * supplied iterator was positioned.
   */
  @Override
  public void setText(CharacterIterator newText) {
    text = newText;
    moveTo(text.getBeginIndex());
  }

  /** Positions both this iterator and the underlying text at {@code pos} and returns it. */
  private int moveTo(int pos) {
    text.setIndex(pos);
    current = pos;
    return pos;
  }

  /** Rejects offsets that do not lie within the text. */
  private void checkOffset(int pos) {
    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
      throw new IllegalArgumentException("offset out of bounds");
    }
  }

  /** Returns the smallest boundary greater than {@code pos}; requires {@code pos < end}. */
  private int boundaryAfter(int pos) {
    final int end = text.getEndIndex();
    // a separator at index i yields the boundary i + 1, so the character AT pos counts
    for (int i = pos; i < end; i++) {
      if (text.setIndex(i) == separator) {
        return i + 1;
      }
    }
    return end;
  }

  /** Returns the largest boundary smaller than {@code pos}; requires {@code pos > begin}. */
  private int boundaryBefore(int pos) {
    final int begin = text.getBeginIndex();
    // a separator at index i yields the boundary i + 1, which must stay below pos
    for (int i = pos - 2; i >= begin; i--) {
      if (text.setIndex(i) == separator) {
        return i + 1;
      }
    }
    return begin;
  }
}
|
|
@ -1,116 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.postingshighlight;
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
/**
 * Just produces one single fragment for the entire text: the only boundaries are the begin and
 * end index of the text.
 */
public final class WholeBreakIterator extends BreakIterator {
  private CharacterIterator text;
  private int start;
  private int end;
  private int current;

  /** Returns the boundary most recently returned by a navigation method. */
  @Override
  public int current() {
    return current;
  }

  /** Moves to, and returns, the begin index of the text. */
  @Override
  public int first() {
    return (current = start);
  }

  /**
   * Moves to the first boundary after {@code pos}, which can only be the end of the text.
   *
   * @param pos an offset between the begin and end index of the text (inclusive)
   * @return the end index, or {@link #DONE} if {@code pos} already is the end index
   * @throws IllegalArgumentException if {@code pos} lies outside the text
   */
  @Override
  public int following(int pos) {
    if (pos < start || pos > end) {
      throw new IllegalArgumentException("offset out of bounds");
    } else if (pos == end) {
      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
      // https://bugs.openjdk.java.net/browse/JDK-8015110
      current = end;
      return DONE;
    } else {
      return last();
    }
  }

  /** Returns the text currently being scanned. */
  @Override
  public CharacterIterator getText() {
    return text;
  }

  /** Moves to, and returns, the end index of the text. */
  @Override
  public int last() {
    return (current = end);
  }

  /**
   * Advances to the end of the text.
   *
   * @return the end index, or {@link #DONE} if the iterator is already there
   */
  @Override
  public int next() {
    if (current == end) {
      return DONE;
    } else {
      return last();
    }
  }

  /**
   * Moves {@code n} boundaries forward (or {@code -n} boundaries backward when {@code n} is
   * negative).
   *
   * @param n the number of boundaries to move; {@code 0} leaves the position untouched
   * @return the boundary reached, or {@link #DONE} if the first or last boundary was hit before
   *         {@code n} steps could be taken (the position then stays on that boundary)
   */
  @Override
  public int next(int n) {
    int result = current;
    // stop as soon as a step reports DONE: further steps cannot move the position
    for (int remaining = n; remaining > 0 && result != DONE; remaining--) {
      result = next();
    }
    for (int remaining = n; remaining < 0 && result != DONE; remaining++) {
      result = previous();
    }
    return result;
  }

  /**
   * Moves to the last boundary before {@code pos}, which can only be the begin of the text.
   *
   * @param pos an offset between the begin and end index of the text (inclusive)
   * @return the begin index, or {@link #DONE} if {@code pos} already is the begin index
   * @throws IllegalArgumentException if {@code pos} lies outside the text
   */
  @Override
  public int preceding(int pos) {
    if (pos < start || pos > end) {
      throw new IllegalArgumentException("offset out of bounds");
    } else if (pos == start) {
      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
      // https://bugs.openjdk.java.net/browse/JDK-8015110
      current = start;
      return DONE;
    } else {
      return first();
    }
  }

  /**
   * Moves back to the begin of the text.
   *
   * @return the begin index, or {@link #DONE} if the iterator is already there
   */
  @Override
  public int previous() {
    if (current == start) {
      return DONE;
    } else {
      return first();
    }
  }

  /** Sets the text to scan, caches its bounds and resets the position to its begin index. */
  @Override
  public void setText(CharacterIterator newText) {
    start = newText.getBeginIndex();
    end = newText.getEndIndex();
    text = newText;
    current = start;
  }
}
|
|
@ -1,114 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.postingshighlight;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.util.Locale;
|
||||
|
||||
import static org.apache.lucene.search.postingshighlight.TestWholeBreakIterator.assertSameBreaks;
|
||||
import static org.hamcrest.CoreMatchers.equalTo;
|
||||
|
||||
public class TestCustomSeparatorBreakIterator extends LuceneTestCase {
|
||||
|
||||
private static final Character[] SEPARATORS = new Character[]{' ', '\u0000', 8233};
|
||||
|
||||
public void testBreakOnCustomSeparator() throws Exception {
|
||||
Character separator = randomSeparator();
|
||||
BreakIterator bi = new CustomSeparatorBreakIterator(separator);
|
||||
String source = "this" + separator + "is" + separator + "the" + separator + "first" + separator + "sentence";
|
||||
bi.setText(source);
|
||||
assertThat(bi.current(), equalTo(0));
|
||||
assertThat(bi.first(), equalTo(0));
|
||||
assertThat(source.substring(bi.current(), bi.next()), equalTo("this" + separator));
|
||||
assertThat(source.substring(bi.current(), bi.next()), equalTo("is" + separator));
|
||||
assertThat(source.substring(bi.current(), bi.next()), equalTo("the" + separator));
|
||||
assertThat(source.substring(bi.current(), bi.next()), equalTo("first" + separator));
|
||||
assertThat(source.substring(bi.current(), bi.next()), equalTo("sentence"));
|
||||
assertThat(bi.next(), equalTo(BreakIterator.DONE));
|
||||
|
||||
assertThat(bi.last(), equalTo(source.length()));
|
||||
int current = bi.current();
|
||||
assertThat(source.substring(bi.previous(), current), equalTo("sentence"));
|
||||
current = bi.current();
|
||||
assertThat(source.substring(bi.previous(), current), equalTo("first" + separator));
|
||||
current = bi.current();
|
||||
assertThat(source.substring(bi.previous(), current), equalTo("the" + separator));
|
||||
current = bi.current();
|
||||
assertThat(source.substring(bi.previous(), current), equalTo("is" + separator));
|
||||
current = bi.current();
|
||||
assertThat(source.substring(bi.previous(), current), equalTo("this" + separator));
|
||||
assertThat(bi.previous(), equalTo(BreakIterator.DONE));
|
||||
assertThat(bi.current(), equalTo(0));
|
||||
|
||||
assertThat(source.substring(0, bi.following(9)), equalTo("this" + separator + "is" + separator + "the" + separator));
|
||||
|
||||
assertThat(source.substring(0, bi.preceding(9)), equalTo("this" + separator + "is" + separator));
|
||||
|
||||
assertThat(bi.first(), equalTo(0));
|
||||
assertThat(source.substring(0, bi.next(3)), equalTo("this" + separator + "is" + separator + "the" + separator));
|
||||
}
|
||||
|
||||
public void testSingleSentences() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
|
||||
assertSameBreaks("a", expected, actual);
|
||||
assertSameBreaks("ab", expected, actual);
|
||||
assertSameBreaks("abc", expected, actual);
|
||||
assertSameBreaks("", expected, actual);
|
||||
}
|
||||
|
||||
public void testSliceEnd() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
|
||||
assertSameBreaks("a000", 0, 1, expected, actual);
|
||||
assertSameBreaks("ab000", 0, 1, expected, actual);
|
||||
assertSameBreaks("abc000", 0, 1, expected, actual);
|
||||
assertSameBreaks("000", 0, 0, expected, actual);
|
||||
}
|
||||
|
||||
public void testSliceStart() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
|
||||
assertSameBreaks("000a", 3, 1, expected, actual);
|
||||
assertSameBreaks("000ab", 3, 2, expected, actual);
|
||||
assertSameBreaks("000abc", 3, 3, expected, actual);
|
||||
assertSameBreaks("000", 3, 0, expected, actual);
|
||||
}
|
||||
|
||||
public void testSliceMiddle() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
|
||||
assertSameBreaks("000a000", 3, 1, expected, actual);
|
||||
assertSameBreaks("000ab000", 3, 2, expected, actual);
|
||||
assertSameBreaks("000abc000", 3, 3, expected, actual);
|
||||
assertSameBreaks("000000", 3, 0, expected, actual);
|
||||
}
|
||||
|
||||
/** the current position must be ignored, initial position is always first() */
|
||||
public void testFirstPosition() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
|
||||
assertSameBreaks("000ab000", 3, 2, 4, expected, actual);
|
||||
}
|
||||
|
||||
private static char randomSeparator() {
|
||||
return RandomPicks.randomFrom(random(), SEPARATORS);
|
||||
}
|
||||
}
|
|
@ -1,134 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.postingshighlight;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
import java.text.StringCharacterIterator;
|
||||
import java.util.Locale;
|
||||
|
||||
public class TestWholeBreakIterator extends LuceneTestCase {
|
||||
|
||||
/** For single sentences, we know WholeBreakIterator should break the same as a sentence iterator */
|
||||
public void testSingleSentences() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new WholeBreakIterator();
|
||||
assertSameBreaks("a", expected, actual);
|
||||
assertSameBreaks("ab", expected, actual);
|
||||
assertSameBreaks("abc", expected, actual);
|
||||
assertSameBreaks("", expected, actual);
|
||||
}
|
||||
|
||||
public void testSliceEnd() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new WholeBreakIterator();
|
||||
assertSameBreaks("a000", 0, 1, expected, actual);
|
||||
assertSameBreaks("ab000", 0, 1, expected, actual);
|
||||
assertSameBreaks("abc000", 0, 1, expected, actual);
|
||||
assertSameBreaks("000", 0, 0, expected, actual);
|
||||
}
|
||||
|
||||
public void testSliceStart() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new WholeBreakIterator();
|
||||
assertSameBreaks("000a", 3, 1, expected, actual);
|
||||
assertSameBreaks("000ab", 3, 2, expected, actual);
|
||||
assertSameBreaks("000abc", 3, 3, expected, actual);
|
||||
assertSameBreaks("000", 3, 0, expected, actual);
|
||||
}
|
||||
|
||||
public void testSliceMiddle() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new WholeBreakIterator();
|
||||
assertSameBreaks("000a000", 3, 1, expected, actual);
|
||||
assertSameBreaks("000ab000", 3, 2, expected, actual);
|
||||
assertSameBreaks("000abc000", 3, 3, expected, actual);
|
||||
assertSameBreaks("000000", 3, 0, expected, actual);
|
||||
}
|
||||
|
||||
/** the current position must be ignored, initial position is always first() */
|
||||
public void testFirstPosition() throws Exception {
|
||||
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
BreakIterator actual = new WholeBreakIterator();
|
||||
assertSameBreaks("000ab000", 3, 2, 4, expected, actual);
|
||||
}
|
||||
|
||||
public static void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
|
||||
assertSameBreaks(new StringCharacterIterator(text),
|
||||
new StringCharacterIterator(text),
|
||||
expected,
|
||||
actual);
|
||||
}
|
||||
|
||||
public static void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) {
|
||||
assertSameBreaks(text, offset, length, offset, expected, actual);
|
||||
}
|
||||
|
||||
public static void assertSameBreaks(String text, int offset, int length, int current, BreakIterator expected, BreakIterator actual) {
|
||||
assertSameBreaks(new StringCharacterIterator(text, offset, offset+length, current),
|
||||
new StringCharacterIterator(text, offset, offset+length, current),
|
||||
expected,
|
||||
actual);
|
||||
}
|
||||
|
||||
/** Asserts that two breakiterators break the text the same way */
|
||||
public static void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) {
|
||||
expected.setText(one);
|
||||
actual.setText(two);
|
||||
|
||||
assertEquals(expected.current(), actual.current());
|
||||
|
||||
// next()
|
||||
int v = expected.current();
|
||||
while (v != BreakIterator.DONE) {
|
||||
assertEquals(v = expected.next(), actual.next());
|
||||
assertEquals(expected.current(), actual.current());
|
||||
}
|
||||
|
||||
// first()
|
||||
assertEquals(expected.first(), actual.first());
|
||||
assertEquals(expected.current(), actual.current());
|
||||
// last()
|
||||
assertEquals(expected.last(), actual.last());
|
||||
assertEquals(expected.current(), actual.current());
|
||||
|
||||
// previous()
|
||||
v = expected.current();
|
||||
while (v != BreakIterator.DONE) {
|
||||
assertEquals(v = expected.previous(), actual.previous());
|
||||
assertEquals(expected.current(), actual.current());
|
||||
}
|
||||
|
||||
// following()
|
||||
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
|
||||
expected.first();
|
||||
actual.first();
|
||||
assertEquals(expected.following(i), actual.following(i));
|
||||
assertEquals(expected.current(), actual.current());
|
||||
}
|
||||
|
||||
// preceding()
|
||||
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
|
||||
expected.last();
|
||||
actual.last();
|
||||
assertEquals(expected.preceding(i), actual.preceding(i));
|
||||
assertEquals(expected.current(), actual.current());
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue