mirror of https://github.com/apache/lucene.git
LUCENE-4863: handle case where no overrides are built (fst is null)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1460859 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b4e22c267d
commit
83027eca9e
|
@ -45,7 +45,6 @@ public final class StemmerOverrideFilter extends TokenFilter {
|
||||||
private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
|
private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
|
||||||
private final BytesReader fstReader;
|
private final BytesReader fstReader;
|
||||||
private final Arc<BytesRef> scratchArc = new FST.Arc<BytesRef>();
|
private final Arc<BytesRef> scratchArc = new FST.Arc<BytesRef>();
|
||||||
;
|
|
||||||
private final CharsRef spare = new CharsRef();
|
private final CharsRef spare = new CharsRef();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -65,6 +64,10 @@ public final class StemmerOverrideFilter extends TokenFilter {
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
if (input.incrementToken()) {
|
if (input.incrementToken()) {
|
||||||
|
if (fstReader == null) {
|
||||||
|
// No overrides were built (the FST is null), so pass the token through unchanged
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
|
if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
|
||||||
final BytesRef stem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader);
|
final BytesRef stem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader);
|
||||||
if (stem != null) {
|
if (stem != null) {
|
||||||
|
@ -106,13 +109,17 @@ public final class StemmerOverrideFilter extends TokenFilter {
|
||||||
* Returns a {@link BytesReader} to pass to the {@link #get(char[], int, Arc, BytesReader)} method.
|
* Returns a {@link BytesReader} to pass to the {@link #get(char[], int, Arc, BytesReader)} method.
|
||||||
*/
|
*/
|
||||||
BytesReader getBytesReader() {
|
BytesReader getBytesReader() {
|
||||||
|
if (fst == null) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
return fst.getBytesReader();
|
return fst.getBytesReader();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the value mapped to the given key or <code>null</code> if the key is not in the FST dictionary.
|
* Returns the value mapped to the given key or <code>null</code> if the key is not in the FST dictionary.
|
||||||
*/
|
*/
|
||||||
final BytesRef get(char[] buffer, int bufferLen, Arc<BytesRef> scratchArc, BytesReader fstReader) throws IOException {
|
BytesRef get(char[] buffer, int bufferLen, Arc<BytesRef> scratchArc, BytesReader fstReader) throws IOException {
|
||||||
BytesRef pendingOutput = fst.outputs.getNoOutput();
|
BytesRef pendingOutput = fst.outputs.getNoOutput();
|
||||||
BytesRef matchOutput = null;
|
BytesRef matchOutput = null;
|
||||||
int bufUpto = 0;
|
int bufUpto = 0;
|
||||||
|
|
|
@ -61,6 +61,14 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
|
||||||
assertTokenStreamContents(stream, new String[] {"books"});
|
assertTokenStreamContents(stream, new String[] {"books"});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testNoOverrides() throws IOException {
|
||||||
|
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
|
||||||
|
Tokenizer tokenizer = new KeywordTokenizer(new StringReader("book"));
|
||||||
|
TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
|
||||||
|
tokenizer, builder.build()));
|
||||||
|
assertTokenStreamContents(stream, new String[] {"book"});
|
||||||
|
}
|
||||||
|
|
||||||
public void testRandomRealisticWhiteSpace() throws IOException {
|
public void testRandomRealisticWhiteSpace() throws IOException {
|
||||||
Map<String,String> map = new HashMap<String,String>();
|
Map<String,String> map = new HashMap<String,String>();
|
||||||
int numTerms = atLeast(50);
|
int numTerms = atLeast(50);
|
||||||
|
|
Loading…
Reference in New Issue