LUCENE-4863: handle case where no overrides are built (fst is null)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1460859 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-03-25 19:30:20 +00:00
parent b4e22c267d
commit 83027eca9e
2 changed files with 18 additions and 3 deletions

View File

@ -45,7 +45,6 @@ public final class StemmerOverrideFilter extends TokenFilter {
private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class); private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
private final BytesReader fstReader; private final BytesReader fstReader;
private final Arc<BytesRef> scratchArc = new FST.Arc<BytesRef>(); private final Arc<BytesRef> scratchArc = new FST.Arc<BytesRef>();
;
private final CharsRef spare = new CharsRef(); private final CharsRef spare = new CharsRef();
/** /**
@ -65,6 +64,10 @@ public final class StemmerOverrideFilter extends TokenFilter {
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
if (input.incrementToken()) { if (input.incrementToken()) {
if (fstReader == null) {
// No overrides
return true;
}
if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
final BytesRef stem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader); final BytesRef stem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader);
if (stem != null) { if (stem != null) {
@ -106,13 +109,17 @@ public final class StemmerOverrideFilter extends TokenFilter {
* Returns a {@link BytesReader} to pass to the {@link #get(char[], int, Arc, BytesReader)} method. * Returns a {@link BytesReader} to pass to the {@link #get(char[], int, Arc, BytesReader)} method.
*/ */
BytesReader getBytesReader() { BytesReader getBytesReader() {
if (fst == null) {
// No FST exists when no overrides were built; callers receive null instead of a reader.
return null;
} else {
return fst.getBytesReader(); return fst.getBytesReader();
} }
}
/** /**
* Returns the value mapped to the given key or <code>null</code> if the key is not in the FST dictionary. * Returns the value mapped to the given key or <code>null</code> if the key is not in the FST dictionary.
*/ */
final BytesRef get(char[] buffer, int bufferLen, Arc<BytesRef> scratchArc, BytesReader fstReader) throws IOException { BytesRef get(char[] buffer, int bufferLen, Arc<BytesRef> scratchArc, BytesReader fstReader) throws IOException {
BytesRef pendingOutput = fst.outputs.getNoOutput(); BytesRef pendingOutput = fst.outputs.getNoOutput();
BytesRef matchOutput = null; BytesRef matchOutput = null;
int bufUpto = 0; int bufUpto = 0;

View File

@ -61,6 +61,14 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
assertTokenStreamContents(stream, new String[] {"books"}); assertTokenStreamContents(stream, new String[] {"books"});
} }
/**
 * Checks that a {@code StemmerOverrideFilter} built from a {@code Builder} to which no
 * entries were added can still be consumed, and that the resulting stream yields the
 * single token "book".
 */
public void testNoOverrides() throws IOException {
// Nothing is added to the builder before build() is called below.
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
Tokenizer tokenizer = new KeywordTokenizer(new StringReader("book"));
TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
tokenizer, builder.build()));
assertTokenStreamContents(stream, new String[] {"book"});
}
public void testRandomRealisticWhiteSpace() throws IOException { public void testRandomRealisticWhiteSpace() throws IOException {
Map<String,String> map = new HashMap<String,String>(); Map<String,String> map = new HashMap<String,String>();
int numTerms = atLeast(50); int numTerms = atLeast(50);