Prevent DirectCandidateGenerator to reuse an unclosed analyzer

When postFilter generates a token that is identical to the input term
DirectCandidateGenerator should not preFilter this token. If postFilter
and preFilter are the same analyzer instance it would fail with :
"TokenStream contract violation: close() call missing"

This is a forward port of @nomoa's #12670
This commit is contained in:
Nik Everett 2015-08-05 15:30:13 -04:00
parent 0b9729af5b
commit 69be7f77fc
2 changed files with 8 additions and 2 deletions

View File

@ -151,7 +151,9 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
if (posIncAttr.getPositionIncrement() > 0 && result.get().bytesEquals(candidate.term)) {
BytesRef term = result.toBytesRef();
long freq = frequency(term);
// We should not use frequency(term) here because it will analyze the term again
// If preFilter and postFilter are the same analyzer it would fail.
long freq = internalFrequency(term);
candidates.add(new Candidate(result.toBytesRef(), freq, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize), false));
} else {
candidates.add(new Candidate(result.toBytesRef(), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize), false));

View File

@ -278,8 +278,12 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
// Test a special case where one of the suggest term is unchanged by the postFilter, 'II' here is unchanged by the reverse analyzer.
corrections = suggester.getCorrections(wrapper, new BytesRef("Quazar II"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("quasar ii"));
}
@Test
public void testMarvelHerosTrigram() throws IOException {