LUCENE-7576: detect when special case automaton is passed to Terms.intersect

This commit is contained in:
Mike McCandless 2016-12-02 15:26:04 -05:00
parent c61268f7cd
commit fcccd317dd
4 changed files with 31 additions and 2 deletions

View File

@ -81,6 +81,10 @@ Bug Fixes
* LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
component when preserveOriginal was set to true. (Adrien Grand)
* LUCENE-7576: Fix Terms.intersect in the default codec to detect when
the incoming automaton is a special case and throw a clearer
exception than NullPointerException (Tom Mortimer via Mike McCandless)
Improvements
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,

View File

@ -182,6 +182,9 @@ public final class FieldReader extends Terms implements Accountable {
//System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState);
}

View File

@ -49,8 +49,12 @@ public abstract class Terms {
* provided <code>startTerm</code> must be accepted by
* the automaton.
*
* <p><b>NOTE</b>: the returned TermsEnum cannot
* seek</p>.
* <p>This is an expert low-level API and will only work
* for {@code NORMAL} compiled automata. To handle any
* compiled automaton you should use
* {@link CompiledAutomaton#getTermsEnum} instead.</p>
*
* <p><b>NOTE</b>: the returned TermsEnum cannot seek.</p>
*
* <p><b>NOTE</b>: the terms dictionary is free to
* return arbitrary terms as long as the resulted visited

View File

@ -998,4 +998,22 @@ public class TestTermsEnum extends LuceneTestCase {
}
dir.close();
}
// LUCENE-7576
/**
 * Checks that calling {@code Terms.intersect} with a compiled automaton built from
 * the literal regexp {@code do_not_match_anything} fails with an
 * {@link IllegalArgumentException} whose message directs the caller to
 * {@code CompiledAutomaton.getTermsEnum}.
 *
 * <p>A single document with one string field is indexed so that the field's
 * {@code Terms} instance exists; the automaton is never expected to be run.
 *
 * @throws Exception if indexing or opening the reader fails
 */
public void testIntersectRegexp() throws Exception {
  // try-with-resources releases the reader, then the writer, then the directory
  // (same order as explicit close calls would) even if an assertion below fails.
  try (Directory d = newDirectory();
       RandomIndexWriter w = new RandomIndexWriter(random(), d)) {
    Document doc = new Document();
    doc.add(newStringField("field", "foobar", Field.Store.NO));
    w.addDocument(doc);

    try (IndexReader r = w.getReader()) {
      Fields fields = MultiFields.getFields(r);
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
      Terms terms = fields.terms("field");

      // startTerm is null: the call must be rejected before any enumeration starts.
      String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
      assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
    }
  }
}
}