Determinize automata used by IntervalsSource.regex (#13718)

This commit determinizes internal automata used in the construction of the IntervalsSource created by the regexp factory.
This commit is contained in:
Chris Hegarty 2024-09-05 14:15:24 +01:00 committed by GitHub
parent 11d7566229
commit 67c0f8e847
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 44 additions and 0 deletions

View File

@ -238,6 +238,7 @@ public final class Intervals {
*/
public static IntervalsSource regexp(BytesRef regexp, int maxExpansions) {
  // Decode the pattern bytes through Term.text() and parse the result as a regular expression.
  String pattern = new Term("", regexp).text();
  Automaton parsed = new RegExp(pattern).toAutomaton();

  // Determinize before compiling, bounded by the default determinize work limit.
  Automaton deterministic =
      Operations.determinize(parsed, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
  CompiledAutomaton compiled = new CompiledAutomaton(deterministic, false, true, false);

  // The source receives the compiled automaton, maxExpansions, and the UTF-8 decoded pattern.
  return new MultiTermIntervalsSource(compiled, maxExpansions, regexp.utf8ToString());
}

View File

@ -447,4 +447,24 @@ public class TestIntervalQuery extends LuceneTestCase {
field, or(term("XXX"), containing(extend(term("message"), 0, 10), term("intend"))));
checkHits(q, new int[] {});
}
public void testEquality() {
  // Each pair is built independently from identical arguments; the two queries must be equal.

  // regexp
  IntervalQuery regexpLeft = new IntervalQuery("f", Intervals.regexp(new BytesRef(".*foo")));
  IntervalQuery regexpRight = new IntervalQuery("f", Intervals.regexp(new BytesRef(".*foo")));
  assertEquals(regexpLeft, regexpRight);

  // prefix
  IntervalQuery prefixLeft = new IntervalQuery("f", Intervals.prefix(new BytesRef("p"), 1));
  IntervalQuery prefixRight = new IntervalQuery("f", Intervals.prefix(new BytesRef("p"), 1));
  assertEquals(prefixLeft, prefixRight);

  // fuzzy term
  IntervalQuery fuzzyLeft = new IntervalQuery("f", Intervals.fuzzyTerm("kot", 1));
  IntervalQuery fuzzyRight = new IntervalQuery("f", Intervals.fuzzyTerm("kot", 1));
  assertEquals(fuzzyLeft, fuzzyRight);

  // wildcard
  IntervalQuery wildcardLeft = new IntervalQuery("f", Intervals.wildcard(new BytesRef("*.txt")));
  IntervalQuery wildcardRight =
      new IntervalQuery("f", Intervals.wildcard(new BytesRef("*.txt")));
  assertEquals(wildcardLeft, wildcardRight);

  // range
  IntervalQuery rangeLeft =
      new IntervalQuery(
          "f", Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true));
  IntervalQuery rangeRight =
      new IntervalQuery(
          "f", Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true));
  assertEquals(rangeLeft, rangeRight);
}
}

View File

@ -1187,4 +1187,27 @@ public class TestIntervals extends LuceneTestCase {
checkVisits(source, 1);
}
// basic test for equality and inequality of instances created by the factories
public void testEquality() {
  // Part 1: sources built independently from identical arguments must compare equal.
  IntervalsSource termLeft = Intervals.term("wibble");
  IntervalsSource termRight = Intervals.term("wibble");
  assertEquals(termLeft, termRight);

  IntervalsSource prefixLeft = Intervals.prefix(new BytesRef("p"), 1);
  IntervalsSource prefixRight = Intervals.prefix(new BytesRef("p"), 1);
  assertEquals(prefixLeft, prefixRight);

  IntervalsSource fuzzyLeft = Intervals.fuzzyTerm("kot", 1);
  IntervalsSource fuzzyRight = Intervals.fuzzyTerm("kot", 1);
  assertEquals(fuzzyLeft, fuzzyRight);

  IntervalsSource regexpLeft = Intervals.regexp(new BytesRef(".*ot"));
  IntervalsSource regexpRight = Intervals.regexp(new BytesRef(".*ot"));
  assertEquals(regexpLeft, regexpRight);

  IntervalsSource wildcardLeft = Intervals.wildcard(new BytesRef("*.txt"));
  IntervalsSource wildcardRight = Intervals.wildcard(new BytesRef("*.txt"));
  assertEquals(wildcardLeft, wildcardRight);

  IntervalsSource rangeLeft =
      Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true);
  IntervalsSource rangeRight =
      Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true);
  assertEquals(rangeLeft, rangeRight);

  // Part 2: sources built from differing arguments must not compare equal.
  IntervalsSource wibble = Intervals.term("wibble");
  IntervalsSource wobble = Intervals.term("wobble");
  assertNotEquals(wibble, wobble);

  IntervalsSource prefixP = Intervals.prefix(new BytesRef("p"), 1);
  IntervalsSource prefixB = Intervals.prefix(new BytesRef("b"), 1);
  assertNotEquals(prefixP, prefixB);

  IntervalsSource fuzzyKot = Intervals.fuzzyTerm("kot", 1);
  IntervalsSource fuzzyKof = Intervals.fuzzyTerm("kof", 1);
  assertNotEquals(fuzzyKot, fuzzyKof);

  IntervalsSource regexpOt = Intervals.regexp(new BytesRef(".*ot"));
  IntervalsSource regexpAt = Intervals.regexp(new BytesRef(".*at"));
  assertNotEquals(regexpOt, regexpAt);

  IntervalsSource wildcardTxt = Intervals.wildcard(new BytesRef("*.txt"));
  IntervalsSource wildcardTat = Intervals.wildcard(new BytesRef("*.tat"));
  assertNotEquals(wildcardTxt, wildcardTat);

  IntervalsSource rangeFromWarm =
      Intervals.range(new BytesRef("warm"), new BytesRef("hot"), true, true);
  IntervalsSource rangeFromCold =
      Intervals.range(new BytesRef("cold"), new BytesRef("hot"), true, true);
  assertNotEquals(rangeFromWarm, rangeFromCold);
}
}