LUCENE-10414: Add fn:fuzzyTerm interval function to flexible query parser (#668)

This commit is contained in:
Dawid Weiss 2022-02-10 12:18:13 +01:00 committed by GitHub
parent 1f1da12c89
commit f6cebac333
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 1109 additions and 902 deletions

View File

@ -144,6 +144,9 @@ New Features
* LUCENE-10403: Add ArrayUtil#grow(T[]). (Greg Miller)
* LUCENE-10414: Add fn:fuzzyTerm interval function to flexible query parser (Dawid Weiss,
Alan Woodward)
Improvements
---------------------

View File

@ -141,8 +141,27 @@ public class FuzzyQuery extends MultiTermQuery {
/** Returns the compiled automata used to match terms */
public CompiledAutomaton getAutomata() {
return getFuzzyAutomaton(term.text(), maxEdits, prefixLength, transpositions);
}
/**
* Returns the {@link CompiledAutomaton} internally used by {@link FuzzyQuery} to match terms.
* This is a very low-level method and may no longer exist if the implementation of
* fuzzy-matching changes in the future.
*
* @lucene.internal
* @param term the term to search for
* @param maxEdits must be {@code >= 0} and {@code <=} {@link
* LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
* @param prefixLength length of common (non-fuzzy) prefix
* @param transpositions true if transpositions should be treated as a primitive edit operation.
* If this is false, comparisons will implement the classic Levenshtein algorithm.
* @return A {@link CompiledAutomaton} that matches terms that satisfy input parameters.
*/
public static CompiledAutomaton getFuzzyAutomaton(
String term, int maxEdits, int prefixLength, boolean transpositions) {
FuzzyAutomatonBuilder builder =
new FuzzyAutomatonBuilder(term.text(), maxEdits, prefixLength, transpositions);
new FuzzyAutomatonBuilder(term, maxEdits, prefixLength, transpositions);
return builder.buildMaxEditAutomaton();
}

View File

@ -332,252 +332,13 @@ public class TestMatchHighlighter extends LuceneTestCase {
}
};
String field = FLD_TEXT2;
new IndexBuilder(this::toField)
// Just one document and multiple interval queries.
.doc(field, "The quick brown fox jumps over the lazy dog")
.build(
analyzer,
reader -> {
IndexSearcher searcher = new IndexSearcher(reader);
Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.
// Rerun the same test on fields with offsets and without offsets.
for (String field : List.of(FLD_TEXT1, FLD_TEXT2)) {
String inputDocument = "The quick brown fox jumps over the lazy dog";
MatchHighlighter highlighter =
new MatchHighlighter(searcher, analyzer)
.appendFieldHighlighter(
FieldValueHighlighters.highlighted(
80 * 3, 1, new PassageFormatter("...", ">", "<"), fld -> true))
.appendFieldHighlighter(FieldValueHighlighters.skipRemaining());
StandardQueryParser qp = new StandardQueryParser(analyzer);
// Run all pairs of query-expected highlight.
List<String> errors = new ArrayList<>();
for (var queryHighlightPair :
new String[][] {
{
"fn:ordered(brown dog)",
"0. %s: The quick >brown fox jumps over the lazy dog<"
},
{
"fn:within(fn:or(lazy quick) 1 fn:or(dog fox))",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:containedBy(fox fn:ordered(brown fox dog))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)",
"0. %s: The >quick >brown fox jumps over the lazy<<> dog<"
},
{
"fn:atLeast(2 quick fox \"furry dog\")",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
},
{
"fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
},
{
"fn:or(quick \"fox\")",
"0. %s: The >quick< brown >fox< jumps over the lazy dog"
},
{"fn:or(\"quick fox\")"},
{
"fn:phrase(quick brown fox)",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{"fn:wildcard(jump*)", "0. %s: The quick brown fox >jumps< over the lazy dog"},
{"fn:wildcard(br*n)", "0. %s: The quick >brown< fox jumps over the lazy dog"},
{"fn:or(dog fox)", "0. %s: The quick brown >fox< jumps over the lazy >dog<"},
{
"fn:phrase(fn:ordered(quick fox) jumps)",
"0. %s: The >quick brown fox jumps< over the lazy dog"
},
{
"fn:ordered(quick jumps dog)",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:ordered(quick fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:ordered(quick jumps fn:or(fox dog))",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:unordered(dog jumps quick)",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:unordered(fn:or(fox dog) quick)",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))",
"0. %s: The quick >brown fox jumps< over the lazy dog"
},
{"fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))"},
{"fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))"},
{
"fn:before(fn:or(brown lazy) fox)",
"0. %s: The quick >brown< fox jumps over the lazy dog"
},
{
"fn:before(fn:or(brown lazy) fn:or(dog fox))",
"0. %s: The quick >brown< fox jumps over the >lazy< dog"
},
{
"fn:after(fn:or(brown lazy) fox)",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:after(fn:or(brown lazy) fn:or(dog fox))",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
{
"fn:extend(fn:or(dog fox) 2 0)",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:within(fn:or(fox dog) 1 fn:or(quick lazy))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:within(fn:or(fox dog) 2 fn:or(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy >dog<"
},
{
"fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:containing(fn:atLeast(2 quick fox dog) jumps)",
"0. %s: The quick brown >fox jumps over the lazy dog<"
},
{
"fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)",
"0. %s: The quick brown fox jumps over >the lazy dog<"
},
{
"fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))",
"0. %s: The quick >brown fox< jumps over the lazy dog"
},
{
"fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))",
"0. %s: The quick >brown fox< jumps over the lazy dog"
},
{
"fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
}) {
assert queryHighlightPair.length >= 1;
String queryString = queryHighlightPair[0];
var query = qp.parse(queryString, field);
var expected =
Arrays.stream(queryHighlightPair)
.skip(1)
.map(v -> String.format(Locale.ROOT, v, field))
.toArray(String[]::new);
try {
assertHighlights(
toDocList(
highlighter.highlight(searcher.search(query, 10, sortOrder), query)),
expected);
} catch (AssertionError e) {
errors.add("MISMATCH: query: " + queryString + "\n" + e.getMessage());
}
}
if (errors.size() > 0) {
throw new AssertionError(String.join("\n\n", errors));
}
});
}
/**
* Almost the same as the test above; makes sure that fields indexed with offsets are also
* highlighted correctly
*/
@Test
public void testIntervalFunctionsWithOffsetField() throws Exception {
Analyzer analyzer =
new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new StandardTokenizer();
TokenStream ts = tokenizer;
ts = new LowerCaseFilter(ts);
return new TokenStreamComponents(tokenizer, ts);
}
};
String field = FLD_TEXT1;
new IndexBuilder(this::toField)
// Just one document and multiple interval queries.
.doc(field, "The quick brown fox jumps over the lazy dog")
.build(
analyzer,
reader -> {
IndexSearcher searcher = new IndexSearcher(reader);
Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.
MatchHighlighter highlighter =
new MatchHighlighter(searcher, analyzer)
.appendFieldHighlighter(
FieldValueHighlighters.highlighted(
80 * 3, 1, new PassageFormatter("...", ">", "<"), fld -> true))
.appendFieldHighlighter(FieldValueHighlighters.skipRemaining());
StandardQueryParser qp = new StandardQueryParser(analyzer);
// Run all pairs of query-expected highlight.
List<String> errors = new ArrayList<>();
for (var queryHighlightPair :
List<String[]> queryResultPairs =
new ArrayList<>(
Arrays.asList(
new String[][] {
{
"fn:ordered(brown dog)",
@ -622,6 +383,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
},
{"fn:wildcard(jump*)", "0. %s: The quick brown fox >jumps< over the lazy dog"},
{"fn:wildcard(br*n)", "0. %s: The quick >brown< fox jumps over the lazy dog"},
{"fn:fuzzyTerm(fxo)", "0. %s: The quick brown >fox< jumps over the lazy dog"},
{"fn:or(dog fox)", "0. %s: The quick brown >fox< jumps over the lazy >dog<"},
{
"fn:phrase(fn:ordered(quick fox) jumps)",
@ -717,55 +479,83 @@ public class TestMatchHighlighter extends LuceneTestCase {
"fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)",
"0. %s: The >quick >brown fox jumps over the lazy<<> dog<"
},
/*
The test cases below do not work for fields enabled with offset yet:
mainly "extend"
TODO: LUCENE-10229: fix these remaining cases.
}));
{"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
{
"fn:extend(fn:or(dog fox) 2 0)",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
} */
}) {
assert queryHighlightPair.length >= 1;
String queryString = queryHighlightPair[0];
var query = qp.parse(queryString, field);
var expected =
Arrays.stream(queryHighlightPair)
.skip(1)
.map(v -> String.format(Locale.ROOT, v, field))
.toArray(String[]::new);
// TODO: LUCENE-10229: The test cases below do not work for fields enabled with offset yet:
// mainly "extend".
if (field.equals(FLD_TEXT2)) {
queryResultPairs.addAll(
Arrays.asList(
new String[][] {
{"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
{
"fn:extend(fn:or(dog fox) 2 0)",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
}
}));
}
try {
assertHighlights(
toDocList(
highlighter.highlight(searcher.search(query, 10, sortOrder), query)),
expected);
} catch (AssertionError e) {
errors.add("MISMATCH: query: " + queryString + "\n" + e.getMessage());
// Verify assertions.
new IndexBuilder(this::toField)
// Just one document and multiple interval queries to check.
.doc(field, inputDocument)
.build(
analyzer,
reader -> {
IndexSearcher searcher = new IndexSearcher(reader);
Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.
MatchHighlighter highlighter =
new MatchHighlighter(searcher, analyzer)
.appendFieldHighlighter(
FieldValueHighlighters.highlighted(
80 * 3, 1, new PassageFormatter("...", ">", "<"), fld -> true))
.appendFieldHighlighter(FieldValueHighlighters.skipRemaining());
StandardQueryParser qp = new StandardQueryParser(analyzer);
// Run all pairs of query-expected highlight.
List<String> errors = new ArrayList<>();
for (var queryHighlightPair : queryResultPairs) {
assert queryHighlightPair.length >= 1;
String queryString = queryHighlightPair[0];
var query = qp.parse(queryString, field);
var expected =
Arrays.stream(queryHighlightPair)
.skip(1)
.map(v -> String.format(Locale.ROOT, v, field))
.toArray(String[]::new);
try {
assertHighlights(
toDocList(
highlighter.highlight(searcher.search(query, 10, sortOrder), query)),
expected);
} catch (AssertionError e) {
errors.add("MISMATCH: query: " + queryString + "\n" + e.getMessage());
}
}
}
if (errors.size() > 0) {
throw new AssertionError(String.join("\n\n", errors));
}
});
if (errors.size() > 0) {
throw new AssertionError(String.join("\n\n", errors));
}
});
}
}
@Test

View File

@ -25,10 +25,12 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.apache.lucene.util.automaton.Operations;
/**
@ -48,6 +50,14 @@ import org.apache.lucene.util.automaton.Operations;
* {@link #or(boolean, IntervalsSource...)} factory method to prevent rewriting.
*/
public final class Intervals {
/**
* The default number of expansions in:
*
* <ul>
* <li>{@link #multiterm(CompiledAutomaton, String)}
* </ul>
*/
public static final int DEFAULT_MAX_EXPANSIONS = 128;
private Intervals() {}
@ -140,18 +150,19 @@ public final class Intervals {
/**
* Return an {@link IntervalsSource} over the disjunction of all terms that begin with a prefix
*
* @throws IllegalStateException if the prefix expands to more than 128 terms
* @throws IllegalStateException if the prefix expands to more than {@link
* #DEFAULT_MAX_EXPANSIONS} terms
*/
public static IntervalsSource prefix(BytesRef prefix) {
return prefix(prefix, 128);
return prefix(prefix, DEFAULT_MAX_EXPANSIONS);
}
/**
* Expert: Return an {@link IntervalsSource} over the disjunction of all terms that begin with a
* prefix
*
* <p>WARNING: Setting {@code maxExpansions} to higher than the default value of 128 can be both
* slow and memory-intensive
* <p>WARNING: Setting {@code maxExpansions} to higher than the default value of {@link
* #DEFAULT_MAX_EXPANSIONS} can be both slow and memory-intensive
*
* @param prefix the prefix to expand
* @param maxExpansions the maximum number of terms to expand to
@ -165,19 +176,20 @@ public final class Intervals {
/**
* Return an {@link IntervalsSource} over the disjunction of all terms that match a wildcard glob
*
* @throws IllegalStateException if the wildcard glob expands to more than 128 terms
* @throws IllegalStateException if the wildcard glob expands to more than {@link
* #DEFAULT_MAX_EXPANSIONS} terms
* @see WildcardQuery for glob format
*/
public static IntervalsSource wildcard(BytesRef wildcard) {
return wildcard(wildcard, 128);
return wildcard(wildcard, DEFAULT_MAX_EXPANSIONS);
}
/**
* Expert: Return an {@link IntervalsSource} over the disjunction of all terms that match a
* wildcard glob
*
* <p>WARNING: Setting {@code maxExpansions} to higher than the default value of 128 can be both
* slow and memory-intensive
* <p>WARNING: Setting {@code maxExpansions} to higher than the default value of {@link
* #DEFAULT_MAX_EXPANSIONS} can be both slow and memory-intensive
*
* @param wildcard the glob to expand
* @param maxExpansions the maximum number of terms to expand to
@ -194,23 +206,69 @@ public final class Intervals {
}
/**
* Expert: Return an {@link IntervalsSource} over the disjunction of all terms that are accepted
* by the given automaton
* A fuzzy term {@link IntervalsSource} matches the disjunction of intervals of terms that are
* within the specified {@code maxEdits} from the provided term.
*
* @param ca an automaton accepting matching terms
* @param pattern string representation of the given automaton, mostly used in exception messages
* @throws IllegalStateException if the automaton accepts more than 128 terms
* @see #fuzzyTerm(String, int, int, boolean, int)
* @param term the term to search for
* @param maxEdits must be {@code >= 0} and {@code <=} {@link
* LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}, use {@link FuzzyQuery#defaultMaxEdits} for
* the default, if needed.
*/
public static IntervalsSource multiterm(CompiledAutomaton ca, String pattern) {
return multiterm(ca, 128, pattern);
public static IntervalsSource fuzzyTerm(String term, int maxEdits) {
// Delegate to the expert overload, filling in FuzzyQuery's default prefix length and
// transposition handling, and capping expansion at DEFAULT_MAX_EXPANSIONS candidate terms.
return fuzzyTerm(
term,
maxEdits,
FuzzyQuery.defaultPrefixLength,
FuzzyQuery.defaultTranspositions,
DEFAULT_MAX_EXPANSIONS);
}
/**
* A fuzzy term {@link IntervalsSource} matches the disjunction of intervals of terms that are
* within the specified {@code maxEdits} from the provided term.
*
* <p>The implementation is delegated to a {@link #multiterm(CompiledAutomaton, int, String)}
* interval source, with an automaton sourced from {@link org.apache.lucene.search.FuzzyQuery}.
*
* @param term the term to search for
* @param maxEdits must be {@code >= 0} and {@code <=} {@link
* LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}, use {@link FuzzyQuery#defaultMaxEdits} for
* the default, if needed.
* @param prefixLength length of common (non-fuzzy) prefix
* @param maxExpansions the maximum number of terms to match. Setting {@code maxExpansions} to
* higher than the default value of {@link #DEFAULT_MAX_EXPANSIONS} can be both slow and
* memory-intensive
* @param transpositions true if transpositions should be treated as a primitive edit operation.
* If this is false, comparisons will implement the classic Levenshtein algorithm.
*/
public static IntervalsSource fuzzyTerm(
String term, int maxEdits, int prefixLength, boolean transpositions, int maxExpansions) {
// Build the Levenshtein automaton via FuzzyQuery and wrap it in a multiterm source.
// The "term~maxEdits" label is only used as the pattern name in exception messages
// (e.g. when the automaton expands past maxExpansions terms).
return Intervals.multiterm(
FuzzyQuery.getFuzzyAutomaton(term, maxEdits, prefixLength, transpositions),
maxExpansions,
term + "~" + maxEdits);
}
/**
* Expert: Return an {@link IntervalsSource} over the disjunction of all terms that are accepted
* by the given automaton
*
* <p>WARNING: Setting {@code maxExpansions} to higher than the default value of 128 can be both
* slow and memory-intensive
* @param ca an automaton accepting matching terms
* @param pattern string representation of the given automaton, mostly used in exception messages
* @throws IllegalStateException if the automaton accepts more than {@link
* #DEFAULT_MAX_EXPANSIONS} terms
*/
public static IntervalsSource multiterm(CompiledAutomaton ca, String pattern) {
return multiterm(ca, DEFAULT_MAX_EXPANSIONS, pattern);
}
/**
* Expert: Return an {@link IntervalsSource} over the disjunction of all terms that are accepted
* by the given automaton
*
* <p>WARNING: Setting {@code maxExpansions} to higher than the default value of {@link
* #DEFAULT_MAX_EXPANSIONS} can be both slow and memory-intensive
*
* @param ca an automaton accepting matching terms
* @param maxExpansions the maximum number of terms to expand to

View File

@ -40,6 +40,7 @@ import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.Query;
@ -1003,8 +1004,48 @@ public class TestIntervals extends LuceneTestCase {
checkVisits(Intervals.wildcard(new BytesRef("p??")), 1);
}
public void testWrappedFilters() throws IOException {
public void testFuzzyTerm() throws IOException {
// "kot" at edit distance 1 should match terms like 'pot' in the indexed fixture.
IntervalsSource source = Intervals.fuzzyTerm("kot", 1); // matches 'pot'
// One row per indexed document; each row is a flat list of [start, end] position pairs
// (assumes checkIntervals' usual convention — TODO confirm against its declaration).
checkIntervals(
source,
"field1",
4,
new int[][] {
{},
{2, 2, 10, 10, 17, 17, 27, 27},
{5, 5, 10, 10, 21, 21},
{3, 3},
{2, 2, 10, 10, 17, 17},
{}
});
// Spot-check the matches iterator for doc 4: arguments appear to be
// (startPosition, endPosition, startOffset, endOffset) — verify against assertMatch.
MatchesIterator mi = getMatches(source, 4, "field1");
assertNotNull(mi);
assertMatch(mi, 2, 2, 15, 18);
assertMatch(mi, 10, 10, 63, 66);
assertMatch(mi, 17, 17, 97, 100);
// Check limits.
// With maxExpansions forced to 1 the fuzzy automaton must refuse to expand and
// surface an IllegalStateException naming the pattern and the limit.
IllegalStateException e =
expectThrows(
IllegalStateException.class,
() -> {
IntervalsSource s =
Intervals.fuzzyTerm(
"kot",
1,
FuzzyQuery.defaultPrefixLength,
FuzzyQuery.defaultTranspositions,
1);
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
s.intervals("field1", ctx);
}
});
assertEquals("Automaton [kot~1] expanded to too many terms (limit 1)", e.getMessage());
// The fuzzy source should visit exactly one leaf query during Query.visit().
checkVisits(Intervals.fuzzyTerm("kot", FuzzyQuery.defaultMaxEdits), 1);
}
public void testWrappedFilters() throws IOException {
IntervalsSource source =
Intervals.or(
Intervals.term("nine"),

View File

@ -1,9 +1,9 @@
{
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java": "3d5f272a6d56b3f4962b252267ce2662e734414e",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java": "fd1fcc78bf1025fe6fe54ab6f9ae2f53cce33364",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj": "eb0d1c55d029982ab8ea433cf9ef1088ba6ea3de",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java": "d3c5d87c46635dbb6dc03bbdc0fb662b47ec318d",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java": "d8e12b467779c1740ea2b672c10806ac25e0184e",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java": "6a9e8f350d6726d2f1b86f1cd6d5e747dda7ca6c",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj": "77ebf56c78a8614a82532ca6bdf69378afd44b8f",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java": "4ad426d4f4b7577f4af4e9057de32c65c83b1812",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java": "88eae6e992c65b3050949054b0f636d5e2654cc8",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java": "f4cb9d01587279dba30e549ce4867e4381bbd9d7",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java": "cdfa99af5fcf6b1e50691a1c1370ba60bf0d2d2d"
}

View File

@ -50,7 +50,7 @@ public class AnalyzedText extends IntervalFunction {
}
}
private boolean requiresQuotes(String term) {
static boolean requiresQuotes(String term) {
// A term needs quoting when any whitespace character (regex class \s) occurs inside it.
return Pattern.compile("\\s").matcher(term).find();
}
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.FuzzyQuery;
/**
* An interval function equivalent to {@link FuzzyQuery}. A fuzzy term expands to a disjunction of
* intervals of terms that are within the specified {@code maxEdits} from the provided term. A limit
* of {@code maxExpansions} prevents the internal implementation from blowing up on too many
* potential candidate terms.
*/
public class FuzzyTerm extends IntervalFunction {
  private final String term;
  private final int maxEdits;
  // Never null after construction, so store as a primitive.
  private final int maxExpansions;

  /**
   * @param term the term to fuzzy-match against the index
   * @param maxEdits maximum edit distance; {@code null} selects {@link FuzzyQuery#defaultMaxEdits}
   * @param maxExpansions maximum number of candidate terms the fuzzy automaton may expand to;
   *     {@code null} selects {@link Intervals#DEFAULT_MAX_EXPANSIONS}
   */
  public FuzzyTerm(String term, Integer maxEdits, Integer maxExpansions) {
    this.term = term;
    this.maxEdits = maxEdits == null ? FuzzyQuery.defaultMaxEdits : maxEdits;
    this.maxExpansions = maxExpansions == null ? Intervals.DEFAULT_MAX_EXPANSIONS : maxExpansions;
  }

  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    // Only maxEdits and maxExpansions are configurable from the query syntax; prefix length
    // and transposition handling always use FuzzyQuery's defaults.
    return Intervals.fuzzyTerm(
        term,
        maxEdits,
        FuzzyQuery.defaultPrefixLength,
        FuzzyQuery.defaultTranspositions,
        maxExpansions);
  }

  @Override
  public String toString() {
    // Fixed format string: the previous "%s %d%s" concatenated maxEdits and maxExpansions
    // with no separator (e.g. "fn:fuzzyTerm(fox 2128)" for maxEdits=2, maxExpansions=128).
    return String.format(
        Locale.ROOT,
        "fn:fuzzyTerm(%s %d %d)",
        AnalyzedText.requiresQuotes(term) ? '"' + term + '"' : term,
        maxEdits,
        maxExpansions);
  }
}

View File

@ -25,18 +25,28 @@ import org.apache.lucene.util.BytesRef;
/** Node that represents {@link Intervals#wildcard(BytesRef)}. */
public class Wildcard extends IntervalFunction {
private final String wildcard;
private final int maxExpansions;
public Wildcard(String wildcard) {
public Wildcard(String wildcard, int maxExpansions) {
this.wildcard = wildcard;
this.maxExpansions = maxExpansions;
}
@Override
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
return Intervals.wildcard(new BytesRef(wildcard));
if (maxExpansions == 0) {
return Intervals.wildcard(new BytesRef(wildcard));
} else {
return Intervals.wildcard(new BytesRef(wildcard), maxExpansions);
}
}
@Override
public String toString() {
return String.format(Locale.ROOT, "fn:wildcard(%s)", wildcard);
return String.format(
Locale.ROOT,
"fn:wildcard(%s%s)",
wildcard,
maxExpansions == 0 ? "" : " maxExpansions:" + maxExpansions);
}
}

View File

@ -47,6 +47,7 @@ import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Before;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.ContainedBy;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Containing;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Extend;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.FuzzyTerm;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.IntervalFunction;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.MaxGaps;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.MaxWidth;
@ -339,8 +340,8 @@ if (modifier != ModifierQueryNode.Modifier.MOD_NONE) {
;
}
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 55:{
jj_consume_token(55);
case 56:{
jj_consume_token(56);
minShouldMatch = jj_consume_token(NUMBER);
break;
}
@ -425,6 +426,9 @@ if (minShouldMatch != null) {
source = IntervalExtend();
{if ("" != null) return source;}
} else if (jj_2_24(2)) {
source = IntervalFuzzyTerm();
{if ("" != null) return source;}
} else if (jj_2_25(2)) {
source = IntervalText();
{if ("" != null) return source;}
} else {
@ -740,6 +744,7 @@ sources.add(source);
}
final private IntervalFunction IntervalWildcard() throws ParseException {String wildcard;
Token maxExpansions = null;
jj_consume_token(FN_PREFIX);
jj_consume_token(WILDCARD);
jj_consume_token(LPAREN);
@ -773,8 +778,70 @@ wildcard = token.image.substring(1, token.image.length() - 1);
jj_consume_token(-1);
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case NUMBER:{
maxExpansions = jj_consume_token(NUMBER);
break;
}
default:
jj_la1[18] = jj_gen;
;
}
jj_consume_token(RPAREN);
{if ("" != null) return new Wildcard(wildcard);}
{if ("" != null) return new Wildcard(wildcard, maxExpansions == null ? 0 : parseInt(maxExpansions));}
throw new Error("Missing return statement in function");
}
final private IntervalFunction IntervalFuzzyTerm() throws ParseException {String term;
Token maxEdits = null;
Token maxExpansions = null;
jj_consume_token(FN_PREFIX);
jj_consume_token(FUZZYTERM);
jj_consume_token(LPAREN);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case NUMBER:
case TERM:{
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case TERM:{
jj_consume_token(TERM);
break;
}
case NUMBER:{
jj_consume_token(NUMBER);
break;
}
default:
jj_la1[19] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
term = token.image;
break;
}
case QUOTED:{
jj_consume_token(QUOTED);
term = token.image.substring(1, token.image.length() - 1);
break;
}
default:
jj_la1[20] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
if (jj_2_26(2)) {
maxEdits = jj_consume_token(NUMBER);
} else {
;
}
if (jj_2_27(2)) {
maxExpansions = jj_consume_token(NUMBER);
} else {
;
}
jj_consume_token(RPAREN);
{if ("" != null) return new FuzzyTerm(term,
maxEdits == null ? null : parseInt(maxEdits),
maxExpansions == null ? null : parseInt(maxExpansions));}
throw new Error("Missing return statement in function");
}
@ -797,7 +864,7 @@ wildcard = token.image.substring(1, token.image.length() - 1);
break;
}
default:
jj_la1[18] = jj_gen;
jj_la1[21] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -805,7 +872,7 @@ wildcard = token.image.substring(1, token.image.length() - 1);
break;
}
default:
jj_la1[19] = jj_gen;
jj_la1[22] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -821,7 +888,7 @@ wildcard = token.image.substring(1, token.image.length() - 1);
final private QueryNode FuzzyOp(CharSequence field, Token term, QueryNode node) throws ParseException {Token similarity = null;
jj_consume_token(TILDE);
if (jj_2_25(2)) {
if (jj_2_28(2)) {
similarity = jj_consume_token(NUMBER);
} else {
;
@ -861,7 +928,7 @@ float fms = org.apache.lucene.search.FuzzyQuery.defaultMaxEdits;
break;
}
default:
jj_la1[20] = jj_gen;
jj_la1[23] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -880,7 +947,7 @@ operator = token;
break;
}
default:
jj_la1[21] = jj_gen;
jj_la1[24] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -941,7 +1008,7 @@ String v = term.image.substring(1, term.image.length() - 1);
break;
}
default:
jj_la1[22] = jj_gen;
jj_la1[25] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -952,7 +1019,7 @@ q = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, t
break;
}
default:
jj_la1[23] = jj_gen;
jj_la1[26] = jj_gen;
;
}
break;
@ -967,7 +1034,7 @@ q = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, t
break;
}
default:
jj_la1[24] = jj_gen;
jj_la1[27] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -977,7 +1044,7 @@ q = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, t
break;
}
default:
jj_la1[25] = jj_gen;
jj_la1[28] = jj_gen;
;
}
{if ("" != null) return q;}
@ -997,7 +1064,7 @@ q = new SlopQueryNode(q, parseInt(slop));
break;
}
default:
jj_la1[26] = jj_gen;
jj_la1[29] = jj_gen;
;
}
{if ("" != null) return q;}
@ -1018,7 +1085,7 @@ leftInclusive = true;
break;
}
default:
jj_la1[27] = jj_gen;
jj_la1[30] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -1036,7 +1103,7 @@ leftInclusive = true;
break;
}
default:
jj_la1[28] = jj_gen;
jj_la1[31] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -1056,7 +1123,7 @@ left = token;
break;
}
default:
jj_la1[29] = jj_gen;
jj_la1[32] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -1072,7 +1139,7 @@ rightInclusive = true;
break;
}
default:
jj_la1[30] = jj_gen;
jj_la1[33] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@ -1292,39 +1359,28 @@ if (left.kind == RANGE_QUOTED) {
finally { jj_save(24, xla); }
}
private boolean jj_3R_39()
private boolean jj_2_26(int xla)
{
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(25)) {
jj_scanpos = xsp;
if (jj_scan_token(24)) return true;
}
return false;
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return (!jj_3_26()); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(25, xla); }
}
private boolean jj_3R_38()
private boolean jj_2_27(int xla)
{
if (jj_scan_token(QUOTED)) return true;
return false;
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return (!jj_3_27()); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(26, xla); }
}
private boolean jj_3R_32()
private boolean jj_2_28(int xla)
{
Token xsp;
xsp = jj_scanpos;
if (jj_3R_38()) {
jj_scanpos = xsp;
if (jj_3R_39()) return true;
}
return false;
}
private boolean jj_3R_26()
{
if (jj_scan_token(FN_PREFIX)) return true;
if (jj_scan_token(NOT_CONTAINED_BY)) return true;
return false;
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return (!jj_3_28()); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(27, xla); }
}
private boolean jj_3R_17()
@ -1341,6 +1397,24 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_11()
{
if (jj_3R_9()) return true;
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(17)) {
jj_scanpos = xsp;
if (jj_scan_token(18)) {
jj_scanpos = xsp;
if (jj_scan_token(19)) {
jj_scanpos = xsp;
if (jj_scan_token(20)) return true;
}
}
}
return false;
}
private boolean jj_3R_19()
{
if (jj_scan_token(FN_PREFIX)) return true;
@ -1348,9 +1422,9 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_45()
private boolean jj_3R_47()
{
if (jj_scan_token(RANGEIN_START)) return true;
if (jj_scan_token(TILDE)) return true;
return false;
}
@ -1361,25 +1435,6 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_41()
{
Token xsp;
xsp = jj_scanpos;
if (jj_3R_45()) {
jj_scanpos = xsp;
if (jj_scan_token(28)) return true;
}
xsp = jj_scanpos;
if (jj_scan_token(54)) {
jj_scanpos = xsp;
if (jj_scan_token(53)) {
jj_scanpos = xsp;
if (jj_scan_token(50)) return true;
}
}
return false;
}
private boolean jj_3R_24()
{
if (jj_scan_token(FN_PREFIX)) return true;
@ -1387,6 +1442,15 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_43()
{
if (jj_scan_token(QUOTED)) return true;
Token xsp;
xsp = jj_scanpos;
if (jj_3R_47()) jj_scanpos = xsp;
return false;
}
private boolean jj_3R_30()
{
if (jj_scan_token(FN_PREFIX)) return true;
@ -1401,6 +1465,12 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3_28()
{
if (jj_scan_token(NUMBER)) return true;
return false;
}
private boolean jj_3R_23()
{
if (jj_scan_token(FN_PREFIX)) return true;
@ -1421,36 +1491,69 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_11()
private boolean jj_3R_41()
{
if (jj_3R_9()) return true;
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(17)) {
jj_scanpos = xsp;
if (jj_scan_token(18)) {
jj_scanpos = xsp;
if (jj_scan_token(19)) {
jj_scanpos = xsp;
if (jj_scan_token(20)) return true;
}
}
}
if (jj_3R_45()) return true;
return false;
}
private boolean jj_3R_46()
private boolean jj_3R_37()
{
if (jj_3R_43()) return true;
return false;
}
private boolean jj_3R_45()
{
if (jj_scan_token(TILDE)) return true;
return false;
}
private boolean jj_3R_38()
{
if (jj_3R_44()) return true;
return false;
}
private boolean jj_3R_36()
{
if (jj_3R_42()) return true;
return false;
}
private boolean jj_3_2()
{
if (jj_3R_10()) return true;
return false;
}
private boolean jj_3R_13()
{
if (jj_scan_token(FN_PREFIX)) return true;
if (jj_scan_token(MAXWIDTH)) return true;
return false;
}
private boolean jj_3R_35()
{
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(25)) {
jj_scanpos = xsp;
if (jj_scan_token(24)) return true;
}
xsp = jj_scanpos;
if (jj_3R_41()) jj_scanpos = xsp;
return false;
}
private boolean jj_3R_20()
{
if (jj_scan_token(FN_PREFIX)) return true;
if (jj_scan_token(AFTER)) return true;
return false;
}
private boolean jj_3_1()
{
if (jj_3R_9()) return true;
@ -1463,32 +1566,15 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_13()
{
if (jj_scan_token(FN_PREFIX)) return true;
if (jj_scan_token(MAXWIDTH)) return true;
return false;
}
private boolean jj_3R_20()
{
if (jj_scan_token(FN_PREFIX)) return true;
if (jj_scan_token(AFTER)) return true;
return false;
}
private boolean jj_3_3()
{
if (jj_3R_11()) return true;
return false;
}
private boolean jj_3R_42()
private boolean jj_3R_34()
{
if (jj_scan_token(QUOTED)) return true;
Token xsp;
xsp = jj_scanpos;
if (jj_3R_46()) jj_scanpos = xsp;
if (jj_scan_token(REGEXPTERM)) return true;
return false;
}
@ -1506,6 +1592,25 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_10()
{
Token xsp;
xsp = jj_scanpos;
if (jj_3R_34()) {
jj_scanpos = xsp;
if (jj_3R_35()) {
jj_scanpos = xsp;
if (jj_3R_36()) {
jj_scanpos = xsp;
if (jj_3R_37()) return true;
}
}
}
xsp = jj_scanpos;
if (jj_3R_38()) jj_scanpos = xsp;
return false;
}
private boolean jj_3R_12()
{
if (jj_scan_token(FN_PREFIX)) return true;
@ -1513,9 +1618,15 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_44()
{
if (jj_scan_token(CARAT)) return true;
return false;
}
private boolean jj_3_25()
{
if (jj_scan_token(NUMBER)) return true;
if (jj_3R_33()) return true;
return false;
}
@ -1531,54 +1642,18 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_40()
{
if (jj_3R_44()) return true;
return false;
}
private boolean jj_3R_36()
{
if (jj_3R_42()) return true;
return false;
}
private boolean jj_3_22()
{
if (jj_3R_30()) return true;
return false;
}
private boolean jj_3R_44()
{
if (jj_scan_token(TILDE)) return true;
return false;
}
private boolean jj_3R_37()
{
if (jj_3R_43()) return true;
return false;
}
private boolean jj_3R_35()
{
if (jj_3R_41()) return true;
return false;
}
private boolean jj_3_21()
{
if (jj_3R_29()) return true;
return false;
}
private boolean jj_3_20()
{
if (jj_3R_28()) return true;
return false;
}
private boolean jj_3R_22()
{
if (jj_scan_token(FN_PREFIX)) return true;
@ -1586,22 +1661,9 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3_19()
private boolean jj_3_20()
{
if (jj_3R_27()) return true;
return false;
}
private boolean jj_3R_34()
{
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(25)) {
jj_scanpos = xsp;
if (jj_scan_token(24)) return true;
}
xsp = jj_scanpos;
if (jj_3R_40()) jj_scanpos = xsp;
if (jj_3R_28()) return true;
return false;
}
@ -1612,18 +1674,52 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3_19()
{
if (jj_3R_27()) return true;
return false;
}
private boolean jj_3_18()
{
if (jj_3R_26()) return true;
return false;
}
private boolean jj_3R_40()
{
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(25)) {
jj_scanpos = xsp;
if (jj_scan_token(24)) return true;
}
return false;
}
private boolean jj_3_17()
{
if (jj_3R_25()) return true;
return false;
}
private boolean jj_3R_39()
{
if (jj_scan_token(QUOTED)) return true;
return false;
}
private boolean jj_3R_33()
{
Token xsp;
xsp = jj_scanpos;
if (jj_3R_39()) {
jj_scanpos = xsp;
if (jj_3R_40()) return true;
}
return false;
}
private boolean jj_3_16()
{
if (jj_3R_24()) return true;
@ -1642,12 +1738,6 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_33()
{
if (jj_scan_token(REGEXPTERM)) return true;
return false;
}
private boolean jj_3_13()
{
if (jj_3R_21()) return true;
@ -1678,6 +1768,13 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_18()
{
if (jj_scan_token(FN_PREFIX)) return true;
if (jj_scan_token(FN_OR)) return true;
return false;
}
private boolean jj_3_8()
{
if (jj_3R_16()) return true;
@ -1690,32 +1787,6 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_18()
{
if (jj_scan_token(FN_PREFIX)) return true;
if (jj_scan_token(FN_OR)) return true;
return false;
}
private boolean jj_3R_10()
{
Token xsp;
xsp = jj_scanpos;
if (jj_3R_33()) {
jj_scanpos = xsp;
if (jj_3R_34()) {
jj_scanpos = xsp;
if (jj_3R_35()) {
jj_scanpos = xsp;
if (jj_3R_36()) return true;
}
}
}
xsp = jj_scanpos;
if (jj_3R_37()) jj_scanpos = xsp;
return false;
}
private boolean jj_3_6()
{
if (jj_3R_14()) return true;
@ -1728,18 +1799,18 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_43()
{
if (jj_scan_token(CARAT)) return true;
return false;
}
private boolean jj_3_4()
{
if (jj_3R_12()) return true;
return false;
}
private boolean jj_3_27()
{
if (jj_scan_token(NUMBER)) return true;
return false;
}
private boolean jj_3R_27()
{
if (jj_scan_token(FN_PREFIX)) return true;
@ -1747,6 +1818,18 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3_26()
{
if (jj_scan_token(NUMBER)) return true;
return false;
}
private boolean jj_3R_46()
{
if (jj_scan_token(RANGEIN_START)) return true;
return false;
}
private boolean jj_3R_15()
{
if (jj_scan_token(FN_PREFIX)) return true;
@ -1754,6 +1837,39 @@ if (left.kind == RANGE_QUOTED) {
return false;
}
private boolean jj_3R_32()
{
if (jj_scan_token(FN_PREFIX)) return true;
if (jj_scan_token(FUZZYTERM)) return true;
return false;
}
private boolean jj_3R_42()
{
Token xsp;
xsp = jj_scanpos;
if (jj_3R_46()) {
jj_scanpos = xsp;
if (jj_scan_token(28)) return true;
}
xsp = jj_scanpos;
if (jj_scan_token(55)) {
jj_scanpos = xsp;
if (jj_scan_token(54)) {
jj_scanpos = xsp;
if (jj_scan_token(51)) return true;
}
}
return false;
}
private boolean jj_3R_26()
{
if (jj_scan_token(FN_PREFIX)) return true;
if (jj_scan_token(NOT_CONTAINED_BY)) return true;
return false;
}
/** Generated Token Manager. */
public StandardSyntaxParserTokenManager token_source;
/** Current token. */
@ -1764,7 +1880,7 @@ if (left.kind == RANGE_QUOTED) {
private Token jj_scanpos, jj_lastpos;
private int jj_la;
private int jj_gen;
final private int[] jj_la1 = new int[31];
final private int[] jj_la1 = new int[34];
static private int[] jj_la1_0;
static private int[] jj_la1_1;
static {
@ -1772,12 +1888,12 @@ if (left.kind == RANGE_QUOTED) {
jj_la1_init_1();
}
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x3f803c00,0x200,0x100,0x2400,0x3400,0x3400,0x18000,0x23800800,0x3f800800,0x200000,0x0,0x3800800,0x3800800,0x3800800,0x3800800,0x3800800,0x3000000,0x3800000,0x3000000,0x3800000,0x1e0000,0x3800000,0x3000000,0x400000,0x1f800000,0x200000,0x400000,0x18000000,0x0,0x0,0x0,};
jj_la1_0 = new int[] {0x3f803c00,0x200,0x100,0x2400,0x3400,0x3400,0x18000,0x23800800,0x3f800800,0x200000,0x0,0x3800800,0x3800800,0x3800800,0x3800800,0x3800800,0x3000000,0x3800000,0x1000000,0x3000000,0x3800000,0x3000000,0x3800000,0x1e0000,0x3800000,0x3000000,0x400000,0x1f800000,0x200000,0x400000,0x18000000,0x0,0x0,0x0,};
}
private static void jj_la1_init_1() {
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x800000,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x640000,0x640000,0x180000,};
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1000000,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xc80000,0xc80000,0x300000,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[25];
final private JJCalls[] jj_2_rtns = new JJCalls[28];
private boolean jj_rescan = false;
private int jj_gc = 0;
@ -1787,7 +1903,7 @@ if (left.kind == RANGE_QUOTED) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 34; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@ -1797,7 +1913,7 @@ if (left.kind == RANGE_QUOTED) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 34; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@ -1807,7 +1923,7 @@ if (left.kind == RANGE_QUOTED) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 34; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@ -1817,7 +1933,7 @@ if (left.kind == RANGE_QUOTED) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
for (int i = 0; i < 34; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@ -1943,12 +2059,12 @@ if (left.kind == RANGE_QUOTED) {
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.clear();
boolean[] la1tokens = new boolean[56];
boolean[] la1tokens = new boolean[57];
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
}
for (int i = 0; i < 31; i++) {
for (int i = 0; i < 34; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1<<j)) != 0) {
@ -1960,7 +2076,7 @@ if (left.kind == RANGE_QUOTED) {
}
}
}
for (int i = 0; i < 56; i++) {
for (int i = 0; i < 57; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;
@ -1995,7 +2111,7 @@ if (left.kind == RANGE_QUOTED) {
private void jj_rescan_token() {
jj_rescan = true;
for (int i = 0; i < 25; i++) {
for (int i = 0; i < 28; i++) {
try {
JJCalls p = jj_2_rtns[i];
@ -2028,6 +2144,9 @@ if (left.kind == RANGE_QUOTED) {
case 22: jj_3_23(); break;
case 23: jj_3_24(); break;
case 24: jj_3_25(); break;
case 25: jj_3_26(); break;
case 26: jj_3_27(); break;
case 27: jj_3_28(); break;
}
}
p = p.next;

View File

@ -58,6 +58,7 @@ import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Before;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.ContainedBy;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Containing;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Extend;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.FuzzyTerm;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.IntervalFunction;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.MaxGaps;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.MaxWidth;
@ -176,6 +177,7 @@ PARSER_END(StandardSyntaxParser)
| <CONTAINING: ("containing") >
| <EXTEND: ("extend") >
| <FN_OR: ("or") >
| <FUZZYTERM: ("fuzzyterm" | "fuzzyTerm") >
| <MAXGAPS: ("maxgaps" | "maxGaps") >
| <MAXWIDTH: ("maxwidth" | "maxWidth") >
| <NON_OVERLAPPING: ("nonOverlapping" | "nonoverlapping") >
@ -403,6 +405,7 @@ private IntervalFunction IntervalFun() : {
| LOOKAHEAD(2) source = IntervalOverlapping() { return source; }
| LOOKAHEAD(2) source = IntervalNonOverlapping() { return source; }
| LOOKAHEAD(2) source = IntervalExtend() { return source; }
| LOOKAHEAD(2) source = IntervalFuzzyTerm() { return source; }
| LOOKAHEAD(2) source = IntervalText() { return source; }
}
@ -638,6 +641,7 @@ private IntervalFunction IntervalNonOverlapping() : {
private IntervalFunction IntervalWildcard() : {
String wildcard;
Token maxExpansions = null;
}
{
<FN_PREFIX> <WILDCARD>
@ -646,9 +650,32 @@ private IntervalFunction IntervalWildcard() : {
(<TERM> | <NUMBER>) { wildcard = token.image; }
| <QUOTED> { wildcard = token.image.substring(1, token.image.length() - 1); }
)
(maxExpansions = <NUMBER>)?
<RPAREN>
{
return new Wildcard(wildcard);
return new Wildcard(wildcard, maxExpansions == null ? 0 : parseInt(maxExpansions));
}
}
private IntervalFunction IntervalFuzzyTerm() : {
String term;
Token maxEdits = null;
Token maxExpansions = null;
}
{
<FN_PREFIX> <FUZZYTERM>
<LPAREN>
(
(<TERM> | <NUMBER>) { term = token.image; }
| <QUOTED> { term = token.image.substring(1, token.image.length() - 1); }
)
(LOOKAHEAD(2) maxEdits = <NUMBER>)?
(LOOKAHEAD(2) maxExpansions = <NUMBER>)?
<RPAREN>
{
return new FuzzyTerm(term,
maxEdits == null ? null : parseInt(maxEdits),
maxExpansions == null ? null : parseInt(maxExpansions));
}
}

View File

@ -81,41 +81,43 @@ public interface StandardSyntaxParserConstants {
/** RegularExpression Id. */
int FN_OR = 36;
/** RegularExpression Id. */
int MAXGAPS = 37;
int FUZZYTERM = 37;
/** RegularExpression Id. */
int MAXWIDTH = 38;
int MAXGAPS = 38;
/** RegularExpression Id. */
int NON_OVERLAPPING = 39;
int MAXWIDTH = 39;
/** RegularExpression Id. */
int NOT_CONTAINED_BY = 40;
int NON_OVERLAPPING = 40;
/** RegularExpression Id. */
int NOT_CONTAINING = 41;
int NOT_CONTAINED_BY = 41;
/** RegularExpression Id. */
int NOT_WITHIN = 42;
int NOT_CONTAINING = 42;
/** RegularExpression Id. */
int ORDERED = 43;
int NOT_WITHIN = 43;
/** RegularExpression Id. */
int OVERLAPPING = 44;
int ORDERED = 44;
/** RegularExpression Id. */
int PHRASE = 45;
int OVERLAPPING = 45;
/** RegularExpression Id. */
int UNORDERED = 46;
int PHRASE = 46;
/** RegularExpression Id. */
int UNORDERED_NO_OVERLAPS = 47;
int UNORDERED = 47;
/** RegularExpression Id. */
int WILDCARD = 48;
int UNORDERED_NO_OVERLAPS = 48;
/** RegularExpression Id. */
int WITHIN = 49;
int WILDCARD = 49;
/** RegularExpression Id. */
int RANGE_TO = 50;
int WITHIN = 50;
/** RegularExpression Id. */
int RANGEIN_END = 51;
int RANGE_TO = 51;
/** RegularExpression Id. */
int RANGEEX_END = 52;
int RANGEIN_END = 52;
/** RegularExpression Id. */
int RANGE_QUOTED = 53;
int RANGEEX_END = 53;
/** RegularExpression Id. */
int RANGE_GOOP = 54;
int RANGE_QUOTED = 54;
/** RegularExpression Id. */
int RANGE_GOOP = 55;
/** Lexical state. */
int Function = 0;
@ -163,6 +165,7 @@ public interface StandardSyntaxParserConstants {
"\"containing\"",
"\"extend\"",
"\"or\"",
"<FUZZYTERM>",
"<MAXGAPS>",
"<MAXWIDTH>",
"<NON_OVERLAPPING>",

View File

@ -106,6 +106,9 @@ public class TestStandardQPEnhancements extends LuceneTestCase {
@Test
public void testWildcard() throws Exception {
checkIntervalQueryNode("fn:wildcard(foo*)");
// Explicit maxExpansions.
checkIntervalQueryNode("fn:wildcard(foo* 128)");
}
@Test
@ -173,6 +176,15 @@ public class TestStandardQPEnhancements extends LuceneTestCase {
checkIntervalQueryNode("fn:extend(fn:ordered(big foo) 2 5)");
}
@Test
public void testFuzzy() throws Exception {
checkIntervalQueryNode("fn:fuzzyTerm(dfe)");
// Explicit maxEdits
checkIntervalQueryNode("fn:fuzzyTerm(dfe 2)");
// Explicit maxExpansions
checkIntervalQueryNode("fn:fuzzyTerm(dfe 2 128)");
}
protected void checkIntervalQueryNode(String query) throws Exception {
// Check raw parser first.
var syntaxParser = new StandardSyntaxParser(new FastCharStream(new StringReader(query)));

View File

@ -144,7 +144,7 @@ public class CheckHits {
}
/**
* Tests that a query matches the an expected set of documents using Hits.
* Tests that a query matches the expected set of documents using Hits.
*
* <p>Note that when using the Hits API, documents will only be returned if they have a positive
* normalized score.
@ -159,16 +159,16 @@ public class CheckHits {
Random random, Query query, String defaultFieldName, IndexSearcher searcher, int[] results)
throws IOException {
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
ScoreDoc[] hits = searcher.search(query, Math.max(10, results.length * 2)).scoreDocs;
Set<Integer> correct = new TreeSet<>();
for (int i = 0; i < results.length; i++) {
correct.add(Integer.valueOf(results[i]));
for (int result : results) {
correct.add(result);
}
Set<Integer> actual = new TreeSet<>();
for (int i = 0; i < hits.length; i++) {
actual.add(Integer.valueOf(hits[i].doc));
for (ScoreDoc hit : hits) {
actual.add(hit.doc);
}
assertEquals(query.toString(defaultFieldName), correct, actual);