LUCENE-10223: interval support in standard syntax parser (#429)

This commit is contained in:
Dawid Weiss 2021-11-11 08:54:59 +01:00 committed by GitHub
parent 53586d4231
commit f725b27e12
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
55 changed files with 5911 additions and 518 deletions

View File

@ -481,8 +481,11 @@ class RenderJavadocTask extends DefaultTask {
// append some special table css, prettify css
ant.concat(destfile: "${outputDir}/stylesheet.css", append: "true", fixlastline: "true", encoding: "UTF-8") {
filelist(dir: taskResources, files: "table_padding.css")
filelist(dir: project.file("${taskResources}/prettify"), files: "prettify.css")
filelist(dir: taskResources, files:
["table_padding.css",
"custom_styles.css",
"prettify/prettify.css"].join(" ")
)
}
// append prettify to scripts

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Used by interval function documentation.
*/
/* Base style for an example sentence: centered, italic text with vertical padding. */
.example.sentence {
text-align: center;
padding: 1rem 0 2rem;
font-style: italic;
}
/* Variant with a taller line height; presumably leaves room for the underline drawn by span.highlight. */
.example.sentence.with-highlights {
line-height: 150%;
}
/* Variant in which <sub> elements are rendered in bold red; presumably used for token position markers. */
.example.sentence.with-positions sub {
color: red;
font-weight: bold;
}
/* Variant that overrides the centered base alignment with left alignment. */
.example.sentence.left-aligned {
text-align: left;
}
/* Marks a highlighted region with a red underline (bottom border), offset from the text by 2px of padding. */
span.highlight {
padding-bottom: 2px;
border-bottom: 2px solid red;
}

View File

@ -38,6 +38,10 @@ API Changes
New Features
---------------------
* LUCENE-10223: Add interval function support to StandardQueryParser. Add min-should-match operator
support to StandardQueryParser. Update and clean up package documentation in flexible query parser
module. (Dawid Weiss, Alan Woodward)
* LUCENE-10220: Add a utility method to get IntervalSource from analyzed text (or token stream).
(Uwe Schindler, Dawid Weiss, Alan Woodward)

View File

@ -17,6 +17,13 @@
# Apache Lucene Migration Guide
## Minor syntactical changes in StandardQueryParser (Lucene 9.1)
LUCENE-10223 adds interval functions and min-should-match support to StandardQueryParser. This
means that interval function prefixes ("fn:") and the '@' character after parentheses will
parse differently than before. If you need the exact previous behavior, clone the StandardSyntaxParser from the previous version of Lucene and create a custom query parser
with that parser.
## Directory API is now little endian (LUCENE-9047)
DataOutput's writeShort, writeInt, and writeLong methods now encode with

View File

@ -17,7 +17,6 @@
package org.apache.lucene.search.matchhighlight;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.function.BiFunction;
import java.util.function.Consumer;
@ -34,7 +33,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
/**
* Utility class for building an ephemeral document index and running a block of code on its reader.
@ -82,8 +81,8 @@ class IndexBuilder {
return this;
}
public IndexBuilder build(Analyzer analyzer, IOUtils.IOConsumer<DirectoryReader> block)
throws IOException {
public IndexBuilder build(
Analyzer analyzer, LuceneTestCase.ThrowingConsumer<DirectoryReader> block) throws Exception {
IndexWriterConfig config = new IndexWriterConfig(analyzer);
config.setIndexSort(new Sort(new SortField(FLD_SORT_ORDER, SortField.Type.LONG)));
try (Directory directory = new ByteBuffersDirectory()) {

View File

@ -49,6 +49,7 @@ import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.IntervalQuery;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
@ -127,7 +128,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
}
@Test
public void testBasicUsage() throws IOException {
public void testBasicUsage() throws Exception {
new IndexBuilder(this::toField)
.doc(FLD_TEXT1, "foo bar baz")
.doc(FLD_TEXT1, "bar foo baz")
@ -237,7 +238,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
}
@Test
public void testSynonymHighlight() throws IOException {
public void testSynonymHighlight() throws Exception {
// There is nothing special needed to highlight or process complex queries, synonyms, etc.
// Synonyms defined in the constructor of this class.
new IndexBuilder(this::toField)
@ -268,7 +269,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
}
@Test
public void testAnalyzedTextIntervals() throws IOException {
public void testAnalyzedTextIntervals() throws Exception {
SynonymMap synonymMap =
buildSynonymMap(
new String[][] {
@ -319,7 +320,229 @@ public class TestMatchHighlighter extends LuceneTestCase {
}
@Test
public void testCustomFieldHighlightHandling() throws IOException {
public void testStandardQueryParserIntervalFunctions() throws Exception {
// Data-driven test: parses a table of interval function queries ("fn:...") with
// StandardQueryParser and compares the highlighted output of each query against the
// expected lines, using a single indexed sentence.

// Analyzer used both for indexing and for query parsing: standard tokenizer + lower-casing.
Analyzer analyzer =
new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new StandardTokenizer();
TokenStream ts = tokenizer;
ts = new LowerCaseFilter(ts);
return new TokenStreamComponents(tokenizer, ts);
}
};
// TODO: the highlights are different when the field is indexed with offsets. Weird.
// String field = FLD_TEXT1;
String field = FLD_TEXT2;
new IndexBuilder(this::toField)
// Just one document and multiple interval queries.
.doc(field, "The quick brown fox jumps over the lazy dog")
.build(
analyzer,
reader -> {
IndexSearcher searcher = new IndexSearcher(reader);
Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.
// Highlight every field (fld -> true); matches are wrapped in '>' and '<' as seen
// in the expected strings below. Any remaining fields are skipped.
MatchHighlighter highlighter =
new MatchHighlighter(searcher, analyzer)
.appendFieldHighlighter(
FieldValueHighlighters.highlighted(
80 * 3, 1, new PassageFormatter("...", ">", "<"), fld -> true))
.appendFieldHighlighter(FieldValueHighlighters.skipRemaining());
StandardQueryParser qp = new StandardQueryParser(analyzer);
// Run all pairs of query-expected highlight.
// Each row holds the query string followed by zero or more expected highlight lines;
// "%s" in an expected line is replaced with the field name. Rows that contain only
// the query string are compared against an empty list of expected lines.
List<String> errors = new ArrayList<>();
for (var queryHighlightPair :
new String[][] {
{
"fn:ordered(brown dog)",
"0. %s: The quick >brown fox jumps over the lazy dog<"
},
{
"fn:within(fn:or(lazy quick) 1 fn:or(dog fox))",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:containedBy(fox fn:ordered(brown fox dog))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)",
"0. %s: The >quick >brown fox jumps over the lazy<<> dog<"
},
{
"fn:atLeast(2 quick fox \"furry dog\")",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
},
{
"fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
},
{
"fn:or(quick \"fox\")",
"0. %s: The >quick< brown >fox< jumps over the lazy dog"
},
{"fn:or(\"quick fox\")"},
{
"fn:phrase(quick brown fox)",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{"fn:wildcard(jump*)", "0. %s: The quick brown fox >jumps< over the lazy dog"},
{"fn:wildcard(br*n)", "0. %s: The quick >brown< fox jumps over the lazy dog"},
{"fn:or(dog fox)", "0. %s: The quick brown >fox< jumps over the lazy >dog<"},
{
"fn:phrase(fn:ordered(quick fox) jumps)",
"0. %s: The >quick brown fox jumps< over the lazy dog"
},
{
"fn:ordered(quick jumps dog)",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:ordered(quick fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:ordered(quick jumps fn:or(fox dog))",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:unordered(dog jumps quick)",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:unordered(fn:or(fox dog) quick)",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))",
"0. %s: The quick >brown fox jumps< over the lazy dog"
},
{"fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))"},
{"fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))"},
{
"fn:before(fn:or(brown lazy) fox)",
"0. %s: The quick >brown< fox jumps over the lazy dog"
},
{
"fn:before(fn:or(brown lazy) fn:or(dog fox))",
"0. %s: The quick >brown< fox jumps over the >lazy< dog"
},
{
"fn:after(fn:or(brown lazy) fox)",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:after(fn:or(brown lazy) fn:or(dog fox))",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
{
"fn:extend(fn:or(dog fox) 2 0)",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:within(fn:or(fox dog) 1 fn:or(quick lazy))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:within(fn:or(fox dog) 2 fn:or(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy >dog<"
},
{
"fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:containing(fn:atLeast(2 quick fox dog) jumps)",
"0. %s: The quick brown >fox jumps over the lazy dog<"
},
{
"fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)",
"0. %s: The quick brown fox jumps over >the lazy dog<"
},
{
"fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))",
"0. %s: The quick >brown fox< jumps over the lazy dog"
},
{
"fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))",
"0. %s: The quick >brown fox< jumps over the lazy dog"
},
{
"fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
}) {
// Every row must carry at least the query string.
assert queryHighlightPair.length >= 1;
String queryString = queryHighlightPair[0];
var query = qp.parse(queryString, field);
// Everything after the query string is an expected line; substitute the field name.
var expected =
Arrays.stream(queryHighlightPair)
.skip(1)
.map(v -> String.format(Locale.ROOT, v, field))
.toArray(String[]::new);
// Collect mismatches instead of failing on the first one, so that all failing
// queries are reported together at the end.
try {
assertHighlights(
toDocList(
highlighter.highlight(searcher.search(query, 10, sortOrder), query)),
expected);
} catch (AssertionError e) {
errors.add("MISMATCH: query: " + queryString + "\n" + e.getMessage());
}
}
// Fail once, with every collected mismatch in the message.
if (errors.size() > 0) {
throw new AssertionError(String.join("\n\n", errors));
}
});
}
@Test
public void testCustomFieldHighlightHandling() throws Exception {
// Match highlighter is a showcase of individual components in this package, suitable
// to create any kind of field-display designs.
//
@ -427,7 +650,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
}
@Test
public void testHighlightMoreQueriesAtOnceShowoff() throws IOException {
public void testHighlightMoreQueriesAtOnceShowoff() throws Exception {
// Match highlighter underlying components are powerful enough to build interesting,
// if not always super-practical, things. In this case, we would like to highlight
// a set of matches of *more than one* query over the same set of input documents. This includes
@ -566,14 +789,15 @@ public class TestMatchHighlighter extends LuceneTestCase {
}
}
if (!Arrays.equals(
Stream.of(expectedFormattedLines).map(String::trim).toArray(),
actualLines.stream().map(String::trim).toArray())) {
var expectedTrimmed =
Stream.of(expectedFormattedLines).map(String::trim).collect(Collectors.toList());
var actualTrimmed = actualLines.stream().map(String::trim).collect(Collectors.toList());
if (!Objects.equals(expectedTrimmed, actualTrimmed)) {
throw new AssertionError(
"Actual hits were:\n"
+ String.join("\n", actualLines)
+ "\n\n but expected them to be:\n"
+ String.join("\n", expectedFormattedLines));
+ String.join("\n", actualTrimmed)
+ "\n\nbut expected them to be:\n"
+ String.join("\n", expectedTrimmed));
}
}

View File

@ -64,6 +64,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;
import org.junit.Before;
import org.junit.Test;
@ -158,16 +159,16 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
};
@Test
public void testTermQueryWithOffsets() throws IOException {
public void testTermQueryWithOffsets() throws Exception {
checkTermQuery(FLD_TEXT_POS_OFFS);
}
@Test
public void testTermQueryWithPositions() throws IOException {
public void testTermQueryWithPositions() throws Exception {
checkTermQuery(FLD_TEXT_POS);
}
private void checkTermQuery(String field) throws IOException {
private void checkTermQuery(String field) throws Exception {
new IndexBuilder(this::toField)
.doc(field, "foo bar baz")
.doc(field, "bar foo baz")
@ -176,7 +177,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(reader, new TermQuery(new Term(field, "foo"))),
containsInAnyOrder(
fmt("0: (%s: '>foo< bar baz')", field),
@ -186,16 +187,16 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
}
@Test
public void testBooleanMultifieldQueryWithOffsets() throws IOException {
public void testBooleanMultifieldQueryWithOffsets() throws Exception {
checkBooleanMultifieldQuery(FLD_TEXT_POS_OFFS);
}
@Test
public void testBooleanMultifieldQueryWithPositions() throws IOException {
public void testBooleanMultifieldQueryWithPositions() throws Exception {
checkBooleanMultifieldQuery(FLD_TEXT_POS);
}
private void checkBooleanMultifieldQuery(String field) throws IOException {
private void checkBooleanMultifieldQuery(String field) throws Exception {
Query query =
new BooleanQuery.Builder()
.add(new PhraseQuery(1, field, "foo", "baz"), BooleanClause.Occur.SHOULD)
@ -210,7 +211,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(reader, query),
containsInAnyOrder(
fmt("0: (%s: '>foo bar baz< abc')", field),
@ -219,16 +220,16 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
}
@Test
public void testVariousQueryTypesWithOffsets() throws IOException {
public void testVariousQueryTypesWithOffsets() throws Exception {
checkVariousQueryTypes(FLD_TEXT_POS_OFFS);
}
@Test
public void testVariousQueryTypesWithPositions() throws IOException {
public void testVariousQueryTypesWithPositions() throws Exception {
checkVariousQueryTypes(FLD_TEXT_POS);
}
private void checkVariousQueryTypes(String field) throws IOException {
private void checkVariousQueryTypes(String field) throws Exception {
new IndexBuilder(this::toField)
.doc(field, "foo bar baz abc")
.doc(field, "bar foo baz def")
@ -236,46 +237,46 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("foo baz", field)),
containsInAnyOrder(
fmt("0: (%s: '>foo< bar >baz< abc')", field),
fmt("1: (%s: 'bar >foo< >baz< def')", field),
fmt("2: (%s: 'bar >baz< >foo< xyz')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("foo OR xyz", field)),
containsInAnyOrder(
fmt("0: (%s: '>foo< bar baz abc')", field),
fmt("1: (%s: 'bar >foo< baz def')", field),
fmt("2: (%s: 'bar baz >foo< >xyz<')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("bas~2", field)),
containsInAnyOrder(
fmt("0: (%s: 'foo >bar< >baz< >abc<')", field),
fmt("1: (%s: '>bar< foo >baz< def')", field),
fmt("2: (%s: '>bar< >baz< foo xyz')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("\"foo bar\"", field)),
containsInAnyOrder((fmt("0: (%s: '>foo bar< baz abc')", field))));
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("\"foo bar\"~3", field)),
containsInAnyOrder(
fmt("0: (%s: '>foo bar< baz abc')", field),
fmt("1: (%s: '>bar foo< baz def')", field),
fmt("2: (%s: '>bar baz foo< xyz')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("ba*", field)),
containsInAnyOrder(
fmt("0: (%s: 'foo >bar< >baz< abc')", field),
fmt("1: (%s: '>bar< foo >baz< def')", field),
fmt("2: (%s: '>bar< >baz< foo xyz')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("[bar TO bas]", field)),
containsInAnyOrder(
fmt("0: (%s: 'foo >bar< baz abc')", field),
@ -284,14 +285,15 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
// Note how document '2' has 'bar' that isn't highlighted (because this
// document is excluded in the first clause).
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("([bar TO baz] -xyz) OR baz", field)),
containsInAnyOrder(
fmt("0: (%s: 'foo >bar< >>baz<< abc')", field),
fmt("1: (%s: '>bar< foo >>baz<< def')", field),
fmt("2: (%s: 'bar >baz< foo xyz')", field)));
assertThat(highlights(reader, new MatchAllDocsQuery()), Matchers.hasSize(0));
MatcherAssert.assertThat(
highlights(reader, new MatchAllDocsQuery()), Matchers.hasSize(0));
});
new IndexBuilder(this::toField)
@ -301,7 +303,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("[bar TO baz] -bar", field)),
containsInAnyOrder(
fmt("0: (%s: 'foo >baz< foo')", field),
@ -310,7 +312,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
}
@Test
public void testIntervalQueryHighlightCrossingMultivalueBoundary() throws IOException {
public void testIntervalQueryHighlightCrossingMultivalueBoundary() throws Exception {
String field = FLD_TEXT_POS;
new IndexBuilder(this::toField)
.doc(field, "foo", "bar")
@ -328,7 +330,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
}
@Test
public void testIntervalQueries() throws IOException {
public void testIntervalQueries() throws Exception {
String field = FLD_TEXT_POS_OFFS;
new IndexBuilder(this::toField)
@ -338,7 +340,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
new IntervalQuery(
@ -349,7 +351,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
Intervals.term("baz")))),
containsInAnyOrder(fmt("1: (field_text_offs: '>bas baz foo<')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
new IntervalQuery(
@ -359,7 +361,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
Intervals.unordered(Intervals.term("foo"), Intervals.term("bar"))))),
containsInAnyOrder(fmt("2: (field_text_offs: '>bar baz foo< xyz')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
new IntervalQuery(
@ -369,7 +371,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
Intervals.term("foo")))),
containsInAnyOrder(fmt("2: (field_text_offs: '>bar baz foo< xyz')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
new IntervalQuery(
@ -379,7 +381,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
Intervals.unordered(Intervals.term("foo"), Intervals.term("bar"))))),
containsInAnyOrder(fmt("2: (field_text_offs: '>bar baz foo< xyz')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
new IntervalQuery(
@ -392,7 +394,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
}
@Test
public void testDegenerateIntervalsWithPositions() throws IOException {
public void testDegenerateIntervalsWithPositions() throws Exception {
testDegenerateIntervals(FLD_TEXT_POS);
}
@ -401,23 +403,23 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
bugUrl =
"https://issues.apache.org/jira/browse/LUCENE-9634: "
+ "Highlighting of degenerate spans on fields with offsets doesn't work properly")
public void testDegenerateIntervalsWithOffsets() throws IOException {
public void testDegenerateIntervalsWithOffsets() throws Exception {
testDegenerateIntervals(FLD_TEXT_POS_OFFS);
}
public void testDegenerateIntervals(String field) throws IOException {
public void testDegenerateIntervals(String field) throws Exception {
new IndexBuilder(this::toField)
.doc(field, fmt("foo %s bar", STOPWORD1))
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
new IntervalQuery(field, Intervals.extend(Intervals.term("bar"), 1, 3))),
containsInAnyOrder(fmt("0: (%s: 'foo %s >bar<')", field, STOPWORD1)));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
new IntervalQuery(field, Intervals.extend(Intervals.term("bar"), 5, 100))),
@ -426,16 +428,16 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
}
@Test
public void testMultivaluedFieldsWithOffsets() throws IOException {
public void testMultivaluedFieldsWithOffsets() throws Exception {
checkMultivaluedFields(FLD_TEXT_POS_OFFS);
}
@Test
public void testMultivaluedFieldsWithPositions() throws IOException {
public void testMultivaluedFieldsWithPositions() throws Exception {
checkMultivaluedFields(FLD_TEXT_POS);
}
public void checkMultivaluedFields(String field) throws IOException {
public void checkMultivaluedFields(String field) throws Exception {
new IndexBuilder(this::toField)
.doc(field, "foo bar", "baz abc", "bad baz")
.doc(field, "bar foo", "baz def")
@ -443,7 +445,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply("baz", field)),
containsInAnyOrder(
fmt("0: (%s: '>baz< abc | bad >baz<')", field),
@ -453,7 +455,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
}
@Test
public void testMultiFieldHighlights() throws IOException {
public void testMultiFieldHighlights() throws Exception {
for (String[] fieldPairs :
new String[][] {
{FLD_TEXT_POS_OFFS1, FLD_TEXT_POS_OFFS2},
@ -477,7 +479,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.sorted()
.collect(Collectors.joining(""));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
stdQueryParser.apply(field1 + ":baz" + " OR " + field2 + ":bar", field1)),
@ -491,7 +493,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
* org.apache.lucene.search.BooleanClause.Occur#SHOULD} clauses. Check that this isn't the case.
*/
@Test
public void testNoRewrite() throws IOException {
public void testNoRewrite() throws Exception {
String field1 = FLD_TEXT_POS_OFFS1;
String field2 = FLD_TEXT_POS_OFFS2;
@ -510,13 +512,13 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
analyzer,
reader -> {
String expected = fmt("0: (%s: '>0100<')(%s: 'loo >bar<')", field1, field2);
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
stdQueryParser.apply(fmt("+%s:01* OR %s:bar", field1, field2), field1)),
containsInAnyOrder(expected));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
stdQueryParser.apply(fmt("+%s:01* AND %s:bar", field1, field2), field1)),
@ -525,22 +527,22 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
}
@Test
public void testNestedQueryHitsWithOffsets() throws IOException {
public void testNestedQueryHitsWithOffsets() throws Exception {
checkNestedQueryHits(FLD_TEXT_POS_OFFS);
}
@Test
public void testNestedQueryHitsWithPositions() throws IOException {
public void testNestedQueryHitsWithPositions() throws Exception {
checkNestedQueryHits(FLD_TEXT_POS);
}
public void checkNestedQueryHits(String field) throws IOException {
public void checkNestedQueryHits(String field) throws Exception {
new IndexBuilder(this::toField)
.doc(field, "foo bar baz abc")
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
new BooleanQuery.Builder()
@ -549,7 +551,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build()),
containsInAnyOrder(fmt("0: (%s: '>foo >bar< baz< abc')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
new BooleanQuery.Builder()
@ -571,7 +573,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
checkGraphQuery(FLD_TEXT_SYNONYMS_POS);
}
private void checkGraphQuery(String field) throws IOException {
private void checkGraphQuery(String field) throws Exception {
new IndexBuilder(this::toField)
.doc(field, "foo bar baz")
.doc(field, "bar foo baz")
@ -580,25 +582,25 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(reader, new TermQuery(new Term(field, "syn1"))),
containsInAnyOrder(fmt("0: (%s: '>foo bar< baz')", field)));
// [syn2 syn3] = baz
// so both these queries highlight baz.
assertThat(
MatcherAssert.assertThat(
highlights(reader, new TermQuery(new Term(field, "syn3"))),
containsInAnyOrder(
fmt("0: (%s: 'foo bar >baz<')", field),
fmt("1: (%s: 'bar foo >baz<')", field),
fmt("2: (%s: 'bar >baz< foo')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply(field + ":\"syn2 syn3\"", field)),
containsInAnyOrder(
fmt("0: (%s: 'foo bar >baz<')", field),
fmt("1: (%s: 'bar foo >baz<')", field),
fmt("2: (%s: 'bar >baz< foo')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(reader, stdQueryParser.apply(field + ":\"foo syn2 syn3\"", field)),
containsInAnyOrder(fmt("1: (%s: 'bar >foo baz<')", field)));
});
@ -614,7 +616,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
checkSpanQueries(FLD_TEXT_POS);
}
private void checkSpanQueries(String field) throws IOException {
private void checkSpanQueries(String field) throws Exception {
new IndexBuilder(this::toField)
.doc(field, "foo bar baz")
.doc(field, "bar foo baz")
@ -623,7 +625,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
SpanNearQuery.newOrderedNearQuery(field)
@ -632,7 +634,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build()),
containsInAnyOrder(fmt("1: (%s: '>bar foo< baz')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
SpanNearQuery.newOrderedNearQuery(field)
@ -642,7 +644,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build()),
containsInAnyOrder(fmt("2: (%s: '>bar baz foo<')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
SpanNearQuery.newUnorderedNearQuery(field)
@ -653,7 +655,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
fmt("0: (%s: '>foo bar< baz')", field),
fmt("1: (%s: '>bar foo< baz')", field)));
assertThat(
MatcherAssert.assertThat(
highlights(
reader,
SpanNearQuery.newUnorderedNearQuery(field)
@ -694,7 +696,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
}
};
assertThat(
MatcherAssert.assertThat(
highlights(customSuppliers, reader, new TermQuery(new Term(field, "bar"))),
containsInAnyOrder(
fmt("0: (%s: '>foo bar<')", field),
@ -721,7 +723,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
.build(
analyzer,
reader -> {
assertThat(
MatcherAssert.assertThat(
highlights(reader, new TermQuery(new Term(field, "bar"))),
containsInAnyOrder(
fmt("0: (%s: 'foo >bar<')", field),

View File

@ -31,20 +31,20 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* Constructor functions for {@link IntervalsSource} types
* Factory functions for creating {@link IntervalsSource interval sources}.
*
* <p>These sources implement minimum-interval algorithms taken from the paper <a
* href="https://vigna.di.unimi.it/ftp/papers/EfficientLazy.pdf">Efficient Optimally Lazy Algorithms
* for Minimal-Interval Semantics</a>
*
* <p>By default, sources that are sensitive to internal gaps (e.g. {@code PHRASE} and {@code
* MAXGAPS}) will rewrite their sub-sources so that disjunctions of different lengths are pulled up
* to the top of the interval tree. For example, {@code PHRASE(or(PHRASE("a", "b", "c"), "b"), "c")}
* will automatically rewrite itself to {@code OR(PHRASE("a", "b", "c", "c"), PHRASE("b", "c"))} to
* ensure that documents containing {@code "b c"} are matched. This can lead to less efficient
* queries, as more terms need to be loaded (for example, the {@code "c"} iterator above is loaded
* twice), so if you care more about speed than about accuracy you can use the {@link #or(boolean,
* IntervalsSource...)} factory method to prevent rewriting.
* <p><em>Note:</em> by default, sources that are sensitive to internal gaps (e.g. {@code PHRASE}
* and {@code MAXGAPS}) will rewrite their sub-sources so that disjunctions of different lengths are
* pulled up to the top of the interval tree. For example, {@code PHRASE(or(PHRASE("a", "b", "c"),
* "b"), "c")} will automatically rewrite itself to {@code OR(PHRASE("a", "b", "c", "c"),
* PHRASE("b", "c"))} to ensure that documents containing {@code "b c"} are matched. This can lead
* to less efficient queries, as more terms need to be loaded (for example, the {@code "c"} iterator
* above is loaded twice), so if you care more about speed than about accuracy you can use the
* {@link #or(boolean, IntervalsSource...)} factory method to prevent rewriting.
*/
public final class Intervals {
@ -94,7 +94,7 @@ public final class Intervals {
/**
* Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of
* IntervalsSources
* {@link IntervalsSource interval sources}
*/
public static IntervalsSource phrase(IntervalsSource... subSources) {
return BlockIntervalsSource.build(Arrays.asList(subSources));

View File

@ -16,60 +16,72 @@
*/
/**
* Intervals queries
*
*
* <h2>Intervals queries</h2>
*
* This package contains experimental classes to search over intervals within fields
* <p>This package contains experimental classes to search over intervals within fields
*
* <h2>IntervalsSource</h2>
*
* The {@link org.apache.lucene.queries.intervals.IntervalsSource} class can be used to construct
* proximity relationships between terms and intervals. They can be built using static methods in
* the {@link org.apache.lucene.queries.intervals.Intervals} class
* the {@link org.apache.lucene.queries.intervals.Intervals} class.
*
* <h3>Basic intervals</h3>
*
* <ul>
* <li>{@link org.apache.lucene.queries.intervals.Intervals#term(String)} &mdash; Represents a
* single term
* single term.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#phrase(java.lang.String...)} &mdash;
* Represents a phrase
* Represents a phrase.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#analyzedText(java.lang.String,
* org.apache.lucene.analysis.Analyzer, java.lang.String, int, boolean)} &mdash; Represents a
* phrase (or an unordered sequence) of tokens resulting from an analysis of a given text.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#ordered(IntervalsSource...)} &mdash;
* Represents an interval over an ordered set of terms or intervals
* Represents an interval over an ordered set of terms or intervals.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#unordered(IntervalsSource...)} &mdash;
* Represents an interval over an unordered set of terms or intervals
* Represents an interval over an unordered set of terms or intervals.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#or(IntervalsSource...)} &mdash;
* Represents the disjunction of a set of terms or intervals
* Represents the disjunction of a set of terms or intervals.
* <li>{@link
* org.apache.lucene.queries.intervals.Intervals#wildcard(org.apache.lucene.util.BytesRef)}
* &mdash; Represents a suffix wildcard (any prefix-matching term from the index).
* </ul>
*
* <h3>Filters</h3>
*
* <ul>
* <li>{@link org.apache.lucene.queries.intervals.Intervals#maxwidth(int, IntervalsSource)}
* &mdash; Filters out intervals that are larger than a set width
* &mdash; Filters out intervals that are larger than a set width.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#maxgaps(int, IntervalsSource)} &mdash;
* Filters out intervals that have more than a set number of gaps between their constituent
* sub-intervals
* sub-intervals.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#containedBy(IntervalsSource,
* IntervalsSource)} &mdash; Returns intervals that are contained by another interval
* IntervalsSource)} &mdash; Returns intervals that are contained by another interval.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#notContainedBy(IntervalsSource,
* IntervalsSource)} &mdash; Returns intervals that are *not* contained by another interval
* IntervalsSource)} &mdash; Returns intervals that are *not* contained by another interval.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#containing(IntervalsSource,
* IntervalsSource)} &mdash; Returns intervals that contain another interval
* IntervalsSource)} &mdash; Returns intervals that contain another interval.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#notContaining(IntervalsSource,
* IntervalsSource)} &mdash; Returns intervals that do not contain another interval
* IntervalsSource)} &mdash; Returns intervals that do not contain another interval.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#nonOverlapping(IntervalsSource,
* IntervalsSource)} &mdash; Returns intervals that do not overlap with another interval
* IntervalsSource)} &mdash; Returns intervals that do not overlap with another interval.
* <li>{@link org.apache.lucene.queries.intervals.Intervals#notWithin(IntervalsSource, int,
* IntervalsSource)} &mdash; Returns intervals that do not appear within a set number of
* positions of another interval
* positions of another interval.
* </ul>
*
* The {@link org.apache.lucene.queries.intervals.Intervals} class contains more advanced filters;
* please refer to the documentation of that class.
*
* <h2>IntervalQuery</h2>
*
* An {@link org.apache.lucene.queries.intervals.IntervalQuery} takes a field name and an {@link
* org.apache.lucene.queries.intervals.IntervalsSource}, and matches all documents that contain
* intervals defined by the source in that field.
*
* <h2>Interval query support in query parsers</h2>
*
* <p>Lucene's {@code StandardQueryParser} (from the {@code queryparser} module) supports interval
* function expressions.
*/
package org.apache.lucene.queries.intervals;

View File

@ -1,62 +0,0 @@
NOTE: often, if you are making a small change to the .jj file, you can
simply run "ant javacc" and skip the steps below. JavaCC will print
warnings like this:
Warning: ParseException.java: File is obsolete. Please rename or delete this file so that a new one can be generated for you.
which you should ignore (ie, simply keep the ParseException.java class
that's already present).
If, instead, you'd like to fully rebuild the StandardQueryParser,
here's how:
* Delete these files:
StandardQueryParser.java
StandardQueryParserConstants.java
StandardQueryParserTokenManager.java
TokenMgrError.java
JavaCharStream.java
Token.java
* Run "ant javacc". That will generate the all the classes
* To avoid lots of warnings in the generated code:
add @SupressWarnings("all"), immediately preceding the class declaration to:
QueryParserTokenManager.java
TokenMgrError.java
JavaCharStream.java
Token.java
JavaCharStream.java
* Remove all imports from TokenMgrError.java
* Fix the ParseException class:
- Change it to extend from QueryNodeParseException:
"public class ParseException extends QueryNodeParseException".
- Recreate the all the constructors like this:
public ParseException(Token currentTokenVal,
int[][] expectedTokenSequencesVal, String[] tokenImageVal) {
super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, initialise(
currentTokenVal, expectedTokenSequencesVal, tokenImageVal)));
this.currentToken = currentTokenVal;
this.expectedTokenSequences = expectedTokenSequencesVal;
this.tokenImage = tokenImageVal;
}
public ParseException(Message message) {
super(message);
}
public ParseException() {
super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, "Error"));
}
- Fix all imports

View File

@ -1,9 +1,9 @@
{
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java": "3d5f272a6d56b3f4962b252267ce2662e734414e",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java": "75e9d84f424bb697f899fe3adacc0094bac00672",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj": "08b62ed73607b1646af5dadb81c8bb34e381daee",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java": "e73933bff38a62d90dab64f72a1a0deadfff246f",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java": "6e503b48ffa9f4648798e5394f7baeec366d1f07",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java": "fd1fcc78bf1025fe6fe54ab6f9ae2f53cce33364",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj": "eb0d1c55d029982ab8ea433cf9ef1088ba6ea3de",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java": "d3c5d87c46635dbb6dc03bbdc0fb662b47ec318d",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java": "d8e12b467779c1740ea2b672c10806ac25e0184e",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java": "f4cb9d01587279dba30e549ce4867e4381bbd9d7",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java": "cdfa99af5fcf6b1e50691a1c1370ba60bf0d2d2d"
}

View File

@ -25,7 +25,7 @@ import org.apache.lucene.queryparser.flexible.core.util.StringUtils;
* configuration, it creates an empty {@link FieldConfig} object and delegates it to field config
* listeners, these are responsible for setting up all the field configuration.
*
* <p>{@link QueryConfigHandler} should be extended by classes that intends to provide configuration
* <p>{@link QueryConfigHandler} should be extended by classes that intend to provide configuration
* to {@link QueryNodeProcessor} objects.
*
* <p>The class that extends {@link QueryConfigHandler} should also provide {@link FieldConfig}

View File

@ -20,7 +20,6 @@ import org.apache.lucene.queryparser.flexible.messages.NLS;
/** Flexible Query Parser message bundle class */
public class QueryParserMessages extends NLS {
private static final String BUNDLE_NAME = QueryParserMessages.class.getName();
private QueryParserMessages() {
@ -52,4 +51,5 @@ public class QueryParserMessages extends NLS {
public static String NUMBER_CLASS_NOT_SUPPORTED_BY_NUMERIC_RANGE_QUERY;
public static String UNSUPPORTED_NUMERIC_DATA_TYPE;
public static String NUMERIC_CANNOT_BE_EMPTY;
public static String ANALYZER_REQUIRED;
}

View File

@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Flexible query parser is a modular, extensible framework for implementing Lucene query parsers.
* In the flexible query parser model, query parsing takes three steps: syntax parsing, processing
* (query semantics) and building (conversion to a Lucene {@link org.apache.lucene.search.Query}).
*
* <p>The flexible query parser module provides not just the framework but also the {@linkplain
* org.apache.lucene.queryparser.flexible.standard.StandardQueryParser} - the default implementation
* of a fully fledged query parser that supports most of the classic query parser's syntax but also
* adds support for interval functions, min-should-match operator on Boolean groups and many hooks
* for customization of how the parser behaves at runtime.
*
* <p>The flexible query parser is divided into two packages:
*
* <ul>
* <li>{@link org.apache.lucene.queryparser.flexible.core}: contains the query parser API classes,
* which should be extended by custom query parser implementations.
* <li>{@link org.apache.lucene.queryparser.flexible.standard}: contains an example Lucene query
* parser implementation built on top of the flexible query parser API.
* </ul>
*
* <h2>Features</h2>
*
* <ol>
* <li>full support for Boolean expressions, including groups
* <li>{@linkplain org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser syntax parsers}
* - support for arbitrary syntax parsers, that can be converted into {@link
* org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} trees.
* <li>{@linkplain org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor query
* node processors} - optimize, validate, rewrite the {@link
* org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} trees
* <li>{@linkplain
* org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline processor
* pipelines} - select your favorite query processors and build a pipeline to implement the
* features you need.
* <li>{@linkplain org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler query
* configuration handlers}
* <li>{@linkplain org.apache.lucene.queryparser.flexible.core.builders.QueryBuilder query
* builders} - convert {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}
* trees into Lucene {@link org.apache.lucene.search.Query} instances.
* </ol>
*
* <h2>Design</h2>
*
* <p>The flexible query parser was designed to have a very generic architecture, so that it can be
* easily used for different products with varying query syntax needs.
*
* <p>The query parser has three layers and its core is what we call the {@linkplain
* org.apache.lucene.queryparser.flexible.core.nodes.QueryNode query node tree}. It is a tree of
* objects that represent the syntax of the original query, for example, for 'a AND b' the tree
* could look like this:
*
* <pre>
* AND
* / \
* A B
* </pre>
*
* <p>The three flexible query parser layers are:
*
* <dl>
* <dt>{@link org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser}
* <dd>This layer is the text parsing layer which simply transforms the query text string into a
* {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} tree. Every text parser
* must implement the interface {@link
* org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser}. The default
* implementation is {@link
* org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser}.
* <dt>{@link org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor}
* <dd>The query node processor does most of the work: it contains a chain of {@linkplain
* org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor query node
* processors}. Each processor can walk the tree and modify nodes or even the tree's
* structure. This allows for query optimization before the node tree is converted to an
* actual query.
* <dt>{@link org.apache.lucene.queryparser.flexible.core.builders.QueryBuilder}
* <dd>The third layer is a configurable map of builders, which map {@linkplain
* org.apache.lucene.queryparser.flexible.core.nodes.QueryNode query nodes} to their adapters
* that convert each node into a {@link org.apache.lucene.search.Query}.
* </dl>
*/
package org.apache.lucene.queryparser.flexible;

View File

@ -39,57 +39,189 @@ import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
/**
* This class is a helper that enables users to easily use the Lucene query parser.
* The {@link StandardQueryParser} is a pre-assembled query parser that supports most features of
* the {@linkplain org.apache.lucene.queryparser.classic.QueryParser classic Lucene query parser},
* allows dynamic configuration of some of its features (like multi-field expansion or wildcard
* query restrictions) and adds support for new query types and expressions.
*
* <p>To construct a Query object from a query string, use the {@link #parse(String, String)}
* method:
* <p>The {@link StandardQueryParser} is an extension of the {@link QueryParserHelper} with
* reasonable defaults for syntax tree parsing ({@link StandardSyntaxParser}), node processor
* pipeline ({@link StandardQueryNodeProcessorPipeline}) and node tree to {@link Query} builder
* ({@link StandardQueryTreeBuilder}).
*
* <pre class="prettyprint">
* StandardQueryParser queryParserHelper = new StandardQueryParser();
* Query query = queryParserHelper.parse("a AND b", "defaultField");
* </pre>
* <p>Typical usage, including configuration tweaks:
*
* <p>To change any configuration before parsing the query string do, for example: <br>
* <pre class="prettyprint">{@code
* StandardQueryParser qpHelper = new StandardQueryParser();
* StandardQueryConfigHandler config = qpHelper.getQueryConfigHandler();
* config.setAllowLeadingWildcard(true);
* config.setAnalyzer(new WhitespaceAnalyzer());
* Query query = qpHelper.parse("apache AND lucene", "defaultField");
* }</pre>
*
* <pre class="prettyprint">
* // the query config handler returned by {@link StandardQueryParser} is a {@link StandardQueryConfigHandler}
* queryParserHelper.getQueryConfigHandler().setAnalyzer(new WhitespaceAnalyzer());
* </pre>
* <h2>Supported query syntax</h2>
*
* <p>The syntax for query strings is as follows (copied from the old QueryParser javadoc): A Query
* is a series of clauses. A clause may be prefixed by:
* <p>Standard query parser borrows most of its syntax from the {@linkplain
* org.apache.lucene.queryparser.classic classic query parser} but adds more features and
* expressions on top of that syntax.
*
* <p>A <em>query</em> consists of clauses, field specifications, grouping and Boolean operators and
* interval functions. We will discuss them in order.
*
* <h3>Basic clauses</h3>
*
* <p>A query must contain one or more clauses. A clause can be a literal term, a phrase, a wildcard
* expression or another supported expression.
*
* <p>The following are some examples of simple one-clause queries:
*
* <ul>
* <li>a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating that the clause is
* required or prohibited respectively; or
* <li>a term followed by a colon, indicating the field to be searched. This enables one to
* construct queries which search multiple fields.
* <li><code>test</code>
* <p>selects documents containing the word <em>test</em> (term clause).
* <li><code>"test equipment"</code>
* <p>phrase search; selects documents containing the phrase <em>test equipment</em> (phrase
* clause).
* <li><code>"test failure"~4</code>
* <p>proximity search; selects documents containing the words <em>test</em> and
* <em>failure</em> within 4 words (positions) from each other. The provided "proximity" is
* technically translated into "edit distance" (maximum number of atomic word-moving
* operations required to transform the document's phrase into the query phrase).
* <li><code>tes*</code>
* <p>prefix wildcard matching; selects documents containing words starting with <em>tes</em>,
* such as: <em>test</em>, <em>testing</em> or <em>testable</em>.
* <li><code>/.est(s|ing)/</code>
* <p>documents containing words matching the provided regular expression, such as
* <em>resting</em> or <em>nests</em>.
* <li><code>nest~2</code>
* <p>fuzzy term matching; documents containing words within 2-edits distance (2 additions,
* removals or replacements of a letter) from <em>nest</em>, such as <em>test</em>,
* <em>net</em> or <em>rests</em>.
* </ul>
*
* A clause may be either:
* <h3>Field specifications</h3>
*
* <p>Most clauses can be prefixed by a field name and a colon: the clause will then apply to that
* field only. If the field specification is omitted, the query parser will expand the clause over
* all fields specified by a call to {@link StandardQueryParser#setMultiFields(CharSequence[])} or
* will use the default field provided in the call to {@link #parse(String, String)}.
*
* <p>The following are some examples of field-prefixed clauses:
*
* <ul>
* <li>a term, indicating all the documents that contain this term; or
* <li>a nested query, enclosed in parentheses. Note that this may be used with a <code>+</code>/
* <code>-</code> prefix to require any of a set of terms.
* <li><code>title:test</code>
* <p>documents containing <em>test</em> in the <code>title</code> field.
* <li><code>title:(die OR hard)</code>
* <p>documents containing <em>die</em> or <em>hard</em> in the <code>title</code> field.
* </ul>
*
* Thus, in BNF, the query grammar is:
* <h3>Boolean operators and grouping</h3>
*
* <pre>
* Query ::= ( Clause )*
* Clause ::= [&quot;+&quot;, &quot;-&quot;] [&lt;TERM&gt; &quot;:&quot;] ( &lt;TERM&gt; | &quot;(&quot; Query &quot;)&quot; )
* </pre>
* <p>You can combine clauses using Boolean AND, OR and NOT operators to form more complex
* expressions, for example:
*
* <p>Examples of appropriately formatted queries can be found in the <a
* href="{@docRoot}/org/apache/lucene/queryparser/classic/package-summary.html#package.description">
* query syntax documentation</a>.
* <ul>
* <li><code>test AND results</code>
* <p>selects documents containing both the word <em>test</em> and the word <em>results</em>.
* <li><code>test OR suite OR results</code>
* <p>selects documents with at least one of <em>test</em>, <em>suite</em> or
* <em>results</em>.
* <li><code>title:test AND NOT title:complete</code>
* <p>selects documents containing <em>test</em> and not containing <em>complete</em> in the
* <code>title</code> field.
* <li><code>title:test AND (pass* OR fail*)</code>
* <p>grouping; use parentheses to specify the precedence of terms in a Boolean clause. Query
* will match documents containing <em>test</em> in the <code>title</code> field and a word
* starting with <em>pass</em> or <em>fail</em> in the default search fields.
* <li><code>title:(pass fail skip)</code>
* <p>shorthand notation; documents containing at least one of <em>pass</em>, <em>fail</em> or
* <em>skip</em> in the <code>title</code> field.
* <li><code>title:(+test +"result unknown")</code>
* <p>shorthand notation; documents containing both <em>test</em> and <em>result unknown</em>
* in the <code>title</code> field.
* </ul>
*
* <p>The text parser used by this helper is a {@link StandardSyntaxParser}.
* <p>Note the Boolean operators must be written in all caps, otherwise they are parsed as regular
* terms.
*
* <p>The query node processor used by this helper is a {@link StandardQueryNodeProcessorPipeline}.
* <h3>Range operators</h3>
*
* <p>The builder used by this helper is a {@link StandardQueryTreeBuilder}.
* <p>To search for ranges of textual or numeric values, use square or curly brackets, for example:
*
* <ul>
* <li><code>name:[Jones TO Smith]</code>
* <p>inclusive range; selects documents whose <code>name
* </code> field has any value between <em>Jones</em> and <em>Smith</em>, including
* boundaries.
* <li><code>score:{2.5 TO 7.3}</code>
* <p>exclusive range; selects documents whose <code>score</code> field is between 2.5 and
* 7.3, excluding boundaries.
* <li><code>score:{2.5 TO *]</code>
* <p>one-sided range; selects documents whose <code>score</code> field is larger than 2.5.
* </ul>
*
* <h3>Term boosting</h3>
*
* <p>Terms, quoted terms, term range expressions and grouped clauses can have a floating-point
* weight <em>boost</em> applied to them to increase their score relative to other clauses. For
* example:
*
* <ul>
* <li><code>jones^2 OR smith^0.5</code>
* <p>prioritize documents with <code>jones</code> term over matches on the <code>smith</code>
* term.
* <li><code>field:(a OR b NOT c)^2.5 OR field:d</code>
* <p>apply the boost to a sub-query.
* </ul>
*
* <h3>Special character escaping</h3>
*
* <p>Most search terms can be put in double quotes making special-character escaping not necessary.
* If the search term contains the quote character (or cannot be quoted for some reason), any
* character can be escaped with a backslash. For example:
*
* <ul>
* <li><code>\:\(quoted\+term\)\:</code>
* <p>a single search term <code>(quoted+term):</code> with escape sequences. An alternative
* quoted form would be simpler: <code>":(quoted+term):"
* </code>.
* </ul>
*
* <h3>Minimum-should-match constraint for Boolean disjunction groups</h3>
*
* <p>A minimum-should-match operator can be applied to a disjunction Boolean query (a query with
* only "OR"-subclauses) and forces the query to match documents with at least the provided number
* of these subclauses. For example:
*
* <ul>
* <li><code>(blue crab fish)@2</code>
* <p>matches all documents with at least two terms from the set [blue, crab, fish] (in any
* order).
* <li><code>((yellow OR blue) crab fish)@2</code>
* <p>sub-clauses of a Boolean query can themselves be complex queries; here the
* min-should-match selects documents that match at least two of the provided three
* sub-clauses.
* </ul>
*
* <h3>Interval function clauses</h3>
*
* <p>Interval functions are a powerful tool to express search needs in terms of one or more
* contiguous fragments of text and their relationship to one another. All interval clauses start
* with the {@code fn:} prefix (possibly prefixed by a field specification). For example:
*
* <ul>
* <li><code>fn:ordered(quick brown fox)</code>
* <p>matches all documents (in the default field or in multi-field expansion) with at least
* one ordered sequence of <code>quick</code>, <code>
* brown</code> and <code>fox</code> terms.
* <li><code>title:fn:maxwidth(5 fn:atLeast(2 quick brown fox))</code>
* <p>matches all documents in the <code>title
* </code> field where at least two of the three terms (<code>quick</code>, <code>
* brown</code> and <code>fox</code>) occur within five positions of each other.
* </ul>
*
* Please refer to the {@linkplain org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn
* interval functions package} for more information on which functions are available and how they
* work.
*
* @see StandardQueryParser
* @see StandardQueryConfigHandler

View File

@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.builders;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
import org.apache.lucene.search.Query;
/**
 * A {@link StandardQueryBuilder} that turns an {@link IntervalQueryNode} into a Lucene {@link
 * Query} by delegating to {@link IntervalQueryNode#getQuery()}.
 */
public class IntervalQueryNodeBuilder implements StandardQueryBuilder {

  /**
   * Produces the query represented by the given node.
   *
   * @param queryNode the node to convert; it is cast to {@link IntervalQueryNode}
   * @return the query returned by {@link IntervalQueryNode#getQuery()}
   * @throws QueryNodeException declared by the signature; this implementation does not throw it
   *     directly
   */
  @Override
  public Query build(QueryNode queryNode) throws QueryNodeException {
    final IntervalQueryNode intervalNode = (IntervalQueryNode) queryNode;
    return intervalNode.getQuery();
  }
}

View File

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.builders;
import java.util.List;
import org.apache.lucene.queryparser.flexible.core.builders.QueryBuilder;
import org.apache.lucene.queryparser.flexible.core.builders.QueryTreeBuilder;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.MinShouldMatchNode;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
/**
 * A {@link QueryBuilder} that produces a {@link BooleanQuery} for a {@link MinShouldMatchNode}:
 * the clauses of the already-built group query are copied into a new Boolean query that carries
 * the node's minimum-should-match value.
 */
public class MinShouldMatchNodeBuilder implements QueryBuilder {

  /**
   * Builds the Boolean query for the given node.
   *
   * @param queryNode the node to convert; it is cast to {@link MinShouldMatchNode}
   * @return a new {@link BooleanQuery} with the group's clauses and the node's
   *     minimum-should-match setting
   * @throws RuntimeException if the node does not have exactly one child
   */
  @Override
  public Query build(QueryNode queryNode) {
    final MinShouldMatchNode node = (MinShouldMatchNode) queryNode;

    final List<QueryNode> childNodes = queryNode.getChildren();
    final int childCount = childNodes.size();
    if (childCount != 1) {
      throw new RuntimeException("Unexpected number of node children: " + childCount);
    }

    // The query for the wrapped group is read from the tag stored on the group node.
    final Query groupQuery =
        (Query) node.groupQueryNode.getTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);
    final BooleanQuery source = (BooleanQuery) groupQuery;

    final BooleanQuery.Builder result =
        new BooleanQuery.Builder().setMinimumNumberShouldMatch(node.minShouldMatch);
    source.clauses().forEach(clause -> result.add(clause));
    return result.build();
  }
}

View File

@ -29,6 +29,8 @@ import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.MinShouldMatchNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode;
@ -69,6 +71,8 @@ public class StandardQueryTreeBuilder extends QueryTreeBuilder implements Standa
setBuilder(SynonymQueryNode.class, new SynonymQueryNodeBuilder());
setBuilder(MultiPhraseQueryNode.class, new MultiPhraseQueryNodeBuilder());
setBuilder(MatchAllDocsQueryNode.class, new MatchAllDocsQueryNodeBuilder());
setBuilder(MinShouldMatchNode.class, new MinShouldMatchNodeBuilder());
setBuilder(IntervalQueryNode.class, new IntervalQueryNodeBuilder());
}
@Override

View File

@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.IntervalQuery;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldableNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNodeImpl;
import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.IntervalFunction;
import org.apache.lucene.queryparser.flexible.standard.parser.EscapeQuerySyntaxImpl;
import org.apache.lucene.search.Query;
/**
 * Node that represents an interval function.
 *
 * <p>The node holds the parsed {@link IntervalFunction}, the field it applies to and the {@link
 * Analyzer} passed to the function when it is converted to an interval source. The field may be
 * left {@code null} at construction time; both the field and the analyzer must be set before
 * {@link #getQuery()} is called.
 */
public class IntervalQueryNode extends QueryNodeImpl implements FieldableNode {
  private final IntervalFunction source;
  private String field;
  private Analyzer analyzer;

  /**
   * Creates a new interval node.
   *
   * @param field the field the function applies to; may be {@code null} and set later via {@link
   *     #setField(CharSequence)}
   * @param source the interval function; must not be {@code null}
   * @throws NullPointerException if {@code source} is {@code null}
   */
  public IntervalQueryNode(String field, IntervalFunction source) {
    this.field = field;
    this.source = Objects.requireNonNull(source);
  }

  /**
   * Converts this node to an {@link IntervalQuery} over the configured field.
   *
   * @return the interval query built from the function, field and analyzer
   * @throws NullPointerException if the field or the analyzer has not been set
   */
  public Query getQuery() {
    Objects.requireNonNull(field, "Field must not be null for interval queries.");
    Objects.requireNonNull(analyzer, "Analyzer must not be null for interval queries.");
    return new IntervalQuery(field, source.toIntervalSource(field, analyzer));
  }

  /**
   * Returns {@code field:function}, using the function's own string representation.
   *
   * @param escapeSyntaxParser not used by this implementation
   */
  @Override
  public String toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
    return String.format(Locale.ROOT, "%s:%s", field, source);
  }

  @Override
  public String toString() {
    return toQueryString(new EscapeQuerySyntaxImpl());
  }

  @Override
  public CharSequence getField() {
    return field;
  }

  /**
   * Sets the field this interval function applies to.
   *
   * @param fieldName the field name; must not be {@code null}
   * @throws NullPointerException if {@code fieldName} is {@code null}
   */
  @Override
  public void setField(CharSequence fieldName) {
    // Check the argument itself; checking the result of toString() cannot catch a null argument.
    this.field =
        Objects.requireNonNull(fieldName, "Field must not be null for interval queries.")
            .toString();
  }

  /**
   * Returns a copy of this node with the same field, function and analyzer.
   *
   * @return the cloned node
   */
  @Override
  public IntervalQueryNode cloneTree() {
    IntervalQueryNode clone = new IntervalQueryNode(field, source);
    // Carry the analyzer over so that a clone of a configured node can still build its query.
    clone.analyzer = analyzer;
    return clone;
  }

  /**
   * Sets the analyzer handed to the interval function when the query is built.
   *
   * @param analyzer the analyzer; must not be {@code null}
   * @throws NullPointerException if {@code analyzer} is {@code null}
   */
  public void setAnalyzer(Analyzer analyzer) {
    this.analyzer =
        Objects.requireNonNull(analyzer, "Analyzer must not be null for interval queries.");
  }
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes;
import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNodeImpl;
import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
/**
 * Node that represents a minimum-should-match restriction on a {@link GroupQueryNode}.
 *
 * <p>The restricted group is registered as the only child of this node.
 */
public class MinShouldMatchNode extends QueryNodeImpl {
  /** The minimum-should-match value attached to the group. */
  public final int minShouldMatch;

  /** The group the restriction applies to. */
  public final GroupQueryNode groupQueryNode;

  /**
   * Creates the node and adds the group as its child.
   *
   * @param minShouldMatch the minimum-should-match value
   * @param groupQueryNode the group the value applies to
   */
  public MinShouldMatchNode(int minShouldMatch, GroupQueryNode groupQueryNode) {
    this.groupQueryNode = groupQueryNode;
    this.minShouldMatch = minShouldMatch;

    // This node is not a leaf: allocate the child list before attaching the group.
    setLeaf(false);
    allocate();
    add(this.groupQueryNode);
  }

  /** Returns the group's query string followed by {@code @} and the minimum-should-match value. */
  @Override
  public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
    CharSequence group = groupQueryNode.toQueryString(escapeSyntaxParser);
    return group + "@" + minShouldMatch;
  }
}

View File

@ -21,7 +21,7 @@ import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
/**
* A {@link WildcardQueryNode} represents a wildcard query; it does not apply to phrases. Examples:
* a*b*c Fl?w? m?ke*g
* {@code a*b*c Fl?w? m?ke*g}.
*/
public class WildcardQueryNode extends FieldQueryNode {

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#after(IntervalsSource, IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:after(source reference)}.
 */
public class After extends IntervalFunction {
  private final IntervalFunction source;
  private final IntervalFunction reference;

  /**
   * @param source the source function, must not be {@code null}
   * @param reference the reference function, must not be {@code null}
   */
  public After(IntervalFunction source, IntervalFunction reference) {
    this.source = Objects.requireNonNull(source);
    this.reference = Objects.requireNonNull(reference);
  }

  /** Converts both arguments and combines them with {@code Intervals.after}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource sourceIntervals = source.toIntervalSource(field, analyzer);
    IntervalsSource referenceIntervals = reference.toIntervalSource(field, analyzer);
    return Intervals.after(sourceIntervals, referenceIntervals);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:after(%s %s)", source, reference);
  }
}

View File

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.io.IOException;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Node that represents {@link Intervals#analyzedText(String, Analyzer, String, int, boolean)}.
 *
 * <p>The text is analyzed with zero gaps and in order. Its string form is the raw text, wrapped in
 * double quotes when it contains whitespace.
 */
public class AnalyzedText extends IntervalFunction {
  /** Matches any single whitespace character; compiled once and shared by all instances. */
  private static final Pattern WHITESPACE = Pattern.compile("[\\s]");

  private final String term;

  /**
   * @param term the text to analyze, must not be {@code null}
   * @throws NullPointerException if {@code term} is {@code null}
   */
  public AnalyzedText(String term) {
    // Fail fast here rather than with an unrelated NPE later in toString()/toIntervalSource().
    if (term == null) {
      throw new NullPointerException("term must not be null");
    }
    this.term = term;
  }

  /**
   * Analyzes the text for the given field.
   *
   * @throws RuntimeException wrapping any {@link IOException} raised during analysis
   */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    int gaps = 0;
    boolean ordered = true;
    try {
      return Intervals.analyzedText(term, analyzer, field, gaps, ordered);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public String toString() {
    if (requiresQuotes(term)) {
      return '"' + term + '"';
    } else {
      return term;
    }
  }

  /** Returns true if the text contains at least one whitespace character. */
  private boolean requiresQuotes(String term) {
    return WHITESPACE.matcher(term).find();
  }
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#atLeast(int, IntervalsSource...)}.
 *
 * <p>Rendered as {@code fn:atLeast(n arg1 arg2 ...)}.
 */
public class AtLeast extends IntervalFunction {
  private final int minShouldMatch;
  private final List<IntervalFunction> sources;

  /**
   * @param minShouldMatch the count passed as the first argument of {@code Intervals.atLeast}
   * @param sources the argument functions, must not be {@code null}
   */
  public AtLeast(int minShouldMatch, List<IntervalFunction> sources) {
    this.minShouldMatch = minShouldMatch;
    this.sources = Objects.requireNonNull(sources);
  }

  /** Converts every argument in list order and passes them to {@code Intervals.atLeast}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource[] converted = new IntervalsSource[sources.size()];
    int index = 0;
    for (IntervalFunction function : sources) {
      converted[index++] = function.toIntervalSource(field, analyzer);
    }
    return Intervals.atLeast(minShouldMatch, converted);
  }

  @Override
  public String toString() {
    String arguments =
        sources.stream().map(IntervalFunction::toString).collect(Collectors.joining(" "));
    return String.format(Locale.ROOT, "fn:atLeast(%s %s)", minShouldMatch, arguments);
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#before(IntervalsSource, IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:before(source reference)}.
 */
public class Before extends IntervalFunction {
  private final IntervalFunction source;
  private final IntervalFunction reference;

  /**
   * @param source the source function, must not be {@code null}
   * @param reference the reference function, must not be {@code null}
   */
  public Before(IntervalFunction source, IntervalFunction reference) {
    this.source = Objects.requireNonNull(source);
    this.reference = Objects.requireNonNull(reference);
  }

  /** Converts both arguments and combines them with {@code Intervals.before}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource sourceIntervals = source.toIntervalSource(field, analyzer);
    IntervalsSource referenceIntervals = reference.toIntervalSource(field, analyzer);
    return Intervals.before(sourceIntervals, referenceIntervals);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:before(%s %s)", source, reference);
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#containedBy(IntervalsSource,
 * IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:containedBy(small big)}.
 */
public class ContainedBy extends IntervalFunction {
  private final IntervalFunction big;
  private final IntervalFunction small;

  /**
   * @param small the first argument of {@code Intervals.containedBy}, must not be {@code null}
   * @param big the second argument of {@code Intervals.containedBy}, must not be {@code null}
   */
  public ContainedBy(IntervalFunction small, IntervalFunction big) {
    this.small = Objects.requireNonNull(small);
    this.big = Objects.requireNonNull(big);
  }

  /** Converts {@code small} then {@code big} and combines them with {@code containedBy}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource smallIntervals = small.toIntervalSource(field, analyzer);
    IntervalsSource bigIntervals = big.toIntervalSource(field, analyzer);
    return Intervals.containedBy(smallIntervals, bigIntervals);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:containedBy(%s %s)", small, big);
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#containing(IntervalsSource, IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:containing(big small)}.
 */
public class Containing extends IntervalFunction {
  private final IntervalFunction big;
  private final IntervalFunction small;

  /**
   * @param big the first argument of {@code Intervals.containing}, must not be {@code null}
   * @param small the second argument of {@code Intervals.containing}, must not be {@code null}
   */
  public Containing(IntervalFunction big, IntervalFunction small) {
    this.big = Objects.requireNonNull(big);
    this.small = Objects.requireNonNull(small);
  }

  /** Converts {@code big} then {@code small} and combines them with {@code containing}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource bigIntervals = big.toIntervalSource(field, analyzer);
    IntervalsSource smallIntervals = small.toIntervalSource(field, analyzer);
    return Intervals.containing(bigIntervals, smallIntervals);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:containing(%s %s)", big, small);
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#extend(IntervalsSource, int, int)}.
 *
 * <p>Rendered as {@code fn:extend(source before after)}.
 */
public class Extend extends IntervalFunction {
  private final int before;
  private final int after;
  private final IntervalFunction source;

  /**
   * @param source the function to extend, must not be {@code null}
   * @param before the second argument passed to {@code Intervals.extend}
   * @param after the third argument passed to {@code Intervals.extend}
   */
  public Extend(IntervalFunction source, int before, int after) {
    this.source = Objects.requireNonNull(source);
    this.before = before;
    this.after = after;
  }

  /** Converts the wrapped function and passes it, with both ints, to {@code extend}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource inner = source.toIntervalSource(field, analyzer);
    return Intervals.extend(inner, before, after);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:extend(%s %d %d)", source, before, after);
  }
}

View File

@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Base class of interval functions: each one can be turned into an {@link IntervalsSource} for a
 * given field and analyzer, and must supply its own string form.
 */
public abstract class IntervalFunction {
  /** Returns the string form of this function; subclasses are required to implement it. */
  @Override
  public abstract String toString();

  /**
   * Converts this function into an {@link IntervalsSource}.
   *
   * @param field the field the intervals are built for
   * @param analyzer the analyzer made available to the function
   * @return the intervals source for this function
   */
  public abstract IntervalsSource toIntervalSource(String field, Analyzer analyzer);
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#maxgaps(int, IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:maxgaps(n source)}.
 */
public class MaxGaps extends IntervalFunction {
  private final int maxGaps;
  private final IntervalFunction source;

  /**
   * @param maxGaps the first argument passed to {@code Intervals.maxgaps}
   * @param source the restricted function, must not be {@code null}
   */
  public MaxGaps(int maxGaps, IntervalFunction source) {
    this.maxGaps = maxGaps;
    this.source = Objects.requireNonNull(source);
  }

  /** Converts the wrapped function and passes it to {@code Intervals.maxgaps}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource inner = source.toIntervalSource(field, analyzer);
    return Intervals.maxgaps(maxGaps, inner);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:maxgaps(%s %s)", maxGaps, source);
  }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#maxwidth(int, IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:maxwidth(n source)}.
 */
public class MaxWidth extends IntervalFunction {
  private final int width;
  private final IntervalFunction source;

  /**
   * @param width the first argument passed to {@code Intervals.maxwidth}
   * @param source the restricted function, must not be {@code null}
   */
  public MaxWidth(int width, IntervalFunction source) {
    this.width = width;
    this.source = Objects.requireNonNull(source);
  }

  /** Converts the wrapped function and passes it to {@code Intervals.maxwidth}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource inner = source.toIntervalSource(field, analyzer);
    return Intervals.maxwidth(width, inner);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:maxwidth(%s %s)", width, source);
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#nonOverlapping(IntervalsSource,
 * IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:nonOverlapping(minuend subtrahend)}.
 */
public class NonOverlapping extends IntervalFunction {
  private final IntervalFunction minuend;
  private final IntervalFunction subtrahend;

  /**
   * @param minuend the first argument of {@code nonOverlapping}, must not be {@code null}
   * @param subtrahend the second argument of {@code nonOverlapping}, must not be {@code null}
   */
  public NonOverlapping(IntervalFunction minuend, IntervalFunction subtrahend) {
    this.minuend = Objects.requireNonNull(minuend);
    this.subtrahend = Objects.requireNonNull(subtrahend);
  }

  /** Converts both arguments and combines them with {@code Intervals.nonOverlapping}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource minuendIntervals = minuend.toIntervalSource(field, analyzer);
    IntervalsSource subtrahendIntervals = subtrahend.toIntervalSource(field, analyzer);
    return Intervals.nonOverlapping(minuendIntervals, subtrahendIntervals);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:nonOverlapping(%s %s)", minuend, subtrahend);
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#notContainedBy(IntervalsSource,
 * IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:notContainedBy(small big)}.
 */
public class NotContainedBy extends IntervalFunction {
  private final IntervalFunction small;
  private final IntervalFunction big;

  /**
   * @param small the first argument of {@code notContainedBy}, must not be {@code null}
   * @param big the second argument of {@code notContainedBy}, must not be {@code null}
   */
  public NotContainedBy(IntervalFunction small, IntervalFunction big) {
    this.small = Objects.requireNonNull(small);
    this.big = Objects.requireNonNull(big);
  }

  /** Converts {@code small} then {@code big} and combines them with {@code notContainedBy}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource smallIntervals = small.toIntervalSource(field, analyzer);
    IntervalsSource bigIntervals = big.toIntervalSource(field, analyzer);
    return Intervals.notContainedBy(smallIntervals, bigIntervals);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:notContainedBy(%s %s)", small, big);
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#notContaining(IntervalsSource,
 * IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:notContaining(minuend subtrahend)}.
 */
public class NotContaining extends IntervalFunction {
  private final IntervalFunction minuend;
  private final IntervalFunction subtrahend;

  /**
   * @param minuend the first argument of {@code notContaining}, must not be {@code null}
   * @param subtrahend the second argument of {@code notContaining}, must not be {@code null}
   */
  public NotContaining(IntervalFunction minuend, IntervalFunction subtrahend) {
    this.minuend = Objects.requireNonNull(minuend);
    this.subtrahend = Objects.requireNonNull(subtrahend);
  }

  /** Converts both arguments and combines them with {@code Intervals.notContaining}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource minuendIntervals = minuend.toIntervalSource(field, analyzer);
    IntervalsSource subtrahendIntervals = subtrahend.toIntervalSource(field, analyzer);
    return Intervals.notContaining(minuendIntervals, subtrahendIntervals);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:notContaining(%s %s)", minuend, subtrahend);
  }
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#notWithin(IntervalsSource, int,
 * IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:notWithin(minuend positions subtrahend)}.
 */
public class NotWithin extends IntervalFunction {
  private final int positions;
  private final IntervalFunction minuend;
  private final IntervalFunction subtrahend;

  /**
   * @param minuend the first argument of {@code notWithin}, must not be {@code null}
   * @param positions the int passed as the second argument of {@code notWithin}
   * @param subtrahend the third argument of {@code notWithin}, must not be {@code null}
   */
  public NotWithin(IntervalFunction minuend, int positions, IntervalFunction subtrahend) {
    this.positions = positions;
    this.minuend = Objects.requireNonNull(minuend);
    this.subtrahend = Objects.requireNonNull(subtrahend);
  }

  /** Converts both function arguments and combines them with {@code Intervals.notWithin}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource minuendIntervals = minuend.toIntervalSource(field, analyzer);
    IntervalsSource subtrahendIntervals = subtrahend.toIntervalSource(field, analyzer);
    return Intervals.notWithin(minuendIntervals, positions, subtrahendIntervals);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:notWithin(%s %d %s)", minuend, positions, subtrahend);
  }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#or(IntervalsSource...)}.
 *
 * <p>Rendered as {@code fn:or(arg1 arg2 ...)}.
 */
public class Or extends IntervalFunction {
  private final List<IntervalFunction> sources;

  /**
   * @param sources the argument functions, must not be {@code null}
   */
  public Or(List<IntervalFunction> sources) {
    this.sources = Objects.requireNonNull(sources);
  }

  /** Converts every argument in list order and passes them to {@code Intervals.or}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource[] converted = new IntervalsSource[sources.size()];
    int index = 0;
    for (IntervalFunction function : sources) {
      converted[index++] = function.toIntervalSource(field, analyzer);
    }
    return Intervals.or(converted);
  }

  @Override
  public String toString() {
    String arguments =
        sources.stream().map(IntervalFunction::toString).collect(Collectors.joining(" "));
    return String.format(Locale.ROOT, "fn:or(%s)", arguments);
  }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#ordered(IntervalsSource...)}.
 *
 * <p>Rendered as {@code fn:ordered(arg1 arg2 ...)}.
 */
public class Ordered extends IntervalFunction {
  private final List<IntervalFunction> sources;

  /**
   * @param sources the argument functions, must not be {@code null}
   */
  public Ordered(List<IntervalFunction> sources) {
    this.sources = Objects.requireNonNull(sources);
  }

  /** Converts every argument in list order and passes them to {@code Intervals.ordered}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource[] converted = new IntervalsSource[sources.size()];
    int index = 0;
    for (IntervalFunction function : sources) {
      converted[index++] = function.toIntervalSource(field, analyzer);
    }
    return Intervals.ordered(converted);
  }

  @Override
  public String toString() {
    String arguments =
        sources.stream().map(IntervalFunction::toString).collect(Collectors.joining(" "));
    return String.format(Locale.ROOT, "fn:ordered(%s)", arguments);
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node backed by {@link Intervals#overlapping(IntervalsSource,
 * IntervalsSource)}.
 *
 * <p>Rendered as {@code fn:overlapping(source reference)}.
 */
public class Overlapping extends IntervalFunction {
  private final IntervalFunction source;
  private final IntervalFunction reference;

  /**
   * @param source the source function, must not be {@code null}
   * @param reference the reference function, must not be {@code null}
   */
  public Overlapping(IntervalFunction source, IntervalFunction reference) {
    this.source = Objects.requireNonNull(source);
    this.reference = Objects.requireNonNull(reference);
  }

  /** Converts both arguments and combines them with {@code Intervals.overlapping}. */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource sourceIntervals = source.toIntervalSource(field, analyzer);
    IntervalsSource referenceIntervals = reference.toIntervalSource(field, analyzer);
    return Intervals.overlapping(sourceIntervals, referenceIntervals);
  }

  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:overlapping(%s %s)", source, reference);
  }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node for <code>fn:phrase</code>; it is translated into {@link
 * Intervals#phrase(IntervalsSource...)}.
 */
public class Phrase extends IntervalFunction {
  /** Argument functions; the list is stored as passed in (it is not copied). */
  private final List<IntervalFunction> sources;

  /**
   * Creates the node.
   *
   * @param sources argument functions; the list itself must not be null
   * @throws NullPointerException if {@code sources} is null
   */
  public Phrase(List<IntervalFunction> sources) {
    this.sources = Objects.requireNonNull(sources);
  }

  /**
   * Converts every argument using the same field and analyzer and passes the results, in list
   * order, to {@link Intervals#phrase(IntervalsSource...)}.
   */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource[] parts =
        sources.stream()
            .map(argument -> argument.toIntervalSource(field, analyzer))
            .toArray(IntervalsSource[]::new);
    return Intervals.phrase(parts);
  }

  /** Returns <code>fn:phrase(...)</code> with the arguments separated by single spaces. */
  @Override
  public String toString() {
    String arguments =
        sources.stream().map(IntervalFunction::toString).collect(Collectors.joining(" "));
    return String.format(Locale.ROOT, "fn:phrase(%s)", arguments);
  }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node for <code>fn:unordered</code>; it is translated into {@link
 * Intervals#unordered(IntervalsSource...)}.
 */
public class Unordered extends IntervalFunction {
  /** Argument functions; the list is stored as passed in (it is not copied). */
  private final List<IntervalFunction> sources;

  /**
   * Creates the node.
   *
   * @param sources argument functions; the list itself must not be null
   * @throws NullPointerException if {@code sources} is null
   */
  public Unordered(List<IntervalFunction> sources) {
    this.sources = Objects.requireNonNull(sources);
  }

  /**
   * Converts every argument using the same field and analyzer and passes the results, in list
   * order, to {@link Intervals#unordered(IntervalsSource...)}.
   */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    final IntervalsSource[] converted = new IntervalsSource[sources.size()];
    int slot = 0;
    for (IntervalFunction fn : sources) {
      converted[slot] = fn.toIntervalSource(field, analyzer);
      slot++;
    }
    return Intervals.unordered(converted);
  }

  /** Returns <code>fn:unordered(...)</code> with the arguments separated by single spaces. */
  @Override
  public String toString() {
    final StringBuilder joined = new StringBuilder();
    boolean first = true;
    for (IntervalFunction fn : sources) {
      if (!first) {
        joined.append(" ");
      }
      joined.append(fn.toString());
      first = false;
    }
    return String.format(Locale.ROOT, "fn:unordered(%s)", joined.toString());
  }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node for <code>fn:unorderedNoOverlaps</code>; it is translated into {@link
 * Intervals#unorderedNoOverlaps(IntervalsSource, IntervalsSource)}.
 */
public class UnorderedNoOverlaps extends IntervalFunction {
  /** First argument of the function. */
  private final IntervalFunction a;

  /** Second argument of the function. */
  private final IntervalFunction b;

  /**
   * Creates the node.
   *
   * @param a first argument; must not be null
   * @param b second argument; must not be null
   * @throws NullPointerException if either argument is null
   */
  public UnorderedNoOverlaps(IntervalFunction a, IntervalFunction b) {
    this.a = Objects.requireNonNull(a);
    this.b = Objects.requireNonNull(b);
  }

  /**
   * Converts the first and then the second argument with the same field and analyzer, and combines
   * them with {@link Intervals#unorderedNoOverlaps(IntervalsSource, IntervalsSource)}.
   */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource first = a.toIntervalSource(field, analyzer);
    IntervalsSource second = b.toIntervalSource(field, analyzer);
    return Intervals.unorderedNoOverlaps(first, second);
  }

  /** Returns <code>fn:unorderedNoOverlaps(a b)</code>. */
  @Override
  public String toString() {
    final String pattern = "fn:unorderedNoOverlaps(%s %s)";
    return String.format(Locale.ROOT, pattern, a, b);
  }
}

View File

@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.util.BytesRef;
/**
 * Interval function node for <code>fn:wildcard</code>; it is translated into {@link
 * Intervals#wildcard(BytesRef)}.
 */
public class Wildcard extends IntervalFunction {
  /** The wildcard pattern, exactly as passed to the constructor. */
  private final String wildcard;

  /**
   * Creates the node.
   *
   * @param wildcard the wildcard pattern; must not be null
   * @throws NullPointerException if {@code wildcard} is null
   */
  public Wildcard(String wildcard) {
    // Fail fast here, the same way the other interval function nodes validate their arguments,
    // instead of failing later when the pattern is converted to a BytesRef. The fully-qualified
    // name is used because this file does not import java.util.Objects.
    this.wildcard = java.util.Objects.requireNonNull(wildcard, "wildcard");
  }

  /**
   * Wraps the pattern in a {@link BytesRef} and passes it to {@link Intervals#wildcard(BytesRef)}.
   * The {@code field} and {@code analyzer} arguments are not used by this node.
   */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    return Intervals.wildcard(new BytesRef(wildcard));
  }

  /** Returns <code>fn:wildcard(pattern)</code>. */
  @Override
  public String toString() {
    return String.format(Locale.ROOT, "fn:wildcard(%s)", wildcard);
  }
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
/**
 * Interval function node for <code>fn:within</code>; it is translated into {@link
 * Intervals#within(IntervalsSource, int, IntervalsSource)}.
 */
public class Within extends IntervalFunction {
  /** Number of positions, passed through unchanged to {@code Intervals.within}. */
  private final int positions;

  /** First argument of the function. */
  private final IntervalFunction source;

  /** Third argument of the function. */
  private final IntervalFunction reference;

  /**
   * Creates the node.
   *
   * @param source source function; must not be null
   * @param positions number of positions (stored as given, no range check is made here)
   * @param reference reference function; must not be null
   * @throws NullPointerException if {@code source} or {@code reference} is null
   */
  public Within(IntervalFunction source, int positions, IntervalFunction reference) {
    this.positions = positions;
    this.source = Objects.requireNonNull(source);
    this.reference = Objects.requireNonNull(reference);
  }

  /**
   * Converts the source and then the reference with the same field and analyzer, and combines them
   * with {@link Intervals#within(IntervalsSource, int, IntervalsSource)}.
   */
  @Override
  public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
    IntervalsSource sourceIntervals = source.toIntervalSource(field, analyzer);
    IntervalsSource referenceIntervals = reference.toIntervalSource(field, analyzer);
    return Intervals.within(sourceIntervals, positions, referenceIntervals);
  }

  /** Returns <code>fn:within(source positions reference)</code>. */
  @Override
  public String toString() {
    final String pattern = "fn:within(%s %d %s)";
    return String.format(Locale.ROOT, pattern, source, positions, reference);
  }
}

View File

@ -0,0 +1,726 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This package contains classes that implement {@linkplain
* org.apache.lucene.queries.intervals.Intervals interval function} support for the {@linkplain
* org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser standard syntax
* parser}.
*
* <h2>What are interval functions?</h2>
*
* <p>Interval functions are a powerful tool to express search needs in terms of one or more
* contiguous fragments of text and their relationship to one another. Interval functions are
* implemented by an {@linkplain org.apache.lucene.queries.intervals.IntervalQuery IntervalQuery}
* but many ready-to-use factory methods are provided in the {@linkplain
* org.apache.lucene.queries.intervals.Intervals Intervals} class.
*
* <p>When Lucene indexes documents (or rather: document fields) the input text is typically split
* into <em>tokens</em>. The details of how this tokenization is performed depend on how the
* field's {@link org.apache.lucene.analysis.Analyzer} is set up. In the end, each token would
* typically have an associated <em>position</em> in the token stream. For example, the following
* sentence:
*
* <p class="example sentence with-highlights">The quick brown fox jumps over the lazy dog
*
* <p>could be transformed into the following token stream (note some token positions are "blank"
* (grayed out) &mdash; these positions reflect stop words that are typically not indexed at all).
*
* <p class="example sentence with-highlights with-positions"><span style="color:
* lightgrey">The</span><sub>&mdash;</sub> quick<sub>2</sub> brown<sub>3</sub> fox<sub>4</sub>
* jumps<sub>5</sub> over<sub>6</sub> <span style="color: lightgrey">the</span><sub>&mdash;</sub>
* lazy<sub>8</sub> dog<sub>9</sub>
*
* <p>Remembering that intervals are contiguous spans between two positions in a document, consider
* the following example interval function query: <code>fn:ordered(brown dog)</code>. This query
* selects any span of text between terms <code>brown</code> and <code>dog</code>. In our example,
* this would correspond to the highlighted fragment below.
*
* <p class="example sentence with-highlights">The quick <span class="highlight">brown fox jumps
* over the lazy dog</span>
*
* <p>This type of interval function can be called an interval <em>selector</em>. The second class
* of interval functions works by combining or filtering other intervals depending on certain
* criteria.
*
* <p>The matching interval in the above example can be of any length &mdash; if the word <code>
* brown</code> occurs at the beginning of the document and the word <code>dog</code> at the very
* end of the document, the interval would be very long (it would cover the entire document!). Let's
* say we want to restrict the matches to only those intervals with at most 3 positions between the
* search terms: <code>fn:maxgaps(3 fn:ordered(brown dog))</code>.
*
* <p>There are five tokens in between search terms (so five "gaps" between the matching interval's
* positions) and the above query no longer matches our example document at all.
*
* <p>Interval filtering functions allow expressing a variety of conditions other Lucene queries
* cannot. For example, consider this interval query that searches for words <code>lazy</code> or
* <code>quick</code> but only if they are in the neighborhood of one position from any of the words
* <code>dog</code> or <code>fox</code>:
*
* <p><code>fn:within(fn:or(lazy quick) 1 fn:or(dog fox))</code>
*
* <p>The result of this query is shown below (only the word <code>lazy</code> matches the
* query, <code>quick</code> is 2 positions away from <code>fox</code>).
*
* <p class="example sentence with-highlights">The quick brown fox jumps over the <span
* class="highlight">lazy</span> dog
*
* <p>The remaining part of this document provides more information on the available functions and
* their expected behavior.
*
* <h2>Classification of interval functions</h2>
*
* <p>The following groups of interval functions are available in the {@link
* org.apache.lucene.queryparser.flexible.standard.StandardQueryParser}.
*
* <table class="table" style="width: auto">
* <caption>Interval functions grouped by similar functionality.</caption>
* <thead>
* <tr>
* <th>Terms</th>
* <th>Alternatives</th>
* <th>Length</th>
* <th>Context</th>
* <th>Ordering</th>
* <th>Containment</th>
* </tr>
* </thead>
*
* <tbody>
* <tr>
* <td>
* <em>term literals</em><br>
* <code>fn:wildcard</code><br>
* </td>
* <td>
* <code>fn:or</code><br>
* <code>fn:atLeast</code>
* </td>
* <td>
* <code>fn:maxgaps</code><br>
* <code>fn:maxwidth</code>
* </td>
* <td>
* <code>fn:before</code><br>
* <code>fn:after</code><br>
* <code>fn:extend</code><br>
* <code>fn:within</code><br>
* <code>fn:notWithin</code>
* </td>
* <td>
* <code>fn:ordered</code><br>
* <code>fn:unordered</code><br>
* <code>fn:phrase</code><br>
* <code>fn:unorderedNoOverlaps</code>
* </td>
* <td>
* <code>fn:containedBy</code><br>
* <code>fn:notContainedBy</code><br>
* <code>fn:containing</code><br>
* <code>fn:notContaining</code><br>
* <code>fn:overlapping</code><br>
* <code>fn:nonOverlapping</code>
* </td>
* </tr>
* </tbody>
* </table>
*
* <p>All examples in the description of interval functions (below) assume a document with the
* following content:
*
* <p class="example sentence with-highlights">The quick brown fox jumps over the lazy dog
*
* <h3><em>term literals</em></h3>
*
* <p>Quoted or unquoted character sequences are converted into (analyzed) text intervals. While a
* single term typically results in a single-term interval, a quoted multi-term phrase will produce
* an interval matching the corresponding sequence of tokens. Note this is different from the <code>
* fn:phrase</code> function which takes a sequence of sub-intervals.
*
* <dl class="dl-horizontal narrow">
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:or(quick "fox")</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick</span> brown <span class="highlight">fox</span> jumps over
* the lazy dog
* <li><code>fn:or("quick fox")</code> (<em>The document would not match &mdash; no phrase
* <code>quick fox</code> exists.</em>)
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the lazy dog
* <li><code>fn:phrase(quick brown fox)</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox</span> jumps over the lazy dog
* </ul>
* </dl>
*
* <h3>fn:wildcard</h3>
*
* <p>Matches the disjunction of all terms that match a wildcard glob.
*
* <p><em>Important!</em> The expanded wildcard must not match more than 128 terms. This is an
* internal limitation that prevents blowing up memory on, for example, prefix expansions that would
* cover huge numbers of alternatives.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:wildcard(glob)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>glob</code>
* <dd>term glob to expand (based on the contents of the index).
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:wildcard(jump*)</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox <span
* class="highlight">jumps</span> over the lazy dog
* <li><code>fn:wildcard(br*n)</code>
* <p class="example sentence with-highlights left-aligned">The quick <span
* class="highlight">brown</span> fox jumps over the lazy dog
* </ul>
* </dl>
*
* <h3>fn:or</h3>
*
* <p>Matches the disjunction of nested intervals.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:or(sources...)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>sources</code>
* <dd>sub-intervals (terms or other functions)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:or(dog fox)</code>
* <p class="example sentence with-highlights left-aligned">The quick brown <span
* class="highlight">fox</span> jumps over the lazy <span class="highlight">dog</span>
* </ul>
* </dl>
*
* <h3>fn:atLeast</h3>
*
* <p>Matches documents that contain at least the provided number of source intervals.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:atLeast(min sources...)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>min</code>
* <dd>an integer specifying minimum number of sub-interval arguments that must match.
* <dt><code>sources</code>
* <dd>sub-intervals (terms or other functions)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:atLeast(2 quick fox "furry dog")</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox</span> jumps over the lazy dog
* <li><code>fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)</code>
* <em>(This query results in multiple overlapping intervals.)</em>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox jumps over the lazy</span> dog<br>
* The <span class="highlight">quick brown fox jumps over the lazy dog</span><br>
* The quick <span class="highlight">brown fox jumps over the lazy dog</span>
* </ul>
* </dl>
*
* <h3>fn:maxgaps</h3>
*
* <p>Accepts <code>source</code> interval if it has at most <code>gaps</code> position gaps.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:maxgaps(gaps source)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>gaps</code>
* <dd>an integer specifying maximum number of source's position gaps.
* <dt><code>source</code>
* <dd>source sub-interval.
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the <span class="highlight">lazy dog</span>
* <li><code>fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox</span> jumps over the <span class="highlight">lazy
* dog</span>
* </ul>
* </dl>
*
* <h3>fn:maxwidth</h3>
*
* <p>Accepts <code>source</code> interval if it has at most the given width (position span).
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:maxwidth(max source)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>max</code>
* <dd>an integer specifying maximum width of source's position span.
* <dt><code>source</code>
* <dd>source sub-interval.
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the <span class="highlight">lazy dog</span>
* <li><code>fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox</span> jumps over the <span class="highlight">lazy
* dog</span>
* </ul>
* </dl>
*
* <h3>fn:phrase</h3>
*
* <p>Matches an ordered, gapless sequence of source intervals.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:phrase(sources...)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>sources</code>
* <dd>sub-intervals (terms or other functions)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:phrase(quick brown fox)</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox</span> jumps over the lazy dog
* <li><code>fn:phrase(fn:ordered(quick fox) jumps)</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox jumps</span> over the lazy dog
* </ul>
* </dl>
*
* <h3>fn:ordered</h3>
*
* <p>Matches an ordered span containing all source intervals, possibly with gaps in between their
* respective source interval positions. Source intervals must not overlap.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:ordered(sources...)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>sources</code>
* <dd>sub-intervals (terms or other functions)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:ordered(quick jumps dog)</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox jumps over the lazy dog</span>
* <li><code>fn:ordered(quick fn:or(fox dog))</code> <em>(Note only the shorter match out of
* the two alternatives is included in the result; the algorithm is not required to
* return or highlight all matching interval alternatives).</em>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox</span> jumps over the lazy dog
* <li><code>fn:ordered(quick jumps fn:or(fox dog))</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox jumps over the lazy dog</span>
* <li><code>fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))</code> <em>(Sources
* overlap, no matches.)</em>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the lazy dog
* </ul>
* </dl>
*
* <h3>fn:unordered</h3>
*
* <p>Matches an unordered span containing all source intervals, possibly with gaps in between their
* respective source interval positions. Source intervals may overlap.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:unordered(sources...)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>sources</code>
* <dd>sub-intervals (terms or other functions)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:unordered(dog jumps quick)</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox jumps over the lazy dog</span>
* <li><code>fn:unordered(fn:or(fox dog) quick)</code> <em>(Note only the shorter match out
* of the two alternatives is included in the result; the algorithm is not required to
* return or highlight all matching interval alternatives).</em>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox</span> jumps over the lazy dog
* <li><code>fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))</code>
* <p class="example sentence with-highlights left-aligned">The quick <span
* class="highlight">brown fox jumps</span> over the lazy dog
* </ul>
* </dl>
*
* <h3>fn:unorderedNoOverlaps</h3>
*
* <p>Matches an unordered span containing two source intervals, possibly with gaps in between their
* respective source interval positions. Source intervals must not overlap.
*
* <p>Note that, unlike <code>fn:unordered</code>, this function takes a fixed number of arguments
* (two).
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:unorderedNoOverlaps(source1 source2)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source1</code>
* <dd>sub-interval (term or other function)
* <dt><code>source2</code>
* <dd>sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:unorderedNoOverlaps(fn:phrase(fox jumps) brown)</code>
* <p class="example sentence with-highlights left-aligned">The quick <span
* class="highlight">brown fox jumps</span> over the lazy dog
* <li><code>fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))</code>
* <em>(Sources overlap, no matches.)</em>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the lazy dog
* </ul>
* </dl>
*
* <h3>fn:before</h3>
*
* <p>Matches intervals from the source that appear before intervals from the reference.
*
* <p>Reference intervals will not be part of the match (this is a filtering function).
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:before(source reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:before(fn:or(brown lazy) fox)</code>
* <p class="example sentence with-highlights left-aligned">The quick <span
* class="highlight">brown</span> fox jumps over the lazy dog
* <li><code>fn:before(fn:or(brown lazy) fn:or(dog fox))</code>
* <p class="example sentence with-highlights left-aligned">The quick <span
* class="highlight">brown</span> fox jumps over the <span class="highlight">lazy</span>
* dog
* </ul>
* </dl>
*
* <h3>fn:after</h3>
*
* <p>Matches intervals from the source that appear after intervals from the reference.
*
* <p>Reference intervals will not be part of the match (this is a filtering function).
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:after(source reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:after(fn:or(brown lazy) fox)</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the <span class="highlight">lazy</span> dog
* <li><code>fn:after(fn:or(brown lazy) fn:or(dog fox))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the <span class="highlight">lazy</span> dog
* </ul>
* </dl>
*
* <h3>fn:extend</h3>
*
* <p>Matches an interval around another source, extending its span by a number of positions before
* and after.
*
* <p>This is an advanced function that allows extending the left and right "context" of another
* interval.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:extend(source before after)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>before</code>
* <dd>an integer number of positions to extend to the left of the source
* <dt><code>after</code>
* <dd>an integer number of positions to extend to the right of the source
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:extend(fox 1 2)</code>
* <p class="example sentence with-highlights left-aligned">The quick <span
* class="highlight">brown fox jumps over</span> the lazy dog
* <li><code>fn:extend(fn:or(dog fox) 2 0)</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox</span> jumps over <span class="highlight">the lazy
* dog</span>
* </ul>
* </dl>
*
* <h3>fn:within</h3>
*
* <p>Matches intervals of the source that appear within the provided number of positions from the
* intervals of the reference.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:within(source positions reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>positions</code>
* <dd>an integer number of maximum positions between source and reference
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:within(fn:or(fox dog) 1 fn:or(quick lazy))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the lazy <span class="highlight">dog</span>
* <li><code>fn:within(fn:or(fox dog) 2 fn:or(quick lazy))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown <span
* class="highlight">fox</span> jumps over the lazy <span class="highlight">dog</span>
* </ul>
* </dl>
*
* <h3>fn:notWithin</h3>
*
* <p>Matches intervals of the source that do <em>not</em> appear within the provided number of
* positions from the intervals of the reference.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:notWithin(source positions reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>positions</code>
* <dd>an integer number of maximum positions between source and reference
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown <span
* class="highlight">fox</span> jumps over the lazy dog
* </ul>
* </dl>
*
* <h3>fn:containedBy</h3>
*
* <p>Matches intervals of the source that are contained by intervals of the reference.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:containedBy(source reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown <span
* class="highlight">fox</span> jumps over the lazy dog
* <li><code>fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the lazy <span class="highlight">dog</span>
* </ul>
* </dl>
*
* <h3>fn:notContainedBy</h3>
*
* <p>Matches intervals of the source that are <em>not</em> contained by intervals of the reference.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:notContainedBy(source reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the lazy <span class="highlight">dog</span>
* <li><code>fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown <span
* class="highlight">fox</span> jumps over the lazy dog
* </ul>
* </dl>
*
* <h3>fn:containing</h3>
*
* <p>Matches intervals of the source that contain at least one interval of the reference.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:containing(source reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))</code>
* <p class="example sentence with-highlights left-aligned">The <span
* class="highlight">quick brown fox</span> jumps over <span class="highlight">the lazy
* dog</span>
* <li><code>fn:containing(fn:atLeast(2 quick fox dog) jumps)</code>
* <p class="example sentence with-highlights left-aligned">The quick brown <span
* class="highlight">fox jumps over the lazy dog</span>
* </ul>
* </dl>
*
* <h3>fn:notContaining</h3>
*
* <p>Matches intervals of the source that do <em>not</em> contain any intervals of the reference.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:notContaining(source reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the <span class="highlight">lazy dog</span>
* <li><code>fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over <span class="highlight">the lazy dog</span>
* </ul>
* </dl>
*
* <h3>fn:overlapping</h3>
*
* <p>Matches intervals of the source that overlap with at least one interval of the reference.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:overlapping(source reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))</code>
* <p class="example sentence with-highlights left-aligned">The quick <span
* class="highlight">brown fox</span> jumps over the lazy dog
* <li><code>fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
* over the lazy <span class="highlight">dog</span>
* </ul>
* </dl>
*
* <h3>fn:nonOverlapping</h3>
*
* <p>Matches intervals of the source that do <em>not</em> overlap with any intervals of the
* reference.
*
* <dl class="dl-horizontal narrow">
* <dt>Arguments
* <dd>
* <p><code>fn:nonOverlapping(source reference)</code>
* <dl class="dl-horizontal narrow">
* <dt><code>source</code>
* <dd>source sub-interval (term or other function)
* <dt><code>reference</code>
* <dd>reference sub-interval (term or other function)
* </dl>
* <dt>Examples
* <dd>
* <ul>
* <li><code>fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))</code>
* <p class="example sentence with-highlights left-aligned">The quick <span
* class="highlight">brown fox</span> jumps over the lazy dog
* <li><code>fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))</code>
* <p class="example sentence with-highlights left-aligned">The quick brown <span
* class="highlight">fox</span> jumps over the lazy dog
* </ul>
* </dl>
*/
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;

View File

@ -16,10 +16,7 @@
*/
/**
* Implementation of the {@linkplain org.apache.lucene.queryparser.classic Lucene classic query
* parser} using the flexible query parser frameworks
*
* <h2>Lucene Flexible Query Parser Implementation</h2>
* Lucene Flexible Query Parser Implementation
*
* <p>The old Lucene query parser used to have only one class that performed all the parsing
* operations. In the new query parser structure, the parsing was divided in 3 steps: parsing

View File

@ -35,26 +35,50 @@ import java.io.Reader;
import java.util.Collections;
import java.util.ArrayList;
import org.apache.lucene.queryparser.flexible.messages.Message;
import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode;
import org.apache.lucene.queryparser.flexible.messages.Message;
import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.After;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.AnalyzedText;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.AtLeast;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Before;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.ContainedBy;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Containing;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Extend;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.IntervalFunction;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.MaxGaps;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.MaxWidth;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.NonOverlapping;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.NotContainedBy;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.NotContaining;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.NotWithin;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Or;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Ordered;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Overlapping;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Phrase;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Unordered;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.UnorderedNoOverlaps;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Wildcard;
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Within;
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.MinShouldMatchNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
import static org.apache.lucene.queryparser.flexible.standard.parser.EscapeQuerySyntaxImpl.discardEscapeChar;
@ -87,6 +111,14 @@ public class StandardSyntaxParser implements SyntaxParser {
throw e;
}
}
/**
 * Converts the image (raw text) of a token to a {@code float}.
 *
 * @param token the token whose image is converted
 * @return the value produced by {@link Float#parseFloat(String)} for the token image
 */
public static float parseFloat(Token token) {
  final String image = token.image;
  return Float.parseFloat(image);
}
/**
 * Converts the image (raw text) of a token to an {@code int}.
 *
 * <p>NOTE(review): {@link Integer#parseInt(String)} throws an unchecked
 * {@link NumberFormatException} for images that are not plain integers (for example a value
 * with a fractional part). The grammar passes {@code <NUMBER>} tokens both here and to
 * {@code parseFloat} - confirm that rejecting non-integer numbers this way is intended.
 *
 * @param token the token whose image is converted
 * @return the value produced by {@link Integer#parseInt(String)} for the token image
 */
public static int parseInt(Token token) {
  final String image = token.image;
  return Integer.parseInt(image);
}
}
PARSER_END(StandardSyntaxParser)
@ -96,15 +128,15 @@ PARSER_END(StandardSyntaxParser)
<#_NUM_CHAR: ["0"-"9"] >
// Every character that follows a backslash is considered as an escaped character
| <#_ESCAPED_CHAR: "\\" ~[] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
"<", ">", "=", "[", "]", "\"", "{", "}", "~", "\\", "/" ]
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", "@",
"<", ">", "=", "[", "]", "\"", "{", "}", "~", "\\", "/"]
| <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}
<DEFAULT, Range> SKIP : {
<DEFAULT, Range, Function> SKIP : {
< <_WHITESPACE> >
}
@ -112,9 +144,9 @@ PARSER_END(StandardSyntaxParser)
<AND: ("AND" | "&&") >
| <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") >
| <FN_PREFIX: ("fn:") > : Function
| <PLUS: "+" >
| <MINUS: "-" >
| <LPAREN: "(" >
| <RPAREN: ")" >
| <OP_COLON: ":" >
| <OP_EQUAL: "=" >
@ -132,6 +164,33 @@ PARSER_END(StandardSyntaxParser)
| <RANGEEX_START: "{" > : Range
}
// "(" is recognized in both the DEFAULT and the Function lexical state and always leaves
// the lexer in DEFAULT. After "fn:name(" the arguments are therefore tokenized with the
// regular (DEFAULT) rules again.
<DEFAULT,Function> TOKEN : {
<LPAREN: "(" > : DEFAULT
}
// Interval function names. These tokens are only defined in the Function lexical state,
// which the lexer enters after matching <FN_PREFIX> ("fn:"). Where two spellings are
// listed, both the camelCase and the all-lowercase form are accepted.
<Function> TOKEN : {
<ATLEAST: ("atleast" | "atLeast") >
| <AFTER: ("after") >
| <BEFORE: ("before") >
| <CONTAINED_BY: ("containedBy" | "containedby") >
| <CONTAINING: ("containing") >
| <EXTEND: ("extend") >
| <FN_OR: ("or") >
| <MAXGAPS: ("maxgaps" | "maxGaps") >
| <MAXWIDTH: ("maxwidth" | "maxWidth") >
| <NON_OVERLAPPING: ("nonOverlapping" | "nonoverlapping") >
| <NOT_CONTAINED_BY: ("notContainedBy" | "notcontainedby") >
| <NOT_CONTAINING: ("notContaining" | "notcontaining") >
| <NOT_WITHIN: ("notWithin" | "notwithin") >
| <ORDERED: ("ordered") >
| <OVERLAPPING: ("overlapping") >
| <PHRASE: ("phrase") >
| <UNORDERED: ("unordered") >
| <UNORDERED_NO_OVERLAPS: ("unorderedNoOverlaps" | "unorderednooverlaps") >
| <WILDCARD: ("wildcard") >
| <WITHIN: ("within") >
}
<Range> TOKEN : {
<RANGE_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
@ -265,7 +324,8 @@ private QueryNode Clause(CharSequence field) : {
{
(
LOOKAHEAD(2) q = FieldRangeExpr(field)
| (LOOKAHEAD(2) field = FieldName() ( <OP_COLON> | <OP_EQUAL> ))? ( q = Term(field) | q = GroupingExpr(field))
| (LOOKAHEAD(2) field = FieldName() ( <OP_COLON> | <OP_EQUAL> ))?
(LOOKAHEAD(2) q = Term(field) | q = GroupingExpr(field) | q = IntervalExpr(field))
)
{
return q;
@ -289,17 +349,316 @@ private CharSequence FieldName() : {
* GroupingExpr ::= '(' Query ')' ('^' <NUMBER>)?
* }</pre>
*/
private GroupQueryNode GroupingExpr(CharSequence field) : {
private QueryNode GroupingExpr(CharSequence field) : {
QueryNode q;
Token boost;
Token boost, minShouldMatch = null;
}
{
<LPAREN> q = Query(field) <RPAREN> (q = Boost(q))?
<LPAREN> q = Query(field) <RPAREN> (q = Boost(q))? ("@" minShouldMatch = <NUMBER>)?
{
return new GroupQueryNode(q);
if (minShouldMatch != null) {
q = new MinShouldMatchNode(parseInt(minShouldMatch), new GroupQueryNode(q));
} else {
q = new GroupQueryNode(q);
}
return q;
}
}
/**
 * Parses one interval function expression and wraps it in an {@link IntervalQueryNode}
 * for the given field. A null field is passed on as a null field name.
 */
private IntervalQueryNode IntervalExpr(CharSequence field) : {
  IntervalFunction fn;
}
{
  fn = IntervalFun()
  {
    String fieldName = (field != null) ? field.toString() : null;
    return new IntervalQueryNode(fieldName, fn);
  }
}
/**
 * Dispatches to the production of a single interval function (or plain text).
 *
 * <p>Every named function starts with the same <code>FN_PREFIX</code> token followed by
 * the function-name token, so each alternative uses a two-token lookahead to tell the
 * functions apart. The last alternative, <code>IntervalText</code>, covers quoted text,
 * terms and numbers.
 */
private IntervalFunction IntervalFun() : {
IntervalFunction source;
}
{
LOOKAHEAD(2) source = IntervalAtLeast() { return source; }
| LOOKAHEAD(2) source = IntervalMaxWidth() { return source; }
| LOOKAHEAD(2) source = IntervalMaxGaps() { return source; }
| LOOKAHEAD(2) source = IntervalOrdered() { return source; }
| LOOKAHEAD(2) source = IntervalUnordered() { return source; }
| LOOKAHEAD(2) source = IntervalUnorderedNoOverlaps() { return source; }
| LOOKAHEAD(2) source = IntervalOr() { return source; }
| LOOKAHEAD(2) source = IntervalWildcard() { return source; }
| LOOKAHEAD(2) source = IntervalAfter() { return source; }
| LOOKAHEAD(2) source = IntervalBefore() { return source; }
| LOOKAHEAD(2) source = IntervalPhrase() { return source; }
| LOOKAHEAD(2) source = IntervalContaining() { return source; }
| LOOKAHEAD(2) source = IntervalNotContaining() { return source; }
| LOOKAHEAD(2) source = IntervalContainedBy() { return source; }
| LOOKAHEAD(2) source = IntervalNotContainedBy() { return source; }
| LOOKAHEAD(2) source = IntervalWithin() { return source; }
| LOOKAHEAD(2) source = IntervalNotWithin() { return source; }
| LOOKAHEAD(2) source = IntervalOverlapping() { return source; }
| LOOKAHEAD(2) source = IntervalNonOverlapping() { return source; }
| LOOKAHEAD(2) source = IntervalExtend() { return source; }
| LOOKAHEAD(2) source = IntervalText() { return source; }
}
/**
 * Parses <code>fn:atLeast(number fun+)</code>: a number followed by one or more
 * interval functions, producing an {@link AtLeast} node.
 */
private IntervalFunction IntervalAtLeast() : {
  Token min;
  IntervalFunction fn;
  ArrayList<IntervalFunction> fns = new ArrayList<IntervalFunction>();
}
{
  <FN_PREFIX> <ATLEAST> <LPAREN>
  min = <NUMBER>
  ( fn = IntervalFun() { fns.add(fn); } )+
  <RPAREN>
  {
    return new AtLeast(parseInt(min), fns);
  }
}
/**
 * Parses <code>fn:maxwidth(number fun)</code> and produces a {@link MaxWidth} node.
 */
private IntervalFunction IntervalMaxWidth() : {
  Token width;
  IntervalFunction fn;
}
{
  <FN_PREFIX> <MAXWIDTH> <LPAREN>
  width = <NUMBER>
  fn = IntervalFun()
  <RPAREN>
  {
    return new MaxWidth(parseInt(width), fn);
  }
}
/**
 * Parses <code>fn:maxgaps(number fun)</code> and produces a {@link MaxGaps} node.
 */
private IntervalFunction IntervalMaxGaps() : {
  Token gaps;
  IntervalFunction fn;
}
{
  <FN_PREFIX> <MAXGAPS> <LPAREN>
  gaps = <NUMBER>
  fn = IntervalFun()
  <RPAREN>
  {
    return new MaxGaps(parseInt(gaps), fn);
  }
}
/**
 * Parses <code>fn:unordered(fun+)</code> and produces an {@link Unordered} node
 * holding the sub-functions in the order they were written.
 */
private IntervalFunction IntervalUnordered() : {
  IntervalFunction fn;
  ArrayList<IntervalFunction> fns = new ArrayList<IntervalFunction>();
}
{
  <FN_PREFIX> <UNORDERED> <LPAREN>
  ( fn = IntervalFun() { fns.add(fn); } )+
  <RPAREN>
  {
    return new Unordered(fns);
  }
}
/**
 * Parses <code>fn:unorderedNoOverlaps(fun fun)</code> (exactly two sub-functions) and
 * produces an {@link UnorderedNoOverlaps} node.
 */
private IntervalFunction IntervalUnorderedNoOverlaps() : {
  IntervalFunction first;
  IntervalFunction second;
}
{
  <FN_PREFIX> <UNORDERED_NO_OVERLAPS> <LPAREN>
  first = IntervalFun()
  second = IntervalFun()
  <RPAREN>
  {
    return new UnorderedNoOverlaps(first, second);
  }
}
/**
 * Parses <code>fn:ordered(fun+)</code> and produces an {@link Ordered} node holding
 * the sub-functions in the order they were written.
 */
private IntervalFunction IntervalOrdered() : {
  IntervalFunction fn;
  ArrayList<IntervalFunction> fns = new ArrayList<IntervalFunction>();
}
{
  <FN_PREFIX> <ORDERED> <LPAREN>
  ( fn = IntervalFun() { fns.add(fn); } )+
  <RPAREN>
  {
    return new Ordered(fns);
  }
}
/**
 * Parses <code>fn:or(fun+)</code> and produces an {@link Or} node over the listed
 * sub-functions.
 */
private IntervalFunction IntervalOr() : {
  IntervalFunction fn;
  ArrayList<IntervalFunction> fns = new ArrayList<IntervalFunction>();
}
{
  <FN_PREFIX> <FN_OR> <LPAREN>
  ( fn = IntervalFun() { fns.add(fn); } )+
  <RPAREN>
  {
    return new Or(fns);
  }
}
/**
 * Parses <code>fn:phrase(fun+)</code> and produces a {@link Phrase} node holding the
 * sub-functions in the order they were written.
 */
private IntervalFunction IntervalPhrase() : {
  IntervalFunction fn;
  ArrayList<IntervalFunction> fns = new ArrayList<IntervalFunction>();
}
{
  <FN_PREFIX> <PHRASE> <LPAREN>
  ( fn = IntervalFun() { fns.add(fn); } )+
  <RPAREN>
  {
    return new Phrase(fns);
  }
}
/**
 * Parses <code>fn:before(source reference)</code> and produces a {@link Before} node.
 */
private IntervalFunction IntervalBefore() : {
  IntervalFunction src;
  IntervalFunction ref;
}
{
  <FN_PREFIX> <BEFORE> <LPAREN>
  src = IntervalFun()
  ref = IntervalFun()
  <RPAREN>
  {
    return new Before(src, ref);
  }
}
/**
 * Parses <code>fn:after(source reference)</code> and produces an {@link After} node.
 */
private IntervalFunction IntervalAfter() : {
  IntervalFunction src;
  IntervalFunction ref;
}
{
  <FN_PREFIX> <AFTER> <LPAREN>
  src = IntervalFun()
  ref = IntervalFun()
  <RPAREN>
  {
    return new After(src, ref);
  }
}
/**
 * Parses <code>fn:containing(source reference)</code> and produces a
 * {@link Containing} node; the first argument is the containing (outer) interval.
 */
private IntervalFunction IntervalContaining() : {
  IntervalFunction outer;
  IntervalFunction inner;
}
{
  <FN_PREFIX> <CONTAINING> <LPAREN>
  outer = IntervalFun()
  inner = IntervalFun()
  <RPAREN>
  {
    return new Containing(outer, inner);
  }
}
/**
 * Parses <code>fn:notContaining(source reference)</code> and produces a
 * {@link NotContaining} node.
 */
private IntervalFunction IntervalNotContaining() : {
  IntervalFunction src;
  IntervalFunction excluded;
}
{
  <FN_PREFIX> <NOT_CONTAINING> <LPAREN>
  src = IntervalFun()
  excluded = IntervalFun()
  <RPAREN>
  {
    return new NotContaining(src, excluded);
  }
}
/**
 * Parses <code>fn:containedBy(source reference)</code> and produces a
 * {@link ContainedBy} node; the first argument is the contained (inner) interval.
 */
private IntervalFunction IntervalContainedBy() : {
  IntervalFunction inner;
  IntervalFunction outer;
}
{
  <FN_PREFIX> <CONTAINED_BY> <LPAREN>
  inner = IntervalFun()
  outer = IntervalFun()
  <RPAREN>
  {
    return new ContainedBy(inner, outer);
  }
}
/**
 * Parses <code>fn:notContainedBy(source reference)</code> and produces a
 * {@link NotContainedBy} node; arguments are passed on in the order written.
 */
private IntervalFunction IntervalNotContainedBy() : {
  IntervalFunction inner;
  IntervalFunction outer;
}
{
  <FN_PREFIX> <NOT_CONTAINED_BY> <LPAREN>
  inner = IntervalFun()
  outer = IntervalFun()
  <RPAREN>
  {
    return new NotContainedBy(inner, outer);
  }
}
/**
 * Parses <code>fn:within(source positions reference)</code>, where
 * <code>positions</code> is a number placed between the two sub-functions, and
 * produces a {@link Within} node.
 */
private IntervalFunction IntervalWithin() : {
  IntervalFunction src;
  Token distance;
  IntervalFunction ref;
}
{
  <FN_PREFIX> <WITHIN> <LPAREN>
  src = IntervalFun() distance = <NUMBER> ref = IntervalFun()
  <RPAREN>
  {
    return new Within(src, parseInt(distance), ref);
  }
}
/**
 * Parses <code>fn:extend(source before after)</code>, where <code>before</code> and
 * <code>after</code> are numbers, and produces an {@link Extend} node.
 */
private IntervalFunction IntervalExtend() : {
  IntervalFunction src;
  Token lead;
  Token trail;
}
{
  <FN_PREFIX> <EXTEND> <LPAREN>
  src = IntervalFun() lead = <NUMBER> trail = <NUMBER>
  <RPAREN>
  {
    return new Extend(src, parseInt(lead), parseInt(trail));
  }
}
/**
 * Parses <code>fn:notWithin(source positions reference)</code>, where
 * <code>positions</code> is a number placed between the two sub-functions, and
 * produces a {@link NotWithin} node.
 */
private IntervalFunction IntervalNotWithin() : {
  IntervalFunction src;
  Token distance;
  IntervalFunction ref;
}
{
  <FN_PREFIX> <NOT_WITHIN> <LPAREN>
  src = IntervalFun() distance = <NUMBER> ref = IntervalFun()
  <RPAREN>
  {
    return new NotWithin(src, parseInt(distance), ref);
  }
}
/**
 * Parses <code>fn:overlapping(source reference)</code> and produces an
 * {@link Overlapping} node.
 */
private IntervalFunction IntervalOverlapping() : {
  IntervalFunction src;
  IntervalFunction ref;
}
{
  <FN_PREFIX> <OVERLAPPING> <LPAREN>
  src = IntervalFun()
  ref = IntervalFun()
  <RPAREN>
  {
    return new Overlapping(src, ref);
  }
}
/**
 * Parses <code>fn:nonOverlapping(source reference)</code> and produces a
 * {@link NonOverlapping} node.
 */
private IntervalFunction IntervalNonOverlapping() : {
  IntervalFunction src;
  IntervalFunction ref;
}
{
  <FN_PREFIX> <NON_OVERLAPPING> <LPAREN>
  src = IntervalFun()
  ref = IntervalFun()
  <RPAREN>
  {
    return new NonOverlapping(src, ref);
  }
}
/**
 * Parses <code>fn:wildcard(arg)</code> and produces a {@link Wildcard} node.
 *
 * <p>The argument is a term or number (its image is used as-is) or a quoted string
 * (its first and last character, the quotes, are stripped).
 */
private IntervalFunction IntervalWildcard() : {
  Token arg;
  String pattern;
}
{
  <FN_PREFIX> <WILDCARD> <LPAREN>
  (
    ( arg = <TERM> | arg = <NUMBER> )
    { pattern = arg.image; }
  | arg = <QUOTED>
    { pattern = arg.image.substring(1, arg.image.length() - 1); }
  )
  <RPAREN>
  {
    return new Wildcard(pattern);
  }
}
/**
 * Parses a plain text argument and produces an {@link AnalyzedText} node.
 *
 * <p>A quoted string has its first and last character (the quotes) stripped; a term
 * or number is passed on with its image unchanged.
 */
private IntervalFunction IntervalText() : {
  Token text;
}
{
  text = <QUOTED>
  {
    return new AnalyzedText(text.image.substring(1, text.image.length() - 1));
  }
| ( text = <TERM> | text = <NUMBER> )
  {
    return new AnalyzedText(text.image);
  }
}
/**
* Score boost modifier.
*
@ -313,7 +672,7 @@ private QueryNode Boost(QueryNode node) : {
{
<CARAT> boost = <NUMBER>
{
return node == null ? node : new BoostQueryNode(node, Float.parseFloat(boost.image));
return node == null ? node : new BoostQueryNode(node, parseFloat(boost));
}
}
@ -332,7 +691,7 @@ private QueryNode FuzzyOp(CharSequence field, Token term, QueryNode node) : {
{
float fms = org.apache.lucene.search.FuzzyQuery.defaultMaxEdits;
if (similarity != null) {
fms = Float.parseFloat(similarity.image);
fms = parseFloat(similarity);
if (fms < 0.0f) {
throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
} else if (fms >= 1.0f && fms != (int) fms) {
@ -411,7 +770,10 @@ private QueryNode Term(CharSequence field) : {
{
(
term = <REGEXPTERM>
{ q = new RegexpQueryNode(field, term.image.substring(1, term.image.length() - 1)); }
{
String v = term.image.substring(1, term.image.length() - 1);
q = new RegexpQueryNode(field, v, 0, v.length());
}
| (term = <TERM> | term = <NUMBER>)
{ q = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
( q = FuzzyOp(field, term, q) )?
@ -442,7 +804,7 @@ private QueryNode QuotedTerm(CharSequence field) : {
String image = term.image.substring(1, term.image.length() - 1);
q = new QuotedFieldQueryNode(field, discardEscapeChar(image), term.beginColumn + 1, term.endColumn - 1);
}
( <TILDE> slop = <NUMBER> { q = new SlopQueryNode(q, (int) Float.parseFloat(slop.image)); } )?
( <TILDE> slop = <NUMBER> { q = new SlopQueryNode(q, parseInt(slop)); } )?
{
return q;
}

View File

@ -29,11 +29,11 @@ public interface StandardSyntaxParserConstants {
/** RegularExpression Id. */
int NOT = 10;
/** RegularExpression Id. */
int PLUS = 11;
int FN_PREFIX = 11;
/** RegularExpression Id. */
int MINUS = 12;
int PLUS = 12;
/** RegularExpression Id. */
int LPAREN = 13;
int MINUS = 13;
/** RegularExpression Id. */
int RPAREN = 14;
/** RegularExpression Id. */
@ -65,20 +65,64 @@ public interface StandardSyntaxParserConstants {
/** RegularExpression Id. */
int RANGEEX_START = 28;
/** RegularExpression Id. */
int RANGE_TO = 29;
int LPAREN = 29;
/** RegularExpression Id. */
int RANGEIN_END = 30;
int ATLEAST = 30;
/** RegularExpression Id. */
int RANGEEX_END = 31;
int AFTER = 31;
/** RegularExpression Id. */
int RANGE_QUOTED = 32;
int BEFORE = 32;
/** RegularExpression Id. */
int RANGE_GOOP = 33;
int CONTAINED_BY = 33;
/** RegularExpression Id. */
int CONTAINING = 34;
/** RegularExpression Id. */
int EXTEND = 35;
/** RegularExpression Id. */
int FN_OR = 36;
/** RegularExpression Id. */
int MAXGAPS = 37;
/** RegularExpression Id. */
int MAXWIDTH = 38;
/** RegularExpression Id. */
int NON_OVERLAPPING = 39;
/** RegularExpression Id. */
int NOT_CONTAINED_BY = 40;
/** RegularExpression Id. */
int NOT_CONTAINING = 41;
/** RegularExpression Id. */
int NOT_WITHIN = 42;
/** RegularExpression Id. */
int ORDERED = 43;
/** RegularExpression Id. */
int OVERLAPPING = 44;
/** RegularExpression Id. */
int PHRASE = 45;
/** RegularExpression Id. */
int UNORDERED = 46;
/** RegularExpression Id. */
int UNORDERED_NO_OVERLAPS = 47;
/** RegularExpression Id. */
int WILDCARD = 48;
/** RegularExpression Id. */
int WITHIN = 49;
/** RegularExpression Id. */
int RANGE_TO = 50;
/** RegularExpression Id. */
int RANGEIN_END = 51;
/** RegularExpression Id. */
int RANGEEX_END = 52;
/** RegularExpression Id. */
int RANGE_QUOTED = 53;
/** RegularExpression Id. */
int RANGE_GOOP = 54;
/** Lexical state. */
int Range = 0;
int Function = 0;
/** Lexical state. */
int DEFAULT = 1;
int Range = 1;
/** Lexical state. */
int DEFAULT = 2;
/** Literal token values. */
String[] tokenImage = {
@ -93,9 +137,9 @@ public interface StandardSyntaxParserConstants {
"<AND>",
"<OR>",
"<NOT>",
"\"fn:\"",
"\"+\"",
"\"-\"",
"\"(\"",
"\")\"",
"\":\"",
"\"=\"",
@ -111,11 +155,33 @@ public interface StandardSyntaxParserConstants {
"<REGEXPTERM>",
"\"[\"",
"\"{\"",
"\"(\"",
"<ATLEAST>",
"\"after\"",
"\"before\"",
"<CONTAINED_BY>",
"\"containing\"",
"\"extend\"",
"\"or\"",
"<MAXGAPS>",
"<MAXWIDTH>",
"<NON_OVERLAPPING>",
"<NOT_CONTAINED_BY>",
"<NOT_CONTAINING>",
"<NOT_WITHIN>",
"\"ordered\"",
"\"overlapping\"",
"\"phrase\"",
"\"unordered\"",
"<UNORDERED_NO_OVERLAPS>",
"\"wildcard\"",
"\"within\"",
"\"TO\"",
"\"]\"",
"\"}\"",
"<RANGE_QUOTED>",
"<RANGE_GOOP>",
"\"@\"",
};
}

View File

@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.processors;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
/**
 * Injects the analyzer configured under {@link ConfigurationKeys#ANALYZER} in the {@link
 * QueryConfigHandler} into every {@link
 * org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode} of the query tree.
 *
 * <p>The analyzer is looked up once per {@link #process(QueryNode)} call. A missing analyzer is
 * only reported (as a {@link QueryNodeException}) when an interval node is actually encountered.
 *
 * @see ConfigurationKeys#ANALYZER
 */
public class IntervalQueryNodeProcessor extends QueryNodeProcessorImpl {
  /** Analyzer read from the configuration at the start of the current processing run. */
  private Analyzer analyzer;

  /** Reads the configured analyzer, then runs the regular tree traversal. */
  @Override
  public QueryNode process(QueryNode queryTree) throws QueryNodeException {
    analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER);
    return super.process(queryTree);
  }

  /**
   * Hands the analyzer to interval nodes; every other node is returned untouched.
   *
   * @throws QueryNodeException if an interval node is found but no analyzer is configured
   */
  @Override
  protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
    if (!(node instanceof IntervalQueryNode)) {
      return node;
    }

    IntervalQueryNode intervalNode = (IntervalQueryNode) node;
    if (analyzer == null) {
      throw new QueryNodeException(
          new MessageImpl(QueryParserMessages.ANALYZER_REQUIRED, intervalNode.toString()));
    }

    intervalNode.setAnalyzer(analyzer);
    return node;
  }

  /** No post-processing is needed; the node is returned as-is. */
  @Override
  protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
    return node;
  }

  /** Children keep their original order. */
  @Override
  protected List<QueryNode> setChildrenOrder(List<QueryNode> children) throws QueryNodeException {
    return children;
  }
}

View File

@ -66,5 +66,6 @@ public class StandardQueryNodeProcessorPipeline extends QueryNodeProcessorPipeli
add(new DefaultPhraseSlopQueryNodeProcessor());
add(new BoostQueryNodeProcessor());
add(new MultiTermRewriteMethodProcessor());
add(new IntervalQueryNodeProcessor());
}
}

View File

@ -15,151 +15,29 @@
limitations under the License.
-->
<html>
<head>
<title>
QueryParsers
</title>
</head>
<body>
<h1>Apache Lucene QueryParsers.</h1>
<p>
This module provides a number of queryparsers:
<ul>
<li><a href="#classic">Classic</a>
<li><a href="#analyzing">Analyzing</a>
<li><a href="#complexphrase">Complex Phrase</a>
<li><a href="#extendable">Extendable</a>
<li><a href="#flexible">Flexible</a>
<li><a href="#surround">Surround</a>
<li><a href="#xml">XML</a>
</ul>
<hr>
<h2><a id="classic">Classic</a></h2>
A Simple Lucene QueryParser implemented with JavaCC.
<h2><a id="analyzing">Analyzing</a></h2>
QueryParser that passes Fuzzy-, Prefix-, Range-, and WildcardQuerys through the given analyzer.
<h2><a id="complexphrase">Complex Phrase</a></h2>
QueryParser which permits complex phrase query syntax eg "(john jon jonathan~) peters*"
<h2><a id="extendable">Extendable</a></h2>
Extendable QueryParser provides a simple and flexible extension mechanism by overloading query field names.
<h2><a id="flexible">Flexible</a></h2>
<p>
This project contains the new Lucene query parser implementation, which matches the syntax of the core QueryParser but offers a more modular architecture to enable customization.
</p>
<head>
<title>
QueryParsers
</title>
</head>
<body>
<h1>Apache Lucene QueryParsers.</h1>
<p>
It's currently divided in 2 main packages:
This module provides a number of query parsers:
<ul>
<li>{@link org.apache.lucene.queryparser.flexible.core}: it contains the query parser API classes, which should be extended by query parser implementations. </li>
<li>{@link org.apache.lucene.queryparser.flexible.standard}: it contains the current Lucene query parser implementation using the new query parser API.</li>
<li>{@linkplain org.apache.lucene.queryparser.flexible flexible query parser}
<li>{@linkplain org.apache.lucene.queryparser.classic classic query parser}
<li>{@linkplain org.apache.lucene.queryparser.complexPhrase complex phrase query parser}
<li>{@linkplain org.apache.lucene.queryparser.ext extendable query parser}
<li>{@linkplain org.apache.lucene.queryparser.surround surround query parser (span queries)}
<li>{@linkplain org.apache.lucene.queryparser.xml query parser building Query objects from XML}
</ul>
<h3>Features</h3>
<ol>
<li>Full support for boolean logic (not enabled)</li>
<li>QueryNode Trees - support for several syntaxes,
that can be converted into similar syntax QueryNode trees.</li>
<li>QueryNode Processors - Optimize, validate, rewrite the
QueryNode trees</li>
<li>Processors Pipelines - Select your favorite Processor
and build a processor pipeline, to implement the features you need</li>
<li>Config Interfaces - Allow the consumer of the Query Parser to implement
a diff Config Handler Objects to suite their needs.</li>
<li>Standard Builders - convert QueryNode's into several lucene
representations. Supported conversion is using a 2.4 compatible logic</li>
<li>QueryNode tree's can be converted to a lucene 2.4 syntax string, using toQueryString</li>
</ol>
<h3>Design</h3>
<p>
This new query parser was designed to have very generic
architecture, so that it can be easily used for different
products with varying query syntaxes. This code is much more
flexible and extensible than the Lucene query parser in 2.4.X.
</p>
<p>
The new query parser goal is to separate syntax and semantics of a query. E.g. 'a AND
b', '+a +b', 'AND(a,b)' could be different syntaxes for the same query.
It distinguishes the semantics of the different query components, e.g.
whether and how to tokenize/lemmatize/normalize the different terms or
which Query objects to create for the terms. It allows to
write a parser with a new syntax, while reusing the underlying
semantics, as quickly as possible.
</p>
<p>
The query parser has three layers and its core is what we call the
QueryNode tree. It is a tree that initially represents the syntax of the
original query, e.g. for 'a AND b':
</p>
<pre>
AND
/ \
A B
</pre>
<p>
The three layers are:
</p>
<dl>
<dt>QueryParser</dt>
<dd>
This layer is the text parsing layer which simply transforms the
query text string into a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} tree. Every text parser
must implement the interface {@link org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser}.
Lucene default implementations implements it using JavaCC.
</dd>
If you're new to query parsers, the {@linkplain org.apache.lucene.queryparser.flexible flexible query parser}'s
{@link org.apache.lucene.queryparser.flexible.standard.StandardQueryParser} is probably a good place to start.
<dt>QueryNodeProcessor</dt>
<dd>The query node processors do most of the work. It is in fact a
configurable chain of processors. Each processors can walk the tree and
modify nodes or even the tree's structure. That makes it possible to
e.g. do query optimization before the query is executed or to tokenize
terms.
</dd>
<dt>QueryBuilder</dt>
<dd>
The third layer is a configurable map of builders, which map {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} types to its specific
builder that will transform the QueryNode into Lucene Query object.
</dd>
</dl>
<p>
Furthermore, the query parser uses flexible configuration objects. It also uses message classes that
allow to attach resource bundles. This makes it possible to translate
messages, which is an important feature of a query parser.
</p>
<p>
This design allows to develop different query syntaxes very quickly.
</p>
<h3>StandardQueryParser and QueryParserWrapper</h3>
<p>
The classic Lucene query parser is located under
{@link org.apache.lucene.queryparser.classic}.
<p>
To make it simpler to use the new query parser
the class {@link org.apache.lucene.queryparser.flexible.standard.StandardQueryParser} may be helpful,
specially for people that do not want to extend the Query Parser.
It uses the default Lucene query processors, text parser and builders, so
you don't need to worry about dealing with those.
{@link org.apache.lucene.queryparser.flexible.standard.StandardQueryParser} usage:
<pre class="prettyprint">
StandardQueryParser qpHelper = new StandardQueryParser();
StandardQueryConfigHandler config = qpHelper.getQueryConfigHandler();
config.setAllowLeadingWildcard(true);
config.setAnalyzer(new WhitespaceAnalyzer());
Query query = qpHelper.parse("apache AND lucene", "defaultField");
</pre>
<h2><a id="surround">Surround</a></h2>
<p>
A QueryParser that supports the Span family of queries as well as pre and infix notation.
</p>
<h2><a id="xml">XML</a></h2>
A QueryParser that produces Lucene Query objects from XML streams.
</body>
</body>
</html>

View File

@ -58,3 +58,6 @@ UNSUPPORTED_NUMERIC_DATA_TYPE = Unsupported NumericField.DataType: {0}
#<CREATEDBY>Apache Lucene Community</CREATEDBY>
NUMERIC_CANNOT_BE_EMPTY = Field "{0}" is numeric and cannot have an empty value.
#<CREATEDBY>Apache Lucene Community</CREATEDBY>
ANALYZER_REQUIRED = An analyzer is required to parse interval sub-query "{0}"

View File

@ -0,0 +1,206 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.io.StringReader;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.queryparser.charstream.FastCharStream;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.LuceneTestCase;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;
import org.junit.Test;
/**
 * Tests the syntax enhancements of {@link StandardQueryParser}: the minimum-should-match operator
 * ({@code (...)@N}) and interval function sub-queries ({@code fn:...(...)}).
 */
public class TestStandardQPEnhancements extends LuceneTestCase {
  protected static final String FLD_DEFAULT = "defaultField";
  protected static final String FLD_WHITESPACE = "whitespaceField";

  /**
   * Creates a fresh {@link StandardQueryParser} for each call.
   *
   * <p>The parser uses an analyzer backed by a {@link MockTokenizer} configured with {@link
   * MockTokenizer#WHITESPACE}, {@code AND} as the default operator and an empty multi-field list.
   *
   * @return a newly configured parser instance
   */
  final StandardQueryParser getQueryParser() {
    var analyzer =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new MockTokenizer(MockTokenizer.WHITESPACE, true));
          }
        };
    var qp = new StandardQueryParser(analyzer);
    qp.setDefaultOperator(StandardQueryConfigHandler.Operator.AND);
    qp.setMultiFields(new String[] {});
    return qp;
  }

  /** The {@code @2} suffix on a boolean group must end up as minimum-should-match of 2. */
  @Test
  public void testMinShouldMatchOperator() throws Exception {
    Query parsed =
        parsedQuery(
            String.format(
                Locale.ROOT,
                "(%s:foo OR %s:bar OR %s:baz)@2",
                FLD_WHITESPACE,
                FLD_WHITESPACE,
                FLD_WHITESPACE));
    // Assert the type first so that a wrong query class fails with a readable message
    // instead of a bare ClassCastException.
    MatcherAssert.assertThat(parsed, Matchers.instanceOf(BooleanQuery.class));
    MatcherAssert.assertThat(
        ((BooleanQuery) parsed).getMinimumNumberShouldMatch(), Matchers.equalTo(2));
  }

  @Test
  public void testAtLeast() throws Exception {
    checkIntervalQueryNode("fn:atleast(3 FOO BAR baz)");
  }

  @Test
  public void testMaxWidth() throws Exception {
    checkIntervalQueryNode("fn:maxwidth(3 fn:atleast(2 foo bar baz))");
  }

  @Test
  public void testQuotedTerm() throws Exception {
    checkIntervalQueryNode("fn:atleast(2 \"foo\" \"BAR baz\")");
  }

  @Test
  public void testMaxGaps() throws Exception {
    checkIntervalQueryNode("fn:maxgaps(2 fn:unordered(foo BAR baz))");
  }

  @Test
  public void testOrdered() throws Exception {
    checkIntervalQueryNode("fn:ordered(foo BAR baz)");
  }

  @Test
  public void testUnordered() throws Exception {
    checkIntervalQueryNode("fn:unordered(foo BAR baz)");
  }

  @Test
  public void testOr() throws Exception {
    checkIntervalQueryNode("fn:or(foo baz)");
  }

  @Test
  public void testWildcard() throws Exception {
    checkIntervalQueryNode("fn:wildcard(foo*)");
  }

  @Test
  public void testPhrase() throws Exception {
    checkIntervalQueryNode("fn:phrase(abc def fn:or(baz boo))");
  }

  @Test
  public void testBefore() throws Exception {
    checkIntervalQueryNode("fn:before(abc fn:ordered(foo bar))");
  }

  @Test
  public void testAfter() throws Exception {
    checkIntervalQueryNode("fn:after(abc fn:ordered(foo bar))");
  }

  @Test
  public void testContaining() throws Exception {
    checkIntervalQueryNode("fn:containing(big small)");
  }

  @Test
  public void testContainedBy() throws Exception {
    checkIntervalQueryNode("fn:containedBy(small big)");
  }

  @Test
  public void testNotContaining() throws Exception {
    checkIntervalQueryNode("fn:notContaining(minuend subtrahend)");
  }

  @Test
  public void testNotContainedBy() throws Exception {
    checkIntervalQueryNode("fn:notContainedBy(small big)");
  }

  @Test
  public void testWithin() throws Exception {
    checkIntervalQueryNode("fn:within(small 2 fn:ordered(big foo))");
  }

  @Test
  public void testNotWithin() throws Exception {
    checkIntervalQueryNode("fn:notWithin(small 2 fn:ordered(big foo))");
  }

  @Test
  public void testOverlapping() throws Exception {
    checkIntervalQueryNode("fn:overlapping(fn:ordered(big foo) small)");
  }

  @Test
  public void testNonOverlapping() throws Exception {
    checkIntervalQueryNode("fn:nonOverlapping(fn:ordered(big foo) small)");
  }

  @Test
  public void testUnorderedNoOverlaps() throws Exception {
    checkIntervalQueryNode("fn:unorderedNoOverlaps(fn:ordered(big foo) small)");
  }

  @Test
  public void testExtend() throws Exception {
    checkIntervalQueryNode("fn:extend(fn:ordered(big foo) 2 5)");
  }

  /**
   * Checks that {@code query} is recognized as an interval function, both by the raw syntax parser
   * and by the full {@link StandardQueryParser} pipeline.
   *
   * <p>The raw {@link StandardSyntaxParser} must produce an {@link IntervalQueryNode}; the full
   * parser must return a non-null {@link Query}. The default field is supplied either through the
   * parser's multi-fields or as the explicit default field argument, chosen at random.
   *
   * @param query the query string to parse; expected to be a single interval function expression
   * @throws Exception if parsing fails
   */
  protected void checkIntervalQueryNode(String query) throws Exception {
    // Check raw parser first.
    var syntaxParser = new StandardSyntaxParser(new FastCharStream(new StringReader(query)));
    QueryNode queryNode = syntaxParser.TopLevelQuery(FLD_DEFAULT);
    MatcherAssert.assertThat(queryNode, Matchers.instanceOf(IntervalQueryNode.class));

    var queryParser = getQueryParser();
    Query parsedQuery;
    if (RandomizedTest.randomBoolean()) {
      queryParser.setMultiFields(new String[] {FLD_DEFAULT});
      parsedQuery = queryParser.parse(query, null);
    } else {
      parsedQuery = queryParser.parse(query, FLD_DEFAULT);
    }
    MatcherAssert.assertThat(parsedQuery, Matchers.notNullValue());

    // Always render the diagnostics so that the toString() paths of the node and of the parsed
    // query are exercised on every run; only print them in verbose mode so that regular test
    // runs stay quiet.
    IntervalQueryNode intervalQueryNode = (IntervalQueryNode) queryNode;
    intervalQueryNode.setAnalyzer(queryParser.getAnalyzer());
    String diagnostics =
        String.format(
            Locale.ROOT, "query: %s%n node: %s%n query: %s%n", query, queryNode, parsedQuery);
    if (LuceneTestCase.VERBOSE) {
      System.out.print(diagnostics);
    }
  }

  /**
   * Parses {@code query} without a default field and returns its string form.
   *
   * @param query the query string to parse
   * @return {@code toString("<no-default>")} of the parsed query
   * @throws Exception if parsing fails
   */
  protected String parsed(String query) throws Exception {
    return parsedQuery(query).toString("<no-default>");
  }

  /**
   * Parses {@code query} with a parser from {@link #getQueryParser()} and no default field.
   *
   * @param query the query string to parse
   * @return the parsed query
   * @throws Exception if parsing fails
   */
  protected Query parsedQuery(String query) throws Exception {
    return getQueryParser().parse(query, /* no default field. */ null);
  }
}

View File

@ -2854,6 +2854,12 @@ public abstract class LuceneTestCase extends Assert {
void run() throws Throwable;
}
/**
 * A {@link java.util.function.Consumer} that can throw any checked exception.
 *
 * <p>Unlike {@link java.util.function.Consumer#accept(Object)}, the single abstract method of this
 * interface is declared with {@code throws Exception}, so a lambda or method reference used as a
 * {@code ThrowingConsumer} may let checked exceptions propagate to the caller.
 *
 * @param <T> the type of the value passed to {@link #accept(Object)}
 */
@FunctionalInterface
public interface ThrowingConsumer<T> {
/**
 * Performs this operation on the given argument.
 *
 * @param t the input argument
 * @throws Exception if the operation fails
 */
void accept(T t) throws Exception;
}
/** Checks a specific exception class is thrown by the given runnable, and returns it. */
public static <T extends Throwable> T expectThrows(
Class<T> expectedType, ThrowingRunnable runnable) {