mirror of https://github.com/apache/lucene.git
LUCENE-10223: interval support in standard syntax parser (#429)
This commit is contained in:
parent
53586d4231
commit
f725b27e12
|
@ -481,8 +481,11 @@ class RenderJavadocTask extends DefaultTask {
|
|||
|
||||
// append some special table css, prettify css
|
||||
ant.concat(destfile: "${outputDir}/stylesheet.css", append: "true", fixlastline: "true", encoding: "UTF-8") {
|
||||
filelist(dir: taskResources, files: "table_padding.css")
|
||||
filelist(dir: project.file("${taskResources}/prettify"), files: "prettify.css")
|
||||
filelist(dir: taskResources, files:
|
||||
["table_padding.css",
|
||||
"custom_styles.css",
|
||||
"prettify/prettify.css"].join(" ")
|
||||
)
|
||||
}
|
||||
|
||||
// append prettify to scripts
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Used by interval function documentation.
|
||||
*/
|
||||
|
||||
.example.sentence {
|
||||
text-align: center;
|
||||
padding: 1rem 0 2rem;
|
||||
font-style: italic;
|
||||
}
|
||||
.example.sentence.with-highlights {
|
||||
line-height: 150%;
|
||||
}
|
||||
.example.sentence.with-positions sub {
|
||||
color: red;
|
||||
font-weight: bold;
|
||||
}
|
||||
.example.sentence.left-aligned {
|
||||
text-align: left;
|
||||
}
|
||||
span.highlight {
|
||||
padding-bottom: 2px;
|
||||
border-bottom: 2px solid red;
|
||||
}
|
||||
|
|
@ -38,6 +38,10 @@ API Changes
|
|||
New Features
|
||||
---------------------
|
||||
|
||||
* LUCENE-10223: Add interval function support to StandardQueryParser. Add min-should-match operator
|
||||
support to StandardQueryParser. Update and clean up package documentation in flexible query parser
|
||||
module. (Dawid Weiss, Alan Woodward)
|
||||
|
||||
* LUCENE-10220: Add a utility method to get IntervalSource from analyzed text (or token stream).
|
||||
(Uwe Schindler, Dawid Weiss, Alan Woodward)
|
||||
|
||||
|
|
|
@ -17,6 +17,13 @@
|
|||
|
||||
# Apache Lucene Migration Guide
|
||||
|
||||
## Minor syntactical changes in StandardQueryParser (Lucene 9.1)
|
||||
|
||||
LUCENE-10223 adds interval functions and min-should-match support to StandardQueryParser. This
|
||||
means that interval function prefixes ("fn:") and the '@' character after parentheses will
|
||||
parse differently than before. If you need the exact previous behavior, clone the StandardSyntaxParser from the previous version of Lucene and create a custom query parser
|
||||
with that parser.
|
||||
|
||||
## Directory API is now little endian (LUCENE-9047)
|
||||
|
||||
DataOutput's writeShort, writeInt, and writeLong methods now encode with
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
package org.apache.lucene.search.matchhighlight;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.RandomizedTest;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Consumer;
|
||||
|
@ -34,7 +33,7 @@ import org.apache.lucene.search.Sort;
|
|||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
|
||||
* Utility class for building an ephemeral document index and running a block of code on its reader.
|
||||
|
@ -82,8 +81,8 @@ class IndexBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
public IndexBuilder build(Analyzer analyzer, IOUtils.IOConsumer<DirectoryReader> block)
|
||||
throws IOException {
|
||||
public IndexBuilder build(
|
||||
Analyzer analyzer, LuceneTestCase.ThrowingConsumer<DirectoryReader> block) throws Exception {
|
||||
IndexWriterConfig config = new IndexWriterConfig(analyzer);
|
||||
config.setIndexSort(new Sort(new SortField(FLD_SORT_ORDER, SortField.Type.LONG)));
|
||||
try (Directory directory = new ByteBuffersDirectory()) {
|
||||
|
|
|
@ -49,6 +49,7 @@ import org.apache.lucene.index.IndexableField;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.intervals.IntervalQuery;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -127,7 +128,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testBasicUsage() throws IOException {
|
||||
public void testBasicUsage() throws Exception {
|
||||
new IndexBuilder(this::toField)
|
||||
.doc(FLD_TEXT1, "foo bar baz")
|
||||
.doc(FLD_TEXT1, "bar foo baz")
|
||||
|
@ -237,7 +238,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testSynonymHighlight() throws IOException {
|
||||
public void testSynonymHighlight() throws Exception {
|
||||
// There is nothing special needed to highlight or process complex queries, synonyms, etc.
|
||||
// Synonyms defined in the constructor of this class.
|
||||
new IndexBuilder(this::toField)
|
||||
|
@ -268,7 +269,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testAnalyzedTextIntervals() throws IOException {
|
||||
public void testAnalyzedTextIntervals() throws Exception {
|
||||
SynonymMap synonymMap =
|
||||
buildSynonymMap(
|
||||
new String[][] {
|
||||
|
@ -319,7 +320,229 @@ public class TestMatchHighlighter extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testCustomFieldHighlightHandling() throws IOException {
|
||||
public void testStandardQueryParserIntervalFunctions() throws Exception {
|
||||
Analyzer analyzer =
|
||||
new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new StandardTokenizer();
|
||||
TokenStream ts = tokenizer;
|
||||
ts = new LowerCaseFilter(ts);
|
||||
return new TokenStreamComponents(tokenizer, ts);
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: the highlights are different when the field is indexed with offsets. Weird.
|
||||
// String field = FLD_TEXT1;
|
||||
String field = FLD_TEXT2;
|
||||
new IndexBuilder(this::toField)
|
||||
// Just one document and multiple interval queries.
|
||||
.doc(field, "The quick brown fox jumps over the lazy dog")
|
||||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.
|
||||
|
||||
MatchHighlighter highlighter =
|
||||
new MatchHighlighter(searcher, analyzer)
|
||||
.appendFieldHighlighter(
|
||||
FieldValueHighlighters.highlighted(
|
||||
80 * 3, 1, new PassageFormatter("...", ">", "<"), fld -> true))
|
||||
.appendFieldHighlighter(FieldValueHighlighters.skipRemaining());
|
||||
|
||||
StandardQueryParser qp = new StandardQueryParser(analyzer);
|
||||
|
||||
// Run all pairs of query-expected highlight.
|
||||
List<String> errors = new ArrayList<>();
|
||||
for (var queryHighlightPair :
|
||||
new String[][] {
|
||||
{
|
||||
"fn:ordered(brown dog)",
|
||||
"0. %s: The quick >brown fox jumps over the lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:within(fn:or(lazy quick) 1 fn:or(dog fox))",
|
||||
"0. %s: The quick brown fox jumps over the >lazy< dog"
|
||||
},
|
||||
{
|
||||
"fn:containedBy(fox fn:ordered(brown fox dog))",
|
||||
"0. %s: The quick brown >fox< jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)",
|
||||
"0. %s: The >quick >brown fox jumps over the lazy<<> dog<"
|
||||
},
|
||||
{
|
||||
"fn:atLeast(2 quick fox \"furry dog\")",
|
||||
"0. %s: The >quick brown fox< jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
|
||||
"0. %s: The quick brown fox jumps over the >lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
|
||||
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
|
||||
"0. %s: The quick brown fox jumps over the >lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
|
||||
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:or(quick \"fox\")",
|
||||
"0. %s: The >quick< brown >fox< jumps over the lazy dog"
|
||||
},
|
||||
{"fn:or(\"quick fox\")"},
|
||||
{
|
||||
"fn:phrase(quick brown fox)",
|
||||
"0. %s: The >quick brown fox< jumps over the lazy dog"
|
||||
},
|
||||
{"fn:wildcard(jump*)", "0. %s: The quick brown fox >jumps< over the lazy dog"},
|
||||
{"fn:wildcard(br*n)", "0. %s: The quick >brown< fox jumps over the lazy dog"},
|
||||
{"fn:or(dog fox)", "0. %s: The quick brown >fox< jumps over the lazy >dog<"},
|
||||
{
|
||||
"fn:phrase(fn:ordered(quick fox) jumps)",
|
||||
"0. %s: The >quick brown fox jumps< over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:ordered(quick jumps dog)",
|
||||
"0. %s: The >quick brown fox jumps over the lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:ordered(quick fn:or(fox dog))",
|
||||
"0. %s: The >quick brown fox< jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:ordered(quick jumps fn:or(fox dog))",
|
||||
"0. %s: The >quick brown fox jumps over the lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:unordered(dog jumps quick)",
|
||||
"0. %s: The >quick brown fox jumps over the lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:unordered(fn:or(fox dog) quick)",
|
||||
"0. %s: The >quick brown fox< jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))",
|
||||
"0. %s: The quick >brown fox jumps< over the lazy dog"
|
||||
},
|
||||
{"fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))"},
|
||||
{"fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))"},
|
||||
{
|
||||
"fn:before(fn:or(brown lazy) fox)",
|
||||
"0. %s: The quick >brown< fox jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:before(fn:or(brown lazy) fn:or(dog fox))",
|
||||
"0. %s: The quick >brown< fox jumps over the >lazy< dog"
|
||||
},
|
||||
{
|
||||
"fn:after(fn:or(brown lazy) fox)",
|
||||
"0. %s: The quick brown fox jumps over the >lazy< dog"
|
||||
},
|
||||
{
|
||||
"fn:after(fn:or(brown lazy) fn:or(dog fox))",
|
||||
"0. %s: The quick brown fox jumps over the >lazy< dog"
|
||||
},
|
||||
{"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
|
||||
{
|
||||
"fn:extend(fn:or(dog fox) 2 0)",
|
||||
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:within(fn:or(fox dog) 1 fn:or(quick lazy))",
|
||||
"0. %s: The quick brown fox jumps over the lazy >dog<"
|
||||
},
|
||||
{
|
||||
"fn:within(fn:or(fox dog) 2 fn:or(quick lazy))",
|
||||
"0. %s: The quick brown >fox< jumps over the lazy >dog<"
|
||||
},
|
||||
{
|
||||
"fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))",
|
||||
"0. %s: The quick brown >fox< jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
|
||||
"0. %s: The quick brown fox jumps over the lazy >dog<"
|
||||
},
|
||||
{
|
||||
"fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))",
|
||||
"0. %s: The quick brown >fox< jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
|
||||
"0. %s: The quick brown >fox< jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))",
|
||||
"0. %s: The quick brown fox jumps over the lazy >dog<"
|
||||
},
|
||||
{
|
||||
"fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
|
||||
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:containing(fn:atLeast(2 quick fox dog) jumps)",
|
||||
"0. %s: The quick brown >fox jumps over the lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)",
|
||||
"0. %s: The quick brown fox jumps over >the lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
|
||||
"0. %s: The quick brown fox jumps over the >lazy dog<"
|
||||
},
|
||||
{
|
||||
"fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))",
|
||||
"0. %s: The quick >brown fox< jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
|
||||
"0. %s: The quick brown fox jumps over the lazy >dog<"
|
||||
},
|
||||
{
|
||||
"fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))",
|
||||
"0. %s: The quick >brown fox< jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
|
||||
"0. %s: The quick brown >fox< jumps over the lazy dog"
|
||||
},
|
||||
}) {
|
||||
assert queryHighlightPair.length >= 1;
|
||||
String queryString = queryHighlightPair[0];
|
||||
var query = qp.parse(queryString, field);
|
||||
var expected =
|
||||
Arrays.stream(queryHighlightPair)
|
||||
.skip(1)
|
||||
.map(v -> String.format(Locale.ROOT, v, field))
|
||||
.toArray(String[]::new);
|
||||
|
||||
try {
|
||||
assertHighlights(
|
||||
toDocList(
|
||||
highlighter.highlight(searcher.search(query, 10, sortOrder), query)),
|
||||
expected);
|
||||
} catch (AssertionError e) {
|
||||
errors.add("MISMATCH: query: " + queryString + "\n" + e.getMessage());
|
||||
}
|
||||
}
|
||||
if (errors.size() > 0) {
|
||||
throw new AssertionError(String.join("\n\n", errors));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCustomFieldHighlightHandling() throws Exception {
|
||||
// Match highlighter is a showcase of individual components in this package, suitable
|
||||
// to create any kind of field-display designs.
|
||||
//
|
||||
|
@ -427,7 +650,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testHighlightMoreQueriesAtOnceShowoff() throws IOException {
|
||||
public void testHighlightMoreQueriesAtOnceShowoff() throws Exception {
|
||||
// Match highlighter underlying components are powerful enough to build interesting,
|
||||
// if not always super-practical, things. In this case, we would like to highlight
|
||||
// a set of matches of *more than one* query over the same set of input documents. This includes
|
||||
|
@ -566,14 +789,15 @@ public class TestMatchHighlighter extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
if (!Arrays.equals(
|
||||
Stream.of(expectedFormattedLines).map(String::trim).toArray(),
|
||||
actualLines.stream().map(String::trim).toArray())) {
|
||||
var expectedTrimmed =
|
||||
Stream.of(expectedFormattedLines).map(String::trim).collect(Collectors.toList());
|
||||
var actualTrimmed = actualLines.stream().map(String::trim).collect(Collectors.toList());
|
||||
if (!Objects.equals(expectedTrimmed, actualTrimmed)) {
|
||||
throw new AssertionError(
|
||||
"Actual hits were:\n"
|
||||
+ String.join("\n", actualLines)
|
||||
+ String.join("\n", actualTrimmed)
|
||||
+ "\n\nbut expected them to be:\n"
|
||||
+ String.join("\n", expectedFormattedLines));
|
||||
+ String.join("\n", expectedTrimmed));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -64,6 +64,7 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
@ -158,16 +159,16 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
};
|
||||
|
||||
@Test
|
||||
public void testTermQueryWithOffsets() throws IOException {
|
||||
public void testTermQueryWithOffsets() throws Exception {
|
||||
checkTermQuery(FLD_TEXT_POS_OFFS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTermQueryWithPositions() throws IOException {
|
||||
public void testTermQueryWithPositions() throws Exception {
|
||||
checkTermQuery(FLD_TEXT_POS);
|
||||
}
|
||||
|
||||
private void checkTermQuery(String field) throws IOException {
|
||||
private void checkTermQuery(String field) throws Exception {
|
||||
new IndexBuilder(this::toField)
|
||||
.doc(field, "foo bar baz")
|
||||
.doc(field, "bar foo baz")
|
||||
|
@ -176,7 +177,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, new TermQuery(new Term(field, "foo"))),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: '>foo< bar baz')", field),
|
||||
|
@ -186,16 +187,16 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testBooleanMultifieldQueryWithOffsets() throws IOException {
|
||||
public void testBooleanMultifieldQueryWithOffsets() throws Exception {
|
||||
checkBooleanMultifieldQuery(FLD_TEXT_POS_OFFS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBooleanMultifieldQueryWithPositions() throws IOException {
|
||||
public void testBooleanMultifieldQueryWithPositions() throws Exception {
|
||||
checkBooleanMultifieldQuery(FLD_TEXT_POS);
|
||||
}
|
||||
|
||||
private void checkBooleanMultifieldQuery(String field) throws IOException {
|
||||
private void checkBooleanMultifieldQuery(String field) throws Exception {
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new PhraseQuery(1, field, "foo", "baz"), BooleanClause.Occur.SHOULD)
|
||||
|
@ -210,7 +211,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, query),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: '>foo bar baz< abc')", field),
|
||||
|
@ -219,16 +220,16 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testVariousQueryTypesWithOffsets() throws IOException {
|
||||
public void testVariousQueryTypesWithOffsets() throws Exception {
|
||||
checkVariousQueryTypes(FLD_TEXT_POS_OFFS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVariousQueryTypesWithPositions() throws IOException {
|
||||
public void testVariousQueryTypesWithPositions() throws Exception {
|
||||
checkVariousQueryTypes(FLD_TEXT_POS);
|
||||
}
|
||||
|
||||
private void checkVariousQueryTypes(String field) throws IOException {
|
||||
private void checkVariousQueryTypes(String field) throws Exception {
|
||||
new IndexBuilder(this::toField)
|
||||
.doc(field, "foo bar baz abc")
|
||||
.doc(field, "bar foo baz def")
|
||||
|
@ -236,46 +237,46 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("foo baz", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: '>foo< bar >baz< abc')", field),
|
||||
fmt("1: (%s: 'bar >foo< >baz< def')", field),
|
||||
fmt("2: (%s: 'bar >baz< >foo< xyz')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("foo OR xyz", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: '>foo< bar baz abc')", field),
|
||||
fmt("1: (%s: 'bar >foo< baz def')", field),
|
||||
fmt("2: (%s: 'bar baz >foo< >xyz<')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("bas~2", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: 'foo >bar< >baz< >abc<')", field),
|
||||
fmt("1: (%s: '>bar< foo >baz< def')", field),
|
||||
fmt("2: (%s: '>bar< >baz< foo xyz')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("\"foo bar\"", field)),
|
||||
containsInAnyOrder((fmt("0: (%s: '>foo bar< baz abc')", field))));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("\"foo bar\"~3", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: '>foo bar< baz abc')", field),
|
||||
fmt("1: (%s: '>bar foo< baz def')", field),
|
||||
fmt("2: (%s: '>bar baz foo< xyz')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("ba*", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: 'foo >bar< >baz< abc')", field),
|
||||
fmt("1: (%s: '>bar< foo >baz< def')", field),
|
||||
fmt("2: (%s: '>bar< >baz< foo xyz')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("[bar TO bas]", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: 'foo >bar< baz abc')", field),
|
||||
|
@ -284,14 +285,15 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
|
||||
// Note how document '2' has 'bar' that isn't highlighted (because this
|
||||
// document is excluded in the first clause).
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("([bar TO baz] -xyz) OR baz", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: 'foo >bar< >>baz<< abc')", field),
|
||||
fmt("1: (%s: '>bar< foo >>baz<< def')", field),
|
||||
fmt("2: (%s: 'bar >baz< foo xyz')", field)));
|
||||
|
||||
assertThat(highlights(reader, new MatchAllDocsQuery()), Matchers.hasSize(0));
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, new MatchAllDocsQuery()), Matchers.hasSize(0));
|
||||
});
|
||||
|
||||
new IndexBuilder(this::toField)
|
||||
|
@ -301,7 +303,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("[bar TO baz] -bar", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: 'foo >baz< foo')", field),
|
||||
|
@ -310,7 +312,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testIntervalQueryHighlightCrossingMultivalueBoundary() throws IOException {
|
||||
public void testIntervalQueryHighlightCrossingMultivalueBoundary() throws Exception {
|
||||
String field = FLD_TEXT_POS;
|
||||
new IndexBuilder(this::toField)
|
||||
.doc(field, "foo", "bar")
|
||||
|
@ -328,7 +330,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testIntervalQueries() throws IOException {
|
||||
public void testIntervalQueries() throws Exception {
|
||||
String field = FLD_TEXT_POS_OFFS;
|
||||
|
||||
new IndexBuilder(this::toField)
|
||||
|
@ -338,7 +340,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
new IntervalQuery(
|
||||
|
@ -349,7 +351,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
Intervals.term("baz")))),
|
||||
containsInAnyOrder(fmt("1: (field_text_offs: '>bas baz foo<')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
new IntervalQuery(
|
||||
|
@ -359,7 +361,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
Intervals.unordered(Intervals.term("foo"), Intervals.term("bar"))))),
|
||||
containsInAnyOrder(fmt("2: (field_text_offs: '>bar baz foo< xyz')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
new IntervalQuery(
|
||||
|
@ -369,7 +371,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
Intervals.term("foo")))),
|
||||
containsInAnyOrder(fmt("2: (field_text_offs: '>bar baz foo< xyz')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
new IntervalQuery(
|
||||
|
@ -379,7 +381,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
Intervals.unordered(Intervals.term("foo"), Intervals.term("bar"))))),
|
||||
containsInAnyOrder(fmt("2: (field_text_offs: '>bar baz foo< xyz')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
new IntervalQuery(
|
||||
|
@ -392,7 +394,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testDegenerateIntervalsWithPositions() throws IOException {
|
||||
public void testDegenerateIntervalsWithPositions() throws Exception {
|
||||
testDegenerateIntervals(FLD_TEXT_POS);
|
||||
}
|
||||
|
||||
|
@ -401,23 +403,23 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
bugUrl =
|
||||
"https://issues.apache.org/jira/browse/LUCENE-9634: "
|
||||
+ "Highlighting of degenerate spans on fields with offsets doesn't work properly")
|
||||
public void testDegenerateIntervalsWithOffsets() throws IOException {
|
||||
public void testDegenerateIntervalsWithOffsets() throws Exception {
|
||||
testDegenerateIntervals(FLD_TEXT_POS_OFFS);
|
||||
}
|
||||
|
||||
public void testDegenerateIntervals(String field) throws IOException {
|
||||
public void testDegenerateIntervals(String field) throws Exception {
|
||||
new IndexBuilder(this::toField)
|
||||
.doc(field, fmt("foo %s bar", STOPWORD1))
|
||||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
new IntervalQuery(field, Intervals.extend(Intervals.term("bar"), 1, 3))),
|
||||
containsInAnyOrder(fmt("0: (%s: 'foo %s >bar<')", field, STOPWORD1)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
new IntervalQuery(field, Intervals.extend(Intervals.term("bar"), 5, 100))),
|
||||
|
@ -426,16 +428,16 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testMultivaluedFieldsWithOffsets() throws IOException {
|
||||
public void testMultivaluedFieldsWithOffsets() throws Exception {
|
||||
checkMultivaluedFields(FLD_TEXT_POS_OFFS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultivaluedFieldsWithPositions() throws IOException {
|
||||
public void testMultivaluedFieldsWithPositions() throws Exception {
|
||||
checkMultivaluedFields(FLD_TEXT_POS);
|
||||
}
|
||||
|
||||
public void checkMultivaluedFields(String field) throws IOException {
|
||||
public void checkMultivaluedFields(String field) throws Exception {
|
||||
new IndexBuilder(this::toField)
|
||||
.doc(field, "foo bar", "baz abc", "bad baz")
|
||||
.doc(field, "bar foo", "baz def")
|
||||
|
@ -443,7 +445,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply("baz", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: '>baz< abc | bad >baz<')", field),
|
||||
|
@ -453,7 +455,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testMultiFieldHighlights() throws IOException {
|
||||
public void testMultiFieldHighlights() throws Exception {
|
||||
for (String[] fieldPairs :
|
||||
new String[][] {
|
||||
{FLD_TEXT_POS_OFFS1, FLD_TEXT_POS_OFFS2},
|
||||
|
@ -477,7 +479,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.sorted()
|
||||
.collect(Collectors.joining(""));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
stdQueryParser.apply(field1 + ":baz" + " OR " + field2 + ":bar", field1)),
|
||||
|
@ -491,7 +493,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
* org.apache.lucene.search.BooleanClause.Occur#SHOULD} clauses. Check that this isn't the case.
|
||||
*/
|
||||
@Test
|
||||
public void testNoRewrite() throws IOException {
|
||||
public void testNoRewrite() throws Exception {
|
||||
String field1 = FLD_TEXT_POS_OFFS1;
|
||||
String field2 = FLD_TEXT_POS_OFFS2;
|
||||
|
||||
|
@ -510,13 +512,13 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
analyzer,
|
||||
reader -> {
|
||||
String expected = fmt("0: (%s: '>0100<')(%s: 'loo >bar<')", field1, field2);
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
stdQueryParser.apply(fmt("+%s:01* OR %s:bar", field1, field2), field1)),
|
||||
containsInAnyOrder(expected));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
stdQueryParser.apply(fmt("+%s:01* AND %s:bar", field1, field2), field1)),
|
||||
|
@ -525,22 +527,22 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testNestedQueryHitsWithOffsets() throws IOException {
|
||||
public void testNestedQueryHitsWithOffsets() throws Exception {
|
||||
checkNestedQueryHits(FLD_TEXT_POS_OFFS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNestedQueryHitsWithPositions() throws IOException {
|
||||
public void testNestedQueryHitsWithPositions() throws Exception {
|
||||
checkNestedQueryHits(FLD_TEXT_POS);
|
||||
}
|
||||
|
||||
public void checkNestedQueryHits(String field) throws IOException {
|
||||
public void checkNestedQueryHits(String field) throws Exception {
|
||||
new IndexBuilder(this::toField)
|
||||
.doc(field, "foo bar baz abc")
|
||||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
new BooleanQuery.Builder()
|
||||
|
@ -549,7 +551,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build()),
|
||||
containsInAnyOrder(fmt("0: (%s: '>foo >bar< baz< abc')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
new BooleanQuery.Builder()
|
||||
|
@ -571,7 +573,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
checkGraphQuery(FLD_TEXT_SYNONYMS_POS);
|
||||
}
|
||||
|
||||
private void checkGraphQuery(String field) throws IOException {
|
||||
private void checkGraphQuery(String field) throws Exception {
|
||||
new IndexBuilder(this::toField)
|
||||
.doc(field, "foo bar baz")
|
||||
.doc(field, "bar foo baz")
|
||||
|
@ -580,25 +582,25 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, new TermQuery(new Term(field, "syn1"))),
|
||||
containsInAnyOrder(fmt("0: (%s: '>foo bar< baz')", field)));
|
||||
|
||||
// [syn2 syn3] = baz
|
||||
// so both these queries highlight baz.
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, new TermQuery(new Term(field, "syn3"))),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: 'foo bar >baz<')", field),
|
||||
fmt("1: (%s: 'bar foo >baz<')", field),
|
||||
fmt("2: (%s: 'bar >baz< foo')", field)));
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply(field + ":\"syn2 syn3\"", field)),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: 'foo bar >baz<')", field),
|
||||
fmt("1: (%s: 'bar foo >baz<')", field),
|
||||
fmt("2: (%s: 'bar >baz< foo')", field)));
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, stdQueryParser.apply(field + ":\"foo syn2 syn3\"", field)),
|
||||
containsInAnyOrder(fmt("1: (%s: 'bar >foo baz<')", field)));
|
||||
});
|
||||
|
@ -614,7 +616,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
checkSpanQueries(FLD_TEXT_POS);
|
||||
}
|
||||
|
||||
private void checkSpanQueries(String field) throws IOException {
|
||||
private void checkSpanQueries(String field) throws Exception {
|
||||
new IndexBuilder(this::toField)
|
||||
.doc(field, "foo bar baz")
|
||||
.doc(field, "bar foo baz")
|
||||
|
@ -623,7 +625,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
SpanNearQuery.newOrderedNearQuery(field)
|
||||
|
@ -632,7 +634,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build()),
|
||||
containsInAnyOrder(fmt("1: (%s: '>bar foo< baz')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
SpanNearQuery.newOrderedNearQuery(field)
|
||||
|
@ -642,7 +644,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build()),
|
||||
containsInAnyOrder(fmt("2: (%s: '>bar baz foo<')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
SpanNearQuery.newUnorderedNearQuery(field)
|
||||
|
@ -653,7 +655,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
fmt("0: (%s: '>foo bar< baz')", field),
|
||||
fmt("1: (%s: '>bar foo< baz')", field)));
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(
|
||||
reader,
|
||||
SpanNearQuery.newUnorderedNearQuery(field)
|
||||
|
@ -694,7 +696,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(customSuppliers, reader, new TermQuery(new Term(field, "bar"))),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: '>foo bar<')", field),
|
||||
|
@ -721,7 +723,7 @@ public class TestMatchRegionRetriever extends LuceneTestCase {
|
|||
.build(
|
||||
analyzer,
|
||||
reader -> {
|
||||
assertThat(
|
||||
MatcherAssert.assertThat(
|
||||
highlights(reader, new TermQuery(new Term(field, "bar"))),
|
||||
containsInAnyOrder(
|
||||
fmt("0: (%s: 'foo >bar<')", field),
|
||||
|
|
|
@ -31,20 +31,20 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
|
||||
/**
|
||||
* Constructor functions for {@link IntervalsSource} types
|
||||
* Factory functions for creating {@link IntervalsSource interval sources}.
|
||||
*
|
||||
* <p>These sources implement minimum-interval algorithms taken from the paper <a
|
||||
* href="https://vigna.di.unimi.it/ftp/papers/EfficientLazy.pdf">Efficient Optimally Lazy Algorithms
|
||||
* for Minimal-Interval Semantics</a>
|
||||
*
|
||||
* <p>By default, sources that are sensitive to internal gaps (e.g. {@code PHRASE} and {@code
|
||||
* MAXGAPS}) will rewrite their sub-sources so that disjunctions of different lengths are pulled up
|
||||
* to the top of the interval tree. For example, {@code PHRASE(or(PHRASE("a", "b", "c"), "b"), "c")}
|
||||
* will automatically rewrite itself to {@code OR(PHRASE("a", "b", "c", "c"), PHRASE("b", "c"))} to
|
||||
* ensure that documents containing {@code "b c"} are matched. This can lead to less efficient
|
||||
* queries, as more terms need to be loaded (for example, the {@code "c"} iterator above is loaded
|
||||
* twice), so if you care more about speed than about accuracy you can use the {@link #or(boolean,
|
||||
* IntervalsSource...)} factory method to prevent rewriting.
|
||||
* <p><em>Note:</em> by default, sources that are sensitive to internal gaps (e.g. {@code PHRASE}
|
||||
* and {@code MAXGAPS}) will rewrite their sub-sources so that disjunctions of different lengths are
|
||||
* pulled up to the top of the interval tree. For example, {@code PHRASE(or(PHRASE("a", "b", "c"),
|
||||
* "b"), "c")} will automatically rewrite itself to {@code OR(PHRASE("a", "b", "c", "c"),
|
||||
* PHRASE("b", "c"))} to ensure that documents containing {@code "b c"} are matched. This can lead
|
||||
* to less efficient queries, as more terms need to be loaded (for example, the {@code "c"} iterator
|
||||
* above is loaded twice), so if you care more about speed than about accuracy you can use the
|
||||
* {@link #or(boolean, IntervalsSource...)} factory method to prevent rewriting.
|
||||
*/
|
||||
public final class Intervals {
|
||||
|
||||
|
@ -94,7 +94,7 @@ public final class Intervals {
|
|||
|
||||
/**
|
||||
* Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of
|
||||
* IntervalsSources
|
||||
* {@link IntervalsSource interval sources}
|
||||
*/
|
||||
public static IntervalsSource phrase(IntervalsSource... subSources) {
|
||||
return BlockIntervalsSource.build(Arrays.asList(subSources));
|
||||
|
|
|
@ -16,60 +16,72 @@
|
|||
*/
|
||||
|
||||
/**
|
||||
* Intervals queries
|
||||
*
|
||||
*
|
||||
* <h2>Intervals queries</h2>
|
||||
*
|
||||
* This package contains experimental classes to search over intervals within fields
|
||||
* <p>This package contains experimental classes to search over intervals within fields
|
||||
*
|
||||
* <h2>IntervalsSource</h2>
|
||||
*
|
||||
* The {@link org.apache.lucene.queries.intervals.IntervalsSource} class can be used to construct
|
||||
* proximity relationships between terms and intervals. They can be built using static methods in
|
||||
* the {@link org.apache.lucene.queries.intervals.Intervals} class
|
||||
* the {@link org.apache.lucene.queries.intervals.Intervals} class.
|
||||
*
|
||||
* <h3>Basic intervals</h3>
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#term(String)} — Represents a
|
||||
* single term
|
||||
* single term.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#phrase(java.lang.String...)} —
|
||||
* Represents a phrase
|
||||
* Represents a phrase.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#analyzedText(java.lang.String,
|
||||
* org.apache.lucene.analysis.Analyzer, java.lang.String, int, boolean)} — Represents a
|
||||
* phrase (or an unordered sequence) of tokens resulting from an analysis of a given text.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#ordered(IntervalsSource...)} —
|
||||
* Represents an interval over an ordered set of terms or intervals
|
||||
* Represents an interval over an ordered set of terms or intervals.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#unordered(IntervalsSource...)} —
|
||||
* Represents an interval over an unordered set of terms or intervals
|
||||
* Represents an interval over an unordered set of terms or intervals.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#or(IntervalsSource...)} —
|
||||
* Represents the disjunction of a set of terms or intervals
|
||||
* Represents the disjunction of a set of terms or intervals.
|
||||
* <li>{@link
|
||||
* org.apache.lucene.queries.intervals.Intervals#wildcard(org.apache.lucene.util.BytesRef)}
|
||||
* — Represents a suffix wildcard (any prefix-matching term from the index).
|
||||
* </ul>
|
||||
*
|
||||
* <h3>Filters</h3>
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#maxwidth(int, IntervalsSource)}
|
||||
* — Filters out intervals that are larger than a set width
|
||||
* — Filters out intervals that are larger than a set width.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#maxgaps(int, IntervalsSource)} —
|
||||
* Filters out intervals that have more than a set number of gaps between their constituent
|
||||
* sub-intervals
|
||||
* sub-intervals.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#containedBy(IntervalsSource,
|
||||
* IntervalsSource)} — Returns intervals that are contained by another interval
|
||||
* IntervalsSource)} — Returns intervals that are contained by another interval.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#notContainedBy(IntervalsSource,
|
||||
* IntervalsSource)} — Returns intervals that are *not* contained by another interval
|
||||
* IntervalsSource)} — Returns intervals that are *not* contained by another interval.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#containing(IntervalsSource,
|
||||
* IntervalsSource)} — Returns intervals that contain another interval
|
||||
* IntervalsSource)} — Returns intervals that contain another interval.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#notContaining(IntervalsSource,
|
||||
* IntervalsSource)} — Returns intervals that do not contain another interval
|
||||
* IntervalsSource)} — Returns intervals that do not contain another interval.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#nonOverlapping(IntervalsSource,
|
||||
* IntervalsSource)} — Returns intervals that do not overlap with another interval
|
||||
* IntervalsSource)} — Returns intervals that do not overlap with another interval.
|
||||
* <li>{@link org.apache.lucene.queries.intervals.Intervals#notWithin(IntervalsSource, int,
|
||||
* IntervalsSource)} — Returns intervals that do not appear within a set number of
|
||||
* positions of another interval
|
||||
* positions of another interval.
|
||||
* </ul>
|
||||
*
|
||||
* The {@link org.apache.lucene.queries.intervals.Intervals} class contains more advanced filters,
|
||||
* please refer to the documentation of that class.
|
||||
*
|
||||
* <h2>IntervalQuery</h2>
|
||||
*
|
||||
* An {@link org.apache.lucene.queries.intervals.IntervalQuery} takes a field name and an {@link
|
||||
* org.apache.lucene.queries.intervals.IntervalsSource}, and matches all documents that contain
|
||||
* intervals defined by the source in that field.
|
||||
*
|
||||
* <h2>Interval query support in query parsers</h2>
|
||||
*
|
||||
* <p>Lucene's {@code StandardQueryParser} (from the {@code queryparser} module) supports interval
|
||||
* function expressions.
|
||||
*/
|
||||
package org.apache.lucene.queries.intervals;
|
||||
|
|
|
@ -1,62 +0,0 @@
|
|||
NOTE: often, if you are making a small change to the .jj file, you can
|
||||
simply run "ant javacc" and skip the steps below. JavaCC will print
|
||||
warnings like this:
|
||||
|
||||
Warning: ParseException.java: File is obsolete. Please rename or delete this file so that a new one can be generated for you.
|
||||
|
||||
which you should ignore (ie, simply keep the ParseException.java class
|
||||
that's already present).
|
||||
|
||||
If, instead, you'd like to fully rebuild the StandardQueryParser,
|
||||
here's how:
|
||||
|
||||
* Delete these files:
|
||||
|
||||
StandardQueryParser.java
|
||||
StandardQueryParserConstants.java
|
||||
StandardQueryParserTokenManager.java
|
||||
TokenMgrError.java
|
||||
JavaCharStream.java
|
||||
Token.java
|
||||
|
||||
* Run "ant javacc". That will generate all the classes
|
||||
|
||||
* To avoid lots of warnings in the generated code:
|
||||
|
||||
add @SuppressWarnings("all"), immediately preceding the class declaration to:
|
||||
|
||||
QueryParserTokenManager.java
|
||||
TokenMgrError.java
|
||||
JavaCharStream.java
|
||||
Token.java
|
||||
JavaCharStream.java
|
||||
|
||||
* Remove all imports from TokenMgrError.java
|
||||
|
||||
* Fix the ParseException class:
|
||||
|
||||
- Change it to extend from QueryNodeParseException:
|
||||
|
||||
"public class ParseException extends QueryNodeParseException".
|
||||
|
||||
- Recreate all the constructors like this:
|
||||
|
||||
public ParseException(Token currentTokenVal,
|
||||
int[][] expectedTokenSequencesVal, String[] tokenImageVal) {
|
||||
super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, initialise(
|
||||
currentTokenVal, expectedTokenSequencesVal, tokenImageVal)));
|
||||
this.currentToken = currentTokenVal;
|
||||
this.expectedTokenSequences = expectedTokenSequencesVal;
|
||||
this.tokenImage = tokenImageVal;
|
||||
}
|
||||
|
||||
public ParseException(Message message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public ParseException() {
|
||||
super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, "Error"));
|
||||
}
|
||||
|
||||
|
||||
- Fix all imports
|
|
@ -1,9 +1,9 @@
|
|||
{
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java": "3d5f272a6d56b3f4962b252267ce2662e734414e",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java": "75e9d84f424bb697f899fe3adacc0094bac00672",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj": "08b62ed73607b1646af5dadb81c8bb34e381daee",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java": "e73933bff38a62d90dab64f72a1a0deadfff246f",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java": "6e503b48ffa9f4648798e5394f7baeec366d1f07",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java": "fd1fcc78bf1025fe6fe54ab6f9ae2f53cce33364",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj": "eb0d1c55d029982ab8ea433cf9ef1088ba6ea3de",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java": "d3c5d87c46635dbb6dc03bbdc0fb662b47ec318d",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java": "d8e12b467779c1740ea2b672c10806ac25e0184e",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java": "f4cb9d01587279dba30e549ce4867e4381bbd9d7",
|
||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java": "cdfa99af5fcf6b1e50691a1c1370ba60bf0d2d2d"
|
||||
}
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.queryparser.flexible.core.util.StringUtils;
|
|||
* configuration, it creates an empty {@link FieldConfig} object and delegates it to field config
|
||||
* listeners, these are responsible for setting up all the field configuration.
|
||||
*
|
||||
* <p>{@link QueryConfigHandler} should be extended by classes that intends to provide configuration
|
||||
* <p>{@link QueryConfigHandler} should be extended by classes that intend to provide configuration
|
||||
* to {@link QueryNodeProcessor} objects.
|
||||
*
|
||||
* <p>The class that extends {@link QueryConfigHandler} should also provide {@link FieldConfig}
|
||||
|
|
|
@ -20,7 +20,6 @@ import org.apache.lucene.queryparser.flexible.messages.NLS;
|
|||
|
||||
/** Flexible Query Parser message bundle class */
|
||||
public class QueryParserMessages extends NLS {
|
||||
|
||||
private static final String BUNDLE_NAME = QueryParserMessages.class.getName();
|
||||
|
||||
private QueryParserMessages() {
|
||||
|
@ -52,4 +51,5 @@ public class QueryParserMessages extends NLS {
|
|||
public static String NUMBER_CLASS_NOT_SUPPORTED_BY_NUMERIC_RANGE_QUERY;
|
||||
public static String UNSUPPORTED_NUMERIC_DATA_TYPE;
|
||||
public static String NUMERIC_CANNOT_BE_EMPTY;
|
||||
public static String ANALYZER_REQUIRED;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Flexible query parser is a modular, extensible framework for implementing Lucene query parsers.
|
||||
* In the flexible query parser model, query parsing takes three steps: syntax parsing, processing
|
||||
* (query semantics) and building (conversion to a Lucene {@link org.apache.lucene.search.Query}).
|
||||
*
|
||||
* <p>The flexible query parser module provides not just the framework but also the {@linkplain
|
||||
* org.apache.lucene.queryparser.flexible.standard.StandardQueryParser} - the default implementation
|
||||
* of a fully fledged query parser that supports most of the classic query parser's syntax but also
|
||||
* adds support for interval functions, min-should-match operator on Boolean groups and many hooks
|
||||
* for customization of how the parser behaves at runtime.
|
||||
*
|
||||
* <p>The flexible query parser is divided into two packages:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link org.apache.lucene.queryparser.flexible.core}: contains the query parser API classes,
|
||||
* which should be extended by custom query parser implementations.
|
||||
* <li>{@link org.apache.lucene.queryparser.flexible.standard}: contains an example Lucene query
|
||||
* parser implementation built on top of the flexible query parser API.
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Features</h2>
|
||||
*
|
||||
* <ol>
|
||||
* <li>full support for Boolean expressions, including groups
|
||||
* <li>{@linkplain org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser syntax parsers}
|
||||
* - support for arbitrary syntax parsers, that can be converted into {@link
|
||||
* org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} trees.
|
||||
* <li>{@linkplain org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor query
|
||||
* node processors} - optimize, validate, rewrite the {@link
|
||||
* org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} trees
|
||||
* <li>{@linkplain
|
||||
* org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline processor
|
||||
* pipelines} - select your favorite query processors and build a pipeline to implement the
|
||||
* features you need.
|
||||
* <li>{@linkplain org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler query
|
||||
* configuration handlers}
|
||||
* <li>{@linkplain org.apache.lucene.queryparser.flexible.core.builders.QueryBuilder query
|
||||
* builders} - convert {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}
|
||||
* trees into Lucene {@link org.apache.lucene.search.Query} instances.
|
||||
* </ol>
|
||||
*
|
||||
* <h2>Design</h2>
|
||||
*
|
||||
* <p>The flexible query parser was designed to have a very generic architecture, so that it can be
|
||||
* easily used for different products with varying query syntax needs.
|
||||
*
|
||||
* <p>The query parser has three layers and its core is what we call the {@linkplain
|
||||
* org.apache.lucene.queryparser.flexible.core.nodes.QueryNode query node tree}. It is a tree of
|
||||
* objects that represent the syntax of the original query, for example, for 'a AND b' the tree
|
||||
* could look like this:
|
||||
*
|
||||
* <pre>
|
||||
* AND
|
||||
* / \
|
||||
* A B
|
||||
* </pre>
|
||||
*
|
||||
* <p>The three flexible query parser layers are:
|
||||
*
|
||||
* <dl>
|
||||
* <dt>{@link org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser}
|
||||
* <dd>This layer is the text parsing layer which simply transforms the query text string into a
|
||||
* {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} tree. Every text parser
|
||||
* must implement the interface {@link
|
||||
* org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser}. The default
|
||||
* implementation is {@link
|
||||
* org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser}.
|
||||
* <dt>{@link org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor}
|
||||
* <dd>The query node processor does most of the work: it contains a chain of {@linkplain
|
||||
* org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor query node
|
||||
* processors}. Each processor can walk the tree and modify nodes or even the tree's
|
||||
* structure. This allows for query optimization before the node tree is converted to an
|
||||
* actual query.
|
||||
* <dt>{@link org.apache.lucene.queryparser.flexible.core.builders.QueryBuilder}
|
||||
* <dd>The third layer is a configurable map of builders, which map {@linkplain
|
||||
* org.apache.lucene.queryparser.flexible.core.nodes.QueryNode query nodes} to their adapters
|
||||
* that convert each node into a {@link org.apache.lucene.search.Query}.
|
||||
* </dl>
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible;
|
|
@ -39,57 +39,189 @@ import org.apache.lucene.search.MultiTermQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
|
||||
/**
|
||||
* This class is a helper that enables users to easily use the Lucene query parser.
|
||||
* The {@link StandardQueryParser} is a pre-assembled query parser that supports most features of
|
||||
* the {@linkplain org.apache.lucene.queryparser.classic.QueryParser classic Lucene query parser},
|
||||
* allows dynamic configuration of some of its features (like multi-field expansion or wildcard
|
||||
* query restrictions) and adds support for new query types and expressions.
|
||||
*
|
||||
* <p>To construct a Query object from a query string, use the {@link #parse(String, String)}
|
||||
* method:
|
||||
* <p>The {@link StandardQueryParser} is an extension of the {@link QueryParserHelper} with
|
||||
* reasonable defaults for syntax tree parsing ({@link StandardSyntaxParser}), node processor
|
||||
* pipeline ({@link StandardQueryNodeProcessorPipeline}) and node tree to {@link Query} builder
|
||||
* ({@link StandardQueryTreeBuilder}).
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* StandardQueryParser queryParserHelper = new StandardQueryParser();
|
||||
* Query query = queryParserHelper.parse("a AND b", "defaultField");
|
||||
* </pre>
|
||||
* <p>Typical usage, including configuration tweaks:
|
||||
*
|
||||
* <p>To change any configuration before parsing the query string do, for example: <br>
|
||||
* <pre class="prettyprint">{@code
|
||||
* StandardQueryParser qpHelper = new StandardQueryParser();
|
||||
* StandardQueryConfigHandler config = qpHelper.getQueryConfigHandler();
|
||||
* config.setAllowLeadingWildcard(true);
|
||||
* config.setAnalyzer(new WhitespaceAnalyzer());
|
||||
* Query query = qpHelper.parse("apache AND lucene", "defaultField");
|
||||
* }</pre>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* // the query config handler returned by {@link StandardQueryParser} is a {@link StandardQueryConfigHandler}
|
||||
* queryParserHelper.getQueryConfigHandler().setAnalyzer(new WhitespaceAnalyzer());
|
||||
* </pre>
|
||||
* <h2>Supported query syntax</h2>
|
||||
*
|
||||
* <p>The syntax for query strings is as follows (copied from the old QueryParser javadoc): A Query
|
||||
* is a series of clauses. A clause may be prefixed by:
|
||||
* <p>Standard query parser borrows most of its syntax from the {@linkplain
|
||||
* org.apache.lucene.queryparser.classic classic query parser} but adds more features and
|
||||
* expressions on top of that syntax.
|
||||
*
|
||||
* <p>A <em>query</em> consists of clauses, field specifications, grouping and Boolean operators and
|
||||
* interval functions. We will discuss them in order.
|
||||
*
|
||||
* <h3>Basic clauses</h3>
|
||||
*
|
||||
* <p>A query must contain one or more clauses. A clause can be a literal term, a phrase, a wildcard
|
||||
* expression or another kind of expression (for example, a regular expression or a fuzzy term).
|
||||
*
|
||||
* <p>The following are some examples of simple one-clause queries:
|
||||
*
|
||||
* <ul>
|
||||
* <li>a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating that the clause is
|
||||
* required or prohibited respectively; or
|
||||
* <li>a term followed by a colon, indicating the field to be searched. This enables one to
|
||||
* construct queries which search multiple fields.
|
||||
* <li><code>test</code>
|
||||
* <p>selects documents containing the word <em>test</em> (term clause).
|
||||
* <li><code>"test equipment"</code>
|
||||
* <p>phrase search; selects documents containing the phrase <em>test equipment</em> (phrase
|
||||
* clause).
|
||||
* <li><code>"test failure"~4</code>
|
||||
* <p>proximity search; selects documents containing the words <em>test</em> and
|
||||
* <em>failure</em> within 4 words (positions) from each other. The provided "proximity" is
|
||||
* technically translated into "edit distance" (maximum number of atomic word-moving
|
||||
* operations required to transform the document's phrase into the query phrase).
|
||||
* <li><code>tes*</code>
|
||||
* <p>prefix wildcard matching; selects documents containing words starting with <em>tes</em>,
|
||||
* such as: <em>test</em>, <em>testing</em> or <em>testable</em>.
|
||||
* <li><code>/.est(s|ing)/</code>
|
||||
* <p>documents containing words matching the provided regular expression, such as
|
||||
* <em>resting</em> or <em>nests</em>.
|
||||
* <li><code>nest~2</code>
|
||||
* <p>fuzzy term matching; documents containing words within 2-edits distance (2 additions,
|
||||
* removals or replacements of a letter) from <em>nest</em>, such as <em>test</em>,
|
||||
* <em>net</em> or <em>rests</em>.
|
||||
* </ul>
|
||||
*
|
||||
* A clause may be either:
|
||||
* <h3>Field specifications</h3>
|
||||
*
|
||||
* <p>Most clauses can be prefixed by a field name and a colon: the clause will then apply to that
|
||||
* field only. If the field specification is omitted, the query parser will expand the clause over
|
||||
* all fields specified by a call to {@link StandardQueryParser#setMultiFields(CharSequence[])} or
|
||||
* will use the default field provided in the call to {@link #parse(String, String)}.
|
||||
*
|
||||
* <p>The following are some examples of field-prefixed clauses:
|
||||
*
|
||||
* <ul>
|
||||
* <li>a term, indicating all the documents that contain this term; or
|
||||
* <li>a nested query, enclosed in parentheses. Note that this may be used with a <code>+</code>/
|
||||
* <code>-</code> prefix to require any of a set of terms.
|
||||
* <li><code>title:test</code>
|
||||
* <p>documents containing <em>test</em> in the <code>title</code> field.
|
||||
* <li><code>title:(die OR hard)</code>
|
||||
* <p>documents containing <em>die</em> or <em>hard</em> in the <code>title</code> field.
|
||||
* </ul>
|
||||
*
|
||||
* Thus, in BNF, the query grammar is:
|
||||
* <h3>Boolean operators and grouping</h3>
|
||||
*
|
||||
* <pre>
|
||||
* Query ::= ( Clause )*
|
||||
* Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
|
||||
* </pre>
|
||||
* <p>You can combine clauses using Boolean AND, OR and NOT operators to form more complex
|
||||
* expressions, for example:
|
||||
*
|
||||
* <p>Examples of appropriately formatted queries can be found in the <a
|
||||
* href="{@docRoot}/org/apache/lucene/queryparser/classic/package-summary.html#package.description">
|
||||
* query syntax documentation</a>.
|
||||
* <ul>
|
||||
* <li><code>test AND results</code>
|
||||
* <p>selects documents containing both the word <em>test</em> and the word <em>results</em>.
|
||||
* <li><code>test OR suite OR results</code>
|
||||
* <p>selects documents with at least one of <em>test</em>, <em>suite</em> or
|
||||
* <em>results</em>.
|
||||
* <li><code>title:test AND NOT title:complete</code>
|
||||
* <p>selects documents containing <em>test</em> and not containing <em>complete</em> in the
|
||||
* <code>title</code> field.
|
||||
* <li><code>title:test AND (pass* OR fail*)</code>
|
||||
* <p>grouping; use parentheses to specify the precedence of terms in a Boolean clause. Query
|
||||
* will match documents containing <em>test</em> in the <code>title</code> field and a word
|
||||
* starting with <em>pass</em> or <em>fail</em> in the default search fields.
|
||||
* <li><code>title:(pass fail skip)</code>
|
||||
* <p>shorthand notation; documents containing at least one of <em>pass</em>, <em>fail</em> or
|
||||
* <em>skip</em> in the <code>title</code> field.
|
||||
* <li><code>title:(+test +"result unknown")</code>
|
||||
* <p>shorthand notation; documents containing both <em>test</em> and <em>result unknown</em>
|
||||
* in the <code>title</code> field.
|
||||
* </ul>
|
||||
*
|
||||
* <p>The text parser used by this helper is a {@link StandardSyntaxParser}.
|
||||
* <p>Note the Boolean operators must be written in all caps, otherwise they are parsed as regular
|
||||
* terms.
|
||||
*
|
||||
* <p>The query node processor used by this helper is a {@link StandardQueryNodeProcessorPipeline}.
|
||||
* <h3>Range operators</h3>
|
||||
*
|
||||
* <p>The builder used by this helper is a {@link StandardQueryTreeBuilder}.
|
||||
* <p>To search for ranges of textual or numeric values, use square or curly brackets, for example:
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>name:[Jones TO Smith]</code>
|
||||
* <p>inclusive range; selects documents whose <code>name
|
||||
* </code> field has any value between <em>Jones</em> and <em>Smith</em>, including
|
||||
* boundaries.
|
||||
* <li><code>score:{2.5 TO 7.3}</code>
|
||||
* <p>exclusive range; selects documents whose <code>score</code> field is between 2.5 and
|
||||
* 7.3, excluding boundaries.
|
||||
* <li><code>score:{2.5 TO *]</code>
|
||||
* <p>one-sided range; selects documents whose <code>score</code> field is larger than 2.5.
|
||||
* </ul>
|
||||
*
|
||||
* <h3>Term boosting</h3>
|
||||
*
|
||||
* <p>Terms, quoted terms, term range expressions and grouped clauses can have a floating-point
|
||||
* weight <em>boost</em> applied to them to increase their score relative to other clauses. For
|
||||
* example:
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>jones^2 OR smith^0.5</code>
|
||||
* <p>prioritize documents with <code>jones</code> term over matches on the <code>smith</code>
|
||||
* term.
|
||||
* <li><code>field:(a OR b NOT c)^2.5 OR field:d</code>
|
||||
* <p>apply the boost to a sub-query.
|
||||
* </ul>
|
||||
*
|
||||
* <h3>Special character escaping</h3>
|
||||
*
|
||||
* <p>Most search terms can be put in double quotes making special-character escaping not necessary.
|
||||
* If the search term contains the quote character (or cannot be quoted for some reason), any
|
||||
* character can be quoted with a backslash. For example:
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>\:\(quoted\+term\)\:</code>
|
||||
* <p>a single search term <code>(quoted+term):</code> with escape sequences. An alternative
|
||||
* quoted form would be simpler: <code>":(quoted+term):"
|
||||
* </code>.
|
||||
* </ul>
|
||||
*
|
||||
* <h3>Minimum-should-match constraint for Boolean disjunction groups</h3>
|
||||
*
|
||||
* <p>A minimum-should-match operator can be applied to a disjunction Boolean query (a query with
|
||||
* only "OR"-subclauses) and forces the query to match documents with at least the provided number
|
||||
* of these subclauses. For example:
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>(blue crab fish)@2</code>
|
||||
* <p>matches all documents with at least two terms from the set [blue, crab, fish] (in any
|
||||
* order).
|
||||
* <li><code>((yellow OR blue) crab fish)@2</code>
|
||||
* <p>sub-clauses of a Boolean query can themselves be complex queries; here the
|
||||
* min-should-match selects documents that match at least two of the provided three
|
||||
* sub-clauses.
|
||||
* </ul>
|
||||
*
|
||||
* <h3>Interval function clauses</h3>
|
||||
*
|
||||
* <p>Interval functions are a powerful tool to express search needs in terms of one or more
|
||||
* contiguous fragments of text and their relationship to one another. All interval clauses start
|
||||
* with the {@code fn:} prefix (possibly prefixed by a field specification). For example:
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>fn:ordered(quick brown fox)</code>
|
||||
* <p>matches all documents (in the default field or in multi-field expansion) with at least
|
||||
* one ordered sequence of <code>quick</code>, <code>
|
||||
* brown</code> and <code>fox</code> terms.
|
||||
* <li><code>title:fn:maxwidth(5 fn:atLeast(2 quick brown fox))</code>
|
||||
* <p>matches all documents in the <code>title
|
||||
* </code> field where at least two of the three terms (<code>quick</code>, <code>
|
||||
* brown</code> and <code>fox</code>) occur within five positions of each other.
|
||||
* </ul>
|
||||
*
|
||||
* Please refer to the {@linkplain org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn
|
||||
* interval functions package} for more information on which functions are available and how they
|
||||
* work.
|
||||
*
|
||||
* @see StandardQueryParser
|
||||
* @see StandardQueryConfigHandler
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.builders;
|
||||
|
||||
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/** Builds a {@link Query} from an {@link IntervalQueryNode}. */
|
||||
public class IntervalQueryNodeBuilder implements StandardQueryBuilder {
|
||||
@Override
|
||||
public Query build(QueryNode queryNode) throws QueryNodeException {
|
||||
return ((IntervalQueryNode) queryNode).getQuery();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.builders;
|
||||
|
||||
import java.util.List;
|
||||
import org.apache.lucene.queryparser.flexible.core.builders.QueryBuilder;
|
||||
import org.apache.lucene.queryparser.flexible.core.builders.QueryTreeBuilder;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.MinShouldMatchNode;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/** Builds a {@link BooleanQuery} from a {@link MinShouldMatchNode}. */
|
||||
public class MinShouldMatchNodeBuilder implements QueryBuilder {
|
||||
@Override
|
||||
public Query build(QueryNode queryNode) {
|
||||
MinShouldMatchNode mmNode = (MinShouldMatchNode) queryNode;
|
||||
|
||||
List<QueryNode> children = queryNode.getChildren();
|
||||
if (children.size() != 1) {
|
||||
throw new RuntimeException("Unexpected number of node children: " + children.size());
|
||||
}
|
||||
|
||||
Query q = (Query) mmNode.groupQueryNode.getTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);
|
||||
|
||||
BooleanQuery booleanQuery = (BooleanQuery) q;
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.setMinimumNumberShouldMatch(mmNode.minShouldMatch);
|
||||
booleanQuery.clauses().forEach(builder::add);
|
||||
return builder.build();
|
||||
}
|
||||
}
|
|
@ -29,6 +29,8 @@ import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode;
|
|||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.MinShouldMatchNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode;
|
||||
|
@ -69,6 +71,8 @@ public class StandardQueryTreeBuilder extends QueryTreeBuilder implements Standa
|
|||
setBuilder(SynonymQueryNode.class, new SynonymQueryNodeBuilder());
|
||||
setBuilder(MultiPhraseQueryNode.class, new MultiPhraseQueryNodeBuilder());
|
||||
setBuilder(MatchAllDocsQueryNode.class, new MatchAllDocsQueryNodeBuilder());
|
||||
setBuilder(MinShouldMatchNode.class, new MinShouldMatchNodeBuilder());
|
||||
setBuilder(IntervalQueryNode.class, new IntervalQueryNodeBuilder());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.IntervalQuery;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.FieldableNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNodeImpl;
|
||||
import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.IntervalFunction;
|
||||
import org.apache.lucene.queryparser.flexible.standard.parser.EscapeQuerySyntaxImpl;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/** Node that represents an interval function. */
|
||||
public class IntervalQueryNode extends QueryNodeImpl implements FieldableNode {
|
||||
private final IntervalFunction source;
|
||||
private String field;
|
||||
private Analyzer analyzer;
|
||||
|
||||
public IntervalQueryNode(String field, IntervalFunction source) {
|
||||
this.field = field;
|
||||
this.source = Objects.requireNonNull(source);
|
||||
}
|
||||
|
||||
public Query getQuery() {
|
||||
Objects.requireNonNull(field, "Field must not be null for interval queries.");
|
||||
Objects.requireNonNull(analyzer, "Analyzer must not be null for interval queries.");
|
||||
return new IntervalQuery(field, source.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
|
||||
return String.format(Locale.ROOT, "%s:%s", field, source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return toQueryString(new EscapeQuerySyntaxImpl());
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharSequence getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setField(CharSequence fieldName) {
|
||||
this.field = Objects.requireNonNull(fieldName.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalQueryNode cloneTree() {
|
||||
return new IntervalQueryNode(field, source);
|
||||
}
|
||||
|
||||
public void setAnalyzer(Analyzer analyzer) {
|
||||
this.analyzer =
|
||||
Objects.requireNonNull(analyzer, "Analyzer must not be null for interval queries.");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes;
|
||||
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNodeImpl;
|
||||
import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
|
||||
|
||||
/** Node that represents a minimum-should-match restriction on a {@link GroupQueryNode}. */
|
||||
public class MinShouldMatchNode extends QueryNodeImpl {
|
||||
public final int minShouldMatch;
|
||||
public final GroupQueryNode groupQueryNode;
|
||||
|
||||
public MinShouldMatchNode(int minShouldMatch, GroupQueryNode groupQueryNode) {
|
||||
this.minShouldMatch = minShouldMatch;
|
||||
this.groupQueryNode = groupQueryNode;
|
||||
|
||||
this.setLeaf(false);
|
||||
this.allocate();
|
||||
add(groupQueryNode);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
|
||||
return groupQueryNode.toQueryString(escapeSyntaxParser) + "@" + minShouldMatch;
|
||||
}
|
||||
}
|
|
@ -21,7 +21,7 @@ import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax;
|
|||
|
||||
/**
|
||||
* A {@link WildcardQueryNode} represents a wildcard query. This does not apply to phrases. Examples:
|
||||
* a*b*c Fl?w? m?ke*g
|
||||
* {@code a*b*c Fl?w? m?ke*g}.
|
||||
*/
|
||||
public class WildcardQueryNode extends FieldQueryNode {
|
||||
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#after(IntervalsSource, IntervalsSource)}. */
|
||||
public class After extends IntervalFunction {
|
||||
private final IntervalFunction source;
|
||||
private final IntervalFunction reference;
|
||||
|
||||
public After(IntervalFunction source, IntervalFunction reference) {
|
||||
this.source = Objects.requireNonNull(source);
|
||||
this.reference = Objects.requireNonNull(reference);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.after(
|
||||
source.toIntervalSource(field, analyzer), reference.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:after(%s %s)", source, reference);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.regex.Pattern;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#analyzedText(String, Analyzer, String, int, boolean)}. */
|
||||
public class AnalyzedText extends IntervalFunction {
|
||||
private final String term;
|
||||
|
||||
public AnalyzedText(String term) {
|
||||
this.term = term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
int gaps = 0;
|
||||
boolean ordered = true;
|
||||
try {
|
||||
return Intervals.analyzedText(term, analyzer, field, gaps, ordered);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (requiresQuotes(term)) {
|
||||
return '"' + term + '"';
|
||||
} else {
|
||||
return term;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean requiresQuotes(String term) {
|
||||
return Pattern.compile("[\\s]").matcher(term).find();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#atLeast(int, IntervalsSource...)}. */
|
||||
public class AtLeast extends IntervalFunction {
|
||||
private final int minShouldMatch;
|
||||
private final List<IntervalFunction> sources;
|
||||
|
||||
public AtLeast(int minShouldMatch, List<IntervalFunction> sources) {
|
||||
this.minShouldMatch = minShouldMatch;
|
||||
this.sources = Objects.requireNonNull(sources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.atLeast(
|
||||
minShouldMatch,
|
||||
sources.stream()
|
||||
.map(intervalFunction -> intervalFunction.toIntervalSource(field, analyzer))
|
||||
.toArray(IntervalsSource[]::new));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(
|
||||
Locale.ROOT,
|
||||
"fn:atLeast(%s %s)",
|
||||
minShouldMatch,
|
||||
sources.stream().map(IntervalFunction::toString).collect(Collectors.joining(" ")));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#before(IntervalsSource, IntervalsSource)}. */
|
||||
public class Before extends IntervalFunction {
|
||||
private final IntervalFunction source;
|
||||
private final IntervalFunction reference;
|
||||
|
||||
public Before(IntervalFunction source, IntervalFunction reference) {
|
||||
this.source = Objects.requireNonNull(source);
|
||||
this.reference = Objects.requireNonNull(reference);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.before(
|
||||
source.toIntervalSource(field, analyzer), reference.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:before(%s %s)", source, reference);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#containedBy(IntervalsSource, IntervalsSource)}. */
|
||||
public class ContainedBy extends IntervalFunction {
|
||||
private final IntervalFunction big;
|
||||
private final IntervalFunction small;
|
||||
|
||||
public ContainedBy(IntervalFunction small, IntervalFunction big) {
|
||||
this.small = Objects.requireNonNull(small);
|
||||
this.big = Objects.requireNonNull(big);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.containedBy(
|
||||
small.toIntervalSource(field, analyzer), big.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:containedBy(%s %s)", small, big);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#containing(IntervalsSource, IntervalsSource)}. */
|
||||
public class Containing extends IntervalFunction {
|
||||
private final IntervalFunction big;
|
||||
private final IntervalFunction small;
|
||||
|
||||
public Containing(IntervalFunction big, IntervalFunction small) {
|
||||
this.big = Objects.requireNonNull(big);
|
||||
this.small = Objects.requireNonNull(small);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.containing(
|
||||
big.toIntervalSource(field, analyzer), small.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:containing(%s %s)", big, small);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#extend(IntervalsSource, int, int)}. */
|
||||
public class Extend extends IntervalFunction {
|
||||
private final int before, after;
|
||||
private final IntervalFunction source;
|
||||
|
||||
public Extend(IntervalFunction source, int before, int after) {
|
||||
this.source = Objects.requireNonNull(source);
|
||||
this.before = before;
|
||||
this.after = after;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.extend(source.toIntervalSource(field, analyzer), before, after);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:extend(%s %d %d)", source, before, after);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Representation of an interval function that can be converted to {@link IntervalsSource}. */
public abstract class IntervalFunction {
  /**
   * Converts this function into an {@link IntervalsSource}.
   *
   * @param field the field name handed to the conversion
   * @param analyzer the analyzer handed to the conversion
   * @return the interval source representing this function
   */
  public abstract IntervalsSource toIntervalSource(String field, Analyzer analyzer);

  /**
   * Returns a textual form of this function. Declared abstract so that every subclass has to
   * provide its own rendering.
   */
  @Override
  public abstract String toString();
}
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#maxgaps(int, IntervalsSource)}. */
|
||||
public class MaxGaps extends IntervalFunction {
|
||||
private final int maxGaps;
|
||||
private final IntervalFunction source;
|
||||
|
||||
public MaxGaps(int maxGaps, IntervalFunction source) {
|
||||
this.maxGaps = maxGaps;
|
||||
this.source = Objects.requireNonNull(source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.maxgaps(maxGaps, source.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:maxgaps(%s %s)", maxGaps, source);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#maxwidth(int, IntervalsSource)}. */
|
||||
public class MaxWidth extends IntervalFunction {
|
||||
private final int width;
|
||||
private final IntervalFunction source;
|
||||
|
||||
public MaxWidth(int width, IntervalFunction source) {
|
||||
this.width = width;
|
||||
this.source = Objects.requireNonNull(source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.maxwidth(width, source.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:maxwidth(%s %s)", width, source);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#nonOverlapping(IntervalsSource, IntervalsSource)} . */
|
||||
public class NonOverlapping extends IntervalFunction {
|
||||
private final IntervalFunction minuend;
|
||||
private final IntervalFunction subtrahend;
|
||||
|
||||
public NonOverlapping(IntervalFunction minuend, IntervalFunction subtrahend) {
|
||||
this.minuend = Objects.requireNonNull(minuend);
|
||||
this.subtrahend = Objects.requireNonNull(subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.nonOverlapping(
|
||||
minuend.toIntervalSource(field, analyzer), subtrahend.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:nonOverlapping(%s %s)", minuend, subtrahend);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#notContainedBy(IntervalsSource, IntervalsSource)}. */
|
||||
public class NotContainedBy extends IntervalFunction {
|
||||
private final IntervalFunction small;
|
||||
private final IntervalFunction big;
|
||||
|
||||
public NotContainedBy(IntervalFunction small, IntervalFunction big) {
|
||||
this.small = Objects.requireNonNull(small);
|
||||
this.big = Objects.requireNonNull(big);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.notContainedBy(
|
||||
small.toIntervalSource(field, analyzer), big.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:notContainedBy(%s %s)", small, big);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#notContaining(IntervalsSource, IntervalsSource)}. */
|
||||
public class NotContaining extends IntervalFunction {
|
||||
private final IntervalFunction minuend;
|
||||
private final IntervalFunction subtrahend;
|
||||
|
||||
public NotContaining(IntervalFunction minuend, IntervalFunction subtrahend) {
|
||||
this.minuend = Objects.requireNonNull(minuend);
|
||||
this.subtrahend = Objects.requireNonNull(subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.notContaining(
|
||||
minuend.toIntervalSource(field, analyzer), subtrahend.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:notContaining(%s %s)", minuend, subtrahend);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#notWithin(IntervalsSource, int, IntervalsSource)}. */
|
||||
public class NotWithin extends IntervalFunction {
|
||||
private final int positions;
|
||||
private final IntervalFunction minuend, subtrahend;
|
||||
|
||||
public NotWithin(IntervalFunction minuend, int positions, IntervalFunction subtrahend) {
|
||||
this.positions = positions;
|
||||
this.minuend = Objects.requireNonNull(minuend);
|
||||
this.subtrahend = Objects.requireNonNull(subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.notWithin(
|
||||
minuend.toIntervalSource(field, analyzer),
|
||||
positions,
|
||||
subtrahend.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:notWithin(%s %d %s)", minuend, positions, subtrahend);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#or(IntervalsSource...)}. */
|
||||
public class Or extends IntervalFunction {
|
||||
private final List<IntervalFunction> sources;
|
||||
|
||||
public Or(List<IntervalFunction> sources) {
|
||||
this.sources = Objects.requireNonNull(sources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.or(
|
||||
sources.stream()
|
||||
.map(intervalFunction -> intervalFunction.toIntervalSource(field, analyzer))
|
||||
.toArray(IntervalsSource[]::new));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(
|
||||
Locale.ROOT,
|
||||
"fn:or(%s)",
|
||||
sources.stream().map(IntervalFunction::toString).collect(Collectors.joining(" ")));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#ordered(IntervalsSource...)}. */
|
||||
public class Ordered extends IntervalFunction {
|
||||
private final List<IntervalFunction> sources;
|
||||
|
||||
public Ordered(List<IntervalFunction> sources) {
|
||||
this.sources = Objects.requireNonNull(sources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.ordered(
|
||||
sources.stream()
|
||||
.map(intervalFunction -> intervalFunction.toIntervalSource(field, analyzer))
|
||||
.toArray(IntervalsSource[]::new));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(
|
||||
Locale.ROOT,
|
||||
"fn:ordered(%s)",
|
||||
sources.stream().map(IntervalFunction::toString).collect(Collectors.joining(" ")));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#overlapping(IntervalsSource, IntervalsSource)}. */
|
||||
public class Overlapping extends IntervalFunction {
|
||||
private final IntervalFunction source;
|
||||
private final IntervalFunction reference;
|
||||
|
||||
public Overlapping(IntervalFunction source, IntervalFunction reference) {
|
||||
this.source = Objects.requireNonNull(source);
|
||||
this.reference = Objects.requireNonNull(reference);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.overlapping(
|
||||
source.toIntervalSource(field, analyzer), reference.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:overlapping(%s %s)", source, reference);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#phrase(String...)}. */
|
||||
public class Phrase extends IntervalFunction {
|
||||
private final List<IntervalFunction> sources;
|
||||
|
||||
public Phrase(List<IntervalFunction> sources) {
|
||||
this.sources = Objects.requireNonNull(sources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.phrase(
|
||||
sources.stream()
|
||||
.map(intervalFunction -> intervalFunction.toIntervalSource(field, analyzer))
|
||||
.toArray(IntervalsSource[]::new));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(
|
||||
Locale.ROOT,
|
||||
"fn:phrase(%s)",
|
||||
sources.stream().map(IntervalFunction::toString).collect(Collectors.joining(" ")));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#unordered(IntervalsSource...)}. */
|
||||
public class Unordered extends IntervalFunction {
|
||||
private final List<IntervalFunction> sources;
|
||||
|
||||
public Unordered(List<IntervalFunction> sources) {
|
||||
this.sources = Objects.requireNonNull(sources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.unordered(
|
||||
sources.stream()
|
||||
.map(intervalFunction -> intervalFunction.toIntervalSource(field, analyzer))
|
||||
.toArray(IntervalsSource[]::new));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(
|
||||
Locale.ROOT,
|
||||
"fn:unordered(%s)",
|
||||
sources.stream().map(IntervalFunction::toString).collect(Collectors.joining(" ")));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#unorderedNoOverlaps(IntervalsSource, IntervalsSource)}. */
|
||||
public class UnorderedNoOverlaps extends IntervalFunction {
|
||||
private final IntervalFunction a, b;
|
||||
|
||||
public UnorderedNoOverlaps(IntervalFunction a, IntervalFunction b) {
|
||||
this.a = Objects.requireNonNull(a);
|
||||
this.b = Objects.requireNonNull(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.unorderedNoOverlaps(
|
||||
a.toIntervalSource(field, analyzer), b.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:unorderedNoOverlaps(%s %s)", a, b);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Node that represents {@link Intervals#wildcard(BytesRef)}. */
|
||||
public class Wildcard extends IntervalFunction {
|
||||
private final String wildcard;
|
||||
|
||||
public Wildcard(String wildcard) {
|
||||
this.wildcard = wildcard;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.wildcard(new BytesRef(wildcard));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:wildcard(%s)", wildcard);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
|
||||
/** Node that represents {@link Intervals#within(IntervalsSource, int, IntervalsSource)}. */
|
||||
public class Within extends IntervalFunction {
|
||||
private final int positions;
|
||||
private final IntervalFunction source, reference;
|
||||
|
||||
public Within(IntervalFunction source, int positions, IntervalFunction reference) {
|
||||
this.positions = positions;
|
||||
this.source = Objects.requireNonNull(source);
|
||||
this.reference = Objects.requireNonNull(reference);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource toIntervalSource(String field, Analyzer analyzer) {
|
||||
return Intervals.within(
|
||||
source.toIntervalSource(field, analyzer),
|
||||
positions,
|
||||
reference.toIntervalSource(field, analyzer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(Locale.ROOT, "fn:within(%s %d %s)", source, positions, reference);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,726 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This package contains classes that implement {@linkplain
|
||||
* org.apache.lucene.queries.intervals.Intervals interval function} support for the {@linkplain
|
||||
* org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser standard syntax
|
||||
* parser}.
|
||||
*
|
||||
* <h2>What are interval functions?</h2>
|
||||
*
|
||||
* <p>Interval functions are a powerful tool to express search needs in terms of one or more
|
||||
* contiguous fragments of text and their relationship to one another. Interval functions are
|
||||
* implemented by an {@linkplain org.apache.lucene.queries.intervals.IntervalQuery IntervalQuery}
|
||||
* but many ready-to-use factory methods are provided in the {@linkplain
|
||||
* org.apache.lucene.queries.intervals.Intervals Intervals} class.
|
||||
*
|
||||
* <p>When Lucene indexes documents (or rather: document fields) the input text is typically split
|
||||
* into <em>tokens</em>. The details of how this tokenization is performed depend on how the
|
||||
* field's {@link org.apache.lucene.analysis.Analyzer} is set up. In the end, each token would
|
||||
* typically have an associated <em>position</em> in the token stream. For example, the following
|
||||
* sentence:
|
||||
*
|
||||
* <p class="example sentence with-highlights">The quick brown fox jumps over the lazy dog
|
||||
*
|
||||
* <p>could be transformed into the following token stream (note some token positions are "blank"
|
||||
* (grayed out) — these positions reflect stop words that are typically not indexed at all).
|
||||
*
|
||||
* <p class="example sentence with-highlights with-positions"><span style="color:
|
||||
* lightgrey">The</span><sub>—</sub> quick<sub>2</sub> brown<sub>3</sub> fox<sub>4</sub>
|
||||
* jumps<sub>5</sub> over<sub>6</sub> <span style="color: lightgrey">the</span><sub>—</sub>
|
||||
* lazy<sub>8</sub> dog<sub>9</sub>
|
||||
*
|
||||
* <p>Remembering that intervals are contiguous spans between two positions in a document, consider
|
||||
* the following example interval function query: <code>fn:ordered(brown dog)</code>. This query
|
||||
* selects any span of text between terms <code>brown</code> and <code>dog</code>. In our example,
|
||||
* this would correspond to the highlighted fragment below.
|
||||
*
|
||||
* <p class="example sentence with-highlights">The quick <span class="highlight">brown fox jumps
|
||||
* over the lazy dog</span>
|
||||
*
|
||||
* <p>This type of interval function can be called an interval <em>selector</em>. The second class
|
||||
* of interval functions works by combining or filtering other intervals depending on certain
|
||||
* criteria.
|
||||
*
|
||||
* <p>The matching interval in the above example can be of any length — if the word <code>
|
||||
* brown</code> occurs at the beginning of the document and the word <code>dog</code> at the very
|
||||
* end of the document, the interval would be very long (it would cover the entire document!). Let's
|
||||
* say we want to restrict the matches to only those intervals with at most 3 positions between the
|
||||
* search terms: <code>fn:maxgaps(3 fn:ordered(brown dog))</code>.
|
||||
*
|
||||
* <p>There are five tokens in between search terms (so five "gaps" between the matching interval's
|
||||
* positions) and the above query no longer matches our example document at all.
|
||||
*
|
||||
* <p>Interval filtering functions allow expressing a variety of conditions other Lucene queries
|
||||
* cannot. For example, consider this interval query that searches for words <code>lazy</code> or
|
||||
* <code>quick</code> but only if they are in the neighborhood of one position from any of the words
|
||||
* <code>dog</code> or <code>fox</code>:
|
||||
*
|
||||
* <p><code>fn:within(fn:or(lazy quick) 1 fn:or(dog fox))</code>
|
||||
*
|
||||
* <p>The result of this query is correctly shown below (only the word <code>lazy</code> matches the
|
||||
* query, <code>quick</code> is 2 positions away from <code>fox</code>).
|
||||
*
|
||||
* <p class="example sentence with-highlights">The quick brown fox jumps over the <span
|
||||
* class="highlight">lazy</span> dog
|
||||
*
|
||||
* <p>The remaining part of this document provides more information on the available functions and
|
||||
* their expected behavior.
|
||||
*
|
||||
* <h2>Classification of interval functions</h2>
|
||||
*
|
||||
* <p>The following groups of interval functions are available in the {@link
|
||||
* org.apache.lucene.queryparser.flexible.standard.StandardQueryParser}.
|
||||
*
|
||||
* <table class="table" style="width: auto">
|
||||
* <caption>Interval functions grouped by similar functionality.</caption>
|
||||
* <thead>
|
||||
* <tr>
|
||||
* <th>Terms</th>
|
||||
* <th>Alternatives</th>
|
||||
* <th>Length</th>
|
||||
* <th>Context</th>
|
||||
* <th>Ordering</th>
|
||||
* <th>Containment</th>
|
||||
* </tr>
|
||||
* </thead>
|
||||
*
|
||||
* <tbody>
|
||||
* <tr>
|
||||
* <td>
|
||||
* <em>term literals</em><br>
|
||||
* <code>fn:wildcard</code><br>
|
||||
* </td>
|
||||
* <td>
|
||||
* <code>fn:or</code><br>
|
||||
* <code>fn:atLeast</code>
|
||||
* </td>
|
||||
* <td>
|
||||
* <code>fn:maxgaps</code><br>
|
||||
* <code>fn:maxwidth</code>
|
||||
* </td>
|
||||
* <td>
|
||||
* <code>fn:before</code><br>
|
||||
* <code>fn:after</code><br>
|
||||
* <code>fn:extend</code><br>
|
||||
* <code>fn:within</code><br>
|
||||
* <code>fn:notWithin</code>
|
||||
* </td>
|
||||
* <td>
|
||||
* <code>fn:ordered</code><br>
|
||||
* <code>fn:unordered</code><br>
|
||||
* <code>fn:phrase</code><br>
|
||||
* <code>fn:unorderedNoOverlaps</code>
|
||||
* </td>
|
||||
* <td>
|
||||
* <code>fn:containedBy</code><br>
|
||||
* <code>fn:notContainedBy</code><br>
|
||||
* <code>fn:containing</code><br>
|
||||
* <code>fn:notContaining</code><br>
|
||||
* <code>fn:overlapping</code><br>
|
||||
* <code>fn:nonOverlapping</code>
|
||||
* </td>
|
||||
* </tr>
|
||||
* </tbody>
|
||||
* </table>
|
||||
*
|
||||
* <p>All examples in the description of interval functions (below) assume a document with the
|
||||
* following content:
|
||||
*
|
||||
* <p class="example sentence with-highlights">The quick brown fox jumps over the lazy dog
|
||||
*
|
||||
* <h3><em>term literals</em></h3>
|
||||
*
|
||||
* <p>Quoted or unquoted character sequences are converted into (analyzed) text intervals. While a
|
||||
* single term typically results in a single-term interval, a quoted multi-term phrase will produce
|
||||
* an interval matching the corresponding sequence of tokens. Note this is different from the <code>
|
||||
* fn:phrase</code> function which takes a sequence of sub-intervals.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:or(quick "fox")</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick</span> brown <span class="highlight">fox</span> jumps over
|
||||
* the lazy dog
|
||||
* <li><code>fn:or("quick fox")</code> (<em>The document would not match — no phrase
|
||||
* <code>quick fox</code> exists.</em>)
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the lazy dog
|
||||
* <li><code>fn:phrase(quick brown fox)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox</span> jumps over the lazy dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:wildcard</h3>
|
||||
*
|
||||
* <p>Matches the disjunction of all terms that match a wildcard glob.
|
||||
*
|
||||
* <p><em>Important!</em> The expanded wildcard must not match more than 128 terms. This is an
|
||||
* internal limitation that prevents blowing up memory on, for example, prefix expansions that would
|
||||
* cover huge numbers of alternatives.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:wildcard(glob)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>glob</code>
|
||||
* <dd>term glob to expand (based on the contents of the index).
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:wildcard(jump*)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox <span
|
||||
* class="highlight">jumps</span> over the lazy dog
|
||||
* <li><code>fn:wildcard(br*n)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick <span
|
||||
* class="highlight">brown</span> fox jumps over the lazy dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:or</h3>
|
||||
*
|
||||
* <p>Matches the disjunction of nested intervals.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:or(sources...)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>sources</code>
|
||||
* <dd>sub-intervals (terms or other functions)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:or(dog fox)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown <span
|
||||
* class="highlight">fox</span> jumps over the lazy <span class="highlight">dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:atLeast</h3>
|
||||
*
|
||||
* <p>Matches documents that contain at least the provided number of source intervals.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:atLeast(min sources...)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>min</code>
|
||||
* <dd>an integer specifying minimum number of sub-interval arguments that must match.
|
||||
* <dt><code>sources</code>
|
||||
* <dd>sub-intervals (terms or other functions)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:atLeast(2 quick fox "furry dog")</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox</span> jumps over the lazy dog
|
||||
* <li><code>fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)</code>
|
||||
* <em>(This query results in multiple overlapping intervals.)</em>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox jumps over the lazy</span> dog<br>
|
||||
* The <span class="highlight">quick brown fox jumps over the lazy dog</span><br>
|
||||
* The quick <span class="highlight">brown fox jumps over the lazy dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:maxgaps</h3>
|
||||
*
|
||||
* <p>Accepts <code>source</code> interval if it has at most <code>gaps</code> position gaps.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:maxgaps(gaps source)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>gaps</code>
|
||||
* <dd>an integer specifying maximum number of source's position gaps.
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval.
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the <span class="highlight">lazy dog</span>
|
||||
* <li><code>fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox</span> jumps over the <span class="highlight">lazy
|
||||
* dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:maxwidth</h3>
|
||||
*
|
||||
* <p>Accepts <code>source</code> interval if it has at most the given width (position span).
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:maxwidth(max source)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>max</code>
|
||||
* <dd>an integer specifying maximum width of source's position span.
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval.
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the <span class="highlight">lazy dog</span>
|
||||
* <li><code>fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox</span> jumps over the <span class="highlight">lazy
|
||||
* dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:phrase</h3>
|
||||
*
|
||||
* <p>Matches an ordered, gapless sequence of source intervals.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:phrase(sources...)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>sources</code>
|
||||
* <dd>sub-intervals (terms or other functions)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:phrase(quick brown fox)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox</span> jumps over the lazy dog
|
||||
* <li><code>fn:phrase(fn:ordered(quick fox) jumps)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox jumps</span> over the lazy dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:ordered</h3>
|
||||
*
|
||||
* <p>Matches an ordered span containing all source intervals, possibly with gaps in between their
|
||||
* respective source interval positions. Source intervals must not overlap.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:ordered(sources...)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>sources</code>
|
||||
* <dd>sub-intervals (terms or other functions)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:ordered(quick jumps dog)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox jumps over the lazy dog</span>
|
||||
* <li><code>fn:ordered(quick fn:or(fox dog))</code> <em>(Note only the shorter match out of
|
||||
* the two alternatives is included in the result; the algorithm is not required to
|
||||
* return or highlight all matching interval alternatives).</em>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox</span> jumps over the lazy dog
|
||||
* <li><code>fn:ordered(quick jumps fn:or(fox dog))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox jumps over the lazy dog</span>
|
||||
* <li><code>fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))</code> <em>(Sources
|
||||
* overlap, no matches.)</em>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the lazy dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:unordered</h3>
|
||||
*
|
||||
* <p>Matches an unordered span containing all source intervals, possibly with gaps in between their
|
||||
* respective source interval positions. Source intervals may overlap.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:unordered(sources...)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>sources</code>
|
||||
* <dd>sub-intervals (terms or other functions)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:unordered(dog jumps quick)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox jumps over the lazy dog</span>
|
||||
* <li><code>fn:unordered(fn:or(fox dog) quick)</code> <em>(Note only the shorter match out
|
||||
* of the two alternatives is included in the result; the algorithm is not required to
|
||||
* return or highlight all matching interval alternatives).</em>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox</span> jumps over the lazy dog
|
||||
* <li><code>fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick <span
|
||||
* class="highlight">brown fox jumps</span> over the lazy dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:unorderedNoOverlaps</h3>
|
||||
*
|
||||
* <p>Matches an unordered span containing two source intervals, possibly with gaps in between their
|
||||
* respective source interval positions. Source intervals must not overlap.
|
||||
*
|
||||
* <p>Note that, unlike <code>fn:unordered</code>, this function takes a fixed number of arguments
|
||||
* (two).
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:unorderedNoOverlaps(source1 source2)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source1</code>
|
||||
* <dd>sub-interval (term or other function)
|
||||
* <dt><code>source2</code>
|
||||
* <dd>sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:unorderedNoOverlaps(fn:phrase(fox jumps) brown)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick <span
|
||||
* class="highlight">brown fox jumps</span> over the lazy dog
|
||||
* <li><code>fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))</code>
|
||||
* <em>(Sources overlap, no matches.)</em>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the lazy dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:before</h3>
|
||||
*
|
||||
* <p>Matches intervals from the source that appear before intervals from the reference.
|
||||
*
|
||||
* <p>Reference intervals will not be part of the match (this is a filtering function).
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:before(source reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:before(fn:or(brown lazy) fox)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick <span
|
||||
* class="highlight">brown</span> fox jumps over the lazy dog
|
||||
* <li><code>fn:before(fn:or(brown lazy) fn:or(dog fox))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick <span
|
||||
* class="highlight">brown</span> fox jumps over the <span class="highlight">lazy</span>
|
||||
* dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:after</h3>
|
||||
*
|
||||
* <p>Matches intervals from the source that appear after intervals from the reference.
|
||||
*
|
||||
* <p>Reference intervals will not be part of the match (this is a filtering function).
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:after(source reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:after(fn:or(brown lazy) fox)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the <span class="highlight">lazy</span> dog
|
||||
* <li><code>fn:after(fn:or(brown lazy) fn:or(dog fox))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the <span class="highlight">lazy</span> dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:extend</h3>
|
||||
*
|
||||
* <p>Matches an interval around another source, extending its span by a number of positions before
|
||||
* and after.
|
||||
*
|
||||
* <p>This is an advanced function that allows extending the left and right "context" of another
|
||||
* interval.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:extend(source before after)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>before</code>
|
||||
* <dd>an integer number of positions to extend to the left of the source
|
||||
* <dt><code>after</code>
|
||||
* <dd>an integer number of positions to extend to the right of the source
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:extend(fox 1 2)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick <span
|
||||
* class="highlight">brown fox jumps over</span> the lazy dog
|
||||
* <li><code>fn:extend(fn:or(dog fox) 2 0)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox</span> jumps over <span class="highlight">the lazy
|
||||
* dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:within</h3>
|
||||
*
|
||||
* <p>Matches intervals of the source that appear within the provided number of positions from the
|
||||
* intervals of the reference.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:within(source positions reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>positions</code>
|
||||
* <dd>an integer number of maximum positions between source and reference
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:within(fn:or(fox dog) 1 fn:or(quick lazy))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the lazy <span class="highlight">dog</span>
|
||||
* <li><code>fn:within(fn:or(fox dog) 2 fn:or(quick lazy))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown <span
|
||||
* class="highlight">fox</span> jumps over the lazy <span class="highlight">dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:notWithin</h3>
|
||||
*
|
||||
* <p>Matches intervals of the source that do <em>not</em> appear within the provided number of
|
||||
* positions from the intervals of the reference.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:notWithin(source positions reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>positions</code>
|
||||
* <dd>an integer number of maximum positions between source and reference
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown <span
|
||||
* class="highlight">fox</span> jumps over the lazy dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:containedBy</h3>
|
||||
*
|
||||
* <p>Matches intervals of the source that are contained by intervals of the reference.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:containedBy(source reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown <span
|
||||
* class="highlight">fox</span> jumps over the lazy dog
|
||||
* <li><code>fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the lazy <span class="highlight">dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:notContainedBy</h3>
|
||||
*
|
||||
* <p>Matches intervals of the source that are <em>not</em> contained by intervals of the reference.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:notContainedBy(source reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the lazy <span class="highlight">dog</span>
|
||||
* <li><code>fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown <span
|
||||
* class="highlight">fox</span> jumps over the lazy dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:containing</h3>
|
||||
*
|
||||
* <p>Matches intervals of the source that contain at least one interval of the reference.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:containing(source reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The <span
|
||||
* class="highlight">quick brown fox</span> jumps over <span class="highlight">the lazy
|
||||
* dog</span>
|
||||
* <li><code>fn:containing(fn:atLeast(2 quick fox dog) jumps)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown <span
|
||||
* class="highlight">fox jumps over the lazy dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:notContaining</h3>
|
||||
*
|
||||
* <p>Matches intervals of the source that do <em>not</em> contain any intervals of the reference.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:notContaining(source reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the <span class="highlight">lazy dog</span>
|
||||
* <li><code>fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over <span class="highlight">the lazy dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:overlapping</h3>
|
||||
*
|
||||
* <p>Matches intervals of the source that overlap with at least one interval of the reference.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:overlapping(source reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick <span
|
||||
* class="highlight">brown fox</span> jumps over the lazy dog
|
||||
* <li><code>fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown fox jumps
|
||||
* over the lazy <span class="highlight">dog</span>
|
||||
* </ul>
|
||||
* </dl>
|
||||
*
|
||||
* <h3>fn:nonOverlapping</h3>
|
||||
*
|
||||
* <p>Matches intervals of the source that do <em>not</em> overlap with any intervals of the
|
||||
* reference.
|
||||
*
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt>Arguments
|
||||
* <dd>
|
||||
* <p><code>fn:nonOverlapping(source reference)</code>
|
||||
* <dl class="dl-horizontal narrow">
|
||||
* <dt><code>source</code>
|
||||
* <dd>source sub-interval (term or other function)
|
||||
* <dt><code>reference</code>
|
||||
* <dd>reference sub-interval (term or other function)
|
||||
* </dl>
|
||||
* <dt>Examples
|
||||
* <dd>
|
||||
* <ul>
|
||||
* <li><code>fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick <span
|
||||
* class="highlight">brown fox</span> jumps over the lazy dog
|
||||
* <li><code>fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))</code>
|
||||
* <p class="example sentence with-highlights left-aligned">The quick brown <span
|
||||
* class="highlight">fox</span> jumps over the lazy dog
|
||||
* </ul>
|
||||
* </dl>
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn;
|
|
@ -16,10 +16,7 @@
|
|||
*/
|
||||
|
||||
/**
|
||||
* Implementation of the {@linkplain org.apache.lucene.queryparser.classic Lucene classic query
|
||||
* parser} using the flexible query parser frameworks
|
||||
*
|
||||
* <h2>Lucene Flexible Query Parser Implementation</h2>
|
||||
* Lucene Flexible Query Parser Implementation
|
||||
*
|
||||
* <p>The old Lucene query parser used to have only one class that performed all the parsing
|
||||
* operations. In the new query parser structure, the parsing was divided into 3 steps: parsing
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -35,26 +35,50 @@ import java.io.Reader;
|
|||
import java.util.Collections;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.lucene.queryparser.flexible.messages.Message;
|
||||
import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
|
||||
import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
|
||||
import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
|
||||
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.messages.Message;
|
||||
import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
|
||||
import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
|
||||
import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
|
||||
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.After;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.AnalyzedText;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.AtLeast;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Before;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.ContainedBy;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Containing;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Extend;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.IntervalFunction;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.MaxGaps;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.MaxWidth;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.NonOverlapping;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.NotContainedBy;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.NotContaining;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.NotWithin;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Or;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Ordered;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Overlapping;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Phrase;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Unordered;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.UnorderedNoOverlaps;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Wildcard;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn.Within;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.MinShouldMatchNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode;
|
||||
import org.apache.lucene.queryparser.charstream.CharStream;
|
||||
import org.apache.lucene.queryparser.charstream.FastCharStream;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
|
||||
|
||||
import static org.apache.lucene.queryparser.flexible.standard.parser.EscapeQuerySyntaxImpl.discardEscapeChar;
|
||||
|
||||
|
@ -87,6 +111,14 @@ public class StandardSyntaxParser implements SyntaxParser {
|
|||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Converts the text of a token into a {@code float}.
 *
 * @param token the token whose {@code image} is parsed
 * @return the result of {@link Float#parseFloat(String)} applied to {@code token.image}
 */
public static float parseFloat(Token token) {
|
||||
return Float.parseFloat(token.image);
|
||||
}
|
||||
|
||||
/**
 * Converts the text of a token into an {@code int}.
 *
 * <p>NOTE(review): {@code <NUMBER>} tokens are also parsed as floats elsewhere in this grammar
 * (boost, fuzzy similarity), so a number image presumably may carry a fractional part; in that
 * case {@link Integer#parseInt(String)} throws {@link NumberFormatException} - confirm that
 * callers expect this.
 *
 * @param token the token whose {@code image} is parsed
 * @return the result of {@link Integer#parseInt(String)} applied to {@code token.image}
 */
public static int parseInt(Token token) {
|
||||
return Integer.parseInt(token.image);
|
||||
}
|
||||
}
|
||||
PARSER_END(StandardSyntaxParser)
|
||||
|
||||
|
@ -96,7 +128,7 @@ PARSER_END(StandardSyntaxParser)
|
|||
<#_NUM_CHAR: ["0"-"9"] >
|
||||
// Every character that follows a backslash is considered as an escaped character
|
||||
| <#_ESCAPED_CHAR: "\\" ~[] >
|
||||
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
|
||||
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", "@",
|
||||
"<", ">", "=", "[", "]", "\"", "{", "}", "~", "\\", "/"]
|
||||
| <_ESCAPED_CHAR> ) >
|
||||
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
|
||||
|
@ -104,7 +136,7 @@ PARSER_END(StandardSyntaxParser)
|
|||
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
|
||||
}
|
||||
|
||||
<DEFAULT, Range> SKIP : {
|
||||
<DEFAULT, Range, Function> SKIP : {
|
||||
< <_WHITESPACE> >
|
||||
}
|
||||
|
||||
|
@ -112,9 +144,9 @@ PARSER_END(StandardSyntaxParser)
|
|||
<AND: ("AND" | "&&") >
|
||||
| <OR: ("OR" | "||") >
|
||||
| <NOT: ("NOT" | "!") >
|
||||
| <FN_PREFIX: ("fn:") > : Function
|
||||
| <PLUS: "+" >
|
||||
| <MINUS: "-" >
|
||||
| <LPAREN: "(" >
|
||||
| <RPAREN: ")" >
|
||||
| <OP_COLON: ":" >
|
||||
| <OP_EQUAL: "=" >
|
||||
|
@ -132,6 +164,33 @@ PARSER_END(StandardSyntaxParser)
|
|||
| <RANGEEX_START: "{" > : Range
|
||||
}
|
||||
|
||||
<DEFAULT,Function> TOKEN : {
|
||||
<LPAREN: "(" > : DEFAULT
|
||||
}
|
||||
|
||||
// Keywords naming the interval functions. They are recognized only in the Function lexical
// state, which the lexer enters after the "fn:" prefix token (FN_PREFIX) and leaves again on
// the opening parenthesis (LPAREN switches back to DEFAULT).
// Multi-word names accept both a camelCase and an all-lowercase spelling.
<Function> TOKEN : {
|
||||
<ATLEAST: ("atleast" | "atLeast") >
|
||||
| <AFTER: ("after") >
|
||||
| <BEFORE: ("before") >
|
||||
| <CONTAINED_BY: ("containedBy" | "containedby") >
|
||||
| <CONTAINING: ("containing") >
|
||||
| <EXTEND: ("extend") >
|
||||
| <FN_OR: ("or") >
|
||||
| <MAXGAPS: ("maxgaps" | "maxGaps") >
|
||||
| <MAXWIDTH: ("maxwidth" | "maxWidth") >
|
||||
| <NON_OVERLAPPING: ("nonOverlapping" | "nonoverlapping") >
|
||||
| <NOT_CONTAINED_BY: ("notContainedBy" | "notcontainedby") >
|
||||
| <NOT_CONTAINING: ("notContaining" | "notcontaining") >
|
||||
| <NOT_WITHIN: ("notWithin" | "notwithin") >
|
||||
| <ORDERED: ("ordered") >
|
||||
| <OVERLAPPING: ("overlapping") >
|
||||
| <PHRASE: ("phrase") >
|
||||
| <UNORDERED: ("unordered") >
|
||||
| <UNORDERED_NO_OVERLAPS: ("unorderedNoOverlaps" | "unorderednooverlaps") >
|
||||
| <WILDCARD: ("wildcard") >
|
||||
| <WITHIN: ("within") >
|
||||
}
|
||||
|
||||
<Range> TOKEN : {
|
||||
<RANGE_TO: "TO">
|
||||
| <RANGEIN_END: "]"> : DEFAULT
|
||||
|
@ -265,7 +324,8 @@ private QueryNode Clause(CharSequence field) : {
|
|||
{
|
||||
(
|
||||
LOOKAHEAD(2) q = FieldRangeExpr(field)
|
||||
| (LOOKAHEAD(2) field = FieldName() ( <OP_COLON> | <OP_EQUAL> ))? ( q = Term(field) | q = GroupingExpr(field))
|
||||
| (LOOKAHEAD(2) field = FieldName() ( <OP_COLON> | <OP_EQUAL> ))?
|
||||
(LOOKAHEAD(2) q = Term(field) | q = GroupingExpr(field) | q = IntervalExpr(field))
|
||||
)
|
||||
{
|
||||
return q;
|
||||
|
@ -289,15 +349,314 @@ private CharSequence FieldName() : {
|
|||
* GroupingExpr ::= '(' Query ')' ('^' <NUMBER>)?
|
||||
* }</pre>
|
||||
*/
|
||||
private GroupQueryNode GroupingExpr(CharSequence field) : {
|
||||
private QueryNode GroupingExpr(CharSequence field) : {
|
||||
QueryNode q;
|
||||
Token boost;
|
||||
Token boost, minShouldMatch = null;
|
||||
}
|
||||
{
|
||||
<LPAREN> q = Query(field) <RPAREN> (q = Boost(q))?
|
||||
<LPAREN> q = Query(field) <RPAREN> (q = Boost(q))? ("@" minShouldMatch = <NUMBER>)?
|
||||
{
|
||||
return new GroupQueryNode(q);
|
||||
if (minShouldMatch != null) {
|
||||
q = new MinShouldMatchNode(parseInt(minShouldMatch), new GroupQueryNode(q));
|
||||
} else {
|
||||
q = new GroupQueryNode(q);
|
||||
}
|
||||
return q;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* An interval expression (functions) node.
 *
 * Parses one interval function via IntervalFun() and wraps it in an IntervalQueryNode.
 * The node receives the string form of the field, or null when the field argument is null.
|
||||
*/
|
||||
private IntervalQueryNode IntervalExpr(CharSequence field) : {
|
||||
IntervalFunction source;
|
||||
}
|
||||
{
|
||||
source = IntervalFun()
|
||||
{
|
||||
return new IntervalQueryNode(field == null ? null : field.toString(), source);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses any single interval function and returns it.
 *
 * This is the dispatch point used wherever a sub-interval is expected, so interval functions
 * can nest. Every named alternative starts with FN_PREFIX followed by its own keyword token,
 * which is why each choice is guarded by a two-token lookahead: the first token alone cannot
 * tell the named functions apart. The last alternative, IntervalText(), accepts a bare term,
 * number or quoted string.
 */
private IntervalFunction IntervalFun() : {
|
||||
IntervalFunction source;
|
||||
}
|
||||
{
|
||||
LOOKAHEAD(2) source = IntervalAtLeast() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalMaxWidth() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalMaxGaps() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalOrdered() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalUnordered() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalUnorderedNoOverlaps() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalOr() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalWildcard() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalAfter() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalBefore() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalPhrase() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalContaining() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalNotContaining() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalContainedBy() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalNotContainedBy() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalWithin() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalNotWithin() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalOverlapping() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalNonOverlapping() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalExtend() { return source; }
|
||||
| LOOKAHEAD(2) source = IntervalText() { return source; }
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:atLeast(NUMBER fn+)": a number followed by one or more sub-functions.
 *
 * Returns an AtLeast built from the integer value of the number (via parseInt) and the list
 * of sub-functions in the order they were parsed.
 */
private IntervalFunction IntervalAtLeast() : {
|
||||
IntervalFunction source;
|
||||
ArrayList<IntervalFunction> sources = new ArrayList<IntervalFunction>();
|
||||
Token minShouldMatch;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <ATLEAST>
|
||||
<LPAREN> minShouldMatch = <NUMBER> (source = IntervalFun() { sources.add(source); })+ <RPAREN>
|
||||
{
|
||||
return new AtLeast(parseInt(minShouldMatch), sources);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:maxWidth(NUMBER fn)": a number followed by exactly one sub-function.
 *
 * Returns a MaxWidth built from the integer value of the number (via parseInt) and the
 * parsed sub-function.
 */
private IntervalFunction IntervalMaxWidth() : {
|
||||
IntervalFunction source;
|
||||
Token maxWidth;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <MAXWIDTH>
|
||||
<LPAREN> maxWidth = <NUMBER> source = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new MaxWidth(parseInt(maxWidth), source);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:maxGaps(NUMBER fn)": a number followed by exactly one sub-function.
 *
 * Returns a MaxGaps built from the integer value of the number (via parseInt) and the
 * parsed sub-function.
 */
private IntervalFunction IntervalMaxGaps() : {
|
||||
IntervalFunction source;
|
||||
Token maxGaps;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <MAXGAPS>
|
||||
<LPAREN> maxGaps = <NUMBER> source = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new MaxGaps(parseInt(maxGaps), source);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:unordered(fn+)": one or more sub-functions.
 *
 * Returns an Unordered holding the sub-functions in the order they were parsed.
 */
private IntervalFunction IntervalUnordered() : {
|
||||
IntervalFunction source;
|
||||
ArrayList<IntervalFunction> sources = new ArrayList<IntervalFunction>();
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <UNORDERED>
|
||||
<LPAREN> (source = IntervalFun() { sources.add(source); })+ <RPAREN>
|
||||
{
|
||||
return new Unordered(sources);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:unorderedNoOverlaps(fn fn)": exactly two sub-functions.
 *
 * Returns an UnorderedNoOverlaps built from the two sub-functions in parse order.
 */
private IntervalFunction IntervalUnorderedNoOverlaps() : {
|
||||
IntervalFunction a, b;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <UNORDERED_NO_OVERLAPS>
|
||||
<LPAREN> a = IntervalFun() b = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new UnorderedNoOverlaps(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:ordered(fn+)": one or more sub-functions.
 *
 * Returns an Ordered holding the sub-functions in the order they were parsed.
 */
private IntervalFunction IntervalOrdered() : {
|
||||
IntervalFunction source;
|
||||
ArrayList<IntervalFunction> sources = new ArrayList<IntervalFunction>();
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <ORDERED>
|
||||
<LPAREN> (source = IntervalFun() { sources.add(source); })+ <RPAREN>
|
||||
{
|
||||
return new Ordered(sources);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:or(fn+)": one or more sub-functions.
 *
 * Returns an Or holding the sub-functions in the order they were parsed.
 */
private IntervalFunction IntervalOr() : {
|
||||
IntervalFunction source;
|
||||
ArrayList<IntervalFunction> sources = new ArrayList<IntervalFunction>();
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <FN_OR>
|
||||
<LPAREN> (source = IntervalFun() { sources.add(source); })+ <RPAREN>
|
||||
{
|
||||
return new Or(sources);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:phrase(fn+)": one or more sub-functions.
 *
 * Returns a Phrase holding the sub-functions in the order they were parsed.
 */
private IntervalFunction IntervalPhrase() : {
|
||||
IntervalFunction source;
|
||||
ArrayList<IntervalFunction> sources = new ArrayList<IntervalFunction>();
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <PHRASE>
|
||||
<LPAREN> (source = IntervalFun() { sources.add(source); })+ <RPAREN>
|
||||
{
|
||||
return new Phrase(sources);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:before(source reference)": exactly two sub-functions.
 *
 * The first sub-function is the source and the second is the reference; returns
 * a Before built from them in that order.
 */
private IntervalFunction IntervalBefore() : {
|
||||
IntervalFunction source;
|
||||
IntervalFunction reference;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <BEFORE> <LPAREN> source = IntervalFun() reference = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new Before(source, reference);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:after(source reference)": exactly two sub-functions.
 *
 * The first sub-function is the source and the second is the reference; returns
 * an After built from them in that order.
 */
private IntervalFunction IntervalAfter() : {
|
||||
IntervalFunction source;
|
||||
IntervalFunction reference;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <AFTER> <LPAREN> source = IntervalFun() reference = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new After(source, reference);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:containing(big small)": exactly two sub-functions.
 *
 * The first sub-function is bound to "big" and the second to "small"; returns
 * a Containing built as Containing(big, small).
 */
private IntervalFunction IntervalContaining() : {
|
||||
IntervalFunction big;
|
||||
IntervalFunction small;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <CONTAINING> <LPAREN> big = IntervalFun() small = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new Containing(big, small);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:notContaining(minuend subtrahend)": exactly two sub-functions.
 *
 * The first sub-function is the minuend and the second is the subtrahend; returns
 * a NotContaining built from them in that order.
 */
private IntervalFunction IntervalNotContaining() : {
|
||||
IntervalFunction minuend;
|
||||
IntervalFunction subtrahend;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <NOT_CONTAINING> <LPAREN> minuend = IntervalFun() subtrahend = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new NotContaining(minuend, subtrahend);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:containedBy(small big)": exactly two sub-functions.
 *
 * Note the argument order: the first sub-function is bound to "small" and the second to
 * "big" (the reverse of fn:containing); returns ContainedBy(small, big).
 */
private IntervalFunction IntervalContainedBy() : {
|
||||
IntervalFunction big;
|
||||
IntervalFunction small;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <CONTAINED_BY> <LPAREN> small = IntervalFun() big = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new ContainedBy(small, big);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:notContainedBy(small big)": exactly two sub-functions.
 *
 * The first sub-function is bound to "small" and the second to "big"; returns
 * NotContainedBy(small, big).
 */
private IntervalFunction IntervalNotContainedBy() : {
|
||||
IntervalFunction big;
|
||||
IntervalFunction small;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <NOT_CONTAINED_BY> <LPAREN> small = IntervalFun() big = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new NotContainedBy(small, big);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:within(source NUMBER reference)": a sub-function, a number, then another
 * sub-function.
 *
 * Returns a Within built from the source, the integer value of the number (via parseInt)
 * and the reference.
 */
private IntervalFunction IntervalWithin() : {
|
||||
IntervalFunction source, reference;
|
||||
Token positions;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <WITHIN>
|
||||
<LPAREN>
|
||||
source = IntervalFun()
|
||||
positions = <NUMBER>
|
||||
reference = IntervalFun()
|
||||
<RPAREN>
|
||||
{
|
||||
return new Within(source, parseInt(positions), reference);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:extend(source NUMBER NUMBER)": a sub-function followed by two numbers.
 *
 * Returns an Extend built from the source and the integer values (via parseInt) of the
 * first ("before") and second ("after") number.
 */
private IntervalFunction IntervalExtend() : {
|
||||
IntervalFunction source;
|
||||
Token before, after;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <EXTEND>
|
||||
<LPAREN>
|
||||
source = IntervalFun()
|
||||
before = <NUMBER>
|
||||
after = <NUMBER>
|
||||
<RPAREN>
|
||||
{
|
||||
return new Extend(source, parseInt(before), parseInt(after));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:notWithin(minuend NUMBER subtrahend)": a sub-function, a number, then another
 * sub-function.
 *
 * Returns a NotWithin built from the minuend, the integer value of the number (via parseInt)
 * and the subtrahend.
 */
private IntervalFunction IntervalNotWithin() : {
|
||||
IntervalFunction minuend, subtrahend;
|
||||
Token positions;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <NOT_WITHIN>
|
||||
<LPAREN>
|
||||
minuend = IntervalFun()
|
||||
positions = <NUMBER>
|
||||
subtrahend = IntervalFun()
|
||||
<RPAREN>
|
||||
{
|
||||
return new NotWithin(minuend, parseInt(positions), subtrahend);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:overlapping(source reference)": exactly two sub-functions.
 *
 * Returns an Overlapping built from the source and the reference in that order.
 */
private IntervalFunction IntervalOverlapping() : {
|
||||
IntervalFunction source, reference;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <OVERLAPPING> <LPAREN> source = IntervalFun() reference = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new Overlapping(source, reference);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:nonOverlapping(minuend subtrahend)": exactly two sub-functions.
 *
 * Returns a NonOverlapping built from the minuend and the subtrahend in that order.
 */
private IntervalFunction IntervalNonOverlapping() : {
|
||||
IntervalFunction minuend, subtrahend;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <NON_OVERLAPPING> <LPAREN> minuend = IntervalFun() subtrahend = IntervalFun() <RPAREN>
|
||||
{
|
||||
return new NonOverlapping(minuend, subtrahend);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses "fn:wildcard(arg)" where the argument is a single TERM, NUMBER or QUOTED token.
 *
 * A TERM or NUMBER is used verbatim; for a QUOTED token the first and last character of the
 * image (the surrounding quotes) are dropped. Returns a Wildcard for the resulting string.
 */
private IntervalFunction IntervalWildcard() : {
|
||||
String wildcard;
|
||||
}
|
||||
{
|
||||
<FN_PREFIX> <WILDCARD>
|
||||
<LPAREN>
|
||||
(
|
||||
(<TERM> | <NUMBER>) { wildcard = token.image; }
|
||||
| <QUOTED> { wildcard = token.image.substring(1, token.image.length() - 1); }
|
||||
)
|
||||
<RPAREN>
|
||||
{
|
||||
return new Wildcard(wildcard);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses a plain text argument (no "fn:" prefix): a QUOTED, TERM or NUMBER token.
 *
 * Returns an AnalyzedText for the token image; for a QUOTED token the first and last
 * character of the image (the surrounding quotes) are dropped first.
 */
private IntervalFunction IntervalText() : {
|
||||
}
|
||||
{
|
||||
(<QUOTED>) { return new AnalyzedText(token.image.substring(1, token.image.length() - 1)); }
|
||||
| (<TERM> | <NUMBER>) { return new AnalyzedText(token.image); }
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -313,7 +672,7 @@ private QueryNode Boost(QueryNode node) : {
|
|||
{
|
||||
<CARAT> boost = <NUMBER>
|
||||
{
|
||||
return node == null ? node : new BoostQueryNode(node, Float.parseFloat(boost.image));
|
||||
return node == null ? node : new BoostQueryNode(node, parseFloat(boost));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -332,7 +691,7 @@ private QueryNode FuzzyOp(CharSequence field, Token term, QueryNode node) : {
|
|||
{
|
||||
float fms = org.apache.lucene.search.FuzzyQuery.defaultMaxEdits;
|
||||
if (similarity != null) {
|
||||
fms = Float.parseFloat(similarity.image);
|
||||
fms = parseFloat(similarity);
|
||||
if (fms < 0.0f) {
|
||||
throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
|
||||
} else if (fms >= 1.0f && fms != (int) fms) {
|
||||
|
@ -411,7 +770,10 @@ private QueryNode Term(CharSequence field) : {
|
|||
{
|
||||
(
|
||||
term = <REGEXPTERM>
|
||||
{ q = new RegexpQueryNode(field, term.image.substring(1, term.image.length() - 1)); }
|
||||
{
|
||||
String v = term.image.substring(1, term.image.length() - 1);
|
||||
q = new RegexpQueryNode(field, v, 0, v.length());
|
||||
}
|
||||
| (term = <TERM> | term = <NUMBER>)
|
||||
{ q = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
|
||||
( q = FuzzyOp(field, term, q) )?
|
||||
|
@ -442,7 +804,7 @@ private QueryNode QuotedTerm(CharSequence field) : {
|
|||
String image = term.image.substring(1, term.image.length() - 1);
|
||||
q = new QuotedFieldQueryNode(field, discardEscapeChar(image), term.beginColumn + 1, term.endColumn - 1);
|
||||
}
|
||||
( <TILDE> slop = <NUMBER> { q = new SlopQueryNode(q, (int) Float.parseFloat(slop.image)); } )?
|
||||
( <TILDE> slop = <NUMBER> { q = new SlopQueryNode(q, parseInt(slop)); } )?
|
||||
{
|
||||
return q;
|
||||
}
|
||||
|
|
|
@ -29,11 +29,11 @@ public interface StandardSyntaxParserConstants {
|
|||
/** RegularExpression Id. */
|
||||
int NOT = 10;
|
||||
/** RegularExpression Id. */
|
||||
int PLUS = 11;
|
||||
int FN_PREFIX = 11;
|
||||
/** RegularExpression Id. */
|
||||
int MINUS = 12;
|
||||
int PLUS = 12;
|
||||
/** RegularExpression Id. */
|
||||
int LPAREN = 13;
|
||||
int MINUS = 13;
|
||||
/** RegularExpression Id. */
|
||||
int RPAREN = 14;
|
||||
/** RegularExpression Id. */
|
||||
|
@ -65,20 +65,64 @@ public interface StandardSyntaxParserConstants {
|
|||
/** RegularExpression Id. */
|
||||
int RANGEEX_START = 28;
|
||||
/** RegularExpression Id. */
|
||||
int RANGE_TO = 29;
|
||||
int LPAREN = 29;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEIN_END = 30;
|
||||
int ATLEAST = 30;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEEX_END = 31;
|
||||
int AFTER = 31;
|
||||
/** RegularExpression Id. */
|
||||
int RANGE_QUOTED = 32;
|
||||
int BEFORE = 32;
|
||||
/** RegularExpression Id. */
|
||||
int RANGE_GOOP = 33;
|
||||
int CONTAINED_BY = 33;
|
||||
/** RegularExpression Id. */
|
||||
int CONTAINING = 34;
|
||||
/** RegularExpression Id. */
|
||||
int EXTEND = 35;
|
||||
/** RegularExpression Id. */
|
||||
int FN_OR = 36;
|
||||
/** RegularExpression Id. */
|
||||
int MAXGAPS = 37;
|
||||
/** RegularExpression Id. */
|
||||
int MAXWIDTH = 38;
|
||||
/** RegularExpression Id. */
|
||||
int NON_OVERLAPPING = 39;
|
||||
/** RegularExpression Id. */
|
||||
int NOT_CONTAINED_BY = 40;
|
||||
/** RegularExpression Id. */
|
||||
int NOT_CONTAINING = 41;
|
||||
/** RegularExpression Id. */
|
||||
int NOT_WITHIN = 42;
|
||||
/** RegularExpression Id. */
|
||||
int ORDERED = 43;
|
||||
/** RegularExpression Id. */
|
||||
int OVERLAPPING = 44;
|
||||
/** RegularExpression Id. */
|
||||
int PHRASE = 45;
|
||||
/** RegularExpression Id. */
|
||||
int UNORDERED = 46;
|
||||
/** RegularExpression Id. */
|
||||
int UNORDERED_NO_OVERLAPS = 47;
|
||||
/** RegularExpression Id. */
|
||||
int WILDCARD = 48;
|
||||
/** RegularExpression Id. */
|
||||
int WITHIN = 49;
|
||||
/** RegularExpression Id. */
|
||||
int RANGE_TO = 50;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEIN_END = 51;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEEX_END = 52;
|
||||
/** RegularExpression Id. */
|
||||
int RANGE_QUOTED = 53;
|
||||
/** RegularExpression Id. */
|
||||
int RANGE_GOOP = 54;
|
||||
|
||||
/** Lexical state. */
|
||||
int Range = 0;
|
||||
int Function = 0;
|
||||
/** Lexical state. */
|
||||
int DEFAULT = 1;
|
||||
int Range = 1;
|
||||
/** Lexical state. */
|
||||
int DEFAULT = 2;
|
||||
|
||||
/** Literal token values. */
|
||||
String[] tokenImage = {
|
||||
|
@ -93,9 +137,9 @@ public interface StandardSyntaxParserConstants {
|
|||
"<AND>",
|
||||
"<OR>",
|
||||
"<NOT>",
|
||||
"\"fn:\"",
|
||||
"\"+\"",
|
||||
"\"-\"",
|
||||
"\"(\"",
|
||||
"\")\"",
|
||||
"\":\"",
|
||||
"\"=\"",
|
||||
|
@ -111,11 +155,33 @@ public interface StandardSyntaxParserConstants {
|
|||
"<REGEXPTERM>",
|
||||
"\"[\"",
|
||||
"\"{\"",
|
||||
"\"(\"",
|
||||
"<ATLEAST>",
|
||||
"\"after\"",
|
||||
"\"before\"",
|
||||
"<CONTAINED_BY>",
|
||||
"\"containing\"",
|
||||
"\"extend\"",
|
||||
"\"or\"",
|
||||
"<MAXGAPS>",
|
||||
"<MAXWIDTH>",
|
||||
"<NON_OVERLAPPING>",
|
||||
"<NOT_CONTAINED_BY>",
|
||||
"<NOT_CONTAINING>",
|
||||
"<NOT_WITHIN>",
|
||||
"\"ordered\"",
|
||||
"\"overlapping\"",
|
||||
"\"phrase\"",
|
||||
"\"unordered\"",
|
||||
"<UNORDERED_NO_OVERLAPS>",
|
||||
"\"wildcard\"",
|
||||
"\"within\"",
|
||||
"\"TO\"",
|
||||
"\"]\"",
|
||||
"\"}\"",
|
||||
"<RANGE_QUOTED>",
|
||||
"<RANGE_GOOP>",
|
||||
"\"@\"",
|
||||
};
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard.processors;
|
||||
|
||||
import java.util.List;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
|
||||
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
|
||||
import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
|
||||
import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
|
||||
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
|
||||
|
||||
/**
|
||||
* This processor makes sure that {@link ConfigurationKeys#ANALYZER} is defined in the {@link
|
||||
* QueryConfigHandler} and injects this analyzer into {@link
|
||||
* org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode}s.
|
||||
*
|
||||
* @see ConfigurationKeys#ANALYZER
|
||||
*/
|
||||
public class IntervalQueryNodeProcessor extends QueryNodeProcessorImpl {
|
||||
// Analyzer looked up from the query configuration at the start of each process() call;
// null when the configuration does not define one.
private Analyzer analyzer;
|
||||
|
||||
/**
 * Reads {@link ConfigurationKeys#ANALYZER} from the query config handler, stores it for use
 * in {@link #preProcessNode(QueryNode)}, and then delegates to {@code super.process}.
 *
 * @param queryTree the query tree to process
 * @return the result of {@code super.process(queryTree)}
 * @throws QueryNodeException if processing fails, e.g. when an interval node is found but no
 *     analyzer is configured
 */
@Override
|
||||
public QueryNode process(QueryNode queryTree) throws QueryNodeException {
|
||||
this.analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER);
|
||||
return super.process(queryTree);
|
||||
}
|
||||
|
||||
/**
 * Injects the configured analyzer into {@link IntervalQueryNode}s; every other node is
 * returned untouched.
 *
 * @param node the node being visited
 * @return the same node instance
 * @throws QueryNodeException with the {@code ANALYZER_REQUIRED} message if {@code node} is an
 *     {@link IntervalQueryNode} and no analyzer is configured
 */
@Override
|
||||
protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
|
||||
if (node instanceof IntervalQueryNode) {
|
||||
var intervalQueryNode = (IntervalQueryNode) node;
|
||||
// The missing analyzer is only reported when an interval node actually needs it.
if (this.analyzer == null) {
|
||||
throw new QueryNodeException(
|
||||
new MessageImpl(QueryParserMessages.ANALYZER_REQUIRED, intervalQueryNode.toString()));
|
||||
}
|
||||
intervalQueryNode.setAnalyzer(this.analyzer);
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
/** No post-processing is needed; returns the node unchanged. */
@Override
|
||||
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
|
||||
return node;
|
||||
}
|
||||
|
||||
/** Leaves the order of the children unchanged. */
@Override
|
||||
protected List<QueryNode> setChildrenOrder(List<QueryNode> children) throws QueryNodeException {
|
||||
return children;
|
||||
}
|
||||
}
|
|
@ -66,5 +66,6 @@ public class StandardQueryNodeProcessorPipeline extends QueryNodeProcessorPipeli
|
|||
add(new DefaultPhraseSlopQueryNodeProcessor());
|
||||
add(new BoostQueryNodeProcessor());
|
||||
add(new MultiTermRewriteMethodProcessor());
|
||||
add(new IntervalQueryNodeProcessor());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,145 +21,23 @@
|
|||
</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Apache Lucene QueryParsers.</h1>
|
||||
|
||||
<p>
|
||||
This module provides a number of query parsers:
|
||||
<ul>
|
||||
<li><a href="#classic">Classic</a>
|
||||
<li><a href="#analyzing">Analyzing</a>
|
||||
<li><a href="#complexphrase">Complex Phrase</a>
|
||||
<li><a href="#extendable">Extendable</a>
|
||||
<li><a href="#flexible">Flexible</a>
|
||||
<li><a href="#surround">Surround</a>
|
||||
<li><a href="#xml">XML</a>
|
||||
</ul>
|
||||
<hr>
|
||||
<h2><a id="classic">Classic</a></h2>
|
||||
A Simple Lucene QueryParser implemented with JavaCC.
|
||||
<h2><a id="analyzing">Analyzing</a></h2>
|
||||
QueryParser that passes Fuzzy-, Prefix-, Range-, and WildcardQuerys through the given analyzer.
|
||||
<h2><a id="complexphrase">Complex Phrase</a></h2>
|
||||
QueryParser which permits complex phrase query syntax, e.g. "(john jon jonathan~) peters*"
|
||||
<h2><a id="extendable">Extendable</a></h2>
|
||||
Extendable QueryParser provides a simple and flexible extension mechanism by overloading query field names.
|
||||
<h2><a id="flexible">Flexible</a></h2>
|
||||
<p>
|
||||
This project contains the new Lucene query parser implementation, which matches the syntax of the core QueryParser but offers a more modular architecture to enable customization.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
It's currently divided into 2 main packages:
|
||||
<ul>
|
||||
<li>{@link org.apache.lucene.queryparser.flexible.core}: it contains the query parser API classes, which should be extended by query parser implementations. </li>
|
||||
<li>{@link org.apache.lucene.queryparser.flexible.standard}: it contains the current Lucene query parser implementation using the new query parser API.</li>
|
||||
<li>{@linkplain org.apache.lucene.queryparser.flexible flexible query parser}
|
||||
<li>{@linkplain org.apache.lucene.queryparser.classic classic query parser}
|
||||
<li>{@linkplain org.apache.lucene.queryparser.complexPhrase complex phrase query parser}
|
||||
<li>{@linkplain org.apache.lucene.queryparser.ext extendable query parser}
|
||||
<li>{@linkplain org.apache.lucene.queryparser.surround surround query parser (span queries)}
|
||||
<li>{@linkplain org.apache.lucene.queryparser.xml query parser building Query objects from XML}
|
||||
</ul>
|
||||
|
||||
<h3>Features</h3>
|
||||
|
||||
<ol>
|
||||
<li>Full support for boolean logic (not enabled)</li>
|
||||
<li>QueryNode Trees - support for several syntaxes,
|
||||
that can be converted into similar syntax QueryNode trees.</li>
|
||||
<li>QueryNode Processors - Optimize, validate, rewrite the
|
||||
QueryNode trees</li>
|
||||
<li>Processors Pipelines - Select your favorite Processor
|
||||
and build a processor pipeline, to implement the features you need</li>
|
||||
<li>Config Interfaces - Allow the consumer of the Query Parser to implement
|
||||
different Config Handler objects to suit their needs.</li>
|
||||
<li>Standard Builders - convert QueryNode's into several lucene
|
||||
representations. Supported conversion is using a 2.4 compatible logic</li>
|
||||
<li>QueryNode tree's can be converted to a lucene 2.4 syntax string, using toQueryString</li>
|
||||
</ol>
|
||||
|
||||
<h3>Design</h3>
|
||||
<p>
|
||||
This new query parser was designed to have very generic
|
||||
architecture, so that it can be easily used for different
|
||||
products with varying query syntaxes. This code is much more
|
||||
flexible and extensible than the Lucene query parser in 2.4.X.
|
||||
</p>
|
||||
<p>
|
||||
The new query parser goal is to separate syntax and semantics of a query. E.g. 'a AND
|
||||
b', '+a +b', 'AND(a,b)' could be different syntaxes for the same query.
|
||||
It distinguishes the semantics of the different query components, e.g.
|
||||
whether and how to tokenize/lemmatize/normalize the different terms or
|
||||
which Query objects to create for the terms. It allows to
|
||||
write a parser with a new syntax, while reusing the underlying
|
||||
semantics, as quickly as possible.
|
||||
</p>
|
||||
<p>
|
||||
The query parser has three layers and its core is what we call the
|
||||
QueryNode tree. It is a tree that initially represents the syntax of the
|
||||
original query, e.g. for 'a AND b':
|
||||
</p>
|
||||
<pre>
|
||||
AND
|
||||
/ \
|
||||
A B
|
||||
</pre>
|
||||
<p>
|
||||
The three layers are:
|
||||
</p>
|
||||
<dl>
|
||||
<dt>QueryParser</dt>
|
||||
<dd>
|
||||
This layer is the text parsing layer which simply transforms the
|
||||
query text string into a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} tree. Every text parser
|
||||
must implement the interface {@link org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser}.
|
||||
Lucene's default implementation implements it using JavaCC.
|
||||
</dd>
|
||||
If you're new to query parsers, the {@linkplain org.apache.lucene.queryparser.flexible flexible query parser}'s
|
||||
{@link org.apache.lucene.queryparser.flexible.standard.StandardQueryParser} is probably a good place to start.
|
||||
|
||||
<dt>QueryNodeProcessor</dt>
|
||||
<dd>The query node processors do most of the work. It is in fact a
|
||||
configurable chain of processors. Each processor can walk the tree and
|
||||
modify nodes or even the tree's structure. That makes it possible to
|
||||
e.g. do query optimization before the query is executed or to tokenize
|
||||
terms.
|
||||
</dd>
|
||||
|
||||
<dt>QueryBuilder</dt>
|
||||
<dd>
|
||||
The third layer is a configurable map of builders, which map {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode} types to its specific
|
||||
builder that will transform the QueryNode into Lucene Query object.
|
||||
</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<p>
|
||||
Furthermore, the query parser uses flexible configuration objects. It also uses message classes that
|
||||
allow to attach resource bundles. This makes it possible to translate
|
||||
messages, which is an important feature of a query parser.
|
||||
</p>
|
||||
<p>
|
||||
This design allows different query syntaxes to be developed very quickly.
|
||||
</p>
|
||||
|
||||
<h3>StandardQueryParser and QueryParserWrapper</h3>
|
||||
|
||||
<p>
|
||||
The classic Lucene query parser is located under
|
||||
{@link org.apache.lucene.queryparser.classic}.
|
||||
<p>
|
||||
To make it simpler to use the new query parser
|
||||
the class {@link org.apache.lucene.queryparser.flexible.standard.StandardQueryParser} may be helpful,
|
||||
especially for people who do not want to extend the Query Parser.
|
||||
It uses the default Lucene query processors, text parser and builders, so
|
||||
you don't need to worry about dealing with those.
|
||||
|
||||
{@link org.apache.lucene.queryparser.flexible.standard.StandardQueryParser} usage:
|
||||
|
||||
<pre class="prettyprint">
|
||||
StandardQueryParser qpHelper = new StandardQueryParser();
|
||||
StandardQueryConfigHandler config = qpHelper.getQueryConfigHandler();
|
||||
config.setAllowLeadingWildcard(true);
|
||||
config.setAnalyzer(new WhitespaceAnalyzer());
|
||||
Query query = qpHelper.parse("apache AND lucene", "defaultField");
|
||||
</pre>
|
||||
<h2><a id="surround">Surround</a></h2>
|
||||
<p>
|
||||
A QueryParser that supports the Span family of queries as well as pre and infix notation.
|
||||
</p>
|
||||
<h2><a id="xml">XML</a></h2>
|
||||
A QueryParser that produces Lucene Query objects from XML streams.
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
@ -58,3 +58,6 @@ UNSUPPORTED_NUMERIC_DATA_TYPE = Unsupported NumericField.DataType: {0}
|
|||
|
||||
#<CREATEDBY>Apache Lucene Community</CREATEDBY>
|
||||
NUMERIC_CANNOT_BE_EMPTY = Field "{0}" is numeric and cannot have an empty value.
|
||||
|
||||
#<CREATEDBY>Apache Lucene Community</CREATEDBY>
|
||||
ANALYZER_REQUIRED = An analyzer is required to parse interval sub-query "{0}"
|
|
@ -0,0 +1,206 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.queryparser.flexible.standard;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.RandomizedTest;
|
||||
import java.io.StringReader;
|
||||
import java.util.Locale;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.queryparser.charstream.FastCharStream;
|
||||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
|
||||
import org.apache.lucene.queryparser.flexible.standard.nodes.IntervalQueryNode;
|
||||
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.junit.Test;
|
||||
|
||||
/** Test interval sub-query support in {@link StandardQueryParser}. */
|
||||
public class TestStandardQPEnhancements extends LuceneTestCase {
|
||||
protected static final String FLD_DEFAULT = "defaultField";
|
||||
protected static final String FLD_WHITESPACE = "whitespaceField";
|
||||
|
||||
final StandardQueryParser getQueryParser() {
|
||||
var analyzer =
|
||||
new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
return new TokenStreamComponents(new MockTokenizer(MockTokenizer.WHITESPACE, true));
|
||||
}
|
||||
};
|
||||
|
||||
var qp = new StandardQueryParser(analyzer);
|
||||
qp.setDefaultOperator(StandardQueryConfigHandler.Operator.AND);
|
||||
qp.setMultiFields(new String[] {});
|
||||
return qp;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMinShouldMatchOperator() throws Exception {
|
||||
Query parsed =
|
||||
parsedQuery(
|
||||
String.format(
|
||||
Locale.ROOT,
|
||||
"(%s:foo OR %s:bar OR %s:baz)@2",
|
||||
FLD_WHITESPACE,
|
||||
FLD_WHITESPACE,
|
||||
FLD_WHITESPACE));
|
||||
|
||||
MatcherAssert.assertThat(
|
||||
((BooleanQuery) parsed).getMinimumNumberShouldMatch(), Matchers.equalTo(2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAtLeast() throws Exception {
|
||||
checkIntervalQueryNode("fn:atleast(3 FOO BAR baz)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxWidth() throws Exception {
|
||||
checkIntervalQueryNode("fn:maxwidth(3 fn:atleast(2 foo bar baz))");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuotedTerm() throws Exception {
|
||||
checkIntervalQueryNode("fn:atleast(2 \"foo\" \"BAR baz\")");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxGaps() throws Exception {
|
||||
checkIntervalQueryNode("fn:maxgaps(2 fn:unordered(foo BAR baz))");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOrdered() throws Exception {
|
||||
checkIntervalQueryNode("fn:ordered(foo BAR baz)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnordered() throws Exception {
|
||||
checkIntervalQueryNode("fn:unordered(foo BAR baz)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOr() throws Exception {
|
||||
checkIntervalQueryNode("fn:or(foo baz)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWildcard() throws Exception {
|
||||
checkIntervalQueryNode("fn:wildcard(foo*)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPhrase() throws Exception {
|
||||
checkIntervalQueryNode("fn:phrase(abc def fn:or(baz boo))");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBefore() throws Exception {
|
||||
checkIntervalQueryNode("fn:before(abc fn:ordered(foo bar))");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAfter() throws Exception {
|
||||
checkIntervalQueryNode("fn:after(abc fn:ordered(foo bar))");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testContaining() throws Exception {
|
||||
checkIntervalQueryNode("fn:containing(big small)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testContainedBy() throws Exception {
|
||||
checkIntervalQueryNode("fn:containedBy(small big)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNotContaining() throws Exception {
|
||||
checkIntervalQueryNode("fn:notContaining(minuend subtrahend)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNotContainedBy() throws Exception {
|
||||
checkIntervalQueryNode("fn:notContainedBy(small big)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithin() throws Exception {
|
||||
checkIntervalQueryNode("fn:within(small 2 fn:ordered(big foo))");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNotWithin() throws Exception {
|
||||
checkIntervalQueryNode("fn:notWithin(small 2 fn:ordered(big foo))");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOverlapping() throws Exception {
|
||||
checkIntervalQueryNode("fn:overlapping(fn:ordered(big foo) small)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonOverlapping() throws Exception {
|
||||
checkIntervalQueryNode("fn:nonOverlapping(fn:ordered(big foo) small)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnorderedNoOverlaps() throws Exception {
|
||||
checkIntervalQueryNode("fn:unorderedNoOverlaps(fn:ordered(big foo) small)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtend() throws Exception {
|
||||
checkIntervalQueryNode("fn:extend(fn:ordered(big foo) 2 5)");
|
||||
}
|
||||
|
||||
protected void checkIntervalQueryNode(String query) throws Exception {
|
||||
// Check raw parser first.
|
||||
var syntaxParser = new StandardSyntaxParser(new FastCharStream(new StringReader(query)));
|
||||
QueryNode queryNode = syntaxParser.TopLevelQuery(FLD_DEFAULT);
|
||||
MatcherAssert.assertThat(queryNode, Matchers.instanceOf(IntervalQueryNode.class));
|
||||
|
||||
var queryParser = getQueryParser();
|
||||
Query parsedQuery;
|
||||
if (RandomizedTest.randomBoolean()) {
|
||||
queryParser.setMultiFields(new String[] {FLD_DEFAULT});
|
||||
parsedQuery = queryParser.parse(query, null);
|
||||
} else {
|
||||
parsedQuery = queryParser.parse(query, FLD_DEFAULT);
|
||||
}
|
||||
MatcherAssert.assertThat(parsedQuery, Matchers.notNullValue());
|
||||
|
||||
// Emit toString() for visual diagnostics.
|
||||
IntervalQueryNode intervalQueryNode = (IntervalQueryNode) queryNode;
|
||||
intervalQueryNode.setAnalyzer(queryParser.getAnalyzer());
|
||||
System.out.printf(
|
||||
Locale.ROOT, "query: %s%n node: %s%n query: %s%n", query, queryNode, parsedQuery);
|
||||
}
|
||||
|
||||
protected String parsed(String query) throws Exception {
|
||||
return parsedQuery(query).toString("<no-default>");
|
||||
}
|
||||
|
||||
protected Query parsedQuery(String query) throws Exception {
|
||||
return getQueryParser().parse(query, /* no default field. */ null);
|
||||
}
|
||||
}
|
|
@ -2854,6 +2854,12 @@ public abstract class LuceneTestCase extends Assert {
|
|||
void run() throws Throwable;
|
||||
}
|
||||
|
||||
/**
 * A {@link java.util.function.Consumer}-like functional interface whose {@link #accept} method is
 * declared to throw {@link Exception}, so implementations (including lambdas) may throw checked
 * exceptions. It does not extend {@link java.util.function.Consumer}.
 *
 * @param <T> the type of the argument passed to {@link #accept}
 */
|
||||
@FunctionalInterface
|
||||
public interface ThrowingConsumer<T> {
|
||||
/**
 * Consumes the given argument.
 *
 * @param t the argument to consume
 * @throws Exception any exception raised by the implementation
 */
void accept(T t) throws Exception;
|
||||
}
|
||||
|
||||
/** Checks a specific exception class is thrown by the given runnable, and returns it. */
|
||||
public static <T extends Throwable> T expectThrows(
|
||||
Class<T> expectedType, ThrowingRunnable runnable) {
|
||||
|
|
Loading…
Reference in New Issue