diff --git a/contrib/CHANGES.txt b/contrib/CHANGES.txt index 403a66a81b6..7d305298db5 100644 --- a/contrib/CHANGES.txt +++ b/contrib/CHANGES.txt @@ -16,6 +16,11 @@ API Changes reader. (Eirik Bjørsnøs via Mike McCandless) New features + + * LUCENE-2039: Add a extensible query parser to contrib/misc. + ExtendableQueryParser enables arbitrary parser extensions based on a + customizable field naming scheme. + (Simon Willnauer) * LUCENE-2108: Spellchecker now safely supports concurrent modifications to the spell-index. Threads can safely obtain term suggestions while the spell- diff --git a/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java b/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java new file mode 100644 index 00000000000..680e35d2271 --- /dev/null +++ b/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java @@ -0,0 +1,142 @@ +package org.apache.lucene.queryParser.ext; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryParser.ext.Extensions.Pair; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.Version; + +/** + * The {@link ExtendableQueryParser} enables arbitrary query parser extension + * based on a customizable field naming scheme. The lucene query syntax allows + * implicit and explicit field definitions as query prefix followed by a colon + * (':') character. The {@link ExtendableQueryParser} allows to encode extension + * keys into the field symbol associated with a registered instance of + * {@link ParserExtension}. A customizable separation character separates the + * extension key from the actual field symbol. The {@link ExtendableQueryParser} + * splits (@see {@link Extensions#splitExtensionField(String, String)}) the + * extension key from the field symbol and tries to resolve the associated + * {@link ParserExtension}. If the parser can't resolve the key or the field + * token does not contain a separation character, {@link ExtendableQueryParser} + * yields the same behavior as its super class {@link QueryParser}. Otherwise, + * if the key is associated with a {@link ParserExtension} instance, the parser + * builds an instance of {@link ExtensionQuery} to be processed by + * {@link ParserExtension#parse(ExtensionQuery)}.If a extension field does not + * contain a field part the default field for the query will be used. + *
+ * To guarantee that an extension field is processed with its associated + * extension, the extension query part must escape any special characters like + * '*' or '['. If the extension query contains any whitespace characters, the + * extension query part must be enclosed in quotes. + * Example ('_' used as separation character): + *
+ * title_customExt:"Apache Lucene\?" OR content_customExt:prefix\* + *+ * + * Search on the default field: + *
+ * _customExt:"Apache Lucene\?" OR _customExt:prefix\* + *+ * + *
+ * The {@link ExtendableQueryParser} itself does not implement the logic how + * field and extension key are separated or ordered. All logic regarding the + * extension key and field symbol parsing is located in {@link Extensions}. + * Customized extension schemes should be implemented by sub-classing + * {@link Extensions}. + *
+ *+ * For details about the default encoding scheme see {@link Extensions}. + *
+ * + * @see Extensions + * @see ParserExtension + * @see ExtensionQuery + */ +public class ExtendableQueryParser extends QueryParser { + + private final String defaultField; + private final Extensions extensions; + + /** + * Default empty extensions instance + */ + private static final Extensions DEFAULT_EXTENSION = new Extensions(); + + /** + * Creates a new {@link ExtendableQueryParser} instance + * + * @param matchVersion + * the lucene version to use. + * @param f + * the default query field + * @param a + * the analyzer used to find terms in a query string + */ + public ExtendableQueryParser(final Version matchVersion, final String f, + final Analyzer a) { + this(matchVersion, f, a, DEFAULT_EXTENSION); + + } + + /** + * Creates a new {@link ExtendableQueryParser} instance + * + * @param matchVersion + * the lucene version to use. + * @param f + * the default query field + * @param a + * the analyzer used to find terms in a query string + * @param ext + * the query parser extensions + */ + public ExtendableQueryParser(final Version matchVersion, final String f, + final Analyzer a, final Extensions ext) { + super(matchVersion, f, a); + this.defaultField = f; + this.extensions = ext; + } + + /** + * Returns the extension field delimiter character. + * + * @return the extension field delimiter character. + */ + public char getExtensionFieldDelimiter() { + return extensions.getExtensionFieldDelimiter(); + } + + @Override + protected Query getFieldQuery(final String field, final String queryText) + throws ParseException { + final Pair
+ * In addition to the key to extension mapping this class also defines the field
+ * name overloading scheme. {@link ExtendableQueryParser} uses the given
+ * extension to split the actual field name and extension key by calling
+ * {@link #splitExtensionField(String, String)}. To change the order or the key
+ * / field name encoding scheme users can subclass {@link Extensions} to
+ * implement their own.
+ *
+ * @see ExtendableQueryParser
+ * @see ParserExtension
+ */
+public class Extensions {
+ private final Map
+ * Note: {@link Extensions} subclasses must maintain the contract between
+ * {@link #buildExtensionField(String)} and
+ * {@link #splitExtensionField(String, String)} where the latter inverts the
+ * former.
+ *
+ * Note: {@link Extensions} subclasses must maintain the contract between
+ * {@link #buildExtensionField(String, String)} and
+ * {@link #splitExtensionField(String, String)} where the latter inverts the
+ * former.
+ * null
if no extension can be found for the key.
+ *
+ * @param key
+ * the extension key
+ * @return the {@link ParserExtension} instance for the given key or
+   *         <code>null</code> if no extension can be found for the key.
+ */
+  public final ParserExtension getExtension(String key) {
+    // look the key up among the registered extensions
+    return extensions.get(key);
+  }
+
+  /**
+   * Returns the character that separates field name and extension key.
+   * 
+   * @return the delimiter held by this instance
+   */
+  public char getExtensionFieldDelimiter() {
+    return this.extensionFieldDelimiter;
+  }
+
+ /**
+   * Splits an extension field and returns the field / extension part as a
+   * {@link Pair}. This method tries to split on the first occurrence of the
+   * extension field delimiter, if the delimiter is not present in the string
+   * the result will contain a <code>null</code> value for the extension key and
+ * the given field string as the field value. If the given extension field
+ * string contains no field identifier the result pair will carry the given
+ * default field as the field value.
+ *
+ * @param defaultField
+ * the default query field
+ * @param field
+ * the extension field string
+ * @return a {@link Pair} with the field name as the {@link Pair#cur} and the
+ * extension key as the {@link Pair#cud}
+ */
+ public Pairnull
.
+ *
+ * @param query
+ * the extension query
+ * @return a new query instance
+ * @throws ParseException
+ * if the query can not be parsed.
+ */
+ public abstract Query parse(final ExtensionQuery query) throws ParseException;
+
+}
diff --git a/contrib/misc/src/java/org/apache/lucene/queryParser/ext/package.html b/contrib/misc/src/java/org/apache/lucene/queryParser/ext/package.html
new file mode 100644
index 00000000000..13549a8e59b
--- /dev/null
+++ b/contrib/misc/src/java/org/apache/lucene/queryParser/ext/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+Extendable QueryParser provides a simple and flexible extension mechanism by overloading query field names.
+
+
diff --git a/contrib/misc/src/test/org/apache/lucene/queryParser/ext/ExtensionStub.java b/contrib/misc/src/test/org/apache/lucene/queryParser/ext/ExtensionStub.java
new file mode 100644
index 00000000000..63ce2b369f8
--- /dev/null
+++ b/contrib/misc/src/test/org/apache/lucene/queryParser/ext/ExtensionStub.java
@@ -0,0 +1,33 @@
+package org.apache.lucene.queryParser.ext;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Test stub: maps an extension query to a TermQuery on its field. */
+class ExtensionStub extends ParserExtension {
+  @Override
+  public Query parse(ExtensionQuery components) throws ParseException {
+    final Term term = new Term(components.getField(),
+        components.getRawQueryString());
+    return new TermQuery(term);
+  }
+}
\ No newline at end of file
diff --git a/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtendableQueryParser.java b/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtendableQueryParser.java
new file mode 100644
index 00000000000..863cfe2dece
--- /dev/null
+++ b/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtendableQueryParser.java
@@ -0,0 +1,137 @@
+package org.apache.lucene.queryParser.ext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.TestQueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.Version;
+
+/**
+ * Testcase for the class {@link ExtendableQueryParser}
+ */
+public class TestExtendableQueryParser extends TestQueryParser {
+  // delimiter characters every extension-field test below is repeated with
+  private static char[] DELIMITERS = new char[] {
+      Extensions.DEFAULT_EXTENSION_FIELD_DELIMITER, '-', '|' };
+
+  public TestExtendableQueryParser(String name) {
+    super(name);
+  }
+
+  /** Delegates to {@link #getParser(Analyzer, Extensions)} without extensions. */
+  @Override
+  public QueryParser getParser(Analyzer a) throws Exception {
+    return getParser(a, null);
+  }
+
+  /** Creates an OR-defaulting parser on "field"; null arguments pick defaults. */
+  public QueryParser getParser(Analyzer a, Extensions extensions)
+      throws Exception {
+    final Analyzer analyzer = a == null ? new SimpleAnalyzer() : a;
+    final QueryParser parser;
+    if (extensions == null) {
+      parser = new ExtendableQueryParser(Version.LUCENE_CURRENT, "field",
+          analyzer);
+    } else {
+      parser = new ExtendableQueryParser(Version.LUCENE_CURRENT, "field",
+          analyzer, extensions);
+    }
+    parser.setDefaultOperator(QueryParser.OR_OPERATOR);
+    return parser;
+  }
+
+  /** Expects a ParseException when the ':' delimiter is used unescaped. */
+  public void testUnescapedExtDelimiter() throws Exception {
+    final Extensions extensions = newExtensions(':');
+    extensions.add("testExt", new ExtensionStub());
+    final ExtendableQueryParser parser =
+        (ExtendableQueryParser) getParser(null, extensions);
+    try {
+      parser.parse("aField:testExt:\"foo \\& bar\"");
+      fail("extension field delimiter is not escaped");
+    } catch (ParseException expected) {
+      // expected outcome, nothing further to verify
+    }
+  }
+
+  /** Unquoted text: "foo" gets the prefixed field, "bar" the default one. */
+  public void testExtFieldUnqoted() throws Exception {
+    for (final char delimiter : DELIMITERS) {
+      final Extensions registry = newExtensions(delimiter);
+      registry.add("testExt", new ExtensionStub());
+      final ExtendableQueryParser parser =
+          (ExtendableQueryParser) getParser(null, registry);
+      final String extField = registry.buildExtensionField("testExt", "aField");
+      final Query parsed = parser.parse(String.format("%s:foo bar", extField));
+      assertTrue("expected instance of BooleanQuery but was "
+          + parsed.getClass(), parsed instanceof BooleanQuery);
+      final BooleanClause[] clauses = ((BooleanQuery) parsed).getClauses();
+      assertEquals(2, clauses.length);
+      assertSingleTerm(clauses[0].getQuery(), "aField", "foo");
+      assertSingleTerm(clauses[1].getQuery(), "field", "bar");
+    }
+  }
+
+  /** An extension key without a field part is resolved on the default field. */
+  public void testExtDefaultField() throws Exception {
+    for (final char delimiter : DELIMITERS) {
+      final Extensions registry = newExtensions(delimiter);
+      registry.add("testExt", new ExtensionStub());
+      final ExtendableQueryParser parser =
+          (ExtendableQueryParser) getParser(null, registry);
+      final String extField = registry.buildExtensionField("testExt");
+      final Query parsed =
+          parser.parse(String.format("%s:\"foo \\& bar\"", extField));
+      assertSingleTerm(parsed, "field", "foo & bar");
+    }
+  }
+
+  public Extensions newExtensions(char delimiter) {
+    return new Extensions(delimiter);
+  }
+
+  /** An explicit field part is kept as the field of the resulting term. */
+  public void testExtField() throws Exception {
+    for (final char delimiter : DELIMITERS) {
+      final Extensions registry = newExtensions(delimiter);
+      registry.add("testExt", new ExtensionStub());
+      final ExtendableQueryParser parser =
+          (ExtendableQueryParser) getParser(null, registry);
+      final String extField = registry.buildExtensionField("testExt", "afield");
+      final Query parsed =
+          parser.parse(String.format("%s:\"foo \\& bar\"", extField));
+      assertSingleTerm(parsed, "afield", "foo & bar");
+    }
+  }
+
+  /** Asserts that the query is a TermQuery with the given field and text. */
+  private void assertSingleTerm(Query query, String field, String text) {
+    assertTrue("expected instance of TermQuery but was " + query.getClass(),
+        query instanceof TermQuery);
+    final TermQuery termQuery = (TermQuery) query;
+    assertEquals(field, termQuery.getTerm().field());
+    assertEquals(text, termQuery.getTerm().text());
+  }
+}
diff --git a/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java b/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java
new file mode 100644
index 00000000000..bcfee1b5e53
--- /dev/null
+++ b/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java
@@ -0,0 +1,78 @@
+package org.apache.lucene.queryParser.ext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Testcase for the {@link Extensions} class
+ */
+public class TestExtensions extends LuceneTestCase {
+  // fresh default-delimiter instance per test; some tests swap it out
+  private Extensions ext;
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+    this.ext = new Extensions();
+  }
+
+  public void testBuildExtensionField() {
+    assertEquals("field\\:key", ext.buildExtensionField("key", "field"));
+    assertEquals("\\:key", ext.buildExtensionField("key"));
+    ext = new Extensions('.');
+    assertEquals("field.key", ext.buildExtensionField("key", "field"));
+    assertEquals(".key", ext.buildExtensionField("key"));
+  }
+
+  public void testSplitExtensionField() {
+    // '.' is built unescaped, so splitting must invert buildExtensionField
+    ext = new Extensions('.');
+    assertEquals("field", ext.splitExtensionField("dflt", "field.key").cur);
+    assertEquals("key", ext.splitExtensionField("dflt", "field.key").cud);
+    assertEquals("dflt", ext.splitExtensionField("dflt", ".key").cur);
+    assertNull(ext.splitExtensionField("dflt", "field").cud);
+  }
+
+  public void testAddGetExtension() {
+    ParserExtension extension = new ExtensionStub();
+    assertNull(ext.getExtension("foo"));
+    ext.add("foo", extension);
+    assertSame(extension, ext.getExtension("foo"));
+    // a null extension under an existing key is expected to clear it
+    ext.add("foo", null);
+    assertNull(ext.getExtension("foo"));
+  }
+
+  public void testGetExtDelimiter() {
+    assertEquals(Extensions.DEFAULT_EXTENSION_FIELD_DELIMITER, this.ext
+        .getExtensionFieldDelimiter());
+    ext = new Extensions('?');
+    assertEquals('?', this.ext.getExtensionFieldDelimiter());
+  }
+
+  public void testEscapeExtension() {
+    assertEquals("abc\\:\\?\\{\\}\\[\\]\\\\\\(\\)\\+\\-\\!\\~", ext
+        .escapeExtensionField("abc:?{}[]\\()+-!~"));
+    try {
+      ext.escapeExtensionField(null);
+      fail("should throw NPE - escape string is null");
+    } catch (NullPointerException e) {
+      // expected: a null input must not be accepted silently
+    }
+  }
+}