mirror of https://github.com/apache/lucene.git
- Added set/getLowercaseWildcardTerms methods and a few get*Query methods
that make it easier to extend QueryParser. Contributed by: Tatu Saloranta git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149954 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a92eb6d93c
commit
05a2ea983c
|
@ -1,8 +1,8 @@
|
||||||
/* ====================================================================
|
/* ====================================================================
|
||||||
* The Apache Software License, Version 1.1
|
* The Apache Software License, Version 1.1
|
||||||
*
|
*
|
||||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
* Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. All
|
||||||
* reserved.
|
* rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -129,6 +129,11 @@ public class QueryParser {
|
||||||
Analyzer analyzer;
|
Analyzer analyzer;
|
||||||
String field;
|
String field;
|
||||||
int phraseSlop = 0;
|
int phraseSlop = 0;
|
||||||
|
/**
|
||||||
|
* Whether terms of wildcard and prefix queries are to be automatically
|
||||||
|
* lower-cased or not. Default is <code>true</code>.
|
||||||
|
*/
|
||||||
|
boolean lowercaseWildcardTerms = true;
|
||||||
|
|
||||||
/** Constructs a query parser.
|
/** Constructs a query parser.
|
||||||
* @param field the default field for query terms.
|
* @param field the default field for query terms.
|
||||||
|
@ -164,7 +169,7 @@ public class QueryParser {
|
||||||
private int operator = DEFAULT_OPERATOR_OR;
|
private int operator = DEFAULT_OPERATOR_OR;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the boolean operator of the QueryParser.
|
* Sets the boolean operator of the QueryParser.
|
||||||
* In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
|
* In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
|
||||||
* are considered optional: for example <code>capital of Hungary</code> is equal to
|
* are considered optional: for example <code>capital of Hungary</code> is equal to
|
||||||
* <code>capital OR of OR Hungary</code>.<br/>
|
* <code>capital OR of OR Hungary</code>.<br/>
|
||||||
|
@ -179,6 +184,14 @@ public class QueryParser {
|
||||||
return this.operator;
|
return this.operator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setLowercaseWildcardTerms(boolean b) {
|
||||||
|
owercaseWildcardTerms = b;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean getLowercaseWildcardTerms() {
|
||||||
|
return lowercaseWildcardTerms;
|
||||||
|
}
|
||||||
|
|
||||||
private void addClause(Vector clauses, int conj, int mods, Query q) {
|
private void addClause(Vector clauses, int conj, int mods, Query q) {
|
||||||
boolean required, prohibited;
|
boolean required, prohibited;
|
||||||
|
|
||||||
|
@ -288,6 +301,103 @@ public class QueryParser {
|
||||||
inclusive);
|
inclusive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory method for generating query, given set of clauses.
|
||||||
|
* By default creates a boolean query composed of clauses passed in.
|
||||||
|
*
|
||||||
|
* Can be overridden by extending classes, to modify query being
|
||||||
|
* returned.
|
||||||
|
*
|
||||||
|
* @param clauses Vector that contains {@link BooleanClause} instances
|
||||||
|
* to join.
|
||||||
|
*
|
||||||
|
* @return Resulting {@link Query} object.
|
||||||
|
*/
|
||||||
|
protected Query getBooleanQuery(Vector clauses)
|
||||||
|
{
|
||||||
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
for (int i = 0; i < clauses.size(); i++) {
|
||||||
|
query.add((BooleanClause)clauses.elementAt(i));
|
||||||
|
}
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory method for generating a query. Called when parser
|
||||||
|
* parses an input term token that contains one or more wildcard
|
||||||
|
* characters (? and *), but is not a prefix term token (one
|
||||||
|
* that has just a single * character at the end)
|
||||||
|
*<p>
|
||||||
|
* Depending on settings, prefix term may be lower-cased
|
||||||
|
* automatically. It will not go through the default analyzer,
|
||||||
|
* however, since normal analyzers are unlikely to work properly
|
||||||
|
* with wildcard templates.
|
||||||
|
*<p>
|
||||||
|
* Can be overridden by extending classes, to provide custom handling for
|
||||||
|
* wild card queries (which may be necessary due to missing analyzer calls)
|
||||||
|
*
|
||||||
|
* @param field Name of the field query will use.
|
||||||
|
* @param termStr Term token that contains one or more wild card
|
||||||
|
* characters (? or *), but is not simple prefix term
|
||||||
|
*
|
||||||
|
* @return Resulting query build for the term
|
||||||
|
*/
|
||||||
|
protected Query getWildcardQuery(String field, String termStr)
|
||||||
|
{
|
||||||
|
if (lowercaseWildcardTerms) {
|
||||||
|
termStr = termStr.toLowerCase();
|
||||||
|
}
|
||||||
|
Term t = new Term(field, termStr);
|
||||||
|
return new WildcardQuery(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory method for generating a query (similar to
|
||||||
|
* (@link getWildcardQuery}). Called when parser parses an input term
|
||||||
|
* token that uses prefix notation; that is, contains a single '*' wild
|
||||||
|
* char character as it's last character. Since this is a special case
|
||||||
|
* of generic wild card term, and such a query can be optimized easily,
|
||||||
|
* this usually results in different query object.
|
||||||
|
*<p>
|
||||||
|
* Depending on settings, prefix term may be lower-cased
|
||||||
|
* automatically. It will not go through the default analyzer,
|
||||||
|
* however, since normal analyzers are unlikely to work properly
|
||||||
|
* with wildcard templates.
|
||||||
|
*<p>
|
||||||
|
* Can be overridden by extending classes, to provide custom handling for
|
||||||
|
* wild card queries (which may be necessary due to missing analyzer calls)
|
||||||
|
*
|
||||||
|
* @param field Name of the field query will use.
|
||||||
|
* @param termStr Term token to use for building term for the query
|
||||||
|
* (<b>without</b> trailing '*' character!)
|
||||||
|
*
|
||||||
|
* @return Resulting query build for the term
|
||||||
|
*/
|
||||||
|
protected Query getPrefixQuery(String field, String termStr)
|
||||||
|
{
|
||||||
|
if (lowercaseWildcardTerms) {
|
||||||
|
termStr = termStr.toLowerCase();
|
||||||
|
}
|
||||||
|
Term t = new Term(field, termStr);
|
||||||
|
return new PrefixQuery(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory method for generating a query (similar to
|
||||||
|
* (@link getWildcardQuery}). Called when parser parses
|
||||||
|
* an input term token that has the fuzzy suffix (~) appended.
|
||||||
|
*
|
||||||
|
* @param field Name of the field query will use.
|
||||||
|
* @param termStr Term token to use for building term for the query
|
||||||
|
*
|
||||||
|
* @return Resulting query build for the term
|
||||||
|
*/
|
||||||
|
protected Query getFuzzyQuery(String field, String termStr)
|
||||||
|
{
|
||||||
|
Term t = new Term(field, termStr);
|
||||||
|
return new FuzzyQuery(t);
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
QueryParser qp = new QueryParser("field",
|
QueryParser qp = new QueryParser("field",
|
||||||
new org.apache.lucene.analysis.SimpleAnalyzer());
|
new org.apache.lucene.analysis.SimpleAnalyzer());
|
||||||
|
@ -420,10 +530,7 @@ Query Query(String field) :
|
||||||
if (clauses.size() == 1 && firstQuery != null)
|
if (clauses.size() == 1 && firstQuery != null)
|
||||||
return firstQuery;
|
return firstQuery;
|
||||||
else {
|
else {
|
||||||
BooleanQuery query = new BooleanQuery();
|
return getBooleanQuery(clauses);
|
||||||
for (int i = 0; i < clauses.size(); i++)
|
|
||||||
query.add((BooleanClause)clauses.elementAt(i));
|
|
||||||
return query;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -475,15 +582,16 @@ Query Term(String field) : {
|
||||||
[ <FUZZY> { fuzzy=true; } ]
|
[ <FUZZY> { fuzzy=true; } ]
|
||||||
[ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
|
[ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
|
||||||
{
|
{
|
||||||
if (wildcard)
|
if (wildcard) {
|
||||||
q = new WildcardQuery(new Term(field, term.image));
|
q = getWildcardQuery(field, term.image);
|
||||||
else if (prefix)
|
} else if (prefix) {
|
||||||
q = new PrefixQuery(new Term(field, term.image.substring
|
q = getPrefixQuery(field, term.image.substring
|
||||||
(0, term.image.length()-1)));
|
(0, term.image.length()-1));
|
||||||
else if (fuzzy)
|
} else if (fuzzy) {
|
||||||
q = new FuzzyQuery(new Term(field, term.image));
|
q = getFuzzyQuery(field, term.image);
|
||||||
else
|
} else {
|
||||||
q = getFieldQuery(field, analyzer, term.image);
|
q = getFieldQuery(field, analyzer, term.image);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
|
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
|
||||||
[ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
|
[ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
|
||||||
|
@ -530,7 +638,11 @@ Query Term(String field) : {
|
||||||
try {
|
try {
|
||||||
f = Float.valueOf(boost.image).floatValue();
|
f = Float.valueOf(boost.image).floatValue();
|
||||||
}
|
}
|
||||||
catch (Exception ignored) { }
|
catch (Exception ignored) {
|
||||||
|
/* Should this be handled somehow? (defaults to "no boost", if
|
||||||
|
* boost number is invalid)
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
// avoid boosting null queries, such as those caused by stop words
|
// avoid boosting null queries, such as those caused by stop words
|
||||||
if (q != null) {
|
if (q != null) {
|
||||||
|
|
Loading…
Reference in New Issue