- Added set/getLowercaseWildcardTerms methods and a few get*Query methods

that make it easier to extend QueryParser.
Contributed by: Tatu Saloranta


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149954 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Otis Gospodnetic 2003-03-02 01:36:38 +00:00
parent a92eb6d93c
commit 05a2ea983c
1 changed files with 128 additions and 16 deletions

View File

@ -1,8 +1,8 @@
/* ==================================================================== /* ====================================================================
* The Apache Software License, Version 1.1 * The Apache Software License, Version 1.1
* *
* Copyright (c) 2001 The Apache Software Foundation. All rights * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. All
* reserved. * rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions
@ -129,6 +129,11 @@ public class QueryParser {
Analyzer analyzer; Analyzer analyzer;
String field; String field;
int phraseSlop = 0; int phraseSlop = 0;
/**
 * Whether terms of wildcard and prefix queries are to be automatically
 * lower-cased or not. Default is <code>true</code>.
 * <p>
 * Read by <code>getWildcardQuery</code> and <code>getPrefixQuery</code>;
 * <code>getFuzzyQuery</code> does not consult it.
 */
boolean lowercaseWildcardTerms = true;
/** Constructs a query parser. /** Constructs a query parser.
* @param field the default field for query terms. * @param field the default field for query terms.
@ -164,7 +169,7 @@ public class QueryParser {
private int operator = DEFAULT_OPERATOR_OR; private int operator = DEFAULT_OPERATOR_OR;
/** /**
* Set the boolean operator of the QueryParser. * Sets the boolean operator of the QueryParser.
* In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers * In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to * are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/> * <code>capital OR of OR Hungary</code>.<br/>
@ -179,6 +184,14 @@ public class QueryParser {
return this.operator; return this.operator;
} }
/**
 * Sets whether terms of wildcard and prefix queries are lower-cased
 * automatically before the query is built.
 *
 * @param b <code>true</code> to lower-case wildcard and prefix terms,
 *          <code>false</code> to use them exactly as typed.
 */
public void setLowercaseWildcardTerms(boolean b) {
  // Fixed: the assignment target was misspelled "owercaseWildcardTerms".
  lowercaseWildcardTerms = b;
}
/**
 * Tells whether terms of wildcard and prefix queries are lower-cased
 * automatically.
 *
 * @return the current value of the lower-casing flag.
 */
public boolean getLowercaseWildcardTerms() {
  return this.lowercaseWildcardTerms;
}
private void addClause(Vector clauses, int conj, int mods, Query q) { private void addClause(Vector clauses, int conj, int mods, Query q) {
boolean required, prohibited; boolean required, prohibited;
@ -288,6 +301,103 @@ public class QueryParser {
inclusive); inclusive);
} }
/**
 * Factory method that combines a set of clauses into a single query.
 * The default implementation adds every clause, in order, to a new
 * {@link BooleanQuery}.
 * <p>
 * Subclasses may override this to change the query that is returned.
 *
 * @param clauses Vector holding the {@link BooleanClause} instances
 *  to combine.
 *
 * @return Resulting {@link Query} object.
 */
protected Query getBooleanQuery(Vector clauses)
{
  final BooleanQuery combined = new BooleanQuery();
  final int count = clauses.size();
  int idx = 0;
  while (idx < count) {
    BooleanClause clause = (BooleanClause) clauses.elementAt(idx);
    combined.add(clause);
    idx++;
  }
  return combined;
}
/**
 * Factory method for building a wildcard query. Called when the parser
 * reads a term token that contains one or more wildcard characters
 * (? and *) but is not a prefix term token (one whose only wildcard is
 * a single * at the very end).
 * <p>
 * Depending on settings, the wildcard term may be lower-cased
 * automatically. It is not passed through the analyzer, since normal
 * analyzers are unlikely to work properly with wildcard templates.
 * <p>
 * Subclasses may override this to provide custom handling of wildcard
 * queries (which may be necessary because the analyzer is not called).
 *
 * @param field Name of the field the query will use.
 * @param termStr Term token that contains one or more wildcard
 *  characters (? or *), but is not a simple prefix term.
 *
 * @return Resulting query built for the term.
 */
protected Query getWildcardQuery(String field, String termStr)
{
  // String.toLowerCase() without an argument follows the JVM default locale.
  final String pattern = lowercaseWildcardTerms ? termStr.toLowerCase() : termStr;
  return new WildcardQuery(new Term(field, pattern));
}
/**
 * Factory method for building a prefix query (similar to
 * {@link #getWildcardQuery(String, String)}). Called when the parser
 * reads a term token in prefix notation, that is, one whose only
 * wildcard is a single '*' as its last character. Because this is a
 * special case of the generic wildcard term that can be optimized
 * easily, it usually results in a different query object.
 * <p>
 * Depending on settings, the prefix term may be lower-cased
 * automatically. It is not passed through the analyzer, since normal
 * analyzers are unlikely to work properly with wildcard templates.
 * <p>
 * Subclasses may override this to provide custom handling of prefix
 * queries (which may be necessary because the analyzer is not called).
 *
 * @param field Name of the field the query will use.
 * @param termStr Term token to use for building the query term
 *  (<b>without</b> the trailing '*' character!)
 *
 * @return Resulting query built for the term.
 */
protected Query getPrefixQuery(String field, String termStr)
{
  // String.toLowerCase() without an argument follows the JVM default locale.
  final String prefix = lowercaseWildcardTerms ? termStr.toLowerCase() : termStr;
  return new PrefixQuery(new Term(field, prefix));
}
/**
 * Factory method for building a fuzzy query (similar to
 * {@link #getWildcardQuery(String, String)}). Called when the parser
 * reads a term token that has the fuzzy suffix (~) appended.
 * <p>
 * The term text is used exactly as given: this method neither
 * lower-cases it nor passes it through the analyzer.
 *
 * @param field Name of the field the query will use.
 * @param termStr Term token to use for building the query term.
 *
 * @return Resulting query built for the term.
 */
protected Query getFuzzyQuery(String field, String termStr)
{
  return new FuzzyQuery(new Term(field, termStr));
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
QueryParser qp = new QueryParser("field", QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer()); new org.apache.lucene.analysis.SimpleAnalyzer());
@ -420,10 +530,7 @@ Query Query(String field) :
if (clauses.size() == 1 && firstQuery != null) if (clauses.size() == 1 && firstQuery != null)
return firstQuery; return firstQuery;
else { else {
BooleanQuery query = new BooleanQuery(); return getBooleanQuery(clauses);
for (int i = 0; i < clauses.size(); i++)
query.add((BooleanClause)clauses.elementAt(i));
return query;
} }
} }
} }
@ -475,15 +582,16 @@ Query Term(String field) : {
[ <FUZZY> { fuzzy=true; } ] [ <FUZZY> { fuzzy=true; } ]
[ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ] [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
{ {
if (wildcard) if (wildcard) {
q = new WildcardQuery(new Term(field, term.image)); q = getWildcardQuery(field, term.image);
else if (prefix) } else if (prefix) {
q = new PrefixQuery(new Term(field, term.image.substring q = getPrefixQuery(field, term.image.substring
(0, term.image.length()-1))); (0, term.image.length()-1));
else if (fuzzy) } else if (fuzzy) {
q = new FuzzyQuery(new Term(field, term.image)); q = getFuzzyQuery(field, term.image);
else } else {
q = getFieldQuery(field, analyzer, term.image); q = getFieldQuery(field, analyzer, term.image);
}
} }
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> ) | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
[ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> ) [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
@ -530,7 +638,11 @@ Query Term(String field) : {
try { try {
f = Float.valueOf(boost.image).floatValue(); f = Float.valueOf(boost.image).floatValue();
} }
catch (Exception ignored) { } catch (Exception ignored) {
/* Should this be handled somehow? (defaults to "no boost", if
* boost number is invalid)
*/
}
// avoid boosting null queries, such as those caused by stop words // avoid boosting null queries, such as those caused by stop words
if (q != null) { if (q != null) {