SOLR-589 - Improved handling of badly formatted query strings

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@686042 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Otis Gospodnetic 2008-08-14 21:04:05 +00:00
parent a7ba71fa6d
commit 4f13f01a59
4 changed files with 44 additions and 0 deletions

View File

@ -534,6 +534,8 @@ Bug Fixes
48. SOLR-606: Fixed spell check collation offset issue. (Stefan Oestreicher , Geoffrey Young, gsingers)
49. SOLR-589: Improved handling of badly formatted query strings (Sean Timm via Otis Gospodnetic)
Other Changes
1. SOLR-135: Moved common classes to org.apache.solr.common and altered the
build scripts to make two jars: apache-solr-1.3.jar and

View File

@ -142,6 +142,7 @@ class DismaxQParser extends QParser {
else {
// There is a valid query string
userQuery = U.partialEscape(U.stripUnbalancedQuotes(userQuery)).toString();
userQuery = U.stripIllegalOperators(userQuery).toString();
String minShouldMatch = solrParams.get(DMP.MM, "100%");
Query dis = up.parse(userQuery);

View File

@ -41,6 +41,7 @@ import java.io.IOException;
import java.util.*;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
* <p>Utilities that may be of use to RequestHandlers.</p>
@ -673,6 +674,23 @@ public class SolrPluginUtils {
return sb;
}
// Matches whitespace followed by any mix of '-', '+' and whitespace that
// runs to the end of the text, e.g. the " -" in "chocolate chip -".
// Regex: \s+[-+\s]+$
private static final Pattern DANGLING_OP_PATTERN = Pattern.compile("\\s+[-+\\s]+$");

// Matches whitespace followed by two or more '+' / '-' operators, which may
// themselves be separated by whitespace, e.g. " +-" or " - -".
// Regex: \s+[+-](?:\s*[+-]+)+
private static final Pattern CONSECUTIVE_OP_PATTERN = Pattern.compile("\\s+[+-](?:\\s*[+-]+)+");

/**
 * Removes '+' and '-' operators that are used illegally in a user query.
 *
 * <p>Two passes are applied, in this order:</p>
 * <ol>
 *   <li>every whitespace-led run of consecutive operators is replaced by a
 *       single space ("foo --bar" becomes "foo bar");</li>
 *   <li>operators left dangling at the end of the text are removed together
 *       with the whitespace in front of them ("foo bar -" becomes
 *       "foo bar").</li>
 * </ol>
 *
 * <p>Text containing neither kind of misuse comes back with the same
 * content. Examples of illegal user queries: "chocolate +- chip",
 * "chocolate - - chip", and "chocolate chip -". Note that the single space
 * inserted by the first pass is not stripped when it ends up last, so
 * "foo bar - + ++" becomes "foo bar " (with a trailing space).</p>
 *
 * @param s the raw user query
 * @return the query text with illegally used operators stripped
 */
public static CharSequence stripIllegalOperators(CharSequence s) {
  // Pass 1: collapse runs of consecutive operators into one space.
  Matcher consecutiveOps = CONSECUTIVE_OP_PATTERN.matcher(s);
  String collapsed = consecutiveOps.replaceAll(" ");

  // Pass 2: drop whatever operators are still hanging off the end.
  Matcher danglingOps = DANGLING_OP_PATTERN.matcher(collapsed);
  return danglingOps.replaceAll("");
}
/**
* Returns its input if there is an even (i.e. balanced) number of
* '"' characters -- otherwise returns a String in which all '"'

View File

@ -66,6 +66,24 @@ public class SolrPluginUtilsTest extends AbstractSolrTestCase {
}
/**
 * Runs stripOp over a table of queries and checks each result: inputs with
 * legally placed operators must come back unchanged, while consecutive or
 * dangling operators must be stripped.
 */
public void testStripIllegalOperators() {
  // Each row is { expected result, raw query }.
  final String[][] cases = {
    {"", ""},
    {"foo", "foo"},
    {"foo -bar", "foo -bar"},
    {"foo +bar", "foo +bar"},
    {"foo + bar", "foo + bar"},
    {"foo+ bar", "foo+ bar"},
    {"foo+ bar", "foo+ bar"},
    {"foo+", "foo+"},
    {"foo bar", "foo bar -"},
    {"foo bar ", "foo bar - + ++"},
    {"foo bar", "foo --bar"},
    // Very long operator runs, in the middle and at the end of the query.
    {"foo bar ", "foo -------------------------------------------------------------------------------------------------------------------------bar --"},
    {"foo bar ", "foo --bar -----------------------------------------------------------------------------------------------------------------------"},
  };
  for (String[] row : cases) {
    assertEquals(row[0], stripOp(row[1]));
  }
}
public void testParseFieldBoosts() throws Exception {
Map<String,Float> e1 = new HashMap<String,Float>();
@ -315,6 +333,11 @@ public class SolrPluginUtilsTest extends AbstractSolrTestCase {
return SolrPluginUtils.stripUnbalancedQuotes(s).toString();
}
/** Shorthand: applies SolrPluginUtils.stripIllegalOperators and returns the result as a String. */
public String stripOp(CharSequence s) {
  final CharSequence stripped = SolrPluginUtils.stripIllegalOperators(s);
  return stripped.toString();
}
/** macro */
public int calcMSM(int clauses, String spec) {
return SolrPluginUtils.calculateMinShouldMatch(clauses, spec);