Extracted validation methods so they can be tested via jsunit.

Handles query escaping via a workaround. Yay!
Checks for empty quotes.
Simplified many REs.
Allows the '=' sign.


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150966 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Kelvin Tan 2004-03-16 03:17:17 +00:00
parent f065932ff6
commit 7865adc2dd
2 changed files with 672 additions and 102 deletions

View File

@ -1,4 +1,4 @@
// Author: Kelvin Tan (kelvin@relevanz.com)
// Author: Kelvin Tan (kelvint at apache.org)
// JavaScript Lucene Query Validator
// Version: $Id$
// Tested: IE 6.0.2800 and Mozilla Firebird 0.7
@ -17,6 +17,7 @@ function setWildcardCaseInsensitive(bool)
wildcardCaseInsensitive = bool;
}
// Should the user be prompted with an alert box if validation fails?
var alertUser = true;
function setAlertUser(bool)
@ -28,88 +29,211 @@ function setAlertUser(bool)
// @param Form field that contains the query
function doCheckLuceneQuery(queryField)
{
var query = queryField.value;
return doCheckLuceneQueryValue(queryField.value)
}
// validates a lucene query.
// @param query string
function doCheckLuceneQueryValue(query)
{
if(query != null && query.length > 0)
{
query = removeEscapes(query);
// check for allowed characters
var matches = query.match(/[^a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@# ]/);
if(!checkAllowedCharacters(query)) return false;
// check * is used properly
if(!checkAsterisk(query)) return false;
// check for && usage
if(!checkAmpersands(query)) return false;
// check ^ is used properly
if(!checkCaret(query)) return false;
// check ~ is used properly
if(!checkSquiggle(query)) return false;
// check ! is used properly
if(!checkExclamationMark(query)) return false;
// check question marks are used properly
if(!checkQuestionMark(query)) return false;
// check parentheses are used properly
if(!checkParentheses(query)) return false;
// check '+' and '-' are used properly
if(!checkPlusMinus(query)) return false;
// check AND, OR and NOT are used properly
if(!checkANDORNOT(query)) return false;
// check that quote marks are closed
if(!checkQuotes(query)) return false;
// check ':' is used properly
if(!checkColon(query)) return false;
if(wildcardCaseInsensitive)
{
if(query.indexOf("*") != -1)
{
var i = query.indexOf(':');
if(i == -1)
{
queryField.value = query.toLowerCase();
}
else // found a wildcard field search
{
queryField.value = query.substring(0, i) + query.substring(i).toLowerCase();
}
}
}
return true;
}
}
// remove the escape character and the character immediately following it
function removeEscapes(query)
{
return query.replace(/\\./g, "");
}
function checkAllowedCharacters(query)
{
matches = query.match(/[^a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'= ]/);
if(matches != null && matches.length > 0)
{
if(alertUser) alert("Invalid search query! The allowed characters are a-z A-Z 0-9. _ + - : () \" & * ? | ! {} [ ] ^ ~ \\ @ #. Please try again.")
if(alertUser) alert("Invalid search query! The allowed characters are a-z A-Z 0-9. _ + - : () \" & * ? | ! {} [ ] ^ ~ \\ @ = # % $ '. Please try again.")
return false;
}
return true;
}
// check wildcards are used properly
function checkAsterisk(query)
{
matches = query.match(/^[\*]*$|[\s]\*|^\*[^\s]/);
if(matches != null && matches.length > 0)
if(matches != null)
{
if(alertUser) alert("Invalid search query! The wildcard (*) character must be preceded by at least one alphabet or number. Please try again.")
return false;
}
return true;
}
// check for && usage
function checkAmpersands(query)
{
// NB: doesn't handle term1 && term2 && term3 in Firebird 0.7
matches = query.match(/[&]{2}/);
if(matches != null && matches.length > 0)
{
matches = query.match(/^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+( && )?[a-zA-Z0-9_+\-:.()\"*?|!{}\[\]\^~\\@#]+[ ]*)+$/); // note missing & in pattern
matches = query.match(/^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'=]+( && )?[a-zA-Z0-9_+\-:.()\"*?|!{}\[\]\^~\\@#$%'=]+[ ]*)+$/); // note missing & in pattern
if(matches == null)
{
if(alertUser) alert("Invalid search query! Queries containing the special characters && must be in the form: term1 && term2. Please try again.")
return false;
}
}
return true;
}
// check ^ is used properly
matches = query.match(/^[^\^]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\~\\@#]+(\^[\d]+)?[ ]*)+$/); // note missing ^ in pattern
if(matches == null)
function checkCaret(query)
{
//matches = query.match(/^[^\^]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\~\\@#]+(\^[\d]+)?[ ]*)+$/); // note missing ^ in pattern
matches = query.match(/[^\\]\^([^\s]*[^0-9.]+)|[^\\]\^$/);
if(matches != null)
{
if(alertUser) alert("Invalid search query! The caret (^) character must be preceded by alphanumeric characters and followed by numbers. Please try again.")
return false;
}
return true;
}
// check ~ is used properly
matches = query.match(/^[^~]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^\\@#]+(~[\d.]+)?[ ]*)+$/); // note missing ~in pattern
if(matches == null)
function checkSquiggle(query)
{
//matches = query.match(/^[^~]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^\\@#]+(~[\d.]+|[^\\]\\~)?[ ]*)+$/); // note missing ~ in pattern
matches = query.match(/[^\\]~[^\s]*[^0-9\s]+/);
if(matches != null)
{
if(alertUser) alert("Invalid search query! The tilde (~) character must be preceded by alphanumeric characters and followed by numbers. Please try again.")
return false;
}
return true;
}
// check ! is used properly
function checkExclamationMark(query)
{
// NB: doesn't handle term1 ! term2 ! term3
matches = query.match(/^[^!]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+( ! )?[a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+[ ]*)+$/);
matches = query.match(/^[^!]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'=]+( ! )?[a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'=]+[ ]*)+$/);
if(matches == null || matches.length == 0)
{
if(alertUser) alert("Invalid search query! Queries containing the special character ! must be in the form: term1 ! term2. Please try again.")
return false;
}
return true;
}
// check question marks are used properly
matches = query.match(/^(\?)|([^a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]\?+)/);
function checkQuestionMark(query)
{
matches = query.match(/^(\?)|([^a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'=]\?+)/);
if(matches != null && matches.length > 0)
{
if(alertUser) alert("Invalid search query! The question mark (?) character must be preceded by at least one alphabet or number. Please try again.")
return false;
}
return true;
}
// check parentheses are used properly
matches = query.match(/^[^()]*$|^(\([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+\))*$/);
if(matches == null || matches.length == 0)
function checkParentheses(query)
{
var hasLeft = false;
var hasRight = false;
matchLeft = query.match(/[(]/g);
if(matchLeft != null) hasLeft = true
matchRight = query.match(/[)]/g);
if(matchRight != null) hasRight = true;
if(hasLeft || hasRight)
{
if(alertUser) alert("Invalid search query! Parentheses must be closed and contain at least one alphabet or number. Please try again.")
if(hasLeft && !hasRight || hasRight && !hasLeft)
{
if(alertUser) alert("Invalid search query! Parentheses must be closed. Please try again.")
return false;
}
else
{
var number = matchLeft.length + matchRight.length;
if((number % 2) > 0 || matchLeft.length != matchRight.length)
{
if(alertUser) alert("Invalid search query! Parentheses must be closed. Please try again.")
return false;
}
}
matches = query.match(/\(\)/);
if(matches != null)
{
if(alertUser) alert("Invalid search query! Parentheses must contain at least one character. Please try again.")
return false;
}
}
return true;
}
// check '+' and '-' are used properly
matches = query.match(/^[^\n+\-]*$|^([+-]?[a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+[ ]?)+$/);
function checkPlusMinus(query)
{
matches = query.match(/^[^\n+\-]*$|^([+-]?[a-zA-Z0-9_:.()\"*?&|!{}\[\]\^~\\@#$%'=]+[ ]?)+$/);
if(matches == null || matches.length == 0)
{
if(alertUser) alert("Invalid search query! '+' and '-' modifiers must be followed by at least one alphabet or number. Please try again.")
return false;
}
return true;
}
// check AND, OR and NOT are used properly
function checkANDORNOT(query)
{
matches = query.match(/AND|OR|NOT/);
if(matches != null && matches.length > 0)
{
@ -129,9 +253,11 @@ function doCheckLuceneQuery(queryField)
}
*/
}
return true;
}
// check that quote marks are closed
function checkQuotes(query)
{
matches = query.match(/\"/g);
if(matches != null && matches.length > 0)
{
@ -141,32 +267,23 @@ function doCheckLuceneQuery(queryField)
if(alertUser) alert("Invalid search query! Please close all quote (\") marks.");
return false;
}
matches = query.match(/""/);
if(matches != null)
{
if(alertUser) alert("Invalid search query! Quotes must contain at least one character. Please try again.")
return false;
}
}
return true;
}
// check ':' is used properly
matches = query.match(/^[^:]*$|^([a-zA-Z0-9_+\-.()\"*?&|!{}\[\]\^~\\@#]+(:[a-zA-Z0-9_+\-.()\"*?&|!{}\[\]\^~\\@#]+)?[ ]*)+$/); // note missing : in pattern
if(matches == null || matches.length == 0)
function checkColon(query)
{
matches = query.match(/[^\\\s]:[\s]|[^\\\s]:$|[\s][^\\]?:|^[^\\\s]?:/);
if(matches != null)
{
if(alertUser) alert("Invalid search query! Field declarations (:) must be preceded by at least one alphabet or number and followed by at least one alphabet or number. Please try again.")
return false;
}
if(wildcardCaseInsensitive)
{
if(query.indexOf("*") != -1)
{
var i = query.indexOf(':');
if(i == -1)
{
queryField.value = query.toLowerCase();
}
else // found a wildcard field search
{
queryField.value = query.substring(0, i) + query.substring(i).toLowerCase();
}
}
}
return true;
}
}

View File

@ -0,0 +1,453 @@
<html>
<head>
<script language="JavaScript" src="f:/jsunit/app/jsUnitCore.js"></script>
<script language="JavaScript" src="luceneQueryValidator.js"></script>
</head>
<body>
<script>
setAlertUser(false);
// additions to jsUnit
function assertTrue(comment, value)
{
return assertEquals(comment, true, value);
}
function assertFalse(comment, value)
{
return assertEquals(comment, false, value);
}
function testRemoveEscapes()
{
var query = "\\* foo \\haha";
assertEquals(query, " foo aha", removeEscapes(query));
query = "\\\\foo";
assertEquals(query, "foo", removeEscapes(query));
query = "foo\\\"";
assertEquals(query, "foo", removeEscapes(query));
}
function testCheckAllowedCharacters()
{
fail("");
}
function testCheckAsterisk()
{
var query = "foo bar is ok";
assertTrue(query, checkAsterisk(query));
query = "foo bar12* is ok*";
assertTrue(query, checkAsterisk(query));
query = "foo bar12*sdsd";
assertTrue(query, checkAsterisk(query));
query = "foo bar12*sd**sd";
assertTrue(query, checkAsterisk(query));
query = "*bar12";
assertFalse(query, checkAsterisk(query));
query = "*ba12r*";
assertFalse(query, checkAsterisk(query));
query = "bar* *bar";
assertFalse(query, checkAsterisk(query));
// test with a space in front
query = " *bar";
assertFalse(query, checkAsterisk(query));
// test the escaped case
query = "bar* \\*bar";
assertTrue(query, checkAsterisk(query));
// try including other special characters
query = "foo:bar*ba?r";
assertTrue(query, checkAsterisk(query));
query = "foo:(ba*ba?r zoo \"zaa zoo\")";
assertTrue(query, checkAsterisk(query));
}
function testCheckAmpersands()
{
var query = "foo bar is ok";
assertTrue(query, checkAmpersands(query));
query = "foo & bar";
assertTrue(query, checkAmpersands(query));
query = "foo & bar& metoo &";
assertTrue(query, checkAmpersands(query));
query = "foo && bar12isok";
assertTrue(query, checkAmpersands(query));
query = "bar12 &&";
assertFalse(query, checkAmpersands(query));
query = "bar12 && bar12 &&";
assertFalse(query, checkAmpersands(query));
query = "bar12 && ";
assertFalse(query, checkAmpersands(query));
}
function testCheckCaret()
{
var query = "foo bar is ok";
assertTrue(query, checkCaret(query));
var query = "foo bar12isok^1.0";
assertTrue(query, checkCaret(query));
query = "\"jakarta apache\"^10";
assertTrue(query, checkCaret(query));
query = "bar12^";
assertFalse(query, checkCaret(query));
query = "bar12^10 bar12^";
assertFalse(query, checkCaret(query));
query = "bar12^ ";
assertFalse(query, checkCaret(query));
query = "bar12^ me too";
assertFalse(query, checkCaret(query));
query = "bar12^foo";
assertFalse(query, checkCaret(query));
query = "bar12^1.foo";
assertFalse(query, checkCaret(query));
// test the escaped case
query = "\\^";
assertTrue(query, checkCaret(query));
query = "bar\\^";
assertTrue(query, checkCaret(query));
// try including other special characters
query = "bar*ba?r^1.0";
assertTrue(query, checkCaret(query));
}
function testCheckSquiggle()
{
var query = "foo bar is ok";
assertTrue(query, checkSquiggle(query));
var query = "foo bar12isok~10";
assertTrue(query, checkSquiggle(query));
query = "\"jakarta apache\"~10";
assertTrue(query, checkSquiggle(query));
query = "bar12~";
assertTrue(query, checkSquiggle(query));
query = "bar12~10 bar12~";
assertTrue(query, checkSquiggle(query));
query = "bar12~ ";
assertTrue(query, checkSquiggle(query));
query = "bar12~foo";
assertFalse(query, checkSquiggle(query));
query = "bar12~1f";
assertFalse(query, checkSquiggle(query))
// test the escaped case
query = "\\~";
assertTrue(query, checkSquiggle(query));
query = "bar\\~";
assertTrue(query, checkSquiggle(query));
// try including other special characters
query = "bar*ba?r~10";
assertTrue(query, checkSquiggle(query));
// FIXME: how about floating point proximity searches, e.g. foo~2.5
}
function testCheckExclamationMark()
{
var query = "foo bar is ok";
assertTrue(query, checkExclamationMark(query));
query = "foo ! bar";
assertTrue(query, checkExclamationMark(query));
query = "\"foo\" ! \"bar\"";
assertTrue(query, checkExclamationMark(query));
query = "foo!";
assertTrue(query, checkExclamationMark(query));
query = "! bar";
assertFalse(query, checkExclamationMark(query));
query = "foo !";
assertFalse(query, checkExclamationMark(query));
query = "foo ! ";
assertFalse(query, checkExclamationMark(query));
// test escaped case
query = "foo \\!";
assertTrue(query, checkExclamationMark(query));
query = "foo ! bar \\!";
assertTrue(query, checkExclamationMark(query));
query = "foo ! bar ! car";
assertTrue(query, checkExclamationMark(query));
query = "foo ! bar !";
assertFalse(query, checkExclamationMark(query));
query = "foo ! bar ! ";
assertFalse(query, checkExclamationMark(query));
// try more complex queries
query = "(foo bar) ! (car:dog*)";
assertTrue(query, checkExclamationMark(query));
}
function testCheckQuestionMark()
{
var query = "foo bar is ok";
assertTrue(query, checkQuestionMark(query));
query = "foo bar12? is ok?";
assertTrue(query, checkQuestionMark(query));
query = "foo bar12?sdsd";
assertTrue(query, checkQuestionMark(query));
query = "foo bar12?sd??sd";
assertTrue(query, checkQuestionMark(query));
query = "?bar12";
assertFalse(query, checkQuestionMark(query));
query = "?ba12r?";
assertFalse(query, checkQuestionMark(query));
query = "bar? ?bar";
assertFalse(query, checkQuestionMark(query));
// test with a space in front
query = " ?bar";
assertFalse(query, checkQuestionMark(query));
// test the escaped case
query = "bar? \\?bar";
assertTrue(query, checkQuestionMark(query));
// try including other special characters
query = "foo:bar*ba?r";
assertTrue(query, checkQuestionMark(query));
query = "foo:(ba*ba?r zoo \"zaa zoo\")";
assertTrue(query, checkQuestionMark(query));
}
function testCheckParentheses()
{
var query = "foo bar is ok";
assertTrue(query, checkParentheses(query));
query = "(foobar12:isok)";
assertTrue(query, checkParentheses(query));
query = "(foobar12):(sdsd* me too)";
assertTrue(query, checkParentheses(query));
query = "(bar12";
assertFalse(query, checkParentheses(query));
query = "ba12r)";
assertFalse(query, checkParentheses(query));
query = "()";
assertFalse(query, checkParentheses(query));
query = "))";
assertFalse(query, checkParentheses(query));
query = "(foo bar) (bar";
assertFalse(query, checkParentheses(query));
query = "(foo bar) bar) me too";
assertFalse(query, checkParentheses(query));
// test with a space in front
query = " (bar";
assertFalse(query, checkParentheses(query));
// test the escaped case
query = "foo\\)";
assertTrue(query, doCheckLuceneQueryValue(query));
query = "foo\\) (foo bar)";
assertTrue(query, doCheckLuceneQueryValue(query));
// try including other special characters
query = "-(foo bar*ba?r)";
assertTrue(query, checkParentheses(query));
query = "+foo:(ba*ba?r zoo -(zaa zoo))";
assertTrue(query, checkParentheses(query));
query = "((bar12";
assertFalse(query, checkParentheses(query));
query = "((bar12)";
assertFalse(query, checkParentheses(query));
}
function testCheckPlusMinus()
{
var query = "foo bar is ok";
assertTrue(query, checkPlusMinus(query));
query = "+bar -foo";
assertTrue(query, checkPlusMinus(query));
// is this allowed?
query = "baa+foo +foo-bar";
assertTrue(query, checkPlusMinus(query));
query = "baa+";
assertFalse(query, checkPlusMinus(query));
query = "++baa";
assertFalse(query, checkPlusMinus(query));
query = "+";
assertFalse(query, checkPlusMinus(query));
query = "-";
assertFalse(query, checkPlusMinus(query));
// test the escaped case
query = "foo\\+";
assertTrue(query, doCheckLuceneQueryValue(query));
// try including other special characters
query = "-(foo bar*ba?r)";
assertTrue(query, checkParentheses(query));
query = "+foo:(ba*ba?r zoo -(zaa zoo))";
assertTrue(query, checkParentheses(query));
}
function testCheckANDORNOT()
{
fail("");
}
function testCheckQuotes()
{
var query = "foo bar is ok";
assertTrue(query, checkQuotes(query));
query = "\"foobar12:isok\"";
assertTrue(query, checkQuotes(query));
query = "\"(foobar12)\":(sdsd* me too)";
assertTrue(query, checkQuotes(query));
query = "\"bar12";
assertFalse(query, checkQuotes(query));
query = "\"\"";
assertFalse(query, checkQuotes(query));
query = "ba12r\"";
assertFalse(query, checkQuotes(query));
query = "\"foo bar\" \"bar";
assertFalse(query, checkQuotes(query));
query = "\"foo bar\" bar\" me too";
assertFalse(query, checkQuotes(query));
// test with a space in front
query = " \"bar";
assertFalse(query, checkQuotes(query));
// test the escaped case
query = "foo\\\"";
assertTrue(query, doCheckLuceneQueryValue(query));
query = "foo\\\" \"foo bar\"";
assertTrue(query, doCheckLuceneQueryValue(query));
// try including other special characters
query = "\"foo bar*ba?r\"";
assertTrue(query, checkQuotes(query));
query = "foo:(ba*ba?r zoo \"zaa zoo\")";
assertTrue(query, checkQuotes(query));
query = "\\\"\\\"bar12\\\"";
assertTrue(query, doCheckLuceneQueryValue(query));
query = "\\\"\\\"bar12\\\"\\\"";
assertTrue(query, doCheckLuceneQueryValue(query));
}
function testCheckColon()
{
var query = "foo bar is ok";
assertTrue(query, checkColon(query));
query = "foobar12:isok";
assertTrue(query, checkColon(query));
query = "(foobar12):(sdsd* me too)";
assertTrue(query, checkColon(query));
query = "bar12:";
assertFalse(query, checkColon(query));
query = ":ba12r";
assertFalse(query, checkColon(query));
query = "foo:bar :bar";
assertFalse(query, checkColon(query));
query = "foo:bar bar: me too";
assertFalse(query, checkColon(query));
// test with a space in front
query = " :bar";
assertFalse(query, checkColon(query));
// test the escaped case
query = "foo\\:";
assertTrue(query, checkColon(query));
query = "foo\\: foo:bar";
assertTrue(query, checkColon(query));
// try including other special characters
query = "foo:bar*ba?r";
assertTrue(query, checkColon(query));
query = "foo:(ba*ba?r zoo \"zaa zoo\")";
assertTrue(query, checkColon(query));
}
</script>
</body>
</html>