Extracted validation methods so they can be tested via jsunit.

Handles query escaping via a workaround. Yay!
Checks for empty quotes.
Simplified many REs.
Allows the '=' sign.


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150966 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Kelvin Tan 2004-03-16 03:17:17 +00:00
parent f065932ff6
commit 7865adc2dd
2 changed files with 672 additions and 102 deletions

View File

@ -1,4 +1,4 @@
// Author: Kelvin Tan (kelvin@relevanz.com) // Author: Kelvin Tan (kelvint at apache.org)
// JavaScript Lucene Query Validator // JavaScript Lucene Query Validator
// Version: $Id$ // Version: $Id$
// Tested: IE 6.0.2800 and Mozilla Firebird 0.7 // Tested: IE 6.0.2800 and Mozilla Firebird 0.7
@ -17,6 +17,7 @@ function setWildcardCaseInsensitive(bool)
wildcardCaseInsensitive = bool; wildcardCaseInsensitive = bool;
} }
// Should the user be prompted with an alert box if validation fails?
var alertUser = true; var alertUser = true;
function setAlertUser(bool) function setAlertUser(bool)
@ -28,88 +29,211 @@ function setAlertUser(bool)
// @param Form field that contains the query // @param Form field that contains the query
function doCheckLuceneQuery(queryField) function doCheckLuceneQuery(queryField)
{ {
var query = queryField.value; return doCheckLuceneQueryValue(queryField.value)
if(query != null && query.length > 0)
{
// check for allowed characters
var matches = query.match(/[^a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@# ]/);
if(matches != null && matches.length > 0)
{
if(alertUser) alert("Invalid search query! The allowed characters are a-z A-Z 0-9. _ + - : () \" & * ? | ! {} [ ] ^ ~ \\ @ #. Please try again.")
return false;
} }
// check wildcards are used properly // validates a lucene query.
matches = query.match(/^[\*]*$|[\s]\*|^\*[^\s]/); // @param query string
function doCheckLuceneQueryValue(query)
{
if(query != null && query.length > 0)
{
query = removeEscapes(query);
// check for allowed characters
if(!checkAllowedCharacters(query)) return false;
// check * is used properly
if(!checkAsterisk(query)) return false;
// check for && usage
if(!checkAmpersands(query)) return false;
// check ^ is used properly
if(!checkCaret(query)) return false;
// check ~ is used properly
if(!checkSquiggle(query)) return false;
// check ! is used properly
if(!checkExclamationMark(query)) return false;
// check question marks are used properly
if(!checkQuestionMark(query)) return false;
// check parentheses are used properly
if(!checkParentheses(query)) return false;
// check '+' and '-' are used properly
if(!checkPlusMinus(query)) return false;
// check AND, OR and NOT are used properly
if(!checkANDORNOT(query)) return false;
// check that quote marks are closed
if(!checkQuotes(query)) return false;
// check ':' is used properly
if(!checkColon(query)) return false;
if(wildcardCaseInsensitive)
{
if(query.indexOf("*") != -1)
{
var i = query.indexOf(':');
if(i == -1)
{
queryField.value = query.toLowerCase();
}
else // found a wildcard field search
{
queryField.value = query.substring(0, i) + query.substring(i).toLowerCase();
}
}
}
return true;
}
}
// remove the escape character and the character immediately following it
function removeEscapes(query)
{
return query.replace(/\\./g, "");
}
function checkAllowedCharacters(query)
{
matches = query.match(/[^a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'= ]/);
if(matches != null && matches.length > 0) if(matches != null && matches.length > 0)
{
if(alertUser) alert("Invalid search query! The allowed characters are a-z A-Z 0-9. _ + - : () \" & * ? | ! {} [ ] ^ ~ \\ @ = # % $ '. Please try again.")
return false;
}
return true;
}
function checkAsterisk(query)
{
matches = query.match(/^[\*]*$|[\s]\*|^\*[^\s]/);
if(matches != null)
{ {
if(alertUser) alert("Invalid search query! The wildcard (*) character must be preceded by at least one alphabet or number. Please try again.") if(alertUser) alert("Invalid search query! The wildcard (*) character must be preceded by at least one alphabet or number. Please try again.")
return false; return false;
} }
return true;
}
// check for && usage function checkAmpersands(query)
{
// NB: doesn't handle term1 && term2 && term3 in Firebird 0.7 // NB: doesn't handle term1 && term2 && term3 in Firebird 0.7
matches = query.match(/[&]{2}/); matches = query.match(/[&]{2}/);
if(matches != null && matches.length > 0) if(matches != null && matches.length > 0)
{ {
matches = query.match(/^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+( && )?[a-zA-Z0-9_+\-:.()\"*?|!{}\[\]\^~\\@#]+[ ]*)+$/); // note missing & in pattern matches = query.match(/^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'=]+( && )?[a-zA-Z0-9_+\-:.()\"*?|!{}\[\]\^~\\@#$%'=]+[ ]*)+$/); // note missing & in pattern
if(matches == null) if(matches == null)
{ {
if(alertUser) alert("Invalid search query! Queries containing the special characters && must be in the form: term1 && term2. Please try again.") if(alertUser) alert("Invalid search query! Queries containing the special characters && must be in the form: term1 && term2. Please try again.")
return false; return false;
} }
} }
return true;
}
// check ^ is used properly function checkCaret(query)
matches = query.match(/^[^\^]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\~\\@#]+(\^[\d]+)?[ ]*)+$/); // note missing ^ in pattern {
if(matches == null) //matches = query.match(/^[^\^]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\~\\@#]+(\^[\d]+)?[ ]*)+$/); // note missing ^ in pattern
matches = query.match(/[^\\]\^([^\s]*[^0-9.]+)|[^\\]\^$/);
if(matches != null)
{ {
if(alertUser) alert("Invalid search query! The caret (^) character must be preceded by alphanumeric characters and followed by numbers. Please try again.") if(alertUser) alert("Invalid search query! The caret (^) character must be preceded by alphanumeric characters and followed by numbers. Please try again.")
return false; return false;
} }
return true;
}
// check ~ is used properly function checkSquiggle(query)
matches = query.match(/^[^~]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^\\@#]+(~[\d.]+)?[ ]*)+$/); // note missing ~in pattern {
if(matches == null) //matches = query.match(/^[^~]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^\\@#]+(~[\d.]+|[^\\]\\~)?[ ]*)+$/); // note missing ~ in pattern
matches = query.match(/[^\\]~[^\s]*[^0-9\s]+/);
if(matches != null)
{ {
if(alertUser) alert("Invalid search query! The tilde (~) character must be preceded by alphanumeric characters and followed by numbers. Please try again.") if(alertUser) alert("Invalid search query! The tilde (~) character must be preceded by alphanumeric characters and followed by numbers. Please try again.")
return false; return false;
} }
return true;
}
// check ! is used properly function checkExclamationMark(query)
{
// NB: doesn't handle term1 ! term2 ! term3 // NB: doesn't handle term1 ! term2 ! term3
matches = query.match(/^[^!]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+( ! )?[a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+[ ]*)+$/); matches = query.match(/^[^!]*$|^([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'=]+( ! )?[a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'=]+[ ]*)+$/);
if(matches == null || matches.length == 0) if(matches == null || matches.length == 0)
{ {
if(alertUser) alert("Invalid search query! Queries containing the special character ! must be in the form: term1 ! term2. Please try again.") if(alertUser) alert("Invalid search query! Queries containing the special character ! must be in the form: term1 ! term2. Please try again.")
return false; return false;
} }
return true;
}
// check question marks are used properly function checkQuestionMark(query)
matches = query.match(/^(\?)|([^a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]\?+)/); {
matches = query.match(/^(\?)|([^a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#$%'=]\?+)/);
if(matches != null && matches.length > 0) if(matches != null && matches.length > 0)
{ {
if(alertUser) alert("Invalid search query! The question mark (?) character must be preceded by at least one alphabet or number. Please try again.") if(alertUser) alert("Invalid search query! The question mark (?) character must be preceded by at least one alphabet or number. Please try again.")
return false; return false;
} }
return true;
// check parentheses are used properly
matches = query.match(/^[^()]*$|^(\([a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+\))*$/);
if(matches == null || matches.length == 0)
{
if(alertUser) alert("Invalid search query! Parentheses must be closed and contain at least one alphabet or number. Please try again.")
return false;
} }
// check '+' and '-' are used properly function checkParentheses(query)
matches = query.match(/^[^\n+\-]*$|^([+-]?[a-zA-Z0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#]+[ ]?)+$/); {
var hasLeft = false;
var hasRight = false;
matchLeft = query.match(/[(]/g);
if(matchLeft != null) hasLeft = true
matchRight = query.match(/[)]/g);
if(matchRight != null) hasRight = true;
if(hasLeft || hasRight)
{
if(hasLeft && !hasRight || hasRight && !hasLeft)
{
if(alertUser) alert("Invalid search query! Parentheses must be closed. Please try again.")
return false;
}
else
{
var number = matchLeft.length + matchRight.length;
if((number % 2) > 0 || matchLeft.length != matchRight.length)
{
if(alertUser) alert("Invalid search query! Parentheses must be closed. Please try again.")
return false;
}
}
matches = query.match(/\(\)/);
if(matches != null)
{
if(alertUser) alert("Invalid search query! Parentheses must contain at least one character. Please try again.")
return false;
}
}
return true;
}
function checkPlusMinus(query)
{
matches = query.match(/^[^\n+\-]*$|^([+-]?[a-zA-Z0-9_:.()\"*?&|!{}\[\]\^~\\@#$%'=]+[ ]?)+$/);
if(matches == null || matches.length == 0) if(matches == null || matches.length == 0)
{ {
if(alertUser) alert("Invalid search query! '+' and '-' modifiers must be followed by at least one alphabet or number. Please try again.") if(alertUser) alert("Invalid search query! '+' and '-' modifiers must be followed by at least one alphabet or number. Please try again.")
return false; return false;
} }
return true;
}
// check AND, OR and NOT are used properly function checkANDORNOT(query)
{
matches = query.match(/AND|OR|NOT/); matches = query.match(/AND|OR|NOT/);
if(matches != null && matches.length > 0) if(matches != null && matches.length > 0)
{ {
@ -129,9 +253,11 @@ function doCheckLuceneQuery(queryField)
} }
*/ */
} }
return true;
}
function checkQuotes(query)
// check that quote marks are closed {
matches = query.match(/\"/g); matches = query.match(/\"/g);
if(matches != null && matches.length > 0) if(matches != null && matches.length > 0)
{ {
@ -141,32 +267,23 @@ function doCheckLuceneQuery(queryField)
if(alertUser) alert("Invalid search query! Please close all quote (\") marks."); if(alertUser) alert("Invalid search query! Please close all quote (\") marks.");
return false; return false;
} }
matches = query.match(/""/);
if(matches != null)
{
if(alertUser) alert("Invalid search query! Quotes must contain at least one character. Please try again.")
return false;
}
}
return true;
} }
// check ':' is used properly function checkColon(query)
matches = query.match(/^[^:]*$|^([a-zA-Z0-9_+\-.()\"*?&|!{}\[\]\^~\\@#]+(:[a-zA-Z0-9_+\-.()\"*?&|!{}\[\]\^~\\@#]+)?[ ]*)+$/); // note missing : in pattern {
if(matches == null || matches.length == 0) matches = query.match(/[^\\\s]:[\s]|[^\\\s]:$|[\s][^\\]?:|^[^\\\s]?:/);
if(matches != null)
{ {
if(alertUser) alert("Invalid search query! Field declarations (:) must be preceded by at least one alphabet or number and followed by at least one alphabet or number. Please try again.") if(alertUser) alert("Invalid search query! Field declarations (:) must be preceded by at least one alphabet or number and followed by at least one alphabet or number. Please try again.")
return false; return false;
} }
if(wildcardCaseInsensitive)
{
if(query.indexOf("*") != -1)
{
var i = query.indexOf(':');
if(i == -1)
{
queryField.value = query.toLowerCase();
}
else // found a wildcard field search
{
queryField.value = query.substring(0, i) + query.substring(i).toLowerCase();
}
}
}
return true; return true;
} }
}

View File

@ -0,0 +1,453 @@
<html>
<head>
<script language="JavaScript" src="f:/jsunit/app/jsUnitCore.js"></script>
<script language="JavaScript" src="luceneQueryValidator.js"></script>
</head>
<body>
<script>
setAlertUser(false);
// additions to jsUnit
function assertTrue(comment, value)
{
return assertEquals(comment, true, value);
}
function assertFalse(comment, value)
{
return assertEquals(comment, false, value);
}
function testRemoveEscapes()
{
var query = "\\* foo \\haha";
assertEquals(query, " foo aha", removeEscapes(query));
query = "\\\\foo";
assertEquals(query, "foo", removeEscapes(query));
query = "foo\\\"";
assertEquals(query, "foo", removeEscapes(query));
}
function testCheckAllowedCharacters()
{
fail("");
}
function testCheckAsterisk()
{
var query = "foo bar is ok";
assertTrue(query, checkAsterisk(query));
query = "foo bar12* is ok*";
assertTrue(query, checkAsterisk(query));
query = "foo bar12*sdsd";
assertTrue(query, checkAsterisk(query));
query = "foo bar12*sd**sd";
assertTrue(query, checkAsterisk(query));
query = "*bar12";
assertFalse(query, checkAsterisk(query));
query = "*ba12r*";
assertFalse(query, checkAsterisk(query));
query = "bar* *bar";
assertFalse(query, checkAsterisk(query));
// test with a space in front
query = " *bar";
assertFalse(query, checkAsterisk(query));
// test the escaped case
query = "bar* \\*bar";
assertTrue(query, checkAsterisk(query));
// try including other special characters
query = "foo:bar*ba?r";
assertTrue(query, checkAsterisk(query));
query = "foo:(ba*ba?r zoo \"zaa zoo\")";
assertTrue(query, checkAsterisk(query));
}
function testCheckAmpersands()
{
var query = "foo bar is ok";
assertTrue(query, checkAmpersands(query));
query = "foo & bar";
assertTrue(query, checkAmpersands(query));
query = "foo & bar& metoo &";
assertTrue(query, checkAmpersands(query));
query = "foo && bar12isok";
assertTrue(query, checkAmpersands(query));
query = "bar12 &&";
assertFalse(query, checkAmpersands(query));
query = "bar12 && bar12 &&";
assertFalse(query, checkAmpersands(query));
query = "bar12 && ";
assertFalse(query, checkAmpersands(query));
}
function testCheckCaret()
{
var query = "foo bar is ok";
assertTrue(query, checkCaret(query));
var query = "foo bar12isok^1.0";
assertTrue(query, checkCaret(query));
query = "\"jakarta apache\"^10";
assertTrue(query, checkCaret(query));
query = "bar12^";
assertFalse(query, checkCaret(query));
query = "bar12^10 bar12^";
assertFalse(query, checkCaret(query));
query = "bar12^ ";
assertFalse(query, checkCaret(query));
query = "bar12^ me too";
assertFalse(query, checkCaret(query));
query = "bar12^foo";
assertFalse(query, checkCaret(query));
query = "bar12^1.foo";
assertFalse(query, checkCaret(query));
// test the escaped case
query = "\\^";
assertTrue(query, checkCaret(query));
query = "bar\\^";
assertTrue(query, checkCaret(query));
// try including other special characters
query = "bar*ba?r^1.0";
assertTrue(query, checkCaret(query));
}
function testCheckSquiggle()
{
var query = "foo bar is ok";
assertTrue(query, checkSquiggle(query));
var query = "foo bar12isok~10";
assertTrue(query, checkSquiggle(query));
query = "\"jakarta apache\"~10";
assertTrue(query, checkSquiggle(query));
query = "bar12~";
assertTrue(query, checkSquiggle(query));
query = "bar12~10 bar12~";
assertTrue(query, checkSquiggle(query));
query = "bar12~ ";
assertTrue(query, checkSquiggle(query));
query = "bar12~foo";
assertFalse(query, checkSquiggle(query));
query = "bar12~1f";
assertFalse(query, checkSquiggle(query))
// test the escaped case
query = "\\~";
assertTrue(query, checkSquiggle(query));
query = "bar\\~";
assertTrue(query, checkSquiggle(query));
// try including other special characters
query = "bar*ba?r~10";
assertTrue(query, checkSquiggle(query));
// FIXME: how about floating point proximity searches, e.g. foo~2.5
}
function testCheckExclamationMark()
{
var query = "foo bar is ok";
assertTrue(query, checkExclamationMark(query));
query = "foo ! bar";
assertTrue(query, checkExclamationMark(query));
query = "\"foo\" ! \"bar\"";
assertTrue(query, checkExclamationMark(query));
query = "foo!";
assertTrue(query, checkExclamationMark(query));
query = "! bar";
assertFalse(query, checkExclamationMark(query));
query = "foo !";
assertFalse(query, checkExclamationMark(query));
query = "foo ! ";
assertFalse(query, checkExclamationMark(query));
// test escaped case
query = "foo \\!";
assertTrue(query, checkExclamationMark(query));
query = "foo ! bar \\!";
assertTrue(query, checkExclamationMark(query));
query = "foo ! bar ! car";
assertTrue(query, checkExclamationMark(query));
query = "foo ! bar !";
assertFalse(query, checkExclamationMark(query));
query = "foo ! bar ! ";
assertFalse(query, checkExclamationMark(query));
// try more complex queries
query = "(foo bar) ! (car:dog*)";
assertTrue(query, checkExclamationMark(query));
}
function testCheckQuestionMark()
{
var query = "foo bar is ok";
assertTrue(query, checkQuestionMark(query));
query = "foo bar12? is ok?";
assertTrue(query, checkQuestionMark(query));
query = "foo bar12?sdsd";
assertTrue(query, checkQuestionMark(query));
query = "foo bar12?sd??sd";
assertTrue(query, checkQuestionMark(query));
query = "?bar12";
assertFalse(query, checkQuestionMark(query));
query = "?ba12r?";
assertFalse(query, checkQuestionMark(query));
query = "bar? ?bar";
assertFalse(query, checkQuestionMark(query));
// test with a space in front
query = " ?bar";
assertFalse(query, checkQuestionMark(query));
// test the escaped case
query = "bar? \\?bar";
assertTrue(query, checkQuestionMark(query));
// try including other special characters
query = "foo:bar*ba?r";
assertTrue(query, checkQuestionMark(query));
query = "foo:(ba*ba?r zoo \"zaa zoo\")";
assertTrue(query, checkQuestionMark(query));
}
function testCheckParentheses()
{
var query = "foo bar is ok";
assertTrue(query, checkParentheses(query));
query = "(foobar12:isok)";
assertTrue(query, checkParentheses(query));
query = "(foobar12):(sdsd* me too)";
assertTrue(query, checkParentheses(query));
query = "(bar12";
assertFalse(query, checkParentheses(query));
query = "ba12r)";
assertFalse(query, checkParentheses(query));
query = "()";
assertFalse(query, checkParentheses(query));
query = "))";
assertFalse(query, checkParentheses(query));
query = "(foo bar) (bar";
assertFalse(query, checkParentheses(query));
query = "(foo bar) bar) me too";
assertFalse(query, checkParentheses(query));
// test with a space in front
query = " (bar";
assertFalse(query, checkParentheses(query));
// test the escaped case
query = "foo\\)";
assertTrue(query, doCheckLuceneQueryValue(query));
query = "foo\\) (foo bar)";
assertTrue(query, doCheckLuceneQueryValue(query));
// try including other special characters
query = "-(foo bar*ba?r)";
assertTrue(query, checkParentheses(query));
query = "+foo:(ba*ba?r zoo -(zaa zoo))";
assertTrue(query, checkParentheses(query));
query = "((bar12";
assertFalse(query, checkParentheses(query));
query = "((bar12)";
assertFalse(query, checkParentheses(query));
}
function testCheckPlusMinus()
{
var query = "foo bar is ok";
assertTrue(query, checkPlusMinus(query));
query = "+bar -foo";
assertTrue(query, checkPlusMinus(query));
// is this allowed?
query = "baa+foo +foo-bar";
assertTrue(query, checkPlusMinus(query));
query = "baa+";
assertFalse(query, checkPlusMinus(query));
query = "++baa";
assertFalse(query, checkPlusMinus(query));
query = "+";
assertFalse(query, checkPlusMinus(query));
query = "-";
assertFalse(query, checkPlusMinus(query));
// test the escaped case
query = "foo\\+";
assertTrue(query, doCheckLuceneQueryValue(query));
// try including other special characters
query = "-(foo bar*ba?r)";
assertTrue(query, checkParentheses(query));
query = "+foo:(ba*ba?r zoo -(zaa zoo))";
assertTrue(query, checkParentheses(query));
}
function testCheckANDORNOT()
{
fail("");
}
function testCheckQuotes()
{
var query = "foo bar is ok";
assertTrue(query, checkQuotes(query));
query = "\"foobar12:isok\"";
assertTrue(query, checkQuotes(query));
query = "\"(foobar12)\":(sdsd* me too)";
assertTrue(query, checkQuotes(query));
query = "\"bar12";
assertFalse(query, checkQuotes(query));
query = "\"\"";
assertFalse(query, checkQuotes(query));
query = "ba12r\"";
assertFalse(query, checkQuotes(query));
query = "\"foo bar\" \"bar";
assertFalse(query, checkQuotes(query));
query = "\"foo bar\" bar\" me too";
assertFalse(query, checkQuotes(query));
// test with a space in front
query = " \"bar";
assertFalse(query, checkQuotes(query));
// test the escaped case
query = "foo\\\"";
assertTrue(query, doCheckLuceneQueryValue(query));
query = "foo\\\" \"foo bar\"";
assertTrue(query, doCheckLuceneQueryValue(query));
// try including other special characters
query = "\"foo bar*ba?r\"";
assertTrue(query, checkQuotes(query));
query = "foo:(ba*ba?r zoo \"zaa zoo\")";
assertTrue(query, checkQuotes(query));
query = "\\\"\\\"bar12\\\"";
assertTrue(query, doCheckLuceneQueryValue(query));
query = "\\\"\\\"bar12\\\"\\\"";
assertTrue(query, doCheckLuceneQueryValue(query));
}
function testCheckColon()
{
var query = "foo bar is ok";
assertTrue(query, checkColon(query));
query = "foobar12:isok";
assertTrue(query, checkColon(query));
query = "(foobar12):(sdsd* me too)";
assertTrue(query, checkColon(query));
query = "bar12:";
assertFalse(query, checkColon(query));
query = ":ba12r";
assertFalse(query, checkColon(query));
query = "foo:bar :bar";
assertFalse(query, checkColon(query));
query = "foo:bar bar: me too";
assertFalse(query, checkColon(query));
// test with a space in front
query = " :bar";
assertFalse(query, checkColon(query));
// test the escaped case
query = "foo\\:";
assertTrue(query, checkColon(query));
query = "foo\\: foo:bar";
assertTrue(query, checkColon(query));
// try including other special characters
query = "foo:bar*ba?r";
assertTrue(query, checkColon(query));
query = "foo:(ba*ba?r zoo \"zaa zoo\")";
assertTrue(query, checkColon(query));
}
</script>
</body>
</html>