BAEL-6626 (#14345)
* BAEL-6626 Check If a String Contains Non-Alphanumeric Characters in Java * Move patterns to class level. --------- Co-authored-by: parthiv39731 <parthiv39731@gmail.com>
This commit is contained in:
parent
03385bed6a
commit
de3fc4f21a
|
@ -0,0 +1,50 @@
|
||||||
|
package com.baeldung.nonalphanumeric;
|
||||||
|
|
||||||
|
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
 * Regex-based checks for the presence of non-alphanumeric characters in a string.
 *
 * <p>All patterns are compiled once and reused; {@link Pattern} instances are immutable
 * and safe to share between threads.
 */
public class NonAlphaNumRegexChecker {

    /** Matches any character that is neither alphabetic nor a digit in any Unicode script. */
    private static final Pattern PATTERN_NON_ALPHNUM_ANYLANG = Pattern.compile("[^\\p{IsAlphabetic}\\p{IsDigit}]");

    /**
     * Matches a run of characters outside the US-ASCII letters and digits.
     * Note that {@code \W} is not an equivalent shorthand: it additionally accepts {@code _}.
     */
    private static final Pattern PATTERN_NON_ALPHNUM_USASCII = Pattern.compile("[^a-zA-Z0-9]+");

    /**
     * Compiled "not in this script and not a digit" patterns, one per Unicode script.
     * The key space is the {@link Character.UnicodeScript} enum, so the cache is bounded.
     */
    private static final Map<Character.UnicodeScript, Pattern> PATTERNS_BY_SCRIPT = new ConcurrentHashMap<>();

    /**
     * Checks whether the string contains a character other than a US-ASCII letter or digit.
     * Letters from other alphabets (for example {@code ü} or Georgian letters), whitespace
     * and punctuation all count as non-alphanumeric here.
     *
     * @param str string to inspect; must not be null
     * @return true if at least one character outside {@code a-z}, {@code A-Z}, {@code 0-9} is found, else false
     */
    public static boolean isNonAlphanumeric(String str) {
        Matcher matcher = PATTERN_NON_ALPHNUM_USASCII.matcher(str);
        return matcher.find();
    }

    /**
     * Checks whether the string contains a character that is neither alphabetic nor a digit,
     * considering letters and digits of every Unicode script.
     *
     * @param input string to inspect; must not be null
     * @return true if a non-alphanumeric character is found, else false
     */
    public static boolean containsNonAlphanumeric(String input) {
        Matcher matcher = PATTERN_NON_ALPHNUM_ANYLANG.matcher(input);
        return matcher.find();
    }

    /**
     * Checks whether the string contains a character that neither belongs to the given
     * Unicode script nor is a digit.
     *
     * <p>The script name is resolved through {@link Character.UnicodeScript#forName(String)}
     * before it is placed in a regular expression, so arbitrary text can never alter the
     * pattern. The compiled pattern is cached per script instead of being rebuilt on every call.
     *
     * @param input  string to inspect; must not be null
     * @param script Unicode script name, case-insensitive,
     *               e.g. {@code Character.UnicodeScript.GEORGIAN.name()}
     * @return true if a character outside the script (and not a digit) is found, else false
     * @throws IllegalArgumentException if {@code script} is null or not a known Unicode script
     */
    public static boolean containsNonAlphanumeric(String input, String script) {
        if (script == null) {
            throw new IllegalArgumentException("script must not be null");
        }
        // forName rejects anything that is not a real script name with IllegalArgumentException.
        Character.UnicodeScript unicodeScript = Character.UnicodeScript.forName(script);

        Pattern pattern = PATTERNS_BY_SCRIPT.get(unicodeScript);
        if (pattern == null) {
            pattern = Pattern.compile("[^\\p{Is" + unicodeScript.name() + "}\\p{IsDigit}]");
            // A concurrent caller may compile the same pattern too; both results are
            // equivalent, so the last write winning is harmless.
            PATTERNS_BY_SCRIPT.put(unicodeScript, pattern);
        }
        return pattern.matcher(input).find();
    }
}
|
|
@ -0,0 +1,49 @@
|
||||||
|
package com.baeldung.nonalphanumeric;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
public class NonAlphaNumericChecker {
|
||||||
|
/**
|
||||||
|
* Checks for non-alphanumeric characters in any Unicode Script
|
||||||
|
* @param str - String to check for special characters
|
||||||
|
* @return true if special character found else false
|
||||||
|
*/
|
||||||
|
public static boolean isNonAlphanumericAnyLangScript(String str) {
|
||||||
|
for (int i = 0; i < str.length(); i++) {
|
||||||
|
char c = str.charAt(i);
|
||||||
|
if (!Character.isLetterOrDigit(c)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* checks for special characters,returns false if any character
|
||||||
|
* found other than the script argument
|
||||||
|
* @param str - String to check for special characters
|
||||||
|
* @param script - Language script
|
||||||
|
* @return true if special character found else false
|
||||||
|
*/
|
||||||
|
public static boolean isNonAlphanumericInLangScript(String str, String script) {
|
||||||
|
|
||||||
|
for (int i = 0; i < str.length(); i++) {
|
||||||
|
char c = str.charAt(i);
|
||||||
|
// script e.g., Character.UnicodeScript.of(c).toString().equalsIgnoreCase(Character.UnicodeScript.LATIN.toString())
|
||||||
|
if (!Character.UnicodeScript.of(c).toString().equalsIgnoreCase(script)
|
||||||
|
&& !Character.isDigit(c)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* checks for special characters in any lang
|
||||||
|
* @param str - String to check for special characters
|
||||||
|
* @return true if special character found else false
|
||||||
|
*/
|
||||||
|
public static boolean isNonAlphanumericAnyLangScriptV2(String str) {
|
||||||
|
return !StringUtils.isAlphanumeric(str);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,117 @@
|
||||||
|
package com.baeldung.nonalphanumeric;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
|
||||||
|
public class NonAlphaNumRegexCheckerUnitTest {
|
||||||
|
@Test
|
||||||
|
public void whenStrLatinOrHasNonAlphaNum_ThenRetTrue() {
|
||||||
|
//alphabets with special character
|
||||||
|
String str1 = "W$nder^ful";
|
||||||
|
//digits with special character
|
||||||
|
String str2 = "123$%45";
|
||||||
|
//alphabets and digits with special characters
|
||||||
|
String str3 = "W@nd$r123$%45";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertTrue(ERROR_MSG + str1, NonAlphaNumRegexChecker.isNonAlphanumeric(str1));
|
||||||
|
assertTrue(ERROR_MSG + str2, NonAlphaNumRegexChecker.isNonAlphanumeric(str2));
|
||||||
|
assertTrue(ERROR_MSG + str3, NonAlphaNumRegexChecker.isNonAlphanumeric(str3));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenStrLatinOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||||
|
//only alphabets
|
||||||
|
String str1 = "Wonderful";
|
||||||
|
//only digits
|
||||||
|
String str2 = "12345";
|
||||||
|
//mix of alphabet and digit
|
||||||
|
String str3 = "5Won6der1234";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, non alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertFalse(ERROR_MSG + str1, NonAlphaNumRegexChecker.isNonAlphanumeric(str1));
|
||||||
|
assertFalse(ERROR_MSG + str2, NonAlphaNumRegexChecker.isNonAlphanumeric(str2));
|
||||||
|
assertFalse(ERROR_MSG + str3, NonAlphaNumRegexChecker.isNonAlphanumeric(str3));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenStrNonLatinOrHasNonAlphaNum_ThenRetTrue() {
|
||||||
|
//special character in Georgian text
|
||||||
|
String str1 = "##მშვენიერი@";
|
||||||
|
//special character with Turkish text
|
||||||
|
String str2 = "müthiş#$";
|
||||||
|
//No special character in Georgian text
|
||||||
|
String str3 = "მშვენიერი";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertTrue(ERROR_MSG + str1, NonAlphaNumRegexChecker.isNonAlphanumeric(str1));
|
||||||
|
assertTrue(ERROR_MSG + str2, NonAlphaNumRegexChecker.isNonAlphanumeric(str2));
|
||||||
|
assertTrue(ERROR_MSG + str3, NonAlphaNumRegexChecker.isNonAlphanumeric(str3));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenStrAnyLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||||
|
//special character in Georgian text
|
||||||
|
String str1 = "##მშვენიერი@";
|
||||||
|
//special character with Turkish text
|
||||||
|
String str2 = "müthiş#$";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertTrue(ERROR_MSG + str1, NonAlphaNumRegexChecker.containsNonAlphanumeric(str1));
|
||||||
|
assertTrue(ERROR_MSG + str2, NonAlphaNumRegexChecker.containsNonAlphanumeric(str2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenStrAnyLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||||
|
//Georgian text with no special char
|
||||||
|
String str1 = "მშვენიერი";
|
||||||
|
//Turkish text with no special char
|
||||||
|
String str2 = "müthiş";
|
||||||
|
//Latin text with no special char
|
||||||
|
String str3 = "Wonderful";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertFalse(ERROR_MSG + str1, NonAlphaNumRegexChecker.containsNonAlphanumeric(str1));
|
||||||
|
assertFalse(ERROR_MSG + str2, NonAlphaNumRegexChecker.containsNonAlphanumeric(str2));
|
||||||
|
assertFalse(ERROR_MSG + str3, NonAlphaNumRegexChecker.containsNonAlphanumeric(str3));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenLang_whenStrHasDiffLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||||
|
String script1 = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
|
||||||
|
//text in Burmese with special char
|
||||||
|
String str1 = "အံ့ဩ%စ##ရာ";
|
||||||
|
//special character and english char in Burmese
|
||||||
|
String str2 = "အံ့ADFဩစ%ရာ*^";
|
||||||
|
//English character in Burmese
|
||||||
|
String str3 = "အံ့ဩစTရာWon";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertTrue(ERROR_MSG + str1, NonAlphaNumRegexChecker.containsNonAlphanumeric(str1, script1));
|
||||||
|
assertTrue(ERROR_MSG + str2, NonAlphaNumRegexChecker.containsNonAlphanumeric(str2, script1));
|
||||||
|
assertTrue(ERROR_MSG + str3, NonAlphaNumRegexChecker.containsNonAlphanumeric(str3, script1));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenLang_whenStrHasSameLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||||
|
String script1 = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
|
||||||
|
String script2 = Character.UnicodeScript.GREEK.name();
|
||||||
|
//text in Burmese
|
||||||
|
String str1 = "အံ့ဩစရာ";
|
||||||
|
//text in Greek
|
||||||
|
String str2 = "Εκπληκτικός";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertFalse(ERROR_MSG + str1, NonAlphaNumRegexChecker.containsNonAlphanumeric(str1, script1));
|
||||||
|
assertFalse(ERROR_MSG + str2, NonAlphaNumRegexChecker.containsNonAlphanumeric(str2, script2));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,159 @@
|
||||||
|
package com.baeldung.nonalphanumeric;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
public class NonAlphaNumericCheckerUnitTest {
|
||||||
|
@Test
|
||||||
|
public void whenStrLatinOrHasNonAlphaNum_ThenRetTrue() {
|
||||||
|
String latin = Character.UnicodeScript.LATIN.name();
|
||||||
|
//alphabets with special character
|
||||||
|
String str1 = "W$nder^ful";
|
||||||
|
//digits with special character
|
||||||
|
String str2 = "123$%45";
|
||||||
|
//alphabets and digits with special characters
|
||||||
|
String str3 = "W@nd$r123$%45";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, latin));
|
||||||
|
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, latin));
|
||||||
|
assertTrue(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str3, latin));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenStrLatinOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||||
|
String latin = Character.UnicodeScript.LATIN.name();
|
||||||
|
//only alphabets
|
||||||
|
String str1 = "Wonderful";
|
||||||
|
//only digits
|
||||||
|
String str2 = "12345";
|
||||||
|
//mix of alphabet and digit
|
||||||
|
String str3 = "5Won6der1234";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, non alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertFalse(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, latin));
|
||||||
|
assertFalse(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, latin));
|
||||||
|
assertFalse(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str3, latin));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenStrNonLatinOrHasNonAlphaNum_ThenRetTrue() {
|
||||||
|
String latin = Character.UnicodeScript.LATIN.name();
|
||||||
|
//special character in Georgian text
|
||||||
|
String str1 = "##მშვენიერი@";
|
||||||
|
//special character with Turkish text
|
||||||
|
String str2 = "müthiş#$";
|
||||||
|
//No special character in Georgian text
|
||||||
|
String str3 = "მშვენიერი";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, latin));
|
||||||
|
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, latin));
|
||||||
|
assertTrue(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str3, latin));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenStrAnyLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||||
|
//special character in Georgian text
|
||||||
|
String str1 = "##მშვენიერი@";
|
||||||
|
//special character with Turkish text
|
||||||
|
String str2 = "müthiş#$";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str1));
|
||||||
|
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenStrAnyLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||||
|
//Georgian text with no special char
|
||||||
|
String str1 = "მშვენიერი";
|
||||||
|
//Turkish text with no special char
|
||||||
|
String str2 = "müthiş";
|
||||||
|
//Latin text with no special char
|
||||||
|
String str3 = "Wonderful";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertFalse(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str1));
|
||||||
|
assertFalse(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str2));
|
||||||
|
assertFalse(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str3));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenLang_whenStrHasDiffLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||||
|
String script1 = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
|
||||||
|
//text in Burmese with special char
|
||||||
|
String str1 = "အံ့ဩ%စ##ရာ";
|
||||||
|
//special character and english char in Burmese
|
||||||
|
String str2 = "အံ့ADFဩစ%ရာ*^";
|
||||||
|
//English character in Burmese
|
||||||
|
String str3 = "အံ့ဩစTရာWon";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, script1));
|
||||||
|
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, script1));
|
||||||
|
assertTrue(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str3, script1));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenLang_whenStrHasSameLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||||
|
String script1 = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
|
||||||
|
String script2 = Character.UnicodeScript.GREEK.name();
|
||||||
|
//text in Burmese
|
||||||
|
String str1 = "အံ့ဩစရာ";
|
||||||
|
//text in Greek
|
||||||
|
String str2 = "Εκπληκτικός";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertFalse(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, script1));
|
||||||
|
assertFalse(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, script2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenComLangImpl_whenStrAnyLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||||
|
//special character in Georgian text
|
||||||
|
String str1 = "##მშვენიერი@";
|
||||||
|
//special character with Turkish text
|
||||||
|
String str2 = "müthiş#$";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str1));
|
||||||
|
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenComLangImpl_whenStrAnyLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||||
|
//Georgian text with no special char
|
||||||
|
String str1 = "მშვენიერი";
|
||||||
|
//Turkish text with no special char
|
||||||
|
String str2 = "müthiş";
|
||||||
|
//Latin text with no special char
|
||||||
|
String str3 = "Wonderful";
|
||||||
|
//Burmese in Myanmar script
|
||||||
|
// String str4 = "အံ့ဩစရာ";
|
||||||
|
//only digits
|
||||||
|
String str5 = "3465";
|
||||||
|
//Error message
|
||||||
|
String ERROR_MSG = "Test failed, alphanumeric char found in ";
|
||||||
|
|
||||||
|
assertFalse(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str1));
|
||||||
|
assertFalse(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str2));
|
||||||
|
assertFalse(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str3));
|
||||||
|
//StringUtils.isAlphanumeric is unable to support supplementary unicode character and hence fails
|
||||||
|
//assertFalse(ERROR_MSG + str4, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str4));
|
||||||
|
assertFalse(ERROR_MSG + str5, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str5));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue