BAEL-6626 (#14345)
* BAEL-6626 Check If a String Contains Non-Alphanumeric Characters in Java * Move patterns to class level. --------- Co-authored-by: parthiv39731 <parthiv39731@gmail.com>
This commit is contained in:
parent
03385bed6a
commit
de3fc4f21a
|
@ -0,0 +1,50 @@
|
|||
package com.baeldung.nonalphanumeric;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * Regex-based checks for the presence of non-alphanumeric characters in a string.
 *
 * <p>Every method searches with {@code find()}, so it reports whether at least one
 * offending character occurs anywhere in the input. An empty input therefore yields
 * {@code false}; a {@code null} input is not supported.
 */
public class NonAlphaNumRegexChecker {

    /** Matches a single character that is neither Unicode-alphabetic nor a Unicode digit. */
    private static final Pattern PATTERN_NON_ALPHNUM_ANYLANG = Pattern.compile("[^\\p{IsAlphabetic}\\p{IsDigit}]");

    /**
     * Matches a single character outside the US-ASCII ranges a-z, A-Z and 0-9.
     * No quantifier is needed: one offending character is enough for {@code find()}.
     */
    private static final Pattern PATTERN_NON_ALPHNUM_USASCII = Pattern.compile("[^a-zA-Z0-9]");

    /**
     * Checks whether a character other than a US-ASCII letter or digit is present.
     * Letters of any non-English alphabet (for example Georgian or accented Latin
     * letters) count as non-alphanumeric here.
     *
     * <p>Note that the predefined class {@code \W} is not an equivalent pattern: it
     * treats the underscore as a word character and would not report it. To tolerate
     * whitespace as well, a class such as {@code [^a-zA-Z0-9\s]} could be used instead.
     *
     * @param str string to check for special characters; must not be {@code null}
     * @return {@code true} if a special character is found, {@code false} otherwise
     */
    public static boolean isNonAlphanumeric(String str) {
        return PATTERN_NON_ALPHNUM_USASCII.matcher(str).find();
    }

    /**
     * Checks for non-alphanumeric characters across all language scripts, using the
     * Unicode binary properties {@code IsAlphabetic} and {@code IsDigit}.
     *
     * <p>An alternative spelling of the same idea is {@code [^\p{Alnum}]} compiled
     * with {@link Pattern#UNICODE_CHARACTER_CLASS}.
     *
     * @param input string to check for special characters; must not be {@code null}
     * @return {@code true} if a special character is found, {@code false} otherwise
     */
    public static boolean containsNonAlphanumeric(String input) {
        return PATTERN_NON_ALPHNUM_ANYLANG.matcher(input).find();
    }

    /**
     * Checks for characters that are neither in the given Unicode script nor digits.
     * Letters from any other script are reported just like punctuation or symbols.
     *
     * @param input  string to check for special characters; must not be {@code null}
     * @param script script name, inserted verbatim after the {@code Is} prefix of a
     *               {@code \p{...}} class, e.g. {@code Character.UnicodeScript.GEORGIAN.name()}
     * @return {@code true} if a character outside the script (and not a digit) is found,
     *         {@code false} otherwise
     * @throws java.util.regex.PatternSyntaxException if the resulting expression is not
     *         valid, for example because the script name is unknown
     */
    public static boolean containsNonAlphanumeric(String input, String script) {
        // The script is only known at call time, so this pattern cannot be a class-level constant.
        String regexScriptClass = "\\p{Is" + script + "}";
        Pattern pattern = Pattern.compile("[^" + regexScriptClass + "\\p{IsDigit}]");
        return pattern.matcher(input).find();
    }
}
|
|
@ -0,0 +1,49 @@
|
|||
package com.baeldung.nonalphanumeric;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
public class NonAlphaNumericChecker {
|
||||
/**
|
||||
* Checks for non-alphanumeric characters in any Unicode Script
|
||||
* @param str - String to check for special characters
|
||||
* @return true if special character found else false
|
||||
*/
|
||||
public static boolean isNonAlphanumericAnyLangScript(String str) {
|
||||
for (int i = 0; i < str.length(); i++) {
|
||||
char c = str.charAt(i);
|
||||
if (!Character.isLetterOrDigit(c)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* checks for special characters,returns false if any character
|
||||
* found other than the script argument
|
||||
* @param str - String to check for special characters
|
||||
* @param script - Language script
|
||||
* @return true if special character found else false
|
||||
*/
|
||||
public static boolean isNonAlphanumericInLangScript(String str, String script) {
|
||||
|
||||
for (int i = 0; i < str.length(); i++) {
|
||||
char c = str.charAt(i);
|
||||
// script e.g., Character.UnicodeScript.of(c).toString().equalsIgnoreCase(Character.UnicodeScript.LATIN.toString())
|
||||
if (!Character.UnicodeScript.of(c).toString().equalsIgnoreCase(script)
|
||||
&& !Character.isDigit(c)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* checks for special characters in any lang
|
||||
* @param str - String to check for special characters
|
||||
* @return true if special character found else false
|
||||
*/
|
||||
public static boolean isNonAlphanumericAnyLangScriptV2(String str) {
|
||||
return !StringUtils.isAlphanumeric(str);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
package com.baeldung.nonalphanumeric;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
|
||||
public class NonAlphaNumRegexCheckerUnitTest {
|
||||
@Test
|
||||
public void whenStrLatinOrHasNonAlphaNum_ThenRetTrue() {
|
||||
//alphabets with special character
|
||||
String str1 = "W$nder^ful";
|
||||
//digits with special character
|
||||
String str2 = "123$%45";
|
||||
//alphabets and digits with special characters
|
||||
String str3 = "W@nd$r123$%45";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertTrue(ERROR_MSG + str1, NonAlphaNumRegexChecker.isNonAlphanumeric(str1));
|
||||
assertTrue(ERROR_MSG + str2, NonAlphaNumRegexChecker.isNonAlphanumeric(str2));
|
||||
assertTrue(ERROR_MSG + str3, NonAlphaNumRegexChecker.isNonAlphanumeric(str3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenStrLatinOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||
//only alphabets
|
||||
String str1 = "Wonderful";
|
||||
//only digits
|
||||
String str2 = "12345";
|
||||
//mix of alphabet and digit
|
||||
String str3 = "5Won6der1234";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, non alphanumeric char found in ";
|
||||
|
||||
assertFalse(ERROR_MSG + str1, NonAlphaNumRegexChecker.isNonAlphanumeric(str1));
|
||||
assertFalse(ERROR_MSG + str2, NonAlphaNumRegexChecker.isNonAlphanumeric(str2));
|
||||
assertFalse(ERROR_MSG + str3, NonAlphaNumRegexChecker.isNonAlphanumeric(str3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenStrNonLatinOrHasNonAlphaNum_ThenRetTrue() {
|
||||
//special character in Georgian text
|
||||
String str1 = "##მშვენიერი@";
|
||||
//special character with Turkish text
|
||||
String str2 = "müthiş#$";
|
||||
//No special character in Georgian text
|
||||
String str3 = "მშვენიერი";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertTrue(ERROR_MSG + str1, NonAlphaNumRegexChecker.isNonAlphanumeric(str1));
|
||||
assertTrue(ERROR_MSG + str2, NonAlphaNumRegexChecker.isNonAlphanumeric(str2));
|
||||
assertTrue(ERROR_MSG + str3, NonAlphaNumRegexChecker.isNonAlphanumeric(str3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenStrAnyLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||
//special character in Georgian text
|
||||
String str1 = "##მშვენიერი@";
|
||||
//special character with Turkish text
|
||||
String str2 = "müthiş#$";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertTrue(ERROR_MSG + str1, NonAlphaNumRegexChecker.containsNonAlphanumeric(str1));
|
||||
assertTrue(ERROR_MSG + str2, NonAlphaNumRegexChecker.containsNonAlphanumeric(str2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenStrAnyLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||
//Georgian text with no special char
|
||||
String str1 = "მშვენიერი";
|
||||
//Turkish text with no special char
|
||||
String str2 = "müthiş";
|
||||
//Latin text with no special char
|
||||
String str3 = "Wonderful";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertFalse(ERROR_MSG + str1, NonAlphaNumRegexChecker.containsNonAlphanumeric(str1));
|
||||
assertFalse(ERROR_MSG + str2, NonAlphaNumRegexChecker.containsNonAlphanumeric(str2));
|
||||
assertFalse(ERROR_MSG + str3, NonAlphaNumRegexChecker.containsNonAlphanumeric(str3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenLang_whenStrHasDiffLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||
String script1 = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
|
||||
//text in Burmese with special char
|
||||
String str1 = "အံ့ဩ%စ##ရာ";
|
||||
//special character and english char in Burmese
|
||||
String str2 = "အံ့ADFဩစ%ရာ*^";
|
||||
//English character in Burmese
|
||||
String str3 = "အံ့ဩစTရာWon";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertTrue(ERROR_MSG + str1, NonAlphaNumRegexChecker.containsNonAlphanumeric(str1, script1));
|
||||
assertTrue(ERROR_MSG + str2, NonAlphaNumRegexChecker.containsNonAlphanumeric(str2, script1));
|
||||
assertTrue(ERROR_MSG + str3, NonAlphaNumRegexChecker.containsNonAlphanumeric(str3, script1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenLang_whenStrHasSameLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||
String script1 = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
|
||||
String script2 = Character.UnicodeScript.GREEK.name();
|
||||
//text in Burmese
|
||||
String str1 = "အံ့ဩစရာ";
|
||||
//text in Greek
|
||||
String str2 = "Εκπληκτικός";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, alphanumeric char found in ";
|
||||
|
||||
assertFalse(ERROR_MSG + str1, NonAlphaNumRegexChecker.containsNonAlphanumeric(str1, script1));
|
||||
assertFalse(ERROR_MSG + str2, NonAlphaNumRegexChecker.containsNonAlphanumeric(str2, script2));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,159 @@
|
|||
package com.baeldung.nonalphanumeric;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class NonAlphaNumericCheckerUnitTest {
|
||||
@Test
|
||||
public void whenStrLatinOrHasNonAlphaNum_ThenRetTrue() {
|
||||
String latin = Character.UnicodeScript.LATIN.name();
|
||||
//alphabets with special character
|
||||
String str1 = "W$nder^ful";
|
||||
//digits with special character
|
||||
String str2 = "123$%45";
|
||||
//alphabets and digits with special characters
|
||||
String str3 = "W@nd$r123$%45";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, latin));
|
||||
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, latin));
|
||||
assertTrue(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str3, latin));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenStrLatinOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||
String latin = Character.UnicodeScript.LATIN.name();
|
||||
//only alphabets
|
||||
String str1 = "Wonderful";
|
||||
//only digits
|
||||
String str2 = "12345";
|
||||
//mix of alphabet and digit
|
||||
String str3 = "5Won6der1234";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, non alphanumeric char found in ";
|
||||
|
||||
assertFalse(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, latin));
|
||||
assertFalse(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, latin));
|
||||
assertFalse(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str3, latin));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenStrNonLatinOrHasNonAlphaNum_ThenRetTrue() {
|
||||
String latin = Character.UnicodeScript.LATIN.name();
|
||||
//special character in Georgian text
|
||||
String str1 = "##მშვენიერი@";
|
||||
//special character with Turkish text
|
||||
String str2 = "müthiş#$";
|
||||
//No special character in Georgian text
|
||||
String str3 = "მშვენიერი";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, latin));
|
||||
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, latin));
|
||||
assertTrue(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str3, latin));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenStrAnyLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||
//special character in Georgian text
|
||||
String str1 = "##მშვენიერი@";
|
||||
//special character with Turkish text
|
||||
String str2 = "müthiş#$";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str1));
|
||||
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenStrAnyLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||
//Georgian text with no special char
|
||||
String str1 = "მშვენიერი";
|
||||
//Turkish text with no special char
|
||||
String str2 = "müthiş";
|
||||
//Latin text with no special char
|
||||
String str3 = "Wonderful";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertFalse(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str1));
|
||||
assertFalse(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str2));
|
||||
assertFalse(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(str3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenLang_whenStrHasDiffLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||
String script1 = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
|
||||
//text in Burmese with special char
|
||||
String str1 = "အံ့ဩ%စ##ရာ";
|
||||
//special character and english char in Burmese
|
||||
String str2 = "အံ့ADFဩစ%ရာ*^";
|
||||
//English character in Burmese
|
||||
String str3 = "အံ့ဩစTရာWon";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, script1));
|
||||
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, script1));
|
||||
assertTrue(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str3, script1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenLang_whenStrHasSameLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||
String script1 = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
|
||||
String script2 = Character.UnicodeScript.GREEK.name();
|
||||
//text in Burmese
|
||||
String str1 = "အံ့ဩစရာ";
|
||||
//text in Greek
|
||||
String str2 = "Εκπληκτικός";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, alphanumeric char found in ";
|
||||
|
||||
assertFalse(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str1, script1));
|
||||
assertFalse(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericInLangScript(str2, script2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenComLangImpl_whenStrAnyLangOrHasNonAlphaNum_ThenRetTrue() {
|
||||
//special character in Georgian text
|
||||
String str1 = "##მშვენიერი@";
|
||||
//special character with Turkish text
|
||||
String str2 = "müthiş#$";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, no alphanumeric char found in ";
|
||||
|
||||
assertTrue(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str1));
|
||||
assertTrue(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenComLangImpl_whenStrAnyLangOrHasNoNonAlphaNum_ThenRetFalse() {
|
||||
//Georgian text with no special char
|
||||
String str1 = "მშვენიერი";
|
||||
//Turkish text with no special char
|
||||
String str2 = "müthiş";
|
||||
//Latin text with no special char
|
||||
String str3 = "Wonderful";
|
||||
//Burmese in Myanmar script
|
||||
// String str4 = "အံ့ဩစရာ";
|
||||
//only digits
|
||||
String str5 = "3465";
|
||||
//Error message
|
||||
String ERROR_MSG = "Test failed, alphanumeric char found in ";
|
||||
|
||||
assertFalse(ERROR_MSG + str1, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str1));
|
||||
assertFalse(ERROR_MSG + str2, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str2));
|
||||
assertFalse(ERROR_MSG + str3, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str3));
|
||||
//StringUtils.isAlphanumeric is unable to support supplementary unicode character and hence fails
|
||||
//assertFalse(ERROR_MSG + str4, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str4));
|
||||
assertFalse(ERROR_MSG + str5, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(str5));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue