* BALE-6626
Check If a String Contains Non-Alphanumeric Characters in Java

* Move patterns to class level.

---------

Co-authored-by: parthiv39731 <parthiv39731@gmail.com>
This commit is contained in:
parthiv39731 2023-07-04 05:17:45 -07:00 committed by GitHub
parent 03385bed6a
commit de3fc4f21a
4 changed files with 375 additions and 0 deletions

View File

@ -0,0 +1,50 @@
package com.baeldung.nonalphanumeric;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-based helpers that report whether a string contains at least one
 * character that is not a letter or a digit.
 */
public class NonAlphaNumRegexChecker {

    /** Matches a run of characters that are not US-ASCII letters or digits. */
    private static final Pattern ASCII_SPECIAL_CHARS = Pattern.compile("[^a-zA-Z0-9]+");

    /** Matches one character that is neither alphabetic nor a digit in any Unicode script. */
    private static final Pattern UNICODE_SPECIAL_CHAR = Pattern.compile("[^\\p{IsAlphabetic}\\p{IsDigit}]");

    /**
     * Tells whether the text holds any character outside {@code a-z}, {@code A-Z} and {@code 0-9}.
     * Letters from other alphabets (or accented Latin letters) therefore count as
     * non-alphanumeric here, and so do whitespace and the underscore.
     *
     * @param str - String to check for special character
     * @return true if a character outside the US-ASCII letters and digits is found, else false
     */
    public static boolean isNonAlphanumeric(String str) {
        return ASCII_SPECIAL_CHARS.matcher(str).find();
    }

    /**
     * Tells whether the text holds any character that is neither alphabetic nor a digit,
     * whatever Unicode script it is written in.
     *
     * @param input - String to check for special character
     * @return true if special character found else false
     */
    public static boolean containsNonAlphanumeric(String input) {
        return UNICODE_SPECIAL_CHAR.matcher(input).find();
    }

    /**
     * Tells whether the text holds any character that neither belongs to the given
     * Unicode script nor is a digit. Letters of a different script are reported just like
     * punctuation. Example of script - Character.UnicodeScript.GEORGIAN.name()
     *
     * @param input - String to check for special character
     * @param script - name of the only script whose characters are accepted
     * @return true if a character outside the script (and not a digit) is found, else false
     */
    public static boolean containsNonAlphanumeric(String input, String script) {
        // The expression depends on the script argument, so it is assembled and compiled per call.
        StringBuilder regex = new StringBuilder("[^");
        regex.append("\\p{Is").append(script).append('}');
        regex.append("\\p{IsDigit}]");
        return Pattern.compile(regex.toString()).matcher(input).find();
    }
}

View File

@ -0,0 +1,49 @@
package com.baeldung.nonalphanumeric;
import org.apache.commons.lang3.StringUtils;
/**
 * Loop-based and library-based helpers that report whether a string contains
 * at least one character that is not a letter or a digit.
 */
public class NonAlphaNumericChecker {

    /**
     * Checks for non-alphanumeric characters in any Unicode Script.
     * <p>
     * The text is examined code point by code point, so a letter stored as a surrogate
     * pair is classified as one character instead of two unclassifiable halves.
     *
     * @param str - String to check for special characters
     * @return true if special character found else false (an empty string yields false)
     */
    public static boolean isNonAlphanumericAnyLangScript(String str) {
        for (int i = 0; i < str.length(); ) {
            int codePoint = str.codePointAt(i);
            if (!Character.isLetterOrDigit(codePoint)) {
                return true;
            }
            // Advance by one or two chars depending on how the code point is encoded.
            i += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Checks for special characters; returns true as soon as a character is found that
     * neither belongs to the script argument nor is a digit. Only script membership is
     * tested, so letters of any other script are reported like punctuation.
     * <p>
     * The text is examined code point by code point so that characters stored as
     * surrogate pairs are looked up under their real script.
     *
     * @param str - String to check for special characters
     * @param script - Language script name, compared case-insensitively,
     *               e.g. Character.UnicodeScript.LATIN.toString()
     * @return true if special character found else false
     */
    public static boolean isNonAlphanumericInLangScript(String str, String script) {
        for (int i = 0; i < str.length(); ) {
            int codePoint = str.codePointAt(i);
            boolean inScript = Character.UnicodeScript.of(codePoint).toString().equalsIgnoreCase(script);
            if (!inScript && !Character.isDigit(codePoint)) {
                return true;
            }
            i += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Checks for special characters in any language by delegating to
     * {@code StringUtils.isAlphanumeric} and negating its answer.
     * <p>
     * NOTE: per the Commons Lang documentation {@code isAlphanumeric} answers false for
     * {@code null} and for the empty string, so this method answers true for both,
     * unlike {@link #isNonAlphanumericAnyLangScript(String)}, which answers false for "".
     *
     * @param str - String to check for special characters
     * @return true if special character found else false
     */
    public static boolean isNonAlphanumericAnyLangScriptV2(String str) {
        return !StringUtils.isAlphanumeric(str);
    }
}

View File

@ -0,0 +1,117 @@
package com.baeldung.nonalphanumeric;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Unit tests for {@code NonAlphaNumRegexChecker}: the US-ASCII check, the
 * any-script check and the single-script check.
 */
public class NonAlphaNumRegexCheckerUnitTest {

    /** Shown when a check expected to report a special character returned false. */
    private static final String NOT_DETECTED_MSG = "Test failed, no non alphanumeric char found in ";

    /** Shown when a check expected to report nothing returned true. */
    private static final String WRONGLY_DETECTED_MSG = "Test failed, non alphanumeric char found in ";

    /** US-ASCII check: symbols mixed into ASCII letters and digits must be reported. */
    @Test
    public void whenStrLatinOrHasNonAlphaNum_ThenRetTrue() {
        //alphabets with special character
        String lettersWithSymbols = "W$nder^ful";
        //digits with special character
        String digitsWithSymbols = "123$%45";
        //alphabets and digits with special characters
        String mixedWithSymbols = "W@nd$r123$%45";

        assertTrue(NOT_DETECTED_MSG + lettersWithSymbols, NonAlphaNumRegexChecker.isNonAlphanumeric(lettersWithSymbols));
        assertTrue(NOT_DETECTED_MSG + digitsWithSymbols, NonAlphaNumRegexChecker.isNonAlphanumeric(digitsWithSymbols));
        assertTrue(NOT_DETECTED_MSG + mixedWithSymbols, NonAlphaNumRegexChecker.isNonAlphanumeric(mixedWithSymbols));
    }

    /** US-ASCII check: pure ASCII letters and digits must not be reported. */
    @Test
    public void whenStrLatinOrHasNoNonAlphaNum_ThenRetFalse() {
        //only alphabets
        String lettersOnly = "Wonderful";
        //only digits
        String digitsOnly = "12345";
        //mix of alphabet and digit
        String lettersAndDigits = "5Won6der1234";

        assertFalse(WRONGLY_DETECTED_MSG + lettersOnly, NonAlphaNumRegexChecker.isNonAlphanumeric(lettersOnly));
        assertFalse(WRONGLY_DETECTED_MSG + digitsOnly, NonAlphaNumRegexChecker.isNonAlphanumeric(digitsOnly));
        assertFalse(WRONGLY_DETECTED_MSG + lettersAndDigits, NonAlphaNumRegexChecker.isNonAlphanumeric(lettersAndDigits));
    }

    /**
     * US-ASCII check: any character outside a-z, A-Z and 0-9 is reported, so even
     * plain Georgian text without symbols yields true.
     */
    @Test
    public void whenStrNonLatinOrHasNonAlphaNum_ThenRetTrue() {
        //special character in Georgian text
        String georgianWithSymbols = "##მშვენიერი@";
        //special character with Turkish text
        String turkishWithSymbols = "müthiş#$";
        //No special character in Georgian text
        String georgianOnly = "მშვენიერი";

        assertTrue(NOT_DETECTED_MSG + georgianWithSymbols, NonAlphaNumRegexChecker.isNonAlphanumeric(georgianWithSymbols));
        assertTrue(NOT_DETECTED_MSG + turkishWithSymbols, NonAlphaNumRegexChecker.isNonAlphanumeric(turkishWithSymbols));
        assertTrue(NOT_DETECTED_MSG + georgianOnly, NonAlphaNumRegexChecker.isNonAlphanumeric(georgianOnly));
    }

    /** Any-script check: symbols must be reported regardless of the surrounding script. */
    @Test
    public void whenStrAnyLangOrHasNonAlphaNum_ThenRetTrue() {
        //special character in Georgian text
        String georgianWithSymbols = "##მშვენიერი@";
        //special character with Turkish text
        String turkishWithSymbols = "müthiş#$";

        assertTrue(NOT_DETECTED_MSG + georgianWithSymbols, NonAlphaNumRegexChecker.containsNonAlphanumeric(georgianWithSymbols));
        assertTrue(NOT_DETECTED_MSG + turkishWithSymbols, NonAlphaNumRegexChecker.containsNonAlphanumeric(turkishWithSymbols));
    }

    /** Any-script check: text made only of letters must not be reported, whatever the script. */
    @Test
    public void whenStrAnyLangOrHasNoNonAlphaNum_ThenRetFalse() {
        //Georgian text with no special char
        String georgianOnly = "მშვენიერი";
        //Turkish text with no special char
        String turkishOnly = "müthiş";
        //Latin text with no special char
        String latinOnly = "Wonderful";

        assertFalse(WRONGLY_DETECTED_MSG + georgianOnly, NonAlphaNumRegexChecker.containsNonAlphanumeric(georgianOnly));
        assertFalse(WRONGLY_DETECTED_MSG + turkishOnly, NonAlphaNumRegexChecker.containsNonAlphanumeric(turkishOnly));
        assertFalse(WRONGLY_DETECTED_MSG + latinOnly, NonAlphaNumRegexChecker.containsNonAlphanumeric(latinOnly));
    }

    /** Single-script check: symbols and letters of another script must both be reported. */
    @Test
    public void givenLang_whenStrHasDiffLangOrHasNonAlphaNum_ThenRetTrue() {
        String myanmar = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
        //text in Burmese with special char
        String burmeseWithSymbols = "အံ့ဩ%စ##ရာ";
        //special character and english char in Burmese
        String burmeseWithEnglishAndSymbols = "အံ့ADFဩစ%ရာ*^";
        //English character in Burmese
        String burmeseWithEnglish = "အံ့ဩစTရာWon";

        assertTrue(NOT_DETECTED_MSG + burmeseWithSymbols, NonAlphaNumRegexChecker.containsNonAlphanumeric(burmeseWithSymbols, myanmar));
        assertTrue(NOT_DETECTED_MSG + burmeseWithEnglishAndSymbols, NonAlphaNumRegexChecker.containsNonAlphanumeric(burmeseWithEnglishAndSymbols, myanmar));
        assertTrue(NOT_DETECTED_MSG + burmeseWithEnglish, NonAlphaNumRegexChecker.containsNonAlphanumeric(burmeseWithEnglish, myanmar));
    }

    /** Single-script check: text written entirely in the requested script must not be reported. */
    @Test
    public void givenLang_whenStrHasSameLangOrHasNoNonAlphaNum_ThenRetFalse() {
        String myanmar = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
        String greek = Character.UnicodeScript.GREEK.name();
        //text in Burmese
        String burmeseOnly = "အံ့ဩစရာ";
        //text in Greek
        String greekOnly = "Εκπληκτικός";

        assertFalse(WRONGLY_DETECTED_MSG + burmeseOnly, NonAlphaNumRegexChecker.containsNonAlphanumeric(burmeseOnly, myanmar));
        assertFalse(WRONGLY_DETECTED_MSG + greekOnly, NonAlphaNumRegexChecker.containsNonAlphanumeric(greekOnly, greek));
    }
}

View File

@ -0,0 +1,159 @@
package com.baeldung.nonalphanumeric;
import org.junit.Test;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
 * Unit tests for {@code NonAlphaNumericChecker}: the single-script loop, the
 * any-script loop and the Apache Commons Lang based variant.
 */
public class NonAlphaNumericCheckerUnitTest {

    /** Shown when a check expected to report a special character returned false. */
    private static final String NOT_DETECTED_MSG = "Test failed, no non alphanumeric char found in ";

    /** Shown when a check expected to report nothing returned true. */
    private static final String WRONGLY_DETECTED_MSG = "Test failed, non alphanumeric char found in ";

    /** Latin-script check: symbols mixed into Latin letters and digits must be reported. */
    @Test
    public void whenStrLatinOrHasNonAlphaNum_ThenRetTrue() {
        String latin = Character.UnicodeScript.LATIN.name();
        //alphabets with special character
        String lettersWithSymbols = "W$nder^ful";
        //digits with special character
        String digitsWithSymbols = "123$%45";
        //alphabets and digits with special characters
        String mixedWithSymbols = "W@nd$r123$%45";

        assertTrue(NOT_DETECTED_MSG + lettersWithSymbols, NonAlphaNumericChecker.isNonAlphanumericInLangScript(lettersWithSymbols, latin));
        assertTrue(NOT_DETECTED_MSG + digitsWithSymbols, NonAlphaNumericChecker.isNonAlphanumericInLangScript(digitsWithSymbols, latin));
        assertTrue(NOT_DETECTED_MSG + mixedWithSymbols, NonAlphaNumericChecker.isNonAlphanumericInLangScript(mixedWithSymbols, latin));
    }

    /** Latin-script check: pure Latin letters and digits must not be reported. */
    @Test
    public void whenStrLatinOrHasNoNonAlphaNum_ThenRetFalse() {
        String latin = Character.UnicodeScript.LATIN.name();
        //only alphabets
        String lettersOnly = "Wonderful";
        //only digits
        String digitsOnly = "12345";
        //mix of alphabet and digit
        String lettersAndDigits = "5Won6der1234";

        assertFalse(WRONGLY_DETECTED_MSG + lettersOnly, NonAlphaNumericChecker.isNonAlphanumericInLangScript(lettersOnly, latin));
        assertFalse(WRONGLY_DETECTED_MSG + digitsOnly, NonAlphaNumericChecker.isNonAlphanumericInLangScript(digitsOnly, latin));
        assertFalse(WRONGLY_DETECTED_MSG + lettersAndDigits, NonAlphaNumericChecker.isNonAlphanumericInLangScript(lettersAndDigits, latin));
    }

    /**
     * Latin-script check: symbols are reported, and so is plain Georgian text,
     * because its letters do not belong to the Latin script.
     */
    @Test
    public void whenStrNonLatinOrHasNonAlphaNum_ThenRetTrue() {
        String latin = Character.UnicodeScript.LATIN.name();
        //special character in Georgian text
        String georgianWithSymbols = "##მშვენიერი@";
        //special character with Turkish text
        String turkishWithSymbols = "müthiş#$";
        //No special character in Georgian text
        String georgianOnly = "მშვენიერი";

        assertTrue(NOT_DETECTED_MSG + georgianWithSymbols, NonAlphaNumericChecker.isNonAlphanumericInLangScript(georgianWithSymbols, latin));
        assertTrue(NOT_DETECTED_MSG + turkishWithSymbols, NonAlphaNumericChecker.isNonAlphanumericInLangScript(turkishWithSymbols, latin));
        assertTrue(NOT_DETECTED_MSG + georgianOnly, NonAlphaNumericChecker.isNonAlphanumericInLangScript(georgianOnly, latin));
    }

    /** Any-script loop: symbols must be reported regardless of the surrounding script. */
    @Test
    public void whenStrAnyLangOrHasNonAlphaNum_ThenRetTrue() {
        //special character in Georgian text
        String georgianWithSymbols = "##მშვენიერი@";
        //special character with Turkish text
        String turkishWithSymbols = "müthiş#$";

        assertTrue(NOT_DETECTED_MSG + georgianWithSymbols, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(georgianWithSymbols));
        assertTrue(NOT_DETECTED_MSG + turkishWithSymbols, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(turkishWithSymbols));
    }

    /** Any-script loop: text made only of letters must not be reported, whatever the script. */
    @Test
    public void whenStrAnyLangOrHasNoNonAlphaNum_ThenRetFalse() {
        //Georgian text with no special char
        String georgianOnly = "მშვენიერი";
        //Turkish text with no special char
        String turkishOnly = "müthiş";
        //Latin text with no special char
        String latinOnly = "Wonderful";

        assertFalse(WRONGLY_DETECTED_MSG + georgianOnly, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(georgianOnly));
        assertFalse(WRONGLY_DETECTED_MSG + turkishOnly, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(turkishOnly));
        assertFalse(WRONGLY_DETECTED_MSG + latinOnly, NonAlphaNumericChecker.isNonAlphanumericAnyLangScript(latinOnly));
    }

    /** Single-script check: symbols and letters of another script must both be reported. */
    @Test
    public void givenLang_whenStrHasDiffLangOrHasNonAlphaNum_ThenRetTrue() {
        String myanmar = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
        //text in Burmese with special char
        String burmeseWithSymbols = "အံ့ဩ%စ##ရာ";
        //special character and english char in Burmese
        String burmeseWithEnglishAndSymbols = "အံ့ADFဩစ%ရာ*^";
        //English character in Burmese
        String burmeseWithEnglish = "အံ့ဩစTရာWon";

        assertTrue(NOT_DETECTED_MSG + burmeseWithSymbols, NonAlphaNumericChecker.isNonAlphanumericInLangScript(burmeseWithSymbols, myanmar));
        assertTrue(NOT_DETECTED_MSG + burmeseWithEnglishAndSymbols, NonAlphaNumericChecker.isNonAlphanumericInLangScript(burmeseWithEnglishAndSymbols, myanmar));
        assertTrue(NOT_DETECTED_MSG + burmeseWithEnglish, NonAlphaNumericChecker.isNonAlphanumericInLangScript(burmeseWithEnglish, myanmar));
    }

    /** Single-script check: text written entirely in the requested script must not be reported. */
    @Test
    public void givenLang_whenStrHasSameLangOrHasNoNonAlphaNum_ThenRetFalse() {
        String myanmar = Character.UnicodeScript.MYANMAR.name(); //script used for Burmese Lang
        String greek = Character.UnicodeScript.GREEK.name();
        //text in Burmese
        String burmeseOnly = "အံ့ဩစရာ";
        //text in Greek
        String greekOnly = "Εκπληκτικός";

        assertFalse(WRONGLY_DETECTED_MSG + burmeseOnly, NonAlphaNumericChecker.isNonAlphanumericInLangScript(burmeseOnly, myanmar));
        assertFalse(WRONGLY_DETECTED_MSG + greekOnly, NonAlphaNumericChecker.isNonAlphanumericInLangScript(greekOnly, greek));
    }

    /** Commons Lang variant: symbols must be reported regardless of the surrounding script. */
    @Test
    public void givenComLangImpl_whenStrAnyLangOrHasNonAlphaNum_ThenRetTrue() {
        //special character in Georgian text
        String georgianWithSymbols = "##მშვენიერი@";
        //special character with Turkish text
        String turkishWithSymbols = "müthiş#$";

        assertTrue(NOT_DETECTED_MSG + georgianWithSymbols, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(georgianWithSymbols));
        assertTrue(NOT_DETECTED_MSG + turkishWithSymbols, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(turkishWithSymbols));
    }

    /** Commons Lang variant: text made only of letters or digits must not be reported. */
    @Test
    public void givenComLangImpl_whenStrAnyLangOrHasNoNonAlphaNum_ThenRetFalse() {
        //Georgian text with no special char
        String georgianOnly = "მშვენიერი";
        //Turkish text with no special char
        String turkishOnly = "müthiş";
        //Latin text with no special char
        String latinOnly = "Wonderful";
        //Burmese in Myanmar script
        // String burmeseOnly = "အံ့ဩစရာ";
        //only digits
        String digitsOnly = "3465";

        assertFalse(WRONGLY_DETECTED_MSG + georgianOnly, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(georgianOnly));
        assertFalse(WRONGLY_DETECTED_MSG + turkishOnly, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(turkishOnly));
        assertFalse(WRONGLY_DETECTED_MSG + latinOnly, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(latinOnly));
        //The Burmese sample is left out because this assertion fails for it. The Myanmar block lies
        //inside the Basic Multilingual Plane, so surrogate pairs are not the cause; most likely it is the
        //combining vowel and tone marks in the text, which are not classified as letters or digits.
        //assertFalse(WRONGLY_DETECTED_MSG + burmeseOnly, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(burmeseOnly));
        assertFalse(WRONGLY_DETECTED_MSG + digitsOnly, NonAlphaNumericChecker.isNonAlphanumericAnyLangScriptV2(digitsOnly));
    }
}