Updated regex to cater for apostrophes in words like John's

This commit is contained in:
alfred.samanga@gmail.com 2019-08-22 16:48:40 +02:00
parent 0c27e46de3
commit 73bde4e2c3
2 changed files with 13 additions and 2 deletions

View File

@ -10,7 +10,7 @@ public class WordCounter {
if (arg == null) {
return 0;
}
final String[] words = arg.split("[\\pP\\s]+");
final String[] words = arg.split("[\\pP\\s&&[^']]+");
return words.length;
}
@ -32,7 +32,7 @@ public class WordCounter {
int characterCounter = 0;
while (characterCounter < stringLength) {
if (Character.isLetter(arg.charAt(characterCounter)) && flag == SEPARATOR) {
if ((Character.isLetter(arg.charAt(characterCounter)) || isAllowedWordPunct(arg.charAt(characterCounter))) && flag == SEPARATOR) {
flag = WORD;
count++;
} else if (!Character.isLetter(arg.charAt(characterCounter))) {
@ -42,4 +42,8 @@ public class WordCounter {
}
return count;
}
/**
 * Reports whether the given character is the apostrophe ({@code '}),
 * the only punctuation mark this helper accepts.
 *
 * @param charAt the character to inspect
 * @return {@code true} if {@code charAt} is an apostrophe, {@code false} otherwise
 */
private static boolean isAllowedWordPunct(char charAt) {
    final char apostrophe = '\'';
    return apostrophe == charAt;
}
}

View File

@ -3,6 +3,9 @@ package com.baeldung.string.wordcount;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
import java.util.StringTokenizer;
public class WordCountUnitTest {
private String string1 = "This is a test sentence with eight words";
private String string2 = "This#is%a test sentence with eight words";
@ -10,11 +13,15 @@ public class WordCountUnitTest {
/**
 * Exercises {@code WordCounter.countWordsUsingRegex} with three inputs:
 * the shared {@code string2} fixture (expected 8), a sentence whose words
 * are separated only by punctuation marks (expected 9), and a sentence
 * containing an apostrophe and a double hyphen (expected 7).
 */
@Test
public void givenStringWith8Words_whenUsingRegexCount_ThenResultEqual8() {
    final int fixtureCount = WordCounter.countWordsUsingRegex(string2);
    assertEquals(8, fixtureCount);

    // Every word boundary here is a punctuation mark rather than a space.
    final String punctuationSeparated = "no&one#should%ever-write-like,this;but:well";
    assertEquals(9, WordCounter.countWordsUsingRegex(punctuationSeparated));

    // The expected total of 7 treats "farmer's" as a single word.
    final String withApostrophe = "the farmer's wife--she was from Albuquerque";
    assertEquals(7, WordCounter.countWordsUsingRegex(withApostrophe));
}
/**
 * Exercises {@code WordCounter.countWordsManually} with three inputs:
 * the shared {@code string1} fixture (expected 8), a sentence mixing
 * punctuation and space separators (expected 9), and a sentence
 * containing an apostrophe and a double hyphen (expected 7).
 */
@Test
public void givenStringWith8Words_whenUsingManualMethod_ThenWordCountEqual8() {
    final int fixtureCount = WordCounter.countWordsManually(string1);
    assertEquals(8, fixtureCount);

    // Words are split by a mix of punctuation marks and plain spaces.
    final String mixedSeparators = "no&one#should%ever-write-like,this but well";
    assertEquals(9, WordCounter.countWordsManually(mixedSeparators));

    // The expected total of 7 treats "farmer's" as a single word.
    final String withApostrophe = "the farmer's wife--she was from Albuquerque";
    assertEquals(7, WordCounter.countWordsManually(withApostrophe));
}
@Test