From c23cf58cd84a43e722f3cce6787c02250800154e Mon Sep 17 00:00:00 2001 From: "alfred.samanga@gmail.com" Date: Wed, 14 Aug 2019 17:13:49 +0200 Subject: [PATCH 1/7] Implemented the code to count words in a string in Java --- .../string/wordcount/WordCounter.java | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java diff --git a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java new file mode 100644 index 0000000000..6b061c7925 --- /dev/null +++ b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java @@ -0,0 +1,58 @@ +package com.baeldung.string.wordcount; + +import java.util.StringTokenizer; + +/** + * Created by alfred on 14 August 2019 + */ +public class WordCounter { + public static void main(String[] args) { + //test string + String random = "Lorem%Ipsum is-simply dummy text."; + + System.out.printf("Using delimiter, word count: %d\n", countWordsUsingTokenizer(random)); + System.out.printf("Using no library method, word count: %d\n", countWordsManually(random)); + System.out.printf("Using punctuation regex, word count: %d\n", countWordsUsingRegex(random)); + System.out.printf("\n\n%s\n\n", random); + + } + + public static int countWordsUsingRegex(String arg) { + if (arg == null) { + return 0; + } + final String[] words = arg.split("\\pP|\\s+"); + return words.length; + } + + public static int countWordsUsingTokenizer(String arg) { + if (arg == null) { + return 0; + } + final StringTokenizer stringTokenizer = new StringTokenizer(arg); + return stringTokenizer.countTokens(); + } + + public static int countWordsManually(String arg) { + if (arg == null) { + return 0; + } + + int count = 0; + + boolean isAWord = false; + int argEnd = arg.length() - 1; + + for (int i = 0; i < arg.length(); i++) { + if (Character.isLetter(arg.charAt(i)) && i != 
argEnd) { + isAWord = true; + } else if (!Character.isLetter(arg.charAt(i)) && isAWord) { + count++; + isAWord = false; + } else if (Character.isLetter(arg.charAt(i)) && i == argEnd) { + count++; + } + } + return count; + } +} From 0dcf1268610cd24bdd5049073d3d9eaa10d20f4b Mon Sep 17 00:00:00 2001 From: "alfred.samanga@gmail.com" Date: Wed, 14 Aug 2019 20:39:00 +0200 Subject: [PATCH 2/7] Added test code for counting words in a string in Java --- .../main/java/com/baeldung/string/README.md | 3 --- .../string/wordcount/WordCounter.java | 12 +--------- .../string/wordcount/WordCountUnitTest.java | 24 +++++++++++++++++++ 3 files changed, 25 insertions(+), 14 deletions(-) delete mode 100644 java-strings-3/src/main/java/com/baeldung/string/README.md create mode 100644 java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java diff --git a/java-strings-3/src/main/java/com/baeldung/string/README.md b/java-strings-3/src/main/java/com/baeldung/string/README.md deleted file mode 100644 index e02980e93f..0000000000 --- a/java-strings-3/src/main/java/com/baeldung/string/README.md +++ /dev/null @@ -1,3 +0,0 @@ -This file exists to ensure this empty directory is committed in Git. - -Please remove this file when this directory is populated. 
\ No newline at end of file diff --git a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java index 6b061c7925..1ee7e4a4ef 100644 --- a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java +++ b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java @@ -2,19 +2,9 @@ package com.baeldung.string.wordcount; import java.util.StringTokenizer; -/** - * Created by alfred on 14 August 2019 - */ public class WordCounter { public static void main(String[] args) { - //test string - String random = "Lorem%Ipsum is-simply dummy text."; - - System.out.printf("Using delimiter, word count: %d\n", countWordsUsingTokenizer(random)); - System.out.printf("Using no library method, word count: %d\n", countWordsManually(random)); - System.out.printf("Using punctuation regex, word count: %d\n", countWordsUsingRegex(random)); - System.out.printf("\n\n%s\n\n", random); - + } public static int countWordsUsingRegex(String arg) { diff --git a/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java b/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java new file mode 100644 index 0000000000..a8f6a37434 --- /dev/null +++ b/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java @@ -0,0 +1,24 @@ +package com.baeldung.string.wordcount; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.Test; +public class WordCountUnitTest { + private String string1 = "This is a test sentence with eight words"; + private String string2 = "This#is%a test sentence with eight words"; + + @Test + public void givenStringWith8Words_whenUsingRegexCount_ThenResultEqual8() { + assertThat(WordCounter.countWordsUsingRegex(string2) == 8).isTrue(); + } + + @Test + public void givenStringWith8Words_whenUsingManualMethod_ThenWordCountEqual8() { + 
assertThat(WordCounter.countWordsManually(string1) == 8).isTrue(); + } + + @Test + public void givenAStringWith8Words_whenUsingTokenizer_ThenWordCountEqual8() { + assertThat(WordCounter.countWordsUsingTokenizer(string1) == 8).isTrue(); + } +} From f9cf8eefe2a402b2d15aad66431228f87f6e4738 Mon Sep 17 00:00:00 2001 From: "alfred.samanga@gmail.com" Date: Sat, 17 Aug 2019 08:09:15 +0200 Subject: [PATCH 3/7] Removed unnecessary main method. Changed assert methods to the one for comparing integers --- .../java/com/baeldung/string/wordcount/WordCounter.java | 4 ---- .../com/baeldung/string/wordcount/WordCountUnitTest.java | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java index 1ee7e4a4ef..85d0efd242 100644 --- a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java +++ b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java @@ -3,10 +3,6 @@ package com.baeldung.string.wordcount; import java.util.StringTokenizer; public class WordCounter { - public static void main(String[] args) { - - } - public static int countWordsUsingRegex(String arg) { if (arg == null) { return 0; diff --git a/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java b/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java index a8f6a37434..19e13572b7 100644 --- a/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java +++ b/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java @@ -1,6 +1,6 @@ package com.baeldung.string.wordcount; -import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertEquals; import org.junit.Test; public class WordCountUnitTest { @@ -9,16 +9,16 @@ public class WordCountUnitTest { @Test public void 
givenStringWith8Words_whenUsingRegexCount_ThenResultEqual8() { - assertThat(WordCounter.countWordsUsingRegex(string2) == 8).isTrue(); + assertEquals(8, WordCounter.countWordsUsingRegex(string2)); } @Test public void givenStringWith8Words_whenUsingManualMethod_ThenWordCountEqual8() { - assertThat(WordCounter.countWordsManually(string1) == 8).isTrue(); + assertEquals(8, WordCounter.countWordsManually(string1)); } @Test public void givenAStringWith8Words_whenUsingTokenizer_ThenWordCountEqual8() { - assertThat(WordCounter.countWordsUsingTokenizer(string1) == 8).isTrue(); + assertEquals(8, WordCounter.countWordsUsingTokenizer(string1)); } } From 0c27e46de32bd97c1c66afcf7502782c90a3ecdf Mon Sep 17 00:00:00 2001 From: "alfred.samanga@gmail.com" Date: Sun, 18 Aug 2019 20:31:07 +0200 Subject: [PATCH 4/7] Changed implementation of the method that manually count words in a string for originality. --- .../string/wordcount/WordCounter.java | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java index 85d0efd242..11c2fc7ae5 100644 --- a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java +++ b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java @@ -3,11 +3,14 @@ package com.baeldung.string.wordcount; import java.util.StringTokenizer; public class WordCounter { + static final int WORD = 0; + static final int SEPARATOR = 1; + public static int countWordsUsingRegex(String arg) { if (arg == null) { return 0; } - final String[] words = arg.split("\\pP|\\s+"); + final String[] words = arg.split("[\\pP\\s]+"); return words.length; } @@ -23,21 +26,19 @@ public class WordCounter { if (arg == null) { return 0; } - + int flag = SEPARATOR; int count = 0; + int stringLength = arg.length(); + int characterCounter = 0; - boolean isAWord = false; - int argEnd = 
arg.length() - 1; - - for (int i = 0; i < arg.length(); i++) { - if (Character.isLetter(arg.charAt(i)) && i != argEnd) { - isAWord = true; - } else if (!Character.isLetter(arg.charAt(i)) && isAWord) { - count++; - isAWord = false; - } else if (Character.isLetter(arg.charAt(i)) && i == argEnd) { + while (characterCounter < stringLength) { + if (Character.isLetter(arg.charAt(characterCounter)) && flag == SEPARATOR) { + flag = WORD; count++; + } else if (!Character.isLetter(arg.charAt(characterCounter))) { + flag = SEPARATOR; } + characterCounter++; } return count; } From 73bde4e2c3a4b8d17b8116d6ef0a28899629ae2c Mon Sep 17 00:00:00 2001 From: "alfred.samanga@gmail.com" Date: Thu, 22 Aug 2019 16:48:40 +0200 Subject: [PATCH 5/7] Updated Regex to cater for apostrophes in words like John's --- .../java/com/baeldung/string/wordcount/WordCounter.java | 8 ++++++-- .../com/baeldung/string/wordcount/WordCountUnitTest.java | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java index 11c2fc7ae5..70a4042dc8 100644 --- a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java +++ b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java @@ -10,7 +10,7 @@ public class WordCounter { if (arg == null) { return 0; } - final String[] words = arg.split("[\\pP\\s]+"); + final String[] words = arg.split("[\\pP\\s&&[^']]+"); return words.length; } @@ -32,7 +32,7 @@ public class WordCounter { int characterCounter = 0; while (characterCounter < stringLength) { - if (Character.isLetter(arg.charAt(characterCounter)) && flag == SEPARATOR) { + if ((Character.isLetter(arg.charAt(characterCounter)) || isAllowedWordPunct(arg.charAt(characterCounter))) && flag == SEPARATOR) { flag = WORD; count++; } else if (!Character.isLetter(arg.charAt(characterCounter))) { @@ -42,4 +42,8 @@ public 
class WordCounter { } return count; } + + private static boolean isAllowedWordPunct(char charAt) { + return charAt == '\''; + } } diff --git a/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java b/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java index 19e13572b7..357254f84c 100644 --- a/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java +++ b/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java @@ -3,6 +3,9 @@ package com.baeldung.string.wordcount; import static org.junit.Assert.assertEquals; import org.junit.Test; + +import java.util.StringTokenizer; + public class WordCountUnitTest { private String string1 = "This is a test sentence with eight words"; private String string2 = "This#is%a test sentence with eight words"; @@ -10,11 +13,15 @@ public class WordCountUnitTest { @Test public void givenStringWith8Words_whenUsingRegexCount_ThenResultEqual8() { assertEquals(8, WordCounter.countWordsUsingRegex(string2)); + assertEquals(9, WordCounter.countWordsUsingRegex("no&one#should%ever-write-like,this;but:well")); + assertEquals(7, WordCounter.countWordsUsingRegex("the farmer's wife--she was from Albuquerque")); } @Test public void givenStringWith8Words_whenUsingManualMethod_ThenWordCountEqual8() { assertEquals(8, WordCounter.countWordsManually(string1)); + assertEquals(9, WordCounter.countWordsManually("no&one#should%ever-write-like,this but well")); + assertEquals(7, WordCounter.countWordsManually("the farmer's wife--she was from Albuquerque")); } @Test From 3058af87a960f221f3af6c3f12c99c9e51894961 Mon Sep 17 00:00:00 2001 From: "alfred.samanga@gmail.com" Date: Thu, 22 Aug 2019 20:51:43 +0200 Subject: [PATCH 6/7] Updated manual word count code to cater for apostrophes in words like John's --- .../java/com/baeldung/string/wordcount/WordCounter.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java index 70a4042dc8..30275773a6 100644 --- a/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java +++ b/java-strings-3/src/main/java/com/baeldung/string/wordcount/WordCounter.java @@ -32,10 +32,10 @@ public class WordCounter { int characterCounter = 0; while (characterCounter < stringLength) { - if ((Character.isLetter(arg.charAt(characterCounter)) || isAllowedWordPunct(arg.charAt(characterCounter))) && flag == SEPARATOR) { + if (isAllowedInWord(arg.charAt(characterCounter)) && flag == SEPARATOR) { flag = WORD; count++; - } else if (!Character.isLetter(arg.charAt(characterCounter))) { + } else if (!isAllowedInWord(arg.charAt(characterCounter))) { flag = SEPARATOR; } characterCounter++; @@ -43,7 +43,7 @@ public class WordCounter { return count; } - private static boolean isAllowedWordPunct(char charAt) { - return charAt == '\''; + private static boolean isAllowedInWord(char charAt) { + return charAt == '\'' || Character.isLetter(charAt); } } From 07b01f6dbc636f6ca15469ab2b30a2ea25994091 Mon Sep 17 00:00:00 2001 From: "alfred.samanga@gmail.com" Date: Thu, 22 Aug 2019 21:00:45 +0200 Subject: [PATCH 7/7] Updated Unit tests --- .../java/com/baeldung/string/wordcount/WordCountUnitTest.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java b/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java index 357254f84c..fdd045978f 100644 --- a/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java +++ b/java-strings-3/src/test/java/com/baeldung/string/wordcount/WordCountUnitTest.java @@ -27,5 +27,9 @@ public class WordCountUnitTest { @Test public void givenAStringWith8Words_whenUsingTokenizer_ThenWordCountEqual8() { assertEquals(8, 
WordCounter.countWordsUsingTokenizer(string1)); + assertEquals(3, new StringTokenizer("three blind mice").countTokens()); + assertEquals(4, new StringTokenizer("see\thow\tthey\trun").countTokens()); + assertEquals(7, new StringTokenizer("the farmer's wife--she was from Albuquerque", " -").countTokens()); + assertEquals(10, new StringTokenizer("did,you,ever,see,such,a,sight,in,your,life", ",").countTokens()); } }