[split-digits] Split a String Into Digits- and Non-Digits-Elements (#14349)

* [split-digits] Split a String Into Digits- and Non-Digits-Elements

* [split-digits] JMH benchmark

* [split-digits] using enum
This commit is contained in:
Kai Yuan 2023-07-06 01:20:28 +02:00 committed by GitHub
parent 37add6dc4d
commit b64958a6bf
3 changed files with 127 additions and 1 deletion

View File

@ -18,7 +18,16 @@
<artifactId>commons-lang3</artifactId>
<version>${apache.commons-lang.version}</version>
</dependency>
<!-- JMH core library: supplies the org.openjdk.jmh annotations, Blackhole and Main
     that the benchmark class added in this commit imports. -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>${jmh.version}</version>
</dependency>
<!-- JMH annotation processor; kept on the same version as jmh-core through the
     shared jmh.version property. -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>${jmh.version}</version>
</dependency>
</dependencies>
<build>
@ -38,6 +47,7 @@
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<apache.commons-lang.version>3.12.0</apache.commons-lang.version>
<jmh.version>1.36</jmh.version>
</properties>
</project>

View File

@ -0,0 +1,47 @@
package com.baeldung.digitsandnondigits;
import static com.baeldung.digitsandnondigits.SplitDigitsAndNondigitsUnitTest.parseString;
import java.util.concurrent.TimeUnit;
import org.junit.jupiter.api.Test;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
/**
 * JMH benchmark comparing two ways of cutting a string into alternating runs of
 * digits and non-digits: a lookaround regex handed to {@link String#split(String)},
 * and the hand-written character scanner
 * {@code SplitDigitsAndNondigitsUnitTest.parseString}.
 *
 * <p>The benchmark is launched through the JUnit entry point {@link #benchmark()}.
 */
@State(Scope.Benchmark)
@Threads(1)
@BenchmarkMode(Mode.Throughput)
@Fork(value = 1, warmups = 1)
@Warmup(iterations = 2, time = 10, timeUnit = TimeUnit.MILLISECONDS)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public class BenchmarkLiveTest {

    /** Sample text fed to both benchmark methods. */
    private static final String INPUT = "01Michael Jackson23Michael Jordan42Michael Bolton999Michael Johnson000";

    /** Matches the zero-width position between a non-digit and a digit, in either order. */
    private static final String DIGIT_BOUNDARY_REGEX = "(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)";

    // Declared as a JMH parameter; neither benchmark method below reads it.
    @Param({ "10000" })
    public int iterations;

    /**
     * Splits {@link #INPUT} with the lookaround regex and hands the resulting
     * array to the blackhole.
     *
     * @param blackhole JMH sink that receives the split result
     */
    @Benchmark
    public void regexBased(Blackhole blackhole) {
        String[] segments = INPUT.split(DIGIT_BOUNDARY_REGEX);
        blackhole.consume(segments);
    }

    /**
     * Splits {@link #INPUT} with the character-by-character scanner and hands the
     * resulting list to the blackhole.
     *
     * @param blackhole JMH sink that receives the split result
     */
    @Benchmark
    public void nonRegexBased(Blackhole blackhole) {
        blackhole.consume(parseString(INPUT));
    }

    /**
     * Starts the JMH command-line runner with no arguments.
     *
     * @throws Exception whatever {@code org.openjdk.jmh.Main.main} propagates
     */
    @Test
    public void benchmark() throws Exception {
        org.openjdk.jmh.Main.main(new String[0]);
    }
}

View File

@ -0,0 +1,69 @@
package com.baeldung.digitsandnondigits;
import static com.baeldung.digitsandnondigits.SplitDigitsAndNondigitsUnitTest.State.INIT;
import static com.baeldung.digitsandnondigits.SplitDigitsAndNondigitsUnitTest.State.PARSING_DIGIT;
import static com.baeldung.digitsandnondigits.SplitDigitsAndNondigitsUnitTest.State.PARSING_NON_DIGIT;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.jupiter.api.Test;
/**
 * Shows two ways of splitting a string into alternating digit and non-digit
 * segments: a lookaround regex with {@link String#split(String)}, and a single
 * pass over the characters driven by a small state machine.
 */
public class SplitDigitsAndNondigitsUnitTest {

    // Input that starts and ends with a digit run.
    private static final String INPUT1 = "01Michael Jackson23Michael Jordan42Michael Bolton999Michael Johnson000";
    private static final String[] EXPECTED1 = { "01", "Michael Jackson", "23", "Michael Jordan", "42", "Michael Bolton", "999", "Michael Johnson", "000" };
    private static final List<String> EXPECTED_LIST1 = Arrays.asList(EXPECTED1);

    // Input that starts and ends with a non-digit run.
    private static final String INPUT2 = "Michael Jackson01Michael Jordan23Michael Bolton42Michael Johnson999Great Michaels";
    private static final String[] EXPECTED2 = { "Michael Jackson", "01", "Michael Jordan", "23", "Michael Bolton", "42", "Michael Johnson", "999", "Great Michaels" };
    private static final List<String> EXPECTED_LIST2 = Arrays.asList(EXPECTED2);

    /** Matches the zero-width position between a non-digit and a digit, in either order. */
    private static final String DIGIT_BOUNDARY_REGEX = "(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)";

    /** Scanner state: nothing read yet, inside a digit run, or inside a non-digit run. */
    enum State {
        INIT, PARSING_DIGIT, PARSING_NON_DIGIT
    }

    /**
     * Splits {@code input} into consecutive segments, each made up entirely of
     * ASCII digits ('0'..'9') or entirely of other characters, in order of
     * appearance.
     *
     * <p>The final segment is always appended, so an empty input produces a list
     * holding a single empty string.
     *
     * @param input the text to split
     * @return a new mutable list of the segments
     */
    static List<String> parseString(String input) {
        List<String> segments = new ArrayList<>();
        int segmentStart = 0;
        State current = State.INIT;

        for (int pos = 0; pos < input.length(); pos++) {
            char c = input.charAt(pos);
            boolean asciiDigit = c >= '0' && c <= '9';
            State next = asciiDigit ? State.PARSING_DIGIT : State.PARSING_NON_DIGIT;

            // A switch between digit and non-digit closes the segment in progress.
            // The very first character (INIT) only sets the state and closes nothing.
            if (current != State.INIT && current != next) {
                segments.add(input.substring(segmentStart, pos));
                segmentStart = pos;
            }
            current = next;
        }

        // Whatever follows the last boundary forms the final segment.
        segments.add(input.substring(segmentStart));
        return segments;
    }

    @Test
    void whenUsingLookaroundRegex_thenGetExpectedResult() {
        assertArrayEquals(EXPECTED1, INPUT1.split(DIGIT_BOUNDARY_REGEX));
        assertArrayEquals(EXPECTED2, INPUT2.split(DIGIT_BOUNDARY_REGEX));
    }

    @Test
    void whenCheckEachChar_thenGetExpectedResult() {
        assertEquals(EXPECTED_LIST1, parseString(INPUT1));
        assertEquals(EXPECTED_LIST2, parseString(INPUT2));
    }
}